Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/elfmz/far2l.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorelfmz <fenix1905@tut.by>2022-11-07 20:03:55 +0300
committerelfmz <fenix1905@tut.by>2022-11-07 20:03:55 +0300
commitad077aea28ed66b6c70a798d375eef0b938736ae (patch)
tree2a1267838bf574f95a115ac05928b8f0f30eb69c
parent8a3550ad514179509ff5a60eafb7c550881861c1 (diff)
update 7z decoder sources
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/7z.h4
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/7zAlloc.c0
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/7zAlloc.h0
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/7zArcIn.c44
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/7zBuf.c0
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/7zBuf.h0
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/7zBuf2.c0
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/7zCrc.c196
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/7zCrc.h0
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/7zCrcOpt.c16
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/7zDec.c47
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/7zFile.c252
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/7zFile.h14
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/7zStream.c4
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/7zTypes.h128
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/7zVersion.h10
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/7zVersion.rc0
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Aes.c143
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Aes.h26
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/AesOpt.c796
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Alloc.c40
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Alloc.h11
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Bcj2.c10
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Bcj2.h0
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Bcj2Enc.c8
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Blake2.h0
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Blake2s.c4
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Bra.c14
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Bra.h0
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Bra86.c4
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/BraIA64.c0
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/BwtSort.c6
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/BwtSort.h0
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Compiler.h12
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/CpuArch.c280
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/CpuArch.h169
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Delta.c167
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Delta.h0
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/DllSecur.c16
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/DllSecur.h4
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/HuffEnc.c4
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/HuffEnc.h0
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/LzFind.c1317
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/LzFind.h41
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/LzFindMt.c1303
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/LzFindMt.h46
-rwxr-xr-xmultiarc/src/formats/7z/C/LzFindOpt.c578
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/LzHash.h67
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Lzma2Dec.c5
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Lzma2Dec.h0
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Lzma2DecMt.c36
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Lzma2DecMt.h0
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Lzma2Enc.c14
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Lzma2Enc.h0
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Lzma86.h0
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Lzma86Dec.c0
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Lzma86Enc.c2
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/LzmaDec.c414
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/LzmaDec.h4
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/LzmaEnc.c448
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/LzmaEnc.h4
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/LzmaLib.c0
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/LzmaLib.h25
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/MtCoder.c22
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/MtCoder.h0
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/MtDec.c65
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/MtDec.h9
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Ppmd.h144
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Ppmd7.c890
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Ppmd7.h175
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Ppmd7Dec.c330
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Ppmd7Enc.c318
-rwxr-xr-xmultiarc/src/formats/7z/C/Ppmd7aDec.c279
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Ppmd8.c1116
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Ppmd8.h128
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Ppmd8Dec.c276
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Ppmd8Enc.c291
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Precomp.h0
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/RotateDefs.h0
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Sha1.c623
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Sha1.h56
-rwxr-xr-xmultiarc/src/formats/7z/C/Sha1Opt.c373
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Sha256.c452
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Sha256.h62
-rwxr-xr-xmultiarc/src/formats/7z/C/Sha256Opt.c373
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Sort.c0
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Sort.h0
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Threads.c471
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Threads.h182
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Xz.c4
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Xz.h107
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/XzCrc64.c0
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/XzCrc64.h0
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/XzCrc64Opt.c8
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/XzDec.c317
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/XzEnc.c11
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/XzEnc.h0
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/XzIn.c28
98 files changed, 10612 insertions, 3231 deletions
diff --git a/multiarc/src/formats/7z/C/7z.h b/multiarc/src/formats/7z/C/7z.h
index 6c7886e3..304f75ff 100644..100755
--- a/multiarc/src/formats/7z/C/7z.h
+++ b/multiarc/src/formats/7z/C/7z.h
@@ -1,5 +1,5 @@
/* 7z.h -- 7z interface
-2017-04-03 : Igor Pavlov : Public domain */
+2018-07-02 : Igor Pavlov : Public domain */
#ifndef __7Z_H
#define __7Z_H
@@ -91,6 +91,8 @@ typedef struct
UInt64 *CoderUnpackSizes; // for all coders in all folders
Byte *CodersData;
+
+ UInt64 RangeLimit;
} CSzAr;
UInt64 SzAr_GetFolderUnpackSize(const CSzAr *p, UInt32 folderIndex);
diff --git a/multiarc/src/formats/7z/C/7zAlloc.c b/multiarc/src/formats/7z/C/7zAlloc.c
index c924a529..c924a529 100644..100755
--- a/multiarc/src/formats/7z/C/7zAlloc.c
+++ b/multiarc/src/formats/7z/C/7zAlloc.c
diff --git a/multiarc/src/formats/7z/C/7zAlloc.h b/multiarc/src/formats/7z/C/7zAlloc.h
index 44778f9b..44778f9b 100644..100755
--- a/multiarc/src/formats/7z/C/7zAlloc.h
+++ b/multiarc/src/formats/7z/C/7zAlloc.h
diff --git a/multiarc/src/formats/7z/C/7zArcIn.c b/multiarc/src/formats/7z/C/7zArcIn.c
index f74d0fad..0d9dec41 100644..100755
--- a/multiarc/src/formats/7z/C/7zArcIn.c
+++ b/multiarc/src/formats/7z/C/7zArcIn.c
@@ -1,5 +1,5 @@
/* 7zArcIn.c -- 7z Input functions
-2018-12-31 : Igor Pavlov : Public domain */
+2021-02-09 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -75,7 +75,7 @@ static SRes SzBitUi32s_Alloc(CSzBitUi32s *p, size_t num, ISzAllocPtr alloc)
return SZ_OK;
}
-void SzBitUi32s_Free(CSzBitUi32s *p, ISzAllocPtr alloc)
+static void SzBitUi32s_Free(CSzBitUi32s *p, ISzAllocPtr alloc)
{
ISzAlloc_Free(alloc, p->Defs); p->Defs = NULL;
ISzAlloc_Free(alloc, p->Vals); p->Vals = NULL;
@@ -83,7 +83,7 @@ void SzBitUi32s_Free(CSzBitUi32s *p, ISzAllocPtr alloc)
#define SzBitUi64s_Init(p) { (p)->Defs = NULL; (p)->Vals = NULL; }
-void SzBitUi64s_Free(CSzBitUi64s *p, ISzAllocPtr alloc)
+static void SzBitUi64s_Free(CSzBitUi64s *p, ISzAllocPtr alloc)
{
ISzAlloc_Free(alloc, p->Defs); p->Defs = NULL;
ISzAlloc_Free(alloc, p->Vals); p->Vals = NULL;
@@ -105,6 +105,8 @@ static void SzAr_Init(CSzAr *p)
p->CoderUnpackSizes = NULL;
p->CodersData = NULL;
+
+ p->RangeLimit = 0;
}
static void SzAr_Free(CSzAr *p, ISzAllocPtr alloc)
@@ -502,7 +504,7 @@ SRes SzGetNextFolderItem(CSzFolder *f, CSzData *sd)
return SZ_ERROR_ARCHIVE;
if (propsSize >= 0x80)
return SZ_ERROR_UNSUPPORTED;
- coder->PropsOffset = sd->Data - dataStart;
+ coder->PropsOffset = (size_t)(sd->Data - dataStart);
coder->PropsSize = (Byte)propsSize;
sd->Data += (size_t)propsSize;
sd->Size -= (size_t)propsSize;
@@ -677,7 +679,7 @@ static SRes ReadUnpackInfo(CSzAr *p,
{
UInt32 numCoders, ci, numInStreams = 0;
- p->FoCodersOffsets[fo] = sd.Data - startBufPtr;
+ p->FoCodersOffsets[fo] = (size_t)(sd.Data - startBufPtr);
RINOK(SzReadNumber32(&sd, &numCoders));
if (numCoders == 0 || numCoders > k_Scan_NumCoders_MAX)
@@ -797,7 +799,7 @@ static SRes ReadUnpackInfo(CSzAr *p,
p->FoToCoderUnpackSizes[fo] = numCodersOutStreams;
{
- size_t dataSize = sd.Data - startBufPtr;
+ const size_t dataSize = (size_t)(sd.Data - startBufPtr);
p->FoStartPackStreamIndex[fo] = packStreamIndex;
p->FoCodersOffsets[fo] = dataSize;
MY_ALLOC_ZE_AND_CPY(p->CodersData, dataSize, startBufPtr, alloc);
@@ -885,7 +887,7 @@ static SRes ReadSubStreamsInfo(CSzAr *p, CSzData *sd, CSubStreamInfo *ssi)
if (numStreams != 1 || !SzBitWithVals_Check(&p->FolderCRCs, i))
numSubDigests += numStreams;
}
- ssi->sdNumSubStreams.Size = sd->Data - ssi->sdNumSubStreams.Data;
+ ssi->sdNumSubStreams.Size = (size_t)(sd->Data - ssi->sdNumSubStreams.Data);
continue;
}
if (type == k7zIdCRC || type == k7zIdSize || type == k7zIdEnd)
@@ -907,7 +909,7 @@ static SRes ReadSubStreamsInfo(CSzAr *p, CSzData *sd, CSubStreamInfo *ssi)
{
ssi->sdSizes.Data = sd->Data;
RINOK(SkipNumbers(sd, numUnpackSizesInData));
- ssi->sdSizes.Size = sd->Data - ssi->sdSizes.Data;
+ ssi->sdSizes.Size = (size_t)(sd->Data - ssi->sdSizes.Data);
RINOK(ReadID(sd, &type));
}
@@ -919,7 +921,7 @@ static SRes ReadSubStreamsInfo(CSzAr *p, CSzData *sd, CSubStreamInfo *ssi)
{
ssi->sdCRCs.Data = sd->Data;
RINOK(SkipBitUi32s(sd, numSubDigests));
- ssi->sdCRCs.Size = sd->Data - ssi->sdCRCs.Data;
+ ssi->sdCRCs.Size = (size_t)(sd->Data - ssi->sdCRCs.Data);
}
else
{
@@ -947,7 +949,11 @@ static SRes SzReadStreamsInfo(CSzAr *p,
if (type == k7zIdPackInfo)
{
RINOK(ReadNumber(sd, dataOffset));
+ if (*dataOffset > p->RangeLimit)
+ return SZ_ERROR_ARCHIVE;
RINOK(ReadPackInfo(p, sd, alloc));
+ if (p->PackPositions[p->NumPackStreams] > p->RangeLimit - *dataOffset)
+ return SZ_ERROR_ARCHIVE;
RINOK(ReadID(sd, &type));
}
if (type == k7zIdUnpackInfo)
@@ -1028,12 +1034,12 @@ static SRes SzReadFileNames(const Byte *data, size_t size, UInt32 numFiles, size
return SZ_ERROR_ARCHIVE;
for (p = data + pos;
#ifdef _WIN32
- *(const UInt16 *)p != 0
+ *(const UInt16 *)(const void *)p != 0
#else
p[0] != 0 || p[1] != 0
#endif
; p += 2);
- pos = p - data + 2;
+ pos = (size_t)(p - data) + 2;
*offsets++ = (pos >> 1);
}
while (--numFiles);
@@ -1133,6 +1139,8 @@ static SRes SzReadHeader2(
SRes res;
SzAr_Init(&tempAr);
+ tempAr.RangeLimit = p->db.RangeLimit;
+
res = SzReadAndDecodePackedStreams(inStream, sd, tempBufs, NUM_ADDITIONAL_STREAMS_MAX,
p->startPosAfterHeader, &tempAr, allocTemp);
*numTempBufs = tempAr.NumFolders;
@@ -1526,11 +1534,13 @@ static SRes SzArEx_Open2(
nextHeaderSize = GetUi64(header + 20);
nextHeaderCRC = GetUi32(header + 28);
- p->startPosAfterHeader = startArcPos + k7zStartHeaderSize;
+ p->startPosAfterHeader = (UInt64)startArcPos + k7zStartHeaderSize;
if (CrcCalc(header + 12, 20) != GetUi32(header + 8))
return SZ_ERROR_CRC;
+ p->db.RangeLimit = nextHeaderOffset;
+
nextHeaderSizeT = (size_t)nextHeaderSize;
if (nextHeaderSizeT != nextHeaderSize)
return SZ_ERROR_MEM;
@@ -1543,13 +1553,13 @@ static SRes SzArEx_Open2(
{
Int64 pos = 0;
RINOK(ILookInStream_Seek(inStream, &pos, SZ_SEEK_END));
- if ((UInt64)pos < startArcPos + nextHeaderOffset ||
- (UInt64)pos < startArcPos + k7zStartHeaderSize + nextHeaderOffset ||
- (UInt64)pos < startArcPos + k7zStartHeaderSize + nextHeaderOffset + nextHeaderSize)
+ if ((UInt64)pos < (UInt64)startArcPos + nextHeaderOffset ||
+ (UInt64)pos < (UInt64)startArcPos + k7zStartHeaderSize + nextHeaderOffset ||
+ (UInt64)pos < (UInt64)startArcPos + k7zStartHeaderSize + nextHeaderOffset + nextHeaderSize)
return SZ_ERROR_INPUT_EOF;
}
- RINOK(LookInStream_SeekTo(inStream, startArcPos + k7zStartHeaderSize + nextHeaderOffset));
+ RINOK(LookInStream_SeekTo(inStream, (UInt64)startArcPos + k7zStartHeaderSize + nextHeaderOffset));
if (!Buf_Create(&buf, nextHeaderSizeT, allocTemp))
return SZ_ERROR_MEM;
@@ -1575,6 +1585,8 @@ static SRes SzArEx_Open2(
Buf_Init(&tempBuf);
SzAr_Init(&tempAr);
+ tempAr.RangeLimit = p->db.RangeLimit;
+
res = SzReadAndDecodePackedStreams(inStream, &sd, &tempBuf, 1, p->startPosAfterHeader, &tempAr, allocTemp);
SzAr_Free(&tempAr, allocTemp);
diff --git a/multiarc/src/formats/7z/C/7zBuf.c b/multiarc/src/formats/7z/C/7zBuf.c
index 8865c32a..8865c32a 100644..100755
--- a/multiarc/src/formats/7z/C/7zBuf.c
+++ b/multiarc/src/formats/7z/C/7zBuf.c
diff --git a/multiarc/src/formats/7z/C/7zBuf.h b/multiarc/src/formats/7z/C/7zBuf.h
index 81d1b5b6..81d1b5b6 100644..100755
--- a/multiarc/src/formats/7z/C/7zBuf.h
+++ b/multiarc/src/formats/7z/C/7zBuf.h
diff --git a/multiarc/src/formats/7z/C/7zBuf2.c b/multiarc/src/formats/7z/C/7zBuf2.c
index 20834741..20834741 100644..100755
--- a/multiarc/src/formats/7z/C/7zBuf2.c
+++ b/multiarc/src/formats/7z/C/7zBuf2.c
diff --git a/multiarc/src/formats/7z/C/7zCrc.c b/multiarc/src/formats/7z/C/7zCrc.c
index b4d84f02..f186324d 100644..100755
--- a/multiarc/src/formats/7z/C/7zCrc.c
+++ b/multiarc/src/formats/7z/C/7zCrc.c
@@ -1,5 +1,5 @@
/* 7zCrc.c -- CRC32 init
-2017-06-06 : Igor Pavlov : Public domain */
+2021-04-01 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -26,8 +26,20 @@
typedef UInt32 (MY_FAST_CALL *CRC_FUNC)(UInt32 v, const void *data, size_t size, const UInt32 *table);
+extern
CRC_FUNC g_CrcUpdateT4;
+CRC_FUNC g_CrcUpdateT4;
+extern
+CRC_FUNC g_CrcUpdateT8;
CRC_FUNC g_CrcUpdateT8;
+extern
+CRC_FUNC g_CrcUpdateT0_32;
+CRC_FUNC g_CrcUpdateT0_32;
+extern
+CRC_FUNC g_CrcUpdateT0_64;
+CRC_FUNC g_CrcUpdateT0_64;
+extern
+CRC_FUNC g_CrcUpdate;
CRC_FUNC g_CrcUpdate;
UInt32 g_CrcTable[256 * CRC_NUM_TABLES];
@@ -44,6 +56,7 @@ UInt32 MY_FAST_CALL CrcCalc(const void *data, size_t size)
#define CRC_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8))
+UInt32 MY_FAST_CALL CrcUpdateT1(UInt32 v, const void *data, size_t size, const UInt32 *table);
UInt32 MY_FAST_CALL CrcUpdateT1(UInt32 v, const void *data, size_t size, const UInt32 *table)
{
const Byte *p = (const Byte *)data;
@@ -53,6 +66,166 @@ UInt32 MY_FAST_CALL CrcUpdateT1(UInt32 v, const void *data, size_t size, const U
return v;
}
+
+/* ---------- hardware CRC ---------- */
+
+#ifdef MY_CPU_LE
+
+#if defined(MY_CPU_ARM_OR_ARM64)
+
+// #pragma message("ARM*")
+
+ #if defined(_MSC_VER)
+ #if defined(MY_CPU_ARM64)
+ #if (_MSC_VER >= 1910)
+ #define USE_ARM64_CRC
+ #endif
+ #endif
+ #elif (defined(__clang__) && (__clang_major__ >= 3)) \
+ || (defined(__GNUC__) && (__GNUC__ > 4))
+ #if !defined(__ARM_FEATURE_CRC32)
+ #define __ARM_FEATURE_CRC32 1
+ #if (!defined(__clang__) || (__clang_major__ > 3)) // fix these numbers
+ #define ATTRIB_CRC __attribute__((__target__("arch=armv8-a+crc")))
+ #endif
+ #endif
+ #if defined(__ARM_FEATURE_CRC32)
+ #define USE_ARM64_CRC
+ #include <arm_acle.h>
+ #endif
+ #endif
+
+#else
+
+// no hardware CRC
+
+// #define USE_CRC_EMU
+
+#ifdef USE_CRC_EMU
+
+#pragma message("ARM64 CRC emulation")
+
+MY_FORCE_INLINE
+UInt32 __crc32b(UInt32 v, UInt32 data)
+{
+ const UInt32 *table = g_CrcTable;
+ v = CRC_UPDATE_BYTE_2(v, (Byte)data);
+ return v;
+}
+
+MY_FORCE_INLINE
+UInt32 __crc32w(UInt32 v, UInt32 data)
+{
+ const UInt32 *table = g_CrcTable;
+ v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
+ v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
+ v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
+ v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
+ return v;
+}
+
+MY_FORCE_INLINE
+UInt32 __crc32d(UInt32 v, UInt64 data)
+{
+ const UInt32 *table = g_CrcTable;
+ v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
+ v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
+ v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
+ v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
+ v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
+ v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
+ v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
+ v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
+ return v;
+}
+
+#endif // USE_CRC_EMU
+
+#endif // defined(MY_CPU_ARM64) && defined(MY_CPU_LE)
+
+
+
+#if defined(USE_ARM64_CRC) || defined(USE_CRC_EMU)
+
+#define T0_32_UNROLL_BYTES (4 * 4)
+#define T0_64_UNROLL_BYTES (4 * 8)
+
+#ifndef ATTRIB_CRC
+#define ATTRIB_CRC
+#endif
+// #pragma message("USE ARM HW CRC")
+
+ATTRIB_CRC
+UInt32 MY_FAST_CALL CrcUpdateT0_32(UInt32 v, const void *data, size_t size, const UInt32 *table);
+ATTRIB_CRC
+UInt32 MY_FAST_CALL CrcUpdateT0_32(UInt32 v, const void *data, size_t size, const UInt32 *table)
+{
+ const Byte *p = (const Byte *)data;
+ UNUSED_VAR(table);
+
+ for (; size != 0 && ((unsigned)(ptrdiff_t)p & (T0_32_UNROLL_BYTES - 1)) != 0; size--)
+ v = __crc32b(v, *p++);
+
+ if (size >= T0_32_UNROLL_BYTES)
+ {
+ const Byte *lim = p + size;
+ size &= (T0_32_UNROLL_BYTES - 1);
+ lim -= size;
+ do
+ {
+ v = __crc32w(v, *(const UInt32 *)(const void *)(p));
+ v = __crc32w(v, *(const UInt32 *)(const void *)(p + 4)); p += 2 * 4;
+ v = __crc32w(v, *(const UInt32 *)(const void *)(p));
+ v = __crc32w(v, *(const UInt32 *)(const void *)(p + 4)); p += 2 * 4;
+ }
+ while (p != lim);
+ }
+
+ for (; size != 0; size--)
+ v = __crc32b(v, *p++);
+
+ return v;
+}
+
+ATTRIB_CRC
+UInt32 MY_FAST_CALL CrcUpdateT0_64(UInt32 v, const void *data, size_t size, const UInt32 *table);
+ATTRIB_CRC
+UInt32 MY_FAST_CALL CrcUpdateT0_64(UInt32 v, const void *data, size_t size, const UInt32 *table)
+{
+ const Byte *p = (const Byte *)data;
+ UNUSED_VAR(table);
+
+ for (; size != 0 && ((unsigned)(ptrdiff_t)p & (T0_64_UNROLL_BYTES - 1)) != 0; size--)
+ v = __crc32b(v, *p++);
+
+ if (size >= T0_64_UNROLL_BYTES)
+ {
+ const Byte *lim = p + size;
+ size &= (T0_64_UNROLL_BYTES - 1);
+ lim -= size;
+ do
+ {
+ v = __crc32d(v, *(const UInt64 *)(const void *)(p));
+ v = __crc32d(v, *(const UInt64 *)(const void *)(p + 8)); p += 2 * 8;
+ v = __crc32d(v, *(const UInt64 *)(const void *)(p));
+ v = __crc32d(v, *(const UInt64 *)(const void *)(p + 8)); p += 2 * 8;
+ }
+ while (p != lim);
+ }
+
+ for (; size != 0; size--)
+ v = __crc32b(v, *p++);
+
+ return v;
+}
+
+#endif // defined(USE_ARM64_CRC) || defined(USE_CRC_EMU)
+
+#endif // MY_CPU_LE
+
+
+
+
void MY_FAST_CALL CrcGenerateTable()
{
UInt32 i;
@@ -123,6 +296,27 @@ void MY_FAST_CALL CrcGenerateTable()
}
}
#endif
+ #endif
+ #ifdef MY_CPU_LE
+ #ifdef USE_ARM64_CRC
+ if (CPU_IsSupported_CRC32())
+ {
+ g_CrcUpdateT0_32 = CrcUpdateT0_32;
+ g_CrcUpdateT0_64 = CrcUpdateT0_64;
+ g_CrcUpdate =
+ #if defined(MY_CPU_ARM)
+ CrcUpdateT0_32;
+ #else
+ CrcUpdateT0_64;
+ #endif
+ }
+ #endif
+
+ #ifdef USE_CRC_EMU
+ g_CrcUpdateT0_32 = CrcUpdateT0_32;
+ g_CrcUpdateT0_64 = CrcUpdateT0_64;
+ g_CrcUpdate = CrcUpdateT0_64;
+ #endif
#endif
}
diff --git a/multiarc/src/formats/7z/C/7zCrc.h b/multiarc/src/formats/7z/C/7zCrc.h
index 8fd57958..8fd57958 100644..100755
--- a/multiarc/src/formats/7z/C/7zCrc.h
+++ b/multiarc/src/formats/7z/C/7zCrc.h
diff --git a/multiarc/src/formats/7z/C/7zCrcOpt.c b/multiarc/src/formats/7z/C/7zCrcOpt.c
index 73beba29..69fad9ca 100644..100755
--- a/multiarc/src/formats/7z/C/7zCrcOpt.c
+++ b/multiarc/src/formats/7z/C/7zCrcOpt.c
@@ -1,5 +1,5 @@
/* 7zCrcOpt.c -- CRC32 calculation
-2017-04-03 : Igor Pavlov : Public domain */
+2021-02-09 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -9,6 +9,7 @@
#define CRC_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8))
+UInt32 MY_FAST_CALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const UInt32 *table);
UInt32 MY_FAST_CALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const UInt32 *table)
{
const Byte *p = (const Byte *)data;
@@ -16,7 +17,7 @@ UInt32 MY_FAST_CALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const U
v = CRC_UPDATE_BYTE_2(v, *p);
for (; size >= 4; size -= 4, p += 4)
{
- v ^= *(const UInt32 *)p;
+ v ^= *(const UInt32 *)(const void *)p;
v =
(table + 0x300)[((v ) & 0xFF)]
^ (table + 0x200)[((v >> 8) & 0xFF)]
@@ -28,6 +29,7 @@ UInt32 MY_FAST_CALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const U
return v;
}
+UInt32 MY_FAST_CALL CrcUpdateT8(UInt32 v, const void *data, size_t size, const UInt32 *table);
UInt32 MY_FAST_CALL CrcUpdateT8(UInt32 v, const void *data, size_t size, const UInt32 *table)
{
const Byte *p = (const Byte *)data;
@@ -36,13 +38,13 @@ UInt32 MY_FAST_CALL CrcUpdateT8(UInt32 v, const void *data, size_t size, const U
for (; size >= 8; size -= 8, p += 8)
{
UInt32 d;
- v ^= *(const UInt32 *)p;
+ v ^= *(const UInt32 *)(const void *)p;
v =
(table + 0x700)[((v ) & 0xFF)]
^ (table + 0x600)[((v >> 8) & 0xFF)]
^ (table + 0x500)[((v >> 16) & 0xFF)]
^ (table + 0x400)[((v >> 24))];
- d = *((const UInt32 *)p + 1);
+ d = *((const UInt32 *)(const void *)p + 1);
v ^=
(table + 0x300)[((d ) & 0xFF)]
^ (table + 0x200)[((d >> 8) & 0xFF)]
@@ -72,7 +74,7 @@ UInt32 MY_FAST_CALL CrcUpdateT1_BeT4(UInt32 v, const void *data, size_t size, co
v = CRC_UPDATE_BYTE_2_BE(v, *p);
for (; size >= 4; size -= 4, p += 4)
{
- v ^= *(const UInt32 *)p;
+ v ^= *(const UInt32 *)(const void *)p;
v =
(table + 0x000)[((v ) & 0xFF)]
^ (table + 0x100)[((v >> 8) & 0xFF)]
@@ -94,13 +96,13 @@ UInt32 MY_FAST_CALL CrcUpdateT1_BeT8(UInt32 v, const void *data, size_t size, co
for (; size >= 8; size -= 8, p += 8)
{
UInt32 d;
- v ^= *(const UInt32 *)p;
+ v ^= *(const UInt32 *)(const void *)p;
v =
(table + 0x400)[((v ) & 0xFF)]
^ (table + 0x500)[((v >> 8) & 0xFF)]
^ (table + 0x600)[((v >> 16) & 0xFF)]
^ (table + 0x700)[((v >> 24))];
- d = *((const UInt32 *)p + 1);
+ d = *((const UInt32 *)(const void *)p + 1);
v ^=
(table + 0x000)[((d ) & 0xFF)]
^ (table + 0x100)[((d >> 8) & 0xFF)]
diff --git a/multiarc/src/formats/7z/C/7zDec.c b/multiarc/src/formats/7z/C/7zDec.c
index 7c463521..fbfd016e 100644..100755
--- a/multiarc/src/formats/7z/C/7zDec.c
+++ b/multiarc/src/formats/7z/C/7zDec.c
@@ -1,5 +1,5 @@
/* 7zDec.c -- Decoding from 7z folder
-2019-02-02 : Igor Pavlov : Public domain */
+2021-02-09 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -21,17 +21,20 @@
#endif
#define k_Copy 0
-#define k_Delta 3
+#ifndef _7Z_NO_METHOD_LZMA2
#define k_LZMA2 0x21
+#endif
#define k_LZMA 0x30101
-#define k_BCJ 0x3030103
#define k_BCJ2 0x303011B
+#ifndef _7Z_NO_METHODS_FILTERS
+#define k_Delta 3
+#define k_BCJ 0x3030103
#define k_PPC 0x3030205
#define k_IA64 0x3030401
#define k_ARM 0x3030501
#define k_ARMT 0x3030701
#define k_SPARC 0x3030805
-
+#endif
#ifdef _7ZIP_PPMD_SUPPPORT
@@ -56,7 +59,7 @@ static Byte ReadByte(const IByteIn *pp)
return *p->cur++;
if (p->res == SZ_OK)
{
- size_t size = p->cur - p->begin;
+ size_t size = (size_t)(p->cur - p->begin);
p->processed += size;
p->res = ILookInStream_Skip(p->inStream, size);
size = (1 << 25);
@@ -101,28 +104,32 @@ static SRes SzDecodePpmd(const Byte *props, unsigned propsSize, UInt64 inSize, c
Ppmd7_Init(&ppmd, order);
}
{
- CPpmd7z_RangeDec rc;
- Ppmd7z_RangeDec_CreateVTable(&rc);
- rc.Stream = &s.vt;
- if (!Ppmd7z_RangeDec_Init(&rc))
+ ppmd.rc.dec.Stream = &s.vt;
+ if (!Ppmd7z_RangeDec_Init(&ppmd.rc.dec))
res = SZ_ERROR_DATA;
- else if (s.extra)
- res = (s.res != SZ_OK ? s.res : SZ_ERROR_DATA);
- else
+ else if (!s.extra)
{
- SizeT i;
- for (i = 0; i < outSize; i++)
+ Byte *buf = outBuffer;
+ const Byte *lim = buf + outSize;
+ for (; buf != lim; buf++)
{
- int sym = Ppmd7_DecodeSymbol(&ppmd, &rc.vt);
+ int sym = Ppmd7z_DecodeSymbol(&ppmd);
if (s.extra || sym < 0)
break;
- outBuffer[i] = (Byte)sym;
+ *buf = (Byte)sym;
}
- if (i != outSize)
- res = (s.res != SZ_OK ? s.res : SZ_ERROR_DATA);
- else if (s.processed + (s.cur - s.begin) != inSize || !Ppmd7z_RangeDec_IsFinishedOK(&rc))
+ if (buf != lim)
+ res = SZ_ERROR_DATA;
+ else if (!Ppmd7z_RangeDec_IsFinishedOK(&ppmd.rc.dec))
+ {
+ /* if (Ppmd7z_DecodeSymbol(&ppmd) != PPMD7_SYM_END || !Ppmd7z_RangeDec_IsFinishedOK(&ppmd.rc.dec)) */
res = SZ_ERROR_DATA;
+ }
}
+ if (s.extra)
+ res = (s.res != SZ_OK ? s.res : SZ_ERROR_DATA);
+ else if (s.processed + (size_t)(s.cur - s.begin) != inSize)
+ res = SZ_ERROR_DATA;
}
Ppmd7_Free(&ppmd, allocMain);
return res;
@@ -365,7 +372,9 @@ static SRes CheckSupportedFolder(const CSzFolder *f)
return SZ_ERROR_UNSUPPORTED;
}
+#ifndef _7Z_NO_METHODS_FILTERS
#define CASE_BRA_CONV(isa) case k_ ## isa: isa ## _Convert(outBuffer, outSize, 0, 0); break;
+#endif
static SRes SzFolder_Decode2(const CSzFolder *folder,
const Byte *propsData,
diff --git a/multiarc/src/formats/7z/C/7zFile.c b/multiarc/src/formats/7z/C/7zFile.c
index 8992fb1c..13d2efa4 100644..100755
--- a/multiarc/src/formats/7z/C/7zFile.c
+++ b/multiarc/src/formats/7z/C/7zFile.c
@@ -1,5 +1,5 @@
/* 7zFile.c -- File IO
-2017-04-03 : Igor Pavlov : Public domain */
+2021-04-29 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -7,9 +7,19 @@
#ifndef USE_WINDOWS_FILE
-#ifndef UNDER_CE
-#include <errno.h>
-#endif
+ #include <errno.h>
+
+ #ifndef USE_FOPEN
+ #include <stdio.h>
+ #include <fcntl.h>
+ #ifdef _WIN32
+ #include <io.h>
+ typedef int ssize_t;
+ typedef int off_t;
+ #else
+ #include <unistd.h>
+ #endif
+ #endif
#else
@@ -23,30 +33,36 @@
And message can be "Network connection was lost"
*/
-#define kChunkSizeMax (1 << 22)
-
#endif
+#define kChunkSizeMax (1 << 22)
+
void File_Construct(CSzFile *p)
{
#ifdef USE_WINDOWS_FILE
p->handle = INVALID_HANDLE_VALUE;
- #else
+ #elif defined(USE_FOPEN)
p->file = NULL;
+ #else
+ p->fd = -1;
#endif
}
#if !defined(UNDER_CE) || !defined(USE_WINDOWS_FILE)
+
static WRes File_Open(CSzFile *p, const char *name, int writeMode)
{
#ifdef USE_WINDOWS_FILE
+
p->handle = CreateFileA(name,
writeMode ? GENERIC_WRITE : GENERIC_READ,
FILE_SHARE_READ, NULL,
writeMode ? CREATE_ALWAYS : OPEN_EXISTING,
FILE_ATTRIBUTE_NORMAL, NULL);
return (p->handle != INVALID_HANDLE_VALUE) ? 0 : GetLastError();
- #else
+
+ #elif defined(USE_FOPEN)
+
p->file = fopen(name, writeMode ? "wb+" : "rb");
return (p->file != 0) ? 0 :
#ifdef UNDER_CE
@@ -54,13 +70,34 @@ static WRes File_Open(CSzFile *p, const char *name, int writeMode)
#else
errno;
#endif
+
+ #else
+
+ int flags = (writeMode ? (O_CREAT | O_EXCL | O_WRONLY) : O_RDONLY);
+ #ifdef O_BINARY
+ flags |= O_BINARY;
+ #endif
+ p->fd = open(name, flags, 0666);
+ return (p->fd != -1) ? 0 : errno;
+
#endif
}
WRes InFile_Open(CSzFile *p, const char *name) { return File_Open(p, name, 0); }
-WRes OutFile_Open(CSzFile *p, const char *name) { return File_Open(p, name, 1); }
+
+WRes OutFile_Open(CSzFile *p, const char *name)
+{
+ #if defined(USE_WINDOWS_FILE) || defined(USE_FOPEN)
+ return File_Open(p, name, 1);
+ #else
+ p->fd = creat(name, 0666);
+ return (p->fd != -1) ? 0 : errno;
+ #endif
+}
+
#endif
+
#ifdef USE_WINDOWS_FILE
static WRes File_OpenW(CSzFile *p, const WCHAR *name, int writeMode)
{
@@ -78,74 +115,124 @@ WRes OutFile_OpenW(CSzFile *p, const WCHAR *name) { return File_OpenW(p, name, 1
WRes File_Close(CSzFile *p)
{
#ifdef USE_WINDOWS_FILE
+
if (p->handle != INVALID_HANDLE_VALUE)
{
if (!CloseHandle(p->handle))
return GetLastError();
p->handle = INVALID_HANDLE_VALUE;
}
- #else
+
+ #elif defined(USE_FOPEN)
+
if (p->file != NULL)
{
int res = fclose(p->file);
if (res != 0)
+ {
+ if (res == EOF)
+ return errno;
return res;
+ }
p->file = NULL;
}
+
+ #else
+
+ if (p->fd != -1)
+ {
+ if (close(p->fd) != 0)
+ return errno;
+ p->fd = -1;
+ }
+
#endif
+
return 0;
}
+
WRes File_Read(CSzFile *p, void *data, size_t *size)
{
size_t originalSize = *size;
+ *size = 0;
if (originalSize == 0)
return 0;
#ifdef USE_WINDOWS_FILE
- *size = 0;
do
{
- DWORD curSize = (originalSize > kChunkSizeMax) ? kChunkSizeMax : (DWORD)originalSize;
+ const DWORD curSize = (originalSize > kChunkSizeMax) ? kChunkSizeMax : (DWORD)originalSize;
DWORD processed = 0;
- BOOL res = ReadFile(p->handle, data, curSize, &processed, NULL);
+ const BOOL res = ReadFile(p->handle, data, curSize, &processed, NULL);
data = (void *)((Byte *)data + processed);
originalSize -= processed;
*size += processed;
if (!res)
return GetLastError();
+ // debug : we can break here for partial reading mode
+ if (processed == 0)
+ break;
+ }
+ while (originalSize > 0);
+
+ #elif defined(USE_FOPEN)
+
+ do
+ {
+ const size_t curSize = (originalSize > kChunkSizeMax) ? kChunkSizeMax : originalSize;
+ const size_t processed = fread(data, 1, curSize, p->file);
+ data = (void *)((Byte *)data + (size_t)processed);
+ originalSize -= processed;
+ *size += processed;
+ if (processed != curSize)
+ return ferror(p->file);
+ // debug : we can break here for partial reading mode
if (processed == 0)
break;
}
while (originalSize > 0);
- return 0;
#else
-
- *size = fread(data, 1, originalSize, p->file);
- if (*size == originalSize)
- return 0;
- return ferror(p->file);
-
+
+ do
+ {
+ const size_t curSize = (originalSize > kChunkSizeMax) ? kChunkSizeMax : originalSize;
+ const ssize_t processed = read(p->fd, data, curSize);
+ if (processed == -1)
+ return errno;
+ if (processed == 0)
+ break;
+ data = (void *)((Byte *)data + (size_t)processed);
+ originalSize -= (size_t)processed;
+ *size += (size_t)processed;
+ // debug : we can break here for partial reading mode
+ // break;
+ }
+ while (originalSize > 0);
+
#endif
+
+ return 0;
}
+
WRes File_Write(CSzFile *p, const void *data, size_t *size)
{
size_t originalSize = *size;
+ *size = 0;
if (originalSize == 0)
return 0;
#ifdef USE_WINDOWS_FILE
- *size = 0;
do
{
- DWORD curSize = (originalSize > kChunkSizeMax) ? kChunkSizeMax : (DWORD)originalSize;
+ const DWORD curSize = (originalSize > kChunkSizeMax) ? kChunkSizeMax : (DWORD)originalSize;
DWORD processed = 0;
- BOOL res = WriteFile(p->handle, data, curSize, &processed, NULL);
- data = (void *)((Byte *)data + processed);
+ const BOOL res = WriteFile(p->handle, data, curSize, &processed, NULL);
+ data = (const void *)((const Byte *)data + processed);
originalSize -= processed;
*size += processed;
if (!res)
@@ -154,26 +241,52 @@ WRes File_Write(CSzFile *p, const void *data, size_t *size)
break;
}
while (originalSize > 0);
- return 0;
+
+ #elif defined(USE_FOPEN)
+
+ do
+ {
+ const size_t curSize = (originalSize > kChunkSizeMax) ? kChunkSizeMax : originalSize;
+ const size_t processed = fwrite(data, 1, curSize, p->file);
+ data = (void *)((Byte *)data + (size_t)processed);
+ originalSize -= processed;
+ *size += processed;
+ if (processed != curSize)
+ return ferror(p->file);
+ if (processed == 0)
+ break;
+ }
+ while (originalSize > 0);
#else
- *size = fwrite(data, 1, originalSize, p->file);
- if (*size == originalSize)
- return 0;
- return ferror(p->file);
-
+ do
+ {
+ const size_t curSize = (originalSize > kChunkSizeMax) ? kChunkSizeMax : originalSize;
+ const ssize_t processed = write(p->fd, data, curSize);
+ if (processed == -1)
+ return errno;
+ if (processed == 0)
+ break;
+ data = (void *)((Byte *)data + (size_t)processed);
+ originalSize -= (size_t)processed;
+ *size += (size_t)processed;
+ }
+ while (originalSize > 0);
+
#endif
+
+ return 0;
}
+
WRes File_Seek(CSzFile *p, Int64 *pos, ESzSeek origin)
{
#ifdef USE_WINDOWS_FILE
- LARGE_INTEGER value;
DWORD moveMethod;
- value.LowPart = (DWORD)*pos;
- value.HighPart = (LONG)((UInt64)*pos >> 16 >> 16); /* for case when UInt64 is 32-bit only */
+ UInt32 low = (UInt32)*pos;
+ LONG high = (LONG)((UInt64)*pos >> 16 >> 16); /* for case when UInt64 is 32-bit only */
switch (origin)
{
case SZ_SEEK_SET: moveMethod = FILE_BEGIN; break;
@@ -181,34 +294,52 @@ WRes File_Seek(CSzFile *p, Int64 *pos, ESzSeek origin)
case SZ_SEEK_END: moveMethod = FILE_END; break;
default: return ERROR_INVALID_PARAMETER;
}
- value.LowPart = SetFilePointer(p->handle, value.LowPart, &value.HighPart, moveMethod);
- if (value.LowPart == 0xFFFFFFFF)
+ low = SetFilePointer(p->handle, (LONG)low, &high, moveMethod);
+ if (low == (UInt32)0xFFFFFFFF)
{
WRes res = GetLastError();
if (res != NO_ERROR)
return res;
}
- *pos = ((Int64)value.HighPart << 32) | value.LowPart;
+ *pos = ((Int64)high << 32) | low;
return 0;
#else
- int moveMethod;
- int res;
+ int moveMethod; // = origin;
+
switch (origin)
{
case SZ_SEEK_SET: moveMethod = SEEK_SET; break;
case SZ_SEEK_CUR: moveMethod = SEEK_CUR; break;
case SZ_SEEK_END: moveMethod = SEEK_END; break;
- default: return 1;
+ default: return EINVAL;
}
- res = fseek(p->file, (long)*pos, moveMethod);
- *pos = ftell(p->file);
- return res;
- #endif
+ #if defined(USE_FOPEN)
+ {
+ int res = fseek(p->file, (long)*pos, moveMethod);
+ if (res == -1)
+ return errno;
+ *pos = ftell(p->file);
+ if (*pos == -1)
+ return errno;
+ return 0;
+ }
+ #else
+ {
+ off_t res = lseek(p->fd, (off_t)*pos, moveMethod);
+ if (res == -1)
+ return errno;
+ *pos = res;
+ return 0;
+ }
+
+ #endif // USE_FOPEN
+ #endif // USE_WINDOWS_FILE
}
+
WRes File_GetLength(CSzFile *p, UInt64 *length)
{
#ifdef USE_WINDOWS_FILE
@@ -224,13 +355,31 @@ WRes File_GetLength(CSzFile *p, UInt64 *length)
*length = (((UInt64)sizeHigh) << 32) + sizeLow;
return 0;
- #else
+ #elif defined(USE_FOPEN)
long pos = ftell(p->file);
int res = fseek(p->file, 0, SEEK_END);
*length = ftell(p->file);
fseek(p->file, pos, SEEK_SET);
return res;
+
+ #else
+
+ off_t pos;
+ *length = 0;
+ pos = lseek(p->fd, 0, SEEK_CUR);
+ if (pos != -1)
+ {
+ const off_t len2 = lseek(p->fd, 0, SEEK_END);
+ const off_t res2 = lseek(p->fd, pos, SEEK_SET);
+ if (len2 != -1)
+ {
+ *length = (UInt64)len2;
+ if (res2 != -1)
+ return 0;
+ }
+ }
+ return errno;
#endif
}
@@ -241,7 +390,9 @@ WRes File_GetLength(CSzFile *p, UInt64 *length)
static SRes FileSeqInStream_Read(const ISeqInStream *pp, void *buf, size_t *size)
{
CFileSeqInStream *p = CONTAINER_FROM_VTBL(pp, CFileSeqInStream, vt);
- return File_Read(&p->file, buf, size) == 0 ? SZ_OK : SZ_ERROR_READ;
+ WRes wres = File_Read(&p->file, buf, size);
+ p->wres = wres;
+ return (wres == 0) ? SZ_OK : SZ_ERROR_READ;
}
void FileSeqInStream_CreateVTable(CFileSeqInStream *p)
@@ -255,13 +406,17 @@ void FileSeqInStream_CreateVTable(CFileSeqInStream *p)
static SRes FileInStream_Read(const ISeekInStream *pp, void *buf, size_t *size)
{
CFileInStream *p = CONTAINER_FROM_VTBL(pp, CFileInStream, vt);
- return (File_Read(&p->file, buf, size) == 0) ? SZ_OK : SZ_ERROR_READ;
+ WRes wres = File_Read(&p->file, buf, size);
+ p->wres = wres;
+ return (wres == 0) ? SZ_OK : SZ_ERROR_READ;
}
static SRes FileInStream_Seek(const ISeekInStream *pp, Int64 *pos, ESzSeek origin)
{
CFileInStream *p = CONTAINER_FROM_VTBL(pp, CFileInStream, vt);
- return File_Seek(&p->file, pos, origin);
+ WRes wres = File_Seek(&p->file, pos, origin);
+ p->wres = wres;
+ return (wres == 0) ? SZ_OK : SZ_ERROR_READ;
}
void FileInStream_CreateVTable(CFileInStream *p)
@@ -276,7 +431,8 @@ void FileInStream_CreateVTable(CFileInStream *p)
static size_t FileOutStream_Write(const ISeqOutStream *pp, const void *data, size_t size)
{
CFileOutStream *p = CONTAINER_FROM_VTBL(pp, CFileOutStream, vt);
- File_Write(&p->file, data, &size);
+ WRes wres = File_Write(&p->file, data, &size);
+ p->wres = wres;
return size;
}
diff --git a/multiarc/src/formats/7z/C/7zFile.h b/multiarc/src/formats/7z/C/7zFile.h
index 0e792538..788abb6b 100644..100755
--- a/multiarc/src/formats/7z/C/7zFile.h
+++ b/multiarc/src/formats/7z/C/7zFile.h
@@ -1,17 +1,20 @@
/* 7zFile.h -- File IO
-2017-04-03 : Igor Pavlov : Public domain */
+2021-02-15 : Igor Pavlov : Public domain */
#ifndef __7Z_FILE_H
#define __7Z_FILE_H
#ifdef _WIN32
#define USE_WINDOWS_FILE
+// #include <windows.h>
#endif
#ifdef USE_WINDOWS_FILE
#include <windows.h>
#else
-#include <stdio.h>
+// note: USE_FOPEN mode is limited to 32-bit file size
+// #define USE_FOPEN
+// #include <stdio.h>
#endif
#include "7zTypes.h"
@@ -24,8 +27,10 @@ typedef struct
{
#ifdef USE_WINDOWS_FILE
HANDLE handle;
- #else
+ #elif defined(USE_FOPEN)
FILE *file;
+ #else
+ int fd;
#endif
} CSzFile;
@@ -56,6 +61,7 @@ typedef struct
{
ISeqInStream vt;
CSzFile file;
+ WRes wres;
} CFileSeqInStream;
void FileSeqInStream_CreateVTable(CFileSeqInStream *p);
@@ -65,6 +71,7 @@ typedef struct
{
ISeekInStream vt;
CSzFile file;
+ WRes wres;
} CFileInStream;
void FileInStream_CreateVTable(CFileInStream *p);
@@ -74,6 +81,7 @@ typedef struct
{
ISeqOutStream vt;
CSzFile file;
+ WRes wres;
} CFileOutStream;
void FileOutStream_CreateVTable(CFileOutStream *p);
diff --git a/multiarc/src/formats/7z/C/7zStream.c b/multiarc/src/formats/7z/C/7zStream.c
index 6b5aa162..28a14604 100644..100755
--- a/multiarc/src/formats/7z/C/7zStream.c
+++ b/multiarc/src/formats/7z/C/7zStream.c
@@ -1,5 +1,5 @@
/* 7zStream.c -- 7z Stream functions
-2017-04-03 : Igor Pavlov : Public domain */
+2021-02-09 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -37,7 +37,7 @@ SRes SeqInStream_ReadByte(const ISeqInStream *stream, Byte *buf)
SRes LookInStream_SeekTo(const ILookInStream *stream, UInt64 offset)
{
- Int64 t = offset;
+ Int64 t = (Int64)offset;
return ILookInStream_Seek(stream, &t, SZ_SEEK_SET);
}
diff --git a/multiarc/src/formats/7z/C/7zTypes.h b/multiarc/src/formats/7z/C/7zTypes.h
index 65b3af63..c5065a7c 100644..100755
--- a/multiarc/src/formats/7z/C/7zTypes.h
+++ b/multiarc/src/formats/7z/C/7zTypes.h
@@ -1,11 +1,13 @@
/* 7zTypes.h -- Basic types
-2018-08-04 : Igor Pavlov : Public domain */
+2022-04-01 : Igor Pavlov : Public domain */
#ifndef __7Z_TYPES_H
#define __7Z_TYPES_H
#ifdef _WIN32
/* #include <windows.h> */
+#else
+#include <errno.h>
#endif
#include <stddef.h>
@@ -43,18 +45,76 @@ EXTERN_C_BEGIN
typedef int SRes;
+#ifdef _MSC_VER
+ #if _MSC_VER > 1200
+ #define MY_ALIGN(n) __declspec(align(n))
+ #else
+ #define MY_ALIGN(n)
+ #endif
+#else
+ #define MY_ALIGN(n) __attribute__ ((aligned(n)))
+#endif
+
+
#ifdef _WIN32
/* typedef DWORD WRes; */
typedef unsigned WRes;
#define MY_SRes_HRESULT_FROM_WRes(x) HRESULT_FROM_WIN32(x)
-#else
+// #define MY_HRES_ERROR__INTERNAL_ERROR MY_SRes_HRESULT_FROM_WRes(ERROR_INTERNAL_ERROR)
+#else // _WIN32
+
+// #define ENV_HAVE_LSTAT
typedef int WRes;
-#define MY__FACILITY_WIN32 7
-#define MY__FACILITY__WRes MY__FACILITY_WIN32
-#define MY_SRes_HRESULT_FROM_WRes(x) ((HRESULT)(x) <= 0 ? ((HRESULT)(x)) : ((HRESULT) (((x) & 0x0000FFFF) | (MY__FACILITY__WRes << 16) | 0x80000000)))
+
+// (FACILITY_ERRNO = 0x800) is 7zip's FACILITY constant to represent (errno) errors in HRESULT
+#define MY__FACILITY_ERRNO 0x800
+#define MY__FACILITY_WIN32 7
+#define MY__FACILITY__WRes MY__FACILITY_ERRNO
+
+#define MY_HRESULT_FROM_errno_CONST_ERROR(x) ((HRESULT)( \
+ ( (HRESULT)(x) & 0x0000FFFF) \
+ | (MY__FACILITY__WRes << 16) \
+ | (HRESULT)0x80000000 ))
+
+#define MY_SRes_HRESULT_FROM_WRes(x) \
+ ((HRESULT)(x) <= 0 ? ((HRESULT)(x)) : MY_HRESULT_FROM_errno_CONST_ERROR(x))
+
+// we call macro HRESULT_FROM_WIN32 for system errors (WRes) that are (errno)
+#define HRESULT_FROM_WIN32(x) MY_SRes_HRESULT_FROM_WRes(x)
+
+/*
+#define ERROR_FILE_NOT_FOUND 2L
+#define ERROR_ACCESS_DENIED 5L
+#define ERROR_NO_MORE_FILES 18L
+#define ERROR_LOCK_VIOLATION 33L
+#define ERROR_FILE_EXISTS 80L
+#define ERROR_DISK_FULL 112L
+#define ERROR_NEGATIVE_SEEK 131L
+#define ERROR_ALREADY_EXISTS 183L
+#define ERROR_DIRECTORY 267L
+#define ERROR_TOO_MANY_POSTS 298L
+
+#define ERROR_INTERNAL_ERROR 1359L
+#define ERROR_INVALID_REPARSE_DATA 4392L
+#define ERROR_REPARSE_TAG_INVALID 4393L
+#define ERROR_REPARSE_TAG_MISMATCH 4394L
+*/
+
+// if (MY__FACILITY__WRes != FACILITY_WIN32),
+// we use FACILITY_WIN32 for COM errors:
+#define E_OUTOFMEMORY ((HRESULT)0x8007000EL)
+#define E_INVALIDARG ((HRESULT)0x80070057L)
+#define MY__E_ERROR_NEGATIVE_SEEK ((HRESULT)0x80070083L)
+
+/*
+// we can use FACILITY_ERRNO for some COM errors, that have errno equivalents:
+#define E_OUTOFMEMORY MY_HRESULT_FROM_errno_CONST_ERROR(ENOMEM)
+#define E_INVALIDARG MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL)
+#define MY__E_ERROR_NEGATIVE_SEEK MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL)
+*/
#endif
@@ -63,6 +123,10 @@ typedef int WRes;
#define RINOK(x) { int __result__ = (x); if (__result__ != 0) return __result__; }
#endif
+#ifndef RINOK_WRes
+#define RINOK_WRes(x) { WRes __result__ = (x); if (__result__ != 0) return __result__; }
+#endif
+
typedef unsigned char Byte;
typedef short Int16;
typedef unsigned short UInt16;
@@ -75,6 +139,12 @@ typedef int Int32;
typedef unsigned int UInt32;
#endif
+
+
+
+#define MY_HRES_ERROR__INTERNAL_ERROR ((HRESULT)0x8007054FL)
+
+
#ifdef _SZ_NO_INT_64
/* define _SZ_NO_INT_64, if your compiler doesn't support 64-bit integers.
@@ -128,25 +198,37 @@ typedef int BoolInt;
#define MY_CDECL __cdecl
#define MY_FAST_CALL __fastcall
-#else
+#else // _MSC_VER
+#if (defined(__GNUC__) && (__GNUC__ >= 4)) \
+ || (defined(__clang__) && (__clang_major__ >= 4)) \
+ || defined(__INTEL_COMPILER) \
+ || defined(__xlC__)
+#define MY_NO_INLINE __attribute__((noinline))
+// #define MY_FORCE_INLINE __attribute__((always_inline)) inline
+#else
#define MY_NO_INLINE
+#endif
+
#define MY_FORCE_INLINE
-#define MY_CDECL
-#define MY_FAST_CALL
-/* inline keyword : for C++ / C99 */
-/* GCC, clang: */
-/*
-#if defined (__GNUC__) && (__GNUC__ >= 4)
-#define MY_FORCE_INLINE __attribute__((always_inline))
-#define MY_NO_INLINE __attribute__((noinline))
-#endif
-*/
+#define MY_CDECL
+#if defined(_M_IX86) \
+ || defined(__i386__)
+// #define MY_FAST_CALL __attribute__((fastcall))
+// #define MY_FAST_CALL __attribute__((cdecl))
+#define MY_FAST_CALL
+#elif defined(MY_CPU_AMD64)
+// #define MY_FAST_CALL __attribute__((ms_abi))
+#define MY_FAST_CALL
+#else
+#define MY_FAST_CALL
#endif
+#endif // _MSC_VER
+
/* The following interfaces use first parameter as pointer to structure */
@@ -335,12 +417,11 @@ struct ISzAlloc
GCC 4.8.1 : classes with non-public variable members"
*/
-#define MY_container_of(ptr, type, m) ((type *)((char *)(1 ? (ptr) : &((type *)0)->m) - MY_offsetof(type, m)))
-
+#define MY_container_of(ptr, type, m) ((type *)(void *)((char *)(void *)(1 ? (ptr) : &((type *)0)->m) - MY_offsetof(type, m)))
#endif
-#define CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) ((type *)(ptr))
+#define CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) ((type *)(void *)(ptr))
/*
#define CONTAINER_FROM_VTBL(ptr, type, m) CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m)
@@ -353,6 +434,7 @@ struct ISzAlloc
*/
+#define MY_memset_0_ARRAY(a) memset((a), 0, sizeof(a))
#ifdef _WIN32
@@ -370,6 +452,14 @@ struct ISzAlloc
#endif
+#define k_PropVar_TimePrec_0 0
+#define k_PropVar_TimePrec_Unix 1
+#define k_PropVar_TimePrec_DOS 2
+#define k_PropVar_TimePrec_HighPrec 3
+#define k_PropVar_TimePrec_Base 16
+#define k_PropVar_TimePrec_100ns (k_PropVar_TimePrec_Base + 7)
+#define k_PropVar_TimePrec_1ns (k_PropVar_TimePrec_Base + 9)
+
EXTERN_C_END
#endif
diff --git a/multiarc/src/formats/7z/C/7zVersion.h b/multiarc/src/formats/7z/C/7zVersion.h
index c176823a..49ea81dd 100644..100755
--- a/multiarc/src/formats/7z/C/7zVersion.h
+++ b/multiarc/src/formats/7z/C/7zVersion.h
@@ -1,7 +1,7 @@
-#define MY_VER_MAJOR 19
-#define MY_VER_MINOR 00
+#define MY_VER_MAJOR 22
+#define MY_VER_MINOR 01
#define MY_VER_BUILD 0
-#define MY_VERSION_NUMBERS "19.00"
+#define MY_VERSION_NUMBERS "22.01"
#define MY_VERSION MY_VERSION_NUMBERS
#ifdef MY_CPU_NAME
@@ -10,12 +10,12 @@
#define MY_VERSION_CPU MY_VERSION
#endif
-#define MY_DATE "2019-02-21"
+#define MY_DATE "2022-07-15"
#undef MY_COPYRIGHT
#undef MY_VERSION_COPYRIGHT_DATE
#define MY_AUTHOR_NAME "Igor Pavlov"
#define MY_COPYRIGHT_PD "Igor Pavlov : Public domain"
-#define MY_COPYRIGHT_CR "Copyright (c) 1999-2018 Igor Pavlov"
+#define MY_COPYRIGHT_CR "Copyright (c) 1999-2022 Igor Pavlov"
#ifdef USE_COPYRIGHT_CR
#define MY_COPYRIGHT MY_COPYRIGHT_CR
diff --git a/multiarc/src/formats/7z/C/7zVersion.rc b/multiarc/src/formats/7z/C/7zVersion.rc
index e520995d..e520995d 100644..100755
--- a/multiarc/src/formats/7z/C/7zVersion.rc
+++ b/multiarc/src/formats/7z/C/7zVersion.rc
diff --git a/multiarc/src/formats/7z/C/Aes.c b/multiarc/src/formats/7z/C/Aes.c
index 1cdd0e78..27e32e62 100644..100755
--- a/multiarc/src/formats/7z/C/Aes.c
+++ b/multiarc/src/formats/7z/C/Aes.c
@@ -1,10 +1,17 @@
/* Aes.c -- AES encryption / decryption
-2017-01-24 : Igor Pavlov : Public domain */
+2021-05-13 : Igor Pavlov : Public domain */
#include "Precomp.h"
-#include "Aes.h"
#include "CpuArch.h"
+#include "Aes.h"
+
+AES_CODE_FUNC g_AesCbc_Decode;
+#ifndef _SFX
+AES_CODE_FUNC g_AesCbc_Encode;
+AES_CODE_FUNC g_AesCtr_Code;
+UInt32 g_Aes_SupportedFunctions_Flags;
+#endif
static UInt32 T[256 * 4];
static const Byte Sbox[256] = {
@@ -25,23 +32,10 @@ static const Byte Sbox[256] = {
0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16};
-void MY_FAST_CALL AesCbc_Encode(UInt32 *ivAes, Byte *data, size_t numBlocks);
-void MY_FAST_CALL AesCbc_Decode(UInt32 *ivAes, Byte *data, size_t numBlocks);
-void MY_FAST_CALL AesCtr_Code(UInt32 *ivAes, Byte *data, size_t numBlocks);
-
-void MY_FAST_CALL AesCbc_Encode_Intel(UInt32 *ivAes, Byte *data, size_t numBlocks);
-void MY_FAST_CALL AesCbc_Decode_Intel(UInt32 *ivAes, Byte *data, size_t numBlocks);
-void MY_FAST_CALL AesCtr_Code_Intel(UInt32 *ivAes, Byte *data, size_t numBlocks);
-
-AES_CODE_FUNC g_AesCbc_Encode;
-AES_CODE_FUNC g_AesCbc_Decode;
-AES_CODE_FUNC g_AesCtr_Code;
static UInt32 D[256 * 4];
static Byte InvS[256];
-static const Byte Rcon[11] = { 0x00, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36 };
-
#define xtime(x) ((((x) << 1) ^ (((x) & 0x80) != 0 ? 0x1B : 0)) & 0xFF)
#define Ui32(a0, a1, a2, a3) ((UInt32)(a0) | ((UInt32)(a1) << 8) | ((UInt32)(a2) << 16) | ((UInt32)(a3) << 24))
@@ -57,6 +51,36 @@ static const Byte Rcon[11] = { 0x00, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0
#define DD(x) (D + (x << 8))
+// #define _SHOW_AES_STATUS
+
+#ifdef MY_CPU_X86_OR_AMD64
+ #define USE_HW_AES
+#elif defined(MY_CPU_ARM_OR_ARM64) && defined(MY_CPU_LE)
+ #if defined(__clang__)
+ #if (__clang_major__ >= 8) // fix that check
+ #define USE_HW_AES
+ #endif
+ #elif defined(__GNUC__)
+ #if (__GNUC__ >= 6) // fix that check
+ #define USE_HW_AES
+ #endif
+ #elif defined(_MSC_VER)
+ #if _MSC_VER >= 1910
+ #define USE_HW_AES
+ #endif
+ #endif
+#endif
+
+#ifdef USE_HW_AES
+#ifdef _SHOW_AES_STATUS
+#include <stdio.h>
+#define _PRF(x) x
+#else
+#define _PRF(x)
+#endif
+#endif
+
+
void AesGenTables(void)
{
unsigned i;
@@ -90,18 +114,48 @@ void AesGenTables(void)
}
}
- g_AesCbc_Encode = AesCbc_Encode;
- g_AesCbc_Decode = AesCbc_Decode;
- g_AesCtr_Code = AesCtr_Code;
+ {
+ AES_CODE_FUNC d = AesCbc_Decode;
+ #ifndef _SFX
+ AES_CODE_FUNC e = AesCbc_Encode;
+ AES_CODE_FUNC c = AesCtr_Code;
+ UInt32 flags = 0;
+ #endif
- #ifdef MY_CPU_X86_OR_AMD64
- if (CPU_Is_Aes_Supported())
+ #ifdef USE_HW_AES
+ if (CPU_IsSupported_AES())
{
- g_AesCbc_Encode = AesCbc_Encode_Intel;
- g_AesCbc_Decode = AesCbc_Decode_Intel;
- g_AesCtr_Code = AesCtr_Code_Intel;
+ // #pragma message ("AES HW")
+ _PRF(printf("\n===AES HW\n"));
+ d = AesCbc_Decode_HW;
+
+ #ifndef _SFX
+ e = AesCbc_Encode_HW;
+ c = AesCtr_Code_HW;
+ flags = k_Aes_SupportedFunctions_HW;
+ #endif
+
+ #ifdef MY_CPU_X86_OR_AMD64
+ if (CPU_IsSupported_VAES_AVX2())
+ {
+ _PRF(printf("\n===vaes avx2\n"));
+ d = AesCbc_Decode_HW_256;
+ #ifndef _SFX
+ c = AesCtr_Code_HW_256;
+ flags |= k_Aes_SupportedFunctions_HW_256;
+ #endif
+ }
+ #endif
}
#endif
+
+ g_AesCbc_Decode = d;
+ #ifndef _SFX
+ g_AesCbc_Encode = e;
+ g_AesCtr_Code = c;
+ g_Aes_SupportedFunctions_Flags = flags;
+ #endif
+ }
}
@@ -142,8 +196,11 @@ void AesGenTables(void)
void MY_FAST_CALL Aes_SetKey_Enc(UInt32 *w, const Byte *key, unsigned keySize)
{
- unsigned i, wSize;
- wSize = keySize + 28;
+ unsigned i, m;
+ const UInt32 *wLim;
+ UInt32 t;
+ UInt32 rcon = 1;
+
keySize /= 4;
w[0] = ((UInt32)keySize / 2) + 3;
w += 4;
@@ -151,16 +208,26 @@ void MY_FAST_CALL Aes_SetKey_Enc(UInt32 *w, const Byte *key, unsigned keySize)
for (i = 0; i < keySize; i++, key += 4)
w[i] = GetUi32(key);
- for (; i < wSize; i++)
+ t = w[(size_t)keySize - 1];
+ wLim = w + (size_t)keySize * 3 + 28;
+ m = 0;
+ do
{
- UInt32 t = w[(size_t)i - 1];
- unsigned rem = i % keySize;
- if (rem == 0)
- t = Ui32(Sbox[gb1(t)] ^ Rcon[i / keySize], Sbox[gb2(t)], Sbox[gb3(t)], Sbox[gb0(t)]);
- else if (keySize > 6 && rem == 4)
+ if (m == 0)
+ {
+ t = Ui32(Sbox[gb1(t)] ^ rcon, Sbox[gb2(t)], Sbox[gb3(t)], Sbox[gb0(t)]);
+ rcon <<= 1;
+ if (rcon & 0x100)
+ rcon = 0x1b;
+ m = keySize;
+ }
+ else if (m == 4 && keySize > 6)
t = Ui32(Sbox[gb0(t)], Sbox[gb1(t)], Sbox[gb2(t)], Sbox[gb3(t)]);
- w[i] = w[i - keySize] ^ t;
+ m--;
+ t ^= w[0];
+ w[keySize] = t;
}
+ while (++w != wLim);
}
void MY_FAST_CALL Aes_SetKey_Dec(UInt32 *w, const Byte *key, unsigned keySize)
@@ -184,6 +251,7 @@ void MY_FAST_CALL Aes_SetKey_Dec(UInt32 *w, const Byte *key, unsigned keySize)
src and dest are pointers to 4 UInt32 words.
src and dest can point to same block */
+// MY_FORCE_INLINE
static void Aes_Encode(const UInt32 *w, UInt32 *dest, const UInt32 *src)
{
UInt32 s[4];
@@ -207,6 +275,7 @@ static void Aes_Encode(const UInt32 *w, UInt32 *dest, const UInt32 *src)
FT4(0); FT4(1); FT4(2); FT4(3);
}
+MY_FORCE_INLINE
static void Aes_Decode(const UInt32 *w, UInt32 *dest, const UInt32 *src)
{
UInt32 s[4];
@@ -294,12 +363,12 @@ void MY_FAST_CALL AesCtr_Code(UInt32 *p, Byte *data, size_t numBlocks)
UInt32 t = temp[i];
#ifdef MY_CPU_LE_UNALIGN
- *((UInt32 *)data) ^= t;
+ *((UInt32 *)(void *)data) ^= t;
#else
- data[0] ^= (t & 0xFF);
- data[1] ^= ((t >> 8) & 0xFF);
- data[2] ^= ((t >> 16) & 0xFF);
- data[3] ^= ((t >> 24));
+ data[0] = (Byte)(data[0] ^ (t & 0xFF));
+ data[1] = (Byte)(data[1] ^ ((t >> 8) & 0xFF));
+ data[2] = (Byte)(data[2] ^ ((t >> 16) & 0xFF));
+ data[3] = (Byte)(data[3] ^ ((t >> 24)));
#endif
}
}
diff --git a/multiarc/src/formats/7z/C/Aes.h b/multiarc/src/formats/7z/C/Aes.h
index 64979b5b..2aa22564 100644..100755
--- a/multiarc/src/formats/7z/C/Aes.h
+++ b/multiarc/src/formats/7z/C/Aes.h
@@ -1,5 +1,5 @@
/* Aes.h -- AES encryption / decryption
-2013-01-18 : Igor Pavlov : Public domain */
+2018-04-28 : Igor Pavlov : Public domain */
#ifndef __AES_H
#define __AES_H
@@ -26,12 +26,34 @@ void MY_FAST_CALL Aes_SetKey_Dec(UInt32 *aes, const Byte *key, unsigned keySize)
/* ivAes - 16-byte aligned pointer to iv+keyMode+roundKeys sequence: UInt32[AES_NUM_IVMRK_WORDS] */
void AesCbc_Init(UInt32 *ivAes, const Byte *iv); /* iv size is AES_BLOCK_SIZE */
+
/* data - 16-byte aligned pointer to data */
/* numBlocks - the number of 16-byte blocks in data array */
typedef void (MY_FAST_CALL *AES_CODE_FUNC)(UInt32 *ivAes, Byte *data, size_t numBlocks);
-extern AES_CODE_FUNC g_AesCbc_Encode;
+
extern AES_CODE_FUNC g_AesCbc_Decode;
+#ifndef _SFX
+extern AES_CODE_FUNC g_AesCbc_Encode;
extern AES_CODE_FUNC g_AesCtr_Code;
+#define k_Aes_SupportedFunctions_HW (1 << 2)
+#define k_Aes_SupportedFunctions_HW_256 (1 << 3)
+extern UInt32 g_Aes_SupportedFunctions_Flags;
+#endif
+
+
+#define DECLARE__AES_CODE_FUNC(funcName) \
+ void MY_FAST_CALL funcName(UInt32 *ivAes, Byte *data, size_t numBlocks);
+
+DECLARE__AES_CODE_FUNC (AesCbc_Encode)
+DECLARE__AES_CODE_FUNC (AesCbc_Decode)
+DECLARE__AES_CODE_FUNC (AesCtr_Code)
+
+DECLARE__AES_CODE_FUNC (AesCbc_Encode_HW)
+DECLARE__AES_CODE_FUNC (AesCbc_Decode_HW)
+DECLARE__AES_CODE_FUNC (AesCtr_Code_HW)
+
+DECLARE__AES_CODE_FUNC (AesCbc_Decode_HW_256)
+DECLARE__AES_CODE_FUNC (AesCtr_Code_HW_256)
EXTERN_C_END
diff --git a/multiarc/src/formats/7z/C/AesOpt.c b/multiarc/src/formats/7z/C/AesOpt.c
index 9571c467..8be8ff69 100644..100755
--- a/multiarc/src/formats/7z/C/AesOpt.c
+++ b/multiarc/src/formats/7z/C/AesOpt.c
@@ -1,184 +1,776 @@
-/* AesOpt.c -- Intel's AES
-2017-06-08 : Igor Pavlov : Public domain */
+/* AesOpt.c -- AES optimized code for x86 AES hardware instructions
+2021-04-01 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include "CpuArch.h"
#ifdef MY_CPU_X86_OR_AMD64
-#if (_MSC_VER > 1500) || (_MSC_FULL_VER >= 150030729)
-#define USE_INTEL_AES
+
+ #if defined(__clang__)
+ #if __clang_major__ > 3 || (__clang_major__ == 3 && __clang_minor__ >= 8)
+ #define USE_INTEL_AES
+ #define ATTRIB_AES __attribute__((__target__("aes")))
+ #if (__clang_major__ >= 8)
+ #define USE_INTEL_VAES
+ #define ATTRIB_VAES __attribute__((__target__("aes,vaes,avx2")))
+ #endif
+ #endif
+ #elif defined(__GNUC__)
+ #if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4)
+ #define USE_INTEL_AES
+ #ifndef __AES__
+ #define ATTRIB_AES __attribute__((__target__("aes")))
+ #endif
+ #if (__GNUC__ >= 8)
+ #define USE_INTEL_VAES
+ #define ATTRIB_VAES __attribute__((__target__("aes,vaes,avx2")))
+ #endif
+ #endif
+ #elif defined(__INTEL_COMPILER)
+ #if (__INTEL_COMPILER >= 1110)
+ #define USE_INTEL_AES
+ #if (__INTEL_COMPILER >= 1900)
+ #define USE_INTEL_VAES
+ #endif
+ #endif
+ #elif defined(_MSC_VER)
+ #if (_MSC_VER > 1500) || (_MSC_FULL_VER >= 150030729)
+ #define USE_INTEL_AES
+ #if (_MSC_VER >= 1910)
+ #define USE_INTEL_VAES
+ #endif
+ #endif
+ #endif
+
+#ifndef ATTRIB_AES
+ #define ATTRIB_AES
#endif
+#ifndef ATTRIB_VAES
+ #define ATTRIB_VAES
#endif
+
#ifdef USE_INTEL_AES
#include <wmmintrin.h>
-void MY_FAST_CALL AesCbc_Encode_Intel(__m128i *p, __m128i *data, size_t numBlocks)
+#ifndef USE_INTEL_VAES
+#define AES_TYPE_keys __m128i
+#define AES_TYPE_data __m128i
+#endif
+
+#define AES_FUNC_START(name) \
+ void MY_FAST_CALL name(__m128i *p, __m128i *data, size_t numBlocks)
+
+#define AES_FUNC_START2(name) \
+AES_FUNC_START (name); \
+ATTRIB_AES \
+AES_FUNC_START (name)
+
+#define MM_OP(op, dest, src) dest = op(dest, src);
+#define MM_OP_m(op, src) MM_OP(op, m, src);
+
+#define MM_XOR( dest, src) MM_OP(_mm_xor_si128, dest, src);
+#define AVX_XOR(dest, src) MM_OP(_mm256_xor_si256, dest, src);
+
+
+AES_FUNC_START2 (AesCbc_Encode_HW)
{
__m128i m = *p;
+ const __m128i k0 = p[2];
+ const __m128i k1 = p[3];
+ const UInt32 numRounds2 = *(const UInt32 *)(p + 1) - 1;
for (; numBlocks != 0; numBlocks--, data++)
{
- UInt32 numRounds2 = *(const UInt32 *)(p + 1) - 1;
- const __m128i *w = p + 3;
- m = _mm_xor_si128(m, *data);
- m = _mm_xor_si128(m, p[2]);
+ UInt32 r = numRounds2;
+ const __m128i *w = p + 4;
+ __m128i temp = *data;
+ MM_XOR (temp, k0);
+ MM_XOR (m, temp);
+ MM_OP_m (_mm_aesenc_si128, k1);
do
{
- m = _mm_aesenc_si128(m, w[0]);
- m = _mm_aesenc_si128(m, w[1]);
+ MM_OP_m (_mm_aesenc_si128, w[0]);
+ MM_OP_m (_mm_aesenc_si128, w[1]);
w += 2;
}
- while (--numRounds2 != 0);
- m = _mm_aesenc_si128(m, w[0]);
- m = _mm_aesenclast_si128(m, w[1]);
+ while (--r);
+ MM_OP_m (_mm_aesenclast_si128, w[0]);
*data = m;
}
*p = m;
}
-#define NUM_WAYS 3
-#define AES_OP_W(op, n) { \
- const __m128i t = w[n]; \
- m0 = op(m0, t); \
- m1 = op(m1, t); \
- m2 = op(m2, t); \
- }
+#define WOP_1(op)
+#define WOP_2(op) WOP_1 (op) op (m1, 1);
+#define WOP_3(op) WOP_2 (op) op (m2, 2);
+#define WOP_4(op) WOP_3 (op) op (m3, 3);
+#ifdef MY_CPU_AMD64
+#define WOP_5(op) WOP_4 (op) op (m4, 4);
+#define WOP_6(op) WOP_5 (op) op (m5, 5);
+#define WOP_7(op) WOP_6 (op) op (m6, 6);
+#define WOP_8(op) WOP_7 (op) op (m7, 7);
+#endif
+/*
+#define WOP_9(op) WOP_8 (op) op (m8, 8);
+#define WOP_10(op) WOP_9 (op) op (m9, 9);
+#define WOP_11(op) WOP_10(op) op (m10, 10);
+#define WOP_12(op) WOP_11(op) op (m11, 11);
+#define WOP_13(op) WOP_12(op) op (m12, 12);
+#define WOP_14(op) WOP_13(op) op (m13, 13);
+*/
+
+#ifdef MY_CPU_AMD64
+ #define NUM_WAYS 8
+ #define WOP_M1 WOP_8
+#else
+ #define NUM_WAYS 4
+ #define WOP_M1 WOP_4
+#endif
+
+#define WOP(op) op (m0, 0); WOP_M1(op)
+
+
+#define DECLARE_VAR(reg, ii) __m128i reg
+#define LOAD_data( reg, ii) reg = data[ii];
+#define STORE_data( reg, ii) data[ii] = reg;
+#if (NUM_WAYS > 1)
+#define XOR_data_M1(reg, ii) MM_XOR (reg, data[ii- 1]);
+#endif
+
+#define AVX__DECLARE_VAR(reg, ii) __m256i reg
+#define AVX__LOAD_data( reg, ii) reg = ((const __m256i *)(const void *)data)[ii];
+#define AVX__STORE_data( reg, ii) ((__m256i *)(void *)data)[ii] = reg;
+#define AVX__XOR_data_M1(reg, ii) AVX_XOR (reg, (((const __m256i *)(const void *)(data - 1))[ii]));
+
+#define MM_OP_key(op, reg) MM_OP(op, reg, key);
+
+#define AES_DEC( reg, ii) MM_OP_key (_mm_aesdec_si128, reg)
+#define AES_DEC_LAST( reg, ii) MM_OP_key (_mm_aesdeclast_si128, reg)
+#define AES_ENC( reg, ii) MM_OP_key (_mm_aesenc_si128, reg)
+#define AES_ENC_LAST( reg, ii) MM_OP_key (_mm_aesenclast_si128, reg)
+#define AES_XOR( reg, ii) MM_OP_key (_mm_xor_si128, reg)
+
-#define AES_DEC(n) AES_OP_W(_mm_aesdec_si128, n)
-#define AES_DEC_LAST(n) AES_OP_W(_mm_aesdeclast_si128, n)
-#define AES_ENC(n) AES_OP_W(_mm_aesenc_si128, n)
-#define AES_ENC_LAST(n) AES_OP_W(_mm_aesenclast_si128, n)
+#define AVX__AES_DEC( reg, ii) MM_OP_key (_mm256_aesdec_epi128, reg)
+#define AVX__AES_DEC_LAST( reg, ii) MM_OP_key (_mm256_aesdeclast_epi128, reg)
+#define AVX__AES_ENC( reg, ii) MM_OP_key (_mm256_aesenc_epi128, reg)
+#define AVX__AES_ENC_LAST( reg, ii) MM_OP_key (_mm256_aesenclast_epi128, reg)
+#define AVX__AES_XOR( reg, ii) MM_OP_key (_mm256_xor_si256, reg)
-void MY_FAST_CALL AesCbc_Decode_Intel(__m128i *p, __m128i *data, size_t numBlocks)
+#define CTR_START(reg, ii) MM_OP (_mm_add_epi64, ctr, one); reg = ctr;
+#define CTR_END( reg, ii) MM_XOR (data[ii], reg);
+
+#define AVX__CTR_START(reg, ii) MM_OP (_mm256_add_epi64, ctr2, two); reg = _mm256_xor_si256(ctr2, key);
+#define AVX__CTR_END( reg, ii) AVX_XOR (((__m256i *)(void *)data)[ii], reg);
+
+#define WOP_KEY(op, n) { \
+ const __m128i key = w[n]; \
+ WOP(op); }
+
+#define AVX__WOP_KEY(op, n) { \
+ const __m256i key = w[n]; \
+ WOP(op); }
+
+
+#define WIDE_LOOP_START \
+ dataEnd = data + numBlocks; \
+ if (numBlocks >= NUM_WAYS) \
+ { dataEnd -= NUM_WAYS; do { \
+
+
+#define WIDE_LOOP_END \
+ data += NUM_WAYS; \
+ } while (data <= dataEnd); \
+ dataEnd += NUM_WAYS; } \
+
+
+#define SINGLE_LOOP \
+ for (; data < dataEnd; data++)
+
+
+#define NUM_AES_KEYS_MAX 15
+
+#define WIDE_LOOP_START_AVX(OP) \
+ dataEnd = data + numBlocks; \
+ if (numBlocks >= NUM_WAYS * 2) \
+ { __m256i keys[NUM_AES_KEYS_MAX]; \
+ UInt32 ii; \
+ OP \
+ for (ii = 0; ii < numRounds; ii++) \
+ keys[ii] = _mm256_broadcastsi128_si256(p[ii]); \
+ dataEnd -= NUM_WAYS * 2; do { \
+
+
+#define WIDE_LOOP_END_AVX(OP) \
+ data += NUM_WAYS * 2; \
+ } while (data <= dataEnd); \
+ dataEnd += NUM_WAYS * 2; \
+ OP \
+ _mm256_zeroupper(); \
+ } \
+
+/* MSVC for x86: If we don't call _mm256_zeroupper(), and -arch:IA32 is not specified,
+ MSVC still can insert vzeroupper instruction. */
+
+
+AES_FUNC_START2 (AesCbc_Decode_HW)
{
__m128i iv = *p;
- for (; numBlocks >= NUM_WAYS; numBlocks -= NUM_WAYS, data += NUM_WAYS)
+ const __m128i *wStart = p + *(const UInt32 *)(p + 1) * 2 + 2 - 1;
+ const __m128i *dataEnd;
+ p += 2;
+
+ WIDE_LOOP_START
{
- UInt32 numRounds2 = *(const UInt32 *)(p + 1);
- const __m128i *w = p + numRounds2 * 2;
- __m128i m0, m1, m2;
+ const __m128i *w = wStart;
+
+ WOP (DECLARE_VAR)
+ WOP (LOAD_data);
+ WOP_KEY (AES_XOR, 1)
+
+ do
{
- const __m128i t = w[2];
- m0 = _mm_xor_si128(t, data[0]);
- m1 = _mm_xor_si128(t, data[1]);
- m2 = _mm_xor_si128(t, data[2]);
+ WOP_KEY (AES_DEC, 0)
+ w--;
}
- numRounds2--;
+ while (w != p);
+ WOP_KEY (AES_DEC_LAST, 0)
+
+ MM_XOR (m0, iv);
+ WOP_M1 (XOR_data_M1)
+ iv = data[NUM_WAYS - 1];
+ WOP (STORE_data);
+ }
+ WIDE_LOOP_END
+
+ SINGLE_LOOP
+ {
+ const __m128i *w = wStart - 1;
+ __m128i m = _mm_xor_si128 (w[2], *data);
do
{
- AES_DEC(1)
- AES_DEC(0)
+ MM_OP_m (_mm_aesdec_si128, w[1]);
+ MM_OP_m (_mm_aesdec_si128, w[0]);
w -= 2;
}
- while (--numRounds2 != 0);
- AES_DEC(1)
- AES_DEC_LAST(0)
+ while (w != p);
+ MM_OP_m (_mm_aesdec_si128, w[1]);
+ MM_OP_m (_mm_aesdeclast_si128, w[0]);
+ MM_XOR (m, iv);
+ iv = *data;
+ *data = m;
+ }
+
+ p[-2] = iv;
+}
+
+
+AES_FUNC_START2 (AesCtr_Code_HW)
+{
+ __m128i ctr = *p;
+ UInt32 numRoundsMinus2 = *(const UInt32 *)(p + 1) * 2 - 1;
+ const __m128i *dataEnd;
+ __m128i one = _mm_cvtsi32_si128(1);
+
+ p += 2;
+
+ WIDE_LOOP_START
+ {
+ const __m128i *w = p;
+ UInt32 r = numRoundsMinus2;
+ WOP (DECLARE_VAR)
+ WOP (CTR_START);
+ WOP_KEY (AES_XOR, 0)
+ w += 1;
+ do
{
- __m128i t;
- t = _mm_xor_si128(m0, iv); iv = data[0]; data[0] = t;
- t = _mm_xor_si128(m1, iv); iv = data[1]; data[1] = t;
- t = _mm_xor_si128(m2, iv); iv = data[2]; data[2] = t;
+ WOP_KEY (AES_ENC, 0)
+ w += 1;
}
+ while (--r);
+ WOP_KEY (AES_ENC_LAST, 0)
+
+ WOP (CTR_END);
}
- for (; numBlocks != 0; numBlocks--, data++)
+ WIDE_LOOP_END
+
+ SINGLE_LOOP
+ {
+ UInt32 numRounds2 = *(const UInt32 *)(p - 2 + 1) - 1;
+ const __m128i *w = p;
+ __m128i m;
+ MM_OP (_mm_add_epi64, ctr, one);
+ m = _mm_xor_si128 (ctr, p[0]);
+ w += 1;
+ do
+ {
+ MM_OP_m (_mm_aesenc_si128, w[0]);
+ MM_OP_m (_mm_aesenc_si128, w[1]);
+ w += 2;
+ }
+ while (--numRounds2);
+ MM_OP_m (_mm_aesenc_si128, w[0]);
+ MM_OP_m (_mm_aesenclast_si128, w[1]);
+ MM_XOR (*data, m);
+ }
+
+ p[-2] = ctr;
+}
+
+
+
+#ifdef USE_INTEL_VAES
+
+#if defined(__clang__) && defined(_MSC_VER)
+#define __SSE4_2__
+#define __AES__
+#define __AVX__
+#define __AVX2__
+#define __VAES__
+#define __AVX512F__
+#define __AVX512VL__
+#endif
+
+#include <immintrin.h>
+
+#define VAES_FUNC_START2(name) \
+AES_FUNC_START (name); \
+ATTRIB_VAES \
+AES_FUNC_START (name)
+
+VAES_FUNC_START2 (AesCbc_Decode_HW_256)
+{
+ __m128i iv = *p;
+ const __m128i *dataEnd;
+ UInt32 numRounds = *(const UInt32 *)(p + 1) * 2 + 1;
+ p += 2;
+
+ WIDE_LOOP_START_AVX(;)
{
- UInt32 numRounds2 = *(const UInt32 *)(p + 1);
- const __m128i *w = p + numRounds2 * 2;
- __m128i m = _mm_xor_si128(w[2], *data);
- numRounds2--;
+ const __m256i *w = keys + numRounds - 2;
+
+ WOP (AVX__DECLARE_VAR)
+ WOP (AVX__LOAD_data);
+ AVX__WOP_KEY (AVX__AES_XOR, 1)
+
do
{
- m = _mm_aesdec_si128(m, w[1]);
- m = _mm_aesdec_si128(m, w[0]);
+ AVX__WOP_KEY (AVX__AES_DEC, 0)
+ w--;
+ }
+ while (w != keys);
+ AVX__WOP_KEY (AVX__AES_DEC_LAST, 0)
+
+ AVX_XOR (m0, _mm256_setr_m128i(iv, data[0]));
+ WOP_M1 (AVX__XOR_data_M1)
+ iv = data[NUM_WAYS * 2 - 1];
+ WOP (AVX__STORE_data);
+ }
+ WIDE_LOOP_END_AVX(;)
+
+ SINGLE_LOOP
+ {
+ const __m128i *w = p + *(const UInt32 *)(p + 1 - 2) * 2 + 1 - 3;
+ __m128i m = _mm_xor_si128 (w[2], *data);
+ do
+ {
+ MM_OP_m (_mm_aesdec_si128, w[1]);
+ MM_OP_m (_mm_aesdec_si128, w[0]);
w -= 2;
}
- while (--numRounds2 != 0);
- m = _mm_aesdec_si128(m, w[1]);
- m = _mm_aesdeclast_si128(m, w[0]);
+ while (w != p);
+ MM_OP_m (_mm_aesdec_si128, w[1]);
+ MM_OP_m (_mm_aesdeclast_si128, w[0]);
- m = _mm_xor_si128(m, iv);
+ MM_XOR (m, iv);
iv = *data;
*data = m;
}
- *p = iv;
+
+ p[-2] = iv;
}
-void MY_FAST_CALL AesCtr_Code_Intel(__m128i *p, __m128i *data, size_t numBlocks)
+
+/*
+SSE2: _mm_cvtsi32_si128 : movd
+AVX: _mm256_setr_m128i : vinsertf128
+AVX2: _mm256_add_epi64 : vpaddq ymm, ymm, ymm
+ _mm256_extracti128_si256 : vextracti128
+ _mm256_broadcastsi128_si256 : vbroadcasti128
+*/
+
+#define AVX__CTR_LOOP_START \
+ ctr2 = _mm256_setr_m128i(_mm_sub_epi64(ctr, one), ctr); \
+ two = _mm256_setr_m128i(one, one); \
+ two = _mm256_add_epi64(two, two); \
+
+// two = _mm256_setr_epi64x(2, 0, 2, 0);
+
+#define AVX__CTR_LOOP_ENC \
+ ctr = _mm256_extracti128_si256 (ctr2, 1); \
+
+VAES_FUNC_START2 (AesCtr_Code_HW_256)
{
__m128i ctr = *p;
- __m128i one;
- one.m128i_u64[0] = 1;
- one.m128i_u64[1] = 0;
- for (; numBlocks >= NUM_WAYS; numBlocks -= NUM_WAYS, data += NUM_WAYS)
+ UInt32 numRounds = *(const UInt32 *)(p + 1) * 2 + 1;
+ const __m128i *dataEnd;
+ __m128i one = _mm_cvtsi32_si128(1);
+ __m256i ctr2, two;
+ p += 2;
+
+ WIDE_LOOP_START_AVX (AVX__CTR_LOOP_START)
{
- UInt32 numRounds2 = *(const UInt32 *)(p + 1) - 1;
- const __m128i *w = p;
- __m128i m0, m1, m2;
- {
- const __m128i t = w[2];
- ctr = _mm_add_epi64(ctr, one); m0 = _mm_xor_si128(ctr, t);
- ctr = _mm_add_epi64(ctr, one); m1 = _mm_xor_si128(ctr, t);
- ctr = _mm_add_epi64(ctr, one); m2 = _mm_xor_si128(ctr, t);
- }
- w += 3;
+ const __m256i *w = keys;
+ UInt32 r = numRounds - 2;
+ WOP (AVX__DECLARE_VAR)
+ AVX__WOP_KEY (AVX__CTR_START, 0);
+
+ w += 1;
do
{
- AES_ENC(0)
- AES_ENC(1)
- w += 2;
+ AVX__WOP_KEY (AVX__AES_ENC, 0)
+ w += 1;
}
- while (--numRounds2 != 0);
- AES_ENC(0)
- AES_ENC_LAST(1)
- data[0] = _mm_xor_si128(data[0], m0);
- data[1] = _mm_xor_si128(data[1], m1);
- data[2] = _mm_xor_si128(data[2], m2);
+ while (--r);
+ AVX__WOP_KEY (AVX__AES_ENC_LAST, 0)
+
+ WOP (AVX__CTR_END);
}
- for (; numBlocks != 0; numBlocks--, data++)
+ WIDE_LOOP_END_AVX (AVX__CTR_LOOP_ENC)
+
+ SINGLE_LOOP
{
- UInt32 numRounds2 = *(const UInt32 *)(p + 1) - 1;
+ UInt32 numRounds2 = *(const UInt32 *)(p - 2 + 1) - 1;
const __m128i *w = p;
__m128i m;
- ctr = _mm_add_epi64(ctr, one);
- m = _mm_xor_si128(ctr, p[2]);
- w += 3;
+ MM_OP (_mm_add_epi64, ctr, one);
+ m = _mm_xor_si128 (ctr, p[0]);
+ w += 1;
do
{
- m = _mm_aesenc_si128(m, w[0]);
- m = _mm_aesenc_si128(m, w[1]);
+ MM_OP_m (_mm_aesenc_si128, w[0]);
+ MM_OP_m (_mm_aesenc_si128, w[1]);
w += 2;
}
- while (--numRounds2 != 0);
- m = _mm_aesenc_si128(m, w[0]);
- m = _mm_aesenclast_si128(m, w[1]);
- *data = _mm_xor_si128(*data, m);
+ while (--numRounds2);
+ MM_OP_m (_mm_aesenc_si128, w[0]);
+ MM_OP_m (_mm_aesenclast_si128, w[1]);
+ MM_XOR (*data, m);
}
- *p = ctr;
+
+ p[-2] = ctr;
}
+#endif // USE_INTEL_VAES
+
+#else // USE_INTEL_AES
+
+/* no USE_INTEL_AES */
+
+#pragma message("AES HW_SW stub was used")
+
+#define AES_TYPE_keys UInt32
+#define AES_TYPE_data Byte
+
+#define AES_FUNC_START(name) \
+ void MY_FAST_CALL name(UInt32 *p, Byte *data, size_t numBlocks) \
+
+#define AES_COMPAT_STUB(name) \
+ AES_FUNC_START(name); \
+ AES_FUNC_START(name ## _HW) \
+ { name(p, data, numBlocks); }
+
+AES_COMPAT_STUB (AesCbc_Encode)
+AES_COMPAT_STUB (AesCbc_Decode)
+AES_COMPAT_STUB (AesCtr_Code)
+
+#endif // USE_INTEL_AES
+
+
+#ifndef USE_INTEL_VAES
+
+#pragma message("VAES HW_SW stub was used")
+
+#define VAES_COMPAT_STUB(name) \
+ void MY_FAST_CALL name ## _256(UInt32 *p, Byte *data, size_t numBlocks); \
+ void MY_FAST_CALL name ## _256(UInt32 *p, Byte *data, size_t numBlocks) \
+ { name((AES_TYPE_keys *)(void *)p, (AES_TYPE_data *)(void *)data, numBlocks); }
+
+VAES_COMPAT_STUB (AesCbc_Decode_HW)
+VAES_COMPAT_STUB (AesCtr_Code_HW)
+
+#endif // ! USE_INTEL_VAES
+
+
+#elif defined(MY_CPU_ARM_OR_ARM64) && defined(MY_CPU_LE)
+
+ #if defined(__clang__)
+ #if (__clang_major__ >= 8) // fix that check
+ #define USE_HW_AES
+ #endif
+ #elif defined(__GNUC__)
+ #if (__GNUC__ >= 6) // fix that check
+ #define USE_HW_AES
+ #endif
+ #elif defined(_MSC_VER)
+ #if _MSC_VER >= 1910
+ #define USE_HW_AES
+ #endif
+ #endif
+
+#ifdef USE_HW_AES
+
+// #pragma message("=== AES HW === ")
+
+#if defined(__clang__) || defined(__GNUC__)
+ #ifdef MY_CPU_ARM64
+ #define ATTRIB_AES __attribute__((__target__("+crypto")))
+ #else
+ #define ATTRIB_AES __attribute__((__target__("fpu=crypto-neon-fp-armv8")))
+ #endif
+#else
+ // _MSC_VER
+ // for arm32
+ #define _ARM_USE_NEW_NEON_INTRINSICS
+#endif
+
+#ifndef ATTRIB_AES
+ #define ATTRIB_AES
+#endif
+
+#if defined(_MSC_VER) && defined(MY_CPU_ARM64)
+#include <arm64_neon.h>
#else
+#include <arm_neon.h>
+#endif
+
+typedef uint8x16_t v128;
+
+#define AES_FUNC_START(name) \
+ void MY_FAST_CALL name(v128 *p, v128 *data, size_t numBlocks)
+
+#define AES_FUNC_START2(name) \
+AES_FUNC_START (name); \
+ATTRIB_AES \
+AES_FUNC_START (name)
+
+#define MM_OP(op, dest, src) dest = op(dest, src);
+#define MM_OP_m(op, src) MM_OP(op, m, src);
+#define MM_OP1_m(op) m = op(m);
-void MY_FAST_CALL AesCbc_Encode(UInt32 *ivAes, Byte *data, size_t numBlocks);
-void MY_FAST_CALL AesCbc_Decode(UInt32 *ivAes, Byte *data, size_t numBlocks);
-void MY_FAST_CALL AesCtr_Code(UInt32 *ivAes, Byte *data, size_t numBlocks);
+#define MM_XOR( dest, src) MM_OP(veorq_u8, dest, src);
+#define MM_XOR_m( src) MM_XOR(m, src);
-void MY_FAST_CALL AesCbc_Encode_Intel(UInt32 *p, Byte *data, size_t numBlocks)
+#define AES_E_m(k) MM_OP_m (vaeseq_u8, k);
+#define AES_E_MC_m(k) AES_E_m (k); MM_OP1_m(vaesmcq_u8);
+
+
+AES_FUNC_START2 (AesCbc_Encode_HW)
{
- AesCbc_Encode(p, data, numBlocks);
+ v128 m = *p;
+ const v128 k0 = p[2];
+ const v128 k1 = p[3];
+ const v128 k2 = p[4];
+ const v128 k3 = p[5];
+ const v128 k4 = p[6];
+ const v128 k5 = p[7];
+ const v128 k6 = p[8];
+ const v128 k7 = p[9];
+ const v128 k8 = p[10];
+ const v128 k9 = p[11];
+ const UInt32 numRounds2 = *(const UInt32 *)(p + 1);
+ const v128 *w = p + ((size_t)numRounds2 * 2);
+ const v128 k_z1 = w[1];
+ const v128 k_z0 = w[2];
+ for (; numBlocks != 0; numBlocks--, data++)
+ {
+ MM_XOR_m (*data);
+ AES_E_MC_m (k0)
+ AES_E_MC_m (k1)
+ AES_E_MC_m (k2)
+ AES_E_MC_m (k3)
+ AES_E_MC_m (k4)
+ AES_E_MC_m (k5)
+ AES_E_MC_m (k6)
+ AES_E_MC_m (k7)
+ AES_E_MC_m (k8)
+ if (numRounds2 >= 6)
+ {
+ AES_E_MC_m (k9)
+ AES_E_MC_m (p[12])
+ if (numRounds2 != 6)
+ {
+ AES_E_MC_m (p[13])
+ AES_E_MC_m (p[14])
+ }
+ }
+ AES_E_m (k_z1);
+ MM_XOR_m (k_z0);
+ *data = m;
+ }
+ *p = m;
}
-void MY_FAST_CALL AesCbc_Decode_Intel(UInt32 *p, Byte *data, size_t numBlocks)
+
+#define WOP_1(op)
+#define WOP_2(op) WOP_1 (op) op (m1, 1);
+#define WOP_3(op) WOP_2 (op) op (m2, 2);
+#define WOP_4(op) WOP_3 (op) op (m3, 3);
+#define WOP_5(op) WOP_4 (op) op (m4, 4);
+#define WOP_6(op) WOP_5 (op) op (m5, 5);
+#define WOP_7(op) WOP_6 (op) op (m6, 6);
+#define WOP_8(op) WOP_7 (op) op (m7, 7);
+
+ #define NUM_WAYS 8
+ #define WOP_M1 WOP_8
+
+#define WOP(op) op (m0, 0); WOP_M1(op)
+
+#define DECLARE_VAR(reg, ii) v128 reg
+#define LOAD_data( reg, ii) reg = data[ii];
+#define STORE_data( reg, ii) data[ii] = reg;
+#if (NUM_WAYS > 1)
+#define XOR_data_M1(reg, ii) MM_XOR (reg, data[ii- 1]);
+#endif
+
+#define MM_OP_key(op, reg) MM_OP (op, reg, key);
+
+#define AES_D_m(k) MM_OP_m (vaesdq_u8, k);
+#define AES_D_IMC_m(k) AES_D_m (k); MM_OP1_m (vaesimcq_u8);
+
+#define AES_XOR( reg, ii) MM_OP_key (veorq_u8, reg)
+#define AES_D( reg, ii) MM_OP_key (vaesdq_u8, reg)
+#define AES_E( reg, ii) MM_OP_key (vaeseq_u8, reg)
+
+#define AES_D_IMC( reg, ii) AES_D (reg, ii); reg = vaesimcq_u8(reg)
+#define AES_E_MC( reg, ii) AES_E (reg, ii); reg = vaesmcq_u8(reg)
+
+#define CTR_START(reg, ii) MM_OP (vaddq_u64, ctr, one); reg = vreinterpretq_u8_u64(ctr);
+#define CTR_END( reg, ii) MM_XOR (data[ii], reg);
+
+#define WOP_KEY(op, n) { \
+ const v128 key = w[n]; \
+ WOP(op); }
+
+#define WIDE_LOOP_START \
+ dataEnd = data + numBlocks; \
+ if (numBlocks >= NUM_WAYS) \
+ { dataEnd -= NUM_WAYS; do { \
+
+#define WIDE_LOOP_END \
+ data += NUM_WAYS; \
+ } while (data <= dataEnd); \
+ dataEnd += NUM_WAYS; } \
+
+#define SINGLE_LOOP \
+ for (; data < dataEnd; data++)
+
+
+AES_FUNC_START2 (AesCbc_Decode_HW)
{
- AesCbc_Decode(p, data, numBlocks);
+ v128 iv = *p;
+ const v128 *wStart = p + ((size_t)*(const UInt32 *)(p + 1)) * 2;
+ const v128 *dataEnd;
+ p += 2;
+
+ WIDE_LOOP_START
+ {
+ const v128 *w = wStart;
+ WOP (DECLARE_VAR)
+ WOP (LOAD_data);
+ WOP_KEY (AES_D_IMC, 2)
+ do
+ {
+ WOP_KEY (AES_D_IMC, 1)
+ WOP_KEY (AES_D_IMC, 0)
+ w -= 2;
+ }
+ while (w != p);
+ WOP_KEY (AES_D, 1)
+ WOP_KEY (AES_XOR, 0)
+ MM_XOR (m0, iv);
+ WOP_M1 (XOR_data_M1)
+ iv = data[NUM_WAYS - 1];
+ WOP (STORE_data);
+ }
+ WIDE_LOOP_END
+
+ SINGLE_LOOP
+ {
+ const v128 *w = wStart;
+ v128 m = *data;
+ AES_D_IMC_m (w[2])
+ do
+ {
+ AES_D_IMC_m (w[1]);
+ AES_D_IMC_m (w[0]);
+ w -= 2;
+ }
+ while (w != p);
+ AES_D_m (w[1]);
+ MM_XOR_m (w[0]);
+ MM_XOR_m (iv);
+ iv = *data;
+ *data = m;
+ }
+
+ p[-2] = iv;
}
-void MY_FAST_CALL AesCtr_Code_Intel(UInt32 *p, Byte *data, size_t numBlocks)
+
+AES_FUNC_START2 (AesCtr_Code_HW)
{
- AesCtr_Code(p, data, numBlocks);
+ uint64x2_t ctr = vreinterpretq_u64_u8(*p);
+ const v128 *wEnd = p + ((size_t)*(const UInt32 *)(p + 1)) * 2;
+ const v128 *dataEnd;
+ uint64x2_t one = vdupq_n_u64(0);
+ one = vsetq_lane_u64(1, one, 0);
+ p += 2;
+
+ WIDE_LOOP_START
+ {
+ const v128 *w = p;
+ WOP (DECLARE_VAR)
+ WOP (CTR_START);
+ do
+ {
+ WOP_KEY (AES_E_MC, 0)
+ WOP_KEY (AES_E_MC, 1)
+ w += 2;
+ }
+ while (w != wEnd);
+ WOP_KEY (AES_E_MC, 0)
+ WOP_KEY (AES_E, 1)
+ WOP_KEY (AES_XOR, 2)
+ WOP (CTR_END);
+ }
+ WIDE_LOOP_END
+
+ SINGLE_LOOP
+ {
+ const v128 *w = p;
+ v128 m;
+ CTR_START (m, 0);
+ do
+ {
+ AES_E_MC_m (w[0]);
+ AES_E_MC_m (w[1]);
+ w += 2;
+ }
+ while (w != wEnd);
+ AES_E_MC_m (w[0]);
+ AES_E_m (w[1]);
+ MM_XOR_m (w[2]);
+ CTR_END (m, 0);
+ }
+
+ p[-2] = vreinterpretq_u8_u64(ctr);
}
-#endif
+#endif // USE_HW_AES
+
+#endif // MY_CPU_ARM_OR_ARM64
diff --git a/multiarc/src/formats/7z/C/Alloc.c b/multiarc/src/formats/7z/C/Alloc.c
index bcede4b8..d1af76c5 100644..100755
--- a/multiarc/src/formats/7z/C/Alloc.c
+++ b/multiarc/src/formats/7z/C/Alloc.c
@@ -1,12 +1,12 @@
/* Alloc.c -- Memory allocation functions
-2018-04-27 : Igor Pavlov : Public domain */
+2021-07-13 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include <stdio.h>
#ifdef _WIN32
-#include <windows.h>
+#include <Windows.h>
#endif
#include <stdlib.h>
@@ -122,7 +122,6 @@ static void PrintAddr(void *p)
#define Print(s)
#define PrintLn()
#define PrintHex(v, align)
-#define PrintDec(v, align)
#define PrintAddr(p)
#endif
@@ -133,10 +132,11 @@ void *MyAlloc(size_t size)
{
if (size == 0)
return NULL;
+ PRINT_ALLOC("Alloc ", g_allocCount, size, NULL);
#ifdef _SZ_ALLOC_DEBUG
{
void *p = malloc(size);
- PRINT_ALLOC("Alloc ", g_allocCount, size, p);
+ // PRINT_ALLOC("Alloc ", g_allocCount, size, p);
return p;
}
#else
@@ -172,14 +172,20 @@ void MidFree(void *address)
VirtualFree(address, 0, MEM_RELEASE);
}
-#ifndef MEM_LARGE_PAGES
-#undef _7ZIP_LARGE_PAGES
+#ifdef _7ZIP_LARGE_PAGES
+
+#ifdef MEM_LARGE_PAGES
+ #define MY__MEM_LARGE_PAGES MEM_LARGE_PAGES
+#else
+ #define MY__MEM_LARGE_PAGES 0x20000000
#endif
-#ifdef _7ZIP_LARGE_PAGES
+extern
+SIZE_T g_LargePageSize;
SIZE_T g_LargePageSize = 0;
-typedef SIZE_T (WINAPI *GetLargePageMinimumP)();
-#endif
+typedef SIZE_T (WINAPI *GetLargePageMinimumP)(VOID);
+
+#endif // _7ZIP_LARGE_PAGES
void SetLargePageSize()
{
@@ -214,7 +220,7 @@ void *BigAlloc(size_t size)
size2 = (size + ps) & ~ps;
if (size2 >= size)
{
- void *res = VirtualAlloc(NULL, size2, MEM_COMMIT | MEM_LARGE_PAGES, PAGE_READWRITE);
+ void *res = VirtualAlloc(NULL, size2, MEM_COMMIT | MY__MEM_LARGE_PAGES, PAGE_READWRITE);
if (res)
return res;
}
@@ -241,14 +247,14 @@ static void *SzAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p); return MyAlloc
static void SzFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p); MyFree(address); }
const ISzAlloc g_Alloc = { SzAlloc, SzFree };
+#ifdef _WIN32
static void *SzMidAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p); return MidAlloc(size); }
static void SzMidFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p); MidFree(address); }
-const ISzAlloc g_MidAlloc = { SzMidAlloc, SzMidFree };
-
static void *SzBigAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p); return BigAlloc(size); }
static void SzBigFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p); BigFree(address); }
+const ISzAlloc g_MidAlloc = { SzMidAlloc, SzMidFree };
const ISzAlloc g_BigAlloc = { SzBigAlloc, SzBigFree };
-
+#endif
/*
uintptr_t : <stdint.h> C99 (optional)
@@ -280,13 +286,15 @@ const ISzAlloc g_BigAlloc = { SzBigAlloc, SzBigFree };
*/
#define MY_ALIGN_PTR_DOWN(p, align) ((void *)((((UIntPtr)(p)) & ~((UIntPtr)(align) - 1))))
-#define MY_ALIGN_PTR_UP_PLUS(p, align) MY_ALIGN_PTR_DOWN(((char *)(p) + (align) + ADJUST_ALLOC_SIZE), align)
-
-#if (_POSIX_C_SOURCE >= 200112L) && !defined(_WIN32)
+#if !defined(_WIN32) && defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE >= 200112L)
#define USE_posix_memalign
#endif
+#ifndef USE_posix_memalign
+#define MY_ALIGN_PTR_UP_PLUS(p, align) MY_ALIGN_PTR_DOWN(((char *)(p) + (align) + ADJUST_ALLOC_SIZE), align)
+#endif
+
/*
This posix_memalign() is for test purposes only.
We also need special Free() function instead of free(),
diff --git a/multiarc/src/formats/7z/C/Alloc.h b/multiarc/src/formats/7z/C/Alloc.h
index 64823764..3be2041e 100644..100755
--- a/multiarc/src/formats/7z/C/Alloc.h
+++ b/multiarc/src/formats/7z/C/Alloc.h
@@ -1,5 +1,5 @@
/* Alloc.h -- Memory allocation functions
-2018-02-19 : Igor Pavlov : Public domain */
+2021-07-13 : Igor Pavlov : Public domain */
#ifndef __COMMON_ALLOC_H
#define __COMMON_ALLOC_H
@@ -13,7 +13,7 @@ void MyFree(void *address);
#ifdef _WIN32
-void SetLargePageSize();
+void SetLargePageSize(void);
void *MidAlloc(size_t size);
void MidFree(void *address);
@@ -30,8 +30,15 @@ void BigFree(void *address);
#endif
extern const ISzAlloc g_Alloc;
+
+#ifdef _WIN32
extern const ISzAlloc g_BigAlloc;
extern const ISzAlloc g_MidAlloc;
+#else
+#define g_BigAlloc g_AlignedAlloc
+#define g_MidAlloc g_AlignedAlloc
+#endif
+
extern const ISzAlloc g_AlignedAlloc;
diff --git a/multiarc/src/formats/7z/C/Bcj2.c b/multiarc/src/formats/7z/C/Bcj2.c
index 9a0046a6..c7b95670 100644..100755
--- a/multiarc/src/formats/7z/C/Bcj2.c
+++ b/multiarc/src/formats/7z/C/Bcj2.c
@@ -1,5 +1,5 @@
/* Bcj2.c -- BCJ2 Decoder (Converter for x86 code)
-2018-04-28 : Igor Pavlov : Public domain */
+2021-02-09 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -123,7 +123,7 @@ SRes Bcj2Dec_Decode(CBcj2Dec *p)
const Byte *src = p->bufs[BCJ2_STREAM_MAIN];
const Byte *srcLim;
Byte *dest;
- SizeT num = p->lims[BCJ2_STREAM_MAIN] - src;
+ SizeT num = (SizeT)(p->lims[BCJ2_STREAM_MAIN] - src);
if (num == 0)
{
@@ -134,7 +134,7 @@ SRes Bcj2Dec_Decode(CBcj2Dec *p)
dest = p->dest;
if (num > (SizeT)(p->destLim - dest))
{
- num = p->destLim - dest;
+ num = (SizeT)(p->destLim - dest);
if (num == 0)
{
p->state = BCJ2_DEC_STATE_ORIG;
@@ -168,7 +168,7 @@ SRes Bcj2Dec_Decode(CBcj2Dec *p)
break;
}
- num = src - p->bufs[BCJ2_STREAM_MAIN];
+ num = (SizeT)(src - p->bufs[BCJ2_STREAM_MAIN]);
if (src == srcLim)
{
@@ -228,7 +228,7 @@ SRes Bcj2Dec_Decode(CBcj2Dec *p)
p->ip += 4;
val -= p->ip;
dest = p->dest;
- rem = p->destLim - dest;
+ rem = (SizeT)(p->destLim - dest);
if (rem < 4)
{
diff --git a/multiarc/src/formats/7z/C/Bcj2.h b/multiarc/src/formats/7z/C/Bcj2.h
index 8824080a..8824080a 100644..100755
--- a/multiarc/src/formats/7z/C/Bcj2.h
+++ b/multiarc/src/formats/7z/C/Bcj2.h
diff --git a/multiarc/src/formats/7z/C/Bcj2Enc.c b/multiarc/src/formats/7z/C/Bcj2Enc.c
index bfbeb8e4..682362a1 100644..100755
--- a/multiarc/src/formats/7z/C/Bcj2Enc.c
+++ b/multiarc/src/formats/7z/C/Bcj2Enc.c
@@ -1,5 +1,5 @@
/* Bcj2Enc.c -- BCJ2 Encoder (Converter for x86 code)
-2019-02-02 : Igor Pavlov : Public domain */
+2021-02-09 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -104,7 +104,7 @@ static void Bcj2Enc_Encode_2(CBcj2Enc *p)
const Byte *src = p->src;
const Byte *srcLim;
Byte *dest;
- SizeT num = p->srcLim - src;
+ SizeT num = (SizeT)(p->srcLim - src);
if (p->finishMode == BCJ2_ENC_FINISH_MODE_CONTINUE)
{
@@ -118,7 +118,7 @@ static void Bcj2Enc_Encode_2(CBcj2Enc *p)
dest = p->bufs[BCJ2_STREAM_MAIN];
if (num > (SizeT)(p->lims[BCJ2_STREAM_MAIN] - dest))
{
- num = p->lims[BCJ2_STREAM_MAIN] - dest;
+ num = (SizeT)(p->lims[BCJ2_STREAM_MAIN] - dest);
if (num == 0)
{
p->state = BCJ2_STREAM_MAIN;
@@ -152,7 +152,7 @@ static void Bcj2Enc_Encode_2(CBcj2Enc *p)
break;
}
- num = src - p->src;
+ num = (SizeT)(src - p->src);
if (src == srcLim)
{
diff --git a/multiarc/src/formats/7z/C/Blake2.h b/multiarc/src/formats/7z/C/Blake2.h
index 14f3cb64..14f3cb64 100644..100755
--- a/multiarc/src/formats/7z/C/Blake2.h
+++ b/multiarc/src/formats/7z/C/Blake2.h
diff --git a/multiarc/src/formats/7z/C/Blake2s.c b/multiarc/src/formats/7z/C/Blake2s.c
index 6527415e..3c56a8b8 100644..100755
--- a/multiarc/src/formats/7z/C/Blake2s.c
+++ b/multiarc/src/formats/7z/C/Blake2s.c
@@ -1,5 +1,5 @@
/* Blake2s.c -- BLAKE2s and BLAKE2sp Hash
-2015-06-30 : Igor Pavlov : Public domain
+2021-02-09 : Igor Pavlov : Public domain
2015 : Samuel Neves : Public domain */
#include <string.h>
@@ -34,7 +34,7 @@ static const Byte k_Blake2s_Sigma[BLAKE2S_NUM_ROUNDS][16] =
};
-void Blake2s_Init0(CBlake2s *p)
+static void Blake2s_Init0(CBlake2s *p)
{
unsigned i;
for (i = 0; i < 8; i++)
diff --git a/multiarc/src/formats/7z/C/Bra.c b/multiarc/src/formats/7z/C/Bra.c
index aed17e33..3b854d9c 100644..100755
--- a/multiarc/src/formats/7z/C/Bra.c
+++ b/multiarc/src/formats/7z/C/Bra.c
@@ -1,5 +1,5 @@
/* Bra.c -- Converters for RISC code
-2017-04-04 : Igor Pavlov : Public domain */
+2021-02-09 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -22,7 +22,7 @@ SizeT ARM_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
for (;;)
{
if (p >= lim)
- return p - data;
+ return (SizeT)(p - data);
p += 4;
if (p[-1] == 0xEB)
break;
@@ -43,7 +43,7 @@ SizeT ARM_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
for (;;)
{
if (p >= lim)
- return p - data;
+ return (SizeT)(p - data);
p += 4;
if (p[-1] == 0xEB)
break;
@@ -78,7 +78,7 @@ SizeT ARMT_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
{
UInt32 b3;
if (p > lim)
- return p - data;
+ return (SizeT)(p - data);
b1 = p[1];
b3 = p[3];
p += 2;
@@ -113,7 +113,7 @@ SizeT ARMT_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
{
UInt32 b3;
if (p > lim)
- return p - data;
+ return (SizeT)(p - data);
b1 = p[1];
b3 = p[3];
p += 2;
@@ -162,7 +162,7 @@ SizeT PPC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
for (;;)
{
if (p >= lim)
- return p - data;
+ return (SizeT)(p - data);
p += 4;
/* if ((v & 0xFC000003) == 0x48000001) */
if ((p[-4] & 0xFC) == 0x48 && (p[-1] & 3) == 1)
@@ -196,7 +196,7 @@ SizeT SPARC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
for (;;)
{
if (p >= lim)
- return p - data;
+ return (SizeT)(p - data);
/*
v = GetBe32(p);
p += 4;
diff --git a/multiarc/src/formats/7z/C/Bra.h b/multiarc/src/formats/7z/C/Bra.h
index 855e37a6..855e37a6 100644..100755
--- a/multiarc/src/formats/7z/C/Bra.h
+++ b/multiarc/src/formats/7z/C/Bra.h
diff --git a/multiarc/src/formats/7z/C/Bra86.c b/multiarc/src/formats/7z/C/Bra86.c
index 93ed4d76..10a0fbd1 100644..100755
--- a/multiarc/src/formats/7z/C/Bra86.c
+++ b/multiarc/src/formats/7z/C/Bra86.c
@@ -1,5 +1,5 @@
/* Bra86.c -- Converter for x86 code (BCJ)
-2017-04-03 : Igor Pavlov : Public domain */
+2021-02-09 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -25,7 +25,7 @@ SizeT x86_Convert(Byte *data, SizeT size, UInt32 ip, UInt32 *state, int encoding
break;
{
- SizeT d = (SizeT)(p - data - pos);
+ SizeT d = (SizeT)(p - data) - pos;
pos = (SizeT)(p - data);
if (p >= limit)
{
diff --git a/multiarc/src/formats/7z/C/BraIA64.c b/multiarc/src/formats/7z/C/BraIA64.c
index d1dbc62c..d1dbc62c 100644..100755
--- a/multiarc/src/formats/7z/C/BraIA64.c
+++ b/multiarc/src/formats/7z/C/BraIA64.c
diff --git a/multiarc/src/formats/7z/C/BwtSort.c b/multiarc/src/formats/7z/C/BwtSort.c
index cc2f4b29..3eb57efa 100644..100755
--- a/multiarc/src/formats/7z/C/BwtSort.c
+++ b/multiarc/src/formats/7z/C/BwtSort.c
@@ -1,5 +1,5 @@
/* BwtSort.c -- BWT block sorting
-2018-07-04 : Igor Pavlov : Public domain */
+2021-04-01 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -60,7 +60,7 @@ SortGroup - is recursive Range-Sort function with HeapSort optimization for smal
returns: 1 - if there are groups, 0 - no more groups
*/
-UInt32 NO_INLINE SortGroup(UInt32 BlockSize, UInt32 NumSortedBytes, UInt32 groupOffset, UInt32 groupSize, int NumRefBits, UInt32 *Indices
+static UInt32 NO_INLINE SortGroup(UInt32 BlockSize, UInt32 NumSortedBytes, UInt32 groupOffset, UInt32 groupSize, int NumRefBits, UInt32 *Indices
#ifndef BLOCK_SORT_USE_HEAP_SORT
, UInt32 left, UInt32 range
#endif
@@ -116,7 +116,7 @@ UInt32 NO_INLINE SortGroup(UInt32 BlockSize, UInt32 NumSortedBytes, UInt32 group
}
HeapSort(temp, groupSize);
- mask = ((1 << NumRefBits) - 1);
+ mask = (((UInt32)1 << NumRefBits) - 1);
thereAreGroups = 0;
group = groupOffset;
diff --git a/multiarc/src/formats/7z/C/BwtSort.h b/multiarc/src/formats/7z/C/BwtSort.h
index 7e989a99..7e989a99 100644..100755
--- a/multiarc/src/formats/7z/C/BwtSort.h
+++ b/multiarc/src/formats/7z/C/BwtSort.h
diff --git a/multiarc/src/formats/7z/C/Compiler.h b/multiarc/src/formats/7z/C/Compiler.h
index 0cc409d8..a9816fa5 100644..100755
--- a/multiarc/src/formats/7z/C/Compiler.h
+++ b/multiarc/src/formats/7z/C/Compiler.h
@@ -1,9 +1,13 @@
/* Compiler.h
-2017-04-03 : Igor Pavlov : Public domain */
+2021-01-05 : Igor Pavlov : Public domain */
#ifndef __7Z_COMPILER_H
#define __7Z_COMPILER_H
+ #ifdef __clang__
+ #pragma clang diagnostic ignored "-Wunused-private-field"
+ #endif
+
#ifdef _MSC_VER
#ifdef UNDER_CE
@@ -25,6 +29,12 @@
#pragma warning(disable : 4786) // identifier was truncated to '255' characters in the debug information
#endif
+ #ifdef __clang__
+ #pragma clang diagnostic ignored "-Wdeprecated-declarations"
+ #pragma clang diagnostic ignored "-Wmicrosoft-exception-spec"
+ // #pragma clang diagnostic ignored "-Wreserved-id-macro"
+ #endif
+
#endif
#define UNUSED_VAR(x) (void)x;
diff --git a/multiarc/src/formats/7z/C/CpuArch.c b/multiarc/src/formats/7z/C/CpuArch.c
index 02e482e0..fa9afe39 100644..100755
--- a/multiarc/src/formats/7z/C/CpuArch.c
+++ b/multiarc/src/formats/7z/C/CpuArch.c
@@ -1,5 +1,5 @@
/* CpuArch.c -- CPU specific code
-2018-02-18: Igor Pavlov : Public domain */
+2021-07-13 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -55,6 +55,47 @@ static UInt32 CheckFlag(UInt32 flag)
#define CHECK_CPUID_IS_SUPPORTED
#endif
+#ifndef USE_ASM
+ #ifdef _MSC_VER
+ #if _MSC_VER >= 1600
+ #define MY__cpuidex __cpuidex
+ #else
+
+/*
+ __cpuid (function == 4) requires subfunction number in ECX.
+ MSDN: The __cpuid intrinsic clears the ECX register before calling the cpuid instruction.
+ __cpuid() in new MSVC clears ECX.
+ __cpuid() in old MSVC (14.00) doesn't clear ECX
+ We still can use __cpuid for low (function) values that don't require ECX,
+ but __cpuid() in old MSVC will be incorrect for some function values: (function == 4).
+ So here we use the hack for old MSVC to send (subFunction) in ECX register to cpuid instruction,
+ where ECX value is first parameter for FAST_CALL / NO_INLINE function,
+ So the caller of MY__cpuidex_HACK() sets ECX as subFunction, and
+ old MSVC for __cpuid() doesn't change ECX and cpuid instruction gets (subFunction) value.
+
+ DON'T remove MY_NO_INLINE and MY_FAST_CALL for MY__cpuidex_HACK() !!!
+*/
+
+static
+MY_NO_INLINE
+void MY_FAST_CALL MY__cpuidex_HACK(UInt32 subFunction, int *CPUInfo, UInt32 function)
+{
+ UNUSED_VAR(subFunction);
+ __cpuid(CPUInfo, function);
+}
+
+ #define MY__cpuidex(info, func, func2) MY__cpuidex_HACK(func2, info, func)
+ #pragma message("======== MY__cpuidex_HACK WAS USED ========")
+ #endif
+ #else
+ #define MY__cpuidex(info, func, func2) __cpuid(info, func)
+ #pragma message("======== (INCORRECT ?) cpuid WAS USED ========")
+ #endif
+#endif
+
+
+
+
void MyCPUID(UInt32 function, UInt32 *a, UInt32 *b, UInt32 *c, UInt32 *d)
{
#ifdef USE_ASM
@@ -99,18 +140,20 @@ void MyCPUID(UInt32 function, UInt32 *a, UInt32 *b, UInt32 *c, UInt32 *d)
#endif
"=c" (*c) ,
"=d" (*d)
- : "0" (function)) ;
+ : "0" (function), "c"(0) ) ;
#endif
#else
int CPUInfo[4];
- __cpuid(CPUInfo, function);
- *a = CPUInfo[0];
- *b = CPUInfo[1];
- *c = CPUInfo[2];
- *d = CPUInfo[3];
+
+ MY__cpuidex(CPUInfo, (int)function, 0);
+
+ *a = (UInt32)CPUInfo[0];
+ *b = (UInt32)CPUInfo[1];
+ *c = (UInt32)CPUInfo[2];
+ *d = (UInt32)CPUInfo[3];
#endif
}
@@ -174,7 +217,7 @@ BoolInt CPU_Is_InOrder()
}
#if !defined(MY_CPU_AMD64) && defined(_WIN32)
-#include <windows.h>
+#include <Windows.h>
static BoolInt CPU_Sys_Is_SSE_Supported()
{
OSVERSIONINFO vi;
@@ -188,13 +231,101 @@ static BoolInt CPU_Sys_Is_SSE_Supported()
#define CHECK_SYS_SSE_SUPPORT
#endif
-BoolInt CPU_Is_Aes_Supported()
+
+static UInt32 X86_CPUID_ECX_Get_Flags()
+{
+ Cx86cpuid p;
+ CHECK_SYS_SSE_SUPPORT
+ if (!x86cpuid_CheckAndRead(&p))
+ return 0;
+ return p.c;
+}
+
+BoolInt CPU_IsSupported_AES()
+{
+ return (X86_CPUID_ECX_Get_Flags() >> 25) & 1;
+}
+
+BoolInt CPU_IsSupported_SSSE3()
+{
+ return (X86_CPUID_ECX_Get_Flags() >> 9) & 1;
+}
+
+BoolInt CPU_IsSupported_SSE41()
+{
+ return (X86_CPUID_ECX_Get_Flags() >> 19) & 1;
+}
+
+BoolInt CPU_IsSupported_SHA()
+{
+ Cx86cpuid p;
+ CHECK_SYS_SSE_SUPPORT
+ if (!x86cpuid_CheckAndRead(&p))
+ return False;
+
+ if (p.maxFunc < 7)
+ return False;
+ {
+ UInt32 d[4] = { 0 };
+ MyCPUID(7, &d[0], &d[1], &d[2], &d[3]);
+ return (d[1] >> 29) & 1;
+ }
+}
+
+// #include <stdio.h>
+
+#ifdef _WIN32
+#include <Windows.h>
+#endif
+
+BoolInt CPU_IsSupported_AVX2()
+{
+ Cx86cpuid p;
+ CHECK_SYS_SSE_SUPPORT
+
+ #ifdef _WIN32
+ #define MY__PF_XSAVE_ENABLED 17
+ if (!IsProcessorFeaturePresent(MY__PF_XSAVE_ENABLED))
+ return False;
+ #endif
+
+ if (!x86cpuid_CheckAndRead(&p))
+ return False;
+ if (p.maxFunc < 7)
+ return False;
+ {
+ UInt32 d[4] = { 0 };
+ MyCPUID(7, &d[0], &d[1], &d[2], &d[3]);
+ // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]);
+ return 1
+ & (d[1] >> 5); // avx2
+ }
+}
+
+BoolInt CPU_IsSupported_VAES_AVX2()
{
Cx86cpuid p;
CHECK_SYS_SSE_SUPPORT
+
+ #ifdef _WIN32
+ #define MY__PF_XSAVE_ENABLED 17
+ if (!IsProcessorFeaturePresent(MY__PF_XSAVE_ENABLED))
+ return False;
+ #endif
+
if (!x86cpuid_CheckAndRead(&p))
return False;
- return (p.c >> 25) & 1;
+ if (p.maxFunc < 7)
+ return False;
+ {
+ UInt32 d[4] = { 0 };
+ MyCPUID(7, &d[0], &d[1], &d[2], &d[3]);
+ // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]);
+ return 1
+ & (d[1] >> 5) // avx2
+ // & (d[1] >> 31) // avx512vl
+ & (d[2] >> 9); // vaes // VEX-256/EVEX
+ }
}
BoolInt CPU_IsSupported_PageGB()
@@ -215,4 +346,133 @@ BoolInt CPU_IsSupported_PageGB()
}
}
+
+#elif defined(MY_CPU_ARM_OR_ARM64)
+
+#ifdef _WIN32
+
+#include <Windows.h>
+
+BoolInt CPU_IsSupported_CRC32() { return IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
+BoolInt CPU_IsSupported_CRYPTO() { return IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
+BoolInt CPU_IsSupported_NEON() { return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
+
+#else
+
+#if defined(__APPLE__)
+
+/*
+#include <stdio.h>
+#include <string.h>
+static void Print_sysctlbyname(const char *name)
+{
+ size_t bufSize = 256;
+ char buf[256];
+ int res = sysctlbyname(name, &buf, &bufSize, NULL, 0);
+ {
+ int i;
+ printf("\nres = %d : %s : '%s' : bufSize = %d, numeric", res, name, buf, (unsigned)bufSize);
+ for (i = 0; i < 20; i++)
+ printf(" %2x", (unsigned)(Byte)buf[i]);
+
+ }
+}
+*/
+
+static BoolInt My_sysctlbyname_Get_BoolInt(const char *name)
+{
+ UInt32 val = 0;
+ if (My_sysctlbyname_Get_UInt32(name, &val) == 0 && val == 1)
+ return 1;
+ return 0;
+}
+
+ /*
+ Print_sysctlbyname("hw.pagesize");
+ Print_sysctlbyname("machdep.cpu.brand_string");
+ */
+
+BoolInt CPU_IsSupported_CRC32(void)
+{
+ return My_sysctlbyname_Get_BoolInt("hw.optional.armv8_crc32");
+}
+
+BoolInt CPU_IsSupported_NEON(void)
+{
+ return My_sysctlbyname_Get_BoolInt("hw.optional.neon");
+}
+
+#ifdef MY_CPU_ARM64
+#define APPLE_CRYPTO_SUPPORT_VAL 1
+#else
+#define APPLE_CRYPTO_SUPPORT_VAL 0
+#endif
+
+BoolInt CPU_IsSupported_SHA1(void) { return APPLE_CRYPTO_SUPPORT_VAL; }
+BoolInt CPU_IsSupported_SHA2(void) { return APPLE_CRYPTO_SUPPORT_VAL; }
+BoolInt CPU_IsSupported_AES (void) { return APPLE_CRYPTO_SUPPORT_VAL; }
+
+
+#else // __APPLE__
+
+#include <sys/auxv.h>
+
+#define USE_HWCAP
+
+#ifdef USE_HWCAP
+
+#include <asm/hwcap.h>
+
+ #define MY_HWCAP_CHECK_FUNC_2(name1, name2) \
+ BoolInt CPU_IsSupported_ ## name1() { return (getauxval(AT_HWCAP) & (HWCAP_ ## name2)) ? 1 : 0; }
+
+#ifdef MY_CPU_ARM64
+ #define MY_HWCAP_CHECK_FUNC(name) \
+ MY_HWCAP_CHECK_FUNC_2(name, name)
+ MY_HWCAP_CHECK_FUNC_2(NEON, ASIMD)
+// MY_HWCAP_CHECK_FUNC (ASIMD)
+#elif defined(MY_CPU_ARM)
+ #define MY_HWCAP_CHECK_FUNC(name) \
+ BoolInt CPU_IsSupported_ ## name() { return (getauxval(AT_HWCAP2) & (HWCAP2_ ## name)) ? 1 : 0; }
+ MY_HWCAP_CHECK_FUNC_2(NEON, NEON)
+#endif
+
+#else // USE_HWCAP
+
+ #define MY_HWCAP_CHECK_FUNC(name) \
+ BoolInt CPU_IsSupported_ ## name() { return 0; }
+ MY_HWCAP_CHECK_FUNC(NEON)
+
+#endif // USE_HWCAP
+
+MY_HWCAP_CHECK_FUNC (CRC32)
+MY_HWCAP_CHECK_FUNC (SHA1)
+MY_HWCAP_CHECK_FUNC (SHA2)
+MY_HWCAP_CHECK_FUNC (AES)
+
+#endif // __APPLE__
+#endif // _WIN32
+
+#endif // MY_CPU_ARM_OR_ARM64
+
+
+
+#ifdef __APPLE__
+
+#include <sys/sysctl.h>
+
+int My_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize)
+{
+ return sysctlbyname(name, buf, bufSize, NULL, 0);
+}
+
+int My_sysctlbyname_Get_UInt32(const char *name, UInt32 *val)
+{
+ size_t bufSize = sizeof(*val);
+ int res = My_sysctlbyname_Get(name, val, &bufSize);
+ if (res == 0 && bufSize != sizeof(*val))
+ return EFAULT;
+ return res;
+}
+
#endif
diff --git a/multiarc/src/formats/7z/C/CpuArch.h b/multiarc/src/formats/7z/C/CpuArch.h
index bd429388..4856fbb1 100644..100755
--- a/multiarc/src/formats/7z/C/CpuArch.h
+++ b/multiarc/src/formats/7z/C/CpuArch.h
@@ -1,5 +1,5 @@
/* CpuArch.h -- CPU specific code
-2018-02-18 : Igor Pavlov : Public domain */
+2022-07-15 : Igor Pavlov : Public domain */
#ifndef __CPU_ARCH_H
#define __CPU_ARCH_H
@@ -14,6 +14,10 @@ MY_CPU_BE means that CPU is BIG ENDIAN.
If MY_CPU_LE and MY_CPU_BE are not defined, we don't know about ENDIANNESS of platform.
MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned memory accesses.
+
+MY_CPU_64BIT means that processor can work with 64-bit registers.
+ MY_CPU_64BIT can be used to select fast code branch
+ MY_CPU_64BIT doesn't mean that (sizeof(void *) == 8)
*/
#if defined(_M_X64) \
@@ -24,8 +28,10 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem
#define MY_CPU_AMD64
#ifdef __ILP32__
#define MY_CPU_NAME "x32"
+ #define MY_CPU_SIZEOF_POINTER 4
#else
#define MY_CPU_NAME "x64"
+ #define MY_CPU_SIZEOF_POINTER 8
#endif
#define MY_CPU_64BIT
#endif
@@ -35,7 +41,8 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem
|| defined(__i386__)
#define MY_CPU_X86
#define MY_CPU_NAME "x86"
- #define MY_CPU_32BIT
+ /* #define MY_CPU_32BIT */
+ #define MY_CPU_SIZEOF_POINTER 4
#endif
@@ -59,8 +66,14 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem
|| defined(__THUMBEL__) \
|| defined(__THUMBEB__)
#define MY_CPU_ARM
- #define MY_CPU_NAME "arm"
- #define MY_CPU_32BIT
+
+ #if defined(__thumb__) || defined(__THUMBEL__) || defined(_M_ARMT)
+ #define MY_CPU_NAME "armt"
+ #else
+ #define MY_CPU_NAME "arm"
+ #endif
+ /* #define MY_CPU_32BIT */
+ #define MY_CPU_SIZEOF_POINTER 4
#endif
@@ -84,26 +97,41 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem
#if defined(__ppc64__) \
- || defined(__powerpc64__)
+ || defined(__powerpc64__) \
+ || defined(__ppc__) \
+ || defined(__powerpc__) \
+ || defined(__PPC__) \
+ || defined(_POWER)
+
+#if defined(__ppc64__) \
+ || defined(__powerpc64__) \
+ || defined(_LP64) \
+ || defined(__64BIT__)
#ifdef __ILP32__
#define MY_CPU_NAME "ppc64-32"
+ #define MY_CPU_SIZEOF_POINTER 4
#else
#define MY_CPU_NAME "ppc64"
+ #define MY_CPU_SIZEOF_POINTER 8
#endif
#define MY_CPU_64BIT
-#elif defined(__ppc__) \
- || defined(__powerpc__)
+#else
#define MY_CPU_NAME "ppc"
- #define MY_CPU_32BIT
+ #define MY_CPU_SIZEOF_POINTER 4
+ /* #define MY_CPU_32BIT */
+#endif
#endif
-#if defined(__sparc64__)
- #define MY_CPU_NAME "sparc64"
- #define MY_CPU_64BIT
-#elif defined(__sparc__)
- #define MY_CPU_NAME "sparc"
- /* #define MY_CPU_32BIT */
+#if defined(__riscv) \
+ || defined(__riscv__)
+ #if __riscv_xlen == 32
+ #define MY_CPU_NAME "riscv32"
+ #elif __riscv_xlen == 64
+ #define MY_CPU_NAME "riscv64"
+ #else
+ #define MY_CPU_NAME "riscv"
+ #endif
#endif
@@ -111,6 +139,10 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem
#define MY_CPU_X86_OR_AMD64
#endif
+#if defined(MY_CPU_ARM) || defined(MY_CPU_ARM64)
+#define MY_CPU_ARM_OR_ARM64
+#endif
+
#ifdef _WIN32
@@ -170,6 +202,40 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem
#error Stop_Compiling_Bad_32_64_BIT
#endif
+#ifdef __SIZEOF_POINTER__
+ #ifdef MY_CPU_SIZEOF_POINTER
+ #if MY_CPU_SIZEOF_POINTER != __SIZEOF_POINTER__
+ #error Stop_Compiling_Bad_MY_CPU_PTR_SIZE
+ #endif
+ #else
+ #define MY_CPU_SIZEOF_POINTER __SIZEOF_POINTER__
+ #endif
+#endif
+
+#if defined(MY_CPU_SIZEOF_POINTER) && (MY_CPU_SIZEOF_POINTER == 4)
+#if defined (_LP64)
+ #error Stop_Compiling_Bad_MY_CPU_PTR_SIZE
+#endif
+#endif
+
+#ifdef _MSC_VER
+ #if _MSC_VER >= 1300
+ #define MY_CPU_pragma_pack_push_1 __pragma(pack(push, 1))
+ #define MY_CPU_pragma_pop __pragma(pack(pop))
+ #else
+ #define MY_CPU_pragma_pack_push_1
+ #define MY_CPU_pragma_pop
+ #endif
+#else
+ #ifdef __xlC__
+ #define MY_CPU_pragma_pack_push_1 _Pragma("pack(1)")
+ #define MY_CPU_pragma_pop _Pragma("pack()")
+ #else
+ #define MY_CPU_pragma_pack_push_1 _Pragma("pack(push, 1)")
+ #define MY_CPU_pragma_pop _Pragma("pack(pop)")
+ #endif
+#endif
+
#ifndef MY_CPU_NAME
#ifdef MY_CPU_LE
@@ -189,8 +255,12 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem
#ifdef MY_CPU_LE
#if defined(MY_CPU_X86_OR_AMD64) \
- || defined(MY_CPU_ARM64) \
- || defined(__ARM_FEATURE_UNALIGNED)
+ || defined(MY_CPU_ARM64)
+ #define MY_CPU_LE_UNALIGN
+ #define MY_CPU_LE_UNALIGN_64
+ #elif defined(__ARM_FEATURE_UNALIGNED)
+ /* gcc9 for 32-bit arm can use LDRD instruction that requires 32-bit alignment.
+ So we can't use unaligned 64-bit operations. */
#define MY_CPU_LE_UNALIGN
#endif
#endif
@@ -200,11 +270,15 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem
#define GetUi16(p) (*(const UInt16 *)(const void *)(p))
#define GetUi32(p) (*(const UInt32 *)(const void *)(p))
+#ifdef MY_CPU_LE_UNALIGN_64
#define GetUi64(p) (*(const UInt64 *)(const void *)(p))
+#endif
-#define SetUi16(p, v) { *(UInt16 *)(p) = (v); }
-#define SetUi32(p, v) { *(UInt32 *)(p) = (v); }
-#define SetUi64(p, v) { *(UInt64 *)(p) = (v); }
+#define SetUi16(p, v) { *(UInt16 *)(void *)(p) = (v); }
+#define SetUi32(p, v) { *(UInt32 *)(void *)(p) = (v); }
+#ifdef MY_CPU_LE_UNALIGN_64
+#define SetUi64(p, v) { *(UInt64 *)(void *)(p) = (v); }
+#endif
#else
@@ -218,8 +292,6 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem
((UInt32)((const Byte *)(p))[2] << 16) | \
((UInt32)((const Byte *)(p))[3] << 24))
-#define GetUi64(p) (GetUi32(p) | ((UInt64)GetUi32(((const Byte *)(p)) + 4) << 32))
-
#define SetUi16(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \
_ppp_[0] = (Byte)_vvv_; \
_ppp_[1] = (Byte)(_vvv_ >> 8); }
@@ -230,19 +302,29 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem
_ppp_[2] = (Byte)(_vvv_ >> 16); \
_ppp_[3] = (Byte)(_vvv_ >> 24); }
+#endif
+
+
+#ifndef MY_CPU_LE_UNALIGN_64
+
+#define GetUi64(p) (GetUi32(p) | ((UInt64)GetUi32(((const Byte *)(p)) + 4) << 32))
+
#define SetUi64(p, v) { Byte *_ppp2_ = (Byte *)(p); UInt64 _vvv2_ = (v); \
SetUi32(_ppp2_ , (UInt32)_vvv2_); \
SetUi32(_ppp2_ + 4, (UInt32)(_vvv2_ >> 32)); }
#endif
+
+
+
#ifdef __has_builtin
#define MY__has_builtin(x) __has_builtin(x)
#else
#define MY__has_builtin(x) 0
#endif
-#if defined(MY_CPU_LE_UNALIGN) && /* defined(_WIN64) && */ (_MSC_VER >= 1300)
+#if defined(MY_CPU_LE_UNALIGN) && /* defined(_WIN64) && */ defined(_MSC_VER) && (_MSC_VER >= 1300)
/* Note: we use bswap instruction, that is unsupported in 386 cpu */
@@ -253,8 +335,8 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem
#pragma intrinsic(_byteswap_uint64)
/* #define GetBe16(p) _byteswap_ushort(*(const UInt16 *)(const Byte *)(p)) */
-#define GetBe32(p) _byteswap_ulong(*(const UInt32 *)(const Byte *)(p))
-#define GetBe64(p) _byteswap_uint64(*(const UInt64 *)(const Byte *)(p))
+#define GetBe32(p) _byteswap_ulong (*(const UInt32 *)(const void *)(p))
+#define GetBe64(p) _byteswap_uint64(*(const UInt64 *)(const void *)(p))
#define SetBe32(p, v) (*(UInt32 *)(void *)(p)) = _byteswap_ulong(v)
@@ -262,9 +344,9 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem
(defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) \
|| (defined(__clang__) && MY__has_builtin(__builtin_bswap16)) )
-/* #define GetBe16(p) __builtin_bswap16(*(const UInt16 *)(const Byte *)(p)) */
-#define GetBe32(p) __builtin_bswap32(*(const UInt32 *)(const Byte *)(p))
-#define GetBe64(p) __builtin_bswap64(*(const UInt64 *)(const Byte *)(p))
+/* #define GetBe16(p) __builtin_bswap16(*(const UInt16 *)(const void *)(p)) */
+#define GetBe32(p) __builtin_bswap32(*(const UInt32 *)(const void *)(p))
+#define GetBe64(p) __builtin_bswap64(*(const UInt64 *)(const void *)(p))
#define SetBe32(p, v) (*(UInt32 *)(void *)(p)) = __builtin_bswap32(v)
@@ -325,10 +407,37 @@ int x86cpuid_GetFirm(const Cx86cpuid *p);
#define x86cpuid_GetModel(ver) (((ver >> 12) & 0xF0) | ((ver >> 4) & 0xF))
#define x86cpuid_GetStepping(ver) (ver & 0xF)
-BoolInt CPU_Is_InOrder();
-BoolInt CPU_Is_Aes_Supported();
-BoolInt CPU_IsSupported_PageGB();
+BoolInt CPU_Is_InOrder(void);
+
+BoolInt CPU_IsSupported_AES(void);
+BoolInt CPU_IsSupported_AVX2(void);
+BoolInt CPU_IsSupported_VAES_AVX2(void);
+BoolInt CPU_IsSupported_SSSE3(void);
+BoolInt CPU_IsSupported_SSE41(void);
+BoolInt CPU_IsSupported_SHA(void);
+BoolInt CPU_IsSupported_PageGB(void);
+
+#elif defined(MY_CPU_ARM_OR_ARM64)
+
+BoolInt CPU_IsSupported_CRC32(void);
+BoolInt CPU_IsSupported_NEON(void);
+
+#if defined(_WIN32)
+BoolInt CPU_IsSupported_CRYPTO(void);
+#define CPU_IsSupported_SHA1 CPU_IsSupported_CRYPTO
+#define CPU_IsSupported_SHA2 CPU_IsSupported_CRYPTO
+#define CPU_IsSupported_AES CPU_IsSupported_CRYPTO
+#else
+BoolInt CPU_IsSupported_SHA1(void);
+BoolInt CPU_IsSupported_SHA2(void);
+BoolInt CPU_IsSupported_AES(void);
+#endif
+
+#endif
+#if defined(__APPLE__)
+int My_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize);
+int My_sysctlbyname_Get_UInt32(const char *name, UInt32 *val);
#endif
EXTERN_C_END
diff --git a/multiarc/src/formats/7z/C/Delta.c b/multiarc/src/formats/7z/C/Delta.c
index e3edd21e..c4a4499f 100644..100755
--- a/multiarc/src/formats/7z/C/Delta.c
+++ b/multiarc/src/formats/7z/C/Delta.c
@@ -1,5 +1,5 @@
/* Delta.c -- Delta converter
-2009-05-26 : Igor Pavlov : Public domain */
+2021-02-09 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -12,53 +12,158 @@ void Delta_Init(Byte *state)
state[i] = 0;
}
-static void MyMemCpy(Byte *dest, const Byte *src, unsigned size)
-{
- unsigned i;
- for (i = 0; i < size; i++)
- dest[i] = src[i];
-}
void Delta_Encode(Byte *state, unsigned delta, Byte *data, SizeT size)
{
- Byte buf[DELTA_STATE_SIZE];
- unsigned j = 0;
- MyMemCpy(buf, state, delta);
+ Byte temp[DELTA_STATE_SIZE];
+
+ if (size == 0)
+ return;
+
+ {
+ unsigned i = 0;
+ do
+ temp[i] = state[i];
+ while (++i != delta);
+ }
+
+ if (size <= delta)
+ {
+ unsigned i = 0, k;
+ do
+ {
+ Byte b = *data;
+ *data++ = (Byte)(b - temp[i]);
+ temp[i] = b;
+ }
+ while (++i != size);
+
+ k = 0;
+
+ do
+ {
+ if (i == delta)
+ i = 0;
+ state[k] = temp[i++];
+ }
+ while (++k != delta);
+
+ return;
+ }
+
{
- SizeT i;
- for (i = 0; i < size;)
+ Byte *p = data + size - delta;
+ {
+ unsigned i = 0;
+ do
+ state[i] = *p++;
+ while (++i != delta);
+ }
{
- for (j = 0; j < delta && i < size; i++, j++)
+ const Byte *lim = data + delta;
+ ptrdiff_t dif = -(ptrdiff_t)delta;
+
+ if (((ptrdiff_t)size + dif) & 1)
{
- Byte b = data[i];
- data[i] = (Byte)(b - buf[j]);
- buf[j] = b;
+ --p; *p = (Byte)(*p - p[dif]);
}
+
+ while (p != lim)
+ {
+ --p; *p = (Byte)(*p - p[dif]);
+ --p; *p = (Byte)(*p - p[dif]);
+ }
+
+ dif = -dif;
+
+ do
+ {
+ --p; *p = (Byte)(*p - temp[--dif]);
+ }
+ while (dif != 0);
}
}
- if (j == delta)
- j = 0;
- MyMemCpy(state, buf + j, delta - j);
- MyMemCpy(state + delta - j, buf, j);
}
+
void Delta_Decode(Byte *state, unsigned delta, Byte *data, SizeT size)
{
- Byte buf[DELTA_STATE_SIZE];
- unsigned j = 0;
- MyMemCpy(buf, state, delta);
+ unsigned i;
+ const Byte *lim;
+
+ if (size == 0)
+ return;
+
+ i = 0;
+ lim = data + size;
+
+ if (size <= delta)
+ {
+ do
+ *data = (Byte)(*data + state[i++]);
+ while (++data != lim);
+
+ for (; delta != i; state++, delta--)
+ *state = state[i];
+ data -= i;
+ }
+ else
{
- SizeT i;
- for (i = 0; i < size;)
+ /*
+ #define B(n) b ## n
+ #define I(n) Byte B(n) = state[n];
+ #define U(n) { B(n) = (Byte)((B(n)) + *data++); data[-1] = (B(n)); }
+ #define F(n) if (data != lim) { U(n) }
+
+ if (delta == 1)
+ {
+ I(0)
+ if ((lim - data) & 1) { U(0) }
+ while (data != lim) { U(0) U(0) }
+ data -= 1;
+ }
+ else if (delta == 2)
{
- for (j = 0; j < delta && i < size; i++, j++)
+ I(0) I(1)
+ lim -= 1; while (data < lim) { U(0) U(1) }
+ lim += 1; F(0)
+ data -= 2;
+ }
+ else if (delta == 3)
+ {
+ I(0) I(1) I(2)
+ lim -= 2; while (data < lim) { U(0) U(1) U(2) }
+ lim += 2; F(0) F(1)
+ data -= 3;
+ }
+ else if (delta == 4)
+ {
+ I(0) I(1) I(2) I(3)
+ lim -= 3; while (data < lim) { U(0) U(1) U(2) U(3) }
+ lim += 3; F(0) F(1) F(2)
+ data -= 4;
+ }
+ else
+ */
+ {
+ do
+ {
+ *data = (Byte)(*data + state[i++]);
+ data++;
+ }
+ while (i != delta);
+
{
- buf[j] = data[i] = (Byte)(buf[j] + data[i]);
+ ptrdiff_t dif = -(ptrdiff_t)delta;
+ do
+ *data = (Byte)(*data + data[dif]);
+ while (++data != lim);
+ data += dif;
}
}
}
- if (j == delta)
- j = 0;
- MyMemCpy(state, buf + j, delta - j);
- MyMemCpy(state + delta - j, buf, j);
+
+ do
+ *state++ = *data;
+ while (++data != lim);
}
diff --git a/multiarc/src/formats/7z/C/Delta.h b/multiarc/src/formats/7z/C/Delta.h
index 2fa54ad6..2fa54ad6 100644..100755
--- a/multiarc/src/formats/7z/C/Delta.h
+++ b/multiarc/src/formats/7z/C/Delta.h
diff --git a/multiarc/src/formats/7z/C/DllSecur.c b/multiarc/src/formats/7z/C/DllSecur.c
index 5ea108ab..dce0c96c 100644..100755
--- a/multiarc/src/formats/7z/C/DllSecur.c
+++ b/multiarc/src/formats/7z/C/DllSecur.c
@@ -1,16 +1,20 @@
/* DllSecur.c -- DLL loading security
-2018-02-21 : Igor Pavlov : Public domain */
+2022-07-15 : Igor Pavlov : Public domain */
#include "Precomp.h"
#ifdef _WIN32
-#include <windows.h>
+#include <Windows.h>
#include "DllSecur.h"
#ifndef UNDER_CE
+#if defined(__GNUC__) && (__GNUC__ >= 8)
+ #pragma GCC diagnostic ignored "-Wcast-function-type"
+#endif
+
typedef BOOL (WINAPI *Func_SetDefaultDllDirectories)(DWORD DirectoryFlags);
#define MY_LOAD_LIBRARY_SEARCH_USER_DIRS 0x400
@@ -33,17 +37,19 @@ static const char * const g_Dlls =
#endif
+// #define MY_CAST_FUNC (void(*)())
+#define MY_CAST_FUNC
+
void My_SetDefaultDllDirectories()
{
#ifndef UNDER_CE
OSVERSIONINFO vi;
vi.dwOSVersionInfoSize = sizeof(vi);
- GetVersionEx(&vi);
if (!GetVersionEx(&vi) || vi.dwMajorVersion != 6 || vi.dwMinorVersion != 0)
{
Func_SetDefaultDllDirectories setDllDirs = (Func_SetDefaultDllDirectories)
- GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), "SetDefaultDllDirectories");
+ MY_CAST_FUNC GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), "SetDefaultDllDirectories");
if (setDllDirs)
if (setDllDirs(MY_LOAD_LIBRARY_SEARCH_SYSTEM32 | MY_LOAD_LIBRARY_SEARCH_USER_DIRS))
return;
@@ -66,7 +72,7 @@ void LoadSecurityDlls()
if (!GetVersionEx(&vi) || vi.dwMajorVersion != 6 || vi.dwMinorVersion != 0)
{
Func_SetDefaultDllDirectories setDllDirs = (Func_SetDefaultDllDirectories)
- GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), "SetDefaultDllDirectories");
+ MY_CAST_FUNC GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), "SetDefaultDllDirectories");
if (setDllDirs)
if (setDllDirs(MY_LOAD_LIBRARY_SEARCH_SYSTEM32 | MY_LOAD_LIBRARY_SEARCH_USER_DIRS))
return;
diff --git a/multiarc/src/formats/7z/C/DllSecur.h b/multiarc/src/formats/7z/C/DllSecur.h
index e2a049ad..64ff26cd 100644..100755
--- a/multiarc/src/formats/7z/C/DllSecur.h
+++ b/multiarc/src/formats/7z/C/DllSecur.h
@@ -10,8 +10,8 @@ EXTERN_C_BEGIN
#ifdef _WIN32
-void My_SetDefaultDllDirectories();
-void LoadSecurityDlls();
+void My_SetDefaultDllDirectories(void);
+void LoadSecurityDlls(void);
#endif
diff --git a/multiarc/src/formats/7z/C/HuffEnc.c b/multiarc/src/formats/7z/C/HuffEnc.c
index a54b3d87..f3c2996d 100644..100755
--- a/multiarc/src/formats/7z/C/HuffEnc.c
+++ b/multiarc/src/formats/7z/C/HuffEnc.c
@@ -1,5 +1,5 @@
/* HuffEnc.c -- functions for Huffman encoding
-2017-04-03 : Igor Pavlov : Public domain */
+2021-02-09 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -8,7 +8,7 @@
#define kMaxLen 16
#define NUM_BITS 10
-#define MASK ((1 << NUM_BITS) - 1)
+#define MASK (((unsigned)1 << NUM_BITS) - 1)
#define NUM_COUNTERS 64
diff --git a/multiarc/src/formats/7z/C/HuffEnc.h b/multiarc/src/formats/7z/C/HuffEnc.h
index 92b6878d..92b6878d 100644..100755
--- a/multiarc/src/formats/7z/C/HuffEnc.h
+++ b/multiarc/src/formats/7z/C/HuffEnc.h
diff --git a/multiarc/src/formats/7z/C/LzFind.c b/multiarc/src/formats/7z/C/LzFind.c
index df55e86c..1b73c284 100644..100755
--- a/multiarc/src/formats/7z/C/LzFind.c
+++ b/multiarc/src/formats/7z/C/LzFind.c
@@ -1,20 +1,69 @@
/* LzFind.c -- Match finder for LZ algorithms
-2018-07-08 : Igor Pavlov : Public domain */
+2021-11-29 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include <string.h>
+// #include <stdio.h>
+#include "CpuArch.h"
#include "LzFind.h"
#include "LzHash.h"
+#define kBlockMoveAlign (1 << 7) // alignment for memmove()
+#define kBlockSizeAlign (1 << 16) // alignment for block allocation
+#define kBlockSizeReserveMin (1 << 24) // it's 1/256 from 4 GB dictinary
+
#define kEmptyHashValue 0
-#define kMaxValForNormalize ((UInt32)0xFFFFFFFF)
-#define kNormalizeStepMin (1 << 10) /* it must be power of 2 */
-#define kNormalizeMask (~(UInt32)(kNormalizeStepMin - 1))
-#define kMaxHistorySize ((UInt32)7 << 29)
-#define kStartMaxLen 3
+#define kMaxValForNormalize ((UInt32)0)
+// #define kMaxValForNormalize ((UInt32)(1 << 20) + 0xFFF) // for debug
+
+// #define kNormalizeAlign (1 << 7) // alignment for speculated accesses
+
+#define GET_AVAIL_BYTES(p) \
+ Inline_MatchFinder_GetNumAvailableBytes(p)
+
+
+// #define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size)
+#define kFix5HashSize kFix4HashSize
+
+/*
+ HASH2_CALC:
+ if (hv) match, then cur[0] and cur[1] also match
+*/
+#define HASH2_CALC hv = GetUi16(cur);
+
+// (crc[0 ... 255] & 0xFF) provides one-to-one correspondence to [0 ... 255]
+
+/*
+ HASH3_CALC:
+ if (cur[0]) and (h2) match, then cur[1] also match
+ if (cur[0]) and (hv) match, then cur[1] and cur[2] also match
+*/
+#define HASH3_CALC { \
+ UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
+ h2 = temp & (kHash2Size - 1); \
+ hv = (temp ^ ((UInt32)cur[2] << 8)) & p->hashMask; }
+
+#define HASH4_CALC { \
+ UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
+ h2 = temp & (kHash2Size - 1); \
+ temp ^= ((UInt32)cur[2] << 8); \
+ h3 = temp & (kHash3Size - 1); \
+ hv = (temp ^ (p->crc[cur[3]] << kLzHash_CrcShift_1)) & p->hashMask; }
+
+#define HASH5_CALC { \
+ UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
+ h2 = temp & (kHash2Size - 1); \
+ temp ^= ((UInt32)cur[2] << 8); \
+ h3 = temp & (kHash3Size - 1); \
+ temp ^= (p->crc[cur[3]] << kLzHash_CrcShift_1); \
+ /* h4 = temp & p->hash4Mask; */ /* (kHash4Size - 1); */ \
+ hv = (temp ^ (p->crc[cur[4]] << kLzHash_CrcShift_2)) & p->hashMask; }
+
+#define HASH_ZIP_CALC hv = ((cur[2] | ((UInt32)cur[0] << 8)) ^ p->crc[cur[1]]) & 0xFFFF;
+
static void LzInWindow_Free(CMatchFinder *p, ISzAllocPtr alloc)
{
@@ -25,46 +74,57 @@ static void LzInWindow_Free(CMatchFinder *p, ISzAllocPtr alloc)
}
}
-/* keepSizeBefore + keepSizeAfter + keepSizeReserv must be < 4G) */
-static int LzInWindow_Create(CMatchFinder *p, UInt32 keepSizeReserv, ISzAllocPtr alloc)
+static int LzInWindow_Create2(CMatchFinder *p, UInt32 blockSize, ISzAllocPtr alloc)
{
- UInt32 blockSize = p->keepSizeBefore + p->keepSizeAfter + keepSizeReserv;
- if (p->directInput)
- {
- p->blockSize = blockSize;
- return 1;
- }
+ if (blockSize == 0)
+ return 0;
if (!p->bufferBase || p->blockSize != blockSize)
{
+ // size_t blockSizeT;
LzInWindow_Free(p, alloc);
p->blockSize = blockSize;
- p->bufferBase = (Byte *)ISzAlloc_Alloc(alloc, (size_t)blockSize);
+ // blockSizeT = blockSize;
+
+ // printf("\nblockSize = 0x%x\n", blockSize);
+ /*
+ #if defined _WIN64
+ // we can allocate 4GiB, but still use UInt32 for (p->blockSize)
+ // we use UInt32 type for (p->blockSize), because
+ // we don't want to wrap over 4 GiB,
+ // when we use (p->streamPos - p->pos) that is UInt32.
+ if (blockSize >= (UInt32)0 - (UInt32)kBlockSizeAlign)
+ {
+ blockSizeT = ((size_t)1 << 32);
+ printf("\nchanged to blockSizeT = 4GiB\n");
+ }
+ #endif
+ */
+
+ p->bufferBase = (Byte *)ISzAlloc_Alloc(alloc, blockSize);
+ // printf("\nbufferBase = %p\n", p->bufferBase);
+ // return 0; // for debug
}
return (p->bufferBase != NULL);
}
-Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return p->buffer; }
+static const Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return p->buffer; }
-UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p) { return p->streamPos - p->pos; }
+static UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p) { return GET_AVAIL_BYTES(p); }
-void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue)
-{
- p->posLimit -= subValue;
- p->pos -= subValue;
- p->streamPos -= subValue;
-}
+MY_NO_INLINE
static void MatchFinder_ReadBlock(CMatchFinder *p)
{
if (p->streamEndWasReached || p->result != SZ_OK)
return;
- /* We use (p->streamPos - p->pos) value. (p->streamPos < p->pos) is allowed. */
+ /* We use (p->streamPos - p->pos) value.
+ (p->streamPos < p->pos) is allowed. */
if (p->directInput)
{
- UInt32 curSize = 0xFFFFFFFF - (p->streamPos - p->pos);
+ UInt32 curSize = 0xFFFFFFFF - GET_AVAIL_BYTES(p);
if (curSize > p->directInputRem)
curSize = (UInt32)p->directInputRem;
p->directInputRem -= curSize;
@@ -76,10 +136,22 @@ static void MatchFinder_ReadBlock(CMatchFinder *p)
for (;;)
{
- Byte *dest = p->buffer + (p->streamPos - p->pos);
- size_t size = (p->bufferBase + p->blockSize - dest);
+ Byte *dest = p->buffer + GET_AVAIL_BYTES(p);
+ size_t size = (size_t)(p->bufferBase + p->blockSize - dest);
if (size == 0)
+ {
+ /* we call ReadBlock() after NeedMove() and MoveBlock().
+ NeedMove() and MoveBlock() povide more than (keepSizeAfter)
+ to the end of (blockSize).
+ So we don't execute this branch in normal code flow.
+ We can go here, if we will call ReadBlock() before NeedMove(), MoveBlock().
+ */
+ // p->result = SZ_ERROR_FAIL; // we can show error here
return;
+ }
+
+ // #define kRead 3
+ // if (size > kRead) size = kRead; // for debug
p->result = ISeqInStream_Read(p->stream, dest, &size);
if (p->result != SZ_OK)
@@ -90,41 +162,52 @@ static void MatchFinder_ReadBlock(CMatchFinder *p)
return;
}
p->streamPos += (UInt32)size;
- if (p->streamPos - p->pos > p->keepSizeAfter)
+ if (GET_AVAIL_BYTES(p) > p->keepSizeAfter)
return;
+ /* here and in another (p->keepSizeAfter) checks we keep on 1 byte more than was requested by Create() function
+ (GET_AVAIL_BYTES(p) >= p->keepSizeAfter) - minimal required size */
}
+
+ // on exit: (p->result != SZ_OK || p->streamEndWasReached || GET_AVAIL_BYTES(p) > p->keepSizeAfter)
}
+
+
+MY_NO_INLINE
void MatchFinder_MoveBlock(CMatchFinder *p)
{
+ const size_t offset = (size_t)(p->buffer - p->bufferBase) - p->keepSizeBefore;
+ const size_t keepBefore = (offset & (kBlockMoveAlign - 1)) + p->keepSizeBefore;
+ p->buffer = p->bufferBase + keepBefore;
memmove(p->bufferBase,
- p->buffer - p->keepSizeBefore,
- (size_t)(p->streamPos - p->pos) + p->keepSizeBefore);
- p->buffer = p->bufferBase + p->keepSizeBefore;
+ p->bufferBase + (offset & ~((size_t)kBlockMoveAlign - 1)),
+ keepBefore + (size_t)GET_AVAIL_BYTES(p));
}
+/* We call MoveBlock() before ReadBlock().
+ So MoveBlock() can be wasteful operation, if the whole input data
+ can fit in current block even without calling MoveBlock().
+ in important case where (dataSize <= historySize)
+ condition (p->blockSize > dataSize + p->keepSizeAfter) is met
+ So there is no MoveBlock() in that case case.
+*/
+
int MatchFinder_NeedMove(CMatchFinder *p)
{
if (p->directInput)
return 0;
- /* if (p->streamEndWasReached) return 0; */
+ if (p->streamEndWasReached || p->result != SZ_OK)
+ return 0;
return ((size_t)(p->bufferBase + p->blockSize - p->buffer) <= p->keepSizeAfter);
}
void MatchFinder_ReadIfRequired(CMatchFinder *p)
{
- if (p->streamEndWasReached)
- return;
- if (p->keepSizeAfter >= p->streamPos - p->pos)
+ if (p->keepSizeAfter >= GET_AVAIL_BYTES(p))
MatchFinder_ReadBlock(p);
}
-static void MatchFinder_CheckAndMoveAndRead(CMatchFinder *p)
-{
- if (MatchFinder_NeedMove(p))
- MatchFinder_MoveBlock(p);
- MatchFinder_ReadBlock(p);
-}
+
static void MatchFinder_SetDefaultSettings(CMatchFinder *p)
{
@@ -175,39 +258,74 @@ static CLzRef* AllocRefs(size_t num, ISzAllocPtr alloc)
return (CLzRef *)ISzAlloc_Alloc(alloc, sizeInBytes);
}
-int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
- UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter,
- ISzAllocPtr alloc)
+#if (kBlockSizeReserveMin < kBlockSizeAlign * 2)
+ #error Stop_Compiling_Bad_Reserve
+#endif
+
+
+
+static UInt32 GetBlockSize(CMatchFinder *p, UInt32 historySize)
{
- UInt32 sizeReserv;
-
+ UInt32 blockSize = (p->keepSizeBefore + p->keepSizeAfter);
+ /*
if (historySize > kMaxHistorySize)
- {
- MatchFinder_Free(p, alloc);
return 0;
- }
+ */
+ // printf("\nhistorySize == 0x%x\n", historySize);
- sizeReserv = historySize >> 1;
- if (historySize >= ((UInt32)3 << 30)) sizeReserv = historySize >> 3;
- else if (historySize >= ((UInt32)2 << 30)) sizeReserv = historySize >> 2;
+ if (p->keepSizeBefore < historySize || blockSize < p->keepSizeBefore) // if 32-bit overflow
+ return 0;
- sizeReserv += (keepAddBufferBefore + matchMaxLen + keepAddBufferAfter) / 2 + (1 << 19);
+ {
+ const UInt32 kBlockSizeMax = (UInt32)0 - (UInt32)kBlockSizeAlign;
+ const UInt32 rem = kBlockSizeMax - blockSize;
+ const UInt32 reserve = (blockSize >> (blockSize < ((UInt32)1 << 30) ? 1 : 2))
+ + (1 << 12) + kBlockMoveAlign + kBlockSizeAlign; // do not overflow 32-bit here
+ if (blockSize >= kBlockSizeMax
+ || rem < kBlockSizeReserveMin) // we reject settings that will be slow
+ return 0;
+ if (reserve >= rem)
+ blockSize = kBlockSizeMax;
+ else
+ {
+ blockSize += reserve;
+ blockSize &= ~(UInt32)(kBlockSizeAlign - 1);
+ }
+ }
+ // printf("\n LzFind_blockSize = %x\n", blockSize);
+ // printf("\n LzFind_blockSize = %d\n", blockSize >> 20);
+ return blockSize;
+}
+
+int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
+ UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter,
+ ISzAllocPtr alloc)
+{
+ /* we need one additional byte in (p->keepSizeBefore),
+ since we use MoveBlock() after (p->pos++) and before dictionary using */
+ // keepAddBufferBefore = (UInt32)0xFFFFFFFF - (1 << 22); // for debug
p->keepSizeBefore = historySize + keepAddBufferBefore + 1;
- p->keepSizeAfter = matchMaxLen + keepAddBufferAfter;
-
- /* we need one additional byte, since we use MoveBlock after pos++ and before dictionary using */
-
- if (LzInWindow_Create(p, sizeReserv, alloc))
+
+ keepAddBufferAfter += matchMaxLen;
+ /* we need (p->keepSizeAfter >= p->numHashBytes) */
+ if (keepAddBufferAfter < p->numHashBytes)
+ keepAddBufferAfter = p->numHashBytes;
+ // keepAddBufferAfter -= 2; // for debug
+ p->keepSizeAfter = keepAddBufferAfter;
+
+ if (p->directInput)
+ p->blockSize = 0;
+ if (p->directInput || LzInWindow_Create2(p, GetBlockSize(p, historySize), alloc))
{
- UInt32 newCyclicBufferSize = historySize + 1;
+ const UInt32 newCyclicBufferSize = historySize + 1; // do not change it
UInt32 hs;
p->matchMaxLen = matchMaxLen;
{
+ // UInt32 hs4;
p->fixedHashSize = 0;
- if (p->numHashBytes == 2)
- hs = (1 << 16) - 1;
- else
+ hs = (1 << 16) - 1;
+ if (p->numHashBytes != 2)
{
hs = historySize;
if (hs > p->expectedDataSize)
@@ -218,9 +336,9 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
hs |= (hs >> 2);
hs |= (hs >> 4);
hs |= (hs >> 8);
+ // we propagated 16 bits in (hs). Low 16 bits must be set later
hs >>= 1;
- hs |= 0xFFFF; /* don't change it! It's required for Deflate */
- if (hs > (1 << 24))
+ if (hs >= (1 << 24))
{
if (p->numHashBytes == 3)
hs = (1 << 24) - 1;
@@ -228,12 +346,30 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
hs >>= 1;
/* if (bigHash) mode, GetHeads4b() in LzFindMt.c needs (hs >= ((1 << 24) - 1))) */
}
+
+ // hs = ((UInt32)1 << 25) - 1; // for test
+
+ // (hash_size >= (1 << 16)) : Required for (numHashBytes > 2)
+ hs |= (1 << 16) - 1; /* don't change it! */
+
+ // bt5: we adjust the size with recommended minimum size
+ if (p->numHashBytes >= 5)
+ hs |= (256 << kLzHash_CrcShift_2) - 1;
}
p->hashMask = hs;
hs++;
+
+ /*
+ hs4 = (1 << 20);
+ if (hs4 > hs)
+ hs4 = hs;
+ // hs4 = (1 << 16); // for test
+ p->hash4Mask = hs4 - 1;
+ */
+
if (p->numHashBytes > 2) p->fixedHashSize += kHash2Size;
if (p->numHashBytes > 3) p->fixedHashSize += kHash3Size;
- if (p->numHashBytes > 4) p->fixedHashSize += kHash4Size;
+ // if (p->numHashBytes > 4) p->fixedHashSize += hs4; // kHash4Size;
hs += p->fixedHashSize;
}
@@ -242,13 +378,17 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
size_t numSons;
p->historySize = historySize;
p->hashSizeSum = hs;
- p->cyclicBufferSize = newCyclicBufferSize;
+ p->cyclicBufferSize = newCyclicBufferSize; // it must be = (historySize + 1)
numSons = newCyclicBufferSize;
if (p->btMode)
numSons <<= 1;
newSize = hs + numSons;
+ // aligned size is not required here, but it can be better for some loops
+ #define NUM_REFS_ALIGN_MASK 0xF
+ newSize = (newSize + NUM_REFS_ALIGN_MASK) & ~(size_t)NUM_REFS_ALIGN_MASK;
+
if (p->hash && p->numRefs == newSize)
return 1;
@@ -268,33 +408,43 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
return 0;
}
+
static void MatchFinder_SetLimits(CMatchFinder *p)
{
- UInt32 limit = kMaxValForNormalize - p->pos;
- UInt32 limit2 = p->cyclicBufferSize - p->cyclicBufferPos;
-
- if (limit2 < limit)
- limit = limit2;
- limit2 = p->streamPos - p->pos;
+ UInt32 k;
+ UInt32 n = kMaxValForNormalize - p->pos;
+ if (n == 0)
+ n = (UInt32)(Int32)-1; // we allow (pos == 0) at start even with (kMaxValForNormalize == 0)
- if (limit2 <= p->keepSizeAfter)
+ k = p->cyclicBufferSize - p->cyclicBufferPos;
+ if (k < n)
+ n = k;
+
+ k = GET_AVAIL_BYTES(p);
{
- if (limit2 > 0)
- limit2 = 1;
+ const UInt32 ksa = p->keepSizeAfter;
+ UInt32 mm = p->matchMaxLen;
+ if (k > ksa)
+ k -= ksa; // we must limit exactly to keepSizeAfter for ReadBlock
+ else if (k >= mm)
+ {
+ // the limitation for (p->lenLimit) update
+ k -= mm; // optimization : to reduce the number of checks
+ k++;
+ // k = 1; // non-optimized version : for debug
+ }
+ else
+ {
+ mm = k;
+ if (k != 0)
+ k = 1;
+ }
+ p->lenLimit = mm;
}
- else
- limit2 -= p->keepSizeAfter;
-
- if (limit2 < limit)
- limit = limit2;
+ if (k < n)
+ n = k;
- {
- UInt32 lenLimit = p->streamPos - p->pos;
- if (lenLimit > p->matchMaxLen)
- lenLimit = p->matchMaxLen;
- p->lenLimit = lenLimit;
- }
- p->posLimit = p->pos + limit;
+ p->posLimit = p->pos + n;
}
@@ -302,7 +452,7 @@ void MatchFinder_Init_LowHash(CMatchFinder *p)
{
size_t i;
CLzRef *items = p->hash;
- size_t numItems = p->fixedHashSize;
+ const size_t numItems = p->fixedHashSize;
for (i = 0; i < numItems; i++)
items[i] = kEmptyHashValue;
}
@@ -312,72 +462,322 @@ void MatchFinder_Init_HighHash(CMatchFinder *p)
{
size_t i;
CLzRef *items = p->hash + p->fixedHashSize;
- size_t numItems = (size_t)p->hashMask + 1;
+ const size_t numItems = (size_t)p->hashMask + 1;
for (i = 0; i < numItems; i++)
items[i] = kEmptyHashValue;
}
-void MatchFinder_Init_3(CMatchFinder *p, int readData)
+void MatchFinder_Init_4(CMatchFinder *p)
{
- p->cyclicBufferPos = 0;
p->buffer = p->bufferBase;
- p->pos =
- p->streamPos = p->cyclicBufferSize;
+ {
+ /* kEmptyHashValue = 0 (Zero) is used in hash tables as NO-VALUE marker.
+ the code in CMatchFinderMt expects (pos = 1) */
+ p->pos =
+ p->streamPos =
+ 1; // it's smallest optimal value. do not change it
+ // 0; // for debug
+ }
p->result = SZ_OK;
p->streamEndWasReached = 0;
-
- if (readData)
- MatchFinder_ReadBlock(p);
-
- MatchFinder_SetLimits(p);
}
+// (CYC_TO_POS_OFFSET == 0) is expected by some optimized code
+#define CYC_TO_POS_OFFSET 0
+// #define CYC_TO_POS_OFFSET 1 // for debug
+
void MatchFinder_Init(CMatchFinder *p)
{
MatchFinder_Init_HighHash(p);
MatchFinder_Init_LowHash(p);
- MatchFinder_Init_3(p, True);
+ MatchFinder_Init_4(p);
+ // if (readData)
+ MatchFinder_ReadBlock(p);
+
+ /* if we init (cyclicBufferPos = pos), then we can use one variable
+ instead of both (cyclicBufferPos) and (pos) : only before (cyclicBufferPos) wrapping */
+ p->cyclicBufferPos = (p->pos - CYC_TO_POS_OFFSET); // init with relation to (pos)
+ // p->cyclicBufferPos = 0; // smallest value
+ // p->son[0] = p->son[1] = 0; // unused: we can init skipped record for speculated accesses.
+ MatchFinder_SetLimits(p);
}
-
-static UInt32 MatchFinder_GetSubValue(CMatchFinder *p)
+
+
+#ifdef MY_CPU_X86_OR_AMD64
+ #if defined(__clang__) && (__clang_major__ >= 8) \
+ || defined(__GNUC__) && (__GNUC__ >= 8) \
+ || defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1900)
+ #define USE_SATUR_SUB_128
+ #define USE_AVX2
+ #define ATTRIB_SSE41 __attribute__((__target__("sse4.1")))
+ #define ATTRIB_AVX2 __attribute__((__target__("avx2")))
+ #elif defined(_MSC_VER)
+ #if (_MSC_VER >= 1600)
+ #define USE_SATUR_SUB_128
+ #if (_MSC_VER >= 1900)
+ #define USE_AVX2
+ #include <immintrin.h> // avx
+ #endif
+ #endif
+ #endif
+
+// #elif defined(MY_CPU_ARM_OR_ARM64)
+#elif defined(MY_CPU_ARM64)
+
+ #if defined(__clang__) && (__clang_major__ >= 8) \
+ || defined(__GNUC__) && (__GNUC__ >= 8)
+ #define USE_SATUR_SUB_128
+ #ifdef MY_CPU_ARM64
+ // #define ATTRIB_SSE41 __attribute__((__target__("")))
+ #else
+ // #define ATTRIB_SSE41 __attribute__((__target__("fpu=crypto-neon-fp-armv8")))
+ #endif
+
+ #elif defined(_MSC_VER)
+ #if (_MSC_VER >= 1910)
+ #define USE_SATUR_SUB_128
+ #endif
+ #endif
+
+ #if defined(_MSC_VER) && defined(MY_CPU_ARM64)
+ #include <arm64_neon.h>
+ #else
+ #include <arm_neon.h>
+ #endif
+
+#endif
+
+/*
+#ifndef ATTRIB_SSE41
+ #define ATTRIB_SSE41
+#endif
+#ifndef ATTRIB_AVX2
+ #define ATTRIB_AVX2
+#endif
+*/
+
+#ifdef USE_SATUR_SUB_128
+
+// #define _SHOW_HW_STATUS
+
+#ifdef _SHOW_HW_STATUS
+#include <stdio.h>
+#define _PRF(x) x
+_PRF(;)
+#else
+#define _PRF(x)
+#endif
+
+#ifdef MY_CPU_ARM_OR_ARM64
+
+#ifdef MY_CPU_ARM64
+// #define FORCE_SATUR_SUB_128
+#endif
+
+typedef uint32x4_t v128;
+#define SASUB_128(i) \
+ *(v128 *)(void *)(items + (i) * 4) = \
+ vsubq_u32(vmaxq_u32(*(const v128 *)(const void *)(items + (i) * 4), sub2), sub2);
+
+#else
+
+#include <smmintrin.h> // sse4.1
+
+typedef __m128i v128;
+#define SASUB_128(i) \
+ *(v128 *)(void *)(items + (i) * 4) = \
+ _mm_sub_epi32(_mm_max_epu32(*(const v128 *)(const void *)(items + (i) * 4), sub2), sub2); // SSE 4.1
+
+#endif
+
+
+
+MY_NO_INLINE
+static
+#ifdef ATTRIB_SSE41
+ATTRIB_SSE41
+#endif
+void
+MY_FAST_CALL
+LzFind_SaturSub_128(UInt32 subValue, CLzRef *items, const CLzRef *lim)
{
- return (p->pos - p->historySize - 1) & kNormalizeMask;
+ v128 sub2 =
+ #ifdef MY_CPU_ARM_OR_ARM64
+ vdupq_n_u32(subValue);
+ #else
+ _mm_set_epi32((Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue);
+ #endif
+ do
+ {
+ SASUB_128(0)
+ SASUB_128(1)
+ SASUB_128(2)
+ SASUB_128(3)
+ items += 4 * 4;
+ }
+ while (items != lim);
}
-void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems)
+
+
+#ifdef USE_AVX2
+
+#include <immintrin.h> // avx
+
+#define SASUB_256(i) *(__m256i *)(void *)(items + (i) * 8) = _mm256_sub_epi32(_mm256_max_epu32(*(const __m256i *)(const void *)(items + (i) * 8), sub2), sub2); // AVX2
+
+MY_NO_INLINE
+static
+#ifdef ATTRIB_AVX2
+ATTRIB_AVX2
+#endif
+void
+MY_FAST_CALL
+LzFind_SaturSub_256(UInt32 subValue, CLzRef *items, const CLzRef *lim)
{
- size_t i;
- for (i = 0; i < numItems; i++)
+ __m256i sub2 = _mm256_set_epi32(
+ (Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue,
+ (Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue);
+ do
{
- UInt32 value = items[i];
- if (value <= subValue)
- value = kEmptyHashValue;
- else
- value -= subValue;
- items[i] = value;
+ SASUB_256(0)
+ SASUB_256(1)
+ items += 2 * 8;
+ }
+ while (items != lim);
+}
+#endif // USE_AVX2
+
+#ifndef FORCE_SATUR_SUB_128
+typedef void (MY_FAST_CALL *LZFIND_SATUR_SUB_CODE_FUNC)(
+ UInt32 subValue, CLzRef *items, const CLzRef *lim);
+static LZFIND_SATUR_SUB_CODE_FUNC g_LzFind_SaturSub;
+#endif // FORCE_SATUR_SUB_128
+
+#endif // USE_SATUR_SUB_128
+
+
+// kEmptyHashValue must be zero
+// #define SASUB_32(i) v = items[i]; m = v - subValue; if (v < subValue) m = kEmptyHashValue; items[i] = m;
+#define SASUB_32(i) v = items[i]; if (v < subValue) v = subValue; items[i] = v - subValue;
+
+#ifdef FORCE_SATUR_SUB_128
+
+#define DEFAULT_SaturSub LzFind_SaturSub_128
+
+#else
+
+#define DEFAULT_SaturSub LzFind_SaturSub_32
+
+MY_NO_INLINE
+static
+void
+MY_FAST_CALL
+LzFind_SaturSub_32(UInt32 subValue, CLzRef *items, const CLzRef *lim)
+{
+ do
+ {
+ UInt32 v;
+ SASUB_32(0)
+ SASUB_32(1)
+ SASUB_32(2)
+ SASUB_32(3)
+ SASUB_32(4)
+ SASUB_32(5)
+ SASUB_32(6)
+ SASUB_32(7)
+ items += 8;
}
+ while (items != lim);
}
-static void MatchFinder_Normalize(CMatchFinder *p)
+#endif
+
+
+MY_NO_INLINE
+void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems)
{
- UInt32 subValue = MatchFinder_GetSubValue(p);
- MatchFinder_Normalize3(subValue, p->hash, p->numRefs);
- MatchFinder_ReduceOffsets(p, subValue);
+ #define K_NORM_ALIGN_BLOCK_SIZE (1 << 6)
+
+ CLzRef *lim;
+
+ for (; numItems != 0 && ((unsigned)(ptrdiff_t)items & (K_NORM_ALIGN_BLOCK_SIZE - 1)) != 0; numItems--)
+ {
+ UInt32 v;
+ SASUB_32(0);
+ items++;
+ }
+
+ {
+ #define K_NORM_ALIGN_MASK (K_NORM_ALIGN_BLOCK_SIZE / 4 - 1)
+ lim = items + (numItems & ~(size_t)K_NORM_ALIGN_MASK);
+ numItems &= K_NORM_ALIGN_MASK;
+ if (items != lim)
+ {
+ #if defined(USE_SATUR_SUB_128) && !defined(FORCE_SATUR_SUB_128)
+ if (g_LzFind_SaturSub)
+ g_LzFind_SaturSub(subValue, items, lim);
+ else
+ #endif
+ DEFAULT_SaturSub(subValue, items, lim);
+ }
+ items = lim;
+ }
+
+
+ for (; numItems != 0; numItems--)
+ {
+ UInt32 v;
+ SASUB_32(0);
+ items++;
+ }
}
+
+// call MatchFinder_CheckLimits() only after (p->pos++) update
+
MY_NO_INLINE
static void MatchFinder_CheckLimits(CMatchFinder *p)
{
+ if (// !p->streamEndWasReached && p->result == SZ_OK &&
+ p->keepSizeAfter == GET_AVAIL_BYTES(p))
+ {
+ // we try to read only in exact state (p->keepSizeAfter == GET_AVAIL_BYTES(p))
+ if (MatchFinder_NeedMove(p))
+ MatchFinder_MoveBlock(p);
+ MatchFinder_ReadBlock(p);
+ }
+
if (p->pos == kMaxValForNormalize)
- MatchFinder_Normalize(p);
- if (!p->streamEndWasReached && p->keepSizeAfter == p->streamPos - p->pos)
- MatchFinder_CheckAndMoveAndRead(p);
+ if (GET_AVAIL_BYTES(p) >= p->numHashBytes) // optional optimization for last bytes of data.
+ /*
+ if we disable normalization for last bytes of data, and
+ if (data_size == 4 GiB), we don't call wasteful normalization,
+ but (pos) will be wrapped over Zero (0) in that case.
+ And we cannot resume later to normal operation
+ */
+ {
+ // MatchFinder_Normalize(p);
+ /* after normalization we need (p->pos >= p->historySize + 1); */
+ /* we can reduce subValue to aligned value, if want to keep alignment
+ of (p->pos) and (p->buffer) for speculated accesses. */
+ const UInt32 subValue = (p->pos - p->historySize - 1) /* & ~(UInt32)(kNormalizeAlign - 1) */;
+ // const UInt32 subValue = (1 << 15); // for debug
+ // printf("\nMatchFinder_Normalize() subValue == 0x%x\n", subValue);
+ size_t numSonRefs = p->cyclicBufferSize;
+ if (p->btMode)
+ numSonRefs <<= 1;
+ Inline_MatchFinder_ReduceOffsets(p, subValue);
+ MatchFinder_Normalize3(subValue, p->hash, (size_t)p->hashSizeSum + numSonRefs);
+ }
+
if (p->cyclicBufferPos == p->cyclicBufferSize)
p->cyclicBufferPos = 0;
+
MatchFinder_SetLimits(p);
}
@@ -386,9 +786,9 @@ static void MatchFinder_CheckLimits(CMatchFinder *p)
(lenLimit > maxLen)
*/
MY_FORCE_INLINE
-static UInt32 * Hc_GetMatchesSpec(unsigned lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
- UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
- UInt32 *distances, unsigned maxLen)
+static UInt32 * Hc_GetMatchesSpec(size_t lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
+ size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
+ UInt32 *d, unsigned maxLen)
{
/*
son[_cyclicBufferPos] = curMatch;
@@ -396,7 +796,7 @@ static UInt32 * Hc_GetMatchesSpec(unsigned lenLimit, UInt32 curMatch, UInt32 pos
{
UInt32 delta = pos - curMatch;
if (cutValue-- == 0 || delta >= _cyclicBufferSize)
- return distances;
+ return d;
{
const Byte *pb = cur - delta;
curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)];
@@ -409,10 +809,10 @@ static UInt32 * Hc_GetMatchesSpec(unsigned lenLimit, UInt32 curMatch, UInt32 pos
if (maxLen < len)
{
maxLen = len;
- *distances++ = len;
- *distances++ = delta - 1;
+ *d++ = len;
+ *d++ = delta - 1;
if (len == lenLimit)
- return distances;
+ return d;
}
}
}
@@ -421,35 +821,41 @@ static UInt32 * Hc_GetMatchesSpec(unsigned lenLimit, UInt32 curMatch, UInt32 pos
const Byte *lim = cur + lenLimit;
son[_cyclicBufferPos] = curMatch;
+
do
{
- UInt32 delta = pos - curMatch;
+ UInt32 delta;
+
+ if (curMatch == 0)
+ break;
+ // if (curMatch2 >= curMatch) return NULL;
+ delta = pos - curMatch;
if (delta >= _cyclicBufferSize)
break;
{
ptrdiff_t diff;
curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)];
- diff = (ptrdiff_t)0 - delta;
- if (cur[maxLen] == cur[maxLen + diff])
+ diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
+ if (cur[maxLen] == cur[(ptrdiff_t)maxLen + diff])
{
const Byte *c = cur;
while (*c == c[diff])
{
if (++c == lim)
{
- distances[0] = (UInt32)(lim - cur);
- distances[1] = delta - 1;
- return distances + 2;
+ d[0] = (UInt32)(lim - cur);
+ d[1] = delta - 1;
+ return d + 2;
}
}
{
- unsigned len = (unsigned)(c - cur);
+ const unsigned len = (unsigned)(c - cur);
if (maxLen < len)
{
maxLen = len;
- distances[0] = (UInt32)len;
- distances[1] = delta - 1;
- distances += 2;
+ d[0] = (UInt32)len;
+ d[1] = delta - 1;
+ d += 2;
}
}
}
@@ -457,31 +863,36 @@ static UInt32 * Hc_GetMatchesSpec(unsigned lenLimit, UInt32 curMatch, UInt32 pos
}
while (--cutValue);
- return distances;
+ return d;
}
MY_FORCE_INLINE
UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
- UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
- UInt32 *distances, UInt32 maxLen)
+ size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
+ UInt32 *d, UInt32 maxLen)
{
CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
unsigned len0 = 0, len1 = 0;
- for (;;)
+
+ UInt32 cmCheck;
+
+ // if (curMatch >= pos) { *ptr0 = *ptr1 = kEmptyHashValue; return NULL; }
+
+ cmCheck = (UInt32)(pos - _cyclicBufferSize);
+ if ((UInt32)pos <= _cyclicBufferSize)
+ cmCheck = 0;
+
+ if (cmCheck < curMatch)
+ do
{
- UInt32 delta = pos - curMatch;
- if (cutValue-- == 0 || delta >= _cyclicBufferSize)
- {
- *ptr0 = *ptr1 = kEmptyHashValue;
- return distances;
- }
+ const UInt32 delta = pos - curMatch;
{
CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1);
const Byte *pb = cur - delta;
unsigned len = (len0 < len1 ? len0 : len1);
- UInt32 pair0 = pair[0];
+ const UInt32 pair0 = pair[0];
if (pb[len] == cur[len])
{
if (++len != lenLimit && pb[len] == cur[len])
@@ -491,48 +902,60 @@ UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byt
if (maxLen < len)
{
maxLen = (UInt32)len;
- *distances++ = (UInt32)len;
- *distances++ = delta - 1;
+ *d++ = (UInt32)len;
+ *d++ = delta - 1;
if (len == lenLimit)
{
*ptr1 = pair0;
*ptr0 = pair[1];
- return distances;
+ return d;
}
}
}
if (pb[len] < cur[len])
{
*ptr1 = curMatch;
+ // const UInt32 curMatch2 = pair[1];
+ // if (curMatch2 >= curMatch) { *ptr0 = *ptr1 = kEmptyHashValue; return NULL; }
+ // curMatch = curMatch2;
+ curMatch = pair[1];
ptr1 = pair + 1;
- curMatch = *ptr1;
len1 = len;
}
else
{
*ptr0 = curMatch;
+ curMatch = pair[0];
ptr0 = pair;
- curMatch = *ptr0;
len0 = len;
}
}
}
+ while(--cutValue && cmCheck < curMatch);
+
+ *ptr0 = *ptr1 = kEmptyHashValue;
+ return d;
}
+
static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
- UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue)
+ size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue)
{
CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
unsigned len0 = 0, len1 = 0;
- for (;;)
+
+ UInt32 cmCheck;
+
+ cmCheck = (UInt32)(pos - _cyclicBufferSize);
+ if ((UInt32)pos <= _cyclicBufferSize)
+ cmCheck = 0;
+
+ if (// curMatch >= pos || // failure
+ cmCheck < curMatch)
+ do
{
- UInt32 delta = pos - curMatch;
- if (cutValue-- == 0 || delta >= _cyclicBufferSize)
- {
- *ptr0 = *ptr1 = kEmptyHashValue;
- return;
- }
+ const UInt32 delta = pos - curMatch;
{
CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1);
const Byte *pb = cur - delta;
@@ -554,80 +977,108 @@ static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const
if (pb[len] < cur[len])
{
*ptr1 = curMatch;
+ curMatch = pair[1];
ptr1 = pair + 1;
- curMatch = *ptr1;
len1 = len;
}
else
{
*ptr0 = curMatch;
+ curMatch = pair[0];
ptr0 = pair;
- curMatch = *ptr0;
len0 = len;
}
}
}
+ while(--cutValue && cmCheck < curMatch);
+
+ *ptr0 = *ptr1 = kEmptyHashValue;
+ return;
}
+
#define MOVE_POS \
++p->cyclicBufferPos; \
p->buffer++; \
- if (++p->pos == p->posLimit) MatchFinder_CheckLimits(p);
+ { const UInt32 pos1 = p->pos + 1; p->pos = pos1; if (pos1 == p->posLimit) MatchFinder_CheckLimits(p); }
-#define MOVE_POS_RET MOVE_POS return (UInt32)offset;
+#define MOVE_POS_RET MOVE_POS return distances;
-static void MatchFinder_MovePos(CMatchFinder *p) { MOVE_POS; }
+MY_NO_INLINE
+static void MatchFinder_MovePos(CMatchFinder *p)
+{
+ /* we go here at the end of stream data, when (avail < num_hash_bytes)
+ We don't update sons[cyclicBufferPos << btMode].
+ So (sons) record will contain junk. And we cannot resume match searching
+ to normal operation, even if we will provide more input data in buffer.
+ p->sons[p->cyclicBufferPos << p->btMode] = 0; // kEmptyHashValue
+ if (p->btMode)
+ p->sons[(p->cyclicBufferPos << p->btMode) + 1] = 0; // kEmptyHashValue
+ */
+ MOVE_POS;
+}
#define GET_MATCHES_HEADER2(minLen, ret_op) \
- unsigned lenLimit; UInt32 hv; const Byte *cur; UInt32 curMatch; \
+ unsigned lenLimit; UInt32 hv; Byte *cur; UInt32 curMatch; \
lenLimit = (unsigned)p->lenLimit; { if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; }} \
cur = p->buffer;
-#define GET_MATCHES_HEADER(minLen) GET_MATCHES_HEADER2(minLen, return 0)
-#define SKIP_HEADER(minLen) GET_MATCHES_HEADER2(minLen, continue)
+#define GET_MATCHES_HEADER(minLen) GET_MATCHES_HEADER2(minLen, return distances)
+#define SKIP_HEADER(minLen) do { GET_MATCHES_HEADER2(minLen, continue)
+
+#define MF_PARAMS(p) lenLimit, curMatch, p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue
+
+#define SKIP_FOOTER SkipMatchesSpec(MF_PARAMS(p)); MOVE_POS; } while (--num);
-#define MF_PARAMS(p) p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue
+#define GET_MATCHES_FOOTER_BASE(_maxLen_, func) \
+ distances = func(MF_PARAMS(p), \
+ distances, (UInt32)_maxLen_); MOVE_POS_RET;
+
+#define GET_MATCHES_FOOTER_BT(_maxLen_) \
+ GET_MATCHES_FOOTER_BASE(_maxLen_, GetMatchesSpec1)
+
+#define GET_MATCHES_FOOTER_HC(_maxLen_) \
+ GET_MATCHES_FOOTER_BASE(_maxLen_, Hc_GetMatchesSpec)
-#define GET_MATCHES_FOOTER(offset, maxLen) \
- offset = (unsigned)(GetMatchesSpec1((UInt32)lenLimit, curMatch, MF_PARAMS(p), \
- distances + offset, (UInt32)maxLen) - distances); MOVE_POS_RET;
-#define SKIP_FOOTER \
- SkipMatchesSpec((UInt32)lenLimit, curMatch, MF_PARAMS(p)); MOVE_POS;
#define UPDATE_maxLen { \
- ptrdiff_t diff = (ptrdiff_t)0 - d2; \
+ const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)d2; \
const Byte *c = cur + maxLen; \
const Byte *lim = cur + lenLimit; \
for (; c != lim; c++) if (*(c + diff) != *c) break; \
maxLen = (unsigned)(c - cur); }
-static UInt32 Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+static UInt32* Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
{
- unsigned offset;
GET_MATCHES_HEADER(2)
HASH2_CALC;
curMatch = p->hash[hv];
p->hash[hv] = p->pos;
- offset = 0;
- GET_MATCHES_FOOTER(offset, 1)
+ GET_MATCHES_FOOTER_BT(1)
}
-UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+UInt32* Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
{
- unsigned offset;
GET_MATCHES_HEADER(3)
HASH_ZIP_CALC;
curMatch = p->hash[hv];
p->hash[hv] = p->pos;
- offset = 0;
- GET_MATCHES_FOOTER(offset, 2)
+ GET_MATCHES_FOOTER_BT(2)
}
-static UInt32 Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+
+#define SET_mmm \
+ mmm = p->cyclicBufferSize; \
+ if (pos < mmm) \
+ mmm = pos;
+
+
+static UInt32* Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
{
+ UInt32 mmm;
UInt32 h2, d2, pos;
- unsigned maxLen, offset;
+ unsigned maxLen;
UInt32 *hash;
GET_MATCHES_HEADER(3)
@@ -643,29 +1094,32 @@ static UInt32 Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
hash[h2] = pos;
(hash + kFix3HashSize)[hv] = pos;
+ SET_mmm
+
maxLen = 2;
- offset = 0;
- if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur)
+ if (d2 < mmm && *(cur - d2) == *cur)
{
UPDATE_maxLen
distances[0] = (UInt32)maxLen;
distances[1] = d2 - 1;
- offset = 2;
+ distances += 2;
if (maxLen == lenLimit)
{
- SkipMatchesSpec((UInt32)lenLimit, curMatch, MF_PARAMS(p));
+ SkipMatchesSpec(MF_PARAMS(p));
MOVE_POS_RET;
}
}
- GET_MATCHES_FOOTER(offset, maxLen)
+ GET_MATCHES_FOOTER_BT(maxLen)
}
-static UInt32 Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+
+static UInt32* Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
{
+ UInt32 mmm;
UInt32 h2, h3, d2, d3, pos;
- unsigned maxLen, offset;
+ unsigned maxLen;
UInt32 *hash;
GET_MATCHES_HEADER(4)
@@ -676,53 +1130,63 @@ static UInt32 Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
d2 = pos - hash [h2];
d3 = pos - (hash + kFix3HashSize)[h3];
-
curMatch = (hash + kFix4HashSize)[hv];
hash [h2] = pos;
(hash + kFix3HashSize)[h3] = pos;
(hash + kFix4HashSize)[hv] = pos;
- maxLen = 0;
- offset = 0;
-
- if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur)
- {
- maxLen = 2;
- distances[0] = 2;
- distances[1] = d2 - 1;
- offset = 2;
- }
+ SET_mmm
+
+ maxLen = 3;
- if (d2 != d3 && d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
+ for (;;)
{
- maxLen = 3;
- distances[(size_t)offset + 1] = d3 - 1;
- offset += 2;
- d2 = d3;
- }
+ if (d2 < mmm && *(cur - d2) == *cur)
+ {
+ distances[0] = 2;
+ distances[1] = d2 - 1;
+ distances += 2;
+ if (*(cur - d2 + 2) == cur[2])
+ {
+ // distances[-2] = 3;
+ }
+ else if (d3 < mmm && *(cur - d3) == *cur)
+ {
+ d2 = d3;
+ distances[1] = d3 - 1;
+ distances += 2;
+ }
+ else
+ break;
+ }
+ else if (d3 < mmm && *(cur - d3) == *cur)
+ {
+ d2 = d3;
+ distances[1] = d3 - 1;
+ distances += 2;
+ }
+ else
+ break;
- if (offset != 0)
- {
UPDATE_maxLen
- distances[(size_t)offset - 2] = (UInt32)maxLen;
+ distances[-2] = (UInt32)maxLen;
if (maxLen == lenLimit)
{
- SkipMatchesSpec((UInt32)lenLimit, curMatch, MF_PARAMS(p));
- MOVE_POS_RET;
+ SkipMatchesSpec(MF_PARAMS(p));
+ MOVE_POS_RET
}
+ break;
}
- if (maxLen < 3)
- maxLen = 3;
-
- GET_MATCHES_FOOTER(offset, maxLen)
+ GET_MATCHES_FOOTER_BT(maxLen)
}
-/*
-static UInt32 Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+
+static UInt32* Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
{
- UInt32 h2, h3, h4, d2, d3, d4, maxLen, offset, pos;
+ UInt32 mmm;
+ UInt32 h2, h3, d2, d3, maxLen, pos;
UInt32 *hash;
GET_MATCHES_HEADER(5)
@@ -733,73 +1197,69 @@ static UInt32 Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
d2 = pos - hash [h2];
d3 = pos - (hash + kFix3HashSize)[h3];
- d4 = pos - (hash + kFix4HashSize)[h4];
+ // d4 = pos - (hash + kFix4HashSize)[h4];
curMatch = (hash + kFix5HashSize)[hv];
hash [h2] = pos;
(hash + kFix3HashSize)[h3] = pos;
- (hash + kFix4HashSize)[h4] = pos;
+ // (hash + kFix4HashSize)[h4] = pos;
(hash + kFix5HashSize)[hv] = pos;
- maxLen = 0;
- offset = 0;
+ SET_mmm
- if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur)
+ maxLen = 4;
+
+ for (;;)
{
- distances[0] = maxLen = 2;
- distances[1] = d2 - 1;
- offset = 2;
- if (*(cur - d2 + 2) == cur[2])
- distances[0] = maxLen = 3;
- else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
+ if (d2 < mmm && *(cur - d2) == *cur)
+ {
+ distances[0] = 2;
+ distances[1] = d2 - 1;
+ distances += 2;
+ if (*(cur - d2 + 2) == cur[2])
+ {
+ }
+ else if (d3 < mmm && *(cur - d3) == *cur)
+ {
+ distances[1] = d3 - 1;
+ distances += 2;
+ d2 = d3;
+ }
+ else
+ break;
+ }
+ else if (d3 < mmm && *(cur - d3) == *cur)
{
- distances[2] = maxLen = 3;
- distances[3] = d3 - 1;
- offset = 4;
+ distances[1] = d3 - 1;
+ distances += 2;
d2 = d3;
}
- }
- else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
- {
- distances[0] = maxLen = 3;
- distances[1] = d3 - 1;
- offset = 2;
- d2 = d3;
- }
-
- if (d2 != d4 && d4 < p->cyclicBufferSize
- && *(cur - d4) == *cur
- && *(cur - d4 + 3) == *(cur + 3))
- {
- maxLen = 4;
- distances[(size_t)offset + 1] = d4 - 1;
- offset += 2;
- d2 = d4;
- }
-
- if (offset != 0)
- {
+ else
+ break;
+
+ distances[-2] = 3;
+ if (*(cur - d2 + 3) != cur[3])
+ break;
UPDATE_maxLen
- distances[(size_t)offset - 2] = maxLen;
+ distances[-2] = (UInt32)maxLen;
if (maxLen == lenLimit)
{
- SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p));
+ SkipMatchesSpec(MF_PARAMS(p));
MOVE_POS_RET;
}
+ break;
}
-
- if (maxLen < 4)
- maxLen = 4;
- GET_MATCHES_FOOTER(offset, maxLen)
+ GET_MATCHES_FOOTER_BT(maxLen)
}
-*/
-static UInt32 Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+
+static UInt32* Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
{
+ UInt32 mmm;
UInt32 h2, h3, d2, d3, pos;
- unsigned maxLen, offset;
+ unsigned maxLen;
UInt32 *hash;
GET_MATCHES_HEADER(4)
@@ -816,48 +1276,57 @@ static UInt32 Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
(hash + kFix3HashSize)[h3] = pos;
(hash + kFix4HashSize)[hv] = pos;
- maxLen = 0;
- offset = 0;
+ SET_mmm
- if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur)
- {
- maxLen = 2;
- distances[0] = 2;
- distances[1] = d2 - 1;
- offset = 2;
- }
-
- if (d2 != d3 && d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
- {
- maxLen = 3;
- distances[(size_t)offset + 1] = d3 - 1;
- offset += 2;
- d2 = d3;
- }
-
- if (offset != 0)
+ maxLen = 3;
+
+ for (;;)
{
+ if (d2 < mmm && *(cur - d2) == *cur)
+ {
+ distances[0] = 2;
+ distances[1] = d2 - 1;
+ distances += 2;
+ if (*(cur - d2 + 2) == cur[2])
+ {
+ // distances[-2] = 3;
+ }
+ else if (d3 < mmm && *(cur - d3) == *cur)
+ {
+ d2 = d3;
+ distances[1] = d3 - 1;
+ distances += 2;
+ }
+ else
+ break;
+ }
+ else if (d3 < mmm && *(cur - d3) == *cur)
+ {
+ d2 = d3;
+ distances[1] = d3 - 1;
+ distances += 2;
+ }
+ else
+ break;
+
UPDATE_maxLen
- distances[(size_t)offset - 2] = (UInt32)maxLen;
+ distances[-2] = (UInt32)maxLen;
if (maxLen == lenLimit)
{
p->son[p->cyclicBufferPos] = curMatch;
MOVE_POS_RET;
}
+ break;
}
- if (maxLen < 3)
- maxLen = 3;
-
- offset = (unsigned)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p),
- distances + offset, maxLen) - (distances));
- MOVE_POS_RET
+ GET_MATCHES_FOOTER_HC(maxLen);
}
-/*
-static UInt32 Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+
+static UInt32 * Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
{
- UInt32 h2, h3, h4, d2, d3, d4, maxLen, offset, pos
+ UInt32 mmm;
+ UInt32 h2, h3, d2, d3, maxLen, pos;
UInt32 *hash;
GET_MATCHES_HEADER(5)
@@ -865,242 +1334,237 @@ static UInt32 Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
hash = p->hash;
pos = p->pos;
-
+
d2 = pos - hash [h2];
d3 = pos - (hash + kFix3HashSize)[h3];
- d4 = pos - (hash + kFix4HashSize)[h4];
+ // d4 = pos - (hash + kFix4HashSize)[h4];
curMatch = (hash + kFix5HashSize)[hv];
hash [h2] = pos;
(hash + kFix3HashSize)[h3] = pos;
- (hash + kFix4HashSize)[h4] = pos;
+ // (hash + kFix4HashSize)[h4] = pos;
(hash + kFix5HashSize)[hv] = pos;
- maxLen = 0;
- offset = 0;
+ SET_mmm
+
+ maxLen = 4;
- if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur)
+ for (;;)
{
- distances[0] = maxLen = 2;
- distances[1] = d2 - 1;
- offset = 2;
- if (*(cur - d2 + 2) == cur[2])
- distances[0] = maxLen = 3;
- else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
+ if (d2 < mmm && *(cur - d2) == *cur)
+ {
+ distances[0] = 2;
+ distances[1] = d2 - 1;
+ distances += 2;
+ if (*(cur - d2 + 2) == cur[2])
+ {
+ }
+ else if (d3 < mmm && *(cur - d3) == *cur)
+ {
+ distances[1] = d3 - 1;
+ distances += 2;
+ d2 = d3;
+ }
+ else
+ break;
+ }
+ else if (d3 < mmm && *(cur - d3) == *cur)
{
- distances[2] = maxLen = 3;
- distances[3] = d3 - 1;
- offset = 4;
+ distances[1] = d3 - 1;
+ distances += 2;
d2 = d3;
}
- }
- else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
- {
- distances[0] = maxLen = 3;
- distances[1] = d3 - 1;
- offset = 2;
- d2 = d3;
- }
-
- if (d2 != d4 && d4 < p->cyclicBufferSize
- && *(cur - d4) == *cur
- && *(cur - d4 + 3) == *(cur + 3))
- {
- maxLen = 4;
- distances[(size_t)offset + 1] = d4 - 1;
- offset += 2;
- d2 = d4;
- }
-
- if (offset != 0)
- {
+ else
+ break;
+
+ distances[-2] = 3;
+ if (*(cur - d2 + 3) != cur[3])
+ break;
UPDATE_maxLen
- distances[(size_t)offset - 2] = maxLen;
+ distances[-2] = maxLen;
if (maxLen == lenLimit)
{
p->son[p->cyclicBufferPos] = curMatch;
MOVE_POS_RET;
}
+ break;
}
- if (maxLen < 4)
- maxLen = 4;
-
- offset = (UInt32)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p),
- distances + offset, maxLen) - (distances));
- MOVE_POS_RET
+ GET_MATCHES_FOOTER_HC(maxLen);
}
-*/
-UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+
+UInt32* Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
{
- unsigned offset;
GET_MATCHES_HEADER(3)
HASH_ZIP_CALC;
curMatch = p->hash[hv];
p->hash[hv] = p->pos;
- offset = (unsigned)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p),
- distances, 2) - (distances));
- MOVE_POS_RET
+ GET_MATCHES_FOOTER_HC(2)
}
+
static void Bt2_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
{
- do
+ SKIP_HEADER(2)
{
- SKIP_HEADER(2)
HASH2_CALC;
curMatch = p->hash[hv];
p->hash[hv] = p->pos;
- SKIP_FOOTER
}
- while (--num != 0);
+ SKIP_FOOTER
}
void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
{
- do
+ SKIP_HEADER(3)
{
- SKIP_HEADER(3)
HASH_ZIP_CALC;
curMatch = p->hash[hv];
p->hash[hv] = p->pos;
- SKIP_FOOTER
}
- while (--num != 0);
+ SKIP_FOOTER
}
static void Bt3_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
{
- do
+ SKIP_HEADER(3)
{
UInt32 h2;
UInt32 *hash;
- SKIP_HEADER(3)
HASH3_CALC;
hash = p->hash;
curMatch = (hash + kFix3HashSize)[hv];
hash[h2] =
(hash + kFix3HashSize)[hv] = p->pos;
- SKIP_FOOTER
}
- while (--num != 0);
+ SKIP_FOOTER
}
static void Bt4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
{
- do
+ SKIP_HEADER(4)
{
UInt32 h2, h3;
UInt32 *hash;
- SKIP_HEADER(4)
HASH4_CALC;
hash = p->hash;
curMatch = (hash + kFix4HashSize)[hv];
hash [h2] =
(hash + kFix3HashSize)[h3] =
(hash + kFix4HashSize)[hv] = p->pos;
- SKIP_FOOTER
}
- while (--num != 0);
+ SKIP_FOOTER
}
-/*
static void Bt5_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
{
- do
+ SKIP_HEADER(5)
{
- UInt32 h2, h3, h4;
+ UInt32 h2, h3;
UInt32 *hash;
- SKIP_HEADER(5)
HASH5_CALC;
hash = p->hash;
curMatch = (hash + kFix5HashSize)[hv];
hash [h2] =
(hash + kFix3HashSize)[h3] =
- (hash + kFix4HashSize)[h4] =
+ // (hash + kFix4HashSize)[h4] =
(hash + kFix5HashSize)[hv] = p->pos;
- SKIP_FOOTER
}
- while (--num != 0);
+ SKIP_FOOTER
}
-*/
+
+
+#define HC_SKIP_HEADER(minLen) \
+ do { if (p->lenLimit < minLen) { MatchFinder_MovePos(p); num--; continue; } { \
+ Byte *cur; \
+ UInt32 *hash; \
+ UInt32 *son; \
+ UInt32 pos = p->pos; \
+ UInt32 num2 = num; \
+ /* (p->pos == p->posLimit) is not allowed here !!! */ \
+ { const UInt32 rem = p->posLimit - pos; if (num2 > rem) num2 = rem; } \
+ num -= num2; \
+ { const UInt32 cycPos = p->cyclicBufferPos; \
+ son = p->son + cycPos; \
+ p->cyclicBufferPos = cycPos + num2; } \
+ cur = p->buffer; \
+ hash = p->hash; \
+ do { \
+ UInt32 curMatch; \
+ UInt32 hv;
+
+
+#define HC_SKIP_FOOTER \
+ cur++; pos++; *son++ = curMatch; \
+ } while (--num2); \
+ p->buffer = cur; \
+ p->pos = pos; \
+ if (pos == p->posLimit) MatchFinder_CheckLimits(p); \
+ }} while(num); \
+
static void Hc4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
{
- do
- {
+ HC_SKIP_HEADER(4)
+
UInt32 h2, h3;
- UInt32 *hash;
- SKIP_HEADER(4)
HASH4_CALC;
- hash = p->hash;
curMatch = (hash + kFix4HashSize)[hv];
hash [h2] =
(hash + kFix3HashSize)[h3] =
- (hash + kFix4HashSize)[hv] = p->pos;
- p->son[p->cyclicBufferPos] = curMatch;
- MOVE_POS
- }
- while (--num != 0);
+ (hash + kFix4HashSize)[hv] = pos;
+
+ HC_SKIP_FOOTER
}
-/*
+
static void Hc5_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
{
- do
- {
- UInt32 h2, h3, h4;
- UInt32 *hash;
- SKIP_HEADER(5)
- HASH5_CALC;
- hash = p->hash;
- curMatch = hash + kFix5HashSize)[hv];
+ HC_SKIP_HEADER(5)
+
+ UInt32 h2, h3;
+ HASH5_CALC
+ curMatch = (hash + kFix5HashSize)[hv];
hash [h2] =
(hash + kFix3HashSize)[h3] =
- (hash + kFix4HashSize)[h4] =
- (hash + kFix5HashSize)[hv] = p->pos;
- p->son[p->cyclicBufferPos] = curMatch;
- MOVE_POS
- }
- while (--num != 0);
+ // (hash + kFix4HashSize)[h4] =
+ (hash + kFix5HashSize)[hv] = pos;
+
+ HC_SKIP_FOOTER
}
-*/
+
void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
{
- do
- {
- SKIP_HEADER(3)
+ HC_SKIP_HEADER(3)
+
HASH_ZIP_CALC;
- curMatch = p->hash[hv];
- p->hash[hv] = p->pos;
- p->son[p->cyclicBufferPos] = curMatch;
- MOVE_POS
- }
- while (--num != 0);
+ curMatch = hash[hv];
+ hash[hv] = pos;
+
+ HC_SKIP_FOOTER
}
-void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable)
+
+void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder2 *vTable)
{
vTable->Init = (Mf_Init_Func)MatchFinder_Init;
vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinder_GetNumAvailableBytes;
vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinder_GetPointerToCurrentPos;
if (!p->btMode)
{
- /* if (p->numHashBytes <= 4) */
+ if (p->numHashBytes <= 4)
{
vTable->GetMatches = (Mf_GetMatches_Func)Hc4_MatchFinder_GetMatches;
vTable->Skip = (Mf_Skip_Func)Hc4_MatchFinder_Skip;
}
- /*
else
{
vTable->GetMatches = (Mf_GetMatches_Func)Hc5_MatchFinder_GetMatches;
vTable->Skip = (Mf_Skip_Func)Hc5_MatchFinder_Skip;
}
- */
}
else if (p->numHashBytes == 2)
{
@@ -1112,16 +1576,53 @@ void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable)
vTable->GetMatches = (Mf_GetMatches_Func)Bt3_MatchFinder_GetMatches;
vTable->Skip = (Mf_Skip_Func)Bt3_MatchFinder_Skip;
}
- else /* if (p->numHashBytes == 4) */
+ else if (p->numHashBytes == 4)
{
vTable->GetMatches = (Mf_GetMatches_Func)Bt4_MatchFinder_GetMatches;
vTable->Skip = (Mf_Skip_Func)Bt4_MatchFinder_Skip;
}
- /*
else
{
vTable->GetMatches = (Mf_GetMatches_Func)Bt5_MatchFinder_GetMatches;
vTable->Skip = (Mf_Skip_Func)Bt5_MatchFinder_Skip;
}
- */
+}
+
+
+
+void LzFindPrepare()
+{
+ #ifndef FORCE_SATUR_SUB_128
+ #ifdef USE_SATUR_SUB_128
+ LZFIND_SATUR_SUB_CODE_FUNC f = NULL;
+ #ifdef MY_CPU_ARM_OR_ARM64
+ {
+ if (CPU_IsSupported_NEON())
+ {
+ // #pragma message ("=== LzFind NEON")
+ _PRF(printf("\n=== LzFind NEON\n"));
+ f = LzFind_SaturSub_128;
+ }
+ // f = 0; // for debug
+ }
+ #else // MY_CPU_ARM_OR_ARM64
+ if (CPU_IsSupported_SSE41())
+ {
+ // #pragma message ("=== LzFind SSE41")
+ _PRF(printf("\n=== LzFind SSE41\n"));
+ f = LzFind_SaturSub_128;
+
+ #ifdef USE_AVX2
+ if (CPU_IsSupported_AVX2())
+ {
+ // #pragma message ("=== LzFind AVX2")
+ _PRF(printf("\n=== LzFind AVX2\n"));
+ f = LzFind_SaturSub_256;
+ }
+ #endif
+ }
+ #endif // MY_CPU_ARM_OR_ARM64
+ g_LzFind_SaturSub = f;
+ #endif // USE_SATUR_SUB_128
+ #endif // FORCE_SATUR_SUB_128
}
diff --git a/multiarc/src/formats/7z/C/LzFind.h b/multiarc/src/formats/7z/C/LzFind.h
index 42c13be1..eea873ff 100644..100755
--- a/multiarc/src/formats/7z/C/LzFind.h
+++ b/multiarc/src/formats/7z/C/LzFind.h
@@ -1,5 +1,5 @@
/* LzFind.h -- Match finder for LZ algorithms
-2017-06-10 : Igor Pavlov : Public domain */
+2021-07-13 : Igor Pavlov : Public domain */
#ifndef __LZ_FIND_H
#define __LZ_FIND_H
@@ -15,7 +15,7 @@ typedef struct _CMatchFinder
Byte *buffer;
UInt32 pos;
UInt32 posLimit;
- UInt32 streamPos;
+ UInt32 streamPos; /* wrap over Zero is allowed (streamPos < pos). Use (UInt32)(streamPos - pos) */
UInt32 lenLimit;
UInt32 cyclicBufferPos;
@@ -51,17 +51,19 @@ typedef struct _CMatchFinder
UInt64 expectedDataSize;
} CMatchFinder;
-#define Inline_MatchFinder_GetPointerToCurrentPos(p) ((p)->buffer)
+#define Inline_MatchFinder_GetPointerToCurrentPos(p) ((const Byte *)(p)->buffer)
-#define Inline_MatchFinder_GetNumAvailableBytes(p) ((p)->streamPos - (p)->pos)
+#define Inline_MatchFinder_GetNumAvailableBytes(p) ((UInt32)((p)->streamPos - (p)->pos))
+/*
#define Inline_MatchFinder_IsFinishedOK(p) \
((p)->streamEndWasReached \
&& (p)->streamPos == (p)->pos \
&& (!(p)->directInput || (p)->directInputRem == 0))
+*/
int MatchFinder_NeedMove(CMatchFinder *p);
-Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p);
+/* Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p); */
void MatchFinder_MoveBlock(CMatchFinder *p);
void MatchFinder_ReadIfRequired(CMatchFinder *p);
@@ -76,10 +78,21 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
ISzAllocPtr alloc);
void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc);
void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems);
-void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue);
+// void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue);
+
+/*
+#define Inline_MatchFinder_InitPos(p, val) \
+ (p)->pos = (val); \
+ (p)->streamPos = (val);
+*/
+
+#define Inline_MatchFinder_ReduceOffsets(p, subValue) \
+ (p)->pos -= (subValue); \
+ (p)->streamPos -= (subValue);
+
UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *buffer, CLzRef *son,
- UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue,
+ size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue,
UInt32 *distances, UInt32 maxLen);
/*
@@ -91,7 +104,7 @@ Conditions:
typedef void (*Mf_Init_Func)(void *object);
typedef UInt32 (*Mf_GetNumAvailableBytes_Func)(void *object);
typedef const Byte * (*Mf_GetPointerToCurrentPos_Func)(void *object);
-typedef UInt32 (*Mf_GetMatches_Func)(void *object, UInt32 *distances);
+typedef UInt32 * (*Mf_GetMatches_Func)(void *object, UInt32 *distances);
typedef void (*Mf_Skip_Func)(void *object, UInt32);
typedef struct _IMatchFinder
@@ -101,21 +114,23 @@ typedef struct _IMatchFinder
Mf_GetPointerToCurrentPos_Func GetPointerToCurrentPos;
Mf_GetMatches_Func GetMatches;
Mf_Skip_Func Skip;
-} IMatchFinder;
+} IMatchFinder2;
-void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable);
+void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder2 *vTable);
void MatchFinder_Init_LowHash(CMatchFinder *p);
void MatchFinder_Init_HighHash(CMatchFinder *p);
-void MatchFinder_Init_3(CMatchFinder *p, int readData);
+void MatchFinder_Init_4(CMatchFinder *p);
void MatchFinder_Init(CMatchFinder *p);
-UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);
-UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);
+UInt32* Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);
+UInt32* Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);
void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num);
void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num);
+void LzFindPrepare(void);
+
EXTERN_C_END
#endif
diff --git a/multiarc/src/formats/7z/C/LzFindMt.c b/multiarc/src/formats/7z/C/LzFindMt.c
index bb0f42c3..4e67fc3f 100644..100755
--- a/multiarc/src/formats/7z/C/LzFindMt.c
+++ b/multiarc/src/formats/7z/C/LzFindMt.c
@@ -1,97 +1,215 @@
/* LzFindMt.c -- multithreaded Match finder for LZ algorithms
-2018-12-29 : Igor Pavlov : Public domain */
+2021-12-21 : Igor Pavlov : Public domain */
#include "Precomp.h"
-#include "LzHash.h"
+// #include <stdio.h>
+
+#include "CpuArch.h"
+#include "LzHash.h"
#include "LzFindMt.h"
+// #define LOG_ITERS
+
+// #define LOG_THREAD
+
+#ifdef LOG_THREAD
+#include <stdio.h>
+#define PRF(x) x
+#else
+#define PRF(x)
+#endif
+
+#ifdef LOG_ITERS
+#include <stdio.h>
+extern UInt64 g_NumIters_Tree;
+extern UInt64 g_NumIters_Loop;
+extern UInt64 g_NumIters_Bytes;
+#define LOG_ITER(x) x
+#else
+#define LOG_ITER(x)
+#endif
+
+#define kMtHashBlockSize ((UInt32)1 << 17)
+#define kMtHashNumBlocks (1 << 1)
+
+#define GET_HASH_BLOCK_OFFSET(i) (((i) & (kMtHashNumBlocks - 1)) * kMtHashBlockSize)
+
+#define kMtBtBlockSize ((UInt32)1 << 16)
+#define kMtBtNumBlocks (1 << 4)
+
+#define GET_BT_BLOCK_OFFSET(i) (((i) & (kMtBtNumBlocks - 1)) * (size_t)kMtBtBlockSize)
+
+/*
+ HASH functions:
+ We use raw 8/16 bits from a[1] and a[2],
+ xored with crc(a[0]) and crc(a[3]).
+ We check a[0], a[3] only. We don't need to compare a[1] and a[2] in matches.
+ our crc() function provides one-to-one correspondence for low 8-bit values:
+ (crc[0...0xFF] & 0xFF) <-> [0...0xFF]
+*/
+
+#define MF(mt) ((mt)->MatchFinder)
+#define MF_CRC (p->crc)
+
+// #define MF(mt) (&(mt)->MatchFinder)
+// #define MF_CRC (p->MatchFinder.crc)
+
+#define MT_HASH2_CALC \
+ h2 = (MF_CRC[cur[0]] ^ cur[1]) & (kHash2Size - 1);
+
+#define MT_HASH3_CALC { \
+ UInt32 temp = MF_CRC[cur[0]] ^ cur[1]; \
+ h2 = temp & (kHash2Size - 1); \
+ h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); }
+
+/*
+#define MT_HASH3_CALC__NO_2 { \
+ UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
+ h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); }
+
+#define __MT_HASH4_CALC { \
+ UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
+ h2 = temp & (kHash2Size - 1); \
+ temp ^= ((UInt32)cur[2] << 8); \
+ h3 = temp & (kHash3Size - 1); \
+ h4 = (temp ^ (p->crc[cur[3]] << kLzHash_CrcShift_1)) & p->hash4Mask; }
+ // (kHash4Size - 1);
+*/
+
+
+MY_NO_INLINE
static void MtSync_Construct(CMtSync *p)
{
+ p->affinity = 0;
p->wasCreated = False;
p->csWasInitialized = False;
p->csWasEntered = False;
Thread_Construct(&p->thread);
Event_Construct(&p->canStart);
- Event_Construct(&p->wasStarted);
Event_Construct(&p->wasStopped);
Semaphore_Construct(&p->freeSemaphore);
Semaphore_Construct(&p->filledSemaphore);
}
-static void MtSync_GetNextBlock(CMtSync *p)
+
+#define DEBUG_BUFFER_LOCK // define it to debug lock state
+
+#ifdef DEBUG_BUFFER_LOCK
+#include <stdlib.h>
+#define BUFFER_MUST_BE_LOCKED(p) if (!(p)->csWasEntered) exit(1);
+#define BUFFER_MUST_BE_UNLOCKED(p) if ( (p)->csWasEntered) exit(1);
+#else
+#define BUFFER_MUST_BE_LOCKED(p)
+#define BUFFER_MUST_BE_UNLOCKED(p)
+#endif
+
+#define LOCK_BUFFER(p) { \
+ BUFFER_MUST_BE_UNLOCKED(p); \
+ CriticalSection_Enter(&(p)->cs); \
+ (p)->csWasEntered = True; }
+
+#define UNLOCK_BUFFER(p) { \
+ BUFFER_MUST_BE_LOCKED(p); \
+ CriticalSection_Leave(&(p)->cs); \
+ (p)->csWasEntered = False; }
+
+
+MY_NO_INLINE
+static UInt32 MtSync_GetNextBlock(CMtSync *p)
{
+ UInt32 numBlocks = 0;
if (p->needStart)
{
+ BUFFER_MUST_BE_UNLOCKED(p)
p->numProcessedBlocks = 1;
p->needStart = False;
p->stopWriting = False;
p->exit = False;
- Event_Reset(&p->wasStarted);
Event_Reset(&p->wasStopped);
-
Event_Set(&p->canStart);
- Event_Wait(&p->wasStarted);
-
- // if (mt) MatchFinder_Init_LowHash(mt->MatchFinder);
}
else
{
- CriticalSection_Leave(&p->cs);
- p->csWasEntered = False;
- p->numProcessedBlocks++;
+ UNLOCK_BUFFER(p)
+ // we free current block
+ numBlocks = p->numProcessedBlocks++;
Semaphore_Release1(&p->freeSemaphore);
}
+
+ // buffer is UNLOCKED here
Semaphore_Wait(&p->filledSemaphore);
- CriticalSection_Enter(&p->cs);
- p->csWasEntered = True;
+ LOCK_BUFFER(p);
+ return numBlocks;
}
-/* MtSync_StopWriting must be called if Writing was started */
+/* if Writing (Processing) thread was started, we must call MtSync_StopWriting() */
+
+MY_NO_INLINE
static void MtSync_StopWriting(CMtSync *p)
{
- UInt32 myNumBlocks = p->numProcessedBlocks;
if (!Thread_WasCreated(&p->thread) || p->needStart)
return;
- p->stopWriting = True;
+
+ PRF(printf("\nMtSync_StopWriting %p\n", p));
+
if (p->csWasEntered)
{
- CriticalSection_Leave(&p->cs);
- p->csWasEntered = False;
+ /* we don't use buffer in this thread after StopWriting().
+ So we UNLOCK buffer.
+ And we restore default UNLOCKED state for stopped thread */
+ UNLOCK_BUFFER(p)
}
- Semaphore_Release1(&p->freeSemaphore);
-
+
+ /* We send (p->stopWriting) message and release freeSemaphore
+ to free current block.
+ So the thread will see (p->stopWriting) at some
+ iteration after Wait(freeSemaphore).
+ The thread doesn't need to fill all avail free blocks,
+ so we can get fast thread stop.
+ */
+
+ p->stopWriting = True;
+ Semaphore_Release1(&p->freeSemaphore); // check semaphore count !!!
+
+ PRF(printf("\nMtSync_StopWriting %p : Event_Wait(&p->wasStopped)\n", p));
Event_Wait(&p->wasStopped);
+ PRF(printf("\nMtSync_StopWriting %p : Event_Wait() finsihed\n", p));
+
+ /* 21.03 : we don't restore semaphore counters here.
+ We will recreate and reinit semaphores in next start */
- while (myNumBlocks++ != p->numProcessedBlocks)
- {
- Semaphore_Wait(&p->filledSemaphore);
- Semaphore_Release1(&p->freeSemaphore);
- }
p->needStart = True;
}
+
+MY_NO_INLINE
static void MtSync_Destruct(CMtSync *p)
{
+ PRF(printf("\nMtSync_Destruct %p\n", p));
+
if (Thread_WasCreated(&p->thread))
{
+ /* we want thread to be in Stopped state before sending EXIT command.
+ note: stop(btSync) will stop (htSync) also */
MtSync_StopWriting(p);
+ /* thread in Stopped state here : (p->needStart == true) */
p->exit = True;
- if (p->needStart)
- Event_Set(&p->canStart);
- Thread_Wait(&p->thread);
- Thread_Close(&p->thread);
+ // if (p->needStart) // it's (true)
+ Event_Set(&p->canStart); // we send EXIT command to thread
+ Thread_Wait_Close(&p->thread); // we wait thread finishing
}
+
if (p->csWasInitialized)
{
CriticalSection_Delete(&p->cs);
p->csWasInitialized = False;
}
+ p->csWasEntered = False;
Event_Close(&p->canStart);
- Event_Close(&p->wasStarted);
Event_Close(&p->wasStopped);
Semaphore_Close(&p->freeSemaphore);
Semaphore_Close(&p->filledSemaphore);
@@ -99,80 +217,251 @@ static void MtSync_Destruct(CMtSync *p)
p->wasCreated = False;
}
-#define RINOK_THREAD(x) { if ((x) != 0) return SZ_ERROR_THREAD; }
-static SRes MtSync_Create2(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj, UInt32 numBlocks)
+// #define RINOK_THREAD(x) { if ((x) != 0) return SZ_ERROR_THREAD; }
+// we want to get real system error codes here instead of SZ_ERROR_THREAD
+#define RINOK_THREAD(x) RINOK(x)
+
+
+// call it before each new file (when new starting is required):
+MY_NO_INLINE
+static SRes MtSync_Init(CMtSync *p, UInt32 numBlocks)
+{
+ WRes wres;
+ // BUFFER_MUST_BE_UNLOCKED(p)
+ if (!p->needStart || p->csWasEntered)
+ return SZ_ERROR_FAIL;
+ wres = Semaphore_OptCreateInit(&p->freeSemaphore, numBlocks, numBlocks);
+ if (wres == 0)
+ wres = Semaphore_OptCreateInit(&p->filledSemaphore, 0, numBlocks);
+ return MY_SRes_HRESULT_FROM_WRes(wres);
+}
+
+
+static WRes MtSync_Create_WRes(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj)
{
+ WRes wres;
+
if (p->wasCreated)
return SZ_OK;
RINOK_THREAD(CriticalSection_Init(&p->cs));
p->csWasInitialized = True;
+ p->csWasEntered = False;
RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->canStart));
- RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->wasStarted));
RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->wasStopped));
-
- RINOK_THREAD(Semaphore_Create(&p->freeSemaphore, numBlocks, numBlocks));
- RINOK_THREAD(Semaphore_Create(&p->filledSemaphore, 0, numBlocks));
p->needStart = True;
-
- RINOK_THREAD(Thread_Create(&p->thread, startAddress, obj));
+ p->exit = True; /* p->exit is unused before (canStart) Event.
+ But in case of some unexpected code failure we will get fast exit from thread */
+
+ // return ERROR_TOO_MANY_POSTS; // for debug
+ // return EINVAL; // for debug
+
+ if (p->affinity != 0)
+ wres = Thread_Create_With_Affinity(&p->thread, startAddress, obj, (CAffinityMask)p->affinity);
+ else
+ wres = Thread_Create(&p->thread, startAddress, obj);
+
+ RINOK_THREAD(wres);
p->wasCreated = True;
return SZ_OK;
}
-static SRes MtSync_Create(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj, UInt32 numBlocks)
+
+MY_NO_INLINE
+static SRes MtSync_Create(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj)
{
- SRes res = MtSync_Create2(p, startAddress, obj, numBlocks);
- if (res != SZ_OK)
- MtSync_Destruct(p);
- return res;
+ const WRes wres = MtSync_Create_WRes(p, startAddress, obj);
+ if (wres == 0)
+ return 0;
+ MtSync_Destruct(p);
+ return MY_SRes_HRESULT_FROM_WRes(wres);
}
-void MtSync_Init(CMtSync *p) { p->needStart = True; }
+
+// ---------- HASH THREAD ----------
#define kMtMaxValForNormalize 0xFFFFFFFF
+// #define kMtMaxValForNormalize ((1 << 21)) // for debug
+// #define kNormalizeAlign (1 << 7) // alignment for speculated accesses
-#define DEF_GetHeads2(name, v, action) \
- static void GetHeads ## name(const Byte *p, UInt32 pos, \
- UInt32 *hash, UInt32 hashMask, UInt32 *heads, UInt32 numHeads, const UInt32 *crc) \
- { action; for (; numHeads != 0; numHeads--) { \
- const UInt32 value = (v); p++; *heads++ = pos - hash[value]; hash[value] = pos++; } }
+#ifdef MY_CPU_LE_UNALIGN
+ #define GetUi24hi_from32(p) ((UInt32)GetUi32(p) >> 8)
+#else
+ #define GetUi24hi_from32(p) ((p)[1] ^ ((UInt32)(p)[2] << 8) ^ ((UInt32)(p)[3] << 16))
+#endif
+
+#define GetHeads_DECL(name) \
+ static void GetHeads ## name(const Byte *p, UInt32 pos, \
+ UInt32 *hash, UInt32 hashMask, UInt32 *heads, UInt32 numHeads, const UInt32 *crc)
+
+#define GetHeads_LOOP(v) \
+ for (; numHeads != 0; numHeads--) { \
+ const UInt32 value = (v); \
+ p++; \
+ *heads++ = pos - hash[value]; \
+ hash[value] = pos++; }
+#define DEF_GetHeads2(name, v, action) \
+ GetHeads_DECL(name) { action \
+ GetHeads_LOOP(v) }
+
#define DEF_GetHeads(name, v) DEF_GetHeads2(name, v, ;)
-DEF_GetHeads2(2, (p[0] | ((UInt32)p[1] << 8)), UNUSED_VAR(hashMask); UNUSED_VAR(crc); )
-DEF_GetHeads(3, (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8)) & hashMask)
-DEF_GetHeads(4, (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8) ^ (crc[p[3]] << 5)) & hashMask)
-DEF_GetHeads(4b, (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8) ^ ((UInt32)p[3] << 16)) & hashMask)
-/* DEF_GetHeads(5, (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8) ^ (crc[p[3]] << 5) ^ (crc[p[4]] << 3)) & hashMask) */
+DEF_GetHeads2(2, GetUi16(p), UNUSED_VAR(hashMask); UNUSED_VAR(crc); )
+DEF_GetHeads(3, (crc[p[0]] ^ GetUi16(p + 1)) & hashMask)
+DEF_GetHeads2(3b, GetUi16(p) ^ ((UInt32)(p)[2] << 16), UNUSED_VAR(hashMask); UNUSED_VAR(crc); )
+// BT3 is not good for crc collisions for big hashMask values.
+
+/*
+GetHeads_DECL(3b)
+{
+ UNUSED_VAR(hashMask);
+ UNUSED_VAR(crc);
+ {
+ const Byte *pLim = p + numHeads;
+ if (numHeads == 0)
+ return;
+ pLim--;
+ while (p < pLim)
+ {
+ UInt32 v1 = GetUi32(p);
+ UInt32 v0 = v1 & 0xFFFFFF;
+ UInt32 h0, h1;
+ p += 2;
+ v1 >>= 8;
+ h0 = hash[v0]; hash[v0] = pos; heads[0] = pos - h0; pos++;
+ h1 = hash[v1]; hash[v1] = pos; heads[1] = pos - h1; pos++;
+ heads += 2;
+ }
+ if (p == pLim)
+ {
+ UInt32 v0 = GetUi16(p) ^ ((UInt32)(p)[2] << 16);
+ *heads = pos - hash[v0];
+ hash[v0] = pos;
+ }
+ }
+}
+*/
+
+/*
+GetHeads_DECL(4)
+{
+ unsigned sh = 0;
+ UNUSED_VAR(crc)
+ while ((hashMask & 0x80000000) == 0)
+ {
+ hashMask <<= 1;
+ sh++;
+ }
+ GetHeads_LOOP((GetUi32(p) * 0xa54a1) >> sh)
+}
+#define GetHeads4b GetHeads4
+*/
+
+#define USE_GetHeads_LOCAL_CRC
+
+#ifdef USE_GetHeads_LOCAL_CRC
+
+GetHeads_DECL(4)
+{
+ UInt32 crc0[256];
+ UInt32 crc1[256];
+ {
+ unsigned i;
+ for (i = 0; i < 256; i++)
+ {
+ UInt32 v = crc[i];
+ crc0[i] = v & hashMask;
+ crc1[i] = (v << kLzHash_CrcShift_1) & hashMask;
+ // crc1[i] = rotlFixed(v, 8) & hashMask;
+ }
+ }
+ GetHeads_LOOP(crc0[p[0]] ^ crc1[p[3]] ^ (UInt32)GetUi16(p+1))
+}
+
+GetHeads_DECL(4b)
+{
+ UInt32 crc0[256];
+ {
+ unsigned i;
+ for (i = 0; i < 256; i++)
+ crc0[i] = crc[i] & hashMask;
+ }
+ GetHeads_LOOP(crc0[p[0]] ^ GetUi24hi_from32(p))
+}
+
+GetHeads_DECL(5)
+{
+ UInt32 crc0[256];
+ UInt32 crc1[256];
+ UInt32 crc2[256];
+ {
+ unsigned i;
+ for (i = 0; i < 256; i++)
+ {
+ UInt32 v = crc[i];
+ crc0[i] = v & hashMask;
+ crc1[i] = (v << kLzHash_CrcShift_1) & hashMask;
+ crc2[i] = (v << kLzHash_CrcShift_2) & hashMask;
+ }
+ }
+ GetHeads_LOOP(crc0[p[0]] ^ crc1[p[3]] ^ crc2[p[4]] ^ (UInt32)GetUi16(p+1))
+}
+
+GetHeads_DECL(5b)
+{
+ UInt32 crc0[256];
+ UInt32 crc1[256];
+ {
+ unsigned i;
+ for (i = 0; i < 256; i++)
+ {
+ UInt32 v = crc[i];
+ crc0[i] = v & hashMask;
+ crc1[i] = (v << kLzHash_CrcShift_1) & hashMask;
+ }
+ }
+ GetHeads_LOOP(crc0[p[0]] ^ crc1[p[4]] ^ GetUi24hi_from32(p))
+}
+
+#else
+
+DEF_GetHeads(4, (crc[p[0]] ^ (crc[p[3]] << kLzHash_CrcShift_1) ^ (UInt32)GetUi16(p+1)) & hashMask)
+DEF_GetHeads(4b, (crc[p[0]] ^ GetUi24hi_from32(p)) & hashMask)
+DEF_GetHeads(5, (crc[p[0]] ^ (crc[p[3]] << kLzHash_CrcShift_1) ^ (crc[p[4]] << kLzHash_CrcShift_2) ^ (UInt32)GetUi16(p + 1)) & hashMask)
+DEF_GetHeads(5b, (crc[p[0]] ^ (crc[p[4]] << kLzHash_CrcShift_1) ^ GetUi24hi_from32(p)) & hashMask)
+
+#endif
+
static void HashThreadFunc(CMatchFinderMt *mt)
{
CMtSync *p = &mt->hashSync;
+ PRF(printf("\nHashThreadFunc\n"));
+
for (;;)
{
- UInt32 numProcessedBlocks = 0;
+ UInt32 blockIndex = 0;
+ PRF(printf("\nHashThreadFunc : Event_Wait(&p->canStart)\n"));
Event_Wait(&p->canStart);
- Event_Set(&p->wasStarted);
+ PRF(printf("\nHashThreadFunc : Event_Wait(&p->canStart) : after \n"));
+ if (p->exit)
+ {
+ PRF(printf("\nHashThreadFunc : exit \n"));
+ return;
+ }
- MatchFinder_Init_HighHash(mt->MatchFinder);
+ MatchFinder_Init_HighHash(MF(mt));
for (;;)
{
- if (p->exit)
- return;
- if (p->stopWriting)
- {
- p->numProcessedBlocks = numProcessedBlocks;
- Event_Set(&p->wasStopped);
- break;
- }
+ PRF(printf("Hash thread block = %d pos = %d\n", (unsigned)blockIndex, mt->MatchFinder->pos));
{
- CMatchFinder *mf = mt->MatchFinder;
+ CMatchFinder *mf = MF(mt);
if (MatchFinder_NeedMove(mf))
{
CriticalSection_Enter(&mt->btSync.cs);
@@ -185,194 +474,178 @@ static void HashThreadFunc(CMatchFinderMt *mt)
mt->pointerToCurPos -= offset;
mt->buffer -= offset;
}
- CriticalSection_Leave(&mt->btSync.cs);
CriticalSection_Leave(&mt->hashSync.cs);
+ CriticalSection_Leave(&mt->btSync.cs);
continue;
}
Semaphore_Wait(&p->freeSemaphore);
+ if (p->exit) // exit is unexpected here. But we check it here for some failure case
+ return;
+
+ // for faster stop : we check (p->stopWriting) after Wait(freeSemaphore)
+ if (p->stopWriting)
+ break;
+
MatchFinder_ReadIfRequired(mf);
- if (mf->pos > (kMtMaxValForNormalize - kMtHashBlockSize))
- {
- UInt32 subValue = (mf->pos - mf->historySize - 1);
- MatchFinder_ReduceOffsets(mf, subValue);
- MatchFinder_Normalize3(subValue, mf->hash + mf->fixedHashSize, (size_t)mf->hashMask + 1);
- }
{
- UInt32 *heads = mt->hashBuf + ((numProcessedBlocks++) & kMtHashNumBlocksMask) * kMtHashBlockSize;
- UInt32 num = mf->streamPos - mf->pos;
+ UInt32 *heads = mt->hashBuf + GET_HASH_BLOCK_OFFSET(blockIndex++);
+ UInt32 num = Inline_MatchFinder_GetNumAvailableBytes(mf);
heads[0] = 2;
heads[1] = num;
+
+ /* heads[1] contains the number of avail bytes:
+ if (avail < mf->numHashBytes) :
+ {
+ it means that stream was finished
+ HASH_THREAD and BT_THREAD must move position for heads[1] (avail) bytes.
+ HASH_THREAD doesn't stop,
+ HASH_THREAD fills only the header (2 numbers) for all next blocks:
+ {2, NumHashBytes - 1}, {2,0}, {2,0}, ... , {2,0}
+ }
+ else
+ {
+ HASH_THREAD and BT_THREAD must move position for (heads[0] - 2) bytes;
+ }
+ */
+
if (num >= mf->numHashBytes)
{
num = num - mf->numHashBytes + 1;
if (num > kMtHashBlockSize - 2)
num = kMtHashBlockSize - 2;
- mt->GetHeadsFunc(mf->buffer, mf->pos, mf->hash + mf->fixedHashSize, mf->hashMask, heads + 2, num, mf->crc);
+
+ if (mf->pos > (UInt32)kMtMaxValForNormalize - num)
+ {
+ const UInt32 subValue = (mf->pos - mf->historySize - 1); // & ~(UInt32)(kNormalizeAlign - 1);
+ Inline_MatchFinder_ReduceOffsets(mf, subValue);
+ MatchFinder_Normalize3(subValue, mf->hash + mf->fixedHashSize, (size_t)mf->hashMask + 1);
+ }
+
heads[0] = 2 + num;
+ mt->GetHeadsFunc(mf->buffer, mf->pos, mf->hash + mf->fixedHashSize, mf->hashMask, heads + 2, num, mf->crc);
}
- mf->pos += num;
+
+ mf->pos += num; // wrap over zero is allowed at the end of stream
mf->buffer += num;
}
}
Semaphore_Release1(&p->filledSemaphore);
- }
- }
-}
+ } // for() processing end
-static void MatchFinderMt_GetNextBlock_Hash(CMatchFinderMt *p)
-{
- MtSync_GetNextBlock(&p->hashSync);
- p->hashBufPosLimit = p->hashBufPos = ((p->hashSync.numProcessedBlocks - 1) & kMtHashNumBlocksMask) * kMtHashBlockSize;
- p->hashBufPosLimit += p->hashBuf[p->hashBufPos++];
- p->hashNumAvail = p->hashBuf[p->hashBufPos++];
+ // p->numBlocks_Sent = blockIndex;
+ Event_Set(&p->wasStopped);
+ } // for() thread end
}
-#define kEmptyHashValue 0
+
+
+
+// ---------- BT THREAD ----------
+
+/* we use one variable instead of two (cyclicBufferPos == pos) before CyclicBuf wrap.
+ here we define fixed offset of (p->pos) from (p->cyclicBufferPos) */
+#define CYC_TO_POS_OFFSET 0
+// #define CYC_TO_POS_OFFSET 1 // for debug
#define MFMT_GM_INLINE
#ifdef MFMT_GM_INLINE
/*
- we use size_t for _cyclicBufferPos instead of UInt32
+ we use size_t for (pos) instead of UInt32
to eliminate "movsx" BUG in old MSVC x64 compiler.
*/
-MY_NO_INLINE
-static UInt32 *GetMatchesSpecN(UInt32 lenLimit, UInt32 pos, const Byte *cur, CLzRef *son,
- size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue,
- UInt32 *distances, UInt32 _maxLen, const UInt32 *hash, const UInt32 *limit, UInt32 size, UInt32 *posRes)
-{
- do
- {
- UInt32 *_distances = ++distances;
- UInt32 delta = *hash++;
- CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
- CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
- unsigned len0 = 0, len1 = 0;
- UInt32 cutValue = _cutValue;
- unsigned maxLen = (unsigned)_maxLen;
-
- /*
- if (size > 1)
- {
- UInt32 delta = *hash;
- if (delta < _cyclicBufferSize)
- {
- UInt32 cyc1 = _cyclicBufferPos + 1;
- CLzRef *pair = son + ((size_t)(cyc1 - delta + ((delta > cyc1) ? _cyclicBufferSize : 0)) << 1);
- Byte b = *(cur + 1 - delta);
- _distances[0] = pair[0];
- _distances[1] = b;
- }
- }
- */
- if (cutValue == 0 || delta >= _cyclicBufferSize)
- {
- *ptr0 = *ptr1 = kEmptyHashValue;
- }
- else
- for(;;)
- {
- {
- CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((_cyclicBufferPos < delta) ? _cyclicBufferSize : 0)) << 1);
- const Byte *pb = cur - delta;
- unsigned len = (len0 < len1 ? len0 : len1);
- UInt32 pair0 = *pair;
- if (pb[len] == cur[len])
- {
- if (++len != lenLimit && pb[len] == cur[len])
- while (++len != lenLimit)
- if (pb[len] != cur[len])
- break;
- if (maxLen < len)
- {
- maxLen = len;
- *distances++ = (UInt32)len;
- *distances++ = delta - 1;
- if (len == lenLimit)
- {
- UInt32 pair1 = pair[1];
- *ptr1 = pair0;
- *ptr0 = pair1;
- break;
- }
- }
- }
- {
- UInt32 curMatch = pos - delta;
- // delta = pos - *pair;
- // delta = pos - pair[((UInt32)pb[len] - (UInt32)cur[len]) >> 31];
- if (pb[len] < cur[len])
- {
- delta = pos - pair[1];
- *ptr1 = curMatch;
- ptr1 = pair + 1;
- len1 = len;
- }
- else
- {
- delta = pos - *pair;
- *ptr0 = curMatch;
- ptr0 = pair;
- len0 = len;
- }
- }
- }
- if (--cutValue == 0 || delta >= _cyclicBufferSize)
- {
- *ptr0 = *ptr1 = kEmptyHashValue;
- break;
- }
- }
- pos++;
- _cyclicBufferPos++;
- cur++;
- {
- UInt32 num = (UInt32)(distances - _distances);
- _distances[-1] = num;
- }
- }
- while (distances < limit && --size != 0);
- *posRes = pos;
- return distances;
-}
+UInt32 * MY_FAST_CALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
+ UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
+ size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
+ UInt32 *posRes);
#endif
-
-static void BtGetMatches(CMatchFinderMt *p, UInt32 *distances)
+static void BtGetMatches(CMatchFinderMt *p, UInt32 *d)
{
UInt32 numProcessed = 0;
UInt32 curPos = 2;
- UInt32 limit = kMtBtBlockSize - (p->matchMaxLen * 2); // * 2
- distances[1] = p->hashNumAvail;
+ /* GetMatchesSpec() functions don't create (len = 1)
+ in [len, dist] match pairs, if (p->numHashBytes >= 2)
+ Also we suppose here that (matchMaxLen >= 2).
+ So the following code for (reserve) is not required
+ UInt32 reserve = (p->matchMaxLen * 2);
+ const UInt32 kNumHashBytes_Max = 5; // BT_HASH_BYTES_MAX
+ if (reserve < kNumHashBytes_Max - 1)
+ reserve = kNumHashBytes_Max - 1;
+ const UInt32 limit = kMtBtBlockSize - (reserve);
+ */
+
+ const UInt32 limit = kMtBtBlockSize - (p->matchMaxLen * 2);
+
+ d[1] = p->hashNumAvail;
+
+ if (p->failure_BT)
+ {
+ // printf("\n == 1 BtGetMatches() p->failure_BT\n");
+ d[0] = 0;
+ // d[1] = 0;
+ return;
+ }
while (curPos < limit)
{
if (p->hashBufPos == p->hashBufPosLimit)
{
- MatchFinderMt_GetNextBlock_Hash(p);
- distances[1] = numProcessed + p->hashNumAvail;
- if (p->hashNumAvail >= p->numHashBytes)
+ // MatchFinderMt_GetNextBlock_Hash(p);
+ UInt32 avail;
+ {
+ const UInt32 bi = MtSync_GetNextBlock(&p->hashSync);
+ const UInt32 k = GET_HASH_BLOCK_OFFSET(bi);
+ const UInt32 *h = p->hashBuf + k;
+ avail = h[1];
+ p->hashBufPosLimit = k + h[0];
+ p->hashNumAvail = avail;
+ p->hashBufPos = k + 2;
+ }
+
+ {
+ /* we must prevent UInt32 overflow for avail total value,
+ if avail was increased with new hash block */
+ UInt32 availSum = numProcessed + avail;
+ if (availSum < numProcessed)
+ availSum = (UInt32)(Int32)-1;
+ d[1] = availSum;
+ }
+
+ if (avail >= p->numHashBytes)
continue;
- distances[0] = curPos + p->hashNumAvail;
- distances += curPos;
- for (; p->hashNumAvail != 0; p->hashNumAvail--)
- *distances++ = 0;
+
+ // if (p->hashBufPos != p->hashBufPosLimit) exit(1);
+
+ /* (avail < p->numHashBytes)
+ It means that stream was finished.
+ And (avail) - is a number of remaining bytes,
+ we fill (d) for (avail) bytes for LZ_THREAD (receiver).
+ but we don't update (p->pos) and (p->cyclicBufferPos) here in BT_THREAD */
+
+ /* here we suppose that we have space enough:
+ (kMtBtBlockSize - curPos >= p->hashNumAvail) */
+ p->hashNumAvail = 0;
+ d[0] = curPos + avail;
+ d += curPos;
+ for (; avail != 0; avail--)
+ *d++ = 0;
return;
}
{
UInt32 size = p->hashBufPosLimit - p->hashBufPos;
- UInt32 lenLimit = p->matchMaxLen;
UInt32 pos = p->pos;
UInt32 cyclicBufferPos = p->cyclicBufferPos;
+ UInt32 lenLimit = p->matchMaxLen;
if (lenLimit >= p->hashNumAvail)
lenLimit = p->hashNumAvail;
{
@@ -384,10 +657,18 @@ static void BtGetMatches(CMatchFinderMt *p, UInt32 *distances)
size = size2;
}
+ if (pos > (UInt32)kMtMaxValForNormalize - size)
+ {
+ const UInt32 subValue = (pos - p->cyclicBufferSize); // & ~(UInt32)(kNormalizeAlign - 1);
+ pos -= subValue;
+ p->pos = pos;
+ MatchFinder_Normalize3(subValue, p->son, (size_t)p->cyclicBufferSize * 2);
+ }
+
#ifndef MFMT_GM_INLINE
while (curPos < limit && size-- != 0)
{
- UInt32 *startDistances = distances + curPos;
+ UInt32 *startDistances = d + curPos;
UInt32 num = (UInt32)(GetMatchesSpec1(lenLimit, pos - p->hashBuf[p->hashBufPos++],
pos, p->buffer, p->son, cyclicBufferPos, p->cyclicBufferSize, p->cutValue,
startDistances + 1, p->numHashBytes - 1) - startDistances);
@@ -399,81 +680,112 @@ static void BtGetMatches(CMatchFinderMt *p, UInt32 *distances)
}
#else
{
- UInt32 posRes;
- curPos = (UInt32)(GetMatchesSpecN(lenLimit, pos, p->buffer, p->son, cyclicBufferPos, p->cyclicBufferSize, p->cutValue,
- distances + curPos, p->numHashBytes - 1, p->hashBuf + p->hashBufPos,
- distances + limit,
- size, &posRes) - distances);
- p->hashBufPos += posRes - pos;
- cyclicBufferPos += posRes - pos;
- p->buffer += posRes - pos;
- pos = posRes;
+ UInt32 posRes = pos;
+ const UInt32 *d_end;
+ {
+ d_end = GetMatchesSpecN_2(
+ p->buffer + lenLimit - 1,
+ pos, p->buffer, p->son, p->cutValue, d + curPos,
+ p->numHashBytes - 1, p->hashBuf + p->hashBufPos,
+ d + limit, p->hashBuf + p->hashBufPos + size,
+ cyclicBufferPos, p->cyclicBufferSize,
+ &posRes);
+ }
+ {
+ if (!d_end)
+ {
+ // printf("\n == 2 BtGetMatches() p->failure_BT\n");
+ // internal data failure
+ p->failure_BT = True;
+ d[0] = 0;
+ // d[1] = 0;
+ return;
+ }
+ }
+ curPos = (UInt32)(d_end - d);
+ {
+ const UInt32 processed = posRes - pos;
+ pos = posRes;
+ p->hashBufPos += processed;
+ cyclicBufferPos += processed;
+ p->buffer += processed;
+ }
}
#endif
- numProcessed += pos - p->pos;
- p->hashNumAvail -= pos - p->pos;
- p->pos = pos;
+ {
+ const UInt32 processed = pos - p->pos;
+ numProcessed += processed;
+ p->hashNumAvail -= processed;
+ p->pos = pos;
+ }
if (cyclicBufferPos == p->cyclicBufferSize)
cyclicBufferPos = 0;
p->cyclicBufferPos = cyclicBufferPos;
}
}
- distances[0] = curPos;
+ d[0] = curPos;
}
+
static void BtFillBlock(CMatchFinderMt *p, UInt32 globalBlockIndex)
{
CMtSync *sync = &p->hashSync;
+
+ BUFFER_MUST_BE_UNLOCKED(sync)
+
if (!sync->needStart)
{
- CriticalSection_Enter(&sync->cs);
- sync->csWasEntered = True;
+ LOCK_BUFFER(sync)
}
- BtGetMatches(p, p->btBuf + (globalBlockIndex & kMtBtNumBlocksMask) * kMtBtBlockSize);
-
- if (p->pos > kMtMaxValForNormalize - kMtBtBlockSize)
- {
- UInt32 subValue = p->pos - p->cyclicBufferSize;
- MatchFinder_Normalize3(subValue, p->son, (size_t)p->cyclicBufferSize * 2);
- p->pos -= subValue;
- }
+ BtGetMatches(p, p->btBuf + GET_BT_BLOCK_OFFSET(globalBlockIndex));
+
+ /* We suppose that we have called GetNextBlock() from start.
+ So buffer is LOCKED */
- if (!sync->needStart)
- {
- CriticalSection_Leave(&sync->cs);
- sync->csWasEntered = False;
- }
+ UNLOCK_BUFFER(sync)
}
-void BtThreadFunc(CMatchFinderMt *mt)
+
+MY_NO_INLINE
+static void BtThreadFunc(CMatchFinderMt *mt)
{
CMtSync *p = &mt->btSync;
for (;;)
{
UInt32 blockIndex = 0;
Event_Wait(&p->canStart);
- Event_Set(&p->wasStarted);
+
for (;;)
{
+ PRF(printf(" BT thread block = %d pos = %d\n", (unsigned)blockIndex, mt->pos));
+ /* (p->exit == true) is possible after (p->canStart) at first loop iteration
+ and is unexpected after more Wait(freeSemaphore) iterations */
if (p->exit)
return;
+
+ Semaphore_Wait(&p->freeSemaphore);
+
+ // for faster stop : we check (p->stopWriting) after Wait(freeSemaphore)
if (p->stopWriting)
- {
- p->numProcessedBlocks = blockIndex;
- MtSync_StopWriting(&mt->hashSync);
- Event_Set(&p->wasStopped);
break;
- }
- Semaphore_Wait(&p->freeSemaphore);
+
BtFillBlock(mt, blockIndex++);
+
Semaphore_Release1(&p->filledSemaphore);
}
+
+ // we stop HASH_THREAD here
+ MtSync_StopWriting(&mt->hashSync);
+
+ // p->numBlocks_Sent = blockIndex;
+ Event_Set(&p->wasStopped);
}
}
+
void MatchFinderMt_Construct(CMatchFinderMt *p)
{
p->hashBuf = NULL;
@@ -489,16 +801,39 @@ static void MatchFinderMt_FreeMem(CMatchFinderMt *p, ISzAllocPtr alloc)
void MatchFinderMt_Destruct(CMatchFinderMt *p, ISzAllocPtr alloc)
{
- MtSync_Destruct(&p->hashSync);
+ /*
+ HASH_THREAD can use CriticalSection(s) btSync.cs and hashSync.cs.
+ So we must be sure that HASH_THREAD will not use CriticalSection(s)
+ after deleting CriticalSection here.
+
+ we call ReleaseStream(p)
+ that calls StopWriting(btSync)
+ that calls StopWriting(hashSync), if it's required to stop HASH_THREAD.
+ after StopWriting() it's safe to destruct MtSync(s) in any order */
+
+ MatchFinderMt_ReleaseStream(p);
+
MtSync_Destruct(&p->btSync);
+ MtSync_Destruct(&p->hashSync);
+
+ LOG_ITER(
+ printf("\nTree %9d * %7d iter = %9d = sum : bytes = %9d\n",
+ (UInt32)(g_NumIters_Tree / 1000),
+ (UInt32)(((UInt64)g_NumIters_Loop * 1000) / (g_NumIters_Tree + 1)),
+ (UInt32)(g_NumIters_Loop / 1000),
+ (UInt32)(g_NumIters_Bytes / 1000)
+ ));
+
MatchFinderMt_FreeMem(p, alloc);
}
+
#define kHashBufferSize (kMtHashBlockSize * kMtHashNumBlocks)
#define kBtBufferSize (kMtBtBlockSize * kMtBtNumBlocks)
-static THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE HashThreadFunc2(void *p) { HashThreadFunc((CMatchFinderMt *)p); return 0; }
-static THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE BtThreadFunc2(void *p)
+
+static THREAD_FUNC_DECL HashThreadFunc2(void *p) { HashThreadFunc((CMatchFinderMt *)p); return 0; }
+static THREAD_FUNC_DECL BtThreadFunc2(void *p)
{
Byte allocaDummy[0x180];
unsigned i = 0;
@@ -509,16 +844,17 @@ static THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE BtThreadFunc2(void *p)
return 0;
}
+
SRes MatchFinderMt_Create(CMatchFinderMt *p, UInt32 historySize, UInt32 keepAddBufferBefore,
UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAllocPtr alloc)
{
- CMatchFinder *mf = p->MatchFinder;
+ CMatchFinder *mf = MF(p);
p->historySize = historySize;
if (kMtBtBlockSize <= matchMaxLen * 4)
return SZ_ERROR_PARAM;
if (!p->hashBuf)
{
- p->hashBuf = (UInt32 *)ISzAlloc_Alloc(alloc, (kHashBufferSize + kBtBufferSize) * sizeof(UInt32));
+ p->hashBuf = (UInt32 *)ISzAlloc_Alloc(alloc, ((size_t)kHashBufferSize + (size_t)kBtBufferSize) * sizeof(UInt32));
if (!p->hashBuf)
return SZ_ERROR_MEM;
p->btBuf = p->hashBuf + kHashBufferSize;
@@ -528,253 +864,457 @@ SRes MatchFinderMt_Create(CMatchFinderMt *p, UInt32 historySize, UInt32 keepAddB
if (!MatchFinder_Create(mf, historySize, keepAddBufferBefore, matchMaxLen, keepAddBufferAfter, alloc))
return SZ_ERROR_MEM;
- RINOK(MtSync_Create(&p->hashSync, HashThreadFunc2, p, kMtHashNumBlocks));
- RINOK(MtSync_Create(&p->btSync, BtThreadFunc2, p, kMtBtNumBlocks));
+ RINOK(MtSync_Create(&p->hashSync, HashThreadFunc2, p));
+ RINOK(MtSync_Create(&p->btSync, BtThreadFunc2, p));
return SZ_OK;
}
-/* Call it after ReleaseStream / SetStream */
+
+SRes MatchFinderMt_InitMt(CMatchFinderMt *p)
+{
+ RINOK(MtSync_Init(&p->hashSync, kMtHashNumBlocks));
+ return MtSync_Init(&p->btSync, kMtBtNumBlocks);
+}
+
+
static void MatchFinderMt_Init(CMatchFinderMt *p)
{
- CMatchFinder *mf = p->MatchFinder;
+ CMatchFinder *mf = MF(p);
p->btBufPos =
- p->btBufPosLimit = 0;
+ p->btBufPosLimit = NULL;
p->hashBufPos =
p->hashBufPosLimit = 0;
+ p->hashNumAvail = 0; // 21.03
+
+ p->failure_BT = False;
/* Init without data reading. We don't want to read data in this thread */
- MatchFinder_Init_3(mf, False);
+ MatchFinder_Init_4(mf);
+
MatchFinder_Init_LowHash(mf);
p->pointerToCurPos = Inline_MatchFinder_GetPointerToCurrentPos(mf);
p->btNumAvailBytes = 0;
- p->lzPos = p->historySize + 1;
+ p->failure_LZ_BT = False;
+ // p->failure_LZ_LZ = False;
+
+ p->lzPos =
+ 1; // optimal smallest value
+ // 0; // for debug: ignores match to start
+ // kNormalizeAlign; // for debug
p->hash = mf->hash;
p->fixedHashSize = mf->fixedHashSize;
+ // p->hash4Mask = mf->hash4Mask;
p->crc = mf->crc;
+ // memcpy(p->crc, mf->crc, sizeof(mf->crc));
p->son = mf->son;
p->matchMaxLen = mf->matchMaxLen;
p->numHashBytes = mf->numHashBytes;
- p->pos = mf->pos;
- p->buffer = mf->buffer;
- p->cyclicBufferPos = mf->cyclicBufferPos;
+
+ /* (mf->pos) and (mf->streamPos) were already initialized to 1 in MatchFinder_Init_4() */
+ // mf->streamPos = mf->pos = 1; // optimal smallest value
+ // 0; // for debug: ignores match to start
+ // kNormalizeAlign; // for debug
+
+ /* we must init (p->pos = mf->pos) for BT, because
+ BT code needs (p->pos == delta_value_for_empty_hash_record == mf->pos) */
+ p->pos = mf->pos; // do not change it
+
+ p->cyclicBufferPos = (p->pos - CYC_TO_POS_OFFSET);
p->cyclicBufferSize = mf->cyclicBufferSize;
+ p->buffer = mf->buffer;
p->cutValue = mf->cutValue;
+ // p->son[0] = p->son[1] = 0; // unused: to init skipped record for speculated accesses.
}
+
/* ReleaseStream is required to finish multithreading */
void MatchFinderMt_ReleaseStream(CMatchFinderMt *p)
{
+ // Sleep(1); // for debug
MtSync_StopWriting(&p->btSync);
+ // Sleep(200); // for debug
/* p->MatchFinder->ReleaseStream(); */
}
-static void MatchFinderMt_Normalize(CMatchFinderMt *p)
-{
- MatchFinder_Normalize3(p->lzPos - p->historySize - 1, p->hash, p->fixedHashSize);
- p->lzPos = p->historySize + 1;
-}
-static void MatchFinderMt_GetNextBlock_Bt(CMatchFinderMt *p)
+MY_NO_INLINE
+static UInt32 MatchFinderMt_GetNextBlock_Bt(CMatchFinderMt *p)
{
- UInt32 blockIndex;
- MtSync_GetNextBlock(&p->btSync);
- blockIndex = ((p->btSync.numProcessedBlocks - 1) & kMtBtNumBlocksMask);
- p->btBufPosLimit = p->btBufPos = blockIndex * kMtBtBlockSize;
- p->btBufPosLimit += p->btBuf[p->btBufPos++];
- p->btNumAvailBytes = p->btBuf[p->btBufPos++];
- if (p->lzPos >= kMtMaxValForNormalize - kMtBtBlockSize)
- MatchFinderMt_Normalize(p);
+ if (p->failure_LZ_BT)
+ p->btBufPos = p->failureBuf;
+ else
+ {
+ const UInt32 bi = MtSync_GetNextBlock(&p->btSync);
+ const UInt32 *bt = p->btBuf + GET_BT_BLOCK_OFFSET(bi);
+ {
+ const UInt32 numItems = bt[0];
+ p->btBufPosLimit = bt + numItems;
+ p->btNumAvailBytes = bt[1];
+ p->btBufPos = bt + 2;
+ if (numItems < 2 || numItems > kMtBtBlockSize)
+ {
+ p->failureBuf[0] = 0;
+ p->btBufPos = p->failureBuf;
+ p->btBufPosLimit = p->failureBuf + 1;
+ p->failure_LZ_BT = True;
+ // p->btNumAvailBytes = 0;
+ /* we don't want to decrease AvailBytes, that was loaded before.
+ that can be unexpected for the code that has loaded another value before */
+ }
+ }
+
+ if (p->lzPos >= (UInt32)kMtMaxValForNormalize - (UInt32)kMtBtBlockSize)
+ {
+ /* we don't check (lzPos) over exact avail bytes in (btBuf).
+ (fixedHashSize) is small, so normalization is fast */
+ const UInt32 subValue = (p->lzPos - p->historySize - 1); // & ~(UInt32)(kNormalizeAlign - 1);
+ p->lzPos -= subValue;
+ MatchFinder_Normalize3(subValue, p->hash, p->fixedHashSize);
+ }
+ }
+ return p->btNumAvailBytes;
}
+
+
static const Byte * MatchFinderMt_GetPointerToCurrentPos(CMatchFinderMt *p)
{
return p->pointerToCurPos;
}
+
#define GET_NEXT_BLOCK_IF_REQUIRED if (p->btBufPos == p->btBufPosLimit) MatchFinderMt_GetNextBlock_Bt(p);
+
static UInt32 MatchFinderMt_GetNumAvailableBytes(CMatchFinderMt *p)
{
- GET_NEXT_BLOCK_IF_REQUIRED;
- return p->btNumAvailBytes;
+ if (p->btBufPos != p->btBufPosLimit)
+ return p->btNumAvailBytes;
+ return MatchFinderMt_GetNextBlock_Bt(p);
}
-static UInt32 * MixMatches2(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *distances)
+
+// #define CHECK_FAILURE_LZ(_match_, _pos_) if (_match_ >= _pos_) { p->failure_LZ_LZ = True; return d; }
+#define CHECK_FAILURE_LZ(_match_, _pos_)
+
+static UInt32 * MixMatches2(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
{
- UInt32 h2, curMatch2;
+ UInt32 h2, c2;
UInt32 *hash = p->hash;
const Byte *cur = p->pointerToCurPos;
- UInt32 lzPos = p->lzPos;
+ const UInt32 m = p->lzPos;
MT_HASH2_CALC
- curMatch2 = hash[h2];
- hash[h2] = lzPos;
+ c2 = hash[h2];
+ hash[h2] = m;
- if (curMatch2 >= matchMinPos)
- if (cur[(ptrdiff_t)curMatch2 - lzPos] == cur[0])
+ if (c2 >= matchMinPos)
+ {
+ CHECK_FAILURE_LZ(c2, m)
+ if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0])
{
- *distances++ = 2;
- *distances++ = lzPos - curMatch2 - 1;
+ *d++ = 2;
+ *d++ = m - c2 - 1;
}
+ }
- return distances;
+ return d;
}
-static UInt32 * MixMatches3(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *distances)
+static UInt32 * MixMatches3(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
{
- UInt32 h2, h3, curMatch2, curMatch3;
+ UInt32 h2, h3, c2, c3;
UInt32 *hash = p->hash;
const Byte *cur = p->pointerToCurPos;
- UInt32 lzPos = p->lzPos;
+ const UInt32 m = p->lzPos;
MT_HASH3_CALC
- curMatch2 = hash[ h2];
- curMatch3 = (hash + kFix3HashSize)[h3];
+ c2 = hash[h2];
+ c3 = (hash + kFix3HashSize)[h3];
- hash[ h2] = lzPos;
- (hash + kFix3HashSize)[h3] = lzPos;
+ hash[h2] = m;
+ (hash + kFix3HashSize)[h3] = m;
- if (curMatch2 >= matchMinPos && cur[(ptrdiff_t)curMatch2 - lzPos] == cur[0])
+ if (c2 >= matchMinPos)
{
- distances[1] = lzPos - curMatch2 - 1;
- if (cur[(ptrdiff_t)curMatch2 - lzPos + 2] == cur[2])
+ CHECK_FAILURE_LZ(c2, m)
+ if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0])
{
- distances[0] = 3;
- return distances + 2;
+ d[1] = m - c2 - 1;
+ if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 2] == cur[2])
+ {
+ d[0] = 3;
+ return d + 2;
+ }
+ d[0] = 2;
+ d += 2;
}
- distances[0] = 2;
- distances += 2;
}
- if (curMatch3 >= matchMinPos && cur[(ptrdiff_t)curMatch3 - lzPos] == cur[0])
+ if (c3 >= matchMinPos)
{
- *distances++ = 3;
- *distances++ = lzPos - curMatch3 - 1;
+ CHECK_FAILURE_LZ(c3, m)
+ if (cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0])
+ {
+ *d++ = 3;
+ *d++ = m - c3 - 1;
+ }
}
- return distances;
+ return d;
}
+
+#define INCREASE_LZ_POS p->lzPos++; p->pointerToCurPos++;
+
/*
-static UInt32 *MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *distances)
+static
+UInt32* MatchFinderMt_GetMatches_Bt4(CMatchFinderMt *p, UInt32 *d)
{
- UInt32 h2, h3, h4, curMatch2, curMatch3, curMatch4;
+ const UInt32 *bt = p->btBufPos;
+ const UInt32 len = *bt++;
+ const UInt32 *btLim = bt + len;
+ UInt32 matchMinPos;
+ UInt32 avail = p->btNumAvailBytes - 1;
+ p->btBufPos = btLim;
+
+ {
+ p->btNumAvailBytes = avail;
+
+ #define BT_HASH_BYTES_MAX 5
+
+ matchMinPos = p->lzPos;
+
+ if (len != 0)
+ matchMinPos -= bt[1];
+ else if (avail < (BT_HASH_BYTES_MAX - 1) - 1)
+ {
+ INCREASE_LZ_POS
+ return d;
+ }
+ else
+ {
+ const UInt32 hs = p->historySize;
+ if (matchMinPos > hs)
+ matchMinPos -= hs;
+ else
+ matchMinPos = 1;
+ }
+ }
+
+ for (;;)
+ {
+
+ UInt32 h2, h3, c2, c3;
UInt32 *hash = p->hash;
const Byte *cur = p->pointerToCurPos;
- UInt32 lzPos = p->lzPos;
- MT_HASH4_CALC
-
- curMatch2 = hash[ h2];
- curMatch3 = (hash + kFix3HashSize)[h3];
- curMatch4 = (hash + kFix4HashSize)[h4];
+ UInt32 m = p->lzPos;
+ MT_HASH3_CALC
+
+ c2 = hash[h2];
+ c3 = (hash + kFix3HashSize)[h3];
+
+ hash[h2] = m;
+ (hash + kFix3HashSize)[h3] = m;
+
+ if (c2 >= matchMinPos && cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0])
+ {
+ d[1] = m - c2 - 1;
+ if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 2] == cur[2])
+ {
+ d[0] = 3;
+ d += 2;
+ break;
+ }
+ // else
+ {
+ d[0] = 2;
+ d += 2;
+ }
+ }
+ if (c3 >= matchMinPos && cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0])
+ {
+ *d++ = 3;
+ *d++ = m - c3 - 1;
+ }
+ break;
+ }
+
+ if (len != 0)
+ {
+ do
+ {
+ const UInt32 v0 = bt[0];
+ const UInt32 v1 = bt[1];
+ bt += 2;
+ d[0] = v0;
+ d[1] = v1;
+ d += 2;
+ }
+ while (bt != btLim);
+ }
+ INCREASE_LZ_POS
+ return d;
+}
+*/
+
+
+static UInt32 *MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
+{
+ UInt32 h2, h3, /* h4, */ c2, c3 /* , c4 */;
+ UInt32 *hash = p->hash;
+ const Byte *cur = p->pointerToCurPos;
+ const UInt32 m = p->lzPos;
+ MT_HASH3_CALC
+ // MT_HASH4_CALC
+ c2 = hash[h2];
+ c3 = (hash + kFix3HashSize)[h3];
+ // c4 = (hash + kFix4HashSize)[h4];
- hash[ h2] = lzPos;
- (hash + kFix3HashSize)[h3] = lzPos;
- (hash + kFix4HashSize)[h4] = lzPos;
+ hash[h2] = m;
+ (hash + kFix3HashSize)[h3] = m;
+ // (hash + kFix4HashSize)[h4] = m;
+
+ #define _USE_H2
- if (curMatch2 >= matchMinPos && cur[(ptrdiff_t)curMatch2 - lzPos] == cur[0])
+ #ifdef _USE_H2
+ if (c2 >= matchMinPos && cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0])
{
- distances[1] = lzPos - curMatch2 - 1;
- if (cur[(ptrdiff_t)curMatch2 - lzPos + 2] == cur[2])
+ d[1] = m - c2 - 1;
+ if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 2] == cur[2])
{
- distances[0] = (cur[(ptrdiff_t)curMatch2 - lzPos + 3] == cur[3]) ? 4 : 3;
- return distances + 2;
+ // d[0] = (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 3] == cur[3]) ? 4 : 3;
+ // return d + 2;
+
+ if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 3] == cur[3])
+ {
+ d[0] = 4;
+ return d + 2;
+ }
+ d[0] = 3;
+ d += 2;
+
+ #ifdef _USE_H4
+ if (c4 >= matchMinPos)
+ if (
+ cur[(ptrdiff_t)c4 - (ptrdiff_t)m] == cur[0] &&
+ cur[(ptrdiff_t)c4 - (ptrdiff_t)m + 3] == cur[3]
+ )
+ {
+ *d++ = 4;
+ *d++ = m - c4 - 1;
+ }
+ #endif
+ return d;
}
- distances[0] = 2;
- distances += 2;
+ d[0] = 2;
+ d += 2;
}
+ #endif
- if (curMatch3 >= matchMinPos && cur[(ptrdiff_t)curMatch3 - lzPos] == cur[0])
+ if (c3 >= matchMinPos && cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0])
{
- distances[1] = lzPos - curMatch3 - 1;
- if (cur[(ptrdiff_t)curMatch3 - lzPos + 3] == cur[3])
+ d[1] = m - c3 - 1;
+ if (cur[(ptrdiff_t)c3 - (ptrdiff_t)m + 3] == cur[3])
{
- distances[0] = 4;
- return distances + 2;
+ d[0] = 4;
+ return d + 2;
}
- distances[0] = 3;
- distances += 2;
+ d[0] = 3;
+ d += 2;
}
- if (curMatch4 >= matchMinPos)
+ #ifdef _USE_H4
+ if (c4 >= matchMinPos)
if (
- cur[(ptrdiff_t)curMatch4 - lzPos] == cur[0] &&
- cur[(ptrdiff_t)curMatch4 - lzPos + 3] == cur[3]
+ cur[(ptrdiff_t)c4 - (ptrdiff_t)m] == cur[0] &&
+ cur[(ptrdiff_t)c4 - (ptrdiff_t)m + 3] == cur[3]
)
{
- *distances++ = 4;
- *distances++ = lzPos - curMatch4 - 1;
+ *d++ = 4;
+ *d++ = m - c4 - 1;
}
+ #endif
- return distances;
+ return d;
}
-*/
-#define INCREASE_LZ_POS p->lzPos++; p->pointerToCurPos++;
-static UInt32 MatchFinderMt2_GetMatches(CMatchFinderMt *p, UInt32 *distances)
+static UInt32* MatchFinderMt2_GetMatches(CMatchFinderMt *p, UInt32 *d)
{
- const UInt32 *btBuf = p->btBuf + p->btBufPos;
- UInt32 len = *btBuf++;
- p->btBufPos += 1 + len;
+ const UInt32 *bt = p->btBufPos;
+ const UInt32 len = *bt++;
+ const UInt32 *btLim = bt + len;
+ p->btBufPos = btLim;
p->btNumAvailBytes--;
+ INCREASE_LZ_POS
{
- UInt32 i;
- for (i = 0; i < len; i += 2)
+ while (bt != btLim)
{
- UInt32 v0 = btBuf[0];
- UInt32 v1 = btBuf[1];
- btBuf += 2;
- distances[0] = v0;
- distances[1] = v1;
- distances += 2;
+ const UInt32 v0 = bt[0];
+ const UInt32 v1 = bt[1];
+ bt += 2;
+ d[0] = v0;
+ d[1] = v1;
+ d += 2;
}
}
- INCREASE_LZ_POS
- return len;
+ return d;
}
-static UInt32 MatchFinderMt_GetMatches(CMatchFinderMt *p, UInt32 *distances)
-{
- const UInt32 *btBuf = p->btBuf + p->btBufPos;
- UInt32 len = *btBuf++;
- p->btBufPos += 1 + len;
+
+static UInt32* MatchFinderMt_GetMatches(CMatchFinderMt *p, UInt32 *d)
+{
+ const UInt32 *bt = p->btBufPos;
+ UInt32 len = *bt++;
+ const UInt32 avail = p->btNumAvailBytes - 1;
+ p->btNumAvailBytes = avail;
+ p->btBufPos = bt + len;
if (len == 0)
{
- /* change for bt5 ! */
- if (p->btNumAvailBytes-- >= 4)
- len = (UInt32)(p->MixMatchesFunc(p, p->lzPos - p->historySize, distances) - (distances));
+ #define BT_HASH_BYTES_MAX 5
+ if (avail >= (BT_HASH_BYTES_MAX - 1) - 1)
+ {
+ UInt32 m = p->lzPos;
+ if (m > p->historySize)
+ m -= p->historySize;
+ else
+ m = 1;
+ d = p->MixMatchesFunc(p, m, d);
+ }
}
else
{
- /* Condition: there are matches in btBuf with length < p->numHashBytes */
- UInt32 *distances2;
- p->btNumAvailBytes--;
- distances2 = p->MixMatchesFunc(p, p->lzPos - btBuf[1], distances);
+ /*
+ first match pair from BinTree: (match_len, match_dist),
+ (match_len >= numHashBytes).
+ MixMatchesFunc() inserts only hash matches that are nearer than (match_dist)
+ */
+ d = p->MixMatchesFunc(p, p->lzPos - bt[1], d);
+ // if (d) // check for failure
do
{
- UInt32 v0 = btBuf[0];
- UInt32 v1 = btBuf[1];
- btBuf += 2;
- distances2[0] = v0;
- distances2[1] = v1;
- distances2 += 2;
+ const UInt32 v0 = bt[0];
+ const UInt32 v1 = bt[1];
+ bt += 2;
+ d[0] = v0;
+ d[1] = v1;
+ d += 2;
}
- while ((len -= 2) != 0);
- len = (UInt32)(distances2 - (distances));
+ while (len -= 2);
}
INCREASE_LZ_POS
- return len;
+ return d;
}
#define SKIP_HEADER2_MT do { GET_NEXT_BLOCK_IF_REQUIRED
#define SKIP_HEADER_MT(n) SKIP_HEADER2_MT if (p->btNumAvailBytes-- >= (n)) { const Byte *cur = p->pointerToCurPos; UInt32 *hash = p->hash;
-#define SKIP_FOOTER_MT } INCREASE_LZ_POS p->btBufPos += p->btBuf[p->btBufPos] + 1; } while (--num != 0);
+#define SKIP_FOOTER_MT } INCREASE_LZ_POS p->btBufPos += (size_t)*p->btBufPos + 1; } while (--num != 0);
static void MatchFinderMt0_Skip(CMatchFinderMt *p, UInt32 num)
{
@@ -803,12 +1343,16 @@ static void MatchFinderMt3_Skip(CMatchFinderMt *p, UInt32 num)
}
/*
+// MatchFinderMt4_Skip() is similar to MatchFinderMt3_Skip().
+// The difference is that MatchFinderMt3_Skip() updates hash for last 3 bytes of stream.
+
static void MatchFinderMt4_Skip(CMatchFinderMt *p, UInt32 num)
{
SKIP_HEADER_MT(4)
- UInt32 h2, h3, h4;
- MT_HASH4_CALC
- (hash + kFix4HashSize)[h4] =
+ UInt32 h2, h3; // h4
+ MT_HASH3_CALC
+ // MT_HASH4_CALC
+ // (hash + kFix4HashSize)[h4] =
(hash + kFix3HashSize)[h3] =
hash[ h2] =
p->lzPos;
@@ -816,14 +1360,14 @@ static void MatchFinderMt4_Skip(CMatchFinderMt *p, UInt32 num)
}
*/
-void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder *vTable)
+void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder2 *vTable)
{
vTable->Init = (Mf_Init_Func)MatchFinderMt_Init;
vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinderMt_GetNumAvailableBytes;
vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinderMt_GetPointerToCurrentPos;
vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt_GetMatches;
- switch (p->MatchFinder->numHashBytes)
+ switch (MF(p)->numHashBytes)
{
case 2:
p->GetHeadsFunc = GetHeads2;
@@ -832,22 +1376,25 @@ void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder *vTable)
vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt2_GetMatches;
break;
case 3:
- p->GetHeadsFunc = GetHeads3;
+ p->GetHeadsFunc = MF(p)->bigHash ? GetHeads3b : GetHeads3;
p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches2;
vTable->Skip = (Mf_Skip_Func)MatchFinderMt2_Skip;
break;
- default:
- /* case 4: */
- p->GetHeadsFunc = p->MatchFinder->bigHash ? GetHeads4b : GetHeads4;
+ case 4:
+ p->GetHeadsFunc = MF(p)->bigHash ? GetHeads4b : GetHeads4;
+
+ // it's fast inline version of GetMatches()
+ // vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt_GetMatches_Bt4;
+
p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches3;
vTable->Skip = (Mf_Skip_Func)MatchFinderMt3_Skip;
break;
- /*
default:
- p->GetHeadsFunc = GetHeads5;
+ p->GetHeadsFunc = MF(p)->bigHash ? GetHeads5b : GetHeads5;
p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches4;
- vTable->Skip = (Mf_Skip_Func)MatchFinderMt4_Skip;
+ vTable->Skip =
+ (Mf_Skip_Func)MatchFinderMt3_Skip;
+ // (Mf_Skip_Func)MatchFinderMt4_Skip;
break;
- */
}
}
diff --git a/multiarc/src/formats/7z/C/LzFindMt.h b/multiarc/src/formats/7z/C/LzFindMt.h
index ef431e3f..660b7244 100644..100755
--- a/multiarc/src/formats/7z/C/LzFindMt.h
+++ b/multiarc/src/formats/7z/C/LzFindMt.h
@@ -1,5 +1,5 @@
/* LzFindMt.h -- multithreaded Match finder for LZ algorithms
-2018-07-04 : Igor Pavlov : Public domain */
+2021-07-12 : Igor Pavlov : Public domain */
#ifndef __LZ_FIND_MT_H
#define __LZ_FIND_MT_H
@@ -9,31 +9,26 @@
EXTERN_C_BEGIN
-#define kMtHashBlockSize (1 << 13)
-#define kMtHashNumBlocks (1 << 3)
-#define kMtHashNumBlocksMask (kMtHashNumBlocks - 1)
-
-#define kMtBtBlockSize (1 << 14)
-#define kMtBtNumBlocks (1 << 6)
-#define kMtBtNumBlocksMask (kMtBtNumBlocks - 1)
-
typedef struct _CMtSync
{
+ UInt32 numProcessedBlocks;
+ CThread thread;
+ UInt64 affinity;
+
BoolInt wasCreated;
BoolInt needStart;
+ BoolInt csWasInitialized;
+ BoolInt csWasEntered;
+
BoolInt exit;
BoolInt stopWriting;
- CThread thread;
CAutoResetEvent canStart;
- CAutoResetEvent wasStarted;
CAutoResetEvent wasStopped;
CSemaphore freeSemaphore;
CSemaphore filledSemaphore;
- BoolInt csWasInitialized;
- BoolInt csWasEntered;
CCriticalSection cs;
- UInt32 numProcessedBlocks;
+ // UInt32 numBlocks_Sent;
} CMtSync;
typedef UInt32 * (*Mf_Mix_Matches)(void *p, UInt32 matchMinPos, UInt32 *distances);
@@ -49,18 +44,23 @@ typedef struct _CMatchFinderMt
/* LZ */
const Byte *pointerToCurPos;
UInt32 *btBuf;
- UInt32 btBufPos;
- UInt32 btBufPosLimit;
+ const UInt32 *btBufPos;
+ const UInt32 *btBufPosLimit;
UInt32 lzPos;
UInt32 btNumAvailBytes;
UInt32 *hash;
UInt32 fixedHashSize;
+ // UInt32 hash4Mask;
UInt32 historySize;
const UInt32 *crc;
Mf_Mix_Matches MixMatchesFunc;
-
+ UInt32 failure_LZ_BT; // failure in BT transfered to LZ
+ // UInt32 failure_LZ_LZ; // failure in LZ tables
+ UInt32 failureBuf[1];
+ // UInt32 crc[256];
+
/* LZ + BT */
CMtSync btSync;
Byte btDummy[kMtCacheLineDummy];
@@ -70,6 +70,8 @@ typedef struct _CMatchFinderMt
UInt32 hashBufPos;
UInt32 hashBufPosLimit;
UInt32 hashNumAvail;
+ UInt32 failure_BT;
+
CLzRef *son;
UInt32 matchMaxLen;
@@ -77,7 +79,7 @@ typedef struct _CMatchFinderMt
UInt32 pos;
const Byte *buffer;
UInt32 cyclicBufferPos;
- UInt32 cyclicBufferSize; /* it must be historySize + 1 */
+ UInt32 cyclicBufferSize; /* it must be = (historySize + 1) */
UInt32 cutValue;
/* BT + Hash */
@@ -87,13 +89,19 @@ typedef struct _CMatchFinderMt
/* Hash */
Mf_GetHeads GetHeadsFunc;
CMatchFinder *MatchFinder;
+ // CMatchFinder MatchFinder;
} CMatchFinderMt;
+// only for Mt part
void MatchFinderMt_Construct(CMatchFinderMt *p);
void MatchFinderMt_Destruct(CMatchFinderMt *p, ISzAllocPtr alloc);
+
SRes MatchFinderMt_Create(CMatchFinderMt *p, UInt32 historySize, UInt32 keepAddBufferBefore,
UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAllocPtr alloc);
-void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder *vTable);
+void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder2 *vTable);
+
+/* call MatchFinderMt_InitMt() before IMatchFinder::Init() */
+SRes MatchFinderMt_InitMt(CMatchFinderMt *p);
void MatchFinderMt_ReleaseStream(CMatchFinderMt *p);
EXTERN_C_END
diff --git a/multiarc/src/formats/7z/C/LzFindOpt.c b/multiarc/src/formats/7z/C/LzFindOpt.c
new file mode 100755
index 00000000..8ff006e0
--- /dev/null
+++ b/multiarc/src/formats/7z/C/LzFindOpt.c
@@ -0,0 +1,578 @@
+/* LzFindOpt.c -- multithreaded Match finder for LZ algorithms
+2021-07-13 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#include "CpuArch.h"
+#include "LzFind.h"
+
+// #include "LzFindMt.h"
+
+// #define LOG_ITERS
+
+// #define LOG_THREAD
+
+#ifdef LOG_THREAD
+#include <stdio.h>
+#define PRF(x) x
+#else
+// #define PRF(x)
+#endif
+
+#ifdef LOG_ITERS
+#include <stdio.h>
+UInt64 g_NumIters_Tree;
+UInt64 g_NumIters_Loop;
+UInt64 g_NumIters_Bytes;
+#define LOG_ITER(x) x
+#else
+#define LOG_ITER(x)
+#endif
+
+// ---------- BT THREAD ----------
+
+#define USE_SON_PREFETCH
+#define USE_LONG_MATCH_OPT
+
+#define kEmptyHashValue 0
+
+// #define CYC_TO_POS_OFFSET 0
+
+// #define CYC_TO_POS_OFFSET 1 // for debug
+
+/*
+MY_NO_INLINE
+UInt32 * MY_FAST_CALL GetMatchesSpecN_1(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
+ UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size, UInt32 *posRes)
+{
+ do
+ {
+ UInt32 delta;
+ if (hash == size)
+ break;
+ delta = *hash++;
+
+ if (delta == 0 || delta > (UInt32)pos)
+ return NULL;
+
+ lenLimit++;
+
+ if (delta == (UInt32)pos)
+ {
+ CLzRef *ptr1 = son + ((size_t)pos << 1) - CYC_TO_POS_OFFSET * 2;
+ *d++ = 0;
+ ptr1[0] = kEmptyHashValue;
+ ptr1[1] = kEmptyHashValue;
+ }
+else
+{
+ UInt32 *_distances = ++d;
+
+ CLzRef *ptr0 = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2 + 1;
+ CLzRef *ptr1 = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2;
+
+ const Byte *len0 = cur, *len1 = cur;
+ UInt32 cutValue = _cutValue;
+ const Byte *maxLen = cur + _maxLen;
+
+ for (LOG_ITER(g_NumIters_Tree++);;)
+ {
+ LOG_ITER(g_NumIters_Loop++);
+ {
+ const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
+ CLzRef *pair = son + ((size_t)(((ptrdiff_t)pos - CYC_TO_POS_OFFSET) + diff) << 1);
+ const Byte *len = (len0 < len1 ? len0 : len1);
+
+ #ifdef USE_SON_PREFETCH
+ const UInt32 pair0 = *pair;
+ #endif
+
+ if (len[diff] == len[0])
+ {
+ if (++len != lenLimit && len[diff] == len[0])
+ while (++len != lenLimit)
+ {
+ LOG_ITER(g_NumIters_Bytes++);
+ if (len[diff] != len[0])
+ break;
+ }
+ if (maxLen < len)
+ {
+ maxLen = len;
+ *d++ = (UInt32)(len - cur);
+ *d++ = delta - 1;
+
+ if (len == lenLimit)
+ {
+ const UInt32 pair1 = pair[1];
+ *ptr1 =
+ #ifdef USE_SON_PREFETCH
+ pair0;
+ #else
+ pair[0];
+ #endif
+ *ptr0 = pair1;
+
+ _distances[-1] = (UInt32)(d - _distances);
+
+ #ifdef USE_LONG_MATCH_OPT
+
+ if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)
+ break;
+
+ {
+ for (;;)
+ {
+ hash++;
+ pos++;
+ cur++;
+ lenLimit++;
+ {
+ CLzRef *ptr = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2;
+ #if 0
+ *(UInt64 *)(void *)ptr = ((const UInt64 *)(const void *)ptr)[diff];
+ #else
+ const UInt32 p0 = ptr[0 + (diff * 2)];
+ const UInt32 p1 = ptr[1 + (diff * 2)];
+ ptr[0] = p0;
+ ptr[1] = p1;
+ // ptr[0] = ptr[0 + (diff * 2)];
+ // ptr[1] = ptr[1 + (diff * 2)];
+ #endif
+ }
+ // PrintSon(son + 2, pos - 1);
+ // printf("\npos = %x delta = %x\n", pos, delta);
+ len++;
+ *d++ = 2;
+ *d++ = (UInt32)(len - cur);
+ *d++ = delta - 1;
+ if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)
+ break;
+ }
+ }
+ #endif
+
+ break;
+ }
+ }
+ }
+
+ {
+ const UInt32 curMatch = (UInt32)pos - delta; // (UInt32)(pos + diff);
+ if (len[diff] < len[0])
+ {
+ delta = pair[1];
+ if (delta >= curMatch)
+ return NULL;
+ *ptr1 = curMatch;
+ ptr1 = pair + 1;
+ len1 = len;
+ }
+ else
+ {
+ delta = *pair;
+ if (delta >= curMatch)
+ return NULL;
+ *ptr0 = curMatch;
+ ptr0 = pair;
+ len0 = len;
+ }
+
+ delta = (UInt32)pos - delta;
+
+ if (--cutValue == 0 || delta >= pos)
+ {
+ *ptr0 = *ptr1 = kEmptyHashValue;
+ _distances[-1] = (UInt32)(d - _distances);
+ break;
+ }
+ }
+ }
+ } // for (tree iterations)
+}
+ pos++;
+ cur++;
+ }
+ while (d < limit);
+ *posRes = (UInt32)pos;
+ return d;
+}
+*/
+
+/* define cbs if you use 2 functions.
+ GetMatchesSpecN_1() : (pos < _cyclicBufferSize)
+ GetMatchesSpecN_2() : (pos >= _cyclicBufferSize)
+
+ do not define cbs if you use 1 function:
+ GetMatchesSpecN_2()
+*/
+
+// #define cbs _cyclicBufferSize
+
+/*
+ we use size_t for (pos) and (_cyclicBufferPos_ instead of UInt32
+ to eliminate "movsx" BUG in old MSVC x64 compiler.
+*/
+
+UInt32 * MY_FAST_CALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
+ UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
+ size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
+ UInt32 *posRes);
+
+MY_NO_INLINE
+UInt32 * MY_FAST_CALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
+ UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
+ size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
+ UInt32 *posRes)
+{
+ do // while (hash != size)
+ {
+ UInt32 delta;
+
+ #ifndef cbs
+ UInt32 cbs;
+ #endif
+
+ if (hash == size)
+ break;
+
+ delta = *hash++;
+
+ if (delta == 0)
+ return NULL;
+
+ lenLimit++;
+
+ #ifndef cbs
+ cbs = _cyclicBufferSize;
+ if ((UInt32)pos < cbs)
+ {
+ if (delta > (UInt32)pos)
+ return NULL;
+ cbs = (UInt32)pos;
+ }
+ #endif
+
+ if (delta >= cbs)
+ {
+ CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
+ *d++ = 0;
+ ptr1[0] = kEmptyHashValue;
+ ptr1[1] = kEmptyHashValue;
+ }
+else
+{
+ UInt32 *_distances = ++d;
+
+ CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
+ CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
+
+ UInt32 cutValue = _cutValue;
+ const Byte *len0 = cur, *len1 = cur;
+ const Byte *maxLen = cur + _maxLen;
+
+ // if (cutValue == 0) { *ptr0 = *ptr1 = kEmptyHashValue; } else
+ for (LOG_ITER(g_NumIters_Tree++);;)
+ {
+ LOG_ITER(g_NumIters_Loop++);
+ {
+ // SPEC code
+ CLzRef *pair = son + ((size_t)((ptrdiff_t)_cyclicBufferPos - (ptrdiff_t)delta
+ + (ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0)
+ ) << 1);
+
+ const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
+ const Byte *len = (len0 < len1 ? len0 : len1);
+
+ #ifdef USE_SON_PREFETCH
+ const UInt32 pair0 = *pair;
+ #endif
+
+ if (len[diff] == len[0])
+ {
+ if (++len != lenLimit && len[diff] == len[0])
+ while (++len != lenLimit)
+ {
+ LOG_ITER(g_NumIters_Bytes++);
+ if (len[diff] != len[0])
+ break;
+ }
+ if (maxLen < len)
+ {
+ maxLen = len;
+ *d++ = (UInt32)(len - cur);
+ *d++ = delta - 1;
+
+ if (len == lenLimit)
+ {
+ const UInt32 pair1 = pair[1];
+ *ptr1 =
+ #ifdef USE_SON_PREFETCH
+ pair0;
+ #else
+ pair[0];
+ #endif
+ *ptr0 = pair1;
+
+ _distances[-1] = (UInt32)(d - _distances);
+
+ #ifdef USE_LONG_MATCH_OPT
+
+ if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)
+ break;
+
+ {
+ for (;;)
+ {
+ *d++ = 2;
+ *d++ = (UInt32)(lenLimit - cur);
+ *d++ = delta - 1;
+ cur++;
+ lenLimit++;
+ // SPEC
+ _cyclicBufferPos++;
+ {
+ // SPEC code
+ CLzRef *dest = son + ((size_t)(_cyclicBufferPos) << 1);
+ const CLzRef *src = dest + ((diff
+ + (ptrdiff_t)(UInt32)((_cyclicBufferPos < delta) ? cbs : 0)) << 1);
+ // CLzRef *ptr = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2;
+ #if 0
+ *(UInt64 *)(void *)dest = *((const UInt64 *)(const void *)src);
+ #else
+ const UInt32 p0 = src[0];
+ const UInt32 p1 = src[1];
+ dest[0] = p0;
+ dest[1] = p1;
+ #endif
+ }
+ pos++;
+ hash++;
+ if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)
+ break;
+ } // for() end for long matches
+ }
+ #endif
+
+ break; // break from TREE iterations
+ }
+ }
+ }
+ {
+ const UInt32 curMatch = (UInt32)pos - delta; // (UInt32)(pos + diff);
+ if (len[diff] < len[0])
+ {
+ delta = pair[1];
+ *ptr1 = curMatch;
+ ptr1 = pair + 1;
+ len1 = len;
+ if (delta >= curMatch)
+ return NULL;
+ }
+ else
+ {
+ delta = *pair;
+ *ptr0 = curMatch;
+ ptr0 = pair;
+ len0 = len;
+ if (delta >= curMatch)
+ return NULL;
+ }
+ delta = (UInt32)pos - delta;
+
+ if (--cutValue == 0 || delta >= cbs)
+ {
+ *ptr0 = *ptr1 = kEmptyHashValue;
+ _distances[-1] = (UInt32)(d - _distances);
+ break;
+ }
+ }
+ }
+ } // for (tree iterations)
+}
+ pos++;
+ _cyclicBufferPos++;
+ cur++;
+ }
+ while (d < limit);
+ *posRes = (UInt32)pos;
+ return d;
+}
+
+
+
+/*
+typedef UInt32 uint32plus; // size_t
+
+UInt32 * MY_FAST_CALL GetMatchesSpecN_3(uint32plus lenLimit, size_t pos, const Byte *cur, CLzRef *son,
+ UInt32 _cutValue, UInt32 *d, uint32plus _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
+ size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
+ UInt32 *posRes)
+{
+ do // while (hash != size)
+ {
+ UInt32 delta;
+
+ #ifndef cbs
+ UInt32 cbs;
+ #endif
+
+ if (hash == size)
+ break;
+
+ delta = *hash++;
+
+ if (delta == 0)
+ return NULL;
+
+ #ifndef cbs
+ cbs = _cyclicBufferSize;
+ if ((UInt32)pos < cbs)
+ {
+ if (delta > (UInt32)pos)
+ return NULL;
+ cbs = (UInt32)pos;
+ }
+ #endif
+
+ if (delta >= cbs)
+ {
+ CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
+ *d++ = 0;
+ ptr1[0] = kEmptyHashValue;
+ ptr1[1] = kEmptyHashValue;
+ }
+else
+{
+ CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
+ CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
+ UInt32 *_distances = ++d;
+ uint32plus len0 = 0, len1 = 0;
+ UInt32 cutValue = _cutValue;
+ uint32plus maxLen = _maxLen;
+ // lenLimit++; // const Byte *lenLimit = cur + _lenLimit;
+
+ for (LOG_ITER(g_NumIters_Tree++);;)
+ {
+ LOG_ITER(g_NumIters_Loop++);
+ {
+ // const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
+ CLzRef *pair = son + ((size_t)((ptrdiff_t)_cyclicBufferPos - delta
+ + (ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0)
+ ) << 1);
+ const Byte *pb = cur - delta;
+ uint32plus len = (len0 < len1 ? len0 : len1);
+
+ #ifdef USE_SON_PREFETCH
+ const UInt32 pair0 = *pair;
+ #endif
+
+ if (pb[len] == cur[len])
+ {
+ if (++len != lenLimit && pb[len] == cur[len])
+ while (++len != lenLimit)
+ if (pb[len] != cur[len])
+ break;
+ if (maxLen < len)
+ {
+ maxLen = len;
+ *d++ = (UInt32)len;
+ *d++ = delta - 1;
+ if (len == lenLimit)
+ {
+ {
+ const UInt32 pair1 = pair[1];
+ *ptr0 = pair1;
+ *ptr1 =
+ #ifdef USE_SON_PREFETCH
+ pair0;
+ #else
+ pair[0];
+ #endif
+ }
+
+ _distances[-1] = (UInt32)(d - _distances);
+
+ #ifdef USE_LONG_MATCH_OPT
+
+ if (hash == size || *hash != delta || pb[lenLimit] != cur[lenLimit] || d >= limit)
+ break;
+
+ {
+ const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
+ for (;;)
+ {
+ *d++ = 2;
+ *d++ = (UInt32)lenLimit;
+ *d++ = delta - 1;
+ _cyclicBufferPos++;
+ {
+ CLzRef *dest = son + ((size_t)_cyclicBufferPos << 1);
+ const CLzRef *src = dest + ((diff +
+ (ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0)) << 1);
+ #if 0
+ *(UInt64 *)(void *)dest = *((const UInt64 *)(const void *)src);
+ #else
+ const UInt32 p0 = src[0];
+ const UInt32 p1 = src[1];
+ dest[0] = p0;
+ dest[1] = p1;
+ #endif
+ }
+ hash++;
+ pos++;
+ cur++;
+ pb++;
+ if (hash == size || *hash != delta || pb[lenLimit] != cur[lenLimit] || d >= limit)
+ break;
+ }
+ }
+ #endif
+
+ break;
+ }
+ }
+ }
+ {
+ const UInt32 curMatch = (UInt32)pos - delta;
+ if (pb[len] < cur[len])
+ {
+ delta = pair[1];
+ *ptr1 = curMatch;
+ ptr1 = pair + 1;
+ len1 = len;
+ }
+ else
+ {
+ delta = *pair;
+ *ptr0 = curMatch;
+ ptr0 = pair;
+ len0 = len;
+ }
+
+ {
+ if (delta >= curMatch)
+ return NULL;
+ delta = (UInt32)pos - delta;
+ if (delta >= cbs
+ // delta >= _cyclicBufferSize || delta >= pos
+ || --cutValue == 0)
+ {
+ *ptr0 = *ptr1 = kEmptyHashValue;
+ _distances[-1] = (UInt32)(d - _distances);
+ break;
+ }
+ }
+ }
+ }
+ } // for (tree iterations)
+}
+ pos++;
+ _cyclicBufferPos++;
+ cur++;
+ }
+ while (d < limit);
+ *posRes = (UInt32)pos;
+ return d;
+}
+*/
diff --git a/multiarc/src/formats/7z/C/LzHash.h b/multiarc/src/formats/7z/C/LzHash.h
index e7c94230..77b898cf 100644..100755
--- a/multiarc/src/formats/7z/C/LzHash.h
+++ b/multiarc/src/formats/7z/C/LzHash.h
@@ -1,57 +1,34 @@
/* LzHash.h -- HASH functions for LZ algorithms
-2015-04-12 : Igor Pavlov : Public domain */
+2019-10-30 : Igor Pavlov : Public domain */
#ifndef __LZ_HASH_H
#define __LZ_HASH_H
+/*
+ (kHash2Size >= (1 << 8)) : Required
+ (kHash3Size >= (1 << 16)) : Required
+*/
+
#define kHash2Size (1 << 10)
#define kHash3Size (1 << 16)
-#define kHash4Size (1 << 20)
+// #define kHash4Size (1 << 20)
#define kFix3HashSize (kHash2Size)
#define kFix4HashSize (kHash2Size + kHash3Size)
-#define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size)
-
-#define HASH2_CALC hv = cur[0] | ((UInt32)cur[1] << 8);
-
-#define HASH3_CALC { \
- UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
- h2 = temp & (kHash2Size - 1); \
- hv = (temp ^ ((UInt32)cur[2] << 8)) & p->hashMask; }
-
-#define HASH4_CALC { \
- UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
- h2 = temp & (kHash2Size - 1); \
- temp ^= ((UInt32)cur[2] << 8); \
- h3 = temp & (kHash3Size - 1); \
- hv = (temp ^ (p->crc[cur[3]] << 5)) & p->hashMask; }
-
-#define HASH5_CALC { \
- UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
- h2 = temp & (kHash2Size - 1); \
- temp ^= ((UInt32)cur[2] << 8); \
- h3 = temp & (kHash3Size - 1); \
- temp ^= (p->crc[cur[3]] << 5); \
- h4 = temp & (kHash4Size - 1); \
- hv = (temp ^ (p->crc[cur[4]] << 3)) & p->hashMask; }
-
-/* #define HASH_ZIP_CALC hv = ((cur[0] | ((UInt32)cur[1] << 8)) ^ p->crc[cur[2]]) & 0xFFFF; */
-#define HASH_ZIP_CALC hv = ((cur[2] | ((UInt32)cur[0] << 8)) ^ p->crc[cur[1]]) & 0xFFFF;
-
-
-#define MT_HASH2_CALC \
- h2 = (p->crc[cur[0]] ^ cur[1]) & (kHash2Size - 1);
-
-#define MT_HASH3_CALC { \
- UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
- h2 = temp & (kHash2Size - 1); \
- h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); }
-
-#define MT_HASH4_CALC { \
- UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
- h2 = temp & (kHash2Size - 1); \
- temp ^= ((UInt32)cur[2] << 8); \
- h3 = temp & (kHash3Size - 1); \
- h4 = (temp ^ (p->crc[cur[3]] << 5)) & (kHash4Size - 1); }
+// #define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size)
+
+/*
+ We use up to 3 crc values for hash:
+ crc0
+ crc1 << Shift_1
+ crc2 << Shift_2
+ (Shift_1 = 5) and (Shift_2 = 10) is good tradeoff.
+ Small values for Shift are not good for collision rate.
+ Big value for Shift_2 increases the minimum size
+ of hash table, that will be slow for small files.
+*/
+
+#define kLzHash_CrcShift_1 5
+#define kLzHash_CrcShift_2 10
#endif
diff --git a/multiarc/src/formats/7z/C/Lzma2Dec.c b/multiarc/src/formats/7z/C/Lzma2Dec.c
index 4e138a4a..ac970a84 100644..100755
--- a/multiarc/src/formats/7z/C/Lzma2Dec.c
+++ b/multiarc/src/formats/7z/C/Lzma2Dec.c
@@ -1,5 +1,5 @@
/* Lzma2Dec.c -- LZMA2 Decoder
-2019-02-02 : Igor Pavlov : Public domain */
+2021-02-09 : Igor Pavlov : Public domain */
/* #define SHOW_DEBUG_INFO */
@@ -93,7 +93,8 @@ void Lzma2Dec_Init(CLzma2Dec *p)
LzmaDec_Init(&p->decoder);
}
-static ELzma2State Lzma2Dec_UpdateState(CLzma2Dec *p, Byte b)
+// ELzma2State
+static unsigned Lzma2Dec_UpdateState(CLzma2Dec *p, Byte b)
{
switch (p->state)
{
diff --git a/multiarc/src/formats/7z/C/Lzma2Dec.h b/multiarc/src/formats/7z/C/Lzma2Dec.h
index b8ddeac8..b8ddeac8 100644..100755
--- a/multiarc/src/formats/7z/C/Lzma2Dec.h
+++ b/multiarc/src/formats/7z/C/Lzma2Dec.h
diff --git a/multiarc/src/formats/7z/C/Lzma2DecMt.c b/multiarc/src/formats/7z/C/Lzma2DecMt.c
index 988643d9..9f1dc52b 100644..100755
--- a/multiarc/src/formats/7z/C/Lzma2DecMt.c
+++ b/multiarc/src/formats/7z/C/Lzma2DecMt.c
@@ -1,25 +1,25 @@
/* Lzma2DecMt.c -- LZMA2 Decoder Multi-thread
-2019-02-02 : Igor Pavlov : Public domain */
+2021-04-01 : Igor Pavlov : Public domain */
#include "Precomp.h"
// #define SHOW_DEBUG_INFO
+// #define _7ZIP_ST
+
#ifdef SHOW_DEBUG_INFO
#include <stdio.h>
#endif
+#ifndef _7ZIP_ST
#ifdef SHOW_DEBUG_INFO
#define PRF(x) x
#else
#define PRF(x)
#endif
-
#define PRF_STR(s) PRF(printf("\n" s "\n"))
-#define PRF_STR_INT(s, d) PRF(printf("\n" s " %d\n", (unsigned)d))
#define PRF_STR_INT_2(s, d1, d2) PRF(printf("\n" s " %d %d\n", (unsigned)d1, (unsigned)d2))
-
-// #define _7ZIP_ST
+#endif
#include "Alloc.h"
@@ -28,10 +28,10 @@
#ifndef _7ZIP_ST
#include "MtDec.h"
-#endif
-
#define LZMA2DECMT_OUT_BLOCK_MAX_DEFAULT (1 << 28)
+#endif
+
void Lzma2DecMtProps_Init(CLzma2DecMtProps *p)
{
@@ -255,7 +255,7 @@ static void Lzma2DecMt_MtCallback_Parse(void *obj, unsigned coderIndex, CMtDecCa
const unsigned kNumAlignBits = 12;
const unsigned kNumCacheLineBits = 7; /* <= kNumAlignBits */
t->alloc.numAlignBits = kNumAlignBits;
- t->alloc.offset = ((UInt32)coderIndex * ((1 << 11) + (1 << 8) + (1 << 6))) & ((1 << kNumAlignBits) - (1 << kNumCacheLineBits));
+ t->alloc.offset = ((UInt32)coderIndex * (((unsigned)1 << 11) + (1 << 8) + (1 << 6))) & (((unsigned)1 << kNumAlignBits) - ((unsigned)1 << kNumCacheLineBits));
t->alloc.baseAlloc = me->alignOffsetAlloc.baseAlloc;
}
}
@@ -527,7 +527,7 @@ static SRes Lzma2DecMt_MtCallback_Code(void *pp, unsigned coderIndex,
static SRes Lzma2DecMt_MtCallback_Write(void *pp, unsigned coderIndex,
BoolInt needWriteToStream,
- const Byte *src, size_t srcSize,
+ const Byte *src, size_t srcSize, BoolInt isCross,
BoolInt *needContinue, BoolInt *canRecode)
{
CLzma2DecMt *me = (CLzma2DecMt *)pp;
@@ -536,12 +536,14 @@ static SRes Lzma2DecMt_MtCallback_Write(void *pp, unsigned coderIndex,
const Byte *data = t->outBuf;
BoolInt needContinue2 = True;
+ UNUSED_VAR(src)
+ UNUSED_VAR(srcSize)
+ UNUSED_VAR(isCross)
+
PRF_STR_INT_2("Write", coderIndex, srcSize);
*needContinue = False;
*canRecode = True;
- UNUSED_VAR(src)
- UNUSED_VAR(srcSize)
if (
// t->parseStatus == LZMA_STATUS_FINISHED_WITH_MARK
@@ -696,7 +698,7 @@ static SRes Lzma2Dec_Decode_ST(CLzma2DecMt *p
inPos = 0;
inLim = p->inBufSize;
inData = p->inBuf;
- p->readRes = ISeqInStream_Read(p->inStream, (void *)inData, &inLim);
+ p->readRes = ISeqInStream_Read(p->inStream, (void *)(p->inBuf), &inLim);
// p->readProcessed += inLim;
// inLim -= 5; p->readWasFinished = True; // for test
if (inLim == 0 || p->readRes != SZ_OK)
@@ -838,6 +840,7 @@ SRes Lzma2DecMt_Decode(CLzma2DecMtHandle pp,
p->inProcessed = 0;
p->readWasFinished = False;
+ p->readRes = SZ_OK;
*isMT = False;
@@ -856,7 +859,7 @@ SRes Lzma2DecMt_Decode(CLzma2DecMtHandle pp,
if (p->props.numThreads > 1)
{
- IMtDecCallback vt;
+ IMtDecCallback2 vt;
Lzma2DecMt_FreeSt(p);
@@ -955,7 +958,12 @@ SRes Lzma2DecMt_Decode(CLzma2DecMtHandle pp,
*inProcessed = p->inProcessed;
// res = SZ_OK; // for test
- if (res == SZ_OK && p->readRes != SZ_OK)
+ if (res == SZ_ERROR_INPUT_EOF)
+ {
+ if (p->readRes != SZ_OK)
+ res = p->readRes;
+ }
+ else if (res == SZ_OK && p->readRes != SZ_OK)
res = p->readRes;
/*
diff --git a/multiarc/src/formats/7z/C/Lzma2DecMt.h b/multiarc/src/formats/7z/C/Lzma2DecMt.h
index 7791c310..7791c310 100644..100755
--- a/multiarc/src/formats/7z/C/Lzma2DecMt.h
+++ b/multiarc/src/formats/7z/C/Lzma2DecMt.h
diff --git a/multiarc/src/formats/7z/C/Lzma2Enc.c b/multiarc/src/formats/7z/C/Lzma2Enc.c
index 5c1ad493..e61a5dfe 100644..100755
--- a/multiarc/src/formats/7z/C/Lzma2Enc.c
+++ b/multiarc/src/formats/7z/C/Lzma2Enc.c
@@ -1,5 +1,5 @@
/* Lzma2Enc.c -- LZMA2 Encoder
-2018-07-04 : Igor Pavlov : Public domain */
+2021-02-09 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -330,7 +330,7 @@ void Lzma2EncProps_Normalize(CLzma2EncProps *p)
numBlocks++;
if (numBlocks < (unsigned)t2)
{
- t2r = (unsigned)numBlocks;
+ t2r = (int)numBlocks;
if (t2r == 0)
t2r = 1;
t3 = t1 * t2r;
@@ -632,15 +632,15 @@ static SRes Lzma2Enc_EncodeMt1(
{
if (outBuf)
{
- size_t destPos = *outBufSize;
+ const size_t destPos = *outBufSize;
if (destPos >= outLim)
return SZ_ERROR_OUTPUT_EOF;
- outBuf[destPos] = 0;
+ outBuf[destPos] = LZMA2_CONTROL_EOF; // 0
*outBufSize = destPos + 1;
}
else
{
- Byte b = 0;
+ const Byte b = LZMA2_CONTROL_EOF; // 0;
if (ISeqOutStream_Write(outStream, &b, 1) != 1)
return SZ_ERROR_WRITE;
}
@@ -780,13 +780,13 @@ SRes Lzma2Enc_Encode2(CLzma2EncHandle pp,
p->outBufSize = destBlockSize;
}
- p->mtCoder.numThreadsMax = p->props.numBlockThreads_Max;
+ p->mtCoder.numThreadsMax = (unsigned)p->props.numBlockThreads_Max;
p->mtCoder.expectedDataSize = p->expectedDataSize;
{
SRes res = MtCoder_Code(&p->mtCoder);
if (!outStream)
- *outBufSize = p->outBuf - outBuf;
+ *outBufSize = (size_t)(p->outBuf - outBuf);
return res;
}
}
diff --git a/multiarc/src/formats/7z/C/Lzma2Enc.h b/multiarc/src/formats/7z/C/Lzma2Enc.h
index 6a6110ff..6a6110ff 100644..100755
--- a/multiarc/src/formats/7z/C/Lzma2Enc.h
+++ b/multiarc/src/formats/7z/C/Lzma2Enc.h
diff --git a/multiarc/src/formats/7z/C/Lzma86.h b/multiarc/src/formats/7z/C/Lzma86.h
index bebed5cb..bebed5cb 100644..100755
--- a/multiarc/src/formats/7z/C/Lzma86.h
+++ b/multiarc/src/formats/7z/C/Lzma86.h
diff --git a/multiarc/src/formats/7z/C/Lzma86Dec.c b/multiarc/src/formats/7z/C/Lzma86Dec.c
index 21031745..21031745 100644..100755
--- a/multiarc/src/formats/7z/C/Lzma86Dec.c
+++ b/multiarc/src/formats/7z/C/Lzma86Dec.c
diff --git a/multiarc/src/formats/7z/C/Lzma86Enc.c b/multiarc/src/formats/7z/C/Lzma86Enc.c
index 2617bab8..14fcd65c 100644..100755
--- a/multiarc/src/formats/7z/C/Lzma86Enc.c
+++ b/multiarc/src/formats/7z/C/Lzma86Enc.c
@@ -11,8 +11,6 @@
#include "Bra.h"
#include "LzmaEnc.h"
-#define SZE_OUT_OVERFLOW SZE_DATA_ERROR
-
int Lzma86_Encode(Byte *dest, size_t *destLen, const Byte *src, size_t srcLen,
int level, UInt32 dictSize, int filterMode)
{
diff --git a/multiarc/src/formats/7z/C/LzmaDec.c b/multiarc/src/formats/7z/C/LzmaDec.c
index ba3e1dd5..d6742e5a 100644..100755
--- a/multiarc/src/formats/7z/C/LzmaDec.c
+++ b/multiarc/src/formats/7z/C/LzmaDec.c
@@ -1,5 +1,5 @@
/* LzmaDec.c -- LZMA Decoder
-2018-07-04 : Igor Pavlov : Public domain */
+2021-04-01 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -13,10 +13,12 @@
#define kNumBitModelTotalBits 11
#define kBitModelTotal (1 << kNumBitModelTotalBits)
-#define kNumMoveBits 5
#define RC_INIT_SIZE 5
+#ifndef _LZMA_DEC_OPT
+
+#define kNumMoveBits 5
#define NORMALIZE if (range < kTopValue) { range <<= 8; code = (code << 8) | (*buf++); }
#define IF_BIT_0(p) ttt = *(p); NORMALIZE; bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound)
@@ -62,9 +64,10 @@
probLit = prob + (offs + bit + symbol); \
GET_BIT2(probLit, symbol, offs ^= bit; , ;)
+#endif // _LZMA_DEC_OPT
-#define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_ERROR; range <<= 8; code = (code << 8) | (*buf++); }
+#define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_INPUT_EOF; range <<= 8; code = (code << 8) | (*buf++); }
#define IF_BIT_0_CHECK(p) ttt = *(p); NORMALIZE_CHECK; bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound)
#define UPDATE_0_CHECK range = bound;
@@ -114,6 +117,9 @@
#define kMatchMinLen 2
#define kMatchSpecLenStart (kMatchMinLen + kLenNumLowSymbols * 2 + kLenNumHighSymbols)
+#define kMatchSpecLen_Error_Data (1 << 9)
+#define kMatchSpecLen_Error_Fail (kMatchSpecLen_Error_Data - 1)
+
/* External ASM code needs same CLzmaProb array layout. So don't change it. */
/* (probs_1664) is faster and better for code size at some platforms */
@@ -166,10 +172,12 @@
/*
p->remainLen : shows status of LZMA decoder:
- < kMatchSpecLenStart : normal remain
- = kMatchSpecLenStart : finished
- = kMatchSpecLenStart + 1 : need init range coder
- = kMatchSpecLenStart + 2 : need init range coder and state
+ < kMatchSpecLenStart : the number of bytes to be copied with (p->rep0) offset
+ = kMatchSpecLenStart : the LZMA stream was finished with end mark
+ = kMatchSpecLenStart + 1 : need init range coder
+ = kMatchSpecLenStart + 2 : need init range coder and state
+ = kMatchSpecLen_Error_Fail : Internal Code Failure
+ = kMatchSpecLen_Error_Data + [0 ... 273] : LZMA Data Error
*/
/* ---------- LZMA_DECODE_REAL ---------- */
@@ -188,23 +196,31 @@ In:
{
LzmaDec_TryDummy() was called before to exclude LITERAL and MATCH-REP cases.
So first symbol can be only MATCH-NON-REP. And if that MATCH-NON-REP symbol
- is not END_OF_PAYALOAD_MARKER, then function returns error code.
+ is not END_OF_PAYALOAD_MARKER, then the function doesn't write any byte to dictionary,
+ the function returns SZ_OK, and the caller can use (p->remainLen) and (p->reps[0]) later.
}
Processing:
- first LZMA symbol will be decoded in any case
- All checks for limits are at the end of main loop,
- It will decode new LZMA-symbols while (p->buf < bufLimit && dicPos < limit),
+ The first LZMA symbol will be decoded in any case.
+ All main checks for limits are at the end of main loop,
+ It decodes additional LZMA-symbols while (p->buf < bufLimit && dicPos < limit),
RangeCoder is still without last normalization when (p->buf < bufLimit) is being checked.
+ But if (p->buf < bufLimit), the caller provided at least (LZMA_REQUIRED_INPUT_MAX + 1) bytes for
+ next iteration before limit (bufLimit + LZMA_REQUIRED_INPUT_MAX),
+ that is enough for worst case LZMA symbol with one additional RangeCoder normalization for one bit.
+ So that function never reads bufLimit [LZMA_REQUIRED_INPUT_MAX] byte.
Out:
RangeCoder is normalized
Result:
SZ_OK - OK
- SZ_ERROR_DATA - Error
- p->remainLen:
- < kMatchSpecLenStart : normal remain
- = kMatchSpecLenStart : finished
+ p->remainLen:
+ < kMatchSpecLenStart : the number of bytes to be copied with (p->reps[0]) offset
+ = kMatchSpecLenStart : the LZMA stream was finished with end mark
+
+ SZ_ERROR_DATA - error, when the MATCH-Symbol refers out of dictionary
+ p->remainLen : undefined
+ p->reps[*] : undefined
*/
@@ -316,11 +332,6 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
else
{
UPDATE_1(prob);
- /*
- // that case was checked before with kBadRepCode
- if (checkDicSize == 0 && processedPos == 0)
- return SZ_ERROR_DATA;
- */
prob = probs + IsRepG0 + state;
IF_BIT_0(prob)
{
@@ -329,6 +340,13 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
IF_BIT_0(prob)
{
UPDATE_0(prob);
+
+ // that case was checked before with kBadRepCode
+ // if (checkDicSize == 0 && processedPos == 0) { len = kMatchSpecLen_Error_Data + 1; break; }
+ // The caller doesn't allow (dicPos == limit) case here
+ // so we don't need the following check:
+ // if (dicPos == limit) { state = state < kNumLitStates ? 9 : 11; len = 1; break; }
+
dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];
dicPos++;
processedPos++;
@@ -518,8 +536,10 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3;
if (distance >= (checkDicSize == 0 ? processedPos: checkDicSize))
{
- p->dicPos = dicPos;
- return SZ_ERROR_DATA;
+ len += kMatchSpecLen_Error_Data + kMatchMinLen;
+ // len = kMatchSpecLen_Error_Data;
+ // len += kMatchMinLen;
+ break;
}
}
@@ -532,8 +552,13 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
if ((rem = limit - dicPos) == 0)
{
- p->dicPos = dicPos;
- return SZ_ERROR_DATA;
+ /*
+ We stop decoding and return SZ_OK, and we can resume decoding later.
+ Any error conditions can be tested later in caller code.
+ For more strict mode we can stop decoding with error
+ // len += kMatchSpecLen_Error_Data;
+ */
+ break;
}
curLen = ((rem < len) ? (unsigned)rem : len);
@@ -572,7 +597,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
p->buf = buf;
p->range = range;
p->code = code;
- p->remainLen = (UInt32)len;
+ p->remainLen = (UInt32)len; // & (kMatchSpecLen_Error_Data - 1); // we can write real length for error matches too.
p->dicPos = dicPos;
p->processedPos = processedPos;
p->reps[0] = rep0;
@@ -580,40 +605,61 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
p->reps[2] = rep2;
p->reps[3] = rep3;
p->state = (UInt32)state;
-
+ if (len >= kMatchSpecLen_Error_Data)
+ return SZ_ERROR_DATA;
return SZ_OK;
}
#endif
+
+
static void MY_FAST_CALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit)
{
- if (p->remainLen != 0 && p->remainLen < kMatchSpecLenStart)
+ unsigned len = (unsigned)p->remainLen;
+ if (len == 0 /* || len >= kMatchSpecLenStart */)
+ return;
{
- Byte *dic = p->dic;
SizeT dicPos = p->dicPos;
- SizeT dicBufSize = p->dicBufSize;
- unsigned len = (unsigned)p->remainLen;
- SizeT rep0 = p->reps[0]; /* we use SizeT to avoid the BUG of VC14 for AMD64 */
- SizeT rem = limit - dicPos;
- if (rem < len)
- len = (unsigned)(rem);
+ Byte *dic;
+ SizeT dicBufSize;
+ SizeT rep0; /* we use SizeT to avoid the BUG of VC14 for AMD64 */
+ {
+ SizeT rem = limit - dicPos;
+ if (rem < len)
+ {
+ len = (unsigned)(rem);
+ if (len == 0)
+ return;
+ }
+ }
if (p->checkDicSize == 0 && p->prop.dicSize - p->processedPos <= len)
p->checkDicSize = p->prop.dicSize;
p->processedPos += (UInt32)len;
p->remainLen -= (UInt32)len;
- while (len != 0)
+ dic = p->dic;
+ rep0 = p->reps[0];
+ dicBufSize = p->dicBufSize;
+ do
{
- len--;
dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];
dicPos++;
}
+ while (--len);
p->dicPos = dicPos;
}
}
+/*
+At staring of new stream we have one of the following symbols:
+ - Literal - is allowed
+ - Non-Rep-Match - is allowed only if it's end marker symbol
+ - Rep-Match - is not allowed
+We use early check of (RangeCoder:Code) over kBadRepCode to simplify main decoding code
+*/
+
#define kRange0 0xFFFFFFFF
#define kBound0 ((kRange0 >> kNumBitModelTotalBits) << (kNumBitModelTotalBits - 1))
#define kBadRepCode (kBound0 + (((kRange0 - kBound0) >> kNumBitModelTotalBits) << (kNumBitModelTotalBits - 1)))
@@ -621,69 +667,77 @@ static void MY_FAST_CALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit)
#error Stop_Compiling_Bad_LZMA_Check
#endif
+
+/*
+LzmaDec_DecodeReal2():
+ It calls LZMA_DECODE_REAL() and it adjusts limit according (p->checkDicSize).
+
+We correct (p->checkDicSize) after LZMA_DECODE_REAL() and in LzmaDec_WriteRem(),
+and we support the following state of (p->checkDicSize):
+ if (total_processed < p->prop.dicSize) then
+ {
+ (total_processed == p->processedPos)
+ (p->checkDicSize == 0)
+ }
+ else
+ (p->checkDicSize == p->prop.dicSize)
+*/
+
static int MY_FAST_CALL LzmaDec_DecodeReal2(CLzmaDec *p, SizeT limit, const Byte *bufLimit)
{
- do
+ if (p->checkDicSize == 0)
{
- SizeT limit2 = limit;
- if (p->checkDicSize == 0)
- {
- UInt32 rem = p->prop.dicSize - p->processedPos;
- if (limit - p->dicPos > rem)
- limit2 = p->dicPos + rem;
-
- if (p->processedPos == 0)
- if (p->code >= kBadRepCode)
- return SZ_ERROR_DATA;
- }
-
- RINOK(LZMA_DECODE_REAL(p, limit2, bufLimit));
-
+ UInt32 rem = p->prop.dicSize - p->processedPos;
+ if (limit - p->dicPos > rem)
+ limit = p->dicPos + rem;
+ }
+ {
+ int res = LZMA_DECODE_REAL(p, limit, bufLimit);
if (p->checkDicSize == 0 && p->processedPos >= p->prop.dicSize)
p->checkDicSize = p->prop.dicSize;
-
- LzmaDec_WriteRem(p, limit);
+ return res;
}
- while (p->dicPos < limit && p->buf < bufLimit && p->remainLen < kMatchSpecLenStart);
-
- return 0;
}
+
+
typedef enum
{
- DUMMY_ERROR, /* unexpected end of input stream */
+ DUMMY_INPUT_EOF, /* need more input data */
DUMMY_LIT,
DUMMY_MATCH,
DUMMY_REP
} ELzmaDummy;
-static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inSize)
+
+#define IS_DUMMY_END_MARKER_POSSIBLE(dummyRes) ((dummyRes) == DUMMY_MATCH)
+
+static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byte **bufOut)
{
UInt32 range = p->range;
UInt32 code = p->code;
- const Byte *bufLimit = buf + inSize;
+ const Byte *bufLimit = *bufOut;
const CLzmaProb *probs = GET_PROBS;
unsigned state = (unsigned)p->state;
ELzmaDummy res;
+ for (;;)
{
const CLzmaProb *prob;
UInt32 bound;
unsigned ttt;
- unsigned posState = CALC_POS_STATE(p->processedPos, (1 << p->prop.pb) - 1);
+ unsigned posState = CALC_POS_STATE(p->processedPos, ((unsigned)1 << p->prop.pb) - 1);
prob = probs + IsMatch + COMBINED_PS_STATE;
IF_BIT_0_CHECK(prob)
{
UPDATE_0_CHECK
- /* if (bufLimit - buf >= 7) return DUMMY_LIT; */
-
prob = probs + Literal;
if (p->checkDicSize != 0 || p->processedPos != 0)
prob += ((UInt32)LZMA_LIT_SIZE *
- ((((p->processedPos) & ((1 << (p->prop.lp)) - 1)) << p->prop.lc) +
- (p->dic[(p->dicPos == 0 ? p->dicBufSize : p->dicPos) - 1] >> (8 - p->prop.lc))));
+ ((((p->processedPos) & (((unsigned)1 << (p->prop.lp)) - 1)) << p->prop.lc) +
+ ((unsigned)p->dic[(p->dicPos == 0 ? p->dicBufSize : p->dicPos) - 1] >> (8 - p->prop.lc))));
if (state < kNumLitStates)
{
@@ -735,8 +789,7 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inS
IF_BIT_0_CHECK(prob)
{
UPDATE_0_CHECK;
- NORMALIZE_CHECK;
- return DUMMY_REP;
+ break;
}
else
{
@@ -812,8 +865,6 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inS
{
unsigned numDirectBits = ((posSlot >> 1) - 1);
- /* if (bufLimit - buf >= 8) return DUMMY_MATCH; */
-
if (posSlot < kEndPosModelIndex)
{
prob = probs + SpecPos + ((2 | (posSlot & 1)) << numDirectBits);
@@ -844,12 +895,15 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inS
}
}
}
+ break;
}
NORMALIZE_CHECK;
+
+ *bufOut = buf;
return res;
}
-
+void LzmaDec_InitDicAndState(CLzmaDec *p, BoolInt initDic, BoolInt initState);
void LzmaDec_InitDicAndState(CLzmaDec *p, BoolInt initDic, BoolInt initState)
{
p->remainLen = kMatchSpecLenStart + 1;
@@ -872,16 +926,41 @@ void LzmaDec_Init(CLzmaDec *p)
}
+/*
+LZMA supports optional end_marker.
+So the decoder can lookahead for one additional LZMA-Symbol to check end_marker.
+That additional LZMA-Symbol can require up to LZMA_REQUIRED_INPUT_MAX bytes in input stream.
+When the decoder reaches dicLimit, it looks (finishMode) parameter:
+ if (finishMode == LZMA_FINISH_ANY), the decoder doesn't lookahead
+ if (finishMode != LZMA_FINISH_ANY), the decoder lookahead, if end_marker is possible for current position
+
+When the decoder lookahead, and the lookahead symbol is not end_marker, we have two ways:
+ 1) Strict mode (default) : the decoder returns SZ_ERROR_DATA.
+ 2) The relaxed mode (alternative mode) : we could return SZ_OK, and the caller
+ must check (status) value. The caller can show the error,
+ if the end of stream is expected, and the (status) is noit
+ LZMA_STATUS_FINISHED_WITH_MARK or LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK.
+*/
+
+
+#define RETURN__NOT_FINISHED__FOR_FINISH \
+ *status = LZMA_STATUS_NOT_FINISHED; \
+ return SZ_ERROR_DATA; // for strict mode
+ // return SZ_OK; // for relaxed mode
+
+
SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *srcLen,
ELzmaFinishMode finishMode, ELzmaStatus *status)
{
SizeT inSize = *srcLen;
(*srcLen) = 0;
-
*status = LZMA_STATUS_NOT_SPECIFIED;
if (p->remainLen > kMatchSpecLenStart)
{
+ if (p->remainLen > kMatchSpecLenStart + 2)
+ return p->remainLen == kMatchSpecLen_Error_Fail ? SZ_ERROR_FAIL : SZ_ERROR_DATA;
+
for (; inSize > 0 && p->tempBufSize < RC_INIT_SIZE; (*srcLen)++, inSize--)
p->tempBuf[p->tempBufSize++] = *src++;
if (p->tempBufSize != 0 && p->tempBuf[0] != 0)
@@ -896,6 +975,12 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *sr
| ((UInt32)p->tempBuf[2] << 16)
| ((UInt32)p->tempBuf[3] << 8)
| ((UInt32)p->tempBuf[4]);
+
+ if (p->checkDicSize == 0
+ && p->processedPos == 0
+ && p->code >= kBadRepCode)
+ return SZ_ERROR_DATA;
+
p->range = 0xFFFFFFFF;
p->tempBufSize = 0;
@@ -913,10 +998,21 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *sr
p->remainLen = 0;
}
- LzmaDec_WriteRem(p, dicLimit);
-
- while (p->remainLen != kMatchSpecLenStart)
+ for (;;)
{
+ if (p->remainLen == kMatchSpecLenStart)
+ {
+ if (p->code != 0)
+ return SZ_ERROR_DATA;
+ *status = LZMA_STATUS_FINISHED_WITH_MARK;
+ return SZ_OK;
+ }
+
+ LzmaDec_WriteRem(p, dicLimit);
+
+ {
+ // (p->remainLen == 0 || p->dicPos == dicLimit)
+
int checkEndMarkNow = 0;
if (p->dicPos >= dicLimit)
@@ -933,92 +1029,174 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *sr
}
if (p->remainLen != 0)
{
- *status = LZMA_STATUS_NOT_FINISHED;
- return SZ_ERROR_DATA;
+ RETURN__NOT_FINISHED__FOR_FINISH;
}
checkEndMarkNow = 1;
}
+ // (p->remainLen == 0)
+
if (p->tempBufSize == 0)
{
- SizeT processed;
const Byte *bufLimit;
+ int dummyProcessed = -1;
+
if (inSize < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow)
{
- int dummyRes = LzmaDec_TryDummy(p, src, inSize);
- if (dummyRes == DUMMY_ERROR)
+ const Byte *bufOut = src + inSize;
+
+ ELzmaDummy dummyRes = LzmaDec_TryDummy(p, src, &bufOut);
+
+ if (dummyRes == DUMMY_INPUT_EOF)
{
- memcpy(p->tempBuf, src, inSize);
- p->tempBufSize = (unsigned)inSize;
+ size_t i;
+ if (inSize >= LZMA_REQUIRED_INPUT_MAX)
+ break;
(*srcLen) += inSize;
+ p->tempBufSize = (unsigned)inSize;
+ for (i = 0; i < inSize; i++)
+ p->tempBuf[i] = src[i];
*status = LZMA_STATUS_NEEDS_MORE_INPUT;
return SZ_OK;
}
- if (checkEndMarkNow && dummyRes != DUMMY_MATCH)
+
+ dummyProcessed = (int)(bufOut - src);
+ if ((unsigned)dummyProcessed > LZMA_REQUIRED_INPUT_MAX)
+ break;
+
+ if (checkEndMarkNow && !IS_DUMMY_END_MARKER_POSSIBLE(dummyRes))
{
- *status = LZMA_STATUS_NOT_FINISHED;
- return SZ_ERROR_DATA;
+ unsigned i;
+ (*srcLen) += (unsigned)dummyProcessed;
+ p->tempBufSize = (unsigned)dummyProcessed;
+ for (i = 0; i < (unsigned)dummyProcessed; i++)
+ p->tempBuf[i] = src[i];
+ // p->remainLen = kMatchSpecLen_Error_Data;
+ RETURN__NOT_FINISHED__FOR_FINISH;
}
+
bufLimit = src;
+ // we will decode only one iteration
}
else
bufLimit = src + inSize - LZMA_REQUIRED_INPUT_MAX;
+
p->buf = src;
- if (LzmaDec_DecodeReal2(p, dicLimit, bufLimit) != 0)
- return SZ_ERROR_DATA;
- processed = (SizeT)(p->buf - src);
- (*srcLen) += processed;
- src += processed;
- inSize -= processed;
+
+ {
+ int res = LzmaDec_DecodeReal2(p, dicLimit, bufLimit);
+
+ SizeT processed = (SizeT)(p->buf - src);
+
+ if (dummyProcessed < 0)
+ {
+ if (processed > inSize)
+ break;
+ }
+ else if ((unsigned)dummyProcessed != processed)
+ break;
+
+ src += processed;
+ inSize -= processed;
+ (*srcLen) += processed;
+
+ if (res != SZ_OK)
+ {
+ p->remainLen = kMatchSpecLen_Error_Data;
+ return SZ_ERROR_DATA;
+ }
+ }
+ continue;
}
- else
+
{
- unsigned rem = p->tempBufSize, lookAhead = 0;
- while (rem < LZMA_REQUIRED_INPUT_MAX && lookAhead < inSize)
- p->tempBuf[rem++] = src[lookAhead++];
- p->tempBufSize = rem;
+ // we have some data in (p->tempBuf)
+ // in strict mode: tempBufSize is not enough for one Symbol decoding.
+ // in relaxed mode: tempBufSize not larger than required for one Symbol decoding.
+
+ unsigned rem = p->tempBufSize;
+ unsigned ahead = 0;
+ int dummyProcessed = -1;
+
+ while (rem < LZMA_REQUIRED_INPUT_MAX && ahead < inSize)
+ p->tempBuf[rem++] = src[ahead++];
+
+ // ahead - the size of new data copied from (src) to (p->tempBuf)
+ // rem - the size of temp buffer including new data from (src)
+
if (rem < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow)
{
- int dummyRes = LzmaDec_TryDummy(p, p->tempBuf, (SizeT)rem);
- if (dummyRes == DUMMY_ERROR)
+ const Byte *bufOut = p->tempBuf + rem;
+
+ ELzmaDummy dummyRes = LzmaDec_TryDummy(p, p->tempBuf, &bufOut);
+
+ if (dummyRes == DUMMY_INPUT_EOF)
{
- (*srcLen) += (SizeT)lookAhead;
+ if (rem >= LZMA_REQUIRED_INPUT_MAX)
+ break;
+ p->tempBufSize = rem;
+ (*srcLen) += (SizeT)ahead;
*status = LZMA_STATUS_NEEDS_MORE_INPUT;
return SZ_OK;
}
- if (checkEndMarkNow && dummyRes != DUMMY_MATCH)
+
+ dummyProcessed = (int)(bufOut - p->tempBuf);
+
+ if ((unsigned)dummyProcessed < p->tempBufSize)
+ break;
+
+ if (checkEndMarkNow && !IS_DUMMY_END_MARKER_POSSIBLE(dummyRes))
{
- *status = LZMA_STATUS_NOT_FINISHED;
- return SZ_ERROR_DATA;
+ (*srcLen) += (unsigned)dummyProcessed - p->tempBufSize;
+ p->tempBufSize = (unsigned)dummyProcessed;
+ // p->remainLen = kMatchSpecLen_Error_Data;
+ RETURN__NOT_FINISHED__FOR_FINISH;
}
}
+
p->buf = p->tempBuf;
- if (LzmaDec_DecodeReal2(p, dicLimit, p->buf) != 0)
- return SZ_ERROR_DATA;
{
- unsigned kkk = (unsigned)(p->buf - p->tempBuf);
- if (rem < kkk)
- return SZ_ERROR_FAIL; /* some internal error */
- rem -= kkk;
- if (lookAhead < rem)
- return SZ_ERROR_FAIL; /* some internal error */
- lookAhead -= rem;
+ // we decode one symbol from (p->tempBuf) here, so the (bufLimit) is equal to (p->buf)
+ int res = LzmaDec_DecodeReal2(p, dicLimit, p->buf);
+
+ SizeT processed = (SizeT)(p->buf - p->tempBuf);
+ rem = p->tempBufSize;
+
+ if (dummyProcessed < 0)
+ {
+ if (processed > LZMA_REQUIRED_INPUT_MAX)
+ break;
+ if (processed < rem)
+ break;
+ }
+ else if ((unsigned)dummyProcessed != processed)
+ break;
+
+ processed -= rem;
+
+ src += processed;
+ inSize -= processed;
+ (*srcLen) += processed;
+ p->tempBufSize = 0;
+
+ if (res != SZ_OK)
+ {
+ p->remainLen = kMatchSpecLen_Error_Data;
+ return SZ_ERROR_DATA;
+ }
}
- (*srcLen) += (SizeT)lookAhead;
- src += lookAhead;
- inSize -= (SizeT)lookAhead;
- p->tempBufSize = 0;
}
+ }
}
-
- if (p->code != 0)
- return SZ_ERROR_DATA;
- *status = LZMA_STATUS_FINISHED_WITH_MARK;
- return SZ_OK;
+
+ /* Some unexpected error: internal error of code, memory corruption or hardware failure */
+ p->remainLen = kMatchSpecLen_Error_Fail;
+ return SZ_ERROR_FAIL;
}
+
SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status)
{
SizeT outSize = *destLen;
diff --git a/multiarc/src/formats/7z/C/LzmaDec.h b/multiarc/src/formats/7z/C/LzmaDec.h
index 1f0927ab..6f129625 100644..100755
--- a/multiarc/src/formats/7z/C/LzmaDec.h
+++ b/multiarc/src/formats/7z/C/LzmaDec.h
@@ -1,5 +1,5 @@
/* LzmaDec.h -- LZMA Decoder
-2018-04-21 : Igor Pavlov : Public domain */
+2020-03-19 : Igor Pavlov : Public domain */
#ifndef __LZMA_DEC_H
#define __LZMA_DEC_H
@@ -181,6 +181,7 @@ Returns:
LZMA_STATUS_NEEDS_MORE_INPUT
LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK
SZ_ERROR_DATA - Data error
+ SZ_ERROR_FAIL - Some unexpected error: internal error of code, memory corruption or hardware failure
*/
SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit,
@@ -223,6 +224,7 @@ Returns:
SZ_ERROR_MEM - Memory allocation error
SZ_ERROR_UNSUPPORTED - Unsupported properties
SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src).
+ SZ_ERROR_FAIL - Some unexpected error: internal error of code, memory corruption or hardware failure
*/
SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
diff --git a/multiarc/src/formats/7z/C/LzmaEnc.c b/multiarc/src/formats/7z/C/LzmaEnc.c
index 46a0db00..c8b31a19 100644..100755
--- a/multiarc/src/formats/7z/C/LzmaEnc.c
+++ b/multiarc/src/formats/7z/C/LzmaEnc.c
@@ -1,5 +1,5 @@
/* LzmaEnc.c -- LZMA Encoder
-2019-01-10: Igor Pavlov : Public domain */
+2022-07-15: Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -12,6 +12,7 @@
#include <stdio.h>
#endif
+#include "CpuArch.h"
#include "LzmaEnc.h"
#include "LzFind.h"
@@ -19,12 +20,25 @@
#include "LzFindMt.h"
#endif
+/* the following LzmaEnc_* declarations is internal LZMA interface for LZMA2 encoder */
+
+SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle pp, ISeqInStream *inStream, UInt32 keepWindowSize,
+ ISzAllocPtr alloc, ISzAllocPtr allocBig);
+SRes LzmaEnc_MemPrepare(CLzmaEncHandle pp, const Byte *src, SizeT srcLen,
+ UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig);
+SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, BoolInt reInit,
+ Byte *dest, size_t *destLen, UInt32 desiredPackSize, UInt32 *unpackSize);
+const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle pp);
+void LzmaEnc_Finish(CLzmaEncHandle pp);
+void LzmaEnc_SaveState(CLzmaEncHandle pp);
+void LzmaEnc_RestoreState(CLzmaEncHandle pp);
+
#ifdef SHOW_STAT
static unsigned g_STAT_OFFSET = 0;
#endif
-#define kLzmaMaxHistorySize ((UInt32)3 << 29)
-/* #define kLzmaMaxHistorySize ((UInt32)7 << 29) */
+/* for good normalization speed we still reserve 256 MB before 4 GB range */
+#define kLzmaMaxHistorySize ((UInt32)15 << 28)
#define kNumTopBits 24
#define kTopValue ((UInt32)1 << kNumTopBits)
@@ -36,7 +50,7 @@ static unsigned g_STAT_OFFSET = 0;
#define kNumMoveReducingBits 4
#define kNumBitPriceShiftBits 4
-#define kBitPrice (1 << kNumBitPriceShiftBits)
+// #define kBitPrice (1 << kNumBitPriceShiftBits)
#define REP_LEN_COUNT 64
@@ -47,6 +61,7 @@ void LzmaEncProps_Init(CLzmaEncProps *p)
p->reduceSize = (UInt64)(Int64)-1;
p->lc = p->lp = p->pb = p->algo = p->fb = p->btMode = p->numHashBytes = p->numThreads = -1;
p->writeEndMark = 0;
+ p->affinity = 0;
}
void LzmaEncProps_Normalize(CLzmaEncProps *p)
@@ -55,16 +70,21 @@ void LzmaEncProps_Normalize(CLzmaEncProps *p)
if (level < 0) level = 5;
p->level = level;
- if (p->dictSize == 0) p->dictSize = (level <= 5 ? (1 << (level * 2 + 14)) : (level <= 7 ? (1 << 25) : (1 << 26)));
+ if (p->dictSize == 0)
+ p->dictSize =
+ ( level <= 3 ? ((UInt32)1 << (level * 2 + 16)) :
+ ( level <= 6 ? ((UInt32)1 << (level + 19)) :
+ ( level <= 7 ? ((UInt32)1 << 25) : ((UInt32)1 << 26)
+ )));
+
if (p->dictSize > p->reduceSize)
{
- unsigned i;
- UInt32 reduceSize = (UInt32)p->reduceSize;
- for (i = 11; i <= 30; i++)
- {
- if (reduceSize <= ((UInt32)2 << i)) { p->dictSize = ((UInt32)2 << i); break; }
- if (reduceSize <= ((UInt32)3 << i)) { p->dictSize = ((UInt32)3 << i); break; }
- }
+ UInt32 v = (UInt32)p->reduceSize;
+ const UInt32 kReduceMin = ((UInt32)1 << 12);
+ if (v < kReduceMin)
+ v = kReduceMin;
+ if (p->dictSize > v)
+ p->dictSize = v;
}
if (p->lc < 0) p->lc = 3;
@@ -74,8 +94,8 @@ void LzmaEncProps_Normalize(CLzmaEncProps *p)
if (p->algo < 0) p->algo = (level < 5 ? 0 : 1);
if (p->fb < 0) p->fb = (level < 7 ? 32 : 64);
if (p->btMode < 0) p->btMode = (p->algo == 0 ? 0 : 1);
- if (p->numHashBytes < 0) p->numHashBytes = 4;
- if (p->mc == 0) p->mc = (16 + (p->fb >> 1)) >> (p->btMode ? 0 : 1);
+ if (p->numHashBytes < 0) p->numHashBytes = (p->btMode ? 4 : 5);
+ if (p->mc == 0) p->mc = (16 + ((unsigned)p->fb >> 1)) >> (p->btMode ? 0 : 1);
if (p->numThreads < 0)
p->numThreads =
@@ -93,18 +113,85 @@ UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps *props2)
return props.dictSize;
}
-#if (_MSC_VER >= 1400)
-/* BSR code is fast for some new CPUs */
-/* #define LZMA_LOG_BSR */
+
+/*
+x86/x64:
+
+BSR:
+ IF (SRC == 0) ZF = 1, DEST is undefined;
+ AMD : DEST is unchanged;
+ IF (SRC != 0) ZF = 0; DEST is index of top non-zero bit
+ BSR is slow in some processors
+
+LZCNT:
+ IF (SRC == 0) CF = 1, DEST is size_in_bits_of_register(src) (32 or 64)
+ IF (SRC != 0) CF = 0, DEST = num_lead_zero_bits
+ IF (DEST == 0) ZF = 1;
+
+LZCNT works only in new processors starting from Haswell.
+if LZCNT is not supported by processor, then it's executed as BSR.
+LZCNT can be faster than BSR, if supported.
+*/
+
+// #define LZMA_LOG_BSR
+
+#if defined(MY_CPU_ARM_OR_ARM64) /* || defined(MY_CPU_X86_OR_AMD64) */
+
+ #if (defined(__clang__) && (__clang_major__ >= 6)) \
+ || (defined(__GNUC__) && (__GNUC__ >= 6))
+ #define LZMA_LOG_BSR
+ #elif defined(_MSC_VER) && (_MSC_VER >= 1300)
+ // #if defined(MY_CPU_ARM_OR_ARM64)
+ #define LZMA_LOG_BSR
+ // #endif
+ #endif
#endif
+// #include <intrin.h>
+
#ifdef LZMA_LOG_BSR
-#define kDicLogSizeMaxCompress 32
+#if defined(__clang__) \
+ || defined(__GNUC__)
+
+/*
+ C code: : (30 - __builtin_clz(x))
+ gcc9/gcc10 for x64 /x86 : 30 - (bsr(x) xor 31)
+ clang10 for x64 : 31 + (bsr(x) xor -32)
+*/
+
+ #define MY_clz(x) ((unsigned)__builtin_clz(x))
+ // __lzcnt32
+ // __builtin_ia32_lzcnt_u32
+
+#else // #if defined(_MSC_VER)
+
+ #ifdef MY_CPU_ARM_OR_ARM64
+
+ #define MY_clz _CountLeadingZeros
+
+ #else // if defined(MY_CPU_X86_OR_AMD64)
+
+ // #define MY_clz __lzcnt // we can use lzcnt (unsupported by old CPU)
+ // _BitScanReverse code is not optimal for some MSVC compilers
+ #define BSR2_RET(pos, res) { unsigned long zz; _BitScanReverse(&zz, (pos)); zz--; \
+ res = (zz + zz) + (pos >> zz); }
+
+ #endif // MY_CPU_X86_OR_AMD64
+
+#endif // _MSC_VER
+
+
+#ifndef BSR2_RET
-#define BSR2_RET(pos, res) { unsigned long zz; _BitScanReverse(&zz, (pos)); res = (zz + zz) + ((pos >> (zz - 1)) & 1); }
+ #define BSR2_RET(pos, res) { unsigned zz = 30 - MY_clz(pos); \
+ res = (zz + zz) + (pos >> zz); }
-static unsigned GetPosSlot1(UInt32 pos)
+#endif
+
+
+unsigned GetPosSlot1(UInt32 pos);
+unsigned GetPosSlot1(UInt32 pos)
{
unsigned res;
BSR2_RET(pos, res);
@@ -113,10 +200,10 @@ static unsigned GetPosSlot1(UInt32 pos)
#define GetPosSlot2(pos, res) { BSR2_RET(pos, res); }
#define GetPosSlot(pos, res) { if (pos < 2) res = pos; else BSR2_RET(pos, res); }
-#else
-#define kNumLogBits (9 + sizeof(size_t) / 2)
-/* #define kNumLogBits (11 + sizeof(size_t) / 8 * 3) */
+#else // ! LZMA_LOG_BSR
+
+#define kNumLogBits (11 + sizeof(size_t) / 8 * 3)
#define kDicLogSizeMaxCompress ((kNumLogBits - 1) * 2 + 7)
@@ -163,7 +250,7 @@ static void LzmaEnc_FastPosInit(Byte *g_FastPos)
#define GetPosSlot2(pos, res) { BSR2_RET(pos, res); }
#define GetPosSlot(pos, res) { if (pos < kNumFullDistances) res = p->g_FastPos[pos & (kNumFullDistances - 1)]; else BSR2_RET(pos, res); }
-#endif
+#endif // LZMA_LOG_BSR
#define LZMA_NUM_REPS 4
@@ -193,7 +280,7 @@ typedef struct
#define kNumLenToPosStates 4
#define kNumPosSlotBits 6
-#define kDicLogSizeMin 0
+// #define kDicLogSizeMin 0
#define kDicLogSizeMax 32
#define kDistTableSizeMax (kDicLogSizeMax * 2)
@@ -299,7 +386,7 @@ typedef UInt32 CProbPrice;
typedef struct
{
void *matchFinderObj;
- IMatchFinder matchFinder;
+ IMatchFinder2 matchFinder;
unsigned optCur;
unsigned optEnd;
@@ -344,10 +431,14 @@ typedef struct
// begin of CMatchFinderMt is used in LZ thread
CMatchFinderMt matchFinderMt;
// end of CMatchFinderMt is used in BT and HASH threads
+ // #else
+ // CMatchFinder matchFinderBase;
#endif
-
CMatchFinder matchFinderBase;
+
+ // we suppose that we have 8-bytes alignment after CMatchFinder
+
#ifndef _7ZIP_ST
Byte pad[128];
#endif
@@ -355,8 +446,10 @@ typedef struct
// LZ thread
CProbPrice ProbPrices[kBitModelTotal >> kNumMoveReducingBits];
- UInt32 matches[LZMA_MATCH_LEN_MAX * 2 + 2 + 1];
+ // we want {len , dist} pairs to be 8-bytes aligned in matches array
+ UInt32 matches[LZMA_MATCH_LEN_MAX * 2 + 2];
+ // we want 8-bytes alignment here
UInt32 alignPrices[kAlignTableSize];
UInt32 posSlotPrices[kNumLenToPosStates][kDistTableSizeMax];
UInt32 distancesPrices[kNumLenToPosStates][kNumFullDistances];
@@ -385,12 +478,19 @@ typedef struct
CSaveState saveState;
+ // BoolInt mf_Failure;
#ifndef _7ZIP_ST
Byte pad2[128];
#endif
} CLzmaEnc;
+#define MFB (p->matchFinderBase)
+/*
+#ifndef _7ZIP_ST
+#define MFB (p->matchFinderMt.MatchFinder)
+#endif
+*/
#define COPY_ARR(dest, src, arr) memcpy(dest->arr, src->arr, sizeof(src->arr));
@@ -455,41 +555,51 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2)
if (props.lc > LZMA_LC_MAX
|| props.lp > LZMA_LP_MAX
- || props.pb > LZMA_PB_MAX
- || props.dictSize > ((UInt64)1 << kDicLogSizeMaxCompress)
- || props.dictSize > kLzmaMaxHistorySize)
+ || props.pb > LZMA_PB_MAX)
return SZ_ERROR_PARAM;
+
+ if (props.dictSize > kLzmaMaxHistorySize)
+ props.dictSize = kLzmaMaxHistorySize;
+
+ #ifndef LZMA_LOG_BSR
+ {
+ const UInt64 dict64 = props.dictSize;
+ if (dict64 > ((UInt64)1 << kDicLogSizeMaxCompress))
+ return SZ_ERROR_PARAM;
+ }
+ #endif
+
p->dictSize = props.dictSize;
{
- unsigned fb = props.fb;
+ unsigned fb = (unsigned)props.fb;
if (fb < 5)
fb = 5;
if (fb > LZMA_MATCH_LEN_MAX)
fb = LZMA_MATCH_LEN_MAX;
p->numFastBytes = fb;
}
- p->lc = props.lc;
- p->lp = props.lp;
- p->pb = props.pb;
+ p->lc = (unsigned)props.lc;
+ p->lp = (unsigned)props.lp;
+ p->pb = (unsigned)props.pb;
p->fastMode = (props.algo == 0);
// p->_maxMode = True;
- p->matchFinderBase.btMode = (Byte)(props.btMode ? 1 : 0);
+ MFB.btMode = (Byte)(props.btMode ? 1 : 0);
{
unsigned numHashBytes = 4;
if (props.btMode)
{
- if (props.numHashBytes < 2)
- numHashBytes = 2;
- else if (props.numHashBytes < 4)
- numHashBytes = props.numHashBytes;
+ if (props.numHashBytes < 2) numHashBytes = 2;
+ else if (props.numHashBytes < 4) numHashBytes = (unsigned)props.numHashBytes;
}
- p->matchFinderBase.numHashBytes = numHashBytes;
+ if (props.numHashBytes >= 5) numHashBytes = 5;
+
+ MFB.numHashBytes = numHashBytes;
}
- p->matchFinderBase.cutValue = props.mc;
+ MFB.cutValue = props.mc;
- p->writeEndMark = props.writeEndMark;
+ p->writeEndMark = (BoolInt)props.writeEndMark;
#ifndef _7ZIP_ST
/*
@@ -500,6 +610,8 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2)
}
*/
p->multiThread = (props.numThreads > 1);
+ p->matchFinderMt.btSync.affinity =
+ p->matchFinderMt.hashSync.affinity = props.affinity;
#endif
return SZ_OK;
@@ -509,7 +621,7 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2)
void LzmaEnc_SetDataSize(CLzmaEncHandle pp, UInt64 expectedDataSiize)
{
CLzmaEnc *p = (CLzmaEnc *)pp;
- p->matchFinderBase.expectedDataSize = expectedDataSiize;
+ MFB.expectedDataSize = expectedDataSiize;
}
@@ -536,8 +648,8 @@ static void RangeEnc_Construct(CRangeEnc *p)
p->bufBase = NULL;
}
-#define RangeEnc_GetProcessed(p) ((p)->processed + ((p)->buf - (p)->bufBase) + (p)->cacheSize)
-#define RangeEnc_GetProcessed_sizet(p) ((size_t)(p)->processed + ((p)->buf - (p)->bufBase) + (size_t)(p)->cacheSize)
+#define RangeEnc_GetProcessed(p) ( (p)->processed + (size_t)((p)->buf - (p)->bufBase) + (p)->cacheSize)
+#define RangeEnc_GetProcessed_sizet(p) ((size_t)(p)->processed + (size_t)((p)->buf - (p)->bufBase) + (size_t)(p)->cacheSize)
#define RC_BUF_SIZE (1 << 16)
@@ -556,12 +668,11 @@ static int RangeEnc_Alloc(CRangeEnc *p, ISzAllocPtr alloc)
static void RangeEnc_Free(CRangeEnc *p, ISzAllocPtr alloc)
{
ISzAlloc_Free(alloc, p->bufBase);
- p->bufBase = 0;
+ p->bufBase = NULL;
}
static void RangeEnc_Init(CRangeEnc *p)
{
- /* Stream.Init(); */
p->range = 0xFFFFFFFF;
p->cache = 0;
p->low = 0;
@@ -575,12 +686,12 @@ static void RangeEnc_Init(CRangeEnc *p)
MY_NO_INLINE static void RangeEnc_FlushStream(CRangeEnc *p)
{
- size_t num;
- if (p->res != SZ_OK)
- return;
- num = p->buf - p->bufBase;
- if (num != ISeqOutStream_Write(p->outStream, p->bufBase, num))
- p->res = SZ_ERROR_WRITE;
+ const size_t num = (size_t)(p->buf - p->bufBase);
+ if (p->res == SZ_OK)
+ {
+ if (num != ISeqOutStream_Write(p->outStream, p->bufBase, num))
+ p->res = SZ_ERROR_WRITE;
+ }
p->processed += num;
p->buf = p->bufBase;
}
@@ -656,7 +767,7 @@ static void RangeEnc_FlushData(CRangeEnc *p)
range += newBound & mask; \
mask &= (kBitModelTotal - ((1 << kNumMoveBits) - 1)); \
mask += ((1 << kNumMoveBits) - 1); \
- ttt += (Int32)(mask - ttt) >> kNumMoveBits; \
+ ttt += (UInt32)((Int32)(mask - ttt) >> kNumMoveBits); \
*(prob) = (CLzmaProb)ttt; \
RC_NORM(p) \
}
@@ -749,7 +860,7 @@ static void LzmaEnc_InitPriceTables(CProbPrice *ProbPrices)
bitCount++;
}
}
- ProbPrices[i] = (CProbPrice)((kNumBitModelTotalBits << kCyclesBits) - 15 - bitCount);
+ ProbPrices[i] = (CProbPrice)(((unsigned)kNumBitModelTotalBits << kCyclesBits) - 15 - bitCount);
// printf("\n%3d: %5d", i, ProbPrices[i]);
}
}
@@ -985,7 +1096,11 @@ static unsigned ReadMatchDistances(CLzmaEnc *p, unsigned *numPairsRes)
p->additionalOffset++;
p->numAvail = p->matchFinder.GetNumAvailableBytes(p->matchFinderObj);
- numPairs = p->matchFinder.GetMatches(p->matchFinderObj, p->matches);
+ {
+ const UInt32 *d = p->matchFinder.GetMatches(p->matchFinderObj, p->matches);
+ // if (!d) { p->mf_Failure = True; *numPairsRes = 0; return 0; }
+ numPairs = (unsigned)(d - p->matches);
+ }
*numPairsRes = numPairs;
#ifdef SHOW_STAT
@@ -1001,7 +1116,7 @@ static unsigned ReadMatchDistances(CLzmaEnc *p, unsigned *numPairsRes)
if (numPairs == 0)
return 0;
{
- unsigned len = p->matches[(size_t)numPairs - 2];
+ const unsigned len = p->matches[(size_t)numPairs - 2];
if (len != p->numFastBytes)
return len;
{
@@ -1011,7 +1126,7 @@ static unsigned ReadMatchDistances(CLzmaEnc *p, unsigned *numPairsRes)
{
const Byte *p1 = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;
const Byte *p2 = p1 + len;
- ptrdiff_t dif = (ptrdiff_t)-1 - p->matches[(size_t)numPairs - 1];
+ const ptrdiff_t dif = (ptrdiff_t)-1 - (ptrdiff_t)p->matches[(size_t)numPairs - 1];
const Byte *lim = p1 + numAvail;
for (; p2 != lim && *p2 == p2[dif]; p2++)
{}
@@ -1167,6 +1282,8 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
repLens[i] = len;
if (len > repLens[repMaxIndex])
repMaxIndex = i;
+ if (len == LZMA_MATCH_LEN_MAX) // 21.03 : optimization
+ break;
}
if (repLens[repMaxIndex] >= p->numFastBytes)
@@ -1179,10 +1296,12 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
}
matches = p->matches;
+ #define MATCHES matches
+ // #define MATCHES p->matches
if (mainLen >= p->numFastBytes)
{
- p->backRes = matches[(size_t)numPairs - 1] + LZMA_NUM_REPS;
+ p->backRes = MATCHES[(size_t)numPairs - 1] + LZMA_NUM_REPS;
MOVE_POS(p, mainLen - 1)
return mainLen;
}
@@ -1276,13 +1395,13 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
if (len < 2)
len = 2;
else
- while (len > matches[offs])
+ while (len > MATCHES[offs])
offs += 2;
for (; ; len++)
{
COptimal *opt;
- UInt32 dist = matches[(size_t)offs + 1];
+ UInt32 dist = MATCHES[(size_t)offs + 1];
UInt32 price = normalMatchPrice + GET_PRICE_LEN(&p->lenEnc, posState, len);
unsigned lenToPosState = GetLenToPosState(len);
@@ -1306,7 +1425,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
opt->extra = 0;
}
- if (len == matches[offs])
+ if (len == MATCHES[offs])
{
offs += 2;
if (offs == numPairs)
@@ -1727,8 +1846,8 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
if (newLen > numAvail)
{
newLen = numAvail;
- for (numPairs = 0; newLen > matches[numPairs]; numPairs += 2);
- matches[numPairs] = (UInt32)newLen;
+ for (numPairs = 0; newLen > MATCHES[numPairs]; numPairs += 2);
+ MATCHES[numPairs] = (UInt32)newLen;
numPairs += 2;
}
@@ -1747,9 +1866,9 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
}
offs = 0;
- while (startLen > matches[offs])
+ while (startLen > MATCHES[offs])
offs += 2;
- dist = matches[(size_t)offs + 1];
+ dist = MATCHES[(size_t)offs + 1];
// if (dist >= kNumFullDistances)
GetPosSlot2(dist, posSlot);
@@ -1776,7 +1895,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
}
}
- if (len == matches[offs])
+ if (len == MATCHES[offs])
{
// if (p->_maxMode) {
// MATCH : LIT : REP_0
@@ -1841,7 +1960,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
offs += 2;
if (offs == numPairs)
break;
- dist = matches[(size_t)offs + 1];
+ dist = MATCHES[(size_t)offs + 1];
// if (dist >= kNumFullDistances)
GetPosSlot2(dist, posSlot);
}
@@ -2059,8 +2178,23 @@ static SRes CheckErrors(CLzmaEnc *p)
return p->result;
if (p->rc.res != SZ_OK)
p->result = SZ_ERROR_WRITE;
- if (p->matchFinderBase.result != SZ_OK)
+
+ #ifndef _7ZIP_ST
+ if (
+ // p->mf_Failure ||
+ (p->mtMode &&
+ ( // p->matchFinderMt.failure_LZ_LZ ||
+ p->matchFinderMt.failure_LZ_BT))
+ )
+ {
+ p->result = MY_HRES_ERROR__INTERNAL_ERROR;
+ // printf("\nCheckErrors p->matchFinderMt.failureLZ\n");
+ }
+ #endif
+
+ if (MFB.result != SZ_OK)
p->result = SZ_ERROR_READ;
+
if (p->result != SZ_OK)
p->finished = True;
return p->result;
@@ -2198,14 +2332,14 @@ MY_NO_INLINE static void FillDistancesPrices(CLzmaEnc *p)
-void LzmaEnc_Construct(CLzmaEnc *p)
+static void LzmaEnc_Construct(CLzmaEnc *p)
{
RangeEnc_Construct(&p->rc);
- MatchFinder_Construct(&p->matchFinderBase);
+ MatchFinder_Construct(&MFB);
#ifndef _7ZIP_ST
+ p->matchFinderMt.MatchFinder = &MFB;
MatchFinderMt_Construct(&p->matchFinderMt);
- p->matchFinderMt.MatchFinder = &p->matchFinderBase;
#endif
{
@@ -2221,7 +2355,6 @@ void LzmaEnc_Construct(CLzmaEnc *p)
LzmaEnc_InitPriceTables(p->ProbPrices);
p->litProbs = NULL;
p->saveState.litProbs = NULL;
-
}
CLzmaEncHandle LzmaEnc_Create(ISzAllocPtr alloc)
@@ -2233,7 +2366,7 @@ CLzmaEncHandle LzmaEnc_Create(ISzAllocPtr alloc)
return p;
}
-void LzmaEnc_FreeLits(CLzmaEnc *p, ISzAllocPtr alloc)
+static void LzmaEnc_FreeLits(CLzmaEnc *p, ISzAllocPtr alloc)
{
ISzAlloc_Free(alloc, p->litProbs);
ISzAlloc_Free(alloc, p->saveState.litProbs);
@@ -2241,13 +2374,13 @@ void LzmaEnc_FreeLits(CLzmaEnc *p, ISzAllocPtr alloc)
p->saveState.litProbs = NULL;
}
-void LzmaEnc_Destruct(CLzmaEnc *p, ISzAllocPtr alloc, ISzAllocPtr allocBig)
+static void LzmaEnc_Destruct(CLzmaEnc *p, ISzAllocPtr alloc, ISzAllocPtr allocBig)
{
#ifndef _7ZIP_ST
MatchFinderMt_Destruct(&p->matchFinderMt, allocBig);
#endif
- MatchFinder_Free(&p->matchFinderBase, allocBig);
+ MatchFinder_Free(&MFB, allocBig);
LzmaEnc_FreeLits(p, alloc);
RangeEnc_Free(&p->rc, alloc);
}
@@ -2259,11 +2392,18 @@ void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig)
}
+MY_NO_INLINE
static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpackSize)
{
UInt32 nowPos32, startPos32;
if (p->needInit)
{
+ #ifndef _7ZIP_ST
+ if (p->mtMode)
+ {
+ RINOK(MatchFinderMt_InitMt(&p->matchFinderMt));
+ }
+ #endif
p->matchFinder.Init(p->matchFinderObj);
p->needInit = 0;
}
@@ -2521,12 +2661,12 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpa
// { int y; for (y = 0; y < 100; y++) {
FillDistancesPrices(p);
// }}
- LenPriceEnc_UpdateTables(&p->lenEnc, 1 << p->pb, &p->lenProbs, p->ProbPrices);
+ LenPriceEnc_UpdateTables(&p->lenEnc, (unsigned)1 << p->pb, &p->lenProbs, p->ProbPrices);
}
if (p->repLenEncCounter <= 0)
{
p->repLenEncCounter = REP_LEN_COUNT;
- LenPriceEnc_UpdateTables(&p->repLenEnc, 1 << p->pb, &p->repLenProbs, p->ProbPrices);
+ LenPriceEnc_UpdateTables(&p->repLenEnc, (unsigned)1 << p->pb, &p->repLenProbs, p->ProbPrices);
}
}
@@ -2559,11 +2699,13 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpa
static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig)
{
UInt32 beforeSize = kNumOpts;
+ UInt32 dictSize;
+
if (!RangeEnc_Alloc(&p->rc, alloc))
return SZ_ERROR_MEM;
#ifndef _7ZIP_ST
- p->mtMode = (p->multiThread && !p->fastMode && (p->matchFinderBase.btMode != 0));
+ p->mtMode = (p->multiThread && !p->fastMode && (MFB.btMode != 0));
#endif
{
@@ -2582,36 +2724,56 @@ static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc,
}
}
- p->matchFinderBase.bigHash = (Byte)(p->dictSize > kBigHashDicLimit ? 1 : 0);
+ MFB.bigHash = (Byte)(p->dictSize > kBigHashDicLimit ? 1 : 0);
- if (beforeSize + p->dictSize < keepWindowSize)
- beforeSize = keepWindowSize - p->dictSize;
+
+ dictSize = p->dictSize;
+ if (dictSize == ((UInt32)2 << 30) ||
+ dictSize == ((UInt32)3 << 30))
+ {
+ /* 21.03 : here we reduce the dictionary for 2 reasons:
+ 1) we don't want 32-bit back_distance matches in decoder for 2 GB dictionary.
+ 2) we want to elimate useless last MatchFinder_Normalize3() for corner cases,
+ where data size is aligned for 1 GB: 5/6/8 GB.
+ That reducing must be >= 1 for such corner cases. */
+ dictSize -= 1;
+ }
+
+ if (beforeSize + dictSize < keepWindowSize)
+ beforeSize = keepWindowSize - dictSize;
+
+ /* in worst case we can look ahead for
+ max(LZMA_MATCH_LEN_MAX, numFastBytes + 1 + numFastBytes) bytes.
+ we send larger value for (keepAfter) to MantchFinder_Create():
+ (numFastBytes + LZMA_MATCH_LEN_MAX + 1)
+ */
#ifndef _7ZIP_ST
if (p->mtMode)
{
- RINOK(MatchFinderMt_Create(&p->matchFinderMt, p->dictSize, beforeSize, p->numFastBytes,
- LZMA_MATCH_LEN_MAX
- + 1 /* 18.04 */
+ RINOK(MatchFinderMt_Create(&p->matchFinderMt, dictSize, beforeSize,
+ p->numFastBytes, LZMA_MATCH_LEN_MAX + 1 /* 18.04 */
, allocBig));
p->matchFinderObj = &p->matchFinderMt;
- p->matchFinderBase.bigHash = (Byte)(
- (p->dictSize > kBigHashDicLimit && p->matchFinderBase.hashMask >= 0xFFFFFF) ? 1 : 0);
+ MFB.bigHash = (Byte)(
+ (p->dictSize > kBigHashDicLimit && MFB.hashMask >= 0xFFFFFF) ? 1 : 0);
MatchFinderMt_CreateVTable(&p->matchFinderMt, &p->matchFinder);
}
else
#endif
{
- if (!MatchFinder_Create(&p->matchFinderBase, p->dictSize, beforeSize, p->numFastBytes, LZMA_MATCH_LEN_MAX, allocBig))
+ if (!MatchFinder_Create(&MFB, dictSize, beforeSize,
+ p->numFastBytes, LZMA_MATCH_LEN_MAX + 1 /* 21.03 */
+ , allocBig))
return SZ_ERROR_MEM;
- p->matchFinderObj = &p->matchFinderBase;
- MatchFinder_CreateVTable(&p->matchFinderBase, &p->matchFinder);
+ p->matchFinderObj = &MFB;
+ MatchFinder_CreateVTable(&MFB, &p->matchFinder);
}
return SZ_OK;
}
-void LzmaEnc_Init(CLzmaEnc *p)
+static void LzmaEnc_Init(CLzmaEnc *p)
{
unsigned i;
p->state = 0;
@@ -2675,12 +2837,14 @@ void LzmaEnc_Init(CLzmaEnc *p)
p->additionalOffset = 0;
- p->pbMask = (1 << p->pb) - 1;
+ p->pbMask = ((unsigned)1 << p->pb) - 1;
p->lpMask = ((UInt32)0x100 << p->lp) - ((unsigned)0x100 >> p->lc);
+
+ // p->mf_Failure = False;
}
-void LzmaEnc_InitPrices(CLzmaEnc *p)
+static void LzmaEnc_InitPrices(CLzmaEnc *p)
{
if (!p->fastMode)
{
@@ -2694,8 +2858,8 @@ void LzmaEnc_InitPrices(CLzmaEnc *p)
p->repLenEncCounter = REP_LEN_COUNT;
- LenPriceEnc_UpdateTables(&p->lenEnc, 1 << p->pb, &p->lenProbs, p->ProbPrices);
- LenPriceEnc_UpdateTables(&p->repLenEnc, 1 << p->pb, &p->repLenProbs, p->ProbPrices);
+ LenPriceEnc_UpdateTables(&p->lenEnc, (unsigned)1 << p->pb, &p->lenProbs, p->ProbPrices);
+ LenPriceEnc_UpdateTables(&p->repLenEnc, (unsigned)1 << p->pb, &p->repLenProbs, p->ProbPrices);
}
static SRes LzmaEnc_AllocAndInit(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig)
@@ -2719,7 +2883,7 @@ static SRes LzmaEnc_Prepare(CLzmaEncHandle pp, ISeqOutStream *outStream, ISeqInS
ISzAllocPtr alloc, ISzAllocPtr allocBig)
{
CLzmaEnc *p = (CLzmaEnc *)pp;
- p->matchFinderBase.stream = inStream;
+ MFB.stream = inStream;
p->needInit = 1;
p->rc.outStream = outStream;
return LzmaEnc_AllocAndInit(p, 0, alloc, allocBig);
@@ -2730,16 +2894,16 @@ SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle pp,
ISzAllocPtr alloc, ISzAllocPtr allocBig)
{
CLzmaEnc *p = (CLzmaEnc *)pp;
- p->matchFinderBase.stream = inStream;
+ MFB.stream = inStream;
p->needInit = 1;
return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig);
}
static void LzmaEnc_SetInputBuf(CLzmaEnc *p, const Byte *src, SizeT srcLen)
{
- p->matchFinderBase.directInput = 1;
- p->matchFinderBase.bufferBase = (Byte *)src;
- p->matchFinderBase.directInputRem = srcLen;
+ MFB.directInput = 1;
+ MFB.bufferBase = (Byte *)src;
+ MFB.directInputRem = srcLen;
}
SRes LzmaEnc_MemPrepare(CLzmaEncHandle pp, const Byte *src, SizeT srcLen,
@@ -2781,19 +2945,23 @@ static size_t SeqOutStreamBuf_Write(const ISeqOutStream *pp, const void *data, s
size = p->rem;
p->overflow = True;
}
- memcpy(p->data, data, size);
- p->rem -= size;
- p->data += size;
+ if (size != 0)
+ {
+ memcpy(p->data, data, size);
+ p->rem -= size;
+ p->data += size;
+ }
return size;
}
+/*
UInt32 LzmaEnc_GetNumAvailableBytes(CLzmaEncHandle pp)
{
const CLzmaEnc *p = (CLzmaEnc *)pp;
return p->matchFinder.GetNumAvailableBytes(p->matchFinderObj);
}
-
+*/
const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle pp)
{
@@ -2802,6 +2970,7 @@ const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle pp)
}
+// (desiredPackSize == 0) is not allowed
SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, BoolInt reInit,
Byte *dest, size_t *destLen, UInt32 desiredPackSize, UInt32 *unpackSize)
{
@@ -2822,14 +2991,10 @@ SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, BoolInt reInit,
if (reInit)
LzmaEnc_Init(p);
LzmaEnc_InitPrices(p);
-
- nowPos64 = p->nowPos64;
RangeEnc_Init(&p->rc);
p->rc.outStream = &outStream.vt;
-
- if (desiredPackSize == 0)
- return SZ_ERROR_OUTPUT_EOF;
-
+ nowPos64 = p->nowPos64;
+
res = LzmaEnc_CodeOneBlock(p, desiredPackSize, *unpackSize);
*unpackSize = (UInt32)(p->nowPos64 - nowPos64);
@@ -2841,6 +3006,7 @@ SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, BoolInt reInit,
}
+MY_NO_INLINE
static SRes LzmaEnc_Encode2(CLzmaEnc *p, ICompressProgress *progress)
{
SRes res = SZ_OK;
@@ -2870,7 +3036,7 @@ static SRes LzmaEnc_Encode2(CLzmaEnc *p, ICompressProgress *progress)
LzmaEnc_Finish(p);
/*
- if (res == SZ_OK && !Inline_MatchFinder_IsFinishedOK(&p->matchFinderBase))
+ if (res == SZ_OK && !Inline_MatchFinder_IsFinishedOK(&MFB))
res = SZ_ERROR_FAIL;
}
*/
@@ -2889,35 +3055,43 @@ SRes LzmaEnc_Encode(CLzmaEncHandle pp, ISeqOutStream *outStream, ISeqInStream *i
SRes LzmaEnc_WriteProperties(CLzmaEncHandle pp, Byte *props, SizeT *size)
{
- CLzmaEnc *p = (CLzmaEnc *)pp;
- unsigned i;
- UInt32 dictSize = p->dictSize;
if (*size < LZMA_PROPS_SIZE)
return SZ_ERROR_PARAM;
*size = LZMA_PROPS_SIZE;
- props[0] = (Byte)((p->pb * 5 + p->lp) * 9 + p->lc);
-
- if (dictSize >= ((UInt32)1 << 22))
- {
- UInt32 kDictMask = ((UInt32)1 << 20) - 1;
- if (dictSize < (UInt32)0xFFFFFFFF - kDictMask)
- dictSize = (dictSize + kDictMask) & ~kDictMask;
- }
- else for (i = 11; i <= 30; i++)
{
- if (dictSize <= ((UInt32)2 << i)) { dictSize = (2 << i); break; }
- if (dictSize <= ((UInt32)3 << i)) { dictSize = (3 << i); break; }
- }
+ const CLzmaEnc *p = (const CLzmaEnc *)pp;
+ const UInt32 dictSize = p->dictSize;
+ UInt32 v;
+ props[0] = (Byte)((p->pb * 5 + p->lp) * 9 + p->lc);
+
+ // we write aligned dictionary value to properties for lzma decoder
+ if (dictSize >= ((UInt32)1 << 21))
+ {
+ const UInt32 kDictMask = ((UInt32)1 << 20) - 1;
+ v = (dictSize + kDictMask) & ~kDictMask;
+ if (v < dictSize)
+ v = dictSize;
+ }
+ else
+ {
+ unsigned i = 11 * 2;
+ do
+ {
+ v = (UInt32)(2 + (i & 1)) << (i >> 1);
+ i++;
+ }
+ while (v < dictSize);
+ }
- for (i = 0; i < 4; i++)
- props[1 + i] = (Byte)(dictSize >> (8 * i));
- return SZ_OK;
+ SetUi32(props + 1, v);
+ return SZ_OK;
+ }
}
unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle pp)
{
- return ((CLzmaEnc *)pp)->writeEndMark;
+ return (unsigned)((CLzmaEnc *)pp)->writeEndMark;
}
@@ -2974,3 +3148,15 @@ SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
LzmaEnc_Destroy(p, alloc, allocBig);
return res;
}
+
+
+/*
+#ifndef _7ZIP_ST
+void LzmaEnc_GetLzThreads(CLzmaEncHandle pp, HANDLE lz_threads[2])
+{
+ const CLzmaEnc *p = (CLzmaEnc *)pp;
+ lz_threads[0] = p->matchFinderMt.hashSync.thread;
+ lz_threads[1] = p->matchFinderMt.btSync.thread;
+}
+#endif
+*/
diff --git a/multiarc/src/formats/7z/C/LzmaEnc.h b/multiarc/src/formats/7z/C/LzmaEnc.h
index 9194ee57..bc2ed504 100644..100755
--- a/multiarc/src/formats/7z/C/LzmaEnc.h
+++ b/multiarc/src/formats/7z/C/LzmaEnc.h
@@ -1,5 +1,5 @@
/* LzmaEnc.h -- LZMA Encoder
-2017-07-27 : Igor Pavlov : Public domain */
+2019-10-30 : Igor Pavlov : Public domain */
#ifndef __LZMA_ENC_H
#define __LZMA_ENC_H
@@ -29,6 +29,8 @@ typedef struct _CLzmaEncProps
UInt64 reduceSize; /* estimated size of data that will be compressed. default = (UInt64)(Int64)-1.
Encoder uses this value to reduce dictionary size */
+
+ UInt64 affinity;
} CLzmaEncProps;
void LzmaEncProps_Init(CLzmaEncProps *p);
diff --git a/multiarc/src/formats/7z/C/LzmaLib.c b/multiarc/src/formats/7z/C/LzmaLib.c
index 706e9e58..706e9e58 100644..100755
--- a/multiarc/src/formats/7z/C/LzmaLib.c
+++ b/multiarc/src/formats/7z/C/LzmaLib.c
diff --git a/multiarc/src/formats/7z/C/LzmaLib.h b/multiarc/src/formats/7z/C/LzmaLib.h
index 88fa87d3..c343a859 100644..100755
--- a/multiarc/src/formats/7z/C/LzmaLib.h
+++ b/multiarc/src/formats/7z/C/LzmaLib.h
@@ -1,5 +1,5 @@
/* LzmaLib.h -- LZMA library interface
-2013-01-18 : Igor Pavlov : Public domain */
+2021-04-03 : Igor Pavlov : Public domain */
#ifndef __LZMA_LIB_H
#define __LZMA_LIB_H
@@ -40,14 +40,16 @@ outPropsSize -
level - compression level: 0 <= level <= 9;
level dictSize algo fb
- 0: 16 KB 0 32
- 1: 64 KB 0 32
- 2: 256 KB 0 32
- 3: 1 MB 0 32
- 4: 4 MB 0 32
+ 0: 64 KB 0 32
+ 1: 256 KB 0 32
+ 2: 1 MB 0 32
+ 3: 4 MB 0 32
+ 4: 16 MB 0 32
5: 16 MB 1 32
6: 32 MB 1 32
- 7+: 64 MB 1 64
+ 7: 32 MB 1 64
+ 8: 64 MB 1 64
+ 9: 64 MB 1 64
The default value for "level" is 5.
@@ -83,6 +85,11 @@ fb - Word size (the number of fast bytes).
numThreads - The number of thereads. 1 or 2. The default value is 2.
Fast mode (algo = 0) can use only 1 thread.
+In:
+ dest - output data buffer
+ destLen - output data buffer size
+ src - input data
+ srcLen - input data size
Out:
destLen - processed output size
Returns:
@@ -108,8 +115,8 @@ MY_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char
LzmaUncompress
--------------
In:
- dest - output data
- destLen - output data size
+ dest - output data buffer
+ destLen - output data buffer size
src - input data
srcLen - input data size
Out:
diff --git a/multiarc/src/formats/7z/C/MtCoder.c b/multiarc/src/formats/7z/C/MtCoder.c
index 95359857..99dc9090 100644..100755
--- a/multiarc/src/formats/7z/C/MtCoder.c
+++ b/multiarc/src/formats/7z/C/MtCoder.c
@@ -1,5 +1,5 @@
/* MtCoder.c -- Multi-thread Coder
-2018-07-04 : Igor Pavlov : Public domain */
+2021-12-21 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -7,7 +7,7 @@
#ifndef _7ZIP_ST
-SRes MtProgressThunk_Progress(const ICompressProgress *pp, UInt64 inSize, UInt64 outSize)
+static SRes MtProgressThunk_Progress(const ICompressProgress *pp, UInt64 inSize, UInt64 outSize)
{
CMtProgressThunk *thunk = CONTAINER_FROM_VTBL(pp, CMtProgressThunk, vt);
UInt64 inSize2 = 0;
@@ -44,7 +44,7 @@ static WRes ArEvent_OptCreate_And_Reset(CEvent *p)
}
-static THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE ThreadFunc(void *pp);
+static THREAD_FUNC_DECL ThreadFunc(void *pp);
static SRes MtCoderThread_CreateAndStart(CMtCoderThread *t)
@@ -70,8 +70,7 @@ static void MtCoderThread_Destruct(CMtCoderThread *t)
{
t->stop = 1;
Event_Set(&t->startEvent);
- Thread_Wait(&t->thread);
- Thread_Close(&t->thread);
+ Thread_Wait_Close(&t->thread);
}
Event_Close(&t->startEvent);
@@ -336,13 +335,13 @@ static SRes ThreadFunc2(CMtCoderThread *t)
}
-static THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE ThreadFunc(void *pp)
+static THREAD_FUNC_DECL ThreadFunc(void *pp)
{
CMtCoderThread *t = (CMtCoderThread *)pp;
for (;;)
{
if (Event_Wait(&t->startEvent) != 0)
- return SZ_ERROR_THREAD;
+ return (THREAD_FUNC_RET_TYPE)SZ_ERROR_THREAD;
if (t->stop)
return 0;
{
@@ -358,7 +357,7 @@ static THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE ThreadFunc(void *pp)
unsigned numFinished = (unsigned)InterlockedIncrement(&mtc->numFinishedThreads);
if (numFinished == mtc->numStartedThreads)
if (Event_Set(&mtc->finishedEvent) != 0)
- return SZ_ERROR_THREAD;
+ return (THREAD_FUNC_RET_TYPE)SZ_ERROR_THREAD;
}
#endif
}
@@ -496,12 +495,7 @@ SRes MtCoder_Code(CMtCoder *p)
{
RINOK_THREAD(ArEvent_OptCreate_And_Reset(&p->readEvent));
-
- if (Semaphore_IsCreated(&p->blocksSemaphore))
- {
- RINOK_THREAD(Semaphore_Close(&p->blocksSemaphore));
- }
- RINOK_THREAD(Semaphore_Create(&p->blocksSemaphore, numBlocksMax, numBlocksMax));
+ RINOK_THREAD(Semaphore_OptCreateInit(&p->blocksSemaphore, numBlocksMax, numBlocksMax));
}
for (i = 0; i < MTCODER__BLOCKS_MAX - 1; i++)
diff --git a/multiarc/src/formats/7z/C/MtCoder.h b/multiarc/src/formats/7z/C/MtCoder.h
index 5a5f4d11..5a5f4d11 100644..100755
--- a/multiarc/src/formats/7z/C/MtCoder.h
+++ b/multiarc/src/formats/7z/C/MtCoder.h
diff --git a/multiarc/src/formats/7z/C/MtDec.c b/multiarc/src/formats/7z/C/MtDec.c
index 7803bf2a..45a67139 100644..100755
--- a/multiarc/src/formats/7z/C/MtDec.c
+++ b/multiarc/src/formats/7z/C/MtDec.c
@@ -1,16 +1,21 @@
/* MtDec.c -- Multi-thread Decoder
-2019-02-02 : Igor Pavlov : Public domain */
+2021-12-21 : Igor Pavlov : Public domain */
#include "Precomp.h"
// #define SHOW_DEBUG_INFO
// #include <stdio.h>
+#include <string.h>
#ifdef SHOW_DEBUG_INFO
#include <stdio.h>
#endif
+#include "MtDec.h"
+
+#ifndef _7ZIP_ST
+
#ifdef SHOW_DEBUG_INFO
#define PRF(x) x
#else
@@ -19,10 +24,6 @@
#define PRF_STR_INT(s, d) PRF(printf("\n" s " %d\n", (unsigned)d))
-#include "MtDec.h"
-
-#ifndef _7ZIP_ST
-
void MtProgress_Init(CMtProgress *p, ICompressProgress *progress)
{
p->progress = progress;
@@ -77,7 +78,7 @@ void MtProgress_SetError(CMtProgress *p, SRes res)
}
-#define RINOK_THREAD(x) RINOK(x)
+#define RINOK_THREAD(x) RINOK_WRes(x)
static WRes ArEvent_OptCreate_And_Reset(CEvent *p)
@@ -101,7 +102,7 @@ typedef struct __CMtDecBufLink CMtDecBufLink;
-static THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE ThreadFunc(void *pp);
+static THREAD_FUNC_DECL ThreadFunc(void *pp);
static WRes MtDecThread_CreateEvents(CMtDecThread *t)
@@ -156,8 +157,7 @@ static void MtDecThread_CloseThread(CMtDecThread *t)
{
Event_Set(&t->canWrite); /* we can disable it. There are no threads waiting canWrite in normal cases */
Event_Set(&t->canRead);
- Thread_Wait(&t->thread);
- Thread_Close(&t->thread);
+ Thread_Wait_Close(&t->thread);
}
Event_Close(&t->canRead);
@@ -289,12 +289,13 @@ static WRes ThreadFunc2(CMtDecThread *t)
Byte *afterEndData = NULL;
size_t afterEndData_Size = 0;
+ BoolInt afterEndData_IsCross = False;
BoolInt canCreateNewThread = False;
// CMtDecCallbackInfo parse;
CMtDecThread *nextThread;
- PRF_STR_INT("Event_Wait(&t->canRead)", t->index);
+ PRF_STR_INT("=============== Event_Wait(&t->canRead)", t->index);
RINOK_THREAD(Event_Wait(&t->canRead));
if (p->exitThread)
@@ -418,10 +419,12 @@ static WRes ThreadFunc2(CMtDecThread *t)
parse.srcFinished = finish;
parse.canCreateNewThread = True;
- // PRF(printf("\nParse size = %d\n", (unsigned)size))
+ PRF(printf("\nParse size = %d\n", (unsigned)size));
p->mtCallback->Parse(p->mtCallbackObject, t->index, &parse);
+ PRF(printf(" Parse processed = %d, state = %d \n", (unsigned)parse.srcSize, (unsigned)parse.state));
+
needWrite = True;
canCreateNewThread = parse.canCreateNewThread;
@@ -478,16 +481,12 @@ static WRes ThreadFunc2(CMtDecThread *t)
if (parse.state == MTDEC_PARSE_END)
{
- p->crossStart = 0;
- p->crossEnd = 0;
-
- if (crossSize != 0)
- memcpy(data + parse.srcSize, parseData + parse.srcSize, size - parse.srcSize); // we need all data
- afterEndData_Size = size - parse.srcSize;
afterEndData = parseData + parse.srcSize;
-
+ afterEndData_Size = size - parse.srcSize;
+ if (crossSize != 0)
+ afterEndData_IsCross = True;
// we reduce data size to required bytes (parsed only)
- inDataSize -= (size - parse.srcSize);
+ inDataSize -= afterEndData_Size;
if (!prev)
inDataSize_Start = parse.srcSize;
break;
@@ -752,13 +751,15 @@ static WRes ThreadFunc2(CMtDecThread *t)
{
// p->inProcessed += inCodePos;
+ PRF(printf("\n--Write afterSize = %d\n", (unsigned)afterEndData_Size));
+
res = p->mtCallback->Write(p->mtCallbackObject, t->index,
res == SZ_OK && needWriteToStream && !wasInterrupted, // needWrite
- afterEndData, afterEndData_Size,
+ afterEndData, afterEndData_Size, afterEndData_IsCross,
&needContinue,
&canRecode);
-
- // res= E_INVALIDARG; // for test
+
+ // res = SZ_ERROR_FAIL; // for test
PRF(printf("\nAfter Write needContinue = %d\n", (unsigned)needContinue));
PRF(printf("\nprocessed = %d\n", (unsigned)p->inProcessed));
@@ -835,7 +836,7 @@ static WRes ThreadFunc2(CMtDecThread *t)
#endif
-static THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE ThreadFunc1(void *pp)
+static THREAD_FUNC_DECL ThreadFunc1(void *pp)
{
WRes res;
@@ -847,7 +848,7 @@ static THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE ThreadFunc1(void *pp)
res = ThreadFunc2(t);
p = t->mtDec;
if (res == 0)
- return p->exitThreadWRes;
+ return (THREAD_FUNC_RET_TYPE)(UINT_PTR)p->exitThreadWRes;
{
// it's unexpected situation for some threading function error
if (p->exitThreadWRes == 0)
@@ -858,15 +859,14 @@ static THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE ThreadFunc1(void *pp)
Event_Set(&p->threads[0].canWrite);
MtProgress_SetError(&p->mtProgress, MY_SRes_HRESULT_FROM_WRes(res));
}
- return res;
+ return (THREAD_FUNC_RET_TYPE)(UINT_PTR)res;
}
-static MY_NO_INLINE THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE ThreadFunc(void *pp)
+static MY_NO_INLINE THREAD_FUNC_DECL ThreadFunc(void *pp)
{
+ #ifdef USE_ALLOCA
CMtDecThread *t = (CMtDecThread *)pp;
-
// fprintf(stderr, "\n%d = %p - before", t->index, &t);
- #ifdef USE_ALLOCA
t->allocaPtr = alloca(t->index * 128);
#endif
return ThreadFunc1(pp);
@@ -1092,13 +1092,14 @@ SRes MtDec_Code(CMtDec *p)
{
WRes wres;
- WRes sres;
+ SRes sres;
CMtDecThread *nextThread = &p->threads[p->numStartedThreads++];
// wres = MtDecThread_CreateAndStart(nextThread);
wres = MtDecThread_CreateEvents(nextThread);
if (wres == 0) { wres = Event_Set(&nextThread->canWrite);
if (wres == 0) { wres = Event_Set(&nextThread->canRead);
- if (wres == 0) { wres = ThreadFunc(nextThread);
+ if (wres == 0) { THREAD_FUNC_RET_TYPE res = ThreadFunc(nextThread);
+ wres = (WRes)(UINT_PTR)res;
if (wres != 0)
{
p->needContinue = False;
@@ -1130,8 +1131,8 @@ SRes MtDec_Code(CMtDec *p)
return SZ_OK;
// if (sres != SZ_OK)
- return sres;
- // return E_FAIL;
+ return sres;
+ // return SZ_ERROR_FAIL;
}
}
diff --git a/multiarc/src/formats/7z/C/MtDec.h b/multiarc/src/formats/7z/C/MtDec.h
index 9b577667..c2da46ae 100644..100755
--- a/multiarc/src/formats/7z/C/MtDec.h
+++ b/multiarc/src/formats/7z/C/MtDec.h
@@ -1,5 +1,5 @@
/* MtDec.h -- Multi-thread Decoder
-2018-07-04 : Igor Pavlov : Public domain */
+2020-03-05 : Igor Pavlov : Public domain */
#ifndef __MT_DEC_H
#define __MT_DEC_H
@@ -108,11 +108,12 @@ typedef struct
*/
SRes (*Write)(void *p, unsigned coderIndex,
BoolInt needWriteToStream,
- const Byte *src, size_t srcSize,
+ const Byte *src, size_t srcSize, BoolInt isCross,
// int srcFinished,
BoolInt *needContinue,
BoolInt *canRecode);
-} IMtDecCallback;
+
+} IMtDecCallback2;
@@ -132,7 +133,7 @@ typedef struct _CMtDec
ICompressProgress *progress;
ISzAllocPtr alloc;
- IMtDecCallback *mtCallback;
+ IMtDecCallback2 *mtCallback;
void *mtCallbackObject;
diff --git a/multiarc/src/formats/7z/C/Ppmd.h b/multiarc/src/formats/7z/C/Ppmd.h
index a5c1e3ef..b1987920 100644..100755
--- a/multiarc/src/formats/7z/C/Ppmd.h
+++ b/multiarc/src/formats/7z/C/Ppmd.h
@@ -1,5 +1,5 @@
/* Ppmd.h -- PPMD codec common code
-2017-04-03 : Igor Pavlov : Public domain
+2021-04-13 : Igor Pavlov : Public domain
This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
#ifndef __PPMD_H
@@ -9,7 +9,16 @@ This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
EXTERN_C_BEGIN
-#ifdef MY_CPU_32BIT
+#if defined(MY_CPU_SIZEOF_POINTER) && (MY_CPU_SIZEOF_POINTER == 4)
+/*
+ PPMD code always uses 32-bit internal fields in PPMD structures to store internal references in main block.
+ if (PPMD_32BIT is defined), the PPMD code stores internal pointers to 32-bit reference fields.
+ if (PPMD_32BIT is NOT defined), the PPMD code stores internal UInt32 offsets to reference fields.
+ if (pointer size is 64-bit), then (PPMD_32BIT) mode is not allowed,
+ if (pointer size is 32-bit), then (PPMD_32BIT) mode is optional,
+ and it's allowed to disable PPMD_32BIT mode even if pointer is 32-bit.
+ PPMD code works slightly faster in (PPMD_32BIT) mode.
+*/
#define PPMD_32BIT
#endif
@@ -28,7 +37,7 @@ EXTERN_C_BEGIN
#define PPMD_N4 ((128 + 3 - 1 * PPMD_N1 - 2 * PPMD_N2 - 3 * PPMD_N3) / 4)
#define PPMD_NUM_INDEXES (PPMD_N1 + PPMD_N2 + PPMD_N3 + PPMD_N4)
-#pragma pack(push, 1)
+MY_CPU_pragma_pack_push_1
/* Most compilers works OK here even without #pragma pack(push, 1), but some GCC compilers need it. */
/* SEE-contexts for PPM-contexts with masked symbols */
@@ -40,41 +49,114 @@ typedef struct
} CPpmd_See;
#define Ppmd_See_Update(p) if ((p)->Shift < PPMD_PERIOD_BITS && --(p)->Count == 0) \
- { (p)->Summ <<= 1; (p)->Count = (Byte)(3 << (p)->Shift++); }
+ { (p)->Summ = (UInt16)((p)->Summ << 1); (p)->Count = (Byte)(3 << (p)->Shift++); }
+
typedef struct
{
Byte Symbol;
Byte Freq;
- UInt16 SuccessorLow;
- UInt16 SuccessorHigh;
+ UInt16 Successor_0;
+ UInt16 Successor_1;
} CPpmd_State;
-#pragma pack(pop)
-
-typedef
- #ifdef PPMD_32BIT
- CPpmd_State *
- #else
- UInt32
- #endif
- CPpmd_State_Ref;
-
-typedef
- #ifdef PPMD_32BIT
- void *
- #else
- UInt32
- #endif
- CPpmd_Void_Ref;
-
-typedef
- #ifdef PPMD_32BIT
- Byte *
- #else
- UInt32
- #endif
- CPpmd_Byte_Ref;
+typedef struct CPpmd_State2_
+{
+ Byte Symbol;
+ Byte Freq;
+} CPpmd_State2;
+
+typedef struct CPpmd_State4_
+{
+ UInt16 Successor_0;
+ UInt16 Successor_1;
+} CPpmd_State4;
+
+MY_CPU_pragma_pop
+
+/*
+ PPMD code can write full CPpmd_State structure data to CPpmd*_Context
+ at (byte offset = 2) instead of some fields of original CPpmd*_Context structure.
+
+ If we use pointers to different types, but that point to shared
+ memory space, we can have aliasing problem (strict aliasing).
+
+ XLC compiler in -O2 mode can change the order of memory write instructions
+ in relation to read instructions, if we use pointers to different types.
+
+ To solve that aliasing problem we use combined CPpmd*_Context structure
+ with unions that contain the fields from both structures:
+ the original CPpmd*_Context and CPpmd_State.
+ So we can access the fields from both structures via one pointer,
+ and the compiler doesn't change the order of write instructions
+ in relation to read instructions.
+
+ If we don't use memory write instructions to shared memory in
+ some local code, and we use only reading instructions (read only),
+ then probably it's safe to use pointers to different types for reading.
+*/
+
+
+
+#ifdef PPMD_32BIT
+
+ #define Ppmd_Ref_Type(type) type *
+ #define Ppmd_GetRef(p, ptr) (ptr)
+ #define Ppmd_GetPtr(p, ptr) (ptr)
+ #define Ppmd_GetPtr_Type(p, ptr, note_type) (ptr)
+
+#else
+
+ #define Ppmd_Ref_Type(type) UInt32
+ #define Ppmd_GetRef(p, ptr) ((UInt32)((Byte *)(ptr) - (p)->Base))
+ #define Ppmd_GetPtr(p, offs) ((void *)((p)->Base + (offs)))
+ #define Ppmd_GetPtr_Type(p, offs, type) ((type *)Ppmd_GetPtr(p, offs))
+
+#endif // PPMD_32BIT
+
+
+typedef Ppmd_Ref_Type(CPpmd_State) CPpmd_State_Ref;
+typedef Ppmd_Ref_Type(void) CPpmd_Void_Ref;
+typedef Ppmd_Ref_Type(Byte) CPpmd_Byte_Ref;
+
+
+/*
+#ifdef MY_CPU_LE_UNALIGN
+// the unaligned 32-bit access latency can be too large, if the data is not in L1 cache.
+#define Ppmd_GET_SUCCESSOR(p) ((CPpmd_Void_Ref)*(const UInt32 *)(const void *)&(p)->Successor_0)
+#define Ppmd_SET_SUCCESSOR(p, v) *(UInt32 *)(void *)(void *)&(p)->Successor_0 = (UInt32)(v)
+
+#else
+*/
+
+/*
+ We can write 16-bit halves to 32-bit (Successor) field in any selected order.
+ But the native order is more consistent way.
+ So we use the native order, if LE/BE order can be detected here at compile time.
+*/
+
+#ifdef MY_CPU_BE
+
+ #define Ppmd_GET_SUCCESSOR(p) \
+ ( (CPpmd_Void_Ref) (((UInt32)(p)->Successor_0 << 16) | (p)->Successor_1) )
+
+ #define Ppmd_SET_SUCCESSOR(p, v) { \
+ (p)->Successor_0 = (UInt16)(((UInt32)(v) >> 16) /* & 0xFFFF */); \
+ (p)->Successor_1 = (UInt16)((UInt32)(v) /* & 0xFFFF */); }
+
+#else
+
+ #define Ppmd_GET_SUCCESSOR(p) \
+ ( (CPpmd_Void_Ref) ((p)->Successor_0 | ((UInt32)(p)->Successor_1 << 16)) )
+
+ #define Ppmd_SET_SUCCESSOR(p, v) { \
+ (p)->Successor_0 = (UInt16)((UInt32)(v) /* & 0xFFFF */); \
+ (p)->Successor_1 = (UInt16)(((UInt32)(v) >> 16) /* & 0xFFFF */); }
+
+#endif
+
+// #endif
+
#define PPMD_SetAllBitsIn256Bytes(p) \
{ size_t z; for (z = 0; z < 256 / sizeof(p[0]); z += 8) { \
diff --git a/multiarc/src/formats/7z/C/Ppmd7.c b/multiarc/src/formats/7z/C/Ppmd7.c
index 470aadcc..cf401cb3 100644..100755
--- a/multiarc/src/formats/7z/C/Ppmd7.c
+++ b/multiarc/src/formats/7z/C/Ppmd7.c
@@ -1,5 +1,5 @@
/* Ppmd7.c -- PPMdH codec
-2018-07-04 : Igor Pavlov : Public domain
+2021-04-13 : Igor Pavlov : Public domain
This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
#include "Precomp.h"
@@ -8,7 +8,12 @@ This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
#include "Ppmd7.h"
-const Byte PPMD7_kExpEscape[16] = { 25, 14, 9, 7, 5, 5, 4, 4, 4, 3, 3, 3, 2, 2, 2, 2 };
+/* define PPMD7_ORDER_0_SUPPPORT to support order-0 mode, unsupported by original PPMd var.H. code */
+// #define PPMD7_ORDER_0_SUPPPORT
+
+MY_ALIGN(16)
+static const Byte PPMD7_kExpEscape[16] = { 25, 14, 9, 7, 5, 5, 4, 4, 4, 3, 3, 3, 2, 2, 2, 2 };
+MY_ALIGN(16)
static const UInt16 kInitBinEsc[] = { 0x3CDD, 0x1F3F, 0x59BF, 0x48F3, 0x64A1, 0x5ABC, 0x6632, 0x6051};
#define MAX_FREQ 124
@@ -16,13 +21,10 @@ static const UInt16 kInitBinEsc[] = { 0x3CDD, 0x1F3F, 0x59BF, 0x48F3, 0x64A1, 0x
#define U2B(nu) ((UInt32)(nu) * UNIT_SIZE)
#define U2I(nu) (p->Units2Indx[(size_t)(nu) - 1])
-#define I2U(indx) (p->Indx2Units[indx])
+#define I2U(indx) ((unsigned)p->Indx2Units[indx])
+#define I2U_UInt16(indx) ((UInt16)p->Indx2Units[indx])
-#ifdef PPMD_32BIT
- #define REF(ptr) (ptr)
-#else
- #define REF(ptr) ((UInt32)((Byte *)(ptr) - (p)->Base))
-#endif
+#define REF(ptr) Ppmd_GetRef(p, ptr)
#define STATS_REF(ptr) ((CPpmd_State_Ref)REF(ptr))
@@ -35,13 +37,7 @@ typedef CPpmd7_Context * CTX_PTR;
struct CPpmd7_Node_;
-typedef
- #ifdef PPMD_32BIT
- struct CPpmd7_Node_ *
- #else
- UInt32
- #endif
- CPpmd7_Node_Ref;
+typedef Ppmd_Ref_Type(struct CPpmd7_Node_) CPpmd7_Node_Ref;
typedef struct CPpmd7_Node_
{
@@ -51,17 +47,13 @@ typedef struct CPpmd7_Node_
CPpmd7_Node_Ref Prev;
} CPpmd7_Node;
-#ifdef PPMD_32BIT
- #define NODE(ptr) (ptr)
-#else
- #define NODE(offs) ((CPpmd7_Node *)(p->Base + (offs)))
-#endif
+#define NODE(r) Ppmd_GetPtr_Type(p, r, CPpmd7_Node)
void Ppmd7_Construct(CPpmd7 *p)
{
unsigned i, k, m;
- p->Base = 0;
+ p->Base = NULL;
for (i = 0, k = 0; i < PPMD_NUM_INDEXES; i++)
{
@@ -77,6 +69,7 @@ void Ppmd7_Construct(CPpmd7 *p)
for (i = 0; i < 3; i++)
p->NS2Indx[i] = (Byte)i;
+
for (m = i, k = 1; i < 256; i++)
{
p->NS2Indx[i] = (Byte)m;
@@ -84,54 +77,63 @@ void Ppmd7_Construct(CPpmd7 *p)
k = (++m) - 2;
}
- memset(p->HB2Flag, 0, 0x40);
- memset(p->HB2Flag + 0x40, 8, 0x100 - 0x40);
+ memcpy(p->ExpEscape, PPMD7_kExpEscape, 16);
}
+
void Ppmd7_Free(CPpmd7 *p, ISzAllocPtr alloc)
{
ISzAlloc_Free(alloc, p->Base);
p->Size = 0;
- p->Base = 0;
+ p->Base = NULL;
}
+
BoolInt Ppmd7_Alloc(CPpmd7 *p, UInt32 size, ISzAllocPtr alloc)
{
if (!p->Base || p->Size != size)
{
- size_t size2;
Ppmd7_Free(p, alloc);
- size2 = 0
- #ifndef PPMD_32BIT
- + UNIT_SIZE
- #endif
- ;
- p->AlignOffset =
- #ifdef PPMD_32BIT
- (4 - size) & 3;
- #else
- 4 - (size & 3);
- #endif
- if ((p->Base = (Byte *)ISzAlloc_Alloc(alloc, p->AlignOffset + size + size2)) == 0)
+ p->AlignOffset = (4 - size) & 3;
+ if ((p->Base = (Byte *)ISzAlloc_Alloc(alloc, p->AlignOffset + size)) == NULL)
return False;
p->Size = size;
}
return True;
}
+
+
+// ---------- Internal Memory Allocator ----------
+
+/* We can use CPpmd7_Node in list of free units (as in Ppmd8)
+ But we still need one additional list walk pass in GlueFreeBlocks().
+ So we use simple CPpmd_Void_Ref instead of CPpmd7_Node in InsertNode() / RemoveNode()
+*/
+
+#define EMPTY_NODE 0
+
+
static void InsertNode(CPpmd7 *p, void *node, unsigned indx)
{
*((CPpmd_Void_Ref *)node) = p->FreeList[indx];
+ // ((CPpmd7_Node *)node)->Next = (CPpmd7_Node_Ref)p->FreeList[indx];
+
p->FreeList[indx] = REF(node);
+
}
+
static void *RemoveNode(CPpmd7 *p, unsigned indx)
{
CPpmd_Void_Ref *node = (CPpmd_Void_Ref *)Ppmd7_GetPtr(p, p->FreeList[indx]);
p->FreeList[indx] = *node;
+ // CPpmd7_Node *node = NODE((CPpmd7_Node_Ref)p->FreeList[indx]);
+ // p->FreeList[indx] = node->Next;
return node;
}
+
static void SplitBlock(CPpmd7 *p, void *ptr, unsigned oldIndx, unsigned newIndx)
{
unsigned i, nu = I2U(oldIndx) - I2U(newIndx);
@@ -144,123 +146,167 @@ static void SplitBlock(CPpmd7 *p, void *ptr, unsigned oldIndx, unsigned newIndx)
InsertNode(p, ptr, i);
}
-static void GlueFreeBlocks(CPpmd7 *p)
+
+/* we use CPpmd7_Node_Union union to solve XLC -O2 strict pointer aliasing problem */
+
+typedef union _CPpmd7_Node_Union
{
- #ifdef PPMD_32BIT
- CPpmd7_Node headItem;
- CPpmd7_Node_Ref head = &headItem;
- #else
- CPpmd7_Node_Ref head = p->AlignOffset + p->Size;
- #endif
-
- CPpmd7_Node_Ref n = head;
- unsigned i;
+ CPpmd7_Node Node;
+ CPpmd7_Node_Ref NextRef;
+} CPpmd7_Node_Union;
+
+/* Original PPmdH (Ppmd7) code uses doubly linked list in GlueFreeBlocks()
+ we use single linked list similar to Ppmd8 code */
+
+static void GlueFreeBlocks(CPpmd7 *p)
+{
+ /*
+ we use first UInt16 field of 12-bytes UNITs as record type stamp
+ CPpmd_State { Byte Symbol; Byte Freq; : Freq != 0
+ CPpmd7_Context { UInt16 NumStats; : NumStats != 0
+ CPpmd7_Node { UInt16 Stamp : Stamp == 0 for free record
+ : Stamp == 1 for head record and guard
+ Last 12-bytes UNIT in array always contains 12-bytes order-0 CPpmd7_Context record.
+ */
+ CPpmd7_Node_Ref head, n = 0;
+
p->GlueCount = 255;
- /* create doubly-linked list of free blocks */
- for (i = 0; i < PPMD_NUM_INDEXES; i++)
+
+ /* we set guard NODE at LoUnit */
+ if (p->LoUnit != p->HiUnit)
+ ((CPpmd7_Node *)(void *)p->LoUnit)->Stamp = 1;
+
{
- UInt16 nu = I2U(i);
- CPpmd7_Node_Ref next = (CPpmd7_Node_Ref)p->FreeList[i];
- p->FreeList[i] = 0;
- while (next != 0)
+ /* Create list of free blocks.
+ We still need one additional list walk pass before Glue. */
+ unsigned i;
+ for (i = 0; i < PPMD_NUM_INDEXES; i++)
{
- CPpmd7_Node *node = NODE(next);
- node->Next = n;
- n = NODE(n)->Prev = next;
- next = *(const CPpmd7_Node_Ref *)node;
- node->Stamp = 0;
- node->NU = (UInt16)nu;
+ const UInt16 nu = I2U_UInt16(i);
+ CPpmd7_Node_Ref next = (CPpmd7_Node_Ref)p->FreeList[i];
+ p->FreeList[i] = 0;
+ while (next != 0)
+ {
+ /* Don't change the order of the following commands: */
+ CPpmd7_Node_Union *un = (CPpmd7_Node_Union *)NODE(next);
+ const CPpmd7_Node_Ref tmp = next;
+ next = un->NextRef;
+ un->Node.Stamp = EMPTY_NODE;
+ un->Node.NU = nu;
+ un->Node.Next = n;
+ n = tmp;
+ }
}
}
- NODE(head)->Stamp = 1;
- NODE(head)->Next = n;
- NODE(n)->Prev = head;
- if (p->LoUnit != p->HiUnit)
- ((CPpmd7_Node *)p->LoUnit)->Stamp = 1;
-
- /* Glue free blocks */
- while (n != head)
+
+ head = n;
+ /* Glue and Fill must walk the list in same direction */
{
- CPpmd7_Node *node = NODE(n);
- UInt32 nu = (UInt32)node->NU;
- for (;;)
+ /* Glue free blocks */
+ CPpmd7_Node_Ref *prev = &head;
+ while (n)
{
- CPpmd7_Node *node2 = NODE(n) + nu;
- nu += node2->NU;
- if (node2->Stamp != 0 || nu >= 0x10000)
- break;
- NODE(node2->Prev)->Next = node2->Next;
- NODE(node2->Next)->Prev = node2->Prev;
- node->NU = (UInt16)nu;
+ CPpmd7_Node *node = NODE(n);
+ UInt32 nu = node->NU;
+ n = node->Next;
+ if (nu == 0)
+ {
+ *prev = n;
+ continue;
+ }
+ prev = &node->Next;
+ for (;;)
+ {
+ CPpmd7_Node *node2 = node + nu;
+ nu += node2->NU;
+ if (node2->Stamp != EMPTY_NODE || nu >= 0x10000)
+ break;
+ node->NU = (UInt16)nu;
+ node2->NU = 0;
+ }
}
- n = node->Next;
}
-
+
/* Fill lists of free blocks */
- for (n = NODE(head)->Next; n != head;)
+ for (n = head; n != 0;)
{
CPpmd7_Node *node = NODE(n);
- unsigned nu;
- CPpmd7_Node_Ref next = node->Next;
- for (nu = node->NU; nu > 128; nu -= 128, node += 128)
+ UInt32 nu = node->NU;
+ unsigned i;
+ n = node->Next;
+ if (nu == 0)
+ continue;
+ for (; nu > 128; nu -= 128, node += 128)
InsertNode(p, node, PPMD_NUM_INDEXES - 1);
if (I2U(i = U2I(nu)) != nu)
{
unsigned k = I2U(--i);
- InsertNode(p, node + k, nu - k - 1);
+ InsertNode(p, node + k, (unsigned)nu - k - 1);
}
InsertNode(p, node, i);
- n = next;
}
}
+
+MY_NO_INLINE
static void *AllocUnitsRare(CPpmd7 *p, unsigned indx)
{
unsigned i;
- void *retVal;
+
if (p->GlueCount == 0)
{
GlueFreeBlocks(p);
if (p->FreeList[indx] != 0)
return RemoveNode(p, indx);
}
+
i = indx;
+
do
{
if (++i == PPMD_NUM_INDEXES)
{
UInt32 numBytes = U2B(I2U(indx));
+ Byte *us = p->UnitsStart;
p->GlueCount--;
- return ((UInt32)(p->UnitsStart - p->Text) > numBytes) ? (p->UnitsStart -= numBytes) : (NULL);
+ return ((UInt32)(us - p->Text) > numBytes) ? (p->UnitsStart = us - numBytes) : NULL;
}
}
while (p->FreeList[i] == 0);
- retVal = RemoveNode(p, i);
- SplitBlock(p, retVal, i, indx);
- return retVal;
+
+ {
+ void *block = RemoveNode(p, i);
+ SplitBlock(p, block, i, indx);
+ return block;
+ }
}
+
static void *AllocUnits(CPpmd7 *p, unsigned indx)
{
- UInt32 numBytes;
if (p->FreeList[indx] != 0)
return RemoveNode(p, indx);
- numBytes = U2B(I2U(indx));
- if (numBytes <= (UInt32)(p->HiUnit - p->LoUnit))
{
- void *retVal = p->LoUnit;
- p->LoUnit += numBytes;
- return retVal;
+ UInt32 numBytes = U2B(I2U(indx));
+ Byte *lo = p->LoUnit;
+ if ((UInt32)(p->HiUnit - lo) >= numBytes)
+ {
+ p->LoUnit = lo + numBytes;
+ return lo;
+ }
}
return AllocUnitsRare(p, indx);
}
+
#define MyMem12Cpy(dest, src, num) \
- { UInt32 *d = (UInt32 *)dest; const UInt32 *s = (const UInt32 *)src; UInt32 n = num; \
- do { d[0] = s[0]; d[1] = s[1]; d[2] = s[2]; s += 3; d += 3; } while (--n); }
+ { UInt32 *d = (UInt32 *)dest; const UInt32 *z = (const UInt32 *)src; UInt32 n = num; \
+ do { d[0] = z[0]; d[1] = z[1]; d[2] = z[2]; z += 3; d += 3; } while (--n); }
+
+/*
static void *ShrinkUnits(CPpmd7 *p, void *oldPtr, unsigned oldNU, unsigned newNU)
{
unsigned i0 = U2I(oldNU);
@@ -277,20 +323,25 @@ static void *ShrinkUnits(CPpmd7 *p, void *oldPtr, unsigned oldNU, unsigned newNU
SplitBlock(p, oldPtr, i0, i1);
return oldPtr;
}
+*/
-#define SUCCESSOR(p) ((CPpmd_Void_Ref)((p)->SuccessorLow | ((UInt32)(p)->SuccessorHigh << 16)))
+#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p)
static void SetSuccessor(CPpmd_State *p, CPpmd_Void_Ref v)
{
- (p)->SuccessorLow = (UInt16)((UInt32)(v) & 0xFFFF);
- (p)->SuccessorHigh = (UInt16)(((UInt32)(v) >> 16) & 0xFFFF);
+ Ppmd_SET_SUCCESSOR(p, v);
}
-static void RestartModel(CPpmd7 *p)
+
+
+MY_NO_INLINE
+static
+void RestartModel(CPpmd7 *p)
{
- unsigned i, k, m;
+ unsigned i, k;
memset(p->FreeList, 0, sizeof(p->FreeList));
+
p->Text = p->Base + p->AlignOffset;
p->HiUnit = p->Text + p->Size;
p->LoUnit = p->UnitsStart = p->HiUnit - p->Size / 8 / UNIT_SIZE * 7 * UNIT_SIZE;
@@ -300,57 +351,110 @@ static void RestartModel(CPpmd7 *p)
p->RunLength = p->InitRL = -(Int32)((p->MaxOrder < 12) ? p->MaxOrder : 12) - 1;
p->PrevSuccess = 0;
- p->MinContext = p->MaxContext = (CTX_PTR)(p->HiUnit -= UNIT_SIZE); /* AllocContext(p); */
- p->MinContext->Suffix = 0;
- p->MinContext->NumStats = 256;
- p->MinContext->SummFreq = 256 + 1;
- p->FoundState = (CPpmd_State *)p->LoUnit; /* AllocUnits(p, PPMD_NUM_INDEXES - 1); */
- p->LoUnit += U2B(256 / 2);
- p->MinContext->Stats = REF(p->FoundState);
- for (i = 0; i < 256; i++)
{
- CPpmd_State *s = &p->FoundState[i];
- s->Symbol = (Byte)i;
- s->Freq = 1;
- SetSuccessor(s, 0);
+ CPpmd7_Context *mc = (CTX_PTR)(void *)(p->HiUnit -= UNIT_SIZE); /* AllocContext(p); */
+ CPpmd_State *s = (CPpmd_State *)p->LoUnit; /* AllocUnits(p, PPMD_NUM_INDEXES - 1); */
+
+ p->LoUnit += U2B(256 / 2);
+ p->MaxContext = p->MinContext = mc;
+ p->FoundState = s;
+
+ mc->NumStats = 256;
+ mc->Union2.SummFreq = 256 + 1;
+ mc->Union4.Stats = REF(s);
+ mc->Suffix = 0;
+
+ for (i = 0; i < 256; i++, s++)
+ {
+ s->Symbol = (Byte)i;
+ s->Freq = 1;
+ SetSuccessor(s, 0);
+ }
+
+ #ifdef PPMD7_ORDER_0_SUPPPORT
+ if (p->MaxOrder == 0)
+ {
+ CPpmd_Void_Ref r = REF(mc);
+ s = p->FoundState;
+ for (i = 0; i < 256; i++, s++)
+ SetSuccessor(s, r);
+ return;
+ }
+ #endif
}
for (i = 0; i < 128; i++)
+
+
+
for (k = 0; k < 8; k++)
{
+ unsigned m;
UInt16 *dest = p->BinSumm[i] + k;
UInt16 val = (UInt16)(PPMD_BIN_SCALE - kInitBinEsc[k] / (i + 2));
for (m = 0; m < 64; m += 8)
dest[m] = val;
}
-
+
+
for (i = 0; i < 25; i++)
- for (k = 0; k < 16; k++)
+ {
+
+ CPpmd_See *s = p->See[i];
+
+
+
+ unsigned summ = ((5 * i + 10) << (PPMD_PERIOD_BITS - 4));
+ for (k = 0; k < 16; k++, s++)
{
- CPpmd_See *s = &p->See[i][k];
- s->Summ = (UInt16)((5 * i + 10) << (s->Shift = PPMD_PERIOD_BITS - 4));
+ s->Summ = (UInt16)summ;
+ s->Shift = (PPMD_PERIOD_BITS - 4);
s->Count = 4;
}
+ }
+
+ p->DummySee.Summ = 0; /* unused */
+ p->DummySee.Shift = PPMD_PERIOD_BITS;
+ p->DummySee.Count = 64; /* unused */
}
+
void Ppmd7_Init(CPpmd7 *p, unsigned maxOrder)
{
p->MaxOrder = maxOrder;
+
RestartModel(p);
- p->DummySee.Shift = PPMD_PERIOD_BITS;
- p->DummySee.Summ = 0; /* unused */
- p->DummySee.Count = 64; /* unused */
}
-static CTX_PTR CreateSuccessors(CPpmd7 *p, BoolInt skip)
+
+
+/*
+ CreateSuccessors()
+ It's called when (FoundState->Successor) is RAW-Successor,
+ that is the link to position in Raw text.
+ So we create Context records and write the links to
+ FoundState->Successor and to identical RAW-Successors in suffix
+ contexts of MinContext.
+
+ The function returns:
+ if (OrderFall == 0) then MinContext is already at MAX order,
+ { return pointer to new or existing context of same MAX order }
+ else
+ { return pointer to new real context that will be (Order+1) in comparison with MinContext
+
+ also it can return pointer to real context of same order,
+*/
+
+MY_NO_INLINE
+static CTX_PTR CreateSuccessors(CPpmd7 *p)
{
- CPpmd_State upState;
CTX_PTR c = p->MinContext;
CPpmd_Byte_Ref upBranch = (CPpmd_Byte_Ref)SUCCESSOR(p->FoundState);
- CPpmd_State *ps[PPMD7_MAX_ORDER];
+ Byte newSym, newFreq;
unsigned numPs = 0;
-
- if (!skip)
+ CPpmd_State *ps[PPMD7_MAX_ORDER];
+
+ if (p->OrderFall != 0)
ps[numPs++] = p->FoundState;
while (c->Suffix)
@@ -358,44 +462,70 @@ static CTX_PTR CreateSuccessors(CPpmd7 *p, BoolInt skip)
CPpmd_Void_Ref successor;
CPpmd_State *s;
c = SUFFIX(c);
+
+
if (c->NumStats != 1)
{
- for (s = STATS(c); s->Symbol != p->FoundState->Symbol; s++);
+ Byte sym = p->FoundState->Symbol;
+ for (s = STATS(c); s->Symbol != sym; s++);
+
}
else
+ {
s = ONE_STATE(c);
+
+ }
successor = SUCCESSOR(s);
if (successor != upBranch)
{
+ // (c) is real record Context here,
c = CTX(successor);
if (numPs == 0)
+ {
+ // (c) is real record MAX Order Context here,
+ // So we don't need to create any new contexts.
return c;
+ }
break;
}
ps[numPs++] = s;
}
- upState.Symbol = *(const Byte *)Ppmd7_GetPtr(p, upBranch);
- SetSuccessor(&upState, upBranch + 1);
+ // All created contexts will have single-symbol with new RAW-Successor
+ // All new RAW-Successors will point to next position in RAW text
+ // after FoundState->Successor
+
+ newSym = *(const Byte *)Ppmd7_GetPtr(p, upBranch);
+ upBranch++;
+
if (c->NumStats == 1)
- upState.Freq = ONE_STATE(c)->Freq;
+ newFreq = ONE_STATE(c)->Freq;
else
{
UInt32 cf, s0;
CPpmd_State *s;
- for (s = STATS(c); s->Symbol != upState.Symbol; s++);
- cf = s->Freq - 1;
- s0 = c->SummFreq - c->NumStats - cf;
- upState.Freq = (Byte)(1 + ((2 * cf <= s0) ? (5 * cf > s0) : ((2 * cf + 3 * s0 - 1) / (2 * s0))));
+ for (s = STATS(c); s->Symbol != newSym; s++);
+ cf = (UInt32)s->Freq - 1;
+ s0 = (UInt32)c->Union2.SummFreq - c->NumStats - cf;
+ /*
+ cf - is frequency of symbol that will be Successor in new context records.
+ s0 - is cumulative frequency sum of other symbols from parent context.
+ max(newFreq)= (s->Freq + 1), when (s0 == 1)
+ we have requirement (Ppmd7Context_OneState()->Freq <= 128) in BinSumm[]
+ so (s->Freq < 128) - is requirement for multi-symbol contexts
+ */
+ newFreq = (Byte)(1 + ((2 * cf <= s0) ? (5 * cf > s0) : (2 * cf + s0 - 1) / (2 * s0) + 1));
}
+ // Create new single-symbol contexts from low order to high order in loop
+
do
{
- /* Create Child */
- CTX_PTR c1; /* = AllocContext(p); */
+ CTX_PTR c1;
+ /* = AllocContext(p); */
if (p->HiUnit != p->LoUnit)
- c1 = (CTX_PTR)(p->HiUnit -= UNIT_SIZE);
+ c1 = (CTX_PTR)(void *)(p->HiUnit -= UNIT_SIZE);
else if (p->FreeList[0] != 0)
c1 = (CTX_PTR)RemoveNode(p, 0);
else
@@ -404,8 +534,11 @@ static CTX_PTR CreateSuccessors(CPpmd7 *p, BoolInt skip)
if (!c1)
return NULL;
}
+
c1->NumStats = 1;
- *ONE_STATE(c1) = upState;
+ ONE_STATE(c1)->Symbol = newSym;
+ ONE_STATE(c1)->Freq = newFreq;
+ SetSuccessor(ONE_STATE(c1), upBranch);
c1->Suffix = REF(c);
SetSuccessor(ps[--numPs], REF(c1));
c = c1;
@@ -415,21 +548,26 @@ static CTX_PTR CreateSuccessors(CPpmd7 *p, BoolInt skip)
return c;
}
-static void SwapStates(CPpmd_State *t1, CPpmd_State *t2)
-{
- CPpmd_State tmp = *t1;
- *t1 = *t2;
- *t2 = tmp;
-}
-static void UpdateModel(CPpmd7 *p)
+
+#define SwapStates(s) \
+ { CPpmd_State tmp = s[0]; s[0] = s[-1]; s[-1] = tmp; }
+
+
+void Ppmd7_UpdateModel(CPpmd7 *p);
+MY_NO_INLINE
+void Ppmd7_UpdateModel(CPpmd7 *p)
{
- CPpmd_Void_Ref successor, fSuccessor = SUCCESSOR(p->FoundState);
- CTX_PTR c;
+ CPpmd_Void_Ref maxSuccessor, minSuccessor;
+ CTX_PTR c, mc;
unsigned s0, ns;
-
+
+
+
if (p->FoundState->Freq < MAX_FREQ / 4 && p->MinContext->Suffix != 0)
{
+ /* Update Freqs in Suffix Context */
+
c = SUFFIX(p->MinContext);
if (c->NumStats == 1)
@@ -441,27 +579,39 @@ static void UpdateModel(CPpmd7 *p)
else
{
CPpmd_State *s = STATS(c);
- if (s->Symbol != p->FoundState->Symbol)
+ Byte sym = p->FoundState->Symbol;
+
+ if (s->Symbol != sym)
{
- do { s++; } while (s->Symbol != p->FoundState->Symbol);
+ do
+ {
+ // s++; if (s->Symbol == sym) break;
+ s++;
+ }
+ while (s->Symbol != sym);
+
if (s[0].Freq >= s[-1].Freq)
{
- SwapStates(&s[0], &s[-1]);
+ SwapStates(s);
s--;
}
}
+
if (s->Freq < MAX_FREQ - 9)
{
- s->Freq += 2;
- c->SummFreq += 2;
+ s->Freq = (Byte)(s->Freq + 2);
+ c->Union2.SummFreq = (UInt16)(c->Union2.SummFreq + 2);
}
}
}
+
if (p->OrderFall == 0)
{
- p->MinContext = p->MaxContext = CreateSuccessors(p, True);
- if (p->MinContext == 0)
+ /* MAX ORDER context */
+ /* (FoundState->Successor) is RAW-Successor. */
+ p->MaxContext = p->MinContext = CreateSuccessors(p);
+ if (!p->MinContext)
{
RestartModel(p);
return;
@@ -469,45 +619,93 @@ static void UpdateModel(CPpmd7 *p)
SetSuccessor(p->FoundState, REF(p->MinContext));
return;
}
+
+
+ /* NON-MAX ORDER context */
- *p->Text++ = p->FoundState->Symbol;
- successor = REF(p->Text);
- if (p->Text >= p->UnitsStart)
{
- RestartModel(p);
- return;
+ Byte *text = p->Text;
+ *text++ = p->FoundState->Symbol;
+ p->Text = text;
+ if (text >= p->UnitsStart)
+ {
+ RestartModel(p);
+ return;
+ }
+ maxSuccessor = REF(text);
}
- if (fSuccessor)
+ minSuccessor = SUCCESSOR(p->FoundState);
+
+ if (minSuccessor)
{
- if (fSuccessor <= successor)
+ // there is Successor for FoundState in MinContext.
+ // So the next context will be one order higher than MinContext.
+
+ if (minSuccessor <= maxSuccessor)
{
- CTX_PTR cs = CreateSuccessors(p, False);
- if (cs == NULL)
+ // minSuccessor is RAW-Successor. So we will create real contexts records:
+ CTX_PTR cs = CreateSuccessors(p);
+ if (!cs)
{
RestartModel(p);
return;
}
- fSuccessor = REF(cs);
+ minSuccessor = REF(cs);
}
+
+ // minSuccessor now is real Context pointer that points to existing (Order+1) context
+
if (--p->OrderFall == 0)
{
- successor = fSuccessor;
+ /*
+ if we move to MaxOrder context, then minSuccessor will be common Successor for both:
+ MinContext that is (MaxOrder - 1)
+ MaxContext that is (MaxOrder)
+ so we don't need new RAW-Successor, and we can use real minSuccessor
+ as successors for both MinContext and MaxContext.
+ */
+ maxSuccessor = minSuccessor;
+
+ /*
+ if (MaxContext != MinContext)
+ {
+ there was order fall from MaxOrder and we don't need current symbol
+ to transfer some RAW-Successors to real contexts.
+ So we roll back pointer in raw data for one position.
+ }
+ */
p->Text -= (p->MaxContext != p->MinContext);
}
}
else
{
- SetSuccessor(p->FoundState, successor);
- fSuccessor = REF(p->MinContext);
+ /*
+ FoundState has NULL-Successor here.
+ And only root 0-order context can contain NULL-Successors.
+ We change Successor in FoundState to RAW-Successor,
+ And next context will be same 0-order root Context.
+ */
+ SetSuccessor(p->FoundState, maxSuccessor);
+ minSuccessor = REF(p->MinContext);
}
-
- s0 = p->MinContext->SummFreq - (ns = p->MinContext->NumStats) - (p->FoundState->Freq - 1);
-
- for (c = p->MaxContext; c != p->MinContext; c = SUFFIX(c))
+
+ mc = p->MinContext;
+ c = p->MaxContext;
+
+ p->MaxContext = p->MinContext = CTX(minSuccessor);
+
+ if (c == mc)
+ return;
+
+ // s0 : is pure Escape Freq
+ s0 = mc->Union2.SummFreq - (ns = mc->NumStats) - ((unsigned)p->FoundState->Freq - 1);
+
+ do
{
unsigned ns1;
- UInt32 cf, sf;
+ UInt32 sum;
+
if ((ns1 = c->NumStats) != 1)
{
if ((ns1 & 1) == 0)
@@ -527,80 +725,127 @@ static void UpdateModel(CPpmd7 *p)
oldPtr = STATS(c);
MyMem12Cpy(ptr, oldPtr, oldNU);
InsertNode(p, oldPtr, i);
- c->Stats = STATS_REF(ptr);
+ c->Union4.Stats = STATS_REF(ptr);
}
}
- c->SummFreq = (UInt16)(c->SummFreq + (2 * ns1 < ns) + 2 * ((4 * ns1 <= ns) & (c->SummFreq <= 8 * ns1)));
+ sum = c->Union2.SummFreq;
+ /* max increase of Escape_Freq is 3 here.
+ total increase of Union2.SummFreq for all symbols is less than 256 here */
+ sum += (UInt32)(2 * ns1 < ns) + 2 * ((unsigned)(4 * ns1 <= ns) & (sum <= 8 * ns1));
+ /* original PPMdH uses 16-bit variable for (sum) here.
+ But (sum < 0x9000). So we don't truncate (sum) to 16-bit */
+ // sum = (UInt16)sum;
}
else
{
+ // instead of One-symbol context we create 2-symbol context
CPpmd_State *s = (CPpmd_State*)AllocUnits(p, 0);
if (!s)
{
RestartModel(p);
return;
}
- *s = *ONE_STATE(c);
- c->Stats = REF(s);
- if (s->Freq < MAX_FREQ / 4 - 1)
- s->Freq <<= 1;
- else
- s->Freq = MAX_FREQ - 4;
- c->SummFreq = (UInt16)(s->Freq + p->InitEsc + (ns > 3));
- }
- cf = 2 * (UInt32)p->FoundState->Freq * (c->SummFreq + 6);
- sf = (UInt32)s0 + c->SummFreq;
- if (cf < 6 * sf)
- {
- cf = 1 + (cf > sf) + (cf >= 4 * sf);
- c->SummFreq += 3;
- }
- else
- {
- cf = 4 + (cf >= 9 * sf) + (cf >= 12 * sf) + (cf >= 15 * sf);
- c->SummFreq = (UInt16)(c->SummFreq + cf);
+ {
+ unsigned freq = c->Union2.State2.Freq;
+ // s = *ONE_STATE(c);
+ s->Symbol = c->Union2.State2.Symbol;
+ s->Successor_0 = c->Union4.State4.Successor_0;
+ s->Successor_1 = c->Union4.State4.Successor_1;
+ // SetSuccessor(s, c->Union4.Stats); // call it only for debug purposes to check the order of
+ // (Successor_0 and Successor_1) in LE/BE.
+ c->Union4.Stats = REF(s);
+ if (freq < MAX_FREQ / 4 - 1)
+ freq <<= 1;
+ else
+ freq = MAX_FREQ - 4;
+ // (max(s->freq) == 120), when we convert from 1-symbol into 2-symbol context
+ s->Freq = (Byte)freq;
+ // max(InitEsc = PPMD7_kExpEscape[*]) is 25. So the max(escapeFreq) is 26 here
+ sum = freq + p->InitEsc + (ns > 3);
+ }
}
+
{
CPpmd_State *s = STATS(c) + ns1;
- SetSuccessor(s, successor);
+ UInt32 cf = 2 * (sum + 6) * (UInt32)p->FoundState->Freq;
+ UInt32 sf = (UInt32)s0 + sum;
s->Symbol = p->FoundState->Symbol;
- s->Freq = (Byte)cf;
c->NumStats = (UInt16)(ns1 + 1);
+ SetSuccessor(s, maxSuccessor);
+
+ if (cf < 6 * sf)
+ {
+ cf = (UInt32)1 + (cf > sf) + (cf >= 4 * sf);
+ sum += 3;
+ /* It can add (0, 1, 2) to Escape_Freq */
+ }
+ else
+ {
+ cf = (UInt32)4 + (cf >= 9 * sf) + (cf >= 12 * sf) + (cf >= 15 * sf);
+ sum += cf;
+ }
+
+ c->Union2.SummFreq = (UInt16)sum;
+ s->Freq = (Byte)cf;
}
+ c = SUFFIX(c);
}
- p->MaxContext = p->MinContext = CTX(fSuccessor);
+ while (c != mc);
}
+
+
+MY_NO_INLINE
static void Rescale(CPpmd7 *p)
{
unsigned i, adder, sumFreq, escFreq;
CPpmd_State *stats = STATS(p->MinContext);
CPpmd_State *s = p->FoundState;
+
+ /* Sort the list by Freq */
+ if (s != stats)
{
CPpmd_State tmp = *s;
- for (; s != stats; s--)
+ do
s[0] = s[-1];
+ while (--s != stats);
*s = tmp;
}
- escFreq = p->MinContext->SummFreq - s->Freq;
- s->Freq += 4;
- adder = (p->OrderFall != 0);
- s->Freq = (Byte)((s->Freq + adder) >> 1);
+
sumFreq = s->Freq;
+ escFreq = p->MinContext->Union2.SummFreq - sumFreq;
+
+ /*
+ if (p->OrderFall == 0), adder = 0 : it's allowed to remove symbol from MAX Order context
+ if (p->OrderFall != 0), adder = 1 : it's NOT allowed to remove symbol from NON-MAX Order context
+ */
+
+ adder = (p->OrderFall != 0);
+
+ #ifdef PPMD7_ORDER_0_SUPPPORT
+ adder |= (p->MaxOrder == 0); // we don't remove symbols from order-0 context
+ #endif
+
+ sumFreq = (sumFreq + 4 + adder) >> 1;
+ i = (unsigned)p->MinContext->NumStats - 1;
+ s->Freq = (Byte)sumFreq;
- i = p->MinContext->NumStats - 1;
do
{
- escFreq -= (++s)->Freq;
- s->Freq = (Byte)((s->Freq + adder) >> 1);
- sumFreq += s->Freq;
- if (s[0].Freq > s[-1].Freq)
+ unsigned freq = (++s)->Freq;
+ escFreq -= freq;
+ freq = (freq + adder) >> 1;
+ sumFreq += freq;
+ s->Freq = (Byte)freq;
+ if (freq > s[-1].Freq)
{
+ CPpmd_State tmp = *s;
CPpmd_State *s1 = s;
- CPpmd_State tmp = *s1;
do
+ {
s1[0] = s1[-1];
- while (--s1 != stats && tmp.Freq > s1[-1].Freq);
+ }
+ while (--s1 != stats && freq > s1[-1].Freq);
*s1 = tmp;
}
}
@@ -608,47 +853,89 @@ static void Rescale(CPpmd7 *p)
if (s->Freq == 0)
{
- unsigned numStats = p->MinContext->NumStats;
- unsigned n0, n1;
- do { i++; } while ((--s)->Freq == 0);
+ /* Remove all items with Freq == 0 */
+ CPpmd7_Context *mc;
+ unsigned numStats, numStatsNew, n0, n1;
+
+ i = 0; do { i++; } while ((--s)->Freq == 0);
+
+ /* We increase (escFreq) for the number of removed symbols.
+ So we will have (0.5) increase for Escape_Freq on average per
+ removed symbol after Escape_Freq halving */
escFreq += i;
- p->MinContext->NumStats = (UInt16)(p->MinContext->NumStats - i);
- if (p->MinContext->NumStats == 1)
+ mc = p->MinContext;
+ numStats = mc->NumStats;
+ numStatsNew = numStats - i;
+ mc->NumStats = (UInt16)(numStatsNew);
+ n0 = (numStats + 1) >> 1;
+
+ if (numStatsNew == 1)
{
- CPpmd_State tmp = *stats;
+ /* Create Single-Symbol context */
+ unsigned freq = stats->Freq;
+
do
{
- tmp.Freq = (Byte)(tmp.Freq - (tmp.Freq >> 1));
escFreq >>= 1;
+ freq = (freq + 1) >> 1;
}
while (escFreq > 1);
- InsertNode(p, stats, U2I(((numStats + 1) >> 1)));
- *(p->FoundState = ONE_STATE(p->MinContext)) = tmp;
+
+ s = ONE_STATE(mc);
+ *s = *stats;
+ s->Freq = (Byte)freq; // (freq <= 260 / 4)
+ p->FoundState = s;
+ InsertNode(p, stats, U2I(n0));
return;
}
- n0 = (numStats + 1) >> 1;
- n1 = (p->MinContext->NumStats + 1) >> 1;
+
+ n1 = (numStatsNew + 1) >> 1;
if (n0 != n1)
- p->MinContext->Stats = STATS_REF(ShrinkUnits(p, stats, n0, n1));
+ {
+ // p->MinContext->Union4.Stats = STATS_REF(ShrinkUnits(p, stats, n0, n1));
+ unsigned i0 = U2I(n0);
+ unsigned i1 = U2I(n1);
+ if (i0 != i1)
+ {
+ if (p->FreeList[i1] != 0)
+ {
+ void *ptr = RemoveNode(p, i1);
+ p->MinContext->Union4.Stats = STATS_REF(ptr);
+ MyMem12Cpy(ptr, (const void *)stats, n1);
+ InsertNode(p, stats, i0);
+ }
+ else
+ SplitBlock(p, stats, i0, i1);
+ }
+ }
+ }
+ {
+ CPpmd7_Context *mc = p->MinContext;
+ mc->Union2.SummFreq = (UInt16)(sumFreq + escFreq - (escFreq >> 1));
+ // Escape_Freq halving here
+ p->FoundState = STATS(mc);
}
- p->MinContext->SummFreq = (UInt16)(sumFreq + escFreq - (escFreq >> 1));
- p->FoundState = STATS(p->MinContext);
}
+
CPpmd_See *Ppmd7_MakeEscFreq(CPpmd7 *p, unsigned numMasked, UInt32 *escFreq)
{
CPpmd_See *see;
- unsigned nonMasked = p->MinContext->NumStats - numMasked;
- if (p->MinContext->NumStats != 256)
+ const CPpmd7_Context *mc = p->MinContext;
+ unsigned numStats = mc->NumStats;
+ if (numStats != 256)
{
- see = p->See[(unsigned)p->NS2Indx[(size_t)nonMasked - 1]] +
- (nonMasked < (unsigned)SUFFIX(p->MinContext)->NumStats - p->MinContext->NumStats) +
- 2 * (unsigned)(p->MinContext->SummFreq < 11 * p->MinContext->NumStats) +
- 4 * (unsigned)(numMasked > nonMasked) +
+ unsigned nonMasked = numStats - numMasked;
+ see = p->See[(unsigned)p->NS2Indx[(size_t)nonMasked - 1]]
+ + (nonMasked < (unsigned)SUFFIX(mc)->NumStats - numStats)
+ + 2 * (unsigned)(mc->Union2.SummFreq < 11 * numStats)
+ + 4 * (unsigned)(numMasked > nonMasked) +
p->HiBitsFlag;
{
- unsigned r = (see->Summ >> see->Shift);
- see->Summ = (UInt16)(see->Summ - r);
+ // if (see->Summ) field is larger than 16-bit, we need only low 16 bits of Summ
+ unsigned summ = (UInt16)see->Summ; // & 0xFFFF
+ unsigned r = (summ >> see->Shift);
+ see->Summ = (UInt16)(summ - r);
*escFreq = r + (r == 0);
}
}
@@ -660,53 +947,158 @@ CPpmd_See *Ppmd7_MakeEscFreq(CPpmd7 *p, unsigned numMasked, UInt32 *escFreq)
return see;
}
+
static void NextContext(CPpmd7 *p)
{
CTX_PTR c = CTX(SUCCESSOR(p->FoundState));
- if (p->OrderFall == 0 && (Byte *)c > p->Text)
- p->MinContext = p->MaxContext = c;
+ if (p->OrderFall == 0 && (const Byte *)c > p->Text)
+ p->MaxContext = p->MinContext = c;
else
- UpdateModel(p);
+ Ppmd7_UpdateModel(p);
}
+
void Ppmd7_Update1(CPpmd7 *p)
{
CPpmd_State *s = p->FoundState;
- s->Freq += 4;
- p->MinContext->SummFreq += 4;
- if (s[0].Freq > s[-1].Freq)
+ unsigned freq = s->Freq;
+ freq += 4;
+ p->MinContext->Union2.SummFreq = (UInt16)(p->MinContext->Union2.SummFreq + 4);
+ s->Freq = (Byte)freq;
+ if (freq > s[-1].Freq)
{
- SwapStates(&s[0], &s[-1]);
+ SwapStates(s);
p->FoundState = --s;
- if (s->Freq > MAX_FREQ)
+ if (freq > MAX_FREQ)
Rescale(p);
}
NextContext(p);
}
+
void Ppmd7_Update1_0(CPpmd7 *p)
{
- p->PrevSuccess = (2 * p->FoundState->Freq > p->MinContext->SummFreq);
- p->RunLength += p->PrevSuccess;
- p->MinContext->SummFreq += 4;
- if ((p->FoundState->Freq += 4) > MAX_FREQ)
+ CPpmd_State *s = p->FoundState;
+ CPpmd7_Context *mc = p->MinContext;
+ unsigned freq = s->Freq;
+ unsigned summFreq = mc->Union2.SummFreq;
+ p->PrevSuccess = (2 * freq > summFreq);
+ p->RunLength += (int)p->PrevSuccess;
+ mc->Union2.SummFreq = (UInt16)(summFreq + 4);
+ freq += 4;
+ s->Freq = (Byte)freq;
+ if (freq > MAX_FREQ)
Rescale(p);
NextContext(p);
}
+
+/*
void Ppmd7_UpdateBin(CPpmd7 *p)
{
- p->FoundState->Freq = (Byte)(p->FoundState->Freq + (p->FoundState->Freq < 128 ? 1: 0));
+ unsigned freq = p->FoundState->Freq;
+ p->FoundState->Freq = (Byte)(freq + (freq < 128));
p->PrevSuccess = 1;
p->RunLength++;
NextContext(p);
}
+*/
void Ppmd7_Update2(CPpmd7 *p)
{
- p->MinContext->SummFreq += 4;
- if ((p->FoundState->Freq += 4) > MAX_FREQ)
- Rescale(p);
+ CPpmd_State *s = p->FoundState;
+ unsigned freq = s->Freq;
+ freq += 4;
p->RunLength = p->InitRL;
- UpdateModel(p);
+ p->MinContext->Union2.SummFreq = (UInt16)(p->MinContext->Union2.SummFreq + 4);
+ s->Freq = (Byte)freq;
+ if (freq > MAX_FREQ)
+ Rescale(p);
+ Ppmd7_UpdateModel(p);
+}
+
+
+
+/*
+PPMd Memory Map:
+{
+ [ 0 ] contains subset of original raw text, that is required to create context
+ records, Some symbols are not written, when max order context was reached
+ [ Text ] free area
+ [ UnitsStart ] CPpmd_State vectors and CPpmd7_Context records
+ [ LoUnit ] free area for CPpmd_State and CPpmd7_Context items
+[ HiUnit ] CPpmd7_Context records
+ [ Size ] end of array
}
+
+These addresses don't cross at any time.
+And the following condtions is true for addresses:
+ (0 <= Text < UnitsStart <= LoUnit <= HiUnit <= Size)
+
+Raw text is BYTE--aligned.
+the data in block [ UnitsStart ... Size ] contains 12-bytes aligned UNITs.
+
+Last UNIT of array at offset (Size - 12) is root order-0 CPpmd7_Context record.
+The code can free UNITs memory blocks that were allocated to store CPpmd_State vectors.
+The code doesn't free UNITs allocated for CPpmd7_Context records.
+
+The code calls RestartModel(), when there is no free memory for allocation.
+And RestartModel() changes the state to orignal start state, with full free block.
+
+
+The code allocates UNITs with the following order:
+
+Allocation of 1 UNIT for Context record
+ - from free space (HiUnit) down to (LoUnit)
+ - from FreeList[0]
+ - AllocUnitsRare()
+
+AllocUnits() for CPpmd_State vectors:
+ - from FreeList[i]
+ - from free space (LoUnit) up to (HiUnit)
+ - AllocUnitsRare()
+
+AllocUnitsRare()
+ - if (GlueCount == 0)
+ { Glue lists, GlueCount = 255, allocate from FreeList[i]] }
+ - loop for all higher sized FreeList[...] lists
+ - from (UnitsStart - Text), GlueCount--
+ - ERROR
+
+
+Each Record with Context contains the CPpmd_State vector, where each
+CPpmd_State contains the link to Successor.
+There are 3 types of Successor:
+ 1) NULL-Successor - NULL pointer. NULL-Successor links can be stored
+ only in 0-order Root Context Record.
+ We use 0 value as NULL-Successor
+ 2) RAW-Successor - the link to position in raw text,
+ that "RAW-Successor" is being created after first
+ occurrence of new symbol for some existing context record.
+ (RAW-Successor > 0).
+ 3) RECORD-Successor - the link to CPpmd7_Context record of (Order+1),
+ that record is being created when we go via RAW-Successor again.
+
+For any successors at any time: the following condtions are true for Successor links:
+(NULL-Successor < RAW-Successor < UnitsStart <= RECORD-Successor)
+
+
+---------- Symbol Frequency, SummFreq and Range in Range_Coder ----------
+
+CPpmd7_Context::SummFreq = Sum(Stats[].Freq) + Escape_Freq
+
+The PPMd code tries to fulfill the condition:
+ (SummFreq <= (256 * 128 = RC::kBot))
+
+We have (Sum(Stats[].Freq) <= 256 * 124), because of (MAX_FREQ = 124)
+So (4 = 128 - 124) is average reserve for Escape_Freq for each symbol.
+If (CPpmd_State::Freq) is not aligned for 4, the reserve can be 5, 6 or 7.
+SummFreq and Escape_Freq can be changed in Rescale() and *Update*() functions.
+Rescale() can remove symbols only from max-order contexts. So Escape_Freq can increase after multiple calls of Rescale() for
+max-order context.
+
+When the PPMd code still break (Total <= RC::Range) condition in range coder,
+we have two ways to resolve that problem:
+ 1) we can report error, if we want to keep compatibility with original PPMd code that has no fix for such cases.
+ 2) we can reduce (Total) value to (RC::Range) by reducing (Escape_Freq) part of (Total) value.
+*/
diff --git a/multiarc/src/formats/7z/C/Ppmd7.h b/multiarc/src/formats/7z/C/Ppmd7.h
index 610539a0..d31809ae 100644..100755
--- a/multiarc/src/formats/7z/C/Ppmd7.h
+++ b/multiarc/src/formats/7z/C/Ppmd7.h
@@ -1,10 +1,8 @@
-/* Ppmd7.h -- PPMdH compression codec
-2018-07-04 : Igor Pavlov : Public domain
-This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
-
-/* This code supports virtual RangeDecoder and includes the implementation
-of RangeCoder from 7z, instead of RangeCoder from original PPMd var.H.
-If you need the compatibility with original PPMd var.H, you can use external RangeDecoder */
+/* Ppmd7.h -- Ppmd7 (PPMdH) compression codec
+2021-04-13 : Igor Pavlov : Public domain
+This code is based on:
+ PPMd var.H (2001): Dmitry Shkarin : Public domain */
+
#ifndef __PPMD7_H
#define __PPMD7_H
@@ -21,23 +19,56 @@ EXTERN_C_BEGIN
struct CPpmd7_Context_;
-typedef
- #ifdef PPMD_32BIT
- struct CPpmd7_Context_ *
- #else
- UInt32
- #endif
- CPpmd7_Context_Ref;
+typedef Ppmd_Ref_Type(struct CPpmd7_Context_) CPpmd7_Context_Ref;
+
+// MY_CPU_pragma_pack_push_1
typedef struct CPpmd7_Context_
{
UInt16 NumStats;
- UInt16 SummFreq;
- CPpmd_State_Ref Stats;
+
+
+ union
+ {
+ UInt16 SummFreq;
+ CPpmd_State2 State2;
+ } Union2;
+
+ union
+ {
+ CPpmd_State_Ref Stats;
+ CPpmd_State4 State4;
+ } Union4;
+
CPpmd7_Context_Ref Suffix;
} CPpmd7_Context;
-#define Ppmd7Context_OneState(p) ((CPpmd_State *)&(p)->SummFreq)
+// MY_CPU_pragma_pop
+
+#define Ppmd7Context_OneState(p) ((CPpmd_State *)&(p)->Union2)
+
+
+
+
+typedef struct
+{
+ UInt32 Range;
+ UInt32 Code;
+ UInt32 Low;
+ IByteIn *Stream;
+} CPpmd7_RangeDec;
+
+
+typedef struct
+{
+ UInt32 Range;
+ Byte Cache;
+ // Byte _dummy_[3];
+ UInt64 Low;
+ UInt64 CacheSize;
+ IByteOut *Stream;
+} CPpmd7z_RangeEnc;
+
typedef struct
{
@@ -48,17 +79,30 @@ typedef struct
UInt32 Size;
UInt32 GlueCount;
- Byte *Base, *LoUnit, *HiUnit, *Text, *UnitsStart;
UInt32 AlignOffset;
+ Byte *Base, *LoUnit, *HiUnit, *Text, *UnitsStart;
- Byte Indx2Units[PPMD_NUM_INDEXES];
+
+
+
+ union
+ {
+ CPpmd7_RangeDec dec;
+ CPpmd7z_RangeEnc enc;
+ } rc;
+
+ Byte Indx2Units[PPMD_NUM_INDEXES + 2]; // +2 for alignment
Byte Units2Indx[128];
CPpmd_Void_Ref FreeList[PPMD_NUM_INDEXES];
- Byte NS2Indx[256], NS2BSIndx[256], HB2Flag[256];
+
+ Byte NS2BSIndx[256], NS2Indx[256];
+ Byte ExpEscape[16];
CPpmd_See DummySee, See[25][16];
UInt16 BinSumm[128][64];
+ // int LastSymbol;
} CPpmd7;
+
void Ppmd7_Construct(CPpmd7 *p);
BoolInt Ppmd7_Alloc(CPpmd7 *p, UInt32 size, ISzAllocPtr alloc);
void Ppmd7_Free(CPpmd7 *p, ISzAllocPtr alloc);
@@ -68,74 +112,69 @@ void Ppmd7_Init(CPpmd7 *p, unsigned maxOrder);
/* ---------- Internal Functions ---------- */
-extern const Byte PPMD7_kExpEscape[16];
-
-#ifdef PPMD_32BIT
- #define Ppmd7_GetPtr(p, ptr) (ptr)
- #define Ppmd7_GetContext(p, ptr) (ptr)
- #define Ppmd7_GetStats(p, ctx) ((ctx)->Stats)
-#else
- #define Ppmd7_GetPtr(p, offs) ((void *)((p)->Base + (offs)))
- #define Ppmd7_GetContext(p, offs) ((CPpmd7_Context *)Ppmd7_GetPtr((p), (offs)))
- #define Ppmd7_GetStats(p, ctx) ((CPpmd_State *)Ppmd7_GetPtr((p), ((ctx)->Stats)))
-#endif
+#define Ppmd7_GetPtr(p, ptr) Ppmd_GetPtr(p, ptr)
+#define Ppmd7_GetContext(p, ptr) Ppmd_GetPtr_Type(p, ptr, CPpmd7_Context)
+#define Ppmd7_GetStats(p, ctx) Ppmd_GetPtr_Type(p, (ctx)->Union4.Stats, CPpmd_State)
void Ppmd7_Update1(CPpmd7 *p);
void Ppmd7_Update1_0(CPpmd7 *p);
void Ppmd7_Update2(CPpmd7 *p);
-void Ppmd7_UpdateBin(CPpmd7 *p);
+
+#define PPMD7_HiBitsFlag_3(sym) ((((unsigned)sym + 0xC0) >> (8 - 3)) & (1 << 3))
+#define PPMD7_HiBitsFlag_4(sym) ((((unsigned)sym + 0xC0) >> (8 - 4)) & (1 << 4))
+// #define PPMD7_HiBitsFlag_3(sym) ((sym) < 0x40 ? 0 : (1 << 3))
+// #define PPMD7_HiBitsFlag_4(sym) ((sym) < 0x40 ? 0 : (1 << 4))
#define Ppmd7_GetBinSumm(p) \
- &p->BinSumm[(size_t)(unsigned)Ppmd7Context_OneState(p->MinContext)->Freq - 1][p->PrevSuccess + \
- p->NS2BSIndx[(size_t)Ppmd7_GetContext(p, p->MinContext->Suffix)->NumStats - 1] + \
- (p->HiBitsFlag = p->HB2Flag[p->FoundState->Symbol]) + \
- 2 * p->HB2Flag[(unsigned)Ppmd7Context_OneState(p->MinContext)->Symbol] + \
- ((p->RunLength >> 26) & 0x20)]
+ &p->BinSumm[(size_t)(unsigned)Ppmd7Context_OneState(p->MinContext)->Freq - 1] \
+ [ p->PrevSuccess + ((p->RunLength >> 26) & 0x20) \
+ + p->NS2BSIndx[(size_t)Ppmd7_GetContext(p, p->MinContext->Suffix)->NumStats - 1] \
+ + PPMD7_HiBitsFlag_4(Ppmd7Context_OneState(p->MinContext)->Symbol) \
+ + (p->HiBitsFlag = PPMD7_HiBitsFlag_3(p->FoundState->Symbol)) ]
CPpmd_See *Ppmd7_MakeEscFreq(CPpmd7 *p, unsigned numMasked, UInt32 *scale);
+/*
+We support two versions of Ppmd7 (PPMdH) methods that use same CPpmd7 structure:
+ 1) Ppmd7a_*: original PPMdH
+ 2) Ppmd7z_*: modified PPMdH with 7z Range Coder
+Ppmd7_*: the structures and functions that are common for both versions of PPMd7 (PPMdH)
+*/
+
/* ---------- Decode ---------- */
-typedef struct IPpmd7_RangeDec IPpmd7_RangeDec;
+#define PPMD7_SYM_END (-1)
+#define PPMD7_SYM_ERROR (-2)
-struct IPpmd7_RangeDec
-{
- UInt32 (*GetThreshold)(const IPpmd7_RangeDec *p, UInt32 total);
- void (*Decode)(const IPpmd7_RangeDec *p, UInt32 start, UInt32 size);
- UInt32 (*DecodeBit)(const IPpmd7_RangeDec *p, UInt32 size0);
-};
+/*
+You must set (CPpmd7::rc.dec.Stream) before Ppmd7*_RangeDec_Init()
-typedef struct
-{
- IPpmd7_RangeDec vt;
- UInt32 Range;
- UInt32 Code;
- IByteIn *Stream;
-} CPpmd7z_RangeDec;
+Ppmd7*_DecodeSymbol()
+out:
+ >= 0 : decoded byte
+ -1 : PPMD7_SYM_END : End of payload marker
+ -2 : PPMD7_SYM_ERROR : Data error
+*/
-void Ppmd7z_RangeDec_CreateVTable(CPpmd7z_RangeDec *p);
-BoolInt Ppmd7z_RangeDec_Init(CPpmd7z_RangeDec *p);
-#define Ppmd7z_RangeDec_IsFinishedOK(p) ((p)->Code == 0)
+/* Ppmd7a_* : original PPMdH */
+BoolInt Ppmd7a_RangeDec_Init(CPpmd7_RangeDec *p);
+#define Ppmd7a_RangeDec_IsFinishedOK(p) ((p)->Code == 0)
+int Ppmd7a_DecodeSymbol(CPpmd7 *p);
-int Ppmd7_DecodeSymbol(CPpmd7 *p, const IPpmd7_RangeDec *rc);
+/* Ppmd7z_* : modified PPMdH with 7z Range Coder */
+BoolInt Ppmd7z_RangeDec_Init(CPpmd7_RangeDec *p);
+#define Ppmd7z_RangeDec_IsFinishedOK(p) ((p)->Code == 0)
+int Ppmd7z_DecodeSymbol(CPpmd7 *p);
+// Byte *Ppmd7z_DecodeSymbols(CPpmd7 *p, Byte *buf, const Byte *lim);
/* ---------- Encode ---------- */
-typedef struct
-{
- UInt64 Low;
- UInt32 Range;
- Byte Cache;
- UInt64 CacheSize;
- IByteOut *Stream;
-} CPpmd7z_RangeEnc;
-
-void Ppmd7z_RangeEnc_Init(CPpmd7z_RangeEnc *p);
-void Ppmd7z_RangeEnc_FlushData(CPpmd7z_RangeEnc *p);
-
-void Ppmd7_EncodeSymbol(CPpmd7 *p, CPpmd7z_RangeEnc *rc, int symbol);
+void Ppmd7z_Init_RangeEnc(CPpmd7 *p);
+void Ppmd7z_Flush_RangeEnc(CPpmd7 *p);
+// void Ppmd7z_EncodeSymbol(CPpmd7 *p, int symbol);
+void Ppmd7z_EncodeSymbols(CPpmd7 *p, const Byte *buf, const Byte *lim);
EXTERN_C_END
diff --git a/multiarc/src/formats/7z/C/Ppmd7Dec.c b/multiarc/src/formats/7z/C/Ppmd7Dec.c
index 311e9f9d..55d74ff9 100644..100755
--- a/multiarc/src/formats/7z/C/Ppmd7Dec.c
+++ b/multiarc/src/formats/7z/C/Ppmd7Dec.c
@@ -1,6 +1,8 @@
-/* Ppmd7Dec.c -- PPMdH Decoder
-2018-07-04 : Igor Pavlov : Public domain
-This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
+/* Ppmd7Dec.c -- Ppmd7z (PPMdH with 7z Range Coder) Decoder
+2021-04-13 : Igor Pavlov : Public domain
+This code is based on:
+ PPMd var.H (2001): Dmitry Shkarin : Public domain */
+
#include "Precomp.h"
@@ -8,184 +10,288 @@ This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
#define kTopValue (1 << 24)
-BoolInt Ppmd7z_RangeDec_Init(CPpmd7z_RangeDec *p)
+
+#define READ_BYTE(p) IByteIn_Read((p)->Stream)
+
+BoolInt Ppmd7z_RangeDec_Init(CPpmd7_RangeDec *p)
{
unsigned i;
p->Code = 0;
p->Range = 0xFFFFFFFF;
- if (IByteIn_Read(p->Stream) != 0)
+ if (READ_BYTE(p) != 0)
return False;
for (i = 0; i < 4; i++)
- p->Code = (p->Code << 8) | IByteIn_Read(p->Stream);
+ p->Code = (p->Code << 8) | READ_BYTE(p);
return (p->Code < 0xFFFFFFFF);
}
-#define GET_Ppmd7z_RangeDec CPpmd7z_RangeDec *p = CONTAINER_FROM_VTBL(pp, CPpmd7z_RangeDec, vt);
-
-static UInt32 Range_GetThreshold(const IPpmd7_RangeDec *pp, UInt32 total)
-{
- GET_Ppmd7z_RangeDec
- return p->Code / (p->Range /= total);
-}
+#define RC_NORM_BASE(p) if ((p)->Range < kTopValue) \
+ { (p)->Code = ((p)->Code << 8) | READ_BYTE(p); (p)->Range <<= 8;
-static void Range_Normalize(CPpmd7z_RangeDec *p)
-{
- if (p->Range < kTopValue)
- {
- p->Code = (p->Code << 8) | IByteIn_Read(p->Stream);
- p->Range <<= 8;
- if (p->Range < kTopValue)
- {
- p->Code = (p->Code << 8) | IByteIn_Read(p->Stream);
- p->Range <<= 8;
- }
- }
-}
+#define RC_NORM_1(p) RC_NORM_BASE(p) }
+#define RC_NORM(p) RC_NORM_BASE(p) RC_NORM_BASE(p) }}
-static void Range_Decode(const IPpmd7_RangeDec *pp, UInt32 start, UInt32 size)
-{
- GET_Ppmd7z_RangeDec
- p->Code -= start * p->Range;
- p->Range *= size;
- Range_Normalize(p);
-}
+// we must use only one type of Normalization from two: LOCAL or REMOTE
+#define RC_NORM_LOCAL(p) // RC_NORM(p)
+#define RC_NORM_REMOTE(p) RC_NORM(p)
-static UInt32 Range_DecodeBit(const IPpmd7_RangeDec *pp, UInt32 size0)
-{
- GET_Ppmd7z_RangeDec
- UInt32 newBound = (p->Range >> 14) * size0;
- UInt32 symbol;
- if (p->Code < newBound)
- {
- symbol = 0;
- p->Range = newBound;
- }
- else
- {
- symbol = 1;
- p->Code -= newBound;
- p->Range -= newBound;
- }
- Range_Normalize(p);
- return symbol;
-}
+#define R (&p->rc.dec)
-void Ppmd7z_RangeDec_CreateVTable(CPpmd7z_RangeDec *p)
+MY_FORCE_INLINE
+// MY_NO_INLINE
+static void RangeDec_Decode(CPpmd7 *p, UInt32 start, UInt32 size)
{
- p->vt.GetThreshold = Range_GetThreshold;
- p->vt.Decode = Range_Decode;
- p->vt.DecodeBit = Range_DecodeBit;
+
+
+ R->Code -= start * R->Range;
+ R->Range *= size;
+ RC_NORM_LOCAL(R)
}
+#define RC_Decode(start, size) RangeDec_Decode(p, start, size);
+#define RC_DecodeFinal(start, size) RC_Decode(start, size) RC_NORM_REMOTE(R)
+#define RC_GetThreshold(total) (R->Code / (R->Range /= (total)))
+
-#define MASK(sym) ((signed char *)charMask)[sym]
+#define CTX(ref) ((CPpmd7_Context *)Ppmd7_GetContext(p, ref))
+typedef CPpmd7_Context * CTX_PTR;
+#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p)
+void Ppmd7_UpdateModel(CPpmd7 *p);
-int Ppmd7_DecodeSymbol(CPpmd7 *p, const IPpmd7_RangeDec *rc)
+#define MASK(sym) ((unsigned char *)charMask)[sym]
+// MY_FORCE_INLINE
+// static
+int Ppmd7z_DecodeSymbol(CPpmd7 *p)
{
size_t charMask[256 / sizeof(size_t)];
+
if (p->MinContext->NumStats != 1)
{
CPpmd_State *s = Ppmd7_GetStats(p, p->MinContext);
unsigned i;
UInt32 count, hiCnt;
- if ((count = rc->GetThreshold(rc, p->MinContext->SummFreq)) < (hiCnt = s->Freq))
+ UInt32 summFreq = p->MinContext->Union2.SummFreq;
+
+
+
+
+ count = RC_GetThreshold(summFreq);
+ hiCnt = count;
+
+ if ((Int32)(count -= s->Freq) < 0)
{
- Byte symbol;
- rc->Decode(rc, 0, s->Freq);
+ Byte sym;
+ RC_DecodeFinal(0, s->Freq);
p->FoundState = s;
- symbol = s->Symbol;
+ sym = s->Symbol;
Ppmd7_Update1_0(p);
- return symbol;
+ return sym;
}
+
p->PrevSuccess = 0;
- i = p->MinContext->NumStats - 1;
+ i = (unsigned)p->MinContext->NumStats - 1;
+
do
{
- if ((hiCnt += (++s)->Freq) > count)
+ if ((Int32)(count -= (++s)->Freq) < 0)
{
- Byte symbol;
- rc->Decode(rc, hiCnt - s->Freq, s->Freq);
+ Byte sym;
+ RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq);
p->FoundState = s;
- symbol = s->Symbol;
+ sym = s->Symbol;
Ppmd7_Update1(p);
- return symbol;
+ return sym;
}
}
while (--i);
- if (count >= p->MinContext->SummFreq)
- return -2;
- p->HiBitsFlag = p->HB2Flag[p->FoundState->Symbol];
- rc->Decode(rc, hiCnt, p->MinContext->SummFreq - hiCnt);
+
+ if (hiCnt >= summFreq)
+ return PPMD7_SYM_ERROR;
+
+ hiCnt -= count;
+ RC_Decode(hiCnt, summFreq - hiCnt);
+
+ p->HiBitsFlag = PPMD7_HiBitsFlag_3(p->FoundState->Symbol);
PPMD_SetAllBitsIn256Bytes(charMask);
- MASK(s->Symbol) = 0;
- i = p->MinContext->NumStats - 1;
- do { MASK((--s)->Symbol) = 0; } while (--i);
+ // i = p->MinContext->NumStats - 1;
+ // do { MASK((--s)->Symbol) = 0; } while (--i);
+ {
+ CPpmd_State *s2 = Ppmd7_GetStats(p, p->MinContext);
+ MASK(s->Symbol) = 0;
+ do
+ {
+ unsigned sym0 = s2[0].Symbol;
+ unsigned sym1 = s2[1].Symbol;
+ s2 += 2;
+ MASK(sym0) = 0;
+ MASK(sym1) = 0;
+ }
+ while (s2 < s);
+ }
}
else
{
+ CPpmd_State *s = Ppmd7Context_OneState(p->MinContext);
UInt16 *prob = Ppmd7_GetBinSumm(p);
- if (rc->DecodeBit(rc, *prob) == 0)
+ UInt32 pr = *prob;
+ UInt32 size0 = (R->Range >> 14) * pr;
+ pr = PPMD_UPDATE_PROB_1(pr);
+
+ if (R->Code < size0)
{
- Byte symbol;
- *prob = (UInt16)PPMD_UPDATE_PROB_0(*prob);
- symbol = (p->FoundState = Ppmd7Context_OneState(p->MinContext))->Symbol;
- Ppmd7_UpdateBin(p);
- return symbol;
+ Byte sym;
+ *prob = (UInt16)(pr + (1 << PPMD_INT_BITS));
+
+ // RangeDec_DecodeBit0(size0);
+ R->Range = size0;
+ RC_NORM_1(R)
+ /* we can use single byte normalization here because of
+ (min(BinSumm[][]) = 95) > (1 << (14 - 8)) */
+
+ // sym = (p->FoundState = Ppmd7Context_OneState(p->MinContext))->Symbol;
+ // Ppmd7_UpdateBin(p);
+ {
+ unsigned freq = s->Freq;
+ CTX_PTR c = CTX(SUCCESSOR(s));
+ sym = s->Symbol;
+ p->FoundState = s;
+ p->PrevSuccess = 1;
+ p->RunLength++;
+ s->Freq = (Byte)(freq + (freq < 128));
+ // NextContext(p);
+ if (p->OrderFall == 0 && (const Byte *)c > p->Text)
+ p->MaxContext = p->MinContext = c;
+ else
+ Ppmd7_UpdateModel(p);
+ }
+ return sym;
}
- *prob = (UInt16)PPMD_UPDATE_PROB_1(*prob);
- p->InitEsc = PPMD7_kExpEscape[*prob >> 10];
+
+ *prob = (UInt16)pr;
+ p->InitEsc = p->ExpEscape[pr >> 10];
+
+ // RangeDec_DecodeBit1(size0);
+
+ R->Code -= size0;
+ R->Range -= size0;
+ RC_NORM_LOCAL(R)
+
PPMD_SetAllBitsIn256Bytes(charMask);
MASK(Ppmd7Context_OneState(p->MinContext)->Symbol) = 0;
p->PrevSuccess = 0;
}
+
for (;;)
{
- CPpmd_State *ps[256], *s;
+ CPpmd_State *s, *s2;
UInt32 freqSum, count, hiCnt;
+
CPpmd_See *see;
- unsigned i, num, numMasked = p->MinContext->NumStats;
+ CPpmd7_Context *mc;
+ unsigned numMasked;
+ RC_NORM_REMOTE(R)
+ mc = p->MinContext;
+ numMasked = mc->NumStats;
+
do
{
p->OrderFall++;
- if (!p->MinContext->Suffix)
- return -1;
- p->MinContext = Ppmd7_GetContext(p, p->MinContext->Suffix);
+ if (!mc->Suffix)
+ return PPMD7_SYM_END;
+ mc = Ppmd7_GetContext(p, mc->Suffix);
}
- while (p->MinContext->NumStats == numMasked);
- hiCnt = 0;
- s = Ppmd7_GetStats(p, p->MinContext);
- i = 0;
- num = p->MinContext->NumStats - numMasked;
- do
+ while (mc->NumStats == numMasked);
+
+ s = Ppmd7_GetStats(p, mc);
+
{
- int k = (int)(MASK(s->Symbol));
- hiCnt += (s->Freq & k);
- ps[i] = s++;
- i -= k;
+ unsigned num = mc->NumStats;
+ unsigned num2 = num / 2;
+
+ num &= 1;
+ hiCnt = (s->Freq & (unsigned)(MASK(s->Symbol))) & (0 - (UInt32)num);
+ s += num;
+ p->MinContext = mc;
+
+ do
+ {
+ unsigned sym0 = s[0].Symbol;
+ unsigned sym1 = s[1].Symbol;
+ s += 2;
+ hiCnt += (s[-2].Freq & (unsigned)(MASK(sym0)));
+ hiCnt += (s[-1].Freq & (unsigned)(MASK(sym1)));
+ }
+ while (--num2);
}
- while (i != num);
-
+
see = Ppmd7_MakeEscFreq(p, numMasked, &freqSum);
freqSum += hiCnt;
- count = rc->GetThreshold(rc, freqSum);
+
+
+
+
+ count = RC_GetThreshold(freqSum);
if (count < hiCnt)
{
- Byte symbol;
- CPpmd_State **pps = ps;
- for (hiCnt = 0; (hiCnt += (*pps)->Freq) <= count; pps++);
- s = *pps;
- rc->Decode(rc, hiCnt - s->Freq, s->Freq);
+ Byte sym;
+
+ s = Ppmd7_GetStats(p, p->MinContext);
+ hiCnt = count;
+ // count -= s->Freq & (unsigned)(MASK(s->Symbol));
+ // if ((Int32)count >= 0)
+ {
+ for (;;)
+ {
+ count -= s->Freq & (unsigned)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break;
+ // count -= s->Freq & (unsigned)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break;
+ };
+ }
+ s--;
+ RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq);
+
+ // new (see->Summ) value can overflow over 16-bits in some rare cases
Ppmd_See_Update(see);
p->FoundState = s;
- symbol = s->Symbol;
+ sym = s->Symbol;
Ppmd7_Update2(p);
- return symbol;
+ return sym;
}
+
if (count >= freqSum)
- return -2;
- rc->Decode(rc, hiCnt, freqSum - hiCnt);
+ return PPMD7_SYM_ERROR;
+
+ RC_Decode(hiCnt, freqSum - hiCnt);
+
+ // We increase (see->Summ) for sum of Freqs of all non_Masked symbols.
+ // new (see->Summ) value can overflow over 16-bits in some rare cases
see->Summ = (UInt16)(see->Summ + freqSum);
- do { MASK(ps[--i]->Symbol) = 0; } while (i != 0);
+
+ s = Ppmd7_GetStats(p, p->MinContext);
+ s2 = s + p->MinContext->NumStats;
+ do
+ {
+ MASK(s->Symbol) = 0;
+ s++;
+ }
+ while (s != s2);
+ }
+}
+
+/*
+Byte *Ppmd7z_DecodeSymbols(CPpmd7 *p, Byte *buf, const Byte *lim)
+{
+ int sym = 0;
+ if (buf != lim)
+ do
+ {
+ sym = Ppmd7z_DecodeSymbol(p);
+ if (sym < 0)
+ break;
+ *buf = (Byte)sym;
}
+ while (++buf < lim);
+ p->LastSymbol = sym;
+ return buf;
}
+*/
diff --git a/multiarc/src/formats/7z/C/Ppmd7Enc.c b/multiarc/src/formats/7z/C/Ppmd7Enc.c
index 286b8712..62139c5b 100644..100755
--- a/multiarc/src/formats/7z/C/Ppmd7Enc.c
+++ b/multiarc/src/formats/7z/C/Ppmd7Enc.c
@@ -1,6 +1,8 @@
-/* Ppmd7Enc.c -- PPMdH Encoder
-2017-04-03 : Igor Pavlov : Public domain
-This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
+/* Ppmd7Enc.c -- Ppmd7z (PPMdH with 7z Range Coder) Encoder
+2021-04-13 : Igor Pavlov : Public domain
+This code is based on:
+ PPMd var.H (2001): Dmitry Shkarin : Public domain */
+
#include "Precomp.h"
@@ -8,65 +10,60 @@ This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
#define kTopValue (1 << 24)
-void Ppmd7z_RangeEnc_Init(CPpmd7z_RangeEnc *p)
+#define R (&p->rc.enc)
+
+void Ppmd7z_Init_RangeEnc(CPpmd7 *p)
{
- p->Low = 0;
- p->Range = 0xFFFFFFFF;
- p->Cache = 0;
- p->CacheSize = 1;
+ R->Low = 0;
+ R->Range = 0xFFFFFFFF;
+ R->Cache = 0;
+ R->CacheSize = 1;
}
-static void RangeEnc_ShiftLow(CPpmd7z_RangeEnc *p)
+MY_NO_INLINE
+static void RangeEnc_ShiftLow(CPpmd7 *p)
{
- if ((UInt32)p->Low < (UInt32)0xFF000000 || (unsigned)(p->Low >> 32) != 0)
+ if ((UInt32)R->Low < (UInt32)0xFF000000 || (unsigned)(R->Low >> 32) != 0)
{
- Byte temp = p->Cache;
+ Byte temp = R->Cache;
do
{
- IByteOut_Write(p->Stream, (Byte)(temp + (Byte)(p->Low >> 32)));
+ IByteOut_Write(R->Stream, (Byte)(temp + (Byte)(R->Low >> 32)));
temp = 0xFF;
}
- while (--p->CacheSize != 0);
- p->Cache = (Byte)((UInt32)p->Low >> 24);
+ while (--R->CacheSize != 0);
+ R->Cache = (Byte)((UInt32)R->Low >> 24);
}
- p->CacheSize++;
- p->Low = (UInt32)p->Low << 8;
+ R->CacheSize++;
+ R->Low = (UInt32)((UInt32)R->Low << 8);
}
-static void RangeEnc_Encode(CPpmd7z_RangeEnc *p, UInt32 start, UInt32 size, UInt32 total)
-{
- p->Low += start * (p->Range /= total);
- p->Range *= size;
- while (p->Range < kTopValue)
- {
- p->Range <<= 8;
- RangeEnc_ShiftLow(p);
- }
-}
+#define RC_NORM_BASE(p) if (R->Range < kTopValue) { R->Range <<= 8; RangeEnc_ShiftLow(p);
+#define RC_NORM_1(p) RC_NORM_BASE(p) }
+#define RC_NORM(p) RC_NORM_BASE(p) RC_NORM_BASE(p) }}
-static void RangeEnc_EncodeBit_0(CPpmd7z_RangeEnc *p, UInt32 size0)
-{
- p->Range = (p->Range >> 14) * size0;
- while (p->Range < kTopValue)
- {
- p->Range <<= 8;
- RangeEnc_ShiftLow(p);
- }
-}
+// we must use only one type of Normalization from two: LOCAL or REMOTE
+#define RC_NORM_LOCAL(p) // RC_NORM(p)
+#define RC_NORM_REMOTE(p) RC_NORM(p)
+
+/*
+#define RangeEnc_Encode(p, start, _size_) \
+ { UInt32 size = _size_; \
+ R->Low += start * R->Range; \
+ R->Range *= size; \
+ RC_NORM_LOCAL(p); }
+*/
-static void RangeEnc_EncodeBit_1(CPpmd7z_RangeEnc *p, UInt32 size0)
+MY_FORCE_INLINE
+// MY_NO_INLINE
+static void RangeEnc_Encode(CPpmd7 *p, UInt32 start, UInt32 size)
{
- UInt32 newBound = (p->Range >> 14) * size0;
- p->Low += newBound;
- p->Range -= newBound;
- while (p->Range < kTopValue)
- {
- p->Range <<= 8;
- RangeEnc_ShiftLow(p);
- }
+ R->Low += start * R->Range;
+ R->Range *= size;
+ RC_NORM_LOCAL(p);
}
-void Ppmd7z_RangeEnc_FlushData(CPpmd7z_RangeEnc *p)
+void Ppmd7z_Flush_RangeEnc(CPpmd7 *p)
{
unsigned i;
for (i = 0; i < 5; i++)
@@ -74,31 +71,53 @@ void Ppmd7z_RangeEnc_FlushData(CPpmd7z_RangeEnc *p)
}
-#define MASK(sym) ((signed char *)charMask)[sym]
-void Ppmd7_EncodeSymbol(CPpmd7 *p, CPpmd7z_RangeEnc *rc, int symbol)
+#define RC_Encode(start, size) RangeEnc_Encode(p, start, size);
+#define RC_EncodeFinal(start, size) RC_Encode(start, size); RC_NORM_REMOTE(p);
+
+#define CTX(ref) ((CPpmd7_Context *)Ppmd7_GetContext(p, ref))
+#define SUFFIX(ctx) CTX((ctx)->Suffix)
+typedef CPpmd7_Context * CTX_PTR;
+#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p)
+
+void Ppmd7_UpdateModel(CPpmd7 *p);
+
+#define MASK(sym) ((unsigned char *)charMask)[sym]
+
+MY_FORCE_INLINE
+static
+void Ppmd7z_EncodeSymbol(CPpmd7 *p, int symbol)
{
size_t charMask[256 / sizeof(size_t)];
+
if (p->MinContext->NumStats != 1)
{
CPpmd_State *s = Ppmd7_GetStats(p, p->MinContext);
UInt32 sum;
unsigned i;
+
+
+
+
+ R->Range /= p->MinContext->Union2.SummFreq;
+
if (s->Symbol == symbol)
{
- RangeEnc_Encode(rc, 0, s->Freq, p->MinContext->SummFreq);
+ // R->Range /= p->MinContext->Union2.SummFreq;
+ RC_EncodeFinal(0, s->Freq);
p->FoundState = s;
Ppmd7_Update1_0(p);
return;
}
p->PrevSuccess = 0;
sum = s->Freq;
- i = p->MinContext->NumStats - 1;
+ i = (unsigned)p->MinContext->NumStats - 1;
do
{
if ((++s)->Symbol == symbol)
{
- RangeEnc_Encode(rc, sum, s->Freq, p->MinContext->SummFreq);
+ // R->Range /= p->MinContext->Union2.SummFreq;
+ RC_EncodeFinal(sum, s->Freq);
p->FoundState = s;
Ppmd7_Update1(p);
return;
@@ -106,82 +125,199 @@ void Ppmd7_EncodeSymbol(CPpmd7 *p, CPpmd7z_RangeEnc *rc, int symbol)
sum += s->Freq;
}
while (--i);
+
+ // R->Range /= p->MinContext->Union2.SummFreq;
+ RC_Encode(sum, p->MinContext->Union2.SummFreq - sum);
- p->HiBitsFlag = p->HB2Flag[p->FoundState->Symbol];
+ p->HiBitsFlag = PPMD7_HiBitsFlag_3(p->FoundState->Symbol);
PPMD_SetAllBitsIn256Bytes(charMask);
- MASK(s->Symbol) = 0;
- i = p->MinContext->NumStats - 1;
- do { MASK((--s)->Symbol) = 0; } while (--i);
- RangeEnc_Encode(rc, sum, p->MinContext->SummFreq - sum, p->MinContext->SummFreq);
+ // MASK(s->Symbol) = 0;
+ // i = p->MinContext->NumStats - 1;
+ // do { MASK((--s)->Symbol) = 0; } while (--i);
+ {
+ CPpmd_State *s2 = Ppmd7_GetStats(p, p->MinContext);
+ MASK(s->Symbol) = 0;
+ do
+ {
+ unsigned sym0 = s2[0].Symbol;
+ unsigned sym1 = s2[1].Symbol;
+ s2 += 2;
+ MASK(sym0) = 0;
+ MASK(sym1) = 0;
+ }
+ while (s2 < s);
+ }
}
else
{
UInt16 *prob = Ppmd7_GetBinSumm(p);
CPpmd_State *s = Ppmd7Context_OneState(p->MinContext);
+ UInt32 pr = *prob;
+ UInt32 bound = (R->Range >> 14) * pr;
+ pr = PPMD_UPDATE_PROB_1(pr);
if (s->Symbol == symbol)
{
- RangeEnc_EncodeBit_0(rc, *prob);
- *prob = (UInt16)PPMD_UPDATE_PROB_0(*prob);
- p->FoundState = s;
- Ppmd7_UpdateBin(p);
+ *prob = (UInt16)(pr + (1 << PPMD_INT_BITS));
+ // RangeEnc_EncodeBit_0(p, bound);
+ R->Range = bound;
+ RC_NORM_1(p);
+
+ // p->FoundState = s;
+ // Ppmd7_UpdateBin(p);
+ {
+ unsigned freq = s->Freq;
+ CTX_PTR c = CTX(SUCCESSOR(s));
+ p->FoundState = s;
+ p->PrevSuccess = 1;
+ p->RunLength++;
+ s->Freq = (Byte)(freq + (freq < 128));
+ // NextContext(p);
+ if (p->OrderFall == 0 && (const Byte *)c > p->Text)
+ p->MaxContext = p->MinContext = c;
+ else
+ Ppmd7_UpdateModel(p);
+ }
return;
}
- else
- {
- RangeEnc_EncodeBit_1(rc, *prob);
- *prob = (UInt16)PPMD_UPDATE_PROB_1(*prob);
- p->InitEsc = PPMD7_kExpEscape[*prob >> 10];
- PPMD_SetAllBitsIn256Bytes(charMask);
- MASK(s->Symbol) = 0;
- p->PrevSuccess = 0;
- }
+
+ *prob = (UInt16)pr;
+ p->InitEsc = p->ExpEscape[pr >> 10];
+ // RangeEnc_EncodeBit_1(p, bound);
+ R->Low += bound;
+ R->Range -= bound;
+ RC_NORM_LOCAL(p)
+
+ PPMD_SetAllBitsIn256Bytes(charMask);
+ MASK(s->Symbol) = 0;
+ p->PrevSuccess = 0;
}
+
for (;;)
{
- UInt32 escFreq;
CPpmd_See *see;
CPpmd_State *s;
- UInt32 sum;
- unsigned i, numMasked = p->MinContext->NumStats;
+ UInt32 sum, escFreq;
+ CPpmd7_Context *mc;
+ unsigned i, numMasked;
+
+ RC_NORM_REMOTE(p)
+
+ mc = p->MinContext;
+ numMasked = mc->NumStats;
+
do
{
p->OrderFall++;
- if (!p->MinContext->Suffix)
+ if (!mc->Suffix)
return; /* EndMarker (symbol = -1) */
- p->MinContext = Ppmd7_GetContext(p, p->MinContext->Suffix);
+ mc = Ppmd7_GetContext(p, mc->Suffix);
+ i = mc->NumStats;
}
- while (p->MinContext->NumStats == numMasked);
+ while (i == numMasked);
+
+ p->MinContext = mc;
- see = Ppmd7_MakeEscFreq(p, numMasked, &escFreq);
- s = Ppmd7_GetStats(p, p->MinContext);
+ // see = Ppmd7_MakeEscFreq(p, numMasked, &escFreq);
+ {
+ if (i != 256)
+ {
+ unsigned nonMasked = i - numMasked;
+ see = p->See[(unsigned)p->NS2Indx[(size_t)nonMasked - 1]]
+ + p->HiBitsFlag
+ + (nonMasked < (unsigned)SUFFIX(mc)->NumStats - i)
+ + 2 * (unsigned)(mc->Union2.SummFreq < 11 * i)
+ + 4 * (unsigned)(numMasked > nonMasked);
+ {
+ // if (see->Summ) field is larger than 16-bit, we need only low 16 bits of Summ
+ unsigned summ = (UInt16)see->Summ; // & 0xFFFF
+ unsigned r = (summ >> see->Shift);
+ see->Summ = (UInt16)(summ - r);
+ escFreq = r + (r == 0);
+ }
+ }
+ else
+ {
+ see = &p->DummySee;
+ escFreq = 1;
+ }
+ }
+
+ s = Ppmd7_GetStats(p, mc);
sum = 0;
- i = p->MinContext->NumStats;
+ // i = mc->NumStats;
+
do
{
- int cur = s->Symbol;
- if (cur == symbol)
+ unsigned cur = s->Symbol;
+ if ((int)cur == symbol)
{
UInt32 low = sum;
- CPpmd_State *s1 = s;
- do
+ UInt32 freq = s->Freq;
+ unsigned num2;
+
+ Ppmd_See_Update(see);
+ p->FoundState = s;
+ sum += escFreq;
+
+ num2 = i / 2;
+ i &= 1;
+ sum += freq & (0 - (UInt32)i);
+ if (num2 != 0)
{
- sum += (s->Freq & (int)(MASK(s->Symbol)));
- s++;
+ s += i;
+ for (;;)
+ {
+ unsigned sym0 = s[0].Symbol;
+ unsigned sym1 = s[1].Symbol;
+ s += 2;
+ sum += (s[-2].Freq & (unsigned)(MASK(sym0)));
+ sum += (s[-1].Freq & (unsigned)(MASK(sym1)));
+ if (--num2 == 0)
+ break;
+ }
}
- while (--i);
- RangeEnc_Encode(rc, low, s1->Freq, sum + escFreq);
- Ppmd_See_Update(see);
- p->FoundState = s1;
+
+
+ R->Range /= sum;
+ RC_EncodeFinal(low, freq);
Ppmd7_Update2(p);
return;
}
- sum += (s->Freq & (int)(MASK(cur)));
- MASK(cur) = 0;
+ sum += (s->Freq & (unsigned)(MASK(cur)));
s++;
}
while (--i);
- RangeEnc_Encode(rc, sum, escFreq, sum + escFreq);
- see->Summ = (UInt16)(see->Summ + sum + escFreq);
+ {
+ UInt32 total = sum + escFreq;
+ see->Summ = (UInt16)(see->Summ + total);
+
+ R->Range /= total;
+ RC_Encode(sum, escFreq);
+ }
+
+ {
+ CPpmd_State *s2 = Ppmd7_GetStats(p, p->MinContext);
+ s--;
+ MASK(s->Symbol) = 0;
+ do
+ {
+ unsigned sym0 = s2[0].Symbol;
+ unsigned sym1 = s2[1].Symbol;
+ s2 += 2;
+ MASK(sym0) = 0;
+ MASK(sym1) = 0;
+ }
+ while (s2 < s);
+ }
+ }
+}
+
+
+void Ppmd7z_EncodeSymbols(CPpmd7 *p, const Byte *buf, const Byte *lim)
+{
+ for (; buf < lim; buf++)
+ {
+ Ppmd7z_EncodeSymbol(p, *buf);
}
}
diff --git a/multiarc/src/formats/7z/C/Ppmd7aDec.c b/multiarc/src/formats/7z/C/Ppmd7aDec.c
new file mode 100755
index 00000000..c4245784
--- /dev/null
+++ b/multiarc/src/formats/7z/C/Ppmd7aDec.c
@@ -0,0 +1,279 @@
+/* Ppmd7aDec.c -- PPMd7a (PPMdH) Decoder
+2021-04-13 : Igor Pavlov : Public domain
+This code is based on:
+ PPMd var.H (2001): Dmitry Shkarin : Public domain
+ Carryless rangecoder (1999): Dmitry Subbotin : Public domain */
+
+#include "Precomp.h"
+
+#include "Ppmd7.h"
+
+#define kTop (1 << 24)
+#define kBot (1 << 15)
+
+#define READ_BYTE(p) IByteIn_Read((p)->Stream)
+
+BoolInt Ppmd7a_RangeDec_Init(CPpmd7_RangeDec *p)
+{
+ unsigned i;
+ p->Code = 0;
+ p->Range = 0xFFFFFFFF;
+ p->Low = 0;
+
+ for (i = 0; i < 4; i++)
+ p->Code = (p->Code << 8) | READ_BYTE(p);
+ return (p->Code < 0xFFFFFFFF);
+}
+
+#define RC_NORM(p) \
+ while ((p->Low ^ (p->Low + p->Range)) < kTop \
+ || (p->Range < kBot && ((p->Range = (0 - p->Low) & (kBot - 1)), 1))) { \
+ p->Code = (p->Code << 8) | READ_BYTE(p); \
+ p->Range <<= 8; p->Low <<= 8; }
+
+// we must use only one type of Normalization from two: LOCAL or REMOTE
+#define RC_NORM_LOCAL(p) // RC_NORM(p)
+#define RC_NORM_REMOTE(p) RC_NORM(p)
+
+#define R (&p->rc.dec)
+
+MY_FORCE_INLINE
+// MY_NO_INLINE
+static void RangeDec_Decode(CPpmd7 *p, UInt32 start, UInt32 size)
+{
+ start *= R->Range;
+ R->Low += start;
+ R->Code -= start;
+ R->Range *= size;
+ RC_NORM_LOCAL(R)
+}
+
+#define RC_Decode(start, size) RangeDec_Decode(p, start, size);
+#define RC_DecodeFinal(start, size) RC_Decode(start, size) RC_NORM_REMOTE(R)
+#define RC_GetThreshold(total) (R->Code / (R->Range /= (total)))
+
+
+#define CTX(ref) ((CPpmd7_Context *)Ppmd7_GetContext(p, ref))
+typedef CPpmd7_Context * CTX_PTR;
+#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p)
+void Ppmd7_UpdateModel(CPpmd7 *p);
+
+#define MASK(sym) ((unsigned char *)charMask)[sym]
+
+
+int Ppmd7a_DecodeSymbol(CPpmd7 *p)
+{
+ size_t charMask[256 / sizeof(size_t)];
+
+ if (p->MinContext->NumStats != 1)
+ {
+ CPpmd_State *s = Ppmd7_GetStats(p, p->MinContext);
+ unsigned i;
+ UInt32 count, hiCnt;
+ UInt32 summFreq = p->MinContext->Union2.SummFreq;
+
+ if (summFreq > R->Range)
+ return PPMD7_SYM_ERROR;
+
+ count = RC_GetThreshold(summFreq);
+ hiCnt = count;
+
+ if ((Int32)(count -= s->Freq) < 0)
+ {
+ Byte sym;
+ RC_DecodeFinal(0, s->Freq);
+ p->FoundState = s;
+ sym = s->Symbol;
+ Ppmd7_Update1_0(p);
+ return sym;
+ }
+
+ p->PrevSuccess = 0;
+ i = (unsigned)p->MinContext->NumStats - 1;
+
+ do
+ {
+ if ((Int32)(count -= (++s)->Freq) < 0)
+ {
+ Byte sym;
+ RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq);
+ p->FoundState = s;
+ sym = s->Symbol;
+ Ppmd7_Update1(p);
+ return sym;
+ }
+ }
+ while (--i);
+
+ if (hiCnt >= summFreq)
+ return PPMD7_SYM_ERROR;
+
+ hiCnt -= count;
+ RC_Decode(hiCnt, summFreq - hiCnt);
+
+ p->HiBitsFlag = PPMD7_HiBitsFlag_3(p->FoundState->Symbol);
+ PPMD_SetAllBitsIn256Bytes(charMask);
+ // i = p->MinContext->NumStats - 1;
+ // do { MASK((--s)->Symbol) = 0; } while (--i);
+ {
+ CPpmd_State *s2 = Ppmd7_GetStats(p, p->MinContext);
+ MASK(s->Symbol) = 0;
+ do
+ {
+ unsigned sym0 = s2[0].Symbol;
+ unsigned sym1 = s2[1].Symbol;
+ s2 += 2;
+ MASK(sym0) = 0;
+ MASK(sym1) = 0;
+ }
+ while (s2 < s);
+ }
+ }
+ else
+ {
+ CPpmd_State *s = Ppmd7Context_OneState(p->MinContext);
+ UInt16 *prob = Ppmd7_GetBinSumm(p);
+ UInt32 pr = *prob;
+ UInt32 size0 = (R->Range >> 14) * pr;
+ pr = PPMD_UPDATE_PROB_1(pr);
+
+ if (R->Code < size0)
+ {
+ Byte sym;
+ *prob = (UInt16)(pr + (1 << PPMD_INT_BITS));
+
+ // RangeDec_DecodeBit0(size0);
+ R->Range = size0;
+ RC_NORM(R)
+
+
+
+ // sym = (p->FoundState = Ppmd7Context_OneState(p->MinContext))->Symbol;
+ // Ppmd7_UpdateBin(p);
+ {
+ unsigned freq = s->Freq;
+ CTX_PTR c = CTX(SUCCESSOR(s));
+ sym = s->Symbol;
+ p->FoundState = s;
+ p->PrevSuccess = 1;
+ p->RunLength++;
+ s->Freq = (Byte)(freq + (freq < 128));
+ // NextContext(p);
+ if (p->OrderFall == 0 && (const Byte *)c > p->Text)
+ p->MaxContext = p->MinContext = c;
+ else
+ Ppmd7_UpdateModel(p);
+ }
+ return sym;
+ }
+
+ *prob = (UInt16)pr;
+ p->InitEsc = p->ExpEscape[pr >> 10];
+
+ // RangeDec_DecodeBit1(size0);
+ R->Low += size0;
+ R->Code -= size0;
+ R->Range = (R->Range & ~((UInt32)PPMD_BIN_SCALE - 1)) - size0;
+ RC_NORM_LOCAL(R)
+
+ PPMD_SetAllBitsIn256Bytes(charMask);
+ MASK(Ppmd7Context_OneState(p->MinContext)->Symbol) = 0;
+ p->PrevSuccess = 0;
+ }
+
+ for (;;)
+ {
+ CPpmd_State *s, *s2;
+ UInt32 freqSum, count, hiCnt;
+
+ CPpmd_See *see;
+ CPpmd7_Context *mc;
+ unsigned numMasked;
+ RC_NORM_REMOTE(R)
+ mc = p->MinContext;
+ numMasked = mc->NumStats;
+
+ do
+ {
+ p->OrderFall++;
+ if (!mc->Suffix)
+ return PPMD7_SYM_END;
+ mc = Ppmd7_GetContext(p, mc->Suffix);
+ }
+ while (mc->NumStats == numMasked);
+
+ s = Ppmd7_GetStats(p, mc);
+
+ {
+ unsigned num = mc->NumStats;
+ unsigned num2 = num / 2;
+
+ num &= 1;
+ hiCnt = (s->Freq & (unsigned)(MASK(s->Symbol))) & (0 - (UInt32)num);
+ s += num;
+ p->MinContext = mc;
+
+ do
+ {
+ unsigned sym0 = s[0].Symbol;
+ unsigned sym1 = s[1].Symbol;
+ s += 2;
+ hiCnt += (s[-2].Freq & (unsigned)(MASK(sym0)));
+ hiCnt += (s[-1].Freq & (unsigned)(MASK(sym1)));
+ }
+ while (--num2);
+ }
+
+ see = Ppmd7_MakeEscFreq(p, numMasked, &freqSum);
+ freqSum += hiCnt;
+
+ if (freqSum > R->Range)
+ return PPMD7_SYM_ERROR;
+
+ count = RC_GetThreshold(freqSum);
+
+ if (count < hiCnt)
+ {
+ Byte sym;
+
+ s = Ppmd7_GetStats(p, p->MinContext);
+ hiCnt = count;
+ // count -= s->Freq & (unsigned)(MASK(s->Symbol));
+ // if ((Int32)count >= 0)
+ {
+ for (;;)
+ {
+ count -= s->Freq & (unsigned)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break;
+ // count -= s->Freq & (unsigned)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break;
+ };
+ }
+ s--;
+ RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq);
+
+ // new (see->Summ) value can overflow over 16-bits in some rare cases
+ Ppmd_See_Update(see);
+ p->FoundState = s;
+ sym = s->Symbol;
+ Ppmd7_Update2(p);
+ return sym;
+ }
+
+ if (count >= freqSum)
+ return PPMD7_SYM_ERROR;
+
+ RC_Decode(hiCnt, freqSum - hiCnt);
+
+ // We increase (see->Summ) for sum of Freqs of all non_Masked symbols.
+ // new (see->Summ) value can overflow over 16-bits in some rare cases
+ see->Summ = (UInt16)(see->Summ + freqSum);
+
+ s = Ppmd7_GetStats(p, p->MinContext);
+ s2 = s + p->MinContext->NumStats;
+ do
+ {
+ MASK(s->Symbol) = 0;
+ s++;
+ }
+ while (s != s2);
+ }
+}
diff --git a/multiarc/src/formats/7z/C/Ppmd8.c b/multiarc/src/formats/7z/C/Ppmd8.c
index 58141633..fda8b88a 100644..100755
--- a/multiarc/src/formats/7z/C/Ppmd8.c
+++ b/multiarc/src/formats/7z/C/Ppmd8.c
@@ -1,5 +1,5 @@
/* Ppmd8.c -- PPMdI codec
-2018-07-04 : Igor Pavlov : Public domain
+2021-04-13 : Igor Pavlov : Public domain
This code is based on PPMd var.I (2002): Dmitry Shkarin : Public domain */
#include "Precomp.h"
@@ -8,7 +8,12 @@ This code is based on PPMd var.I (2002): Dmitry Shkarin : Public domain */
#include "Ppmd8.h"
-const Byte PPMD8_kExpEscape[16] = { 25, 14, 9, 7, 5, 5, 4, 4, 4, 3, 3, 3, 2, 2, 2, 2 };
+
+
+
+MY_ALIGN(16)
+static const Byte PPMD8_kExpEscape[16] = { 25, 14, 9, 7, 5, 5, 4, 4, 4, 3, 3, 3, 2, 2, 2, 2 };
+MY_ALIGN(16)
static const UInt16 kInitBinEsc[] = { 0x3CDD, 0x1F3F, 0x59BF, 0x48F3, 0x64A1, 0x5ABC, 0x6632, 0x6051};
#define MAX_FREQ 124
@@ -16,13 +21,10 @@ static const UInt16 kInitBinEsc[] = { 0x3CDD, 0x1F3F, 0x59BF, 0x48F3, 0x64A1, 0x
#define U2B(nu) ((UInt32)(nu) * UNIT_SIZE)
#define U2I(nu) (p->Units2Indx[(size_t)(nu) - 1])
-#define I2U(indx) (p->Indx2Units[indx])
+#define I2U(indx) ((unsigned)p->Indx2Units[indx])
-#ifdef PPMD_32BIT
- #define REF(ptr) (ptr)
-#else
- #define REF(ptr) ((UInt32)((Byte *)(ptr) - (p)->Base))
-#endif
+
+#define REF(ptr) Ppmd_GetRef(p, ptr)
#define STATS_REF(ptr) ((CPpmd_State_Ref)REF(ptr))
@@ -35,34 +37,23 @@ typedef CPpmd8_Context * CTX_PTR;
struct CPpmd8_Node_;
-typedef
- #ifdef PPMD_32BIT
- struct CPpmd8_Node_ *
- #else
- UInt32
- #endif
- CPpmd8_Node_Ref;
+typedef Ppmd_Ref_Type(struct CPpmd8_Node_) CPpmd8_Node_Ref;
typedef struct CPpmd8_Node_
{
UInt32 Stamp;
+
CPpmd8_Node_Ref Next;
UInt32 NU;
} CPpmd8_Node;
-#ifdef PPMD_32BIT
- #define NODE(ptr) (ptr)
-#else
- #define NODE(offs) ((CPpmd8_Node *)(p->Base + (offs)))
-#endif
-
-#define EMPTY_NODE 0xFFFFFFFF
+#define NODE(r) Ppmd_GetPtr_Type(p, r, CPpmd8_Node)
void Ppmd8_Construct(CPpmd8 *p)
{
unsigned i, k, m;
- p->Base = 0;
+ p->Base = NULL;
for (i = 0, k = 0; i < PPMD_NUM_INDEXES; i++)
{
@@ -78,39 +69,51 @@ void Ppmd8_Construct(CPpmd8 *p)
for (i = 0; i < 5; i++)
p->NS2Indx[i] = (Byte)i;
+
for (m = i, k = 1; i < 260; i++)
{
p->NS2Indx[i] = (Byte)m;
if (--k == 0)
k = (++m) - 4;
}
+
+ memcpy(p->ExpEscape, PPMD8_kExpEscape, 16);
}
+
void Ppmd8_Free(CPpmd8 *p, ISzAllocPtr alloc)
{
ISzAlloc_Free(alloc, p->Base);
p->Size = 0;
- p->Base = 0;
+ p->Base = NULL;
}
+
BoolInt Ppmd8_Alloc(CPpmd8 *p, UInt32 size, ISzAllocPtr alloc)
{
if (!p->Base || p->Size != size)
{
Ppmd8_Free(p, alloc);
- p->AlignOffset =
- #ifdef PPMD_32BIT
- (4 - size) & 3;
- #else
- 4 - (size & 3);
- #endif
- if ((p->Base = (Byte *)ISzAlloc_Alloc(alloc, p->AlignOffset + size)) == 0)
+ p->AlignOffset = (4 - size) & 3;
+ if ((p->Base = (Byte *)ISzAlloc_Alloc(alloc, p->AlignOffset + size)) == NULL)
return False;
p->Size = size;
}
return True;
}
+
+
+// ---------- Internal Memory Allocator ----------
+
+
+
+
+
+
+#define EMPTY_NODE 0xFFFFFFFF
+
+
static void InsertNode(CPpmd8 *p, void *node, unsigned indx)
{
((CPpmd8_Node *)node)->Stamp = EMPTY_NODE;
@@ -120,14 +123,17 @@ static void InsertNode(CPpmd8 *p, void *node, unsigned indx)
p->Stamps[indx]++;
}
+
static void *RemoveNode(CPpmd8 *p, unsigned indx)
{
CPpmd8_Node *node = NODE((CPpmd8_Node_Ref)p->FreeList[indx]);
p->FreeList[indx] = node->Next;
p->Stamps[indx]--;
+
return node;
}
+
static void SplitBlock(CPpmd8 *p, void *ptr, unsigned oldIndx, unsigned newIndx)
{
unsigned i, nu = I2U(oldIndx) - I2U(newIndx);
@@ -140,51 +146,96 @@ static void SplitBlock(CPpmd8 *p, void *ptr, unsigned oldIndx, unsigned newIndx)
InsertNode(p, ptr, i);
}
+
+
+
+
+
+
+
+
+
+
+
+
+
static void GlueFreeBlocks(CPpmd8 *p)
{
- CPpmd8_Node_Ref head = 0;
- CPpmd8_Node_Ref *prev = &head;
- unsigned i;
+ /*
+ we use first UInt32 field of 12-bytes UNITs as record type stamp
+ CPpmd_State { Byte Symbol; Byte Freq; : Freq != 0xFF
+ CPpmd8_Context { Byte NumStats; Byte Flags; UInt16 SummFreq; : Flags != 0xFF ???
+ CPpmd8_Node { UInt32 Stamp : Stamp == 0xFFFFFFFF for free record
+ : Stamp == 0 for guard
+ Last 12-bytes UNIT in array always contains 12-bytes order-0 CPpmd8_Context record
+ */
+ CPpmd8_Node_Ref n;
p->GlueCount = 1 << 13;
memset(p->Stamps, 0, sizeof(p->Stamps));
- /* Order-0 context is always at top UNIT, so we don't need guard NODE at the end.
- All blocks up to p->LoUnit can be free, so we need guard NODE at LoUnit. */
+ /* we set guard NODE at LoUnit */
if (p->LoUnit != p->HiUnit)
- ((CPpmd8_Node *)p->LoUnit)->Stamp = 0;
+ ((CPpmd8_Node *)(void *)p->LoUnit)->Stamp = 0;
- /* Glue free blocks */
- for (i = 0; i < PPMD_NUM_INDEXES; i++)
{
- CPpmd8_Node_Ref next = (CPpmd8_Node_Ref)p->FreeList[i];
- p->FreeList[i] = 0;
- while (next != 0)
+ /* Glue free blocks */
+ CPpmd8_Node_Ref *prev = &n;
+ unsigned i;
+ for (i = 0; i < PPMD_NUM_INDEXES; i++)
{
- CPpmd8_Node *node = NODE(next);
- if (node->NU != 0)
+
+ CPpmd8_Node_Ref next = (CPpmd8_Node_Ref)p->FreeList[i];
+ p->FreeList[i] = 0;
+ while (next != 0)
{
- CPpmd8_Node *node2;
+ CPpmd8_Node *node = NODE(next);
+ UInt32 nu = node->NU;
*prev = next;
- prev = &(node->Next);
- while ((node2 = node + node->NU)->Stamp == EMPTY_NODE)
+ next = node->Next;
+ if (nu != 0)
{
- node->NU += node2->NU;
- node2->NU = 0;
+ CPpmd8_Node *node2;
+ prev = &(node->Next);
+ while ((node2 = node + nu)->Stamp == EMPTY_NODE)
+ {
+ nu += node2->NU;
+ node2->NU = 0;
+ node->NU = nu;
+ }
}
}
- next = node->Next;
}
+
+ *prev = 0;
}
- *prev = 0;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
/* Fill lists of free blocks */
- while (head != 0)
+ while (n != 0)
{
- CPpmd8_Node *node = NODE(head);
- unsigned nu;
- head = node->Next;
- nu = node->NU;
+ CPpmd8_Node *node = NODE(n);
+ UInt32 nu = node->NU;
+ unsigned i;
+ n = node->Next;
if (nu == 0)
continue;
for (; nu > 128; nu -= 128, node += 128)
@@ -192,57 +243,70 @@ static void GlueFreeBlocks(CPpmd8 *p)
if (I2U(i = U2I(nu)) != nu)
{
unsigned k = I2U(--i);
- InsertNode(p, node + k, nu - k - 1);
+ InsertNode(p, node + k, (unsigned)nu - k - 1);
}
InsertNode(p, node, i);
}
}
+
+MY_NO_INLINE
static void *AllocUnitsRare(CPpmd8 *p, unsigned indx)
{
unsigned i;
- void *retVal;
+
if (p->GlueCount == 0)
{
GlueFreeBlocks(p);
if (p->FreeList[indx] != 0)
return RemoveNode(p, indx);
}
+
i = indx;
+
do
{
if (++i == PPMD_NUM_INDEXES)
{
UInt32 numBytes = U2B(I2U(indx));
+ Byte *us = p->UnitsStart;
p->GlueCount--;
- return ((UInt32)(p->UnitsStart - p->Text) > numBytes) ? (p->UnitsStart -= numBytes) : (NULL);
+ return ((UInt32)(us - p->Text) > numBytes) ? (p->UnitsStart = us - numBytes) : (NULL);
}
}
while (p->FreeList[i] == 0);
- retVal = RemoveNode(p, i);
- SplitBlock(p, retVal, i, indx);
- return retVal;
+
+ {
+ void *block = RemoveNode(p, i);
+ SplitBlock(p, block, i, indx);
+ return block;
+ }
}
+
static void *AllocUnits(CPpmd8 *p, unsigned indx)
{
- UInt32 numBytes;
if (p->FreeList[indx] != 0)
return RemoveNode(p, indx);
- numBytes = U2B(I2U(indx));
- if (numBytes <= (UInt32)(p->HiUnit - p->LoUnit))
{
- void *retVal = p->LoUnit;
- p->LoUnit += numBytes;
- return retVal;
+ UInt32 numBytes = U2B(I2U(indx));
+ Byte *lo = p->LoUnit;
+ if ((UInt32)(p->HiUnit - lo) >= numBytes)
+ {
+ p->LoUnit = lo + numBytes;
+ return lo;
+ }
}
return AllocUnitsRare(p, indx);
}
+
#define MyMem12Cpy(dest, src, num) \
{ UInt32 *d = (UInt32 *)dest; const UInt32 *z = (const UInt32 *)src; UInt32 n = num; \
do { d[0] = z[0]; d[1] = z[1]; d[2] = z[2]; z += 3; d += 3; } while (--n); }
+
+
static void *ShrinkUnits(CPpmd8 *p, void *oldPtr, unsigned oldNU, unsigned newNU)
{
unsigned i0 = U2I(oldNU);
@@ -260,11 +324,13 @@ static void *ShrinkUnits(CPpmd8 *p, void *oldPtr, unsigned oldNU, unsigned newNU
return oldPtr;
}
+
static void FreeUnits(CPpmd8 *p, void *ptr, unsigned nu)
{
InsertNode(p, ptr, U2I(nu));
}
+
static void SpecialFreeUnit(CPpmd8 *p, void *ptr)
{
if ((Byte *)ptr != p->UnitsStart)
@@ -272,77 +338,91 @@ static void SpecialFreeUnit(CPpmd8 *p, void *ptr)
else
{
#ifdef PPMD8_FREEZE_SUPPORT
- *(UInt32 *)ptr = EMPTY_NODE; /* it's used for (Flags == 0xFF) check in RemoveBinContexts */
+ *(UInt32 *)ptr = EMPTY_NODE; /* it's used for (Flags == 0xFF) check in RemoveBinContexts() */
#endif
p->UnitsStart += UNIT_SIZE;
}
}
+
+/*
static void *MoveUnitsUp(CPpmd8 *p, void *oldPtr, unsigned nu)
{
unsigned indx = U2I(nu);
void *ptr;
- if ((Byte *)oldPtr > p->UnitsStart + 16 * 1024 || REF(oldPtr) > p->FreeList[indx])
+ if ((Byte *)oldPtr > p->UnitsStart + (1 << 14) || REF(oldPtr) > p->FreeList[indx])
return oldPtr;
ptr = RemoveNode(p, indx);
MyMem12Cpy(ptr, oldPtr, nu);
- if ((Byte*)oldPtr != p->UnitsStart)
+ if ((Byte *)oldPtr != p->UnitsStart)
InsertNode(p, oldPtr, indx);
else
p->UnitsStart += U2B(I2U(indx));
return ptr;
}
+*/
static void ExpandTextArea(CPpmd8 *p)
{
UInt32 count[PPMD_NUM_INDEXES];
unsigned i;
+
memset(count, 0, sizeof(count));
if (p->LoUnit != p->HiUnit)
- ((CPpmd8_Node *)p->LoUnit)->Stamp = 0;
+ ((CPpmd8_Node *)(void *)p->LoUnit)->Stamp = 0;
{
- CPpmd8_Node *node = (CPpmd8_Node *)p->UnitsStart;
- for (; node->Stamp == EMPTY_NODE; node += node->NU)
+ CPpmd8_Node *node = (CPpmd8_Node *)(void *)p->UnitsStart;
+ while (node->Stamp == EMPTY_NODE)
{
+ UInt32 nu = node->NU;
node->Stamp = 0;
- count[U2I(node->NU)]++;
+ count[U2I(nu)]++;
+ node += nu;
}
p->UnitsStart = (Byte *)node;
}
for (i = 0; i < PPMD_NUM_INDEXES; i++)
{
- CPpmd8_Node_Ref *next = (CPpmd8_Node_Ref *)&p->FreeList[i];
- while (count[i] != 0)
+ UInt32 cnt = count[i];
+ if (cnt == 0)
+ continue;
{
- CPpmd8_Node *node = NODE(*next);
- while (node->Stamp == 0)
+ CPpmd8_Node_Ref *prev = (CPpmd8_Node_Ref *)&p->FreeList[i];
+ CPpmd8_Node_Ref n = *prev;
+ p->Stamps[i] -= cnt;
+ for (;;)
{
- *next = node->Next;
- node = NODE(*next);
- p->Stamps[i]--;
- if (--count[i] == 0)
+ CPpmd8_Node *node = NODE(n);
+ n = node->Next;
+ if (node->Stamp != 0)
+ {
+ prev = &node->Next;
+ continue;
+ }
+ *prev = n;
+ if (--cnt == 0)
break;
}
- next = &node->Next;
}
}
}
-#define SUCCESSOR(p) ((CPpmd_Void_Ref)((p)->SuccessorLow | ((UInt32)(p)->SuccessorHigh << 16)))
+#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p)
static void SetSuccessor(CPpmd_State *p, CPpmd_Void_Ref v)
{
- (p)->SuccessorLow = (UInt16)((UInt32)(v) & 0xFFFF);
- (p)->SuccessorHigh = (UInt16)(((UInt32)(v) >> 16) & 0xFFFF);
+ Ppmd_SET_SUCCESSOR(p, v);
}
#define RESET_TEXT(offs) { p->Text = p->Base + p->AlignOffset + (offs); }
-static void RestartModel(CPpmd8 *p)
+MY_NO_INLINE
+static
+void RestartModel(CPpmd8 *p)
{
- unsigned i, k, m, r;
+ unsigned i, k, m;
memset(p->FreeList, 0, sizeof(p->FreeList));
memset(p->Stamps, 0, sizeof(p->Stamps));
@@ -355,30 +435,47 @@ static void RestartModel(CPpmd8 *p)
p->RunLength = p->InitRL = -(Int32)((p->MaxOrder < 12) ? p->MaxOrder : 12) - 1;
p->PrevSuccess = 0;
- p->MinContext = p->MaxContext = (CTX_PTR)(p->HiUnit -= UNIT_SIZE); /* AllocContext(p); */
- p->MinContext->Suffix = 0;
- p->MinContext->NumStats = 255;
- p->MinContext->Flags = 0;
- p->MinContext->SummFreq = 256 + 1;
- p->FoundState = (CPpmd_State *)p->LoUnit; /* AllocUnits(p, PPMD_NUM_INDEXES - 1); */
- p->LoUnit += U2B(256 / 2);
- p->MinContext->Stats = REF(p->FoundState);
- for (i = 0; i < 256; i++)
{
- CPpmd_State *s = &p->FoundState[i];
- s->Symbol = (Byte)i;
- s->Freq = 1;
- SetSuccessor(s, 0);
+ CPpmd8_Context *mc = (CTX_PTR)(void *)(p->HiUnit -= UNIT_SIZE); /* AllocContext(p); */
+ CPpmd_State *s = (CPpmd_State *)p->LoUnit; /* AllocUnits(p, PPMD_NUM_INDEXES - 1); */
+
+ p->LoUnit += U2B(256 / 2);
+ p->MaxContext = p->MinContext = mc;
+ p->FoundState = s;
+ mc->Flags = 0;
+ mc->NumStats = 256 - 1;
+ mc->Union2.SummFreq = 256 + 1;
+ mc->Union4.Stats = REF(s);
+ mc->Suffix = 0;
+
+ for (i = 0; i < 256; i++, s++)
+ {
+ s->Symbol = (Byte)i;
+ s->Freq = 1;
+ SetSuccessor(s, 0);
+ }
}
+
+
+
+
+
+
+
+
+
+
+
for (i = m = 0; m < 25; m++)
{
while (p->NS2Indx[i] == m)
i++;
for (k = 0; k < 8; k++)
{
- UInt16 val = (UInt16)(PPMD_BIN_SCALE - kInitBinEsc[k] / (i + 1));
+ unsigned r;
UInt16 *dest = p->BinSumm[m] + k;
+ UInt16 val = (UInt16)(PPMD_BIN_SCALE - kInitBinEsc[k] / (i + 1));
for (r = 0; r < 64; r += 8)
dest[r] = val;
}
@@ -386,50 +483,104 @@ static void RestartModel(CPpmd8 *p)
for (i = m = 0; m < 24; m++)
{
+ unsigned summ;
+ CPpmd_See *s;
while (p->NS2Indx[(size_t)i + 3] == m + 3)
i++;
- for (k = 0; k < 32; k++)
+ s = p->See[m];
+ summ = ((2 * i + 5) << (PPMD_PERIOD_BITS - 4));
+ for (k = 0; k < 32; k++, s++)
{
- CPpmd_See *s = &p->See[m][k];
- s->Summ = (UInt16)((2 * i + 5) << (s->Shift = PPMD_PERIOD_BITS - 4));
+ s->Summ = (UInt16)summ;
+ s->Shift = (PPMD_PERIOD_BITS - 4);
s->Count = 7;
}
}
+
+ p->DummySee.Summ = 0; /* unused */
+ p->DummySee.Shift = PPMD_PERIOD_BITS;
+ p->DummySee.Count = 64; /* unused */
}
+
void Ppmd8_Init(CPpmd8 *p, unsigned maxOrder, unsigned restoreMethod)
{
p->MaxOrder = maxOrder;
p->RestoreMethod = restoreMethod;
RestartModel(p);
- p->DummySee.Shift = PPMD_PERIOD_BITS;
- p->DummySee.Summ = 0; /* unused */
- p->DummySee.Count = 64; /* unused */
}
+
+#define FLAG_RESCALED (1 << 2)
+// #define FLAG_SYM_HIGH (1 << 3)
+#define FLAG_PREV_HIGH (1 << 4)
+
+#define HiBits_Prepare(sym) ((unsigned)(sym) + 0xC0)
+
+#define HiBits_Convert_3(flags) (((flags) >> (8 - 3)) & (1 << 3))
+#define HiBits_Convert_4(flags) (((flags) >> (8 - 4)) & (1 << 4))
+
+#define PPMD8_HiBitsFlag_3(sym) HiBits_Convert_3(HiBits_Prepare(sym))
+#define PPMD8_HiBitsFlag_4(sym) HiBits_Convert_4(HiBits_Prepare(sym))
+
+// #define PPMD8_HiBitsFlag_3(sym) (0x08 * ((sym) >= 0x40))
+// #define PPMD8_HiBitsFlag_4(sym) (0x10 * ((sym) >= 0x40))
+
+/*
+Refresh() is called when we remove some symbols (successors) in context.
+It increases Escape_Freq for sum of all removed symbols.
+*/
+
static void Refresh(CPpmd8 *p, CTX_PTR ctx, unsigned oldNU, unsigned scale)
{
unsigned i = ctx->NumStats, escFreq, sumFreq, flags;
CPpmd_State *s = (CPpmd_State *)ShrinkUnits(p, STATS(ctx), oldNU, (i + 2) >> 1);
- ctx->Stats = REF(s);
- #ifdef PPMD8_FREEZE_SUPPORT
- /* fixed over Shkarin's code. Fixed code is not compatible with original code for some files in FREEZE mode. */
- scale |= (ctx->SummFreq >= ((UInt32)1 << 15));
- #endif
- flags = (ctx->Flags & (0x10 + 0x04 * scale)) + 0x08 * (s->Symbol >= 0x40);
- escFreq = ctx->SummFreq - s->Freq;
- sumFreq = (s->Freq = (Byte)((s->Freq + scale) >> scale));
+ ctx->Union4.Stats = REF(s);
+
+ // #ifdef PPMD8_FREEZE_SUPPORT
+ /*
+ (ctx->Union2.SummFreq >= ((UInt32)1 << 15)) can be in FREEZE mode for some files.
+ It's not good for range coder. So new versions of support fix:
+ - original PPMdI code rev.1
+ + original PPMdI code rev.2
+ - 7-Zip default (PPMD8_FREEZE_SUPPORT is not defined)
+ + 7-Zip (p->RestoreMethod >= PPMD8_RESTORE_METHOD_FREEZE)
+ if we use that fixed line, we can lose compatibility with some files created before fix
+ if we don't use that fixed line, the program can work incorrectly in FREEZE mode in rare case.
+ */
+ // if (p->RestoreMethod >= PPMD8_RESTORE_METHOD_FREEZE)
+ {
+ scale |= (ctx->Union2.SummFreq >= ((UInt32)1 << 15));
+ }
+ // #endif
+
+
+
+ flags = HiBits_Prepare(s->Symbol);
+ {
+ unsigned freq = s->Freq;
+ escFreq = ctx->Union2.SummFreq - freq;
+ freq = (freq + scale) >> scale;
+ sumFreq = freq;
+ s->Freq = (Byte)freq;
+ }
+
do
{
- escFreq -= (++s)->Freq;
- sumFreq += (s->Freq = (Byte)((s->Freq + scale) >> scale));
- flags |= 0x08 * (s->Symbol >= 0x40);
+ unsigned freq = (++s)->Freq;
+ escFreq -= freq;
+ freq = (freq + scale) >> scale;
+ sumFreq += freq;
+ s->Freq = (Byte)freq;
+ flags |= HiBits_Prepare(s->Symbol);
}
while (--i);
- ctx->SummFreq = (UInt16)(sumFreq + ((escFreq + scale) >> scale));
- ctx->Flags = (Byte)flags;
+
+ ctx->Union2.SummFreq = (UInt16)(sumFreq + ((escFreq + scale) >> scale));
+ ctx->Flags = (Byte)((ctx->Flags & (FLAG_PREV_HIGH + FLAG_RESCALED * scale)) + HiBits_Convert_3(flags));
}
+
static void SwapStates(CPpmd_State *t1, CPpmd_State *t2)
{
CPpmd_State tmp = *t1;
@@ -437,98 +588,169 @@ static void SwapStates(CPpmd_State *t1, CPpmd_State *t2)
*t2 = tmp;
}
+
+/*
+CutOff() reduces contexts:
+ It converts Successors at MaxOrder to other Contexts to NULL-Successors
+ It removes RAW-Successors and NULL-Successors that are not Order-0
+ and it removes contexts when it has no Successors.
+ if the (Union4.Stats) is close to (UnitsStart), it moves it up.
+*/
+
static CPpmd_Void_Ref CutOff(CPpmd8 *p, CTX_PTR ctx, unsigned order)
{
- int i;
- unsigned tmp;
- CPpmd_State *s;
+ int ns = ctx->NumStats;
+ unsigned nu;
+ CPpmd_State *stats;
- if (!ctx->NumStats)
+ if (ns == 0)
{
- s = ONE_STATE(ctx);
- if ((Byte *)Ppmd8_GetPtr(p, SUCCESSOR(s)) >= p->UnitsStart)
+ CPpmd_State *s = ONE_STATE(ctx);
+ CPpmd_Void_Ref successor = SUCCESSOR(s);
+ if ((Byte *)Ppmd8_GetPtr(p, successor) >= p->UnitsStart)
{
if (order < p->MaxOrder)
- SetSuccessor(s, CutOff(p, CTX(SUCCESSOR(s)), order + 1));
+ successor = CutOff(p, CTX(successor), order + 1);
else
- SetSuccessor(s, 0);
- if (SUCCESSOR(s) || order <= 9) /* O_BOUND */
+ successor = 0;
+ SetSuccessor(s, successor);
+ if (successor || order <= 9) /* O_BOUND */
return REF(ctx);
}
SpecialFreeUnit(p, ctx);
return 0;
}
- ctx->Stats = STATS_REF(MoveUnitsUp(p, STATS(ctx), tmp = ((unsigned)ctx->NumStats + 2) >> 1));
+ nu = ((unsigned)ns + 2) >> 1;
+ // ctx->Union4.Stats = STATS_REF(MoveUnitsUp(p, STATS(ctx), nu));
+ {
+ unsigned indx = U2I(nu);
+ stats = STATS(ctx);
- for (s = STATS(ctx) + (i = ctx->NumStats); s >= STATS(ctx); s--)
- if ((Byte *)Ppmd8_GetPtr(p, SUCCESSOR(s)) < p->UnitsStart)
+ if ((UInt32)((Byte *)stats - p->UnitsStart) <= (1 << 14)
+ && (CPpmd_Void_Ref)ctx->Union4.Stats <= p->FreeList[indx])
{
- CPpmd_State *s2 = STATS(ctx) + (i--);
- SetSuccessor(s, 0);
- SwapStates(s, s2);
+ void *ptr = RemoveNode(p, indx);
+ ctx->Union4.Stats = STATS_REF(ptr);
+ MyMem12Cpy(ptr, (const void *)stats, nu);
+ if ((Byte *)stats != p->UnitsStart)
+ InsertNode(p, stats, indx);
+ else
+ p->UnitsStart += U2B(I2U(indx));
+ stats = ptr;
}
- else if (order < p->MaxOrder)
- SetSuccessor(s, CutOff(p, CTX(SUCCESSOR(s)), order + 1));
- else
- SetSuccessor(s, 0);
-
- if (i != ctx->NumStats && order)
+ }
+
+ {
+ CPpmd_State *s = stats + (unsigned)ns;
+ do
+ {
+ CPpmd_Void_Ref successor = SUCCESSOR(s);
+ if ((Byte *)Ppmd8_GetPtr(p, successor) < p->UnitsStart)
+ {
+ CPpmd_State *s2 = stats + (unsigned)(ns--);
+ if (order)
+ {
+ if (s != s2)
+ *s = *s2;
+ }
+ else
+ {
+ SwapStates(s, s2);
+ SetSuccessor(s2, 0);
+ }
+ }
+ else
+ {
+ if (order < p->MaxOrder)
+ SetSuccessor(s, CutOff(p, CTX(successor), order + 1));
+ else
+ SetSuccessor(s, 0);
+ }
+ }
+ while (--s >= stats);
+ }
+
+ if (ns != ctx->NumStats && order)
{
- ctx->NumStats = (Byte)i;
- s = STATS(ctx);
- if (i < 0)
+ if (ns < 0)
{
- FreeUnits(p, s, tmp);
+ FreeUnits(p, stats, nu);
SpecialFreeUnit(p, ctx);
return 0;
}
- if (i == 0)
+ ctx->NumStats = (Byte)ns;
+ if (ns == 0)
{
- ctx->Flags = (Byte)((ctx->Flags & 0x10) + 0x08 * (s->Symbol >= 0x40));
- *ONE_STATE(ctx) = *s;
- FreeUnits(p, s, tmp);
- /* 9.31: the code was fixed. It's was not BUG, if Freq <= MAX_FREQ = 124 */
- ONE_STATE(ctx)->Freq = (Byte)(((unsigned)ONE_STATE(ctx)->Freq + 11) >> 3);
+ const Byte sym = stats->Symbol;
+ ctx->Flags = (Byte)((ctx->Flags & FLAG_PREV_HIGH) + PPMD8_HiBitsFlag_3(sym));
+ // *ONE_STATE(ctx) = *stats;
+ ctx->Union2.State2.Symbol = sym;
+ ctx->Union2.State2.Freq = (Byte)(((unsigned)stats->Freq + 11) >> 3);
+ ctx->Union4.State4.Successor_0 = stats->Successor_0;
+ ctx->Union4.State4.Successor_1 = stats->Successor_1;
+ FreeUnits(p, stats, nu);
}
else
- Refresh(p, ctx, tmp, ctx->SummFreq > 16 * i);
+ {
+ Refresh(p, ctx, nu, ctx->Union2.SummFreq > 16 * (unsigned)ns);
+ }
}
+
return REF(ctx);
}
+
+
#ifdef PPMD8_FREEZE_SUPPORT
+
+/*
+RemoveBinContexts()
+ It converts Successors at MaxOrder to other Contexts to NULL-Successors
+ It changes RAW-Successors to NULL-Successors
+ removes Bin Context without Successor, if suffix of that context is also binary.
+*/
+
static CPpmd_Void_Ref RemoveBinContexts(CPpmd8 *p, CTX_PTR ctx, unsigned order)
{
- CPpmd_State *s;
if (!ctx->NumStats)
{
- s = ONE_STATE(ctx);
- if ((Byte *)Ppmd8_GetPtr(p, SUCCESSOR(s)) >= p->UnitsStart && order < p->MaxOrder)
- SetSuccessor(s, RemoveBinContexts(p, CTX(SUCCESSOR(s)), order + 1));
+ CPpmd_State *s = ONE_STATE(ctx);
+ CPpmd_Void_Ref successor = SUCCESSOR(s);
+ if ((Byte *)Ppmd8_GetPtr(p, successor) >= p->UnitsStart && order < p->MaxOrder)
+ successor = RemoveBinContexts(p, CTX(successor), order + 1);
else
- SetSuccessor(s, 0);
+ successor = 0;
+ SetSuccessor(s, successor);
/* Suffix context can be removed already, since different (high-order)
Successors may refer to same context. So we check Flags == 0xFF (Stamp == EMPTY_NODE) */
- if (!SUCCESSOR(s) && (!SUFFIX(ctx)->NumStats || SUFFIX(ctx)->Flags == 0xFF))
+ if (!successor && (!SUFFIX(ctx)->NumStats || SUFFIX(ctx)->Flags == 0xFF))
{
FreeUnits(p, ctx, 1);
return 0;
}
- else
- return REF(ctx);
}
-
- for (s = STATS(ctx) + ctx->NumStats; s >= STATS(ctx); s--)
- if ((Byte *)Ppmd8_GetPtr(p, SUCCESSOR(s)) >= p->UnitsStart && order < p->MaxOrder)
- SetSuccessor(s, RemoveBinContexts(p, CTX(SUCCESSOR(s)), order + 1));
- else
- SetSuccessor(s, 0);
+ else
+ {
+ CPpmd_State *s = STATS(ctx) + ctx->NumStats;
+ do
+ {
+ CPpmd_Void_Ref successor = SUCCESSOR(s);
+ if ((Byte *)Ppmd8_GetPtr(p, successor) >= p->UnitsStart && order < p->MaxOrder)
+ SetSuccessor(s, RemoveBinContexts(p, CTX(successor), order + 1));
+ else
+ SetSuccessor(s, 0);
+ }
+ while (--s >= STATS(ctx));
+ }
return REF(ctx);
}
+
#endif
+
+
static UInt32 GetUsedMemory(const CPpmd8 *p)
{
UInt32 v = 0;
@@ -544,7 +766,8 @@ static UInt32 GetUsedMemory(const CPpmd8 *p)
#define RESTORE_MODEL(c1, fSuccessor) RestoreModel(p, c1)
#endif
-static void RestoreModel(CPpmd8 *p, CTX_PTR c1
+
+static void RestoreModel(CPpmd8 *p, CTX_PTR ctxError
#ifdef PPMD8_FREEZE_SUPPORT
, CTX_PTR fSuccessor
#endif
@@ -553,36 +776,55 @@ static void RestoreModel(CPpmd8 *p, CTX_PTR c1
CTX_PTR c;
CPpmd_State *s;
RESET_TEXT(0);
- for (c = p->MaxContext; c != c1; c = SUFFIX(c))
+
+ // we go here in cases of error of allocation for context (c1)
+ // Order(MinContext) < Order(ctxError) <= Order(MaxContext)
+
+ // We remove last symbol from each of contexts [p->MaxContext ... ctxError) contexts
+ // So we rollback all created (symbols) before error.
+ for (c = p->MaxContext; c != ctxError; c = SUFFIX(c))
if (--(c->NumStats) == 0)
{
s = STATS(c);
- c->Flags = (Byte)((c->Flags & 0x10) + 0x08 * (s->Symbol >= 0x40));
- *ONE_STATE(c) = *s;
+ c->Flags = (Byte)((c->Flags & FLAG_PREV_HIGH) + PPMD8_HiBitsFlag_3(s->Symbol));
+ // *ONE_STATE(c) = *s;
+ c->Union2.State2.Symbol = s->Symbol;
+ c->Union2.State2.Freq = (Byte)(((unsigned)s->Freq + 11) >> 3);
+ c->Union4.State4.Successor_0 = s->Successor_0;
+ c->Union4.State4.Successor_1 = s->Successor_1;
+
SpecialFreeUnit(p, s);
- ONE_STATE(c)->Freq = (Byte)(((unsigned)ONE_STATE(c)->Freq + 11) >> 3);
}
else
- Refresh(p, c, (c->NumStats+3) >> 1, 0);
+ {
+ /* Refresh() can increase Escape_Freq by the Freq value of the last symbol that was added before the error,
+ so the largest possible increase for Escape_Freq is (8) from the value before UpdateModel() */
+ Refresh(p, c, ((unsigned)c->NumStats + 3) >> 1, 0);
+ }
+ // increase Escape Freq for context [ctxError ... p->MinContext)
for (; c != p->MinContext; c = SUFFIX(c))
- if (!c->NumStats)
- ONE_STATE(c)->Freq = (Byte)(ONE_STATE(c)->Freq - (ONE_STATE(c)->Freq >> 1));
- else if ((c->SummFreq += 4) > 128 + 4 * c->NumStats)
- Refresh(p, c, (c->NumStats + 2) >> 1, 1);
+ if (c->NumStats == 0)
+ {
+ // ONE_STATE(c)
+ c->Union2.State2.Freq = (Byte)(((unsigned)c->Union2.State2.Freq + 1) >> 1);
+ }
+ else if ((c->Union2.SummFreq = (UInt16)(c->Union2.SummFreq + 4)) > 128 + 4 * c->NumStats)
+ Refresh(p, c, ((unsigned)c->NumStats + 2) >> 1, 1);
#ifdef PPMD8_FREEZE_SUPPORT
if (p->RestoreMethod > PPMD8_RESTORE_METHOD_FREEZE)
{
p->MaxContext = fSuccessor;
- p->GlueCount += !(p->Stamps[1] & 1);
+ p->GlueCount += !(p->Stamps[1] & 1); // why?
}
else if (p->RestoreMethod == PPMD8_RESTORE_METHOD_FREEZE)
{
while (p->MaxContext->Suffix)
p->MaxContext = SUFFIX(p->MaxContext);
RemoveBinContexts(p, p->MaxContext, 0);
- p->RestoreMethod++;
+ // we change the current mode to (PPMD8_RESTORE_METHOD_FREEZE + 1)
+ p->RestoreMethod = PPMD8_RESTORE_METHOD_FREEZE + 1;
p->GlueCount = 0;
p->OrderFall = p->MaxOrder;
}
@@ -603,16 +845,19 @@ static void RestoreModel(CPpmd8 *p, CTX_PTR c1
p->GlueCount = 0;
p->OrderFall = p->MaxOrder;
}
+ p->MinContext = p->MaxContext;
}
+
+
+MY_NO_INLINE
static CTX_PTR CreateSuccessors(CPpmd8 *p, BoolInt skip, CPpmd_State *s1, CTX_PTR c)
{
- CPpmd_State upState;
- Byte flags;
+
CPpmd_Byte_Ref upBranch = (CPpmd_Byte_Ref)SUCCESSOR(p->FoundState);
- /* fixed over Shkarin's code. Maybe it could work without + 1 too. */
- CPpmd_State *ps[PPMD8_MAX_ORDER + 1];
+ Byte newSym, newFreq, flags;
unsigned numPs = 0;
+ CPpmd_State *ps[PPMD8_MAX_ORDER + 1]; /* fixed over Shkarin's code. Maybe it could work without + 1 too. */
if (!skip)
ps[numPs++] = p->FoundState;
@@ -622,19 +867,13 @@ static CTX_PTR CreateSuccessors(CPpmd8 *p, BoolInt skip, CPpmd_State *s1, CTX_PT
CPpmd_Void_Ref successor;
CPpmd_State *s;
c = SUFFIX(c);
- if (s1)
- {
- s = s1;
- s1 = NULL;
- }
+
+ if (s1) { s = s1; s1 = NULL; }
else if (c->NumStats != 0)
{
- for (s = STATS(c); s->Symbol != p->FoundState->Symbol; s++);
- if (s->Freq < MAX_FREQ - 9)
- {
- s->Freq++;
- c->SummFreq++;
- }
+ Byte sym = p->FoundState->Symbol;
+ for (s = STATS(c); s->Symbol != sym; s++);
+ if (s->Freq < MAX_FREQ - 9) { s->Freq++; c->Union2.SummFreq++; }
}
else
{
@@ -644,36 +883,54 @@ static CTX_PTR CreateSuccessors(CPpmd8 *p, BoolInt skip, CPpmd_State *s1, CTX_PT
successor = SUCCESSOR(s);
if (successor != upBranch)
{
+
c = CTX(successor);
if (numPs == 0)
+ {
+
+
return c;
+ }
break;
}
ps[numPs++] = s;
}
- upState.Symbol = *(const Byte *)Ppmd8_GetPtr(p, upBranch);
- SetSuccessor(&upState, upBranch + 1);
- flags = (Byte)(0x10 * (p->FoundState->Symbol >= 0x40) + 0x08 * (upState.Symbol >= 0x40));
-
+
+
+
+
+ newSym = *(const Byte *)Ppmd8_GetPtr(p, upBranch);
+ upBranch++;
+ flags = (Byte)(PPMD8_HiBitsFlag_4(p->FoundState->Symbol) + PPMD8_HiBitsFlag_3(newSym));
+
if (c->NumStats == 0)
- upState.Freq = ONE_STATE(c)->Freq;
+ newFreq = c->Union2.State2.Freq;
else
{
UInt32 cf, s0;
CPpmd_State *s;
- for (s = STATS(c); s->Symbol != upState.Symbol; s++);
- cf = s->Freq - 1;
- s0 = c->SummFreq - c->NumStats - cf;
- upState.Freq = (Byte)(1 + ((2 * cf <= s0) ? (5 * cf > s0) : ((cf + 2 * s0 - 3) / s0)));
+ for (s = STATS(c); s->Symbol != newSym; s++);
+ cf = (UInt32)s->Freq - 1;
+ s0 = (UInt32)c->Union2.SummFreq - c->NumStats - cf;
+ /*
+
+
+ max(newFreq)= (s->Freq - 1), when (s0 == 1)
+
+
+ */
+ newFreq = (Byte)(1 + ((2 * cf <= s0) ? (5 * cf > s0) : ((cf + 2 * s0 - 3) / s0)));
}
+
+
do
{
- /* Create Child */
- CTX_PTR c1; /* = AllocContext(p); */
+ CTX_PTR c1;
+ /* = AllocContext(p); */
if (p->HiUnit != p->LoUnit)
- c1 = (CTX_PTR)(p->HiUnit -= UNIT_SIZE);
+ c1 = (CTX_PTR)(void *)(p->HiUnit -= UNIT_SIZE);
else if (p->FreeList[0] != 0)
c1 = (CTX_PTR)RemoveNode(p, 0);
else
@@ -682,9 +939,11 @@ static CTX_PTR CreateSuccessors(CPpmd8 *p, BoolInt skip, CPpmd_State *s1, CTX_PT
if (!c1)
return NULL;
}
- c1->NumStats = 0;
c1->Flags = flags;
- *ONE_STATE(c1) = upState;
+ c1->NumStats = 0;
+ c1->Union2.State2.Symbol = newSym;
+ c1->Union2.State2.Freq = newFreq;
+ SetSuccessor(ONE_STATE(c1), upBranch);
c1->Suffix = REF(c);
SetSuccessor(ps[--numPs], REF(c1));
c = c1;
@@ -694,6 +953,7 @@ static CTX_PTR CreateSuccessors(CPpmd8 *p, BoolInt skip, CPpmd_State *s1, CTX_PT
return c;
}
+
static CTX_PTR ReduceOrder(CPpmd8 *p, CPpmd_State *s1, CTX_PTR c)
{
CPpmd_State *s = NULL;
@@ -739,8 +999,8 @@ static CTX_PTR ReduceOrder(CPpmd8 *p, CPpmd_State *s1, CTX_PTR c)
do { s++; } while (s->Symbol != p->FoundState->Symbol);
if (s->Freq < MAX_FREQ - 9)
{
- s->Freq += 2;
- c->SummFreq += 2;
+ s->Freq = (Byte)(s->Freq + 2);
+ c->Union2.SummFreq = (UInt16)(c->Union2.SummFreq + 2);
}
}
else
@@ -776,33 +1036,42 @@ static CTX_PTR ReduceOrder(CPpmd8 *p, CPpmd_State *s1, CTX_PTR c)
p->FoundState = s;
successor = CreateSuccessors(p, False, NULL, c);
- if (successor == NULL)
+ if (!successor)
SetSuccessor(s, 0);
else
SetSuccessor(s, REF(successor));
p->FoundState = s2;
}
- if (p->OrderFall == 1 && c1 == p->MaxContext)
{
- SetSuccessor(p->FoundState, SUCCESSOR(s));
- p->Text--;
+ CPpmd_Void_Ref successor = SUCCESSOR(s);
+ if (p->OrderFall == 1 && c1 == p->MaxContext)
+ {
+ SetSuccessor(p->FoundState, successor);
+ p->Text--;
+ }
+ if (successor == 0)
+ return NULL;
+ return CTX(successor);
}
- if (SUCCESSOR(s) == 0)
- return NULL;
- return CTX(SUCCESSOR(s));
}
-static void UpdateModel(CPpmd8 *p)
+
+
+void Ppmd8_UpdateModel(CPpmd8 *p);
+MY_NO_INLINE
+void Ppmd8_UpdateModel(CPpmd8 *p)
{
- CPpmd_Void_Ref successor, fSuccessor = SUCCESSOR(p->FoundState);
+ CPpmd_Void_Ref maxSuccessor, minSuccessor = SUCCESSOR(p->FoundState);
CTX_PTR c;
unsigned s0, ns, fFreq = p->FoundState->Freq;
Byte flag, fSymbol = p->FoundState->Symbol;
+ {
CPpmd_State *s = NULL;
-
if (p->FoundState->Freq < MAX_FREQ / 4 && p->MinContext->Suffix != 0)
{
+ /* Update Freqs in Suffix Context */
+
c = SUFFIX(p->MinContext);
if (c->NumStats == 0)
@@ -813,91 +1082,134 @@ static void UpdateModel(CPpmd8 *p)
}
else
{
+ Byte sym = p->FoundState->Symbol;
s = STATS(c);
- if (s->Symbol != p->FoundState->Symbol)
+
+ if (s->Symbol != sym)
{
- do { s++; } while (s->Symbol != p->FoundState->Symbol);
+ do
+ {
+
+ s++;
+ }
+ while (s->Symbol != sym);
+
if (s[0].Freq >= s[-1].Freq)
{
SwapStates(&s[0], &s[-1]);
s--;
}
}
+
if (s->Freq < MAX_FREQ - 9)
{
- s->Freq += 2;
- c->SummFreq += 2;
+ s->Freq = (Byte)(s->Freq + 2);
+ c->Union2.SummFreq = (UInt16)(c->Union2.SummFreq + 2);
}
}
}
c = p->MaxContext;
- if (p->OrderFall == 0 && fSuccessor)
+ if (p->OrderFall == 0 && minSuccessor)
{
CTX_PTR cs = CreateSuccessors(p, True, s, p->MinContext);
- if (cs == 0)
+ if (!cs)
{
SetSuccessor(p->FoundState, 0);
- RESTORE_MODEL(c, CTX(fSuccessor));
- }
- else
- {
- SetSuccessor(p->FoundState, REF(cs));
- p->MaxContext = cs;
+ RESTORE_MODEL(c, CTX(minSuccessor));
+ return;
}
+ SetSuccessor(p->FoundState, REF(cs));
+ p->MinContext = p->MaxContext = cs;
return;
}
- *p->Text++ = p->FoundState->Symbol;
- successor = REF(p->Text);
- if (p->Text >= p->UnitsStart)
+
+
+
{
- RESTORE_MODEL(c, CTX(fSuccessor)); /* check it */
- return;
+ Byte *text = p->Text;
+ *text++ = p->FoundState->Symbol;
+ p->Text = text;
+ if (text >= p->UnitsStart)
+ {
+ RESTORE_MODEL(c, CTX(minSuccessor)); /* check it */
+ return;
+ }
+ maxSuccessor = REF(text);
}
-
- if (!fSuccessor)
+
+ if (!minSuccessor)
{
CTX_PTR cs = ReduceOrder(p, s, p->MinContext);
- if (cs == NULL)
+ if (!cs)
{
- RESTORE_MODEL(c, 0);
+ RESTORE_MODEL(c, NULL);
return;
}
- fSuccessor = REF(cs);
+ minSuccessor = REF(cs);
}
- else if ((Byte *)Ppmd8_GetPtr(p, fSuccessor) < p->UnitsStart)
+ else if ((Byte *)Ppmd8_GetPtr(p, minSuccessor) < p->UnitsStart)
{
CTX_PTR cs = CreateSuccessors(p, False, s, p->MinContext);
- if (cs == NULL)
+ if (!cs)
{
- RESTORE_MODEL(c, 0);
+ RESTORE_MODEL(c, NULL);
return;
}
- fSuccessor = REF(cs);
+ minSuccessor = REF(cs);
}
if (--p->OrderFall == 0)
{
- successor = fSuccessor;
+ maxSuccessor = minSuccessor;
p->Text -= (p->MaxContext != p->MinContext);
}
#ifdef PPMD8_FREEZE_SUPPORT
else if (p->RestoreMethod > PPMD8_RESTORE_METHOD_FREEZE)
{
- successor = fSuccessor;
+ maxSuccessor = minSuccessor;
RESET_TEXT(0);
p->OrderFall = 0;
}
#endif
+ }
+
+
+
+
+
+
+
+
+
+
- s0 = p->MinContext->SummFreq - (ns = p->MinContext->NumStats) - fFreq;
- flag = (Byte)(0x08 * (fSymbol >= 0x40));
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ flag = (Byte)(PPMD8_HiBitsFlag_3(fSymbol));
+ s0 = p->MinContext->Union2.SummFreq - (ns = p->MinContext->NumStats) - fFreq;
for (; c != p->MinContext; c = SUFFIX(c))
{
unsigned ns1;
- UInt32 cf, sf;
+ UInt32 sum;
+
if ((ns1 = c->NumStats) != 0)
{
if ((ns1 & 1) != 0)
@@ -911,91 +1223,133 @@ static void UpdateModel(CPpmd8 *p)
void *oldPtr;
if (!ptr)
{
- RESTORE_MODEL(c, CTX(fSuccessor));
+ RESTORE_MODEL(c, CTX(minSuccessor));
return;
}
oldPtr = STATS(c);
MyMem12Cpy(ptr, oldPtr, oldNU);
InsertNode(p, oldPtr, i);
- c->Stats = STATS_REF(ptr);
+ c->Union4.Stats = STATS_REF(ptr);
}
}
- c->SummFreq = (UInt16)(c->SummFreq + (3 * ns1 + 1 < ns));
+ sum = c->Union2.SummFreq;
+ /* max increase of Escape_Freq is 1 here.
+ an average increase is 1/3 per symbol */
+ sum += (3 * ns1 + 1 < ns);
+ /* original PPMdH uses 16-bit variable for (sum) here.
+ But (sum < ???). Do we need to truncate (sum) to 16-bit */
+ // sum = (UInt16)sum;
}
else
{
- CPpmd_State *s2 = (CPpmd_State*)AllocUnits(p, 0);
- if (!s2)
+
+ CPpmd_State *s = (CPpmd_State*)AllocUnits(p, 0);
+ if (!s)
{
- RESTORE_MODEL(c, CTX(fSuccessor));
+ RESTORE_MODEL(c, CTX(minSuccessor));
return;
}
- *s2 = *ONE_STATE(c);
- c->Stats = REF(s2);
- if (s2->Freq < MAX_FREQ / 4 - 1)
- s2->Freq <<= 1;
- else
- s2->Freq = MAX_FREQ - 4;
- c->SummFreq = (UInt16)(s2->Freq + p->InitEsc + (ns > 2));
- }
- cf = 2 * fFreq * (c->SummFreq + 6);
- sf = (UInt32)s0 + c->SummFreq;
- if (cf < 6 * sf)
- {
- cf = 1 + (cf > sf) + (cf >= 4 * sf);
- c->SummFreq += 4;
- }
- else
- {
- cf = 4 + (cf > 9 * sf) + (cf > 12 * sf) + (cf > 15 * sf);
- c->SummFreq = (UInt16)(c->SummFreq + cf);
+ {
+ unsigned freq = c->Union2.State2.Freq;
+ // s = *ONE_STATE(c);
+ s->Symbol = c->Union2.State2.Symbol;
+ s->Successor_0 = c->Union4.State4.Successor_0;
+ s->Successor_1 = c->Union4.State4.Successor_1;
+ // SetSuccessor(s, c->Union4.Stats); // call it only for debug purposes to check the order of
+ // (Successor_0 and Successor_1) in LE/BE.
+ c->Union4.Stats = REF(s);
+ if (freq < MAX_FREQ / 4 - 1)
+ freq <<= 1;
+ else
+ freq = MAX_FREQ - 4;
+
+ s->Freq = (Byte)freq;
+
+ sum = freq + p->InitEsc + (ns > 2); // Ppmd8 (> 2)
+ }
}
+
{
- CPpmd_State *s2 = STATS(c) + ns1 + 1;
- SetSuccessor(s2, successor);
- s2->Symbol = fSymbol;
- s2->Freq = (Byte)cf;
- c->Flags |= flag;
+ CPpmd_State *s = STATS(c) + ns1 + 1;
+ UInt32 cf = 2 * (sum + 6) * (UInt32)fFreq;
+ UInt32 sf = (UInt32)s0 + sum;
+ s->Symbol = fSymbol;
c->NumStats = (Byte)(ns1 + 1);
+ SetSuccessor(s, maxSuccessor);
+ c->Flags |= flag;
+ if (cf < 6 * sf)
+ {
+ cf = (unsigned)1 + (cf > sf) + (cf >= 4 * sf);
+ sum += 4;
+ /* It can add (1, 2, 3) to Escape_Freq */
+ }
+ else
+ {
+ cf = (unsigned)4 + (cf > 9 * sf) + (cf > 12 * sf) + (cf > 15 * sf);
+ sum += cf;
+ }
+
+ c->Union2.SummFreq = (UInt16)sum;
+ s->Freq = (Byte)cf;
}
+
}
- p->MaxContext = p->MinContext = CTX(fSuccessor);
+ p->MaxContext = p->MinContext = CTX(minSuccessor);
}
+
+
+MY_NO_INLINE
static void Rescale(CPpmd8 *p)
{
unsigned i, adder, sumFreq, escFreq;
CPpmd_State *stats = STATS(p->MinContext);
CPpmd_State *s = p->FoundState;
+
+ /* Sort the list by Freq */
+ if (s != stats)
{
CPpmd_State tmp = *s;
- for (; s != stats; s--)
+ do
s[0] = s[-1];
+ while (--s != stats);
*s = tmp;
}
- escFreq = p->MinContext->SummFreq - s->Freq;
- s->Freq += 4;
- adder = (p->OrderFall != 0
- #ifdef PPMD8_FREEZE_SUPPORT
- || p->RestoreMethod > PPMD8_RESTORE_METHOD_FREEZE
- #endif
- );
- s->Freq = (Byte)((s->Freq + adder) >> 1);
+
sumFreq = s->Freq;
+ escFreq = p->MinContext->Union2.SummFreq - sumFreq;
+
+
+
+
+
+ adder = (p->OrderFall != 0);
+
+ #ifdef PPMD8_FREEZE_SUPPORT
+ adder |= (p->RestoreMethod > PPMD8_RESTORE_METHOD_FREEZE);
+ #endif
+
+ sumFreq = (sumFreq + 4 + adder) >> 1;
i = p->MinContext->NumStats;
+ s->Freq = (Byte)sumFreq;
+
do
{
- escFreq -= (++s)->Freq;
- s->Freq = (Byte)((s->Freq + adder) >> 1);
- sumFreq += s->Freq;
- if (s[0].Freq > s[-1].Freq)
+ unsigned freq = (++s)->Freq;
+ escFreq -= freq;
+ freq = (freq + adder) >> 1;
+ sumFreq += freq;
+ s->Freq = (Byte)freq;
+ if (freq > s[-1].Freq)
{
+ CPpmd_State tmp = *s;
CPpmd_State *s1 = s;
- CPpmd_State tmp = *s1;
do
+ {
s1[0] = s1[-1];
- while (--s1 != stats && tmp.Freq > s1[-1].Freq);
+ }
+ while (--s1 != stats && freq > s1[-1].Freq);
*s1 = tmp;
}
}
@@ -1003,49 +1357,89 @@ static void Rescale(CPpmd8 *p)
if (s->Freq == 0)
{
- unsigned numStats = p->MinContext->NumStats;
- unsigned n0, n1;
- do { i++; } while ((--s)->Freq == 0);
+ /* Remove all items with Freq == 0 */
+ CPpmd8_Context *mc;
+ unsigned numStats, numStatsNew, n0, n1;
+
+ i = 0; do { i++; } while ((--s)->Freq == 0);
+
+
+
+
escFreq += i;
- p->MinContext->NumStats = (Byte)(p->MinContext->NumStats - i);
- if (p->MinContext->NumStats == 0)
+ mc = p->MinContext;
+ numStats = mc->NumStats;
+ numStatsNew = numStats - i;
+ mc->NumStats = (Byte)(numStatsNew);
+ n0 = (numStats + 2) >> 1;
+
+ if (numStatsNew == 0)
{
- CPpmd_State tmp = *stats;
- tmp.Freq = (Byte)((2 * tmp.Freq + escFreq - 1) / escFreq);
- if (tmp.Freq > MAX_FREQ / 3)
- tmp.Freq = MAX_FREQ / 3;
- InsertNode(p, stats, U2I((numStats + 2) >> 1));
- p->MinContext->Flags = (Byte)((p->MinContext->Flags & 0x10) + 0x08 * (tmp.Symbol >= 0x40));
- *(p->FoundState = ONE_STATE(p->MinContext)) = tmp;
+
+ unsigned freq = (2 * (unsigned)stats->Freq + escFreq - 1) / escFreq;
+ if (freq > MAX_FREQ / 3)
+ freq = MAX_FREQ / 3;
+ mc->Flags = (Byte)((mc->Flags & FLAG_PREV_HIGH) + PPMD8_HiBitsFlag_3(stats->Symbol));
+
+
+
+
+
+ s = ONE_STATE(mc);
+ *s = *stats;
+ s->Freq = (Byte)freq;
+ p->FoundState = s;
+ InsertNode(p, stats, U2I(n0));
return;
}
- n0 = (numStats + 2) >> 1;
- n1 = (p->MinContext->NumStats + 2) >> 1;
+
+ n1 = (numStatsNew + 2) >> 1;
if (n0 != n1)
- p->MinContext->Stats = STATS_REF(ShrinkUnits(p, stats, n0, n1));
- p->MinContext->Flags &= ~0x08;
- p->MinContext->Flags |= 0x08 * ((s = STATS(p->MinContext))->Symbol >= 0x40);
- i = p->MinContext->NumStats;
- do { p->MinContext->Flags |= 0x08*((++s)->Symbol >= 0x40); } while (--i);
+ mc->Union4.Stats = STATS_REF(ShrinkUnits(p, stats, n0, n1));
+ {
+ // here we are for max order only. So Ppmd8_MakeEscFreq() doesn't use mc->Flags
+ // but we still need current (Flags & FLAG_PREV_HIGH), if we will convert context to 1-symbol context later.
+ /*
+ unsigned flags = HiBits_Prepare((s = STATS(mc))->Symbol);
+ i = mc->NumStats;
+ do { flags |= HiBits_Prepare((++s)->Symbol); } while (--i);
+ mc->Flags = (Byte)((mc->Flags & ~FLAG_SYM_HIGH) + HiBits_Convert_3(flags));
+ */
+ }
+ }
+
+
+
+
+
+
+ {
+ CPpmd8_Context *mc = p->MinContext;
+ mc->Union2.SummFreq = (UInt16)(sumFreq + escFreq - (escFreq >> 1));
+ mc->Flags |= FLAG_RESCALED;
+ p->FoundState = STATS(mc);
}
- p->MinContext->SummFreq = (UInt16)(sumFreq + escFreq - (escFreq >> 1));
- p->MinContext->Flags |= 0x4;
- p->FoundState = STATS(p->MinContext);
}
+
CPpmd_See *Ppmd8_MakeEscFreq(CPpmd8 *p, unsigned numMasked1, UInt32 *escFreq)
{
CPpmd_See *see;
- if (p->MinContext->NumStats != 0xFF)
+ const CPpmd8_Context *mc = p->MinContext;
+ unsigned numStats = mc->NumStats;
+ if (numStats != 0xFF)
{
- see = p->See[(size_t)(unsigned)p->NS2Indx[(size_t)(unsigned)p->MinContext->NumStats + 2] - 3] +
- (p->MinContext->SummFreq > 11 * ((unsigned)p->MinContext->NumStats + 1)) +
- 2 * (unsigned)(2 * (unsigned)p->MinContext->NumStats <
- ((unsigned)SUFFIX(p->MinContext)->NumStats + numMasked1)) +
- p->MinContext->Flags;
+ // (3 <= numStats + 2 <= 256) (3 <= NS2Indx[3] and NS2Indx[256] === 26)
+ see = p->See[(size_t)(unsigned)p->NS2Indx[(size_t)numStats + 2] - 3]
+ + (mc->Union2.SummFreq > 11 * (numStats + 1))
+ + 2 * (unsigned)(2 * numStats < ((unsigned)SUFFIX(mc)->NumStats + numMasked1))
+ + mc->Flags;
+
{
- unsigned r = (see->Summ >> see->Shift);
- see->Summ = (UInt16)(see->Summ - r);
+ // if (see->Summ) field is larger than 16-bit, we need only low 16 bits of Summ
+ unsigned summ = (UInt16)see->Summ; // & 0xFFFF
+ unsigned r = (summ >> see->Shift);
+ see->Summ = (UInt16)(summ - r);
*escFreq = r + (r == 0);
}
}
@@ -1057,67 +1451,87 @@ CPpmd_See *Ppmd8_MakeEscFreq(CPpmd8 *p, unsigned numMasked1, UInt32 *escFreq)
return see;
}
+
static void NextContext(CPpmd8 *p)
{
CTX_PTR c = CTX(SUCCESSOR(p->FoundState));
- if (p->OrderFall == 0 && (Byte *)c >= p->UnitsStart)
- p->MinContext = p->MaxContext = c;
+ if (p->OrderFall == 0 && (const Byte *)c >= p->UnitsStart)
+ p->MaxContext = p->MinContext = c;
else
- {
- UpdateModel(p);
- p->MinContext = p->MaxContext;
- }
+ Ppmd8_UpdateModel(p);
}
+
void Ppmd8_Update1(CPpmd8 *p)
{
CPpmd_State *s = p->FoundState;
- s->Freq += 4;
- p->MinContext->SummFreq += 4;
- if (s[0].Freq > s[-1].Freq)
+ unsigned freq = s->Freq;
+ freq += 4;
+ p->MinContext->Union2.SummFreq = (UInt16)(p->MinContext->Union2.SummFreq + 4);
+ s->Freq = (Byte)freq;
+ if (freq > s[-1].Freq)
{
- SwapStates(&s[0], &s[-1]);
+ SwapStates(s, &s[-1]);
p->FoundState = --s;
- if (s->Freq > MAX_FREQ)
+ if (freq > MAX_FREQ)
Rescale(p);
}
NextContext(p);
}
+
void Ppmd8_Update1_0(CPpmd8 *p)
{
- p->PrevSuccess = (2 * p->FoundState->Freq >= p->MinContext->SummFreq);
- p->RunLength += p->PrevSuccess;
- p->MinContext->SummFreq += 4;
- if ((p->FoundState->Freq += 4) > MAX_FREQ)
+ CPpmd_State *s = p->FoundState;
+ CPpmd8_Context *mc = p->MinContext;
+ unsigned freq = s->Freq;
+ unsigned summFreq = mc->Union2.SummFreq;
+ p->PrevSuccess = (2 * freq >= summFreq); // Ppmd8 (>=)
+ p->RunLength += (int)p->PrevSuccess;
+ mc->Union2.SummFreq = (UInt16)(summFreq + 4);
+ freq += 4;
+ s->Freq = (Byte)freq;
+ if (freq > MAX_FREQ)
Rescale(p);
NextContext(p);
}
+
+/*
void Ppmd8_UpdateBin(CPpmd8 *p)
{
- p->FoundState->Freq = (Byte)(p->FoundState->Freq + (p->FoundState->Freq < 196));
+ unsigned freq = p->FoundState->Freq;
+ p->FoundState->Freq = (Byte)(freq + (freq < 196)); // Ppmd8 (196)
p->PrevSuccess = 1;
p->RunLength++;
NextContext(p);
}
+*/
void Ppmd8_Update2(CPpmd8 *p)
{
- p->MinContext->SummFreq += 4;
- if ((p->FoundState->Freq += 4) > MAX_FREQ)
- Rescale(p);
+ CPpmd_State *s = p->FoundState;
+ unsigned freq = s->Freq;
+ freq += 4;
p->RunLength = p->InitRL;
- UpdateModel(p);
- p->MinContext = p->MaxContext;
+ p->MinContext->Union2.SummFreq = (UInt16)(p->MinContext->Union2.SummFreq + 4);
+ s->Freq = (Byte)freq;
+ if (freq > MAX_FREQ)
+ Rescale(p);
+ Ppmd8_UpdateModel(p);
}
/* H->I changes:
NS2Indx
- GlewCount, and Glue method
+ GlueCount, and Glue method
BinSum
See / EscFreq
CreateSuccessors updates more suffix contexts
- UpdateModel consts.
+ Ppmd8_UpdateModel consts.
PrevSuccess Update
+
+Flags:
+ (1 << 2) - the Context was Rescaled
+ (1 << 3) - there is symbol in Stats with (sym >= 0x40) in
+ (1 << 4) - main symbol of context is (sym >= 0x40)
*/
diff --git a/multiarc/src/formats/7z/C/Ppmd8.h b/multiarc/src/formats/7z/C/Ppmd8.h
index 51c497dc..fe93fe7c 100644..100755
--- a/multiarc/src/formats/7z/C/Ppmd8.h
+++ b/multiarc/src/formats/7z/C/Ppmd8.h
@@ -1,5 +1,5 @@
-/* Ppmd8.h -- PPMdI codec
-2018-07-04 : Igor Pavlov : Public domain
+/* Ppmd8.h -- Ppmd8 (PPMdI) compression codec
+2021-04-13 : Igor Pavlov : Public domain
This code is based on:
PPMd var.I (2002): Dmitry Shkarin : Public domain
Carryless rangecoder (1999): Dmitry Subbotin : Public domain */
@@ -14,35 +14,45 @@ EXTERN_C_BEGIN
#define PPMD8_MIN_ORDER 2
#define PPMD8_MAX_ORDER 16
+
+
+
struct CPpmd8_Context_;
-typedef
- #ifdef PPMD_32BIT
- struct CPpmd8_Context_ *
- #else
- UInt32
- #endif
- CPpmd8_Context_Ref;
+typedef Ppmd_Ref_Type(struct CPpmd8_Context_) CPpmd8_Context_Ref;
-#pragma pack(push, 1)
+// MY_CPU_pragma_pack_push_1
typedef struct CPpmd8_Context_
{
Byte NumStats;
Byte Flags;
- UInt16 SummFreq;
- CPpmd_State_Ref Stats;
+
+ union
+ {
+ UInt16 SummFreq;
+ CPpmd_State2 State2;
+ } Union2;
+
+ union
+ {
+ CPpmd_State_Ref Stats;
+ CPpmd_State4 State4;
+ } Union4;
+
CPpmd8_Context_Ref Suffix;
} CPpmd8_Context;
-#pragma pack(pop)
+// MY_CPU_pragma_pop
-#define Ppmd8Context_OneState(p) ((CPpmd_State *)&(p)->SummFreq)
+#define Ppmd8Context_OneState(p) ((CPpmd_State *)&(p)->Union2)
-/* The BUG in Shkarin's code for FREEZE mode was fixed, but that fixed
- code is not compatible with original code for some files compressed
+/* PPMdI code rev.2 contains the fix over PPMdI code rev.1.
+ But the code PPMdI.2 is not compatible with PPMdI.1 for some files compressed
in FREEZE mode. So we disable FREEZE mode support. */
+// #define PPMD8_FREEZE_SUPPORT
+
enum
{
PPMD8_RESTORE_METHOD_RESTART,
@@ -50,22 +60,28 @@ enum
#ifdef PPMD8_FREEZE_SUPPORT
, PPMD8_RESTORE_METHOD_FREEZE
#endif
+ , PPMD8_RESTORE_METHOD_UNSUPPPORTED
};
+
+
+
+
+
+
+
typedef struct
{
CPpmd8_Context *MinContext, *MaxContext;
CPpmd_State *FoundState;
- unsigned OrderFall, InitEsc, PrevSuccess, MaxOrder;
+ unsigned OrderFall, InitEsc, PrevSuccess, MaxOrder, RestoreMethod;
Int32 RunLength, InitRL; /* must be 32-bit at least */
UInt32 Size;
UInt32 GlueCount;
- Byte *Base, *LoUnit, *HiUnit, *Text, *UnitsStart;
UInt32 AlignOffset;
- unsigned RestoreMethod;
+ Byte *Base, *LoUnit, *HiUnit, *Text, *UnitsStart;
- /* Range Coder */
UInt32 Range;
UInt32 Code;
UInt32 Low;
@@ -75,16 +91,18 @@ typedef struct
IByteOut *Out;
} Stream;
- Byte Indx2Units[PPMD_NUM_INDEXES];
+ Byte Indx2Units[PPMD_NUM_INDEXES + 2]; // +2 for alignment
Byte Units2Indx[128];
CPpmd_Void_Ref FreeList[PPMD_NUM_INDEXES];
UInt32 Stamps[PPMD_NUM_INDEXES];
-
Byte NS2BSIndx[256], NS2Indx[260];
+ Byte ExpEscape[16];
CPpmd_See DummySee, See[24][32];
UInt16 BinSumm[25][64];
+
} CPpmd8;
+
void Ppmd8_Construct(CPpmd8 *p);
BoolInt Ppmd8_Alloc(CPpmd8 *p, UInt32 size, ISzAllocPtr alloc);
void Ppmd8_Free(CPpmd8 *p, ISzAllocPtr alloc);
@@ -94,43 +112,69 @@ void Ppmd8_Init(CPpmd8 *p, unsigned maxOrder, unsigned restoreMethod);
/* ---------- Internal Functions ---------- */
-extern const Byte PPMD8_kExpEscape[16];
-
-#ifdef PPMD_32BIT
- #define Ppmd8_GetPtr(p, ptr) (ptr)
- #define Ppmd8_GetContext(p, ptr) (ptr)
- #define Ppmd8_GetStats(p, ctx) ((ctx)->Stats)
-#else
- #define Ppmd8_GetPtr(p, offs) ((void *)((p)->Base + (offs)))
- #define Ppmd8_GetContext(p, offs) ((CPpmd8_Context *)Ppmd8_GetPtr((p), (offs)))
- #define Ppmd8_GetStats(p, ctx) ((CPpmd_State *)Ppmd8_GetPtr((p), ((ctx)->Stats)))
-#endif
+#define Ppmd8_GetPtr(p, ptr) Ppmd_GetPtr(p, ptr)
+#define Ppmd8_GetContext(p, ptr) Ppmd_GetPtr_Type(p, ptr, CPpmd8_Context)
+#define Ppmd8_GetStats(p, ctx) Ppmd_GetPtr_Type(p, (ctx)->Union4.Stats, CPpmd_State)
void Ppmd8_Update1(CPpmd8 *p);
void Ppmd8_Update1_0(CPpmd8 *p);
void Ppmd8_Update2(CPpmd8 *p);
-void Ppmd8_UpdateBin(CPpmd8 *p);
+
+
+
+
+
#define Ppmd8_GetBinSumm(p) \
- &p->BinSumm[p->NS2Indx[(size_t)Ppmd8Context_OneState(p->MinContext)->Freq - 1]][ \
- p->NS2BSIndx[Ppmd8_GetContext(p, p->MinContext->Suffix)->NumStats] + \
- p->PrevSuccess + p->MinContext->Flags + ((p->RunLength >> 26) & 0x20)]
+ &p->BinSumm[p->NS2Indx[(size_t)Ppmd8Context_OneState(p->MinContext)->Freq - 1]] \
+ [ p->PrevSuccess + ((p->RunLength >> 26) & 0x20) \
+ + p->NS2BSIndx[Ppmd8_GetContext(p, p->MinContext->Suffix)->NumStats] + \
+ + p->MinContext->Flags ]
+
CPpmd_See *Ppmd8_MakeEscFreq(CPpmd8 *p, unsigned numMasked, UInt32 *scale);
+/* 20.01: the original PPMdI encoder and decoder probably could work incorrectly in some rare cases,
+ where the original PPMdI code can give "Divide by Zero" operation.
+ We use the following fix to allow correct working of encoder and decoder in any cases.
+ We correct (Escape_Freq) and (_sum_), if (_sum_) is larger than p->Range) */
+#define PPMD8_CORRECT_SUM_RANGE(p, _sum_) if (_sum_ > p->Range /* /1 */) _sum_ = p->Range;
+
+
/* ---------- Decode ---------- */
-BoolInt Ppmd8_RangeDec_Init(CPpmd8 *p);
+#define PPMD8_SYM_END (-1)
+#define PPMD8_SYM_ERROR (-2)
+
+/*
+You must set (CPpmd8::Stream.In) before Ppmd8_RangeDec_Init()
+
+Ppmd8_DecodeSymbol()
+out:
+ >= 0 : decoded byte
+ -1 : PPMD8_SYM_END : End of payload marker
+ -2 : PPMD8_SYM_ERROR : Data error
+*/
+
+
+BoolInt Ppmd8_Init_RangeDec(CPpmd8 *p);
#define Ppmd8_RangeDec_IsFinishedOK(p) ((p)->Code == 0)
-int Ppmd8_DecodeSymbol(CPpmd8 *p); /* returns: -1 as EndMarker, -2 as DataError */
+int Ppmd8_DecodeSymbol(CPpmd8 *p);
+
+
+
+
+
+
/* ---------- Encode ---------- */
-#define Ppmd8_RangeEnc_Init(p) { (p)->Low = 0; (p)->Range = 0xFFFFFFFF; }
-void Ppmd8_RangeEnc_FlushData(CPpmd8 *p);
-void Ppmd8_EncodeSymbol(CPpmd8 *p, int symbol); /* symbol = -1 means EndMarker */
+#define Ppmd8_Init_RangeEnc(p) { (p)->Low = 0; (p)->Range = 0xFFFFFFFF; }
+void Ppmd8_Flush_RangeEnc(CPpmd8 *p);
+void Ppmd8_EncodeSymbol(CPpmd8 *p, int symbol);
+
EXTERN_C_END
diff --git a/multiarc/src/formats/7z/C/Ppmd8Dec.c b/multiarc/src/formats/7z/C/Ppmd8Dec.c
index a18ec677..d205de28 100644..100755
--- a/multiarc/src/formats/7z/C/Ppmd8Dec.c
+++ b/multiarc/src/formats/7z/C/Ppmd8Dec.c
@@ -1,5 +1,5 @@
-/* Ppmd8Dec.c -- PPMdI Decoder
-2018-07-04 : Igor Pavlov : Public domain
+/* Ppmd8Dec.c -- Ppmd8 (PPMdI) Decoder
+2021-04-13 : Igor Pavlov : Public domain
This code is based on:
PPMd var.I (2002): Dmitry Shkarin : Public domain
Carryless rangecoder (1999): Dmitry Subbotin : Public domain */
@@ -11,147 +11,269 @@ This code is based on:
#define kTop (1 << 24)
#define kBot (1 << 15)
-BoolInt Ppmd8_RangeDec_Init(CPpmd8 *p)
+#define READ_BYTE(p) IByteIn_Read((p)->Stream.In)
+
+BoolInt Ppmd8_Init_RangeDec(CPpmd8 *p)
{
unsigned i;
- p->Low = 0;
- p->Range = 0xFFFFFFFF;
p->Code = 0;
+ p->Range = 0xFFFFFFFF;
+ p->Low = 0;
+
for (i = 0; i < 4; i++)
- p->Code = (p->Code << 8) | IByteIn_Read(p->Stream.In);
+ p->Code = (p->Code << 8) | READ_BYTE(p);
return (p->Code < 0xFFFFFFFF);
}
-static UInt32 RangeDec_GetThreshold(CPpmd8 *p, UInt32 total)
-{
- return p->Code / (p->Range /= total);
-}
+#define RC_NORM(p) \
+ while ((p->Low ^ (p->Low + p->Range)) < kTop \
+ || (p->Range < kBot && ((p->Range = (0 - p->Low) & (kBot - 1)), 1))) { \
+ p->Code = (p->Code << 8) | READ_BYTE(p); \
+ p->Range <<= 8; p->Low <<= 8; }
+
+// we must use only one type of Normalization from two: LOCAL or REMOTE
+#define RC_NORM_LOCAL(p) // RC_NORM(p)
+#define RC_NORM_REMOTE(p) RC_NORM(p)
+#define R p
+
+MY_FORCE_INLINE
+// MY_NO_INLINE
static void RangeDec_Decode(CPpmd8 *p, UInt32 start, UInt32 size)
{
- start *= p->Range;
- p->Low += start;
- p->Code -= start;
- p->Range *= size;
-
- while ((p->Low ^ (p->Low + p->Range)) < kTop ||
- (p->Range < kBot && ((p->Range = (0 - p->Low) & (kBot - 1)), 1)))
- {
- p->Code = (p->Code << 8) | IByteIn_Read(p->Stream.In);
- p->Range <<= 8;
- p->Low <<= 8;
- }
+ start *= R->Range;
+ R->Low += start;
+ R->Code -= start;
+ R->Range *= size;
+ RC_NORM_LOCAL(R)
}
-#define MASK(sym) ((signed char *)charMask)[sym]
+#define RC_Decode(start, size) RangeDec_Decode(p, start, size);
+#define RC_DecodeFinal(start, size) RC_Decode(start, size) RC_NORM_REMOTE(R)
+#define RC_GetThreshold(total) (R->Code / (R->Range /= (total)))
+
+
+#define CTX(ref) ((CPpmd8_Context *)Ppmd8_GetContext(p, ref))
+typedef CPpmd8_Context * CTX_PTR;
+#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p)
+void Ppmd8_UpdateModel(CPpmd8 *p);
+
+#define MASK(sym) ((unsigned char *)charMask)[sym]
+
int Ppmd8_DecodeSymbol(CPpmd8 *p)
{
size_t charMask[256 / sizeof(size_t)];
+
if (p->MinContext->NumStats != 0)
{
CPpmd_State *s = Ppmd8_GetStats(p, p->MinContext);
unsigned i;
UInt32 count, hiCnt;
- if ((count = RangeDec_GetThreshold(p, p->MinContext->SummFreq)) < (hiCnt = s->Freq))
+ UInt32 summFreq = p->MinContext->Union2.SummFreq;
+
+ PPMD8_CORRECT_SUM_RANGE(p, summFreq)
+
+
+ count = RC_GetThreshold(summFreq);
+ hiCnt = count;
+
+ if ((Int32)(count -= s->Freq) < 0)
{
- Byte symbol;
- RangeDec_Decode(p, 0, s->Freq);
+ Byte sym;
+ RC_DecodeFinal(0, s->Freq);
p->FoundState = s;
- symbol = s->Symbol;
+ sym = s->Symbol;
Ppmd8_Update1_0(p);
- return symbol;
+ return sym;
}
+
p->PrevSuccess = 0;
i = p->MinContext->NumStats;
+
do
{
- if ((hiCnt += (++s)->Freq) > count)
+ if ((Int32)(count -= (++s)->Freq) < 0)
{
- Byte symbol;
- RangeDec_Decode(p, hiCnt - s->Freq, s->Freq);
+ Byte sym;
+ RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq);
p->FoundState = s;
- symbol = s->Symbol;
+ sym = s->Symbol;
Ppmd8_Update1(p);
- return symbol;
+ return sym;
}
}
while (--i);
- if (count >= p->MinContext->SummFreq)
- return -2;
- RangeDec_Decode(p, hiCnt, p->MinContext->SummFreq - hiCnt);
+
+ if (hiCnt >= summFreq)
+ return PPMD8_SYM_ERROR;
+
+ hiCnt -= count;
+ RC_Decode(hiCnt, summFreq - hiCnt);
+
+
PPMD_SetAllBitsIn256Bytes(charMask);
- MASK(s->Symbol) = 0;
- i = p->MinContext->NumStats;
- do { MASK((--s)->Symbol) = 0; } while (--i);
+ // i = p->MinContext->NumStats - 1;
+ // do { MASK((--s)->Symbol) = 0; } while (--i);
+ {
+ CPpmd_State *s2 = Ppmd8_GetStats(p, p->MinContext);
+ MASK(s->Symbol) = 0;
+ do
+ {
+ unsigned sym0 = s2[0].Symbol;
+ unsigned sym1 = s2[1].Symbol;
+ s2 += 2;
+ MASK(sym0) = 0;
+ MASK(sym1) = 0;
+ }
+ while (s2 < s);
+ }
}
else
{
+ CPpmd_State *s = Ppmd8Context_OneState(p->MinContext);
UInt16 *prob = Ppmd8_GetBinSumm(p);
- if (((p->Code / (p->Range >>= 14)) < *prob))
+ UInt32 pr = *prob;
+ UInt32 size0 = (R->Range >> 14) * pr;
+ pr = PPMD_UPDATE_PROB_1(pr);
+
+ if (R->Code < size0)
{
- Byte symbol;
- RangeDec_Decode(p, 0, *prob);
- *prob = (UInt16)PPMD_UPDATE_PROB_0(*prob);
- symbol = (p->FoundState = Ppmd8Context_OneState(p->MinContext))->Symbol;
- Ppmd8_UpdateBin(p);
- return symbol;
+ Byte sym;
+ *prob = (UInt16)(pr + (1 << PPMD_INT_BITS));
+
+ // RangeDec_DecodeBit0(size0);
+ R->Range = size0;
+ RC_NORM(R)
+
+
+
+ // sym = (p->FoundState = Ppmd8Context_OneState(p->MinContext))->Symbol;
+ // Ppmd8_UpdateBin(p);
+ {
+ unsigned freq = s->Freq;
+ CTX_PTR c = CTX(SUCCESSOR(s));
+ sym = s->Symbol;
+ p->FoundState = s;
+ p->PrevSuccess = 1;
+ p->RunLength++;
+ s->Freq = (Byte)(freq + (freq < 196));
+ // NextContext(p);
+ if (p->OrderFall == 0 && (const Byte *)c >= p->UnitsStart)
+ p->MaxContext = p->MinContext = c;
+ else
+ Ppmd8_UpdateModel(p);
+ }
+ return sym;
}
- RangeDec_Decode(p, *prob, (1 << 14) - *prob);
- *prob = (UInt16)PPMD_UPDATE_PROB_1(*prob);
- p->InitEsc = PPMD8_kExpEscape[*prob >> 10];
+
+ *prob = (UInt16)pr;
+ p->InitEsc = p->ExpEscape[pr >> 10];
+
+ // RangeDec_DecodeBit1(rc2, size0);
+ R->Low += size0;
+ R->Code -= size0;
+ R->Range = (R->Range & ~((UInt32)PPMD_BIN_SCALE - 1)) - size0;
+ RC_NORM_LOCAL(R)
+
PPMD_SetAllBitsIn256Bytes(charMask);
MASK(Ppmd8Context_OneState(p->MinContext)->Symbol) = 0;
p->PrevSuccess = 0;
}
+
for (;;)
{
- CPpmd_State *ps[256], *s;
+ CPpmd_State *s, *s2;
UInt32 freqSum, count, hiCnt;
+ UInt32 freqSum2;
CPpmd_See *see;
- unsigned i, num, numMasked = p->MinContext->NumStats;
+ CPpmd8_Context *mc;
+ unsigned numMasked;
+ RC_NORM_REMOTE(R)
+ mc = p->MinContext;
+ numMasked = mc->NumStats;
+
do
{
p->OrderFall++;
- if (!p->MinContext->Suffix)
- return -1;
- p->MinContext = Ppmd8_GetContext(p, p->MinContext->Suffix);
+ if (!mc->Suffix)
+ return PPMD8_SYM_END;
+ mc = Ppmd8_GetContext(p, mc->Suffix);
}
- while (p->MinContext->NumStats == numMasked);
- hiCnt = 0;
- s = Ppmd8_GetStats(p, p->MinContext);
- i = 0;
- num = p->MinContext->NumStats - numMasked;
- do
+ while (mc->NumStats == numMasked);
+
+ s = Ppmd8_GetStats(p, mc);
+
{
- int k = (int)(MASK(s->Symbol));
- hiCnt += (s->Freq & k);
- ps[i] = s++;
- i -= k;
+ unsigned num = (unsigned)mc->NumStats + 1;
+ unsigned num2 = num / 2;
+
+ num &= 1;
+ hiCnt = (s->Freq & (unsigned)(MASK(s->Symbol))) & (0 - (UInt32)num);
+ s += num;
+ p->MinContext = mc;
+
+ do
+ {
+ unsigned sym0 = s[0].Symbol;
+ unsigned sym1 = s[1].Symbol;
+ s += 2;
+ hiCnt += (s[-2].Freq & (unsigned)(MASK(sym0)));
+ hiCnt += (s[-1].Freq & (unsigned)(MASK(sym1)));
+ }
+ while (--num2);
}
- while (i != num);
see = Ppmd8_MakeEscFreq(p, numMasked, &freqSum);
freqSum += hiCnt;
- count = RangeDec_GetThreshold(p, freqSum);
+ freqSum2 = freqSum;
+ PPMD8_CORRECT_SUM_RANGE(R, freqSum2);
+
+
+ count = RC_GetThreshold(freqSum2);
if (count < hiCnt)
{
- Byte symbol;
- CPpmd_State **pps = ps;
- for (hiCnt = 0; (hiCnt += (*pps)->Freq) <= count; pps++);
- s = *pps;
- RangeDec_Decode(p, hiCnt - s->Freq, s->Freq);
+ Byte sym;
+ // Ppmd_See_Update(see); // new (see->Summ) value can overflow over 16-bits in some rare cases
+ s = Ppmd8_GetStats(p, p->MinContext);
+ hiCnt = count;
+
+
+ {
+ for (;;)
+ {
+ count -= s->Freq & (unsigned)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break;
+ // count -= s->Freq & (unsigned)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break;
+ }
+ }
+ s--;
+ RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq);
+
+ // new (see->Summ) value can overflow over 16-bits in some rare cases
Ppmd_See_Update(see);
p->FoundState = s;
- symbol = s->Symbol;
+ sym = s->Symbol;
Ppmd8_Update2(p);
- return symbol;
+ return sym;
}
- if (count >= freqSum)
- return -2;
- RangeDec_Decode(p, hiCnt, freqSum - hiCnt);
+
+ if (count >= freqSum2)
+ return PPMD8_SYM_ERROR;
+
+ RC_Decode(hiCnt, freqSum2 - hiCnt);
+
+ // We increase (see->Summ) for sum of Freqs of all non_Masked symbols.
+ // new (see->Summ) value can overflow over 16-bits in some rare cases
see->Summ = (UInt16)(see->Summ + freqSum);
- do { MASK(ps[--i]->Symbol) = 0; } while (i != 0);
+
+ s = Ppmd8_GetStats(p, p->MinContext);
+ s2 = s + p->MinContext->NumStats + 1;
+ do
+ {
+ MASK(s->Symbol) = 0;
+ s++;
+ }
+ while (s != s2);
}
}
diff --git a/multiarc/src/formats/7z/C/Ppmd8Enc.c b/multiarc/src/formats/7z/C/Ppmd8Enc.c
index 1cbc17f1..32ff8052 100644..100755
--- a/multiarc/src/formats/7z/C/Ppmd8Enc.c
+++ b/multiarc/src/formats/7z/C/Ppmd8Enc.c
@@ -1,5 +1,5 @@
-/* Ppmd8Enc.c -- PPMdI Encoder
-2017-04-03 : Igor Pavlov : Public domain
+/* Ppmd8Enc.c -- Ppmd8 (PPMdI) Encoder
+2021-04-13 : Igor Pavlov : Public domain
This code is based on:
PPMd var.I (2002): Dmitry Shkarin : Public domain
Carryless rangecoder (1999): Dmitry Subbotin : Public domain */
@@ -11,59 +11,100 @@ This code is based on:
#define kTop (1 << 24)
#define kBot (1 << 15)
-void Ppmd8_RangeEnc_FlushData(CPpmd8 *p)
+#define WRITE_BYTE(p) IByteOut_Write(p->Stream.Out, (Byte)(p->Low >> 24))
+
+void Ppmd8_Flush_RangeEnc(CPpmd8 *p)
{
unsigned i;
for (i = 0; i < 4; i++, p->Low <<= 8 )
- IByteOut_Write(p->Stream.Out, (Byte)(p->Low >> 24));
+ WRITE_BYTE(p);
}
-static void RangeEnc_Normalize(CPpmd8 *p)
-{
- while ((p->Low ^ (p->Low + p->Range)) < kTop ||
- (p->Range < kBot && ((p->Range = (0 - p->Low) & (kBot - 1)), 1)))
- {
- IByteOut_Write(p->Stream.Out, (Byte)(p->Low >> 24));
- p->Range <<= 8;
- p->Low <<= 8;
- }
-}
+
+
+
+
+#define RC_NORM(p) \
+ while ((p->Low ^ (p->Low + p->Range)) < kTop \
+ || (p->Range < kBot && ((p->Range = (0 - p->Low) & (kBot - 1)), 1))) \
+ { WRITE_BYTE(p); p->Range <<= 8; p->Low <<= 8; }
+
+
+
+
+
+
+
+
+
+
+
+
+
+// we must use only one type of Normalization from two: LOCAL or REMOTE
+#define RC_NORM_LOCAL(p) // RC_NORM(p)
+#define RC_NORM_REMOTE(p) RC_NORM(p)
+
+// #define RC_PRE(total) p->Range /= total;
+// #define RC_PRE(total)
+
+#define R p
+
+
+
+
+MY_FORCE_INLINE
+// MY_NO_INLINE
static void RangeEnc_Encode(CPpmd8 *p, UInt32 start, UInt32 size, UInt32 total)
{
- p->Low += start * (p->Range /= total);
- p->Range *= size;
- RangeEnc_Normalize(p);
+ R->Low += start * (R->Range /= total);
+ R->Range *= size;
+ RC_NORM_LOCAL(R);
}
-static void RangeEnc_EncodeBit_0(CPpmd8 *p, UInt32 size0)
-{
- p->Range >>= 14;
- p->Range *= size0;
- RangeEnc_Normalize(p);
-}
-static void RangeEnc_EncodeBit_1(CPpmd8 *p, UInt32 size0)
-{
- p->Low += size0 * (p->Range >>= 14);
- p->Range *= ((1 << 14) - size0);
- RangeEnc_Normalize(p);
-}
-#define MASK(sym) ((signed char *)charMask)[sym]
+
+
+
+
+
+#define RC_Encode(start, size, total) RangeEnc_Encode(p, start, size, total);
+#define RC_EncodeFinal(start, size, total) RC_Encode(start, size, total); RC_NORM_REMOTE(p);
+
+#define CTX(ref) ((CPpmd8_Context *)Ppmd8_GetContext(p, ref))
+
+typedef CPpmd8_Context * CTX_PTR;
+#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p)
+
+void Ppmd8_UpdateModel(CPpmd8 *p);
+
+#define MASK(sym) ((unsigned char *)charMask)[sym]
+
+// MY_FORCE_INLINE
+// static
void Ppmd8_EncodeSymbol(CPpmd8 *p, int symbol)
{
size_t charMask[256 / sizeof(size_t)];
+
if (p->MinContext->NumStats != 0)
{
CPpmd_State *s = Ppmd8_GetStats(p, p->MinContext);
UInt32 sum;
unsigned i;
+ UInt32 summFreq = p->MinContext->Union2.SummFreq;
+
+ PPMD8_CORRECT_SUM_RANGE(p, summFreq)
+
+ // RC_PRE(summFreq);
+
if (s->Symbol == symbol)
{
- RangeEnc_Encode(p, 0, s->Freq, p->MinContext->SummFreq);
+
+ RC_EncodeFinal(0, s->Freq, summFreq);
p->FoundState = s;
Ppmd8_Update1_0(p);
return;
@@ -75,7 +116,8 @@ void Ppmd8_EncodeSymbol(CPpmd8 *p, int symbol)
{
if ((++s)->Symbol == symbol)
{
- RangeEnc_Encode(p, sum, s->Freq, p->MinContext->SummFreq);
+
+ RC_EncodeFinal(sum, s->Freq, summFreq);
p->FoundState = s;
Ppmd8_Update1(p);
return;
@@ -84,80 +126,189 @@ void Ppmd8_EncodeSymbol(CPpmd8 *p, int symbol)
}
while (--i);
+
+ RC_Encode(sum, summFreq - sum, summFreq);
+
+
PPMD_SetAllBitsIn256Bytes(charMask);
- MASK(s->Symbol) = 0;
- i = p->MinContext->NumStats;
- do { MASK((--s)->Symbol) = 0; } while (--i);
- RangeEnc_Encode(p, sum, p->MinContext->SummFreq - sum, p->MinContext->SummFreq);
+ // MASK(s->Symbol) = 0;
+ // i = p->MinContext->NumStats;
+ // do { MASK((--s)->Symbol) = 0; } while (--i);
+ {
+ CPpmd_State *s2 = Ppmd8_GetStats(p, p->MinContext);
+ MASK(s->Symbol) = 0;
+ do
+ {
+ unsigned sym0 = s2[0].Symbol;
+ unsigned sym1 = s2[1].Symbol;
+ s2 += 2;
+ MASK(sym0) = 0;
+ MASK(sym1) = 0;
+ }
+ while (s2 < s);
+ }
}
else
{
UInt16 *prob = Ppmd8_GetBinSumm(p);
CPpmd_State *s = Ppmd8Context_OneState(p->MinContext);
+ UInt32 pr = *prob;
+ UInt32 bound = (R->Range >> 14) * pr;
+ pr = PPMD_UPDATE_PROB_1(pr);
if (s->Symbol == symbol)
{
- RangeEnc_EncodeBit_0(p, *prob);
- *prob = (UInt16)PPMD_UPDATE_PROB_0(*prob);
- p->FoundState = s;
- Ppmd8_UpdateBin(p);
+ *prob = (UInt16)(pr + (1 << PPMD_INT_BITS));
+ // RangeEnc_EncodeBit_0(p, bound);
+ R->Range = bound;
+ RC_NORM(R);
+
+ // p->FoundState = s;
+ // Ppmd8_UpdateBin(p);
+ {
+ unsigned freq = s->Freq;
+ CTX_PTR c = CTX(SUCCESSOR(s));
+ p->FoundState = s;
+ p->PrevSuccess = 1;
+ p->RunLength++;
+ s->Freq = (Byte)(freq + (freq < 196)); // Ppmd8 (196)
+ // NextContext(p);
+ if (p->OrderFall == 0 && (const Byte *)c >= p->UnitsStart)
+ p->MaxContext = p->MinContext = c;
+ else
+ Ppmd8_UpdateModel(p);
+ }
return;
}
- else
- {
- RangeEnc_EncodeBit_1(p, *prob);
- *prob = (UInt16)PPMD_UPDATE_PROB_1(*prob);
- p->InitEsc = PPMD8_kExpEscape[*prob >> 10];
- PPMD_SetAllBitsIn256Bytes(charMask);
- MASK(s->Symbol) = 0;
- p->PrevSuccess = 0;
- }
+
+ *prob = (UInt16)pr;
+ p->InitEsc = p->ExpEscape[pr >> 10];
+ // RangeEnc_EncodeBit_1(p, bound);
+ R->Low += bound;
+ R->Range = (R->Range & ~((UInt32)PPMD_BIN_SCALE - 1)) - bound;
+ RC_NORM_LOCAL(R)
+
+ PPMD_SetAllBitsIn256Bytes(charMask);
+ MASK(s->Symbol) = 0;
+ p->PrevSuccess = 0;
}
+
for (;;)
{
- UInt32 escFreq;
CPpmd_See *see;
CPpmd_State *s;
- UInt32 sum;
- unsigned i, numMasked = p->MinContext->NumStats;
+ UInt32 sum, escFreq;
+ CPpmd8_Context *mc;
+ unsigned i, numMasked;
+
+ RC_NORM_REMOTE(p)
+
+ mc = p->MinContext;
+ numMasked = mc->NumStats;
+
do
{
p->OrderFall++;
- if (!p->MinContext->Suffix)
+ if (!mc->Suffix)
return; /* EndMarker (symbol = -1) */
- p->MinContext = Ppmd8_GetContext(p, p->MinContext->Suffix);
+ mc = Ppmd8_GetContext(p, mc->Suffix);
+
}
- while (p->MinContext->NumStats == numMasked);
+ while (mc->NumStats == numMasked);
+ p->MinContext = mc;
+
see = Ppmd8_MakeEscFreq(p, numMasked, &escFreq);
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
s = Ppmd8_GetStats(p, p->MinContext);
sum = 0;
- i = p->MinContext->NumStats + 1;
+ i = (unsigned)p->MinContext->NumStats + 1;
+
do
{
- int cur = s->Symbol;
- if (cur == symbol)
+ unsigned cur = s->Symbol;
+ if ((int)cur == symbol)
{
UInt32 low = sum;
- CPpmd_State *s1 = s;
- do
+ UInt32 freq = s->Freq;
+ unsigned num2;
+
+ Ppmd_See_Update(see);
+ p->FoundState = s;
+ sum += escFreq;
+
+ num2 = i / 2;
+ i &= 1;
+ sum += freq & (0 - (UInt32)i);
+ if (num2 != 0)
{
- sum += (s->Freq & (int)(MASK(s->Symbol)));
- s++;
+ s += i;
+ for (;;)
+ {
+ unsigned sym0 = s[0].Symbol;
+ unsigned sym1 = s[1].Symbol;
+ s += 2;
+ sum += (s[-2].Freq & (unsigned)(MASK(sym0)));
+ sum += (s[-1].Freq & (unsigned)(MASK(sym1)));
+ if (--num2 == 0)
+ break;
+ }
}
- while (--i);
- RangeEnc_Encode(p, low, s1->Freq, sum + escFreq);
- Ppmd_See_Update(see);
- p->FoundState = s1;
+
+ PPMD8_CORRECT_SUM_RANGE(p, sum);
+
+ RC_EncodeFinal(low, freq, sum);
Ppmd8_Update2(p);
return;
}
- sum += (s->Freq & (int)(MASK(cur)));
- MASK(cur) = 0;
+ sum += (s->Freq & (unsigned)(MASK(cur)));
s++;
}
while (--i);
- RangeEnc_Encode(p, sum, escFreq, sum + escFreq);
- see->Summ = (UInt16)(see->Summ + sum + escFreq);
+ {
+ UInt32 total = sum + escFreq;
+ see->Summ = (UInt16)(see->Summ + total);
+ PPMD8_CORRECT_SUM_RANGE(p, total);
+
+ RC_Encode(sum, total - sum, total);
+ }
+
+ {
+ CPpmd_State *s2 = Ppmd8_GetStats(p, p->MinContext);
+ s--;
+ MASK(s->Symbol) = 0;
+ do
+ {
+ unsigned sym0 = s2[0].Symbol;
+ unsigned sym1 = s2[1].Symbol;
+ s2 += 2;
+ MASK(sym0) = 0;
+ MASK(sym1) = 0;
+ }
+ while (s2 < s);
+ }
}
}
diff --git a/multiarc/src/formats/7z/C/Precomp.h b/multiarc/src/formats/7z/C/Precomp.h
index e8ff8b40..e8ff8b40 100644..100755
--- a/multiarc/src/formats/7z/C/Precomp.h
+++ b/multiarc/src/formats/7z/C/Precomp.h
diff --git a/multiarc/src/formats/7z/C/RotateDefs.h b/multiarc/src/formats/7z/C/RotateDefs.h
index 8f01d1a6..8f01d1a6 100644..100755
--- a/multiarc/src/formats/7z/C/RotateDefs.h
+++ b/multiarc/src/formats/7z/C/RotateDefs.h
diff --git a/multiarc/src/formats/7z/C/Sha1.c b/multiarc/src/formats/7z/C/Sha1.c
index 96b5e787..9665b5b5 100644..100755
--- a/multiarc/src/formats/7z/C/Sha1.c
+++ b/multiarc/src/formats/7z/C/Sha1.c
@@ -1,5 +1,5 @@
/* Sha1.c -- SHA-1 Hash
-2017-04-03 : Igor Pavlov : Public domain
+2021-07-13 : Igor Pavlov : Public domain
This code is based on public domain code of Steve Reid from Wei Dai's Crypto++ library. */
#include "Precomp.h"
@@ -10,331 +10,464 @@ This code is based on public domain code of Steve Reid from Wei Dai's Crypto++ l
#include "RotateDefs.h"
#include "Sha1.h"
-// define it for speed optimization
+#if defined(_MSC_VER) && (_MSC_VER < 1900)
+// #define USE_MY_MM
+#endif
+
+#ifdef MY_CPU_X86_OR_AMD64
+ #ifdef _MSC_VER
+ #if _MSC_VER >= 1200
+ #define _SHA_SUPPORTED
+ #endif
+ #elif defined(__clang__)
+ #if (__clang_major__ >= 8) // fix that check
+ #define _SHA_SUPPORTED
+ #endif
+ #elif defined(__GNUC__)
+ #if (__GNUC__ >= 8) // fix that check
+ #define _SHA_SUPPORTED
+ #endif
+ #elif defined(__INTEL_COMPILER)
+ #if (__INTEL_COMPILER >= 1800) // fix that check
+ #define _SHA_SUPPORTED
+ #endif
+ #endif
+#elif defined(MY_CPU_ARM_OR_ARM64)
+ #ifdef _MSC_VER
+ #if _MSC_VER >= 1910 && _MSC_VER >= 1929 && _MSC_FULL_VER >= 192930037
+ #define _SHA_SUPPORTED
+ #endif
+ #elif defined(__clang__)
+ #if (__clang_major__ >= 8) // fix that check
+ #define _SHA_SUPPORTED
+ #endif
+ #elif defined(__GNUC__)
+ #if (__GNUC__ >= 6) // fix that check
+ #define _SHA_SUPPORTED
+ #endif
+ #endif
+#endif
+
+void MY_FAST_CALL Sha1_UpdateBlocks(UInt32 state[5], const Byte *data, size_t numBlocks);
+
+#ifdef _SHA_SUPPORTED
+ void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks);
+
+ static SHA1_FUNC_UPDATE_BLOCKS g_FUNC_UPDATE_BLOCKS = Sha1_UpdateBlocks;
+ static SHA1_FUNC_UPDATE_BLOCKS g_FUNC_UPDATE_BLOCKS_HW;
+
+ #define UPDATE_BLOCKS(p) p->func_UpdateBlocks
+#else
+ #define UPDATE_BLOCKS(p) Sha1_UpdateBlocks
+#endif
+
+
+BoolInt Sha1_SetFunction(CSha1 *p, unsigned algo)
+{
+ SHA1_FUNC_UPDATE_BLOCKS func = Sha1_UpdateBlocks;
+
+ #ifdef _SHA_SUPPORTED
+ if (algo != SHA1_ALGO_SW)
+ {
+ if (algo == SHA1_ALGO_DEFAULT)
+ func = g_FUNC_UPDATE_BLOCKS;
+ else
+ {
+ if (algo != SHA1_ALGO_HW)
+ return False;
+ func = g_FUNC_UPDATE_BLOCKS_HW;
+ if (!func)
+ return False;
+ }
+ }
+ #else
+ if (algo > 1)
+ return False;
+ #endif
+
+ p->func_UpdateBlocks = func;
+ return True;
+}
+
+
+/* define it for speed optimization */
// #define _SHA1_UNROLL
+// allowed unroll steps: (1, 2, 4, 5, 20)
+
#ifdef _SHA1_UNROLL
- #define kNumW 16
- #define WW(i) W[(i)&15]
+ #define STEP_PRE 20
+ #define STEP_MAIN 20
#else
+ #define _SHA1_BIG_W
+ #define STEP_PRE 5
+ #define STEP_MAIN 5
+#endif
+
+
+#ifdef _SHA1_BIG_W
#define kNumW 80
- #define WW(i) W[i]
+ #define w(i) W[i]
+#else
+ #define kNumW 16
+ #define w(i) W[(i)&15]
#endif
-#define w0(i) (W[i] = data[i])
+#define w0(i) (W[i] = GetBe32(data + (size_t)(i) * 4))
+#define w1(i) (w(i) = rotlFixed(w((size_t)(i)-3) ^ w((size_t)(i)-8) ^ w((size_t)(i)-14) ^ w((size_t)(i)-16), 1))
-#define w1(i) (WW(i) = rotlFixed(WW((i)-3) ^ WW((i)-8) ^ WW((i)-14) ^ WW((i)-16), 1))
+#define f0(x,y,z) ( 0x5a827999 + (z^(x&(y^z))) )
+#define f1(x,y,z) ( 0x6ed9eba1 + (x^y^z) )
+#define f2(x,y,z) ( 0x8f1bbcdc + ((x&y)|(z&(x|y))) )
+#define f3(x,y,z) ( 0xca62c1d6 + (x^y^z) )
-#define f1(x,y,z) (z^(x&(y^z)))
-#define f2(x,y,z) (x^y^z)
-#define f3(x,y,z) ((x&y)|(z&(x|y)))
-#define f4(x,y,z) (x^y^z)
+/*
+#define T1(fx, ww) \
+ tmp = e + fx(b,c,d) + ww + rotlFixed(a, 5); \
+ e = d; \
+ d = c; \
+ c = rotlFixed(b, 30); \
+ b = a; \
+ a = tmp; \
+*/
-#define RK(a,b,c,d,e, fx, w, k) e += fx(b,c,d) + w + k + rotlFixed(a,5); b = rotlFixed(b,30);
+#define T5(a,b,c,d,e, fx, ww) \
+ e += fx(b,c,d) + ww + rotlFixed(a, 5); \
+ b = rotlFixed(b, 30); \
-#define R0(a,b,c,d,e, i) RK(a,b,c,d,e, f1, w0(i), 0x5A827999)
-#define R1(a,b,c,d,e, i) RK(a,b,c,d,e, f1, w1(i), 0x5A827999)
-#define R2(a,b,c,d,e, i) RK(a,b,c,d,e, f2, w1(i), 0x6ED9EBA1)
-#define R3(a,b,c,d,e, i) RK(a,b,c,d,e, f3, w1(i), 0x8F1BBCDC)
-#define R4(a,b,c,d,e, i) RK(a,b,c,d,e, f4, w1(i), 0xCA62C1D6)
-#define RX_1_4(rx1, rx4, i) \
- rx1(a,b,c,d,e, i); \
- rx4(e,a,b,c,d, i+1); \
- rx4(d,e,a,b,c, i+2); \
- rx4(c,d,e,a,b, i+3); \
- rx4(b,c,d,e,a, i+4); \
+/*
+#define R1(i, fx, wx) \
+ T1 ( fx, wx(i)); \
-#define RX_5(rx, i) RX_1_4(rx, rx, i);
+#define R2(i, fx, wx) \
+ R1 ( (i) , fx, wx); \
+ R1 ( (i) + 1, fx, wx); \
-#ifdef _SHA1_UNROLL
+#define R4(i, fx, wx) \
+ R2 ( (i) , fx, wx); \
+ R2 ( (i) + 2, fx, wx); \
+*/
+
+#define M5(i, fx, wx0, wx1) \
+ T5 ( a,b,c,d,e, fx, wx0((i) ) ); \
+ T5 ( e,a,b,c,d, fx, wx1((i)+1) ); \
+ T5 ( d,e,a,b,c, fx, wx1((i)+2) ); \
+ T5 ( c,d,e,a,b, fx, wx1((i)+3) ); \
+ T5 ( b,c,d,e,a, fx, wx1((i)+4) ); \
- #define RX_15 \
- RX_5(R0, 0); \
- RX_5(R0, 5); \
- RX_5(R0, 10);
+#define R5(i, fx, wx) \
+ M5 ( i, fx, wx, wx) \
+
+
+#if STEP_PRE > 5
+
+ #define R20_START \
+ R5 ( 0, f0, w0); \
+ R5 ( 5, f0, w0); \
+ R5 ( 10, f0, w0); \
+ M5 ( 15, f0, w0, w1); \
+
+ #elif STEP_PRE == 5
- #define RX_20(rx, i) \
- RX_5(rx, i); \
- RX_5(rx, i + 5); \
- RX_5(rx, i + 10); \
- RX_5(rx, i + 15);
+ #define R20_START \
+ { size_t i; for (i = 0; i < 15; i += STEP_PRE) \
+ { R5(i, f0, w0); } } \
+ M5 ( 15, f0, w0, w1); \
#else
-
-#define RX_15 { size_t i; for (i = 0; i < 15; i += 5) { RX_5(R0, i); } }
-#define RX_20(rx, ii) { size_t i; i = ii; for (; i < ii + 20; i += 5) { RX_5(rx, i); } }
+
+ #if STEP_PRE == 1
+ #define R_PRE R1
+ #elif STEP_PRE == 2
+ #define R_PRE R2
+ #elif STEP_PRE == 4
+ #define R_PRE R4
+ #endif
+
+ #define R20_START \
+ { size_t i; for (i = 0; i < 16; i += STEP_PRE) \
+ { R_PRE(i, f0, w0); } } \
+ R4 ( 16, f0, w1); \
#endif
-void Sha1_Init(CSha1 *p)
+
+#if STEP_MAIN > 5
+
+ #define R20(ii, fx) \
+ R5 ( (ii) , fx, w1); \
+ R5 ( (ii) + 5 , fx, w1); \
+ R5 ( (ii) + 10, fx, w1); \
+ R5 ( (ii) + 15, fx, w1); \
+
+#else
+
+ #if STEP_MAIN == 1
+ #define R_MAIN R1
+ #elif STEP_MAIN == 2
+ #define R_MAIN R2
+ #elif STEP_MAIN == 4
+ #define R_MAIN R4
+ #elif STEP_MAIN == 5
+ #define R_MAIN R5
+ #endif
+
+ #define R20(ii, fx) \
+ { size_t i; for (i = (ii); i < (ii) + 20; i += STEP_MAIN) \
+ { R_MAIN(i, fx, w1); } } \
+
+#endif
+
+
+
+void Sha1_InitState(CSha1 *p)
{
+ p->count = 0;
p->state[0] = 0x67452301;
p->state[1] = 0xEFCDAB89;
p->state[2] = 0x98BADCFE;
p->state[3] = 0x10325476;
p->state[4] = 0xC3D2E1F0;
- p->count = 0;
}
-void Sha1_GetBlockDigest(CSha1 *p, const UInt32 *data, UInt32 *destDigest)
+void Sha1_Init(CSha1 *p)
{
- UInt32 a, b, c, d, e;
- UInt32 W[kNumW];
-
- a = p->state[0];
- b = p->state[1];
- c = p->state[2];
- d = p->state[3];
- e = p->state[4];
-
- RX_15
-
- RX_1_4(R0, R1, 15);
-
- RX_20(R2, 20);
- RX_20(R3, 40);
- RX_20(R4, 60);
-
- destDigest[0] = p->state[0] + a;
- destDigest[1] = p->state[1] + b;
- destDigest[2] = p->state[2] + c;
- destDigest[3] = p->state[3] + d;
- destDigest[4] = p->state[4] + e;
+ p->func_UpdateBlocks =
+ #ifdef _SHA_SUPPORTED
+ g_FUNC_UPDATE_BLOCKS;
+ #else
+ NULL;
+ #endif
+ Sha1_InitState(p);
}
-void Sha1_UpdateBlock_Rar(CSha1 *p, UInt32 *data, int returnRes)
+
+MY_NO_INLINE
+void MY_FAST_CALL Sha1_UpdateBlocks(UInt32 state[5], const Byte *data, size_t numBlocks)
{
UInt32 a, b, c, d, e;
UInt32 W[kNumW];
+ // if (numBlocks != 0x1264378347) return;
+ if (numBlocks == 0)
+ return;
- a = p->state[0];
- b = p->state[1];
- c = p->state[2];
- d = p->state[3];
- e = p->state[4];
-
- RX_15
-
- RX_1_4(R0, R1, 15);
-
- RX_20(R2, 20);
- RX_20(R3, 40);
- RX_20(R4, 60);
-
- p->state[0] += a;
- p->state[1] += b;
- p->state[2] += c;
- p->state[3] += d;
- p->state[4] += e;
+ a = state[0];
+ b = state[1];
+ c = state[2];
+ d = state[3];
+ e = state[4];
- if (returnRes)
+ do
{
- size_t i;
- for (i = 0 ; i < SHA1_NUM_BLOCK_WORDS; i++)
- data[i] = W[kNumW - SHA1_NUM_BLOCK_WORDS + i];
+ #if STEP_PRE < 5 || STEP_MAIN < 5
+ UInt32 tmp;
+ #endif
+
+ R20_START
+ R20(20, f1);
+ R20(40, f2);
+ R20(60, f3);
+
+ a += state[0];
+ b += state[1];
+ c += state[2];
+ d += state[3];
+ e += state[4];
+
+ state[0] = a;
+ state[1] = b;
+ state[2] = c;
+ state[3] = d;
+ state[4] = e;
+
+ data += 64;
}
+ while (--numBlocks);
}
-#define Sha1_UpdateBlock(p) Sha1_GetBlockDigest(p, p->buffer, p->state)
+
+#define Sha1_UpdateBlock(p) UPDATE_BLOCKS(p)(p->state, p->buffer, 1)
void Sha1_Update(CSha1 *p, const Byte *data, size_t size)
{
- unsigned pos, pos2;
if (size == 0)
return;
- pos = (unsigned)p->count & 0x3F;
- p->count += size;
- pos2 = pos & 3;
- pos >>= 2;
-
- if (pos2 != 0)
- {
- UInt32 w;
- pos2 = (3 - pos2) * 8;
- w = ((UInt32)*data++) << pos2;
- if (--size && pos2)
- {
- pos2 -= 8;
- w |= ((UInt32)*data++) << pos2;
- if (--size && pos2)
- {
- pos2 -= 8;
- w |= ((UInt32)*data++) << pos2;
- size--;
- }
- }
- p->buffer[pos] |= w;
- if (pos2 == 0)
- pos++;
- }
- for (;;)
{
- if (pos == SHA1_NUM_BLOCK_WORDS)
+ unsigned pos = (unsigned)p->count & 0x3F;
+ unsigned num;
+
+ p->count += size;
+
+ num = 64 - pos;
+ if (num > size)
{
- for (;;)
- {
- size_t i;
- Sha1_UpdateBlock(p);
- if (size < SHA1_BLOCK_SIZE)
- break;
- size -= SHA1_BLOCK_SIZE;
- for (i = 0; i < SHA1_NUM_BLOCK_WORDS; i += 2)
- {
- p->buffer[i ] = GetBe32(data);
- p->buffer[i + 1] = GetBe32(data + 4);
- data += 8;
- }
- }
- pos = 0;
+ memcpy(p->buffer + pos, data, size);
+ return;
}
- if (size < 4)
- break;
-
- p->buffer[pos] = GetBe32(data);
- data += 4;
- size -= 4;
- pos++;
- }
-
- if (size != 0)
- {
- UInt32 w = ((UInt32)data[0]) << 24;
- if (size > 1)
+
+ if (pos != 0)
{
- w |= ((UInt32)data[1]) << 16;
- if (size > 2)
- w |= ((UInt32)data[2]) << 8;
+ size -= num;
+ memcpy(p->buffer + pos, data, num);
+ data += num;
+ Sha1_UpdateBlock(p);
}
- p->buffer[pos] = w;
}
-}
-
-void Sha1_Update_Rar(CSha1 *p, Byte *data, size_t size /* , int rar350Mode */)
-{
- int returnRes = False;
-
- unsigned pos = (unsigned)p->count & 0x3F;
- p->count += size;
-
- while (size--)
{
- unsigned pos2 = (pos & 3);
- UInt32 v = ((UInt32)*data++) << (8 * (3 - pos2));
- UInt32 *ref = &(p->buffer[pos >> 2]);
- pos++;
- if (pos2 == 0)
- {
- *ref = v;
- continue;
- }
- *ref |= v;
-
- if (pos == SHA1_BLOCK_SIZE)
- {
- pos = 0;
- Sha1_UpdateBlock_Rar(p, p->buffer, returnRes);
- if (returnRes)
- {
- size_t i;
- for (i = 0; i < SHA1_NUM_BLOCK_WORDS; i++)
- {
- UInt32 d = p->buffer[i];
- Byte *prev = data + i * 4 - SHA1_BLOCK_SIZE;
- SetUi32(prev, d);
- }
- }
- // returnRes = rar350Mode;
- returnRes = True;
- }
+ size_t numBlocks = size >> 6;
+ UPDATE_BLOCKS(p)(p->state, data, numBlocks);
+ size &= 0x3F;
+ if (size == 0)
+ return;
+ data += (numBlocks << 6);
+ memcpy(p->buffer, data, size);
}
}
+
void Sha1_Final(CSha1 *p, Byte *digest)
{
unsigned pos = (unsigned)p->count & 0x3F;
- unsigned pos2 = (pos & 3);
- UInt64 numBits;
- UInt32 w;
- unsigned i;
- pos >>= 2;
+
+ p->buffer[pos++] = 0x80;
- w = 0;
- if (pos2 != 0)
- w = p->buffer[pos];
- p->buffer[pos++] = w | (((UInt32)0x80000000) >> (8 * pos2));
+ if (pos > (64 - 8))
+ {
+ while (pos != 64) { p->buffer[pos++] = 0; }
+ // memset(&p->buf.buffer[pos], 0, 64 - pos);
+ Sha1_UpdateBlock(p);
+ pos = 0;
+ }
- while (pos != (SHA1_NUM_BLOCK_WORDS - 2))
+ /*
+ if (pos & 3)
{
- pos &= 0xF;
- if (pos == 0)
- Sha1_UpdateBlock(p);
- p->buffer[pos++] = 0;
+ p->buffer[pos] = 0;
+ p->buffer[pos + 1] = 0;
+ p->buffer[pos + 2] = 0;
+ pos += 3;
+ pos &= ~3;
}
-
- numBits = (p->count << 3);
- p->buffer[SHA1_NUM_BLOCK_WORDS - 2] = (UInt32)(numBits >> 32);
- p->buffer[SHA1_NUM_BLOCK_WORDS - 1] = (UInt32)(numBits);
- Sha1_UpdateBlock(p);
+ {
+ for (; pos < 64 - 8; pos += 4)
+ *(UInt32 *)(&p->buffer[pos]) = 0;
+ }
+ */
+
+ memset(&p->buffer[pos], 0, (64 - 8) - pos);
- for (i = 0; i < SHA1_NUM_DIGEST_WORDS; i++)
{
- UInt32 v = p->state[i];
- SetBe32(digest, v);
- digest += 4;
+ UInt64 numBits = (p->count << 3);
+ SetBe32(p->buffer + 64 - 8, (UInt32)(numBits >> 32));
+ SetBe32(p->buffer + 64 - 4, (UInt32)(numBits));
}
+
+ Sha1_UpdateBlock(p);
- Sha1_Init(p);
-}
+ SetBe32(digest, p->state[0]);
+ SetBe32(digest + 4, p->state[1]);
+ SetBe32(digest + 8, p->state[2]);
+ SetBe32(digest + 12, p->state[3]);
+ SetBe32(digest + 16, p->state[4]);
+
-void Sha1_32_PrepareBlock(const CSha1 *p, UInt32 *block, unsigned size)
-{
- const UInt64 numBits = (p->count + size) << 5;
- block[SHA1_NUM_BLOCK_WORDS - 2] = (UInt32)(numBits >> 32);
- block[SHA1_NUM_BLOCK_WORDS - 1] = (UInt32)(numBits);
- block[size++] = 0x80000000;
- while (size != (SHA1_NUM_BLOCK_WORDS - 2))
- block[size++] = 0;
+
+ Sha1_InitState(p);
}
-void Sha1_32_Update(CSha1 *p, const UInt32 *data, size_t size)
+
+void Sha1_PrepareBlock(const CSha1 *p, Byte *block, unsigned size)
{
- unsigned pos = (unsigned)p->count & 0xF;
- p->count += size;
- while (size--)
+ const UInt64 numBits = (p->count + size) << 3;
+ SetBe32(&((UInt32 *)(void *)block)[SHA1_NUM_BLOCK_WORDS - 2], (UInt32)(numBits >> 32));
+ SetBe32(&((UInt32 *)(void *)block)[SHA1_NUM_BLOCK_WORDS - 1], (UInt32)(numBits));
+ // SetBe32((UInt32 *)(block + size), 0x80000000);
+ SetUi32((UInt32 *)(void *)(block + size), 0x80);
+ size += 4;
+ while (size != (SHA1_NUM_BLOCK_WORDS - 2) * 4)
{
- p->buffer[pos++] = *data++;
- if (pos == SHA1_NUM_BLOCK_WORDS)
- {
- pos = 0;
- Sha1_UpdateBlock(p);
- }
+ *((UInt32 *)(void *)(block + size)) = 0;
+ size += 4;
}
}
-void Sha1_32_Final(CSha1 *p, UInt32 *digest)
+void Sha1_GetBlockDigest(const CSha1 *p, const Byte *data, Byte *destDigest)
{
- UInt64 numBits;
- unsigned pos = (unsigned)p->count & 0xF;
- p->buffer[pos++] = 0x80000000;
+ MY_ALIGN (16)
+ UInt32 st[SHA1_NUM_DIGEST_WORDS];
+
+ st[0] = p->state[0];
+ st[1] = p->state[1];
+ st[2] = p->state[2];
+ st[3] = p->state[3];
+ st[4] = p->state[4];
+
+ UPDATE_BLOCKS(p)(st, data, 1);
- while (pos != (SHA1_NUM_BLOCK_WORDS - 2))
+ SetBe32(destDigest + 0 , st[0]);
+ SetBe32(destDigest + 1 * 4, st[1]);
+ SetBe32(destDigest + 2 * 4, st[2]);
+ SetBe32(destDigest + 3 * 4, st[3]);
+ SetBe32(destDigest + 4 * 4, st[4]);
+}
+
+
+void Sha1Prepare()
+{
+ #ifdef _SHA_SUPPORTED
+ SHA1_FUNC_UPDATE_BLOCKS f, f_hw;
+ f = Sha1_UpdateBlocks;
+ f_hw = NULL;
+ #ifdef MY_CPU_X86_OR_AMD64
+ #ifndef USE_MY_MM
+ if (CPU_IsSupported_SHA()
+ && CPU_IsSupported_SSSE3()
+ // && CPU_IsSupported_SSE41()
+ )
+ #endif
+ #else
+ if (CPU_IsSupported_SHA1())
+ #endif
{
- pos &= 0xF;
- if (pos == 0)
- Sha1_UpdateBlock(p);
- p->buffer[pos++] = 0;
- }
-
- numBits = (p->count << 5);
- p->buffer[SHA1_NUM_BLOCK_WORDS - 2] = (UInt32)(numBits >> 32);
- p->buffer[SHA1_NUM_BLOCK_WORDS - 1] = (UInt32)(numBits);
+ // printf("\n========== HW SHA1 ======== \n");
+ #if defined(MY_CPU_ARM_OR_ARM64) && defined(_MSC_VER)
+ /* there was bug in MSVC compiler for ARM64 -O2 before version VS2019 16.10 (19.29.30037).
+ It generated incorrect SHA-1 code.
+ 21.03 : we test sha1-hardware code at runtime initialization */
+
+ #pragma message("== SHA1 code: MSC compiler : failure-check code was inserted")
+
+ UInt32 state[5] = { 0, 1, 2, 3, 4 } ;
+ Byte data[64];
+ unsigned i;
+ for (i = 0; i < sizeof(data); i += 2)
+ {
+ data[i ] = (Byte)(i);
+ data[i + 1] = (Byte)(i + 1);
+ }
- Sha1_GetBlockDigest(p, p->buffer, digest);
-
- Sha1_Init(p);
+ Sha1_UpdateBlocks_HW(state, data, sizeof(data) / 64);
+
+ if ( state[0] != 0x9acd7297
+ || state[1] != 0x4624d898
+ || state[2] != 0x0bf079f0
+ || state[3] != 0x031e61b3
+ || state[4] != 0x8323fe20)
+ {
+ // printf("\n========== SHA-1 hardware version failure ======== \n");
+ }
+ else
+ #endif
+ {
+ f = f_hw = Sha1_UpdateBlocks_HW;
+ }
+ }
+ g_FUNC_UPDATE_BLOCKS = f;
+ g_FUNC_UPDATE_BLOCKS_HW = f_hw;
+ #endif
}
diff --git a/multiarc/src/formats/7z/C/Sha1.h b/multiarc/src/formats/7z/C/Sha1.h
index aa22ec36..345a816a 100644..100755
--- a/multiarc/src/formats/7z/C/Sha1.h
+++ b/multiarc/src/formats/7z/C/Sha1.h
@@ -1,5 +1,5 @@
/* Sha1.h -- SHA-1 Hash
-2016-05-20 : Igor Pavlov : Public domain */
+2021-02-08 : Igor Pavlov : Public domain */
#ifndef __7Z_SHA1_H
#define __7Z_SHA1_H
@@ -14,24 +14,62 @@ EXTERN_C_BEGIN
#define SHA1_BLOCK_SIZE (SHA1_NUM_BLOCK_WORDS * 4)
#define SHA1_DIGEST_SIZE (SHA1_NUM_DIGEST_WORDS * 4)
+typedef void (MY_FAST_CALL *SHA1_FUNC_UPDATE_BLOCKS)(UInt32 state[5], const Byte *data, size_t numBlocks);
+
+/*
+ if (the system supports different SHA1 code implementations)
+ {
+ (CSha1::func_UpdateBlocks) will be used
+ (CSha1::func_UpdateBlocks) can be set by
+ Sha1_Init() - to default (fastest)
+ Sha1_SetFunction() - to any algo
+ }
+ else
+ {
+ (CSha1::func_UpdateBlocks) is ignored.
+ }
+*/
+
typedef struct
{
- UInt32 state[SHA1_NUM_DIGEST_WORDS];
+ SHA1_FUNC_UPDATE_BLOCKS func_UpdateBlocks;
UInt64 count;
- UInt32 buffer[SHA1_NUM_BLOCK_WORDS];
+ UInt64 __pad_2[2];
+ UInt32 state[SHA1_NUM_DIGEST_WORDS];
+ UInt32 __pad_3[3];
+ Byte buffer[SHA1_BLOCK_SIZE];
} CSha1;
-void Sha1_Init(CSha1 *p);
-void Sha1_GetBlockDigest(CSha1 *p, const UInt32 *data, UInt32 *destDigest);
+#define SHA1_ALGO_DEFAULT 0
+#define SHA1_ALGO_SW 1
+#define SHA1_ALGO_HW 2
+
+/*
+Sha1_SetFunction()
+return:
+ 0 - (algo) value is not supported, and func_UpdateBlocks was not changed
+ 1 - func_UpdateBlocks was set according (algo) value.
+*/
+
+BoolInt Sha1_SetFunction(CSha1 *p, unsigned algo);
+
+void Sha1_InitState(CSha1 *p);
+void Sha1_Init(CSha1 *p);
void Sha1_Update(CSha1 *p, const Byte *data, size_t size);
void Sha1_Final(CSha1 *p, Byte *digest);
-void Sha1_Update_Rar(CSha1 *p, Byte *data, size_t size /* , int rar350Mode */);
+void Sha1_PrepareBlock(const CSha1 *p, Byte *block, unsigned size);
+void Sha1_GetBlockDigest(const CSha1 *p, const Byte *data, Byte *destDigest);
+
+// void MY_FAST_CALL Sha1_UpdateBlocks(UInt32 state[5], const Byte *data, size_t numBlocks);
+
+/*
+call Sha1Prepare() once at program start.
+It prepares all supported implementations, and detects the fastest implementation.
+*/
-void Sha1_32_PrepareBlock(const CSha1 *p, UInt32 *block, unsigned size);
-void Sha1_32_Update(CSha1 *p, const UInt32 *data, size_t size);
-void Sha1_32_Final(CSha1 *p, UInt32 *digest);
+void Sha1Prepare(void);
EXTERN_C_END
diff --git a/multiarc/src/formats/7z/C/Sha1Opt.c b/multiarc/src/formats/7z/C/Sha1Opt.c
new file mode 100755
index 00000000..63132da3
--- /dev/null
+++ b/multiarc/src/formats/7z/C/Sha1Opt.c
@@ -0,0 +1,373 @@
+/* Sha1Opt.c -- SHA-1 optimized code for SHA-1 hardware instructions
+2021-04-01 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#if defined(_MSC_VER)
+#if (_MSC_VER < 1900) && (_MSC_VER >= 1200)
+// #define USE_MY_MM
+#endif
+#endif
+
+#include "CpuArch.h"
+
+#ifdef MY_CPU_X86_OR_AMD64
+ #if defined(__clang__)
+ #if (__clang_major__ >= 8) // fix that check
+ #define USE_HW_SHA
+ #ifndef __SHA__
+ #define ATTRIB_SHA __attribute__((__target__("sha,ssse3")))
+ #if defined(_MSC_VER)
+ // SSSE3: for clang-cl:
+ #include <tmmintrin.h>
+ #define __SHA__
+ #endif
+ #endif
+ #pragma clang diagnostic ignored "-Wvector-conversion"
+ #endif
+ #elif defined(__GNUC__)
+ #if (__GNUC__ >= 8) // fix that check
+ #define USE_HW_SHA
+ #ifndef __SHA__
+ #define ATTRIB_SHA __attribute__((__target__("sha,ssse3")))
+ // #pragma GCC target("sha,ssse3")
+ #endif
+ #endif
+ #elif defined(__INTEL_COMPILER)
+ #if (__INTEL_COMPILER >= 1800) // fix that check
+ #define USE_HW_SHA
+ #endif
+ #elif defined(_MSC_VER)
+ #ifdef USE_MY_MM
+ #define USE_VER_MIN 1300
+ #else
+ #define USE_VER_MIN 1910
+ #endif
+ #if _MSC_VER >= USE_VER_MIN
+ #define USE_HW_SHA
+ #endif
+ #endif
+// #endif // MY_CPU_X86_OR_AMD64
+
+#ifdef USE_HW_SHA
+
+// #pragma message("Sha1 HW")
+// #include <wmmintrin.h>
+
+#if !defined(_MSC_VER) || (_MSC_VER >= 1900)
+#include <immintrin.h>
+#else
+#include <emmintrin.h>
+
+#if defined(_MSC_VER) && (_MSC_VER >= 1600)
+// #include <intrin.h>
+#endif
+
+#ifdef USE_MY_MM
+#include "My_mm.h"
+#endif
+
+#endif
+
+/*
+SHA1 uses:
+SSE2:
+ _mm_loadu_si128
+ _mm_storeu_si128
+ _mm_set_epi32
+ _mm_add_epi32
+ _mm_shuffle_epi32 / pshufd
+ _mm_xor_si128
+ _mm_cvtsi128_si32
+ _mm_cvtsi32_si128
+SSSE3:
+ _mm_shuffle_epi8 / pshufb
+
+SHA:
+ _mm_sha1*
+*/
+
+#define ADD_EPI32(dest, src) dest = _mm_add_epi32(dest, src);
+#define XOR_SI128(dest, src) dest = _mm_xor_si128(dest, src);
+#define SHUFFLE_EPI8(dest, mask) dest = _mm_shuffle_epi8(dest, mask);
+#define SHUFFLE_EPI32(dest, mask) dest = _mm_shuffle_epi32(dest, mask);
+
+#define SHA1_RND4(abcd, e0, f) abcd = _mm_sha1rnds4_epu32(abcd, e0, f);
+#define SHA1_NEXTE(e, m) e = _mm_sha1nexte_epu32(e, m);
+
+
+
+
+
+#define SHA1_MSG1(dest, src) dest = _mm_sha1msg1_epu32(dest, src);
+#define SHA1_MSG2(dest, src) dest = _mm_sha1msg2_epu32(dest, src);
+
+
+#define LOAD_SHUFFLE(m, k) \
+ m = _mm_loadu_si128((const __m128i *)(const void *)(data + (k) * 16)); \
+ SHUFFLE_EPI8(m, mask); \
+
+#define SM1(m0, m1, m2, m3) \
+ SHA1_MSG1(m0, m1); \
+
+#define SM2(m0, m1, m2, m3) \
+ XOR_SI128(m3, m1); \
+ SHA1_MSG2(m3, m2); \
+
+#define SM3(m0, m1, m2, m3) \
+ XOR_SI128(m3, m1); \
+ SM1(m0, m1, m2, m3) \
+ SHA1_MSG2(m3, m2); \
+
+#define NNN(m0, m1, m2, m3)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#define R4(k, e0, e1, m0, m1, m2, m3, OP) \
+ e1 = abcd; \
+ SHA1_RND4(abcd, e0, (k) / 5); \
+ SHA1_NEXTE(e1, m1); \
+ OP(m0, m1, m2, m3); \
+
+#define R16(k, mx, OP0, OP1, OP2, OP3) \
+ R4 ( (k)*4+0, e0,e1, m0,m1,m2,m3, OP0 ) \
+ R4 ( (k)*4+1, e1,e0, m1,m2,m3,m0, OP1 ) \
+ R4 ( (k)*4+2, e0,e1, m2,m3,m0,m1, OP2 ) \
+ R4 ( (k)*4+3, e1,e0, m3,mx,m1,m2, OP3 ) \
+
+#define PREPARE_STATE \
+ SHUFFLE_EPI32 (abcd, 0x1B); \
+ SHUFFLE_EPI32 (e0, 0x1B); \
+
+
+
+
+
+void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks);
+#ifdef ATTRIB_SHA
+ATTRIB_SHA
+#endif
+void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks)
+{
+ const __m128i mask = _mm_set_epi32(0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f);
+
+ __m128i abcd, e0;
+
+ if (numBlocks == 0)
+ return;
+
+ abcd = _mm_loadu_si128((const __m128i *) (const void *) &state[0]); // dbca
+ e0 = _mm_cvtsi32_si128((int)state[4]); // 000e
+
+ PREPARE_STATE
+
+ do
+ {
+ __m128i abcd_save, e2;
+ __m128i m0, m1, m2, m3;
+ __m128i e1;
+
+
+ abcd_save = abcd;
+ e2 = e0;
+
+ LOAD_SHUFFLE (m0, 0)
+ LOAD_SHUFFLE (m1, 1)
+ LOAD_SHUFFLE (m2, 2)
+ LOAD_SHUFFLE (m3, 3)
+
+ ADD_EPI32(e0, m0);
+
+ R16 ( 0, m0, SM1, SM3, SM3, SM3 );
+ R16 ( 1, m0, SM3, SM3, SM3, SM3 );
+ R16 ( 2, m0, SM3, SM3, SM3, SM3 );
+ R16 ( 3, m0, SM3, SM3, SM3, SM3 );
+ R16 ( 4, e2, SM2, NNN, NNN, NNN );
+
+ ADD_EPI32(abcd, abcd_save);
+
+ data += 64;
+ }
+ while (--numBlocks);
+
+ PREPARE_STATE
+
+ _mm_storeu_si128((__m128i *) (void *) state, abcd);
+ *(state+4) = (UInt32)_mm_cvtsi128_si32(e0);
+}
+
+#endif // USE_HW_SHA
+
+#elif defined(MY_CPU_ARM_OR_ARM64)
+
+ #if defined(__clang__)
+ #if (__clang_major__ >= 8) // fix that check
+ #define USE_HW_SHA
+ #endif
+ #elif defined(__GNUC__)
+ #if (__GNUC__ >= 6) // fix that check
+ #define USE_HW_SHA
+ #endif
+ #elif defined(_MSC_VER)
+ #if _MSC_VER >= 1910
+ #define USE_HW_SHA
+ #endif
+ #endif
+
+#ifdef USE_HW_SHA
+
+// #pragma message("=== Sha1 HW === ")
+
+#if defined(__clang__) || defined(__GNUC__)
+ #ifdef MY_CPU_ARM64
+ #define ATTRIB_SHA __attribute__((__target__("+crypto")))
+ #else
+ #define ATTRIB_SHA __attribute__((__target__("fpu=crypto-neon-fp-armv8")))
+ #endif
+#else
+ // _MSC_VER
+ // for arm32
+ #define _ARM_USE_NEW_NEON_INTRINSICS
+#endif
+
+#if defined(_MSC_VER) && defined(MY_CPU_ARM64)
+#include <arm64_neon.h>
+#else
+#include <arm_neon.h>
+#endif
+
+typedef uint32x4_t v128;
+// typedef __n128 v128; // MSVC
+
+#ifdef MY_CPU_BE
+ #define MY_rev32_for_LE(x)
+#else
+ #define MY_rev32_for_LE(x) x = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(x)))
+#endif
+
+#define LOAD_128(_p) (*(const v128 *)(const void *)(_p))
+#define STORE_128(_p, _v) *(v128 *)(void *)(_p) = (_v)
+
+#define LOAD_SHUFFLE(m, k) \
+ m = LOAD_128((data + (k) * 16)); \
+ MY_rev32_for_LE(m); \
+
+#define SU0(dest, src2, src3) dest = vsha1su0q_u32(dest, src2, src3);
+#define SU1(dest, src) dest = vsha1su1q_u32(dest, src);
+#define C(e) abcd = vsha1cq_u32(abcd, e, t);
+#define P(e) abcd = vsha1pq_u32(abcd, e, t);
+#define M(e) abcd = vsha1mq_u32(abcd, e, t);
+#define H(e) e = vsha1h_u32(vgetq_lane_u32(abcd, 0))
+#define T(m, c) t = vaddq_u32(m, c)
+
+void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
+#ifdef ATTRIB_SHA
+ATTRIB_SHA
+#endif
+void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks)
+{
+ v128 abcd;
+ v128 c0, c1, c2, c3;
+ uint32_t e0;
+
+ if (numBlocks == 0)
+ return;
+
+ c0 = vdupq_n_u32(0x5a827999);
+ c1 = vdupq_n_u32(0x6ed9eba1);
+ c2 = vdupq_n_u32(0x8f1bbcdc);
+ c3 = vdupq_n_u32(0xca62c1d6);
+
+ abcd = LOAD_128(&state[0]);
+ e0 = state[4];
+
+ do
+ {
+ v128 abcd_save;
+ v128 m0, m1, m2, m3;
+ v128 t;
+ uint32_t e0_save, e1;
+
+ abcd_save = abcd;
+ e0_save = e0;
+
+ LOAD_SHUFFLE (m0, 0)
+ LOAD_SHUFFLE (m1, 1)
+ LOAD_SHUFFLE (m2, 2)
+ LOAD_SHUFFLE (m3, 3)
+
+ T(m0, c0); H(e1); C(e0);
+ T(m1, c0); SU0(m0, m1, m2); H(e0); C(e1);
+ T(m2, c0); SU0(m1, m2, m3); SU1(m0, m3); H(e1); C(e0);
+ T(m3, c0); SU0(m2, m3, m0); SU1(m1, m0); H(e0); C(e1);
+ T(m0, c0); SU0(m3, m0, m1); SU1(m2, m1); H(e1); C(e0);
+ T(m1, c1); SU0(m0, m1, m2); SU1(m3, m2); H(e0); P(e1);
+ T(m2, c1); SU0(m1, m2, m3); SU1(m0, m3); H(e1); P(e0);
+ T(m3, c1); SU0(m2, m3, m0); SU1(m1, m0); H(e0); P(e1);
+ T(m0, c1); SU0(m3, m0, m1); SU1(m2, m1); H(e1); P(e0);
+ T(m1, c1); SU0(m0, m1, m2); SU1(m3, m2); H(e0); P(e1);
+ T(m2, c2); SU0(m1, m2, m3); SU1(m0, m3); H(e1); M(e0);
+ T(m3, c2); SU0(m2, m3, m0); SU1(m1, m0); H(e0); M(e1);
+ T(m0, c2); SU0(m3, m0, m1); SU1(m2, m1); H(e1); M(e0);
+ T(m1, c2); SU0(m0, m1, m2); SU1(m3, m2); H(e0); M(e1);
+ T(m2, c2); SU0(m1, m2, m3); SU1(m0, m3); H(e1); M(e0);
+ T(m3, c3); SU0(m2, m3, m0); SU1(m1, m0); H(e0); P(e1);
+ T(m0, c3); SU0(m3, m0, m1); SU1(m2, m1); H(e1); P(e0);
+ T(m1, c3); SU1(m3, m2); H(e0); P(e1);
+ T(m2, c3); H(e1); P(e0);
+ T(m3, c3); H(e0); P(e1);
+
+ abcd = vaddq_u32(abcd, abcd_save);
+ e0 += e0_save;
+
+ data += 64;
+ }
+ while (--numBlocks);
+
+ STORE_128(&state[0], abcd);
+ state[4] = e0;
+}
+
+#endif // USE_HW_SHA
+
+#endif // MY_CPU_ARM_OR_ARM64
+
+
+#ifndef USE_HW_SHA
+
+// #error Stop_Compiling_UNSUPPORTED_SHA
+// #include <stdlib.h>
+
+// #include "Sha1.h"
+void MY_FAST_CALL Sha1_UpdateBlocks(UInt32 state[5], const Byte *data, size_t numBlocks);
+
+#pragma message("Sha1 HW-SW stub was used")
+
+void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks);
+void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks)
+{
+ Sha1_UpdateBlocks(state, data, numBlocks);
+ /*
+ UNUSED_VAR(state);
+ UNUSED_VAR(data);
+ UNUSED_VAR(numBlocks);
+ exit(1);
+ return;
+ */
+}
+
+#endif
diff --git a/multiarc/src/formats/7z/C/Sha256.c b/multiarc/src/formats/7z/C/Sha256.c
index 04b688c6..8b3983ea 100644..100755
--- a/multiarc/src/formats/7z/C/Sha256.c
+++ b/multiarc/src/formats/7z/C/Sha256.c
@@ -1,5 +1,5 @@
-/* Crypto/Sha256.c -- SHA-256 Hash
-2017-04-03 : Igor Pavlov : Public domain
+/* Sha256.c -- SHA-256 Hash
+2021-04-01 : Igor Pavlov : Public domain
This code is based on public domain code from Wei Dai's Crypto++ library. */
#include "Precomp.h"
@@ -10,16 +10,107 @@ This code is based on public domain code from Wei Dai's Crypto++ library. */
#include "RotateDefs.h"
#include "Sha256.h"
+#if defined(_MSC_VER) && (_MSC_VER < 1900)
+// #define USE_MY_MM
+#endif
+
+#ifdef MY_CPU_X86_OR_AMD64
+ #ifdef _MSC_VER
+ #if _MSC_VER >= 1200
+ #define _SHA_SUPPORTED
+ #endif
+ #elif defined(__clang__)
+ #if (__clang_major__ >= 8) // fix that check
+ #define _SHA_SUPPORTED
+ #endif
+ #elif defined(__GNUC__)
+ #if (__GNUC__ >= 8) // fix that check
+ #define _SHA_SUPPORTED
+ #endif
+ #elif defined(__INTEL_COMPILER)
+ #if (__INTEL_COMPILER >= 1800) // fix that check
+ #define _SHA_SUPPORTED
+ #endif
+ #endif
+#elif defined(MY_CPU_ARM_OR_ARM64)
+ #ifdef _MSC_VER
+ #if _MSC_VER >= 1910
+ #define _SHA_SUPPORTED
+ #endif
+ #elif defined(__clang__)
+ #if (__clang_major__ >= 8) // fix that check
+ #define _SHA_SUPPORTED
+ #endif
+ #elif defined(__GNUC__)
+ #if (__GNUC__ >= 6) // fix that check
+ #define _SHA_SUPPORTED
+ #endif
+ #endif
+#endif
+
+void MY_FAST_CALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks);
+
+#ifdef _SHA_SUPPORTED
+ void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
+
+ static SHA256_FUNC_UPDATE_BLOCKS g_FUNC_UPDATE_BLOCKS = Sha256_UpdateBlocks;
+ static SHA256_FUNC_UPDATE_BLOCKS g_FUNC_UPDATE_BLOCKS_HW;
+
+ #define UPDATE_BLOCKS(p) p->func_UpdateBlocks
+#else
+ #define UPDATE_BLOCKS(p) Sha256_UpdateBlocks
+#endif
+
+
+BoolInt Sha256_SetFunction(CSha256 *p, unsigned algo)
+{
+ SHA256_FUNC_UPDATE_BLOCKS func = Sha256_UpdateBlocks;
+
+ #ifdef _SHA_SUPPORTED
+ if (algo != SHA256_ALGO_SW)
+ {
+ if (algo == SHA256_ALGO_DEFAULT)
+ func = g_FUNC_UPDATE_BLOCKS;
+ else
+ {
+ if (algo != SHA256_ALGO_HW)
+ return False;
+ func = g_FUNC_UPDATE_BLOCKS_HW;
+ if (!func)
+ return False;
+ }
+ }
+ #else
+ if (algo > 1)
+ return False;
+ #endif
+
+ p->func_UpdateBlocks = func;
+ return True;
+}
+
+
/* define it for speed optimization */
-#ifndef _SFX
-#define _SHA256_UNROLL
-#define _SHA256_UNROLL2
+
+#ifdef _SFX
+ #define STEP_PRE 1
+ #define STEP_MAIN 1
+#else
+ #define STEP_PRE 2
+ #define STEP_MAIN 4
+ // #define _SHA256_UNROLL
#endif
-/* #define _SHA256_UNROLL2 */
+#if STEP_MAIN != 16
+ #define _SHA256_BIG_W
+#endif
-void Sha256_Init(CSha256 *p)
+
+
+
+void Sha256_InitState(CSha256 *p)
{
+ p->count = 0;
p->state[0] = 0x6a09e667;
p->state[1] = 0xbb67ae85;
p->state[2] = 0x3c6ef372;
@@ -28,7 +119,17 @@ void Sha256_Init(CSha256 *p)
p->state[5] = 0x9b05688c;
p->state[6] = 0x1f83d9ab;
p->state[7] = 0x5be0cd19;
- p->count = 0;
+}
+
+void Sha256_Init(CSha256 *p)
+{
+ p->func_UpdateBlocks =
+ #ifdef _SHA_SUPPORTED
+ g_FUNC_UPDATE_BLOCKS;
+ #else
+ NULL;
+ #endif
+ Sha256_InitState(p);
}
#define S0(x) (rotrFixed(x, 2) ^ rotrFixed(x,13) ^ rotrFixed(x, 22))
@@ -36,61 +137,100 @@ void Sha256_Init(CSha256 *p)
#define s0(x) (rotrFixed(x, 7) ^ rotrFixed(x,18) ^ (x >> 3))
#define s1(x) (rotrFixed(x,17) ^ rotrFixed(x,19) ^ (x >> 10))
-#define blk0(i) (W[i])
-#define blk2(i) (W[i] += s1(W[((i)-2)&15]) + W[((i)-7)&15] + s0(W[((i)-15)&15]))
-
#define Ch(x,y,z) (z^(x&(y^z)))
#define Maj(x,y,z) ((x&y)|(z&(x|y)))
-#ifdef _SHA256_UNROLL2
-
-#define R(a,b,c,d,e,f,g,h, i) \
- h += S1(e) + Ch(e,f,g) + K[(i)+(size_t)(j)] + (j ? blk2(i) : blk0(i)); \
- d += h; \
- h += S0(a) + Maj(a, b, c)
-#define RX_8(i) \
- R(a,b,c,d,e,f,g,h, i); \
- R(h,a,b,c,d,e,f,g, i+1); \
- R(g,h,a,b,c,d,e,f, i+2); \
- R(f,g,h,a,b,c,d,e, i+3); \
- R(e,f,g,h,a,b,c,d, i+4); \
- R(d,e,f,g,h,a,b,c, i+5); \
- R(c,d,e,f,g,h,a,b, i+6); \
- R(b,c,d,e,f,g,h,a, i+7)
+#define W_PRE(i) (W[(i) + (size_t)(j)] = GetBe32(data + ((size_t)(j) + i) * 4))
-#define RX_16 RX_8(0); RX_8(8);
+#define blk2_main(j, i) s1(w(j, (i)-2)) + w(j, (i)-7) + s0(w(j, (i)-15))
+#ifdef _SHA256_BIG_W
+ // we use +i instead of +(i) to change the order to solve CLANG compiler warning for signed/unsigned.
+ #define w(j, i) W[(size_t)(j) + i]
+ #define blk2(j, i) (w(j, i) = w(j, (i)-16) + blk2_main(j, i))
#else
+ #if STEP_MAIN == 16
+ #define w(j, i) W[(i) & 15]
+ #else
+ #define w(j, i) W[((size_t)(j) + (i)) & 15]
+ #endif
+ #define blk2(j, i) (w(j, i) += blk2_main(j, i))
+#endif
-#define a(i) T[(0-(i))&7]
-#define b(i) T[(1-(i))&7]
-#define c(i) T[(2-(i))&7]
-#define d(i) T[(3-(i))&7]
-#define e(i) T[(4-(i))&7]
-#define f(i) T[(5-(i))&7]
-#define g(i) T[(6-(i))&7]
-#define h(i) T[(7-(i))&7]
-
-#define R(i) \
- h(i) += S1(e(i)) + Ch(e(i),f(i),g(i)) + K[(i)+(size_t)(j)] + (j ? blk2(i) : blk0(i)); \
- d(i) += h(i); \
- h(i) += S0(a(i)) + Maj(a(i), b(i), c(i)) \
+#define W_MAIN(i) blk2(j, i)
-#ifdef _SHA256_UNROLL
-#define RX_8(i) R(i+0); R(i+1); R(i+2); R(i+3); R(i+4); R(i+5); R(i+6); R(i+7);
-#define RX_16 RX_8(0); RX_8(8);
+#define T1(wx, i) \
+ tmp = h + S1(e) + Ch(e,f,g) + K[(i)+(size_t)(j)] + wx(i); \
+ h = g; \
+ g = f; \
+ f = e; \
+ e = d + tmp; \
+ tmp += S0(a) + Maj(a, b, c); \
+ d = c; \
+ c = b; \
+ b = a; \
+ a = tmp; \
-#else
+#define R1_PRE(i) T1( W_PRE, i)
+#define R1_MAIN(i) T1( W_MAIN, i)
-#define RX_16 unsigned i; for (i = 0; i < 16; i++) { R(i); }
+#if (!defined(_SHA256_UNROLL) || STEP_MAIN < 8) && (STEP_MAIN >= 4)
+#define R2_MAIN(i) \
+ R1_MAIN(i) \
+ R1_MAIN(i + 1) \
#endif
+
+
+#if defined(_SHA256_UNROLL) && STEP_MAIN >= 8
+
+#define T4( a,b,c,d,e,f,g,h, wx, i) \
+ h += S1(e) + Ch(e,f,g) + K[(i)+(size_t)(j)] + wx(i); \
+ tmp = h; \
+ h += d; \
+ d = tmp + S0(a) + Maj(a, b, c); \
+
+#define R4( wx, i) \
+ T4 ( a,b,c,d,e,f,g,h, wx, (i )); \
+ T4 ( d,a,b,c,h,e,f,g, wx, (i+1)); \
+ T4 ( c,d,a,b,g,h,e,f, wx, (i+2)); \
+ T4 ( b,c,d,a,f,g,h,e, wx, (i+3)); \
+
+#define R4_PRE(i) R4( W_PRE, i)
+#define R4_MAIN(i) R4( W_MAIN, i)
+
+
+#define T8( a,b,c,d,e,f,g,h, wx, i) \
+ h += S1(e) + Ch(e,f,g) + K[(i)+(size_t)(j)] + wx(i); \
+ d += h; \
+ h += S0(a) + Maj(a, b, c); \
+
+#define R8( wx, i) \
+ T8 ( a,b,c,d,e,f,g,h, wx, i ); \
+ T8 ( h,a,b,c,d,e,f,g, wx, i+1); \
+ T8 ( g,h,a,b,c,d,e,f, wx, i+2); \
+ T8 ( f,g,h,a,b,c,d,e, wx, i+3); \
+ T8 ( e,f,g,h,a,b,c,d, wx, i+4); \
+ T8 ( d,e,f,g,h,a,b,c, wx, i+5); \
+ T8 ( c,d,e,f,g,h,a,b, wx, i+6); \
+ T8 ( b,c,d,e,f,g,h,a, wx, i+7); \
+
+#define R8_PRE(i) R8( W_PRE, i)
+#define R8_MAIN(i) R8( W_MAIN, i)
+
#endif
-static const UInt32 K[64] = {
+void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
+
+// static
+extern MY_ALIGN(64)
+const UInt32 SHA256_K_ARRAY[64];
+
+MY_ALIGN(64)
+const UInt32 SHA256_K_ARRAY[64] = {
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
@@ -109,30 +249,27 @@ static const UInt32 K[64] = {
0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
};
-static void Sha256_WriteByteBlock(CSha256 *p)
-{
- UInt32 W[16];
- unsigned j;
- UInt32 *state;
+#define K SHA256_K_ARRAY
- #ifdef _SHA256_UNROLL2
- UInt32 a,b,c,d,e,f,g,h;
+
+MY_NO_INLINE
+void MY_FAST_CALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks)
+{
+ UInt32 W
+ #ifdef _SHA256_BIG_W
+ [64];
#else
- UInt32 T[8];
+ [16];
#endif
- for (j = 0; j < 16; j += 4)
- {
- const Byte *ccc = p->buffer + j * 4;
- W[j ] = GetBe32(ccc);
- W[j + 1] = GetBe32(ccc + 4);
- W[j + 2] = GetBe32(ccc + 8);
- W[j + 3] = GetBe32(ccc + 12);
- }
+ unsigned j;
- state = p->state;
+ UInt32 a,b,c,d,e,f,g,h;
- #ifdef _SHA256_UNROLL2
+ #if !defined(_SHA256_UNROLL) || (STEP_MAIN <= 4) || (STEP_PRE <= 4)
+ UInt32 tmp;
+ #endif
+
a = state[0];
b = state[1];
c = state[2];
@@ -141,39 +278,96 @@ static void Sha256_WriteByteBlock(CSha256 *p)
f = state[5];
g = state[6];
h = state[7];
- #else
- for (j = 0; j < 8; j++)
- T[j] = state[j];
- #endif
- for (j = 0; j < 64; j += 16)
+ while (numBlocks)
{
- RX_16
+
+ for (j = 0; j < 16; j += STEP_PRE)
+ {
+ #if STEP_PRE > 4
+
+ #if STEP_PRE < 8
+ R4_PRE(0);
+ #else
+ R8_PRE(0);
+ #if STEP_PRE == 16
+ R8_PRE(8);
+ #endif
+ #endif
+
+ #else
+
+ R1_PRE(0);
+ #if STEP_PRE >= 2
+ R1_PRE(1);
+ #if STEP_PRE >= 4
+ R1_PRE(2);
+ R1_PRE(3);
+ #endif
+ #endif
+
+ #endif
+ }
+
+ for (j = 16; j < 64; j += STEP_MAIN)
+ {
+ #if defined(_SHA256_UNROLL) && STEP_MAIN >= 8
+
+ #if STEP_MAIN < 8
+ R4_MAIN(0);
+ #else
+ R8_MAIN(0);
+ #if STEP_MAIN == 16
+ R8_MAIN(8);
+ #endif
+ #endif
+
+ #else
+
+ R1_MAIN(0);
+ #if STEP_MAIN >= 2
+ R1_MAIN(1);
+ #if STEP_MAIN >= 4
+ R2_MAIN(2);
+ #if STEP_MAIN >= 8
+ R2_MAIN(4);
+ R2_MAIN(6);
+ #if STEP_MAIN >= 16
+ R2_MAIN(8);
+ R2_MAIN(10);
+ R2_MAIN(12);
+ R2_MAIN(14);
+ #endif
+ #endif
+ #endif
+ #endif
+ #endif
+ }
+
+ a += state[0]; state[0] = a;
+ b += state[1]; state[1] = b;
+ c += state[2]; state[2] = c;
+ d += state[3]; state[3] = d;
+ e += state[4]; state[4] = e;
+ f += state[5]; state[5] = f;
+ g += state[6]; state[6] = g;
+ h += state[7]; state[7] = h;
+
+ data += 64;
+ numBlocks--;
}
- #ifdef _SHA256_UNROLL2
- state[0] += a;
- state[1] += b;
- state[2] += c;
- state[3] += d;
- state[4] += e;
- state[5] += f;
- state[6] += g;
- state[7] += h;
- #else
- for (j = 0; j < 8; j++)
- state[j] += T[j];
- #endif
-
/* Wipe variables */
/* memset(W, 0, sizeof(W)); */
- /* memset(T, 0, sizeof(T)); */
}
#undef S0
#undef S1
#undef s0
#undef s1
+#undef K
+
+#define Sha256_UpdateBlock(p) UPDATE_BLOCKS(p)(p->state, p->buffer, 1)
void Sha256_Update(CSha256 *p, const Byte *data, size_t size)
{
@@ -193,25 +387,26 @@ void Sha256_Update(CSha256 *p, const Byte *data, size_t size)
return;
}
- size -= num;
- memcpy(p->buffer + pos, data, num);
- data += num;
+ if (pos != 0)
+ {
+ size -= num;
+ memcpy(p->buffer + pos, data, num);
+ data += num;
+ Sha256_UpdateBlock(p);
+ }
}
-
- for (;;)
{
- Sha256_WriteByteBlock(p);
- if (size < 64)
- break;
- size -= 64;
- memcpy(p->buffer, data, 64);
- data += 64;
- }
-
- if (size != 0)
+ size_t numBlocks = size >> 6;
+ UPDATE_BLOCKS(p)(p->state, data, numBlocks);
+ size &= 0x3F;
+ if (size == 0)
+ return;
+ data += (numBlocks << 6);
memcpy(p->buffer, data, size);
+ }
}
+
void Sha256_Final(CSha256 *p, Byte *digest)
{
unsigned pos = (unsigned)p->count & 0x3F;
@@ -219,13 +414,30 @@ void Sha256_Final(CSha256 *p, Byte *digest)
p->buffer[pos++] = 0x80;
- while (pos != (64 - 8))
+ if (pos > (64 - 8))
+ {
+ while (pos != 64) { p->buffer[pos++] = 0; }
+ // memset(&p->buf.buffer[pos], 0, 64 - pos);
+ Sha256_UpdateBlock(p);
+ pos = 0;
+ }
+
+ /*
+ if (pos & 3)
{
- pos &= 0x3F;
- if (pos == 0)
- Sha256_WriteByteBlock(p);
- p->buffer[pos++] = 0;
+ p->buffer[pos] = 0;
+ p->buffer[pos + 1] = 0;
+ p->buffer[pos + 2] = 0;
+ pos += 3;
+ pos &= ~3;
}
+ {
+ for (; pos < 64 - 8; pos += 4)
+ *(UInt32 *)(&p->buffer[pos]) = 0;
+ }
+ */
+
+ memset(&p->buffer[pos], 0, (64 - 8) - pos);
{
UInt64 numBits = (p->count << 3);
@@ -233,16 +445,42 @@ void Sha256_Final(CSha256 *p, Byte *digest)
SetBe32(p->buffer + 64 - 4, (UInt32)(numBits));
}
- Sha256_WriteByteBlock(p);
+ Sha256_UpdateBlock(p);
for (i = 0; i < 8; i += 2)
{
UInt32 v0 = p->state[i];
- UInt32 v1 = p->state[i + 1];
+ UInt32 v1 = p->state[(size_t)i + 1];
SetBe32(digest , v0);
SetBe32(digest + 4, v1);
digest += 8;
}
- Sha256_Init(p);
+ Sha256_InitState(p);
+}
+
+
+void Sha256Prepare()
+{
+ #ifdef _SHA_SUPPORTED
+ SHA256_FUNC_UPDATE_BLOCKS f, f_hw;
+ f = Sha256_UpdateBlocks;
+ f_hw = NULL;
+ #ifdef MY_CPU_X86_OR_AMD64
+ #ifndef USE_MY_MM
+ if (CPU_IsSupported_SHA()
+ && CPU_IsSupported_SSSE3()
+ // && CPU_IsSupported_SSE41()
+ )
+ #endif
+ #else
+ if (CPU_IsSupported_SHA2())
+ #endif
+ {
+ // printf("\n========== HW SHA256 ======== \n");
+ f = f_hw = Sha256_UpdateBlocks_HW;
+ }
+ g_FUNC_UPDATE_BLOCKS = f;
+ g_FUNC_UPDATE_BLOCKS_HW = f_hw;
+ #endif
}
diff --git a/multiarc/src/formats/7z/C/Sha256.h b/multiarc/src/formats/7z/C/Sha256.h
index 3f455dbc..aa38501e 100644..100755
--- a/multiarc/src/formats/7z/C/Sha256.h
+++ b/multiarc/src/formats/7z/C/Sha256.h
@@ -1,26 +1,76 @@
/* Sha256.h -- SHA-256 Hash
-2013-01-18 : Igor Pavlov : Public domain */
+2021-01-01 : Igor Pavlov : Public domain */
-#ifndef __CRYPTO_SHA256_H
-#define __CRYPTO_SHA256_H
+#ifndef __7Z_SHA256_H
+#define __7Z_SHA256_H
#include "7zTypes.h"
EXTERN_C_BEGIN
-#define SHA256_DIGEST_SIZE 32
+#define SHA256_NUM_BLOCK_WORDS 16
+#define SHA256_NUM_DIGEST_WORDS 8
+
+#define SHA256_BLOCK_SIZE (SHA256_NUM_BLOCK_WORDS * 4)
+#define SHA256_DIGEST_SIZE (SHA256_NUM_DIGEST_WORDS * 4)
+
+typedef void (MY_FAST_CALL *SHA256_FUNC_UPDATE_BLOCKS)(UInt32 state[8], const Byte *data, size_t numBlocks);
+
+/*
+ if (the system supports different SHA256 code implementations)
+ {
+ (CSha256::func_UpdateBlocks) will be used
+ (CSha256::func_UpdateBlocks) can be set by
+ Sha256_Init() - to default (fastest)
+ Sha256_SetFunction() - to any algo
+ }
+ else
+ {
+ (CSha256::func_UpdateBlocks) is ignored.
+ }
+*/
typedef struct
{
- UInt32 state[8];
+ SHA256_FUNC_UPDATE_BLOCKS func_UpdateBlocks;
UInt64 count;
- Byte buffer[64];
+ UInt64 __pad_2[2];
+ UInt32 state[SHA256_NUM_DIGEST_WORDS];
+
+ Byte buffer[SHA256_BLOCK_SIZE];
} CSha256;
+
+#define SHA256_ALGO_DEFAULT 0
+#define SHA256_ALGO_SW 1
+#define SHA256_ALGO_HW 2
+
+/*
+Sha256_SetFunction()
+return:
+ 0 - (algo) value is not supported, and func_UpdateBlocks was not changed
+ 1 - func_UpdateBlocks was set according (algo) value.
+*/
+
+BoolInt Sha256_SetFunction(CSha256 *p, unsigned algo);
+
+void Sha256_InitState(CSha256 *p);
void Sha256_Init(CSha256 *p);
void Sha256_Update(CSha256 *p, const Byte *data, size_t size);
void Sha256_Final(CSha256 *p, Byte *digest);
+
+
+
+// void MY_FAST_CALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks);
+
+/*
+call Sha256Prepare() once at program start.
+It prepares all supported implementations, and detects the fastest implementation.
+*/
+
+void Sha256Prepare(void);
+
EXTERN_C_END
#endif
diff --git a/multiarc/src/formats/7z/C/Sha256Opt.c b/multiarc/src/formats/7z/C/Sha256Opt.c
new file mode 100755
index 00000000..decc1382
--- /dev/null
+++ b/multiarc/src/formats/7z/C/Sha256Opt.c
@@ -0,0 +1,373 @@
+/* Sha256Opt.c -- SHA-256 optimized code for SHA-256 hardware instructions
+2021-04-01 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#if defined(_MSC_VER)
+#if (_MSC_VER < 1900) && (_MSC_VER >= 1200)
+// #define USE_MY_MM
+#endif
+#endif
+
+#include "CpuArch.h"
+
+#ifdef MY_CPU_X86_OR_AMD64
+ #if defined(__clang__)
+ #if (__clang_major__ >= 8) // fix that check
+ #define USE_HW_SHA
+ #ifndef __SHA__
+ #define ATTRIB_SHA __attribute__((__target__("sha,ssse3")))
+ #if defined(_MSC_VER)
+ // SSSE3: for clang-cl:
+ #include <tmmintrin.h>
+ #define __SHA__
+ #endif
+ #endif
+
+ #endif
+ #elif defined(__GNUC__)
+ #if (__GNUC__ >= 8) // fix that check
+ #define USE_HW_SHA
+ #ifndef __SHA__
+ #define ATTRIB_SHA __attribute__((__target__("sha,ssse3")))
+ // #pragma GCC target("sha,ssse3")
+ #endif
+ #endif
+ #elif defined(__INTEL_COMPILER)
+ #if (__INTEL_COMPILER >= 1800) // fix that check
+ #define USE_HW_SHA
+ #endif
+ #elif defined(_MSC_VER)
+ #ifdef USE_MY_MM
+ #define USE_VER_MIN 1300
+ #else
+ #define USE_VER_MIN 1910
+ #endif
+ #if _MSC_VER >= USE_VER_MIN
+ #define USE_HW_SHA
+ #endif
+ #endif
+// #endif // MY_CPU_X86_OR_AMD64
+
+#ifdef USE_HW_SHA
+
+// #pragma message("Sha256 HW")
+// #include <wmmintrin.h>
+
+#if !defined(_MSC_VER) || (_MSC_VER >= 1900)
+#include <immintrin.h>
+#else
+#include <emmintrin.h>
+
+#if defined(_MSC_VER) && (_MSC_VER >= 1600)
+// #include <intrin.h>
+#endif
+
+#ifdef USE_MY_MM
+#include "My_mm.h"
+#endif
+
+#endif
+
+/*
+SHA256 uses:
+SSE2:
+ _mm_loadu_si128
+ _mm_storeu_si128
+ _mm_set_epi32
+ _mm_add_epi32
+ _mm_shuffle_epi32 / pshufd
+
+
+
+SSSE3:
+ _mm_shuffle_epi8 / pshufb
+ _mm_alignr_epi8
+SHA:
+ _mm_sha256*
+*/
+
+// K array must be aligned for 16-bytes at least.
+// The compiler can look align attribute and selects
+// movdqu - for code without align attribute
+// movdqa - for code with align attribute
+extern
+MY_ALIGN(64)
+const UInt32 SHA256_K_ARRAY[64];
+
+#define K SHA256_K_ARRAY
+
+
+#define ADD_EPI32(dest, src) dest = _mm_add_epi32(dest, src);
+#define SHA256_MSG1(dest, src) dest = _mm_sha256msg1_epu32(dest, src);
+#define SHA25G_MSG2(dest, src) dest = _mm_sha256msg2_epu32(dest, src);
+
+
+#define LOAD_SHUFFLE(m, k) \
+ m = _mm_loadu_si128((const __m128i *)(const void *)(data + (k) * 16)); \
+ m = _mm_shuffle_epi8(m, mask); \
+
+#define SM1(g0, g1, g2, g3) \
+ SHA256_MSG1(g3, g0); \
+
+#define SM2(g0, g1, g2, g3) \
+ tmp = _mm_alignr_epi8(g1, g0, 4); \
+ ADD_EPI32(g2, tmp); \
+ SHA25G_MSG2(g2, g1); \
+
+// #define LS0(k, g0, g1, g2, g3) LOAD_SHUFFLE(g0, k)
+// #define LS1(k, g0, g1, g2, g3) LOAD_SHUFFLE(g1, k+1)
+
+
+#define NNN(g0, g1, g2, g3)
+
+
+#define RND2(t0, t1) \
+ t0 = _mm_sha256rnds2_epu32(t0, t1, msg);
+
+#define RND2_0(m, k) \
+ msg = _mm_add_epi32(m, *(const __m128i *) (const void *) &K[(k) * 4]); \
+ RND2(state0, state1); \
+ msg = _mm_shuffle_epi32(msg, 0x0E); \
+
+
+#define RND2_1 \
+ RND2(state1, state0); \
+
+
+// We use scheme with 3 rounds ahead for SHA256_MSG1 / 2 rounds ahead for SHA256_MSG2
+
+#define R4(k, g0, g1, g2, g3, OP0, OP1) \
+ RND2_0(g0, k); \
+ OP0(g0, g1, g2, g3); \
+ RND2_1; \
+ OP1(g0, g1, g2, g3); \
+
+#define R16(k, OP0, OP1, OP2, OP3, OP4, OP5, OP6, OP7) \
+ R4 ( (k)*4+0, m0, m1, m2, m3, OP0, OP1 ) \
+ R4 ( (k)*4+1, m1, m2, m3, m0, OP2, OP3 ) \
+ R4 ( (k)*4+2, m2, m3, m0, m1, OP4, OP5 ) \
+ R4 ( (k)*4+3, m3, m0, m1, m2, OP6, OP7 ) \
+
+#define PREPARE_STATE \
+ tmp = _mm_shuffle_epi32(state0, 0x1B); /* abcd */ \
+ state0 = _mm_shuffle_epi32(state1, 0x1B); /* efgh */ \
+ state1 = state0; \
+ state0 = _mm_unpacklo_epi64(state0, tmp); /* cdgh */ \
+ state1 = _mm_unpackhi_epi64(state1, tmp); /* abef */ \
+
+
+void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
+#ifdef ATTRIB_SHA
+ATTRIB_SHA
+#endif
+void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks)
+{
+ const __m128i mask = _mm_set_epi32(0x0c0d0e0f, 0x08090a0b, 0x04050607, 0x00010203);
+ __m128i tmp;
+ __m128i state0, state1;
+
+ if (numBlocks == 0)
+ return;
+
+ state0 = _mm_loadu_si128((const __m128i *) (const void *) &state[0]);
+ state1 = _mm_loadu_si128((const __m128i *) (const void *) &state[4]);
+
+ PREPARE_STATE
+
+ do
+ {
+ __m128i state0_save, state1_save;
+ __m128i m0, m1, m2, m3;
+ __m128i msg;
+ // #define msg tmp
+
+ state0_save = state0;
+ state1_save = state1;
+
+ LOAD_SHUFFLE (m0, 0)
+ LOAD_SHUFFLE (m1, 1)
+ LOAD_SHUFFLE (m2, 2)
+ LOAD_SHUFFLE (m3, 3)
+
+
+
+ R16 ( 0, NNN, NNN, SM1, NNN, SM1, SM2, SM1, SM2 );
+ R16 ( 1, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 );
+ R16 ( 2, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 );
+ R16 ( 3, SM1, SM2, NNN, SM2, NNN, NNN, NNN, NNN );
+
+ ADD_EPI32(state0, state0_save);
+ ADD_EPI32(state1, state1_save);
+
+ data += 64;
+ }
+ while (--numBlocks);
+
+ PREPARE_STATE
+
+ _mm_storeu_si128((__m128i *) (void *) &state[0], state0);
+ _mm_storeu_si128((__m128i *) (void *) &state[4], state1);
+}
+
+#endif // USE_HW_SHA
+
+#elif defined(MY_CPU_ARM_OR_ARM64)
+
+ #if defined(__clang__)
+ #if (__clang_major__ >= 8) // fix that check
+ #define USE_HW_SHA
+ #endif
+ #elif defined(__GNUC__)
+ #if (__GNUC__ >= 6) // fix that check
+ #define USE_HW_SHA
+ #endif
+ #elif defined(_MSC_VER)
+ #if _MSC_VER >= 1910
+ #define USE_HW_SHA
+ #endif
+ #endif
+
+#ifdef USE_HW_SHA
+
+// #pragma message("=== Sha256 HW === ")
+
+#if defined(__clang__) || defined(__GNUC__)
+ #ifdef MY_CPU_ARM64
+ #define ATTRIB_SHA __attribute__((__target__("+crypto")))
+ #else
+ #define ATTRIB_SHA __attribute__((__target__("fpu=crypto-neon-fp-armv8")))
+ #endif
+#else
+ // _MSC_VER
+ // for arm32
+ #define _ARM_USE_NEW_NEON_INTRINSICS
+#endif
+
+#if defined(_MSC_VER) && defined(MY_CPU_ARM64)
+#include <arm64_neon.h>
+#else
+#include <arm_neon.h>
+#endif
+
+typedef uint32x4_t v128;
+// typedef __n128 v128; // MSVC
+
+#ifdef MY_CPU_BE
+ #define MY_rev32_for_LE(x)
+#else
+ #define MY_rev32_for_LE(x) x = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(x)))
+#endif
+
+#define LOAD_128(_p) (*(const v128 *)(const void *)(_p))
+#define STORE_128(_p, _v) *(v128 *)(void *)(_p) = (_v)
+
+#define LOAD_SHUFFLE(m, k) \
+ m = LOAD_128((data + (k) * 16)); \
+ MY_rev32_for_LE(m); \
+
+// K array must be aligned for 16-bytes at least.
+extern
+MY_ALIGN(64)
+const UInt32 SHA256_K_ARRAY[64];
+
+#define K SHA256_K_ARRAY
+
+
+#define SHA256_SU0(dest, src) dest = vsha256su0q_u32(dest, src);
+#define SHA25G_SU1(dest, src2, src3) dest = vsha256su1q_u32(dest, src2, src3);
+
+#define SM1(g0, g1, g2, g3) SHA256_SU0(g3, g0)
+#define SM2(g0, g1, g2, g3) SHA25G_SU1(g2, g0, g1)
+#define NNN(g0, g1, g2, g3)
+
+
+#define R4(k, g0, g1, g2, g3, OP0, OP1) \
+ msg = vaddq_u32(g0, *(const v128 *) (const void *) &K[(k) * 4]); \
+ tmp = state0; \
+ state0 = vsha256hq_u32( state0, state1, msg ); \
+ state1 = vsha256h2q_u32( state1, tmp, msg ); \
+ OP0(g0, g1, g2, g3); \
+ OP1(g0, g1, g2, g3); \
+
+
+#define R16(k, OP0, OP1, OP2, OP3, OP4, OP5, OP6, OP7) \
+ R4 ( (k)*4+0, m0, m1, m2, m3, OP0, OP1 ) \
+ R4 ( (k)*4+1, m1, m2, m3, m0, OP2, OP3 ) \
+ R4 ( (k)*4+2, m2, m3, m0, m1, OP4, OP5 ) \
+ R4 ( (k)*4+3, m3, m0, m1, m2, OP6, OP7 ) \
+
+
+void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
+#ifdef ATTRIB_SHA
+ATTRIB_SHA
+#endif
+void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks)
+{
+ v128 state0, state1;
+
+ if (numBlocks == 0)
+ return;
+
+ state0 = LOAD_128(&state[0]);
+ state1 = LOAD_128(&state[4]);
+
+ do
+ {
+ v128 state0_save, state1_save;
+ v128 m0, m1, m2, m3;
+ v128 msg, tmp;
+
+ state0_save = state0;
+ state1_save = state1;
+
+ LOAD_SHUFFLE (m0, 0)
+ LOAD_SHUFFLE (m1, 1)
+ LOAD_SHUFFLE (m2, 2)
+ LOAD_SHUFFLE (m3, 3)
+
+ R16 ( 0, NNN, NNN, SM1, NNN, SM1, SM2, SM1, SM2 );
+ R16 ( 1, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 );
+ R16 ( 2, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 );
+ R16 ( 3, SM1, SM2, NNN, SM2, NNN, NNN, NNN, NNN );
+
+ state0 = vaddq_u32(state0, state0_save);
+ state1 = vaddq_u32(state1, state1_save);
+
+ data += 64;
+ }
+ while (--numBlocks);
+
+ STORE_128(&state[0], state0);
+ STORE_128(&state[4], state1);
+}
+
+#endif // USE_HW_SHA
+
+#endif // MY_CPU_ARM_OR_ARM64
+
+
+#ifndef USE_HW_SHA
+
+// #error Stop_Compiling_UNSUPPORTED_SHA
+// #include <stdlib.h>
+
+// #include "Sha256.h"
+void MY_FAST_CALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks);
+
+#pragma message("Sha256 HW-SW stub was used")
+
+void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
+void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks)
+{
+ Sha256_UpdateBlocks(state, data, numBlocks);
+ /*
+ UNUSED_VAR(state);
+ UNUSED_VAR(data);
+ UNUSED_VAR(numBlocks);
+ exit(1);
+ return;
+ */
+}
+
+#endif
diff --git a/multiarc/src/formats/7z/C/Sort.c b/multiarc/src/formats/7z/C/Sort.c
index e1097e38..e1097e38 100644..100755
--- a/multiarc/src/formats/7z/C/Sort.c
+++ b/multiarc/src/formats/7z/C/Sort.c
diff --git a/multiarc/src/formats/7z/C/Sort.h b/multiarc/src/formats/7z/C/Sort.h
index 2e2963a2..2e2963a2 100644..100755
--- a/multiarc/src/formats/7z/C/Sort.h
+++ b/multiarc/src/formats/7z/C/Sort.h
diff --git a/multiarc/src/formats/7z/C/Threads.c b/multiarc/src/formats/7z/C/Threads.c
index 930ad271..58eb90ff 100644..100755
--- a/multiarc/src/formats/7z/C/Threads.c
+++ b/multiarc/src/formats/7z/C/Threads.c
@@ -1,9 +1,11 @@
/* Threads.c -- multithreading library
-2017-06-26 : Igor Pavlov : Public domain */
+2021-12-21 : Igor Pavlov : Public domain */
#include "Precomp.h"
-#ifndef UNDER_CE
+#ifdef _WIN32
+
+#ifndef USE_THREADS_CreateThread
#include <process.h>
#endif
@@ -29,28 +31,103 @@ WRes HandlePtr_Close(HANDLE *p)
return 0;
}
-WRes Handle_WaitObject(HANDLE h) { return (WRes)WaitForSingleObject(h, INFINITE); }
+WRes Handle_WaitObject(HANDLE h)
+{
+ DWORD dw = WaitForSingleObject(h, INFINITE);
+ /*
+ (dw) result:
+ WAIT_OBJECT_0 // 0
+ WAIT_ABANDONED // 0x00000080 : is not compatible with Win32 Error space
+ WAIT_TIMEOUT // 0x00000102 : is compatible with Win32 Error space
+ WAIT_FAILED // 0xFFFFFFFF
+ */
+ if (dw == WAIT_FAILED)
+ {
+ dw = GetLastError();
+ if (dw == 0)
+ return WAIT_FAILED;
+ }
+ return (WRes)dw;
+}
+
+#define Thread_Wait(p) Handle_WaitObject(*(p))
+
+WRes Thread_Wait_Close(CThread *p)
+{
+ WRes res = Thread_Wait(p);
+ WRes res2 = Thread_Close(p);
+ return (res != 0 ? res : res2);
+}
WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param)
{
/* Windows Me/98/95: threadId parameter may not be NULL in _beginthreadex/CreateThread functions */
-
- #ifdef UNDER_CE
-
- DWORD threadId;
- *p = CreateThread(0, 0, func, param, 0, &threadId);
- #else
+ #ifdef USE_THREADS_CreateThread
+ DWORD threadId;
+ *p = CreateThread(NULL, 0, func, param, 0, &threadId);
+
+ #else
+
unsigned threadId;
- *p = (HANDLE)_beginthreadex(NULL, 0, func, param, 0, &threadId);
-
+ *p = (HANDLE)(_beginthreadex(NULL, 0, func, param, 0, &threadId));
+
#endif
/* maybe we must use errno here, but probably GetLastError() is also OK. */
return HandleToWRes(*p);
}
+
+WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, CAffinityMask affinity)
+{
+ #ifdef USE_THREADS_CreateThread
+
+ UNUSED_VAR(affinity)
+ return Thread_Create(p, func, param);
+
+ #else
+
+ /* Windows Me/98/95: threadId parameter may not be NULL in _beginthreadex/CreateThread functions */
+ HANDLE h;
+ WRes wres;
+ unsigned threadId;
+ h = (HANDLE)(_beginthreadex(NULL, 0, func, param, CREATE_SUSPENDED, &threadId));
+ *p = h;
+ wres = HandleToWRes(h);
+ if (h)
+ {
+ {
+ // DWORD_PTR prevMask =
+ SetThreadAffinityMask(h, (DWORD_PTR)affinity);
+ /*
+ if (prevMask == 0)
+ {
+ // affinity change is non-critical error, so we can ignore it
+ // wres = GetError();
+ }
+ */
+ }
+ {
+ DWORD prevSuspendCount = ResumeThread(h);
+ /* ResumeThread() returns:
+ 0 : was_not_suspended
+ 1 : was_resumed
+ -1 : error
+ */
+ if (prevSuspendCount == (DWORD)-1)
+ wres = GetError();
+ }
+ }
+
+ /* maybe we must use errno here, but probably GetLastError() is also OK. */
+ return wres;
+
+ #endif
+}
+
+
static WRes Event_Create(CEvent *p, BOOL manualReset, int signaled)
{
*p = CreateEvent(NULL, manualReset, (signaled ? TRUE : FALSE), NULL);
@@ -68,10 +145,22 @@ WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p) { return AutoResetEven
WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount)
{
+ // negative ((LONG)maxCount) is not supported in WIN32::CreateSemaphore()
*p = CreateSemaphore(NULL, (LONG)initCount, (LONG)maxCount, NULL);
return HandleToWRes(*p);
}
+WRes Semaphore_OptCreateInit(CSemaphore *p, UInt32 initCount, UInt32 maxCount)
+{
+ // if (Semaphore_IsCreated(p))
+ {
+ WRes wres = Semaphore_Close(p);
+ if (wres != 0)
+ return wres;
+ }
+ return Semaphore_Create(p, initCount, maxCount);
+}
+
static WRes Semaphore_Release(CSemaphore *p, LONG releaseCount, LONG *previousCount)
{ return BOOLToWRes(ReleaseSemaphore(*p, releaseCount, previousCount)); }
WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 num)
@@ -80,7 +169,9 @@ WRes Semaphore_Release1(CSemaphore *p) { return Semaphore_ReleaseN(p, 1); }
WRes CriticalSection_Init(CCriticalSection *p)
{
- /* InitializeCriticalSection can raise only STATUS_NO_MEMORY exception */
+ /* InitializeCriticalSection() can raise exception:
+ Windows XP, 2003 : can raise a STATUS_NO_MEMORY exception
+ Windows Vista+ : no exceptions */
#ifdef _MSC_VER
__try
#endif
@@ -89,7 +180,361 @@ WRes CriticalSection_Init(CCriticalSection *p)
/* InitializeCriticalSectionAndSpinCount(p, 0); */
}
#ifdef _MSC_VER
- __except (EXCEPTION_EXECUTE_HANDLER) { return 1; }
+ __except (EXCEPTION_EXECUTE_HANDLER) { return ERROR_NOT_ENOUGH_MEMORY; }
#endif
return 0;
}
+
+
+
+
+#else // _WIN32
+
+// ---------- POSIX ----------
+
+#ifndef __APPLE__
+#ifndef _7ZIP_AFFINITY_DISABLE
+// _GNU_SOURCE can be required for pthread_setaffinity_np() / CPU_ZERO / CPU_SET
+#define _GNU_SOURCE
+#endif
+#endif
+
+#include "Threads.h"
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#ifdef _7ZIP_AFFINITY_SUPPORTED
+// #include <sched.h>
+#endif
+
+
+// #include <stdio.h>
+// #define PRF(p) p
+#define PRF(p)
+
+#define Print(s) PRF(printf("\n%s\n", s))
+
+// #include <stdio.h>
+
+WRes Thread_Create_With_CpuSet(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, const CCpuSet *cpuSet)
+{
+ // new thread in Posix probably inherits affinity from parrent thread
+ Print("Thread_Create_With_CpuSet");
+
+ pthread_attr_t attr;
+ int ret;
+ // int ret2;
+
+ p->_created = 0;
+
+ RINOK(pthread_attr_init(&attr));
+
+ ret = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
+
+ if (!ret)
+ {
+ if (cpuSet)
+ {
+ #ifdef _7ZIP_AFFINITY_SUPPORTED
+
+ /*
+ printf("\n affinity :");
+ unsigned i;
+ for (i = 0; i < sizeof(*cpuSet) && i < 8; i++)
+ {
+ Byte b = *((const Byte *)cpuSet + i);
+ char temp[32];
+ #define GET_HEX_CHAR(t) ((char)(((t < 10) ? ('0' + t) : ('A' + (t - 10)))))
+ temp[0] = GET_HEX_CHAR((b & 0xF));
+ temp[1] = GET_HEX_CHAR((b >> 4));
+ // temp[0] = GET_HEX_CHAR((b >> 4)); // big-endian
+ // temp[1] = GET_HEX_CHAR((b & 0xF)); // big-endian
+ temp[2] = 0;
+ printf("%s", temp);
+ }
+ printf("\n");
+ */
+
+ // ret2 =
+ pthread_attr_setaffinity_np(&attr, sizeof(*cpuSet), cpuSet);
+ // if (ret2) ret = ret2;
+ #endif
+ }
+
+ ret = pthread_create(&p->_tid, &attr, func, param);
+
+ if (!ret)
+ {
+ p->_created = 1;
+ /*
+ if (cpuSet)
+ {
+ // ret2 =
+ pthread_setaffinity_np(p->_tid, sizeof(*cpuSet), cpuSet);
+ // if (ret2) ret = ret2;
+ }
+ */
+ }
+ }
+ // ret2 =
+ pthread_attr_destroy(&attr);
+ // if (ret2 != 0) ret = ret2;
+ return ret;
+}
+
+
+WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param)
+{
+ return Thread_Create_With_CpuSet(p, func, param, NULL);
+}
+
+
+WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, CAffinityMask affinity)
+{
+ Print("Thread_Create_WithAffinity");
+ CCpuSet cs;
+ unsigned i;
+ CpuSet_Zero(&cs);
+ for (i = 0; i < sizeof(affinity) * 8; i++)
+ {
+ if (affinity == 0)
+ break;
+ if (affinity & 1)
+ {
+ CpuSet_Set(&cs, i);
+ }
+ affinity >>= 1;
+ }
+ return Thread_Create_With_CpuSet(p, func, param, &cs);
+}
+
+
+WRes Thread_Close(CThread *p)
+{
+ // Print("Thread_Close");
+ int ret;
+ if (!p->_created)
+ return 0;
+
+ ret = pthread_detach(p->_tid);
+ p->_tid = 0;
+ p->_created = 0;
+ return ret;
+}
+
+
+WRes Thread_Wait_Close(CThread *p)
+{
+ // Print("Thread_Wait_Close");
+ void *thread_return;
+ int ret;
+ if (!p->_created)
+ return EINVAL;
+
+ ret = pthread_join(p->_tid, &thread_return);
+ // probably we can't use that (_tid) after pthread_join(), so we close thread here
+ p->_created = 0;
+ p->_tid = 0;
+ return ret;
+}
+
+
+
+static WRes Event_Create(CEvent *p, int manualReset, int signaled)
+{
+ RINOK(pthread_mutex_init(&p->_mutex, NULL));
+ RINOK(pthread_cond_init(&p->_cond, NULL));
+ p->_manual_reset = manualReset;
+ p->_state = (signaled ? True : False);
+ p->_created = 1;
+ return 0;
+}
+
+WRes ManualResetEvent_Create(CManualResetEvent *p, int signaled)
+ { return Event_Create(p, True, signaled); }
+WRes ManualResetEvent_CreateNotSignaled(CManualResetEvent *p)
+ { return ManualResetEvent_Create(p, 0); }
+WRes AutoResetEvent_Create(CAutoResetEvent *p, int signaled)
+ { return Event_Create(p, False, signaled); }
+WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p)
+ { return AutoResetEvent_Create(p, 0); }
+
+
+WRes Event_Set(CEvent *p)
+{
+ RINOK(pthread_mutex_lock(&p->_mutex));
+ p->_state = True;
+ int res1 = pthread_cond_broadcast(&p->_cond);
+ int res2 = pthread_mutex_unlock(&p->_mutex);
+ return (res2 ? res2 : res1);
+}
+
+WRes Event_Reset(CEvent *p)
+{
+ RINOK(pthread_mutex_lock(&p->_mutex));
+ p->_state = False;
+ return pthread_mutex_unlock(&p->_mutex);
+}
+
+WRes Event_Wait(CEvent *p)
+{
+ RINOK(pthread_mutex_lock(&p->_mutex));
+ while (p->_state == False)
+ {
+ // ETIMEDOUT
+ // ret =
+ pthread_cond_wait(&p->_cond, &p->_mutex);
+ // if (ret != 0) break;
+ }
+ if (p->_manual_reset == False)
+ {
+ p->_state = False;
+ }
+ return pthread_mutex_unlock(&p->_mutex);
+}
+
+WRes Event_Close(CEvent *p)
+{
+ if (!p->_created)
+ return 0;
+ p->_created = 0;
+ {
+ int res1 = pthread_mutex_destroy(&p->_mutex);
+ int res2 = pthread_cond_destroy(&p->_cond);
+ return (res1 ? res1 : res2);
+ }
+}
+
+
+WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount)
+{
+ if (initCount > maxCount || maxCount < 1)
+ return EINVAL;
+ RINOK(pthread_mutex_init(&p->_mutex, NULL));
+ RINOK(pthread_cond_init(&p->_cond, NULL));
+ p->_count = initCount;
+ p->_maxCount = maxCount;
+ p->_created = 1;
+ return 0;
+}
+
+
+WRes Semaphore_OptCreateInit(CSemaphore *p, UInt32 initCount, UInt32 maxCount)
+{
+ if (Semaphore_IsCreated(p))
+ {
+ /*
+ WRes wres = Semaphore_Close(p);
+ if (wres != 0)
+ return wres;
+ */
+ if (initCount > maxCount || maxCount < 1)
+ return EINVAL;
+ // return EINVAL; // for debug
+ p->_count = initCount;
+ p->_maxCount = maxCount;
+ return 0;
+ }
+ return Semaphore_Create(p, initCount, maxCount);
+}
+
+
+WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 releaseCount)
+{
+ UInt32 newCount;
+ int ret;
+
+ if (releaseCount < 1)
+ return EINVAL;
+
+ RINOK(pthread_mutex_lock(&p->_mutex));
+
+ newCount = p->_count + releaseCount;
+ if (newCount > p->_maxCount)
+ ret = ERROR_TOO_MANY_POSTS; // EINVAL;
+ else
+ {
+ p->_count = newCount;
+ ret = pthread_cond_broadcast(&p->_cond);
+ }
+ RINOK(pthread_mutex_unlock(&p->_mutex));
+ return ret;
+}
+
+WRes Semaphore_Wait(CSemaphore *p)
+{
+ RINOK(pthread_mutex_lock(&p->_mutex));
+ while (p->_count < 1)
+ {
+ pthread_cond_wait(&p->_cond, &p->_mutex);
+ }
+ p->_count--;
+ return pthread_mutex_unlock(&p->_mutex);
+}
+
+WRes Semaphore_Close(CSemaphore *p)
+{
+ if (!p->_created)
+ return 0;
+ p->_created = 0;
+ {
+ int res1 = pthread_mutex_destroy(&p->_mutex);
+ int res2 = pthread_cond_destroy(&p->_cond);
+ return (res1 ? res1 : res2);
+ }
+}
+
+
+
+WRes CriticalSection_Init(CCriticalSection *p)
+{
+ // Print("CriticalSection_Init");
+ if (!p)
+ return EINTR;
+ return pthread_mutex_init(&p->_mutex, NULL);
+}
+
+void CriticalSection_Enter(CCriticalSection *p)
+{
+ // Print("CriticalSection_Enter");
+ if (p)
+ {
+ // int ret =
+ pthread_mutex_lock(&p->_mutex);
+ }
+}
+
+void CriticalSection_Leave(CCriticalSection *p)
+{
+ // Print("CriticalSection_Leave");
+ if (p)
+ {
+ // int ret =
+ pthread_mutex_unlock(&p->_mutex);
+ }
+}
+
+void CriticalSection_Delete(CCriticalSection *p)
+{
+ // Print("CriticalSection_Delete");
+ if (p)
+ {
+ // int ret =
+ pthread_mutex_destroy(&p->_mutex);
+ }
+}
+
+LONG InterlockedIncrement(LONG volatile *addend)
+{
+ // Print("InterlockedIncrement");
+ #ifdef USE_HACK_UNSAFE_ATOMIC
+ LONG val = *addend + 1;
+ *addend = val;
+ return val;
+ #else
+ return __sync_add_and_fetch(addend, 1);
+ #endif
+}
+
+#endif // _WIN32
diff --git a/multiarc/src/formats/7z/C/Threads.h b/multiarc/src/formats/7z/C/Threads.h
index e53ace43..89ecb92b 100644..100755
--- a/multiarc/src/formats/7z/C/Threads.h
+++ b/multiarc/src/formats/7z/C/Threads.h
@@ -1,38 +1,139 @@
/* Threads.h -- multithreading library
-2017-06-18 : Igor Pavlov : Public domain */
+2021-12-21 : Igor Pavlov : Public domain */
#ifndef __7Z_THREADS_H
#define __7Z_THREADS_H
#ifdef _WIN32
-#include <windows.h>
+#include <Windows.h>
+#else
+
+#if defined(__linux__)
+#if !defined(__APPLE__) && !defined(_AIX) && !defined(__ANDROID__)
+#ifndef _7ZIP_AFFINITY_DISABLE
+#define _7ZIP_AFFINITY_SUPPORTED
+// #pragma message(" ==== _7ZIP_AFFINITY_SUPPORTED")
+// #define _GNU_SOURCE
+#endif
+#endif
+#endif
+
+#include <pthread.h>
+
#endif
#include "7zTypes.h"
EXTERN_C_BEGIN
+#ifdef _WIN32
+
WRes HandlePtr_Close(HANDLE *h);
WRes Handle_WaitObject(HANDLE h);
typedef HANDLE CThread;
-#define Thread_Construct(p) *(p) = NULL
+
+#define Thread_Construct(p) { *(p) = NULL; }
#define Thread_WasCreated(p) (*(p) != NULL)
#define Thread_Close(p) HandlePtr_Close(p)
-#define Thread_Wait(p) Handle_WaitObject(*(p))
+// #define Thread_Wait(p) Handle_WaitObject(*(p))
-typedef
#ifdef UNDER_CE
- DWORD
+ // if (USE_THREADS_CreateThread is defined), we use _beginthreadex()
+ // if (USE_THREADS_CreateThread is not definned), we use CreateThread()
+ #define USE_THREADS_CreateThread
+#endif
+
+typedef
+ #ifdef USE_THREADS_CreateThread
+ DWORD
+ #else
+ unsigned
+ #endif
+ THREAD_FUNC_RET_TYPE;
+
+typedef DWORD_PTR CAffinityMask;
+typedef DWORD_PTR CCpuSet;
+
+#define CpuSet_Zero(p) { *(p) = 0; }
+#define CpuSet_Set(p, cpu) { *(p) |= ((DWORD_PTR)1 << (cpu)); }
+
+#else // _WIN32
+
+typedef struct _CThread
+{
+ pthread_t _tid;
+ int _created;
+} CThread;
+
+#define Thread_Construct(p) { (p)->_tid = 0; (p)->_created = 0; }
+#define Thread_WasCreated(p) ((p)->_created != 0)
+WRes Thread_Close(CThread *p);
+// #define Thread_Wait Thread_Wait_Close
+
+typedef void * THREAD_FUNC_RET_TYPE;
+
+typedef UInt64 CAffinityMask;
+
+#ifdef _7ZIP_AFFINITY_SUPPORTED
+
+typedef cpu_set_t CCpuSet;
+#define CpuSet_Zero(p) CPU_ZERO(p)
+#define CpuSet_Set(p, cpu) CPU_SET(cpu, p)
+#define CpuSet_IsSet(p, cpu) CPU_ISSET(cpu, p)
+
#else
- unsigned
+
+typedef UInt64 CCpuSet;
+#define CpuSet_Zero(p) { *(p) = 0; }
+#define CpuSet_Set(p, cpu) { *(p) |= ((UInt64)1 << (cpu)); }
+#define CpuSet_IsSet(p, cpu) ((*(p) & ((UInt64)1 << (cpu))) != 0)
+
#endif
- THREAD_FUNC_RET_TYPE;
+
+
+#endif // _WIN32
+
#define THREAD_FUNC_CALL_TYPE MY_STD_CALL
-#define THREAD_FUNC_DECL THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE
+
+#if defined(_WIN32) && defined(__GNUC__)
+/* GCC compiler for x86 32-bit uses the rule:
+ the stack is 16-byte aligned before CALL instruction for function calling.
+ But only root function main() contains instructions that
+ set 16-byte alignment for stack pointer. And another functions
+ just keep alignment, if it was set in some parent function.
+
+ The problem:
+ if we create new thread in MinGW (GCC) 32-bit x86 via _beginthreadex() or CreateThread(),
+ the root function of thread doesn't set 16-byte alignment.
+ And stack frames in all child functions also will be unaligned in that case.
+
+ Here we set (force_align_arg_pointer) attribute for root function of new thread.
+ Do we need (force_align_arg_pointer) also for another systems? */
+
+ #define THREAD_FUNC_ATTRIB_ALIGN_ARG __attribute__((force_align_arg_pointer))
+ // #define THREAD_FUNC_ATTRIB_ALIGN_ARG // for debug : bad alignment in SSE functions
+#else
+ #define THREAD_FUNC_ATTRIB_ALIGN_ARG
+#endif
+
+#define THREAD_FUNC_DECL THREAD_FUNC_ATTRIB_ALIGN_ARG THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE
+
typedef THREAD_FUNC_RET_TYPE (THREAD_FUNC_CALL_TYPE * THREAD_FUNC_TYPE)(void *);
WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param);
+WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, CAffinityMask affinity);
+WRes Thread_Wait_Close(CThread *p);
+
+#ifdef _WIN32
+#define Thread_Create_With_CpuSet(p, func, param, cs) \
+ Thread_Create_With_Affinity(p, func, param, *cs)
+#else
+WRes Thread_Create_With_CpuSet(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, const CCpuSet *cpuSet);
+#endif
+
+
+#ifdef _WIN32
typedef HANDLE CEvent;
typedef CEvent CAutoResetEvent;
@@ -54,6 +155,7 @@ typedef HANDLE CSemaphore;
#define Semaphore_Close(p) HandlePtr_Close(p)
#define Semaphore_Wait(p) Handle_WaitObject(*(p))
WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount);
+WRes Semaphore_OptCreateInit(CSemaphore *p, UInt32 initCount, UInt32 maxCount);
WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 num);
WRes Semaphore_Release1(CSemaphore *p);
@@ -63,6 +165,68 @@ WRes CriticalSection_Init(CCriticalSection *p);
#define CriticalSection_Enter(p) EnterCriticalSection(p)
#define CriticalSection_Leave(p) LeaveCriticalSection(p)
+
+#else // _WIN32
+
+typedef struct _CEvent
+{
+ int _created;
+ int _manual_reset;
+ int _state;
+ pthread_mutex_t _mutex;
+ pthread_cond_t _cond;
+} CEvent;
+
+typedef CEvent CAutoResetEvent;
+typedef CEvent CManualResetEvent;
+
+#define Event_Construct(p) (p)->_created = 0
+#define Event_IsCreated(p) ((p)->_created)
+
+WRes ManualResetEvent_Create(CManualResetEvent *p, int signaled);
+WRes ManualResetEvent_CreateNotSignaled(CManualResetEvent *p);
+WRes AutoResetEvent_Create(CAutoResetEvent *p, int signaled);
+WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p);
+WRes Event_Set(CEvent *p);
+WRes Event_Reset(CEvent *p);
+WRes Event_Wait(CEvent *p);
+WRes Event_Close(CEvent *p);
+
+
+typedef struct _CSemaphore
+{
+ int _created;
+ UInt32 _count;
+ UInt32 _maxCount;
+ pthread_mutex_t _mutex;
+ pthread_cond_t _cond;
+} CSemaphore;
+
+#define Semaphore_Construct(p) (p)->_created = 0
+#define Semaphore_IsCreated(p) ((p)->_created)
+
+WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount);
+WRes Semaphore_OptCreateInit(CSemaphore *p, UInt32 initCount, UInt32 maxCount);
+WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 num);
+#define Semaphore_Release1(p) Semaphore_ReleaseN(p, 1)
+WRes Semaphore_Wait(CSemaphore *p);
+WRes Semaphore_Close(CSemaphore *p);
+
+
+typedef struct _CCriticalSection
+{
+ pthread_mutex_t _mutex;
+} CCriticalSection;
+
+WRes CriticalSection_Init(CCriticalSection *p);
+void CriticalSection_Delete(CCriticalSection *cs);
+void CriticalSection_Enter(CCriticalSection *cs);
+void CriticalSection_Leave(CCriticalSection *cs);
+
+LONG InterlockedIncrement(LONG volatile *addend);
+
+#endif // _WIN32
+
EXTERN_C_END
#endif
diff --git a/multiarc/src/formats/7z/C/Xz.c b/multiarc/src/formats/7z/C/Xz.c
index d9f83df1..7c53b600 100644..100755
--- a/multiarc/src/formats/7z/C/Xz.c
+++ b/multiarc/src/formats/7z/C/Xz.c
@@ -1,5 +1,5 @@
/* Xz.c - Xz
-2017-05-12 : Igor Pavlov : Public domain */
+2021-02-09 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -41,7 +41,7 @@ void Xz_Free(CXzStream *p, ISzAllocPtr alloc)
unsigned XzFlags_GetCheckSize(CXzStreamFlags f)
{
unsigned t = XzFlags_GetCheckType(f);
- return (t == 0) ? 0 : (4 << ((t - 1) / 3));
+ return (t == 0) ? 0 : ((unsigned)4 << ((t - 1) / 3));
}
void XzCheck_Init(CXzCheck *p, unsigned mode)
diff --git a/multiarc/src/formats/7z/C/Xz.h b/multiarc/src/formats/7z/C/Xz.h
index 544ee18f..849b944b 100644..100755
--- a/multiarc/src/formats/7z/C/Xz.h
+++ b/multiarc/src/formats/7z/C/Xz.h
@@ -1,5 +1,5 @@
/* Xz.h - Xz interface
-2018-07-04 : Igor Pavlov : Public domain */
+2021-04-01 : Igor Pavlov : Public domain */
#ifndef __XZ_H
#define __XZ_H
@@ -47,7 +47,7 @@ typedef struct
CXzFilter filters[XZ_NUM_FILTERS_MAX];
} CXzBlock;
-#define XzBlock_GetNumFilters(p) (((p)->flags & XZ_BF_NUM_FILTERS_MASK) + 1)
+#define XzBlock_GetNumFilters(p) (((unsigned)(p)->flags & XZ_BF_NUM_FILTERS_MASK) + 1)
#define XzBlock_HasPackSize(p) (((p)->flags & XZ_BF_PACK_SIZE) != 0)
#define XzBlock_HasUnpackSize(p) (((p)->flags & XZ_BF_UNPACK_SIZE) != 0)
#define XzBlock_HasUnsupportedFlags(p) (((p)->flags & ~(XZ_BF_NUM_FILTERS_MASK | XZ_BF_PACK_SIZE | XZ_BF_UNPACK_SIZE)) != 0)
@@ -277,7 +277,10 @@ void XzUnpacker_Free(CXzUnpacker *p);
{
XzUnpacker_Init()
for()
+ {
XzUnpacker_Code();
+ }
+ XzUnpacker_IsStreamWasFinished()
}
Interface-2 : Direct output buffer:
@@ -288,7 +291,10 @@ void XzUnpacker_Free(CXzUnpacker *p);
XzUnpacker_Init()
XzUnpacker_SetOutBufMode(); // to set output buffer and size
for()
+ {
XzUnpacker_Code(); // (dest = NULL) in XzUnpacker_Code()
+ }
+ XzUnpacker_IsStreamWasFinished()
}
Interface-3 : Direct output buffer : One call full decoding
@@ -296,6 +302,7 @@ void XzUnpacker_Free(CXzUnpacker *p);
It uses Interface-2 internally.
{
XzUnpacker_CodeFull()
+ XzUnpacker_IsStreamWasFinished()
}
*/
@@ -309,8 +316,12 @@ Returns:
SZ_OK
status:
CODER_STATUS_NOT_FINISHED,
- CODER_STATUS_NEEDS_MORE_INPUT - maybe there are more xz streams,
- call XzUnpacker_IsStreamWasFinished to check that current stream was finished
+ CODER_STATUS_NEEDS_MORE_INPUT - the decoder can return it in two cases:
+ 1) it needs more input data to finish current xz stream
+ 2) xz stream was finished successfully. But the decoder supports multiple
+ concatented xz streams. So it expects more input data for new xz streams.
+ Call XzUnpacker_IsStreamWasFinished() to check that latest xz stream was finished successfully.
+
SZ_ERROR_MEM - Memory allocation error
SZ_ERROR_DATA - Data error
SZ_ERROR_UNSUPPORTED - Unsupported method or method properties
@@ -335,12 +346,17 @@ SRes XzUnpacker_CodeFull(CXzUnpacker *p, Byte *dest, SizeT *destLen,
const Byte *src, SizeT *srcLen,
ECoderFinishMode finishMode, ECoderStatus *status);
+/*
+If you decode full xz stream(s), then you can call XzUnpacker_IsStreamWasFinished()
+after successful XzUnpacker_CodeFull() or after last call of XzUnpacker_Code().
+*/
+
BoolInt XzUnpacker_IsStreamWasFinished(const CXzUnpacker *p);
/*
-XzUnpacker_GetExtraSize() returns then number of uncofirmed bytes,
+XzUnpacker_GetExtraSize() returns then number of unconfirmed bytes,
if it's in (XZ_STATE_STREAM_HEADER) state or in (XZ_STATE_STREAM_PADDING) state.
-These bytes can be some bytes after xz archive, or
+These bytes can be some data after xz archive, or
it can be start of new xz stream.
Call XzUnpacker_GetExtraSize() after XzUnpacker_Code() function to detect real size of
@@ -371,19 +387,46 @@ BoolInt XzUnpacker_IsBlockFinished(const CXzUnpacker *p);
-/* ---------- Multi Threading Decoding ---------- */
+
+
+
+/* ---- Single-Thread and Multi-Thread xz Decoding with Input/Output Streams ---- */
+
+/*
+ if (CXzDecMtProps::numThreads > 1), the decoder can try to use
+ Multi-Threading. The decoder analyses xz block header, and if
+ there are pack size and unpack size values stored in xz block header,
+ the decoder reads compressed data of block to internal buffers,
+ and then it can start parallel decoding, if there are another blocks.
+ The decoder can switch back to Single-Thread decoding after some conditions.
+
+ The sequence of calls for xz decoding with in/out Streams:
+ {
+ XzDecMt_Create()
+ XzDecMtProps_Init(XzDecMtProps) to set default values of properties
+ // then you can change some XzDecMtProps parameters with required values
+ // here you can set the number of threads and (memUseMax) - the maximum
+ Memory usage for multithreading decoding.
+ for()
+ {
+ XzDecMt_Decode() // one call per one file
+ }
+ XzDecMt_Destroy()
+ }
+*/
typedef struct
{
- size_t inBufSize_ST;
- size_t outStep_ST;
- BoolInt ignoreErrors;
+ size_t inBufSize_ST; // size of input buffer for Single-Thread decoding
+ size_t outStep_ST; // size of output buffer for Single-Thread decoding
+ BoolInt ignoreErrors; // if set to 1, the decoder can ignore some errors and it skips broken parts of data.
#ifndef _7ZIP_ST
- unsigned numThreads;
- size_t inBufSize_MT;
- size_t memUseMax;
+ unsigned numThreads; // the number of threads for Multi-Thread decoding. if (umThreads == 1) it will use Single-thread decoding
+ size_t inBufSize_MT; // size of small input data buffers for Multi-Thread decoding. Big number of such small buffers can be created
+ size_t memUseMax; // the limit of total memory usage for Multi-Thread decoding.
+ // it's recommended to set (memUseMax) manually to value that is smaller of total size of RAM in computer.
#endif
} CXzDecMtProps;
@@ -393,7 +436,7 @@ void XzDecMtProps_Init(CXzDecMtProps *p);
typedef void * CXzDecMtHandle;
/*
- alloc : XzDecMt uses CAlignOffsetAlloc for addresses allocated by (alloc).
+ alloc : XzDecMt uses CAlignOffsetAlloc internally for addresses allocated by (alloc).
allocMid : for big allocations, aligned allocation is better
*/
@@ -407,33 +450,46 @@ typedef struct
Byte NumStreams_Defined;
Byte NumBlocks_Defined;
- Byte DataAfterEnd;
+ Byte DataAfterEnd; // there are some additional data after good xz streams, and that data is not new xz stream.
Byte DecodingTruncated; // Decoding was Truncated, we need only partial output data
- UInt64 InSize; // pack size processed
+ UInt64 InSize; // pack size processed. That value doesn't include the data after
+ // end of xz stream, if that data was not correct
UInt64 OutSize;
UInt64 NumStreams;
UInt64 NumBlocks;
- SRes DecodeRes;
- SRes ReadRes;
- SRes ProgressRes;
- SRes CombinedRes;
- SRes CombinedRes_Type;
+ SRes DecodeRes; // the error code of xz streams data decoding
+ SRes ReadRes; // error code from ISeqInStream:Read()
+ SRes ProgressRes; // error code from ICompressProgress:Progress()
+ SRes CombinedRes; // Combined result error code that shows main rusult
+ // = S_OK, if there is no error.
+ // but check also (DataAfterEnd) that can show additional minor errors.
+
+ SRes CombinedRes_Type; // = SZ_ERROR_READ, if error from ISeqInStream
+ // = SZ_ERROR_PROGRESS, if error from ICompressProgress
+ // = SZ_ERROR_WRITE, if error from ISeqOutStream
+ // = SZ_ERROR_* codes for decoding
} CXzStatInfo;
void XzStatInfo_Clear(CXzStatInfo *p);
/*
+
XzDecMt_Decode()
-SRes:
- SZ_OK - OK
+SRes: it's combined decoding result. It also is equal to stat->CombinedRes.
+
+ SZ_OK - no error
+ check also output value in (stat->DataAfterEnd)
+ that can show additional possible error
+
SZ_ERROR_MEM - Memory allocation error
SZ_ERROR_NO_ARCHIVE - is not xz archive
SZ_ERROR_ARCHIVE - Headers error
SZ_ERROR_DATA - Data Error
+ SZ_ERROR_UNSUPPORTED - Unsupported method or method properties
SZ_ERROR_CRC - CRC Error
SZ_ERROR_INPUT_EOF - it needs more input data
SZ_ERROR_WRITE - ISeqOutStream error
@@ -451,8 +507,9 @@ SRes XzDecMt_Decode(CXzDecMtHandle p,
// Byte *outBuf, size_t *outBufSize,
ISeqInStream *inStream,
// const Byte *inData, size_t inDataSize,
- CXzStatInfo *stat,
- int *isMT, // 0 means that ST (Single-Thread) version was used
+ CXzStatInfo *stat, // out: decoding results and statistics
+ int *isMT, // out: 0 means that ST (Single-Thread) version was used
+ // 1 means that MT (Multi-Thread) version was used
ICompressProgress *progress);
EXTERN_C_END
diff --git a/multiarc/src/formats/7z/C/XzCrc64.c b/multiarc/src/formats/7z/C/XzCrc64.c
index b6d02cbe..b6d02cbe 100644..100755
--- a/multiarc/src/formats/7z/C/XzCrc64.c
+++ b/multiarc/src/formats/7z/C/XzCrc64.c
diff --git a/multiarc/src/formats/7z/C/XzCrc64.h b/multiarc/src/formats/7z/C/XzCrc64.h
index 08dbc330..08dbc330 100644..100755
--- a/multiarc/src/formats/7z/C/XzCrc64.h
+++ b/multiarc/src/formats/7z/C/XzCrc64.h
diff --git a/multiarc/src/formats/7z/C/XzCrc64Opt.c b/multiarc/src/formats/7z/C/XzCrc64Opt.c
index b2852de4..93a9ffff 100644..100755
--- a/multiarc/src/formats/7z/C/XzCrc64Opt.c
+++ b/multiarc/src/formats/7z/C/XzCrc64Opt.c
@@ -1,5 +1,5 @@
/* XzCrc64Opt.c -- CRC64 calculation
-2017-06-30 : Igor Pavlov : Public domain */
+2021-02-09 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -9,6 +9,7 @@
#define CRC64_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8))
+UInt64 MY_FAST_CALL XzCrc64UpdateT4(UInt64 v, const void *data, size_t size, const UInt64 *table);
UInt64 MY_FAST_CALL XzCrc64UpdateT4(UInt64 v, const void *data, size_t size, const UInt64 *table)
{
const Byte *p = (const Byte *)data;
@@ -16,7 +17,7 @@ UInt64 MY_FAST_CALL XzCrc64UpdateT4(UInt64 v, const void *data, size_t size, con
v = CRC64_UPDATE_BYTE_2(v, *p);
for (; size >= 4; size -= 4, p += 4)
{
- UInt32 d = (UInt32)v ^ *(const UInt32 *)p;
+ UInt32 d = (UInt32)v ^ *(const UInt32 *)(const void *)p;
v = (v >> 32)
^ (table + 0x300)[((d ) & 0xFF)]
^ (table + 0x200)[((d >> 8) & 0xFF)]
@@ -45,6 +46,7 @@ UInt64 MY_FAST_CALL XzCrc64UpdateT4(UInt64 v, const void *data, size_t size, con
#define CRC64_UPDATE_BYTE_2_BE(crc, b) (table[(Byte)((crc) >> 56) ^ (b)] ^ ((crc) << 8))
+UInt64 MY_FAST_CALL XzCrc64UpdateT1_BeT4(UInt64 v, const void *data, size_t size, const UInt64 *table);
UInt64 MY_FAST_CALL XzCrc64UpdateT1_BeT4(UInt64 v, const void *data, size_t size, const UInt64 *table)
{
const Byte *p = (const Byte *)data;
@@ -54,7 +56,7 @@ UInt64 MY_FAST_CALL XzCrc64UpdateT1_BeT4(UInt64 v, const void *data, size_t size
v = CRC64_UPDATE_BYTE_2_BE(v, *p);
for (; size >= 4; size -= 4, p += 4)
{
- UInt32 d = (UInt32)(v >> 32) ^ *(const UInt32 *)p;
+ UInt32 d = (UInt32)(v >> 32) ^ *(const UInt32 *)(const void *)p;
v = (v << 32)
^ (table + 0x000)[((d ) & 0xFF)]
^ (table + 0x100)[((d >> 8) & 0xFF)]
diff --git a/multiarc/src/formats/7z/C/XzDec.c b/multiarc/src/formats/7z/C/XzDec.c
index 395e83f6..3f96a37f 100644..100755
--- a/multiarc/src/formats/7z/C/XzDec.c
+++ b/multiarc/src/formats/7z/C/XzDec.c
@@ -1,5 +1,5 @@
/* XzDec.c -- Xz Decode
-2019-02-02 : Igor Pavlov : Public domain */
+2021-09-04 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -240,6 +240,7 @@ static SRes BraState_Code2(void *pp,
}
+SRes BraState_SetFromMethod(IStateCoder *p, UInt64 id, int encodeMode, ISzAllocPtr alloc);
SRes BraState_SetFromMethod(IStateCoder *p, UInt64 id, int encodeMode, ISzAllocPtr alloc)
{
CBraState *decoder;
@@ -772,7 +773,8 @@ static BoolInt Xz_CheckFooter(CXzStreamFlags flags, UInt64 indexSize, const Byte
#define READ_VARINT_AND_CHECK(buf, pos, size, res) \
{ unsigned s = Xz_ReadVarInt(buf + pos, size - pos, res); \
- if (s == 0) return SZ_ERROR_ARCHIVE; pos += s; }
+ if (s == 0) return SZ_ERROR_ARCHIVE; \
+ pos += s; }
static BoolInt XzBlock_AreSupportedFilters(const CXzBlock *p)
@@ -1038,7 +1040,7 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen,
(p->outBuf ? NULL : dest), &destLen2, destFinish,
src, &srcLen2, srcFinished2,
finishMode2);
-
+
*status = p->decoder.status;
XzCheck_Update(&p->check, (p->outBuf ? p->outBuf + p->outDataWritten : dest), destLen2);
if (!p->outBuf)
@@ -1275,9 +1277,10 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen,
}
else
{
+ const Byte *ptr = p->buf;
p->state = XZ_STATE_STREAM_FOOTER;
p->pos = 0;
- if (CRC_GET_DIGEST(p->crc) != GetUi32(p->buf))
+ if (CRC_GET_DIGEST(p->crc) != GetUi32(ptr))
return SZ_ERROR_CRC;
}
break;
@@ -1456,7 +1459,6 @@ typedef struct
ISeqInStream *inStream;
ISeqOutStream *outStream;
ICompressProgress *progress;
- // CXzStatInfo *stat;
BoolInt finishMode;
BoolInt outSize_Defined;
@@ -1492,8 +1494,9 @@ typedef struct
UInt64 numBlocks;
// UInt64 numBadBlocks;
- SRes mainErrorCode;
-
+ SRes mainErrorCode; // it's set to error code, if the size Code() output doesn't patch the size from Parsing stage
+ // it can be = SZ_ERROR_INPUT_EOF
+ // it can be = SZ_ERROR_DATA, in some another cases
BoolInt isBlockHeaderState_Parse;
BoolInt isBlockHeaderState_Write;
UInt64 outProcessed_Parse;
@@ -1877,7 +1880,7 @@ static SRes XzDecMt_Callback_PreCode(void *pp, unsigned coderIndex)
{
// if (res == SZ_ERROR_MEM) return res;
if (me->props.ignoreErrors && res != SZ_ERROR_MEM)
- return S_OK;
+ return SZ_OK;
return res;
}
}
@@ -1898,15 +1901,18 @@ static SRes XzDecMt_Callback_Code(void *pp, unsigned coderIndex,
*outCodePos = coder->outCodeSize;
*stop = True;
+ if (srcSize > coder->inPreSize - coder->inCodeSize)
+ return SZ_ERROR_FAIL;
+
if (coder->inCodeSize < coder->inPreHeaderSize)
{
- UInt64 rem = coder->inPreHeaderSize - coder->inCodeSize;
- size_t step = srcSize;
- if (step > rem)
- step = (size_t)rem;
+ size_t step = coder->inPreHeaderSize - coder->inCodeSize;
+ if (step > srcSize)
+ step = srcSize;
src += step;
srcSize -= step;
coder->inCodeSize += step;
+ *inCodePos = coder->inCodeSize;
if (coder->inCodeSize < coder->inPreHeaderSize)
{
*stop = False;
@@ -1956,7 +1962,7 @@ static SRes XzDecMt_Callback_Code(void *pp, unsigned coderIndex,
{
*inCodePos = coder->inPreSize;
*outCodePos = coder->outPreSize;
- return S_OK;
+ return SZ_OK;
}
return coder->codeRes;
}
@@ -1966,7 +1972,7 @@ static SRes XzDecMt_Callback_Code(void *pp, unsigned coderIndex,
static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex,
BoolInt needWriteToStream,
- const Byte *src, size_t srcSize,
+ const Byte *src, size_t srcSize, BoolInt isCross,
// int srcFinished,
BoolInt *needContinue,
BoolInt *canRecode)
@@ -1985,7 +1991,7 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex,
if (!coder->dec.headerParsedOk || !coder->outBuf)
{
if (me->finishedDecoderIndex < 0)
- me->finishedDecoderIndex = coderIndex;
+ me->finishedDecoderIndex = (int)coderIndex;
return SZ_OK;
}
@@ -2077,7 +2083,7 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex,
if (coder->codeRes != SZ_OK)
if (!me->props.ignoreErrors)
{
- me->finishedDecoderIndex = coderIndex;
+ me->finishedDecoderIndex = (int)coderIndex;
return res;
}
@@ -2086,7 +2092,7 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex,
if (coder->inPreSize != coder->inCodeSize
|| coder->blockPackTotal != coder->inCodeSize)
{
- me->finishedDecoderIndex = coderIndex;
+ me->finishedDecoderIndex = (int)coderIndex;
return SZ_OK;
}
@@ -2125,22 +2131,41 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex,
return SZ_OK;
}
+ /*
+ We have processed all xz-blocks of stream,
+ And xz unpacker is at XZ_STATE_BLOCK_HEADER state, where
+ (src) is a pointer to xz-Index structure.
+ We finish reading of current xz-Stream, including Zero padding after xz-Stream.
+ We exit, if we reach extra byte (first byte of new-Stream or another data).
+ But we don't update input stream pointer for that new extra byte.
+ If extra byte is not correct first byte of xz-signature,
+ we have SZ_ERROR_NO_ARCHIVE error here.
+ */
+
res = XzUnpacker_Code(dec,
NULL, &outSizeCur,
src, &srcProcessed,
me->mtc.readWasFinished, // srcFinished
CODER_FINISH_END, // CODER_FINISH_ANY,
&status);
+
+ // res = SZ_ERROR_ARCHIVE; // for failure test
me->status = status;
me->codeRes = res;
+ if (isCross)
+ me->mtc.crossStart += srcProcessed;
+
me->mtc.inProcessed += srcProcessed;
me->mtc.mtProgress.totalInSize = me->mtc.inProcessed;
+ srcSize -= srcProcessed;
+ src += srcProcessed;
+
if (res != SZ_OK)
{
- return S_OK;
+ return SZ_OK;
// return res;
}
@@ -2149,20 +2174,26 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex,
*needContinue = True;
me->isBlockHeaderState_Parse = False;
me->isBlockHeaderState_Write = False;
+
+ if (!isCross)
{
Byte *crossBuf = MtDec_GetCrossBuff(&me->mtc);
if (!crossBuf)
return SZ_ERROR_MEM;
- memcpy(crossBuf, src + srcProcessed, srcSize - srcProcessed);
+ if (srcSize != 0)
+ memcpy(crossBuf, src, srcSize);
+ me->mtc.crossStart = 0;
+ me->mtc.crossEnd = srcSize;
}
- me->mtc.crossStart = 0;
- me->mtc.crossEnd = srcSize - srcProcessed;
+
+ PRF_STR_INT("XZ_STATE_STREAM_HEADER crossEnd = ", (unsigned)me->mtc.crossEnd);
+
return SZ_OK;
}
- if (status != CODER_STATUS_NEEDS_MORE_INPUT)
+ if (status != CODER_STATUS_NEEDS_MORE_INPUT || srcSize != 0)
{
- return E_FAIL;
+ return SZ_ERROR_FAIL;
}
if (me->mtc.readWasFinished)
@@ -2174,7 +2205,7 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex,
{
size_t inPos;
size_t inLim;
- const Byte *inData;
+ // const Byte *inData;
UInt64 inProgressPrev = me->mtc.inProcessed;
// XzDecMt_Prepare_InBuf_ST(p);
@@ -2184,9 +2215,8 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex,
inPos = 0;
inLim = 0;
- // outProcessed = 0;
- inData = crossBuf;
+ // inData = crossBuf;
for (;;)
{
@@ -2201,7 +2231,7 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex,
{
inPos = 0;
inLim = me->mtc.inBufSize;
- me->mtc.readRes = ISeqInStream_Read(me->inStream, (void *)inData, &inLim);
+ me->mtc.readRes = ISeqInStream_Read(me->inStream, (void *)crossBuf, &inLim);
me->mtc.readProcessed += inLim;
if (inLim == 0 || me->mtc.readRes != SZ_OK)
me->mtc.readWasFinished = True;
@@ -2213,7 +2243,7 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex,
res = XzUnpacker_Code(dec,
NULL, &outProcessed,
- inData + inPos, &inProcessed,
+ crossBuf + inPos, &inProcessed,
(inProcessed == 0), // srcFinished
CODER_FINISH_END, &status);
@@ -2225,7 +2255,7 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex,
if (res != SZ_OK)
{
- return S_OK;
+ return SZ_OK;
// return res;
}
@@ -2240,7 +2270,7 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex,
}
if (status != CODER_STATUS_NEEDS_MORE_INPUT)
- return E_FAIL;
+ return SZ_ERROR_FAIL;
if (me->mtc.progress)
{
@@ -2276,13 +2306,6 @@ void XzStatInfo_Clear(CXzStatInfo *p)
p->NumStreams_Defined = False;
p->NumBlocks_Defined = False;
- // p->IsArc = False;
- // p->UnexpectedEnd = False;
- // p->Unsupported = False;
- // p->HeadersError = False;
- // p->DataError = False;
- // p->CrcError = False;
-
p->DataAfterEnd = False;
p->DecodingTruncated = False;
@@ -2296,6 +2319,16 @@ void XzStatInfo_Clear(CXzStatInfo *p)
+/*
+ XzDecMt_Decode_ST() can return SZ_OK or the following errors
+ - SZ_ERROR_MEM for memory allocation error
+ - error from XzUnpacker_Code() function
+ - SZ_ERROR_WRITE for ISeqOutStream::Write(). stat->CombinedRes_Type = SZ_ERROR_WRITE in that case
+ - ICompressProgress::Progress() error, stat->CombinedRes_Type = SZ_ERROR_PROGRESS.
+ But XzDecMt_Decode_ST() doesn't return ISeqInStream::Read() errors.
+ ISeqInStream::Read() result is set to p->readRes.
+ also it can set stat->CombinedRes_Type to SZ_ERROR_WRITE or SZ_ERROR_PROGRESS.
+*/
static SRes XzDecMt_Decode_ST(CXzDecMt *p
#ifndef _7ZIP_ST
@@ -2384,7 +2417,7 @@ static SRes XzDecMt_Decode_ST(CXzDecMt *p
inPos = 0;
inLim = p->inBufSize;
inData = p->inBuf;
- p->readRes = ISeqInStream_Read(p->inStream, (void *)inData, &inLim);
+ p->readRes = ISeqInStream_Read(p->inStream, (void *)p->inBuf, &inLim);
p->readProcessed += inLim;
if (inLim == 0 || p->readRes != SZ_OK)
p->readWasFinished = True;
@@ -2426,8 +2459,8 @@ static SRes XzDecMt_Decode_ST(CXzDecMt *p
if (finished || outProcessed >= outSize)
if (outPos != 0)
{
- size_t written = ISeqOutStream_Write(p->outStream, p->outBuf, outPos);
- p->outProcessed += written;
+ const size_t written = ISeqOutStream_Write(p->outStream, p->outBuf, outPos);
+ // p->outProcessed += written; // 21.01: BUG fixed
if (written != outPos)
{
stat->CombinedRes_Type = SZ_ERROR_WRITE;
@@ -2438,9 +2471,8 @@ static SRes XzDecMt_Decode_ST(CXzDecMt *p
if (p->progress && res == SZ_OK)
{
- UInt64 inDelta = p->inProcessed - inPrev;
- UInt64 outDelta = p->outProcessed - outPrev;
- if (inDelta >= (1 << 22) || outDelta >= (1 << 22))
+ if (p->inProcessed - inPrev >= (1 << 22) ||
+ p->outProcessed - outPrev >= (1 << 22))
{
res = ICompressProgress_Progress(p->progress, p->inProcessed, p->outProcessed);
if (res != SZ_OK)
@@ -2455,14 +2487,31 @@ static SRes XzDecMt_Decode_ST(CXzDecMt *p
}
if (finished)
- return res;
+ {
+ // p->codeRes is preliminary error from XzUnpacker_Code.
+ // and it can be corrected later as final result
+ // so we return SZ_OK here instead of (res);
+ return SZ_OK;
+ // return res;
+ }
}
}
-static SRes XzStatInfo_SetStat(const CXzUnpacker *dec,
+
+
+/*
+XzStatInfo_SetStat() transforms
+ CXzUnpacker return code and status to combined CXzStatInfo results.
+ it can convert SZ_OK to SZ_ERROR_INPUT_EOF
+ it can convert SZ_ERROR_NO_ARCHIVE to SZ_OK and (DataAfterEnd = 1)
+*/
+
+static void XzStatInfo_SetStat(const CXzUnpacker *dec,
int finishMode,
- UInt64 readProcessed, UInt64 inProcessed,
- SRes res, ECoderStatus status,
+ // UInt64 readProcessed,
+ UInt64 inProcessed,
+ SRes res, // it's result from CXzUnpacker unpacker
+ ECoderStatus status,
BoolInt decodingTruncated,
CXzStatInfo *stat)
{
@@ -2484,12 +2533,20 @@ static SRes XzStatInfo_SetStat(const CXzUnpacker *dec,
if (status == CODER_STATUS_NEEDS_MORE_INPUT)
{
// CODER_STATUS_NEEDS_MORE_INPUT is expected status for correct xz streams
+ // any extra data is part of correct data
extraSize = 0;
+ // if xz stream was not finished, then we need more data
if (!XzUnpacker_IsStreamWasFinished(dec))
res = SZ_ERROR_INPUT_EOF;
}
- else if (!decodingTruncated || finishMode) // (status == CODER_STATUS_NOT_FINISHED)
- res = SZ_ERROR_DATA;
+ else
+ {
+ // CODER_STATUS_FINISHED_WITH_MARK is not possible for multi stream xz decoding
+ // so he we have (status == CODER_STATUS_NOT_FINISHED)
+ // if (status != CODER_STATUS_FINISHED_WITH_MARK)
+ if (!decodingTruncated || finishMode)
+ res = SZ_ERROR_DATA;
+ }
}
else if (res == SZ_ERROR_NO_ARCHIVE)
{
@@ -2497,24 +2554,29 @@ static SRes XzStatInfo_SetStat(const CXzUnpacker *dec,
SZ_ERROR_NO_ARCHIVE is possible for 2 states:
XZ_STATE_STREAM_HEADER - if bad signature or bad CRC
XZ_STATE_STREAM_PADDING - if non-zero padding data
- extraSize / inProcessed don't include "bad" byte
+ extraSize and inProcessed don't include "bad" byte
*/
- if (inProcessed != extraSize) // if good streams before error
- if (extraSize != 0 || readProcessed != inProcessed)
+ // if (inProcessed == extraSize), there was no any good xz stream header, and we keep error
+ if (inProcessed != extraSize) // if there were good xz streams before error
+ {
+ // if (extraSize != 0 || readProcessed != inProcessed)
{
+ // he we suppose that all xz streams were finsihed OK, and we have
+ // some extra data after all streams
stat->DataAfterEnd = True;
- // there is some good xz stream before. So we set SZ_OK
res = SZ_OK;
}
+ }
}
- stat->DecodeRes = res;
+ if (stat->DecodeRes == SZ_OK)
+ stat->DecodeRes = res;
stat->InSize -= extraSize;
- return res;
}
+
SRes XzDecMt_Decode(CXzDecMtHandle pp,
const CXzDecMtProps *props,
const UInt64 *outDataSize, int finishMode,
@@ -2557,8 +2619,9 @@ SRes XzDecMt_Decode(CXzDecMtHandle pp,
p->inProcessed = 0;
p->readProcessed = 0;
p->readWasFinished = False;
+ p->readRes = SZ_OK;
- p->codeRes = 0;
+ p->codeRes = SZ_OK;
p->status = CODER_STATUS_NOT_SPECIFIED;
XzUnpacker_Init(&p->dec);
@@ -2589,8 +2652,9 @@ SRes XzDecMt_Decode(CXzDecMtHandle pp,
if (p->props.numThreads > 1)
{
- IMtDecCallback vt;
-
+ IMtDecCallback2 vt;
+ BoolInt needContinue;
+ SRes res;
// we just free ST buffers here
// but we still keep state variables, that was set in XzUnpacker_Init()
XzDecMt_FreeSt(p);
@@ -2628,45 +2692,45 @@ SRes XzDecMt_Decode(CXzDecMtHandle pp,
vt.Code = XzDecMt_Callback_Code;
vt.Write = XzDecMt_Callback_Write;
- {
- BoolInt needContinue;
-
- SRes res = MtDec_Code(&p->mtc);
-
- stat->InSize = p->mtc.inProcessed;
- p->inProcessed = p->mtc.inProcessed;
- p->readRes = p->mtc.readRes;
- p->readWasFinished = p->mtc.readWasFinished;
- p->readProcessed = p->mtc.readProcessed;
+ res = MtDec_Code(&p->mtc);
- tMode = True;
- needContinue = False;
- if (res == SZ_OK)
+ stat->InSize = p->mtc.inProcessed;
+
+ p->inProcessed = p->mtc.inProcessed;
+ p->readRes = p->mtc.readRes;
+ p->readWasFinished = p->mtc.readWasFinished;
+ p->readProcessed = p->mtc.readProcessed;
+
+ tMode = True;
+ needContinue = False;
+
+ if (res == SZ_OK)
+ {
+ if (p->mtc.mtProgress.res != SZ_OK)
{
- if (p->mtc.mtProgress.res != SZ_OK)
- {
- res = p->mtc.mtProgress.res;
- stat->ProgressRes = res;
- stat->CombinedRes_Type = SZ_ERROR_PROGRESS;
- }
- else
- needContinue = p->mtc.needContinue;
+ res = p->mtc.mtProgress.res;
+ stat->ProgressRes = res;
+ stat->CombinedRes_Type = SZ_ERROR_PROGRESS;
}
-
- if (!needContinue)
+ else
+ needContinue = p->mtc.needContinue;
+ }
+
+ if (!needContinue)
+ {
{
SRes codeRes;
BoolInt truncated = False;
ECoderStatus status;
- CXzUnpacker *dec;
+ const CXzUnpacker *dec;
stat->OutSize = p->outProcessed;
if (p->finishedDecoderIndex >= 0)
{
- CXzDecMtThread *coder = &p->coders[(unsigned)p->finishedDecoderIndex];
+ const CXzDecMtThread *coder = &p->coders[(unsigned)p->finishedDecoderIndex];
codeRes = coder->codeRes;
dec = &coder->dec;
status = coder->status;
@@ -2679,41 +2743,46 @@ SRes XzDecMt_Decode(CXzDecMtHandle pp,
truncated = p->parsing_Truncated;
}
else
- return E_FAIL;
+ return SZ_ERROR_FAIL;
+
+ if (p->mainErrorCode != SZ_OK)
+ stat->DecodeRes = p->mainErrorCode;
XzStatInfo_SetStat(dec, p->finishMode,
- p->mtc.readProcessed, p->mtc.inProcessed,
+ // p->mtc.readProcessed,
+ p->mtc.inProcessed,
codeRes, status,
truncated,
stat);
+ }
- if (res == SZ_OK)
+ if (res == SZ_OK)
+ {
+ stat->ReadRes = p->mtc.readRes;
+
+ if (p->writeRes != SZ_OK)
{
- if (p->writeRes != SZ_OK)
- {
- res = p->writeRes;
- stat->CombinedRes_Type = SZ_ERROR_WRITE;
- }
- else if (p->mtc.readRes != SZ_OK && p->mtc.inProcessed == p->mtc.readProcessed)
- {
- res = p->mtc.readRes;
- stat->ReadRes = res;
- stat->CombinedRes_Type = SZ_ERROR_READ;
- }
- else if (p->mainErrorCode != SZ_OK)
- {
- res = p->mainErrorCode;
- }
+ res = p->writeRes;
+ stat->CombinedRes_Type = SZ_ERROR_WRITE;
}
-
- stat->CombinedRes = res;
- if (stat->CombinedRes_Type == SZ_OK)
- stat->CombinedRes_Type = res;
- return res;
+ else if (p->mtc.readRes != SZ_OK
+ // && p->mtc.inProcessed == p->mtc.readProcessed
+ && stat->DecodeRes == SZ_ERROR_INPUT_EOF)
+ {
+ res = p->mtc.readRes;
+ stat->CombinedRes_Type = SZ_ERROR_READ;
+ }
+ else if (stat->DecodeRes != SZ_OK)
+ res = stat->DecodeRes;
}
-
- PRF_STR("----- decoding ST -----");
+
+ stat->CombinedRes = res;
+ if (stat->CombinedRes_Type == SZ_OK)
+ stat->CombinedRes_Type = res;
+ return res;
}
+
+ PRF_STR("----- decoding ST -----");
}
#endif
@@ -2729,33 +2798,35 @@ SRes XzDecMt_Decode(CXzDecMtHandle pp,
, stat
);
+ #ifndef _7ZIP_ST
+ // we must set error code from MT decoding at first
+ if (p->mainErrorCode != SZ_OK)
+ stat->DecodeRes = p->mainErrorCode;
+ #endif
+
XzStatInfo_SetStat(&p->dec,
p->finishMode,
- p->readProcessed, p->inProcessed,
+ // p->readProcessed,
+ p->inProcessed,
p->codeRes, p->status,
False, // truncated
stat);
+ stat->ReadRes = p->readRes;
+
if (res == SZ_OK)
{
- /*
- if (p->writeRes != SZ_OK)
- {
- res = p->writeRes;
- stat->CombinedRes_Type = SZ_ERROR_WRITE;
- }
- else
- */
- if (p->readRes != SZ_OK && p->inProcessed == p->readProcessed)
+ if (p->readRes != SZ_OK
+ // && p->inProcessed == p->readProcessed
+ && stat->DecodeRes == SZ_ERROR_INPUT_EOF)
{
+ // we set read error as combined error, only if that error was the reason
+ // of decoding problem
res = p->readRes;
- stat->ReadRes = res;
stat->CombinedRes_Type = SZ_ERROR_READ;
}
- #ifndef _7ZIP_ST
- else if (p->mainErrorCode != SZ_OK)
- res = p->mainErrorCode;
- #endif
+ else if (stat->DecodeRes != SZ_OK)
+ res = stat->DecodeRes;
}
stat->CombinedRes = res;
diff --git a/multiarc/src/formats/7z/C/XzEnc.c b/multiarc/src/formats/7z/C/XzEnc.c
index d0a8b448..be174ccc 100644..100755
--- a/multiarc/src/formats/7z/C/XzEnc.c
+++ b/multiarc/src/formats/7z/C/XzEnc.c
@@ -1,5 +1,5 @@
/* XzEnc.c -- Xz Encode
-2019-02-02 : Igor Pavlov : Public domain */
+2021-04-01 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -36,7 +36,7 @@
#define XzBlock_ClearFlags(p) (p)->flags = 0;
-#define XzBlock_SetNumFilters(p, n) (p)->flags |= ((n) - 1);
+#define XzBlock_SetNumFilters(p, n) (p)->flags = (Byte)((p)->flags | ((n) - 1));
#define XzBlock_SetHasPackSize(p) (p)->flags |= XZ_BF_PACK_SIZE;
#define XzBlock_SetHasUnpackSize(p) (p)->flags |= XZ_BF_UNPACK_SIZE;
@@ -552,7 +552,7 @@ static void XzEncProps_Normalize_Fixed(CXzProps *p)
numBlocks++;
if (numBlocks < (unsigned)t2)
{
- t2r = (unsigned)numBlocks;
+ t2r = (int)numBlocks;
if (t2r == 0)
t2r = 1;
t3 = t1 * t2r;
@@ -751,7 +751,8 @@ static SRes Xz_CompressBlock(
}
else if (fp->ipDefined)
{
- SetUi32(filter->props, fp->ip);
+ Byte *ptr = filter->props;
+ SetUi32(ptr, fp->ip);
filter->propsSize = 4;
}
}
@@ -1196,7 +1197,7 @@ SRes XzEnc_Encode(CXzEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStr
p->outBufSize = destBlockSize;
}
- p->mtCoder.numThreadsMax = props->numBlockThreads_Max;
+ p->mtCoder.numThreadsMax = (unsigned)props->numBlockThreads_Max;
p->mtCoder.expectedDataSize = p->expectedDataSize;
RINOK(MtCoder_Code(&p->mtCoder));
diff --git a/multiarc/src/formats/7z/C/XzEnc.h b/multiarc/src/formats/7z/C/XzEnc.h
index 0c29e7e1..0c29e7e1 100644..100755
--- a/multiarc/src/formats/7z/C/XzEnc.h
+++ b/multiarc/src/formats/7z/C/XzEnc.h
diff --git a/multiarc/src/formats/7z/C/XzIn.c b/multiarc/src/formats/7z/C/XzIn.c
index ff48e2dd..84f868ec 100644..100755
--- a/multiarc/src/formats/7z/C/XzIn.c
+++ b/multiarc/src/formats/7z/C/XzIn.c
@@ -1,5 +1,5 @@
/* XzIn.c - Xz input
-2018-07-04 : Igor Pavlov : Public domain */
+2021-09-04 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -26,7 +26,8 @@ SRes Xz_ReadHeader(CXzStreamFlags *p, ISeqInStream *inStream)
#define READ_VARINT_AND_CHECK(buf, pos, size, res) \
{ unsigned s = Xz_ReadVarInt(buf + pos, size - pos, res); \
- if (s == 0) return SZ_ERROR_ARCHIVE; pos += s; }
+ if (s == 0) return SZ_ERROR_ARCHIVE; \
+ pos += s; }
SRes XzBlock_ReadHeader(CXzBlock *p, ISeqInStream *inStream, BoolInt *isIndex, UInt32 *headerSizeRes)
{
@@ -152,7 +153,7 @@ static SRes Xz_ReadBackward(CXzStream *p, ILookInStream *stream, Int64 *startOff
{
UInt64 indexSize;
Byte buf[XZ_STREAM_FOOTER_SIZE];
- UInt64 pos = *startOffset;
+ UInt64 pos = (UInt64)*startOffset;
if ((pos & 3) != 0 || pos < XZ_STREAM_FOOTER_SIZE)
return SZ_ERROR_NO_ARCHIVE;
@@ -202,8 +203,13 @@ static SRes Xz_ReadBackward(CXzStream *p, ILookInStream *stream, Int64 *startOff
if (!XzFlags_IsSupported(p->flags))
return SZ_ERROR_UNSUPPORTED;
- if (GetUi32(buf) != CrcCalc(buf + 4, 6))
- return SZ_ERROR_ARCHIVE;
+ {
+ /* to eliminate GCC 6.3 warning:
+ dereferencing type-punned pointer will break strict-aliasing rules */
+ const Byte *buf_ptr = buf;
+ if (GetUi32(buf_ptr) != CrcCalc(buf + 4, 6))
+ return SZ_ERROR_ARCHIVE;
+ }
indexSize = ((UInt64)GetUi32(buf + 4) + 1) << 2;
@@ -222,7 +228,7 @@ static SRes Xz_ReadBackward(CXzStream *p, ILookInStream *stream, Int64 *startOff
return SZ_ERROR_ARCHIVE;
pos -= (totalSize + XZ_STREAM_HEADER_SIZE);
RINOK(LookInStream_SeekTo(stream, pos));
- *startOffset = pos;
+ *startOffset = (Int64)pos;
}
{
CXzStreamFlags headerFlags;
@@ -294,12 +300,12 @@ SRes Xzs_ReadBackward(CXzs *p, ILookInStream *stream, Int64 *startOffset, ICompr
SRes res;
Xz_Construct(&st);
res = Xz_ReadBackward(&st, stream, startOffset, alloc);
- st.startOffset = *startOffset;
+ st.startOffset = (UInt64)*startOffset;
RINOK(res);
if (p->num == p->numAllocated)
{
- size_t newNum = p->num + p->num / 4 + 1;
- Byte *data = (Byte *)ISzAlloc_Alloc(alloc, newNum * sizeof(CXzStream));
+ const size_t newNum = p->num + p->num / 4 + 1;
+ void *data = ISzAlloc_Alloc(alloc, newNum * sizeof(CXzStream));
if (!data)
return SZ_ERROR_MEM;
p->numAllocated = newNum;
@@ -311,8 +317,8 @@ SRes Xzs_ReadBackward(CXzs *p, ILookInStream *stream, Int64 *startOffset, ICompr
p->streams[p->num++] = st;
if (*startOffset == 0)
break;
- RINOK(LookInStream_SeekTo(stream, *startOffset));
- if (progress && ICompressProgress_Progress(progress, endOffset - *startOffset, (UInt64)(Int64)-1) != SZ_OK)
+ RINOK(LookInStream_SeekTo(stream, (UInt64)*startOffset));
+ if (progress && ICompressProgress_Progress(progress, (UInt64)(endOffset - *startOffset), (UInt64)(Int64)-1) != SZ_OK)
return SZ_ERROR_PROGRESS;
}
return SZ_OK;