Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/elfmz/far2l.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorIvan Shatsky <root@free-tier-instance.us-west1-b.c.neural-map-305409.internal>2022-11-09 18:06:52 +0300
committerIvan Shatsky <root@free-tier-instance.us-west1-b.c.neural-map-305409.internal>2022-11-09 18:06:52 +0300
commit7aa83aab0760925946acfc9b50699c5c7ec7c6c4 (patch)
tree6f3286489bcdda84717e50c2e2dde9a181f33bc6
parent89d2196de63acaaec18d72518ebcea5e5392f8f0 (diff)
parent5bc4cca57baeaa694fd31f31401a56839f6730bc (diff)
Merge remote-tracking branch 'refs/remotes/origin/master'
-rw-r--r--HACKING.md70
-rw-r--r--README.md74
-rw-r--r--WinPort/WinCompat.h1
-rw-r--r--WinPort/src/APIFiles.cpp3
-rw-r--r--WinPort/src/Backend/TTY/TTYOutput.cpp28
-rw-r--r--WinPort/src/Backend/WX/Paint.cpp28
-rw-r--r--WinPort/src/Backend/WX/Paint.h3
-rw-r--r--colorer/configs/base/hrc/base/c-unix.ent.hrc682
-rw-r--r--colorer/configs/base/hrc/base/c.hrc5
-rw-r--r--colorer/configs/base/hrc/base/cpp.hrc11
-rw-r--r--colorer/configs/base/hrd/catalog-rgb.xml9
-rw-r--r--colorer/configs/base/hrd/reg.addons/conemu/ansi.reg19
-rw-r--r--colorer/configs/base/hrd/reg.addons/conemu/mirice.reg19
-rw-r--r--colorer/configs/base/hrd/reg.addons/conemu/mirror.reg19
-rw-r--r--colorer/configs/base/hrd/reg.addons/console/ansi.reg19
-rw-r--r--colorer/configs/base/hrd/reg.addons/console/mirice.reg19
-rw-r--r--colorer/configs/base/hrd/reg.addons/console/mirror.reg19
-rw-r--r--colorer/configs/base/hrd/rgb/blue.hrd4
-rw-r--r--colorer/configs/base/hrd/rgb/default.hrd90
-rw-r--r--colorer/configs/plug/colorere.hlf8
-rw-r--r--colorer/configs/plug/colorere.lng2
-rw-r--r--colorer/configs/plug/colorerr.hlf6
-rw-r--r--colorer/configs/plug/colorerr.lng2
-rw-r--r--colorer/src/Colorer-library/src/colorer/parsers/FileTypeImpl.cpp9
-rw-r--r--colorer/src/Colorer-library/src/colorer/parsers/FileTypeImpl.h4
-rw-r--r--colorer/src/Colorer-library/src/colorer/parsers/HRCParserImpl.cpp2
-rw-r--r--colorer/src/pcolorer2/FarEditor.cpp163
-rw-r--r--colorer/src/pcolorer2/FarEditorSet.cpp18
-rw-r--r--far2l/far2sdk/farplug-mb.h17
-rw-r--r--far2l/far2sdk/farplug-wide.h23
-rw-r--r--far2l/src/cfg/config.cpp3
-rw-r--r--far2l/src/console/AnsiEsc.cpp12
-rw-r--r--far2l/src/console/AnsiEsc.hpp1
-rw-r--r--far2l/src/console/keyboard.cpp2
-rw-r--r--far2l/src/console/scrbuf.cpp8
-rw-r--r--far2l/src/console/scrbuf.hpp8
-rw-r--r--far2l/src/copy.cpp9
-rw-r--r--far2l/src/edit.cpp92
-rw-r--r--far2l/src/edit.hpp13
-rw-r--r--far2l/src/editor.cpp18
-rw-r--r--far2l/src/plug/wrap.cpp2
-rw-r--r--far2l/src/setcolor.cpp55
-rw-r--r--multiarc/CMakeLists.txt4
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/7z.h4
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/7zAlloc.c0
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/7zAlloc.h0
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/7zArcIn.c44
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/7zBuf.c0
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/7zBuf.h0
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/7zBuf2.c0
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/7zCrc.c196
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/7zCrc.h0
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/7zCrcOpt.c16
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/7zDec.c47
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/7zFile.c252
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/7zFile.h14
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/7zStream.c4
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/7zTypes.h128
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/7zVersion.h10
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/7zVersion.rc0
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Aes.c143
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Aes.h26
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/AesOpt.c796
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Alloc.c40
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Alloc.h11
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Bcj2.c10
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Bcj2.h0
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Bcj2Enc.c8
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Blake2.h0
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Blake2s.c4
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Bra.c14
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Bra.h0
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Bra86.c4
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/BraIA64.c0
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/BwtSort.c6
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/BwtSort.h0
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Compiler.h12
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/CpuArch.c280
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/CpuArch.h169
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Delta.c167
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Delta.h0
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/DllSecur.c16
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/DllSecur.h4
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/HuffEnc.c4
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/HuffEnc.h0
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/LzFind.c1317
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/LzFind.h41
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/LzFindMt.c1303
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/LzFindMt.h46
-rwxr-xr-xmultiarc/src/formats/7z/C/LzFindOpt.c578
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/LzHash.h67
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Lzma2Dec.c5
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Lzma2Dec.h0
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Lzma2DecMt.c36
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Lzma2DecMt.h0
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Lzma2Enc.c14
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Lzma2Enc.h0
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Lzma86.h0
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Lzma86Dec.c0
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Lzma86Enc.c2
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/LzmaDec.c414
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/LzmaDec.h4
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/LzmaEnc.c448
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/LzmaEnc.h4
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/LzmaLib.c0
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/LzmaLib.h25
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/MtCoder.c22
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/MtCoder.h0
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/MtDec.c65
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/MtDec.h9
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Ppmd.h144
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Ppmd7.c890
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Ppmd7.h175
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Ppmd7Dec.c330
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Ppmd7Enc.c318
-rwxr-xr-xmultiarc/src/formats/7z/C/Ppmd7aDec.c279
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Ppmd8.c1116
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Ppmd8.h128
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Ppmd8Dec.c276
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Ppmd8Enc.c291
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Precomp.h0
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/RotateDefs.h0
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Sha1.c623
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Sha1.h56
-rwxr-xr-xmultiarc/src/formats/7z/C/Sha1Opt.c373
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Sha256.c452
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Sha256.h62
-rwxr-xr-xmultiarc/src/formats/7z/C/Sha256Opt.c373
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Sort.c0
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Sort.h0
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Threads.c471
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Threads.h182
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Xz.c4
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/Xz.h107
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/XzCrc64.c0
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/XzCrc64.h0
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/XzCrc64Opt.c8
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/XzDec.c317
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/XzEnc.c11
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/XzEnc.h0
-rwxr-xr-x[-rw-r--r--]multiarc/src/formats/7z/C/XzIn.c28
141 files changed, 11870 insertions, 3576 deletions
diff --git a/HACKING.md b/HACKING.md
new file mode 100644
index 00000000..b69d2031
--- /dev/null
+++ b/HACKING.md
@@ -0,0 +1,70 @@
+I implemented/borrowed from WINE some commonly used WinAPI functions. They are all declared in WinPort/WinPort.h and corresponding defines can be found in WinPort/WinCompat.h (both are included by WinPort/windows.h). Note that this stuff may not be 1-to-1 with the corresponding Win32 functionality and also doesn't provide full UNIX functionality, but it simplifies porting and can be considered a temporary scaffold.
+
+However, only the main executable is linked statically to WinPort, although it also _exports_ WinPort functionality, so plugins use it without the necessity to bring their own copies of this code. This is the reason that each plugin's binary should not statically link to WinPort.
+
+While FAR internally is UTF16 (because WinPort contains UTF16-related stuff), native Linux wchar_t size is 4 bytes (rather than 2 bytes), so potentially Linux FAR may become fully UTF32-capable in console interaction in the future, but while it uses Win32-style UTF16 functions it is not. However, programmers need to be aware that wchar_t is not 2 bytes long anymore.
+
+Inspect all printf format strings: unlike Windows, in Linux both wide and multibyte printf-like functions have the same multibyte and wide specifiers. This means that %s is always multibyte while %ls is always wide. So, any %s used in wide-printf-s or %ws used in any printf should be replaced with %ls.
+
+Update from 27aug: now it's possible by defining WINPORT_DIRECT to avoid renaming used Windows API and also to avoid changing format strings as swprintf will be intercepted by a compatibility wrapper.
+Update from 03/11/22: far2l's console emulator is capable of correctly rendering full-width and combining characters as well as 24 bit colors. This caused the following deviations in the behavior of console-simulation functions compared to their original Win32 API counterparts:
+ * CHAR_INFO's Char::UnicodeChar field extended to 64 bit length to be able to associate sequence of multiple WCHARs with single cell.
+ * Writing to console full-width character causes two cells to be used: first will get given character code in UnicodeChar field but next one will have UnicodeChar set to zero.
+ * Writing combined characters - a normal character followed by a set of diacritical marks - will make the UnicodeChar field contain a so-called 'composite' character code that represents a sequence of character codes registered with WINPORT(CompositeCharRegister). The actual sequence of WCHARs can be obtained with WINPORT(CompositeCharLookup). There is a macro CI_USING_COMPOSITE_CHAR that allows detecting whether a given CHAR_INFO contains a composite character code or a normal WCHAR.
+ * Both of the above transformations happen automatically _only_ when using the WriteConsole API. If one uses WriteConsoleOutput, then it's up to the caller to perform those transformations. Failing to do so will cause incorrect rendering of full-width or diacritical characters.
+ * CHAR_INFO's and CONSOLE_SCREEN_BUFFER_INFO's Attributes fields extended to 64 bit to be able to hold 24 bit RGB colors in higher bytes. Use the macros GET_RGB_FORE/GET_RGB_BACK/SET_RGB_FORE/SET_RGB_BACK/SET_RGB_BOTH to access those colors. Note that such colors will be used only if the FOREGROUND_TRUECOLOR/BACKGROUND_TRUECOLOR attribute is set. The old attributes define colors from the usual 16-element palette, which is used for rendering if ..._TRUECOLOR is not set or if the backend's target doesn't support more than 16 colors.
+
+## Plugin API
+
+Plugins API based on FAR Manager v2 plus following changes:
+
+### Added following entries to FarStandardFunctions:
+
+* `int Execute(const wchar_t *CmdStr, unsigned int ExecFlags);`
+...where ExecFlags - combination of values of EXECUTEFLAGS.
+Executes given command line, if EF_HIDEOUT and EF_NOWAIT are not specified then command will be executed on far2l virtual terminal.
+
+* `int ExecuteLibrary(const wchar_t *Library, const wchar_t *Symbol, const wchar_t *CmdStr, unsigned int ExecFlags)`
+Executes given shared library symbol in separate process (process creation behaviour is the same as for Execute).
+symbol function must be defined as: `int 'Symbol'(int argc, char *argv[])`
+
+* `void DisplayNotification(const wchar_t *action, const wchar_t *object);`
+Shows (depending on settings - always or if far2l in background) system shell-wide notification with given title and text.
+
+* `int DispatchInterThreadCalls();`
+far2l supports calling APIs from different threads by marshalling API calls from non-main threads into main one and dispatching them on main thread at certain known-safe points inside of dialog processing loops. DispatchInterThreadCalls() allows plugin to explicitly dispatch such calls and plugin must use it periodically in case it blocks main thread with some non-UI activity that may wait for other threads.
+
+* `void BackgroundTask(const wchar_t *Info, BOOL Started);`
+If a plugin implements tasks running in the background, it may invoke this function to indicate a pending task in the top-left corner.
+ * Info is a short description of task or just its owner and must be same string when invoked with Started TRUE or FALSE.
+
+* `size_t StrCellsCount(const wchar_t *Str, size_t CharsCount);`
+Returns count of console cells which will be used to display given string of CharsCount characters.
+
+* `size_t StrSizeOfCells(const wchar_t *Str, size_t CharsCount, size_t *CellsCount, BOOL RoundUp);`
+Returns count of characters which will be used to fill up to CellsCount cells from given string of CharsCount characters.
+The RoundUp argument tells what to do with a full-width character that is crossed by the CellsCount boundary.
+On return CellsCount contains cells count that will be filled by returned characters count, that:
+ * Can be smaller than initial value if string has too few characters to fill all CellsCount cells or if RoundUp was set to FALSE and last character would then overflow wanted amount.
+ * Can be larger by one than initial value if RoundUp was set to TRUE and last full-width character crossed initial value specified in *CellsCount.
+
+* `TruncStr and TruncPathStr`
+These two functions were not added, but were changed to use the console cell count as the string-limiting factor.
+
+
+### Added following commands into FILE_CONTROL_COMMANDS:
+* `FCTL_GETPANELPLUGINHANDLE`
+Can be used to interact with the plugin that renders the other panel.
+`hPlugin` can be set to `PANEL_ACTIVE` or `PANEL_PASSIVE`.
+`Param1` ignored.
+`Param2` points to value of type `HANDLE`, call sets that value to handle of plugin that renders specified panel or `INVALID_HANDLE_VALUE`.
+
+### Added following plugin-exported functions:
+* `int MayExitFARW();`
+far2l asks the plugin if it can exit now. If the plugin has some background tasks pending it may block exiting of far2l, however it is highly recommended to give the user a choice using a UI prompt.
+
+### Added following dialog messages:
+* `DM_GETCOLOR` - retrieves current color attributes of selected dialog item
+* `DM_SETCOLOR` - changes current color attributes of selected dialog item
+* `ECTL_ADDTRUECOLOR` - applies coloring to the editor like ECTL_ADDCOLOR does, but allows specifying a 24-bit RGB color using the EditorTrueColor structure.
+* `ECTL_GETTRUECOLOR` - retrieves coloring of the editor like ECTL_GETCOLOR does, but gets a 24-bit RGB color using the EditorTrueColor structure.
diff --git a/README.md b/README.md
index 055a1f13..eb7ab662 100644
--- a/README.md
+++ b/README.md
@@ -198,78 +198,8 @@ You can import the project into your favourite IDE like QtCreator, CodeLite, or
* Similar fork of Kitty: https://github.com/mihmig/KiTTY
* Tool to import color schemes from windows FAR manager 2 .reg format: https://github.com/unxed/far2l-deb/blob/master/far2l_import.pl
-## Notes on porting
-
-I implemented/borrowed from WINE some commonly used WinAPI functions. They are all declared in WinPort/WinPort.h and corresponding defines can be found in WinPort/WinCompat.h (both are included by WinPort/windows.h). Note that this stuff may not be 1-to-1 to corresponding Win32 functionality also doesn't provide full-UNIX functionality, but it simplifies porting and can be considered as temporary scaffold.
-
-However, only the main executable is linked statically to WinPort, although it also _exports_ WinPort functionality, so plugins use it without the neccessity to bring their own copies of this code. This is the reason that each plugin's binary should not statically link to WinPort.
-
-While FAR internally is UTF16 (because WinPort contains UTF16-related stuff), native Linux wchar_t size is 4 bytes (rather than 2 bytes) so potentially Linux FAR may be fully UTF32-capable console interaction in the future, but while it uses Win32-style UTF16 functions it does not. However, programmers need to be aware that wchar_t is not 2 bytes long anymore.
-
-Inspect all printf format strings: unlike Windows, in Linux both wide and multibyte printf-like functions have the same multibyte and wide specifiers. This means that %s is always multibyte while %ls is always wide. So, any %s used in wide-printf-s or %ws used in any printf should be replaced with %ls.
-
-Update from 27aug: now it's possible by defining WINPORT_DIRECT to avoid renaming used Windows API and also to avoid changing format strings as swprintf will be intercepted by a compatibility wrapper.
-
-Update from 03/11/22: far2l's console emulator capable to correctly render full-width and combining characters as well as 24 bit colors. This caused following deviation of console-simulation functions behavior comparing to original Win32 API counterparts:
- * CHAR_INFO's Char::UnicodeChar field extended to 64 bit length to be able to associate sequence of multiple WCHARs with single cell.
- * Writing to console full-width character causes two cells to be used: first will get given character code in UnicodeChar field but next one will have UnicodeChar set to zero.
- * Writing combined characters - normal character followed by set of diactrical marks - will make UnicodeChar field to contain so-called 'composite' character code that represents sequence of character codes registered with WINPORT(CompositeCharRegister). Actual sequence of WCHARs can be obtained by WINPORT(CompositeCharLookup). There is macro CI_USING_COMPOSITE_CHAR that allows to detect if given CHAR_INFO contains composite character code or normal WCHAR.
- * Both above transformations happen automatically _only_ if using WriteConsole API. If one uses WriteConsoleOutput - then its up to caller to perform that transformations. Failing to do so will cause incorrect rendering of full-width or diactrical characters.
- * CHAR_INFO's and CONSOLE_SCREEN_BUFFER_INFO's Attributes fields extended to 64 bit to be able to hold 24 bit RGB colors in higher bytes. Use macroses GET_RGB_FORE/GET_RGB_BACK/SET_RGB_FORE/SET_RGB_BACK/SET_RGB_BOTH to access that colors. Note that such colors will be used only if FOREGROUND_TRUECOLOR/BACKGROUND_TRUECOLOR attribute is set. Old attributes define colors from usual 16-elements palette used to render if ..._TRUECOLOR is not set or if backend's target doesn't support more than 16 colors.
-
-## Plugin API
-
-Plugins API based on FAR Manager v2 plus following changes:
-
-### Added following entries to FarStandardFunctions:
-
-* `int Execute(const wchar_t *CmdStr, unsigned int ExecFlags);`
-...where ExecFlags - combination of values of EXECUTEFLAGS.
-Executes given command line, if EF_HIDEOUT and EF_NOWAIT are not specified then command will be executed on far2l virtual terminal.
-
-* `int ExecuteLibrary(const wchar_t *Library, const wchar_t *Symbol, const wchar_t *CmdStr, unsigned int ExecFlags)`
-Executes given shared library symbol in separate process (process creation behaviour is the same as for Execute).
-symbol function must be defined as: `int 'Symbol'(int argc, char *argv[])`
-
-* `void DisplayNotification(const wchar_t *action, const wchar_t *object);`
-Shows (depending on settings - always or if far2l in background) system shell-wide notification with given title and text.
-
-* `int DispatchInterThreadCalls();`
-far2l supports calling APIs from different threads by marshalling API calls from non-main threads into main one and dispatching them on main thread at certain known-safe points inside of dialog processing loops. DispatchInterThreadCalls() allows plugin to explicitly dispatch such calls and plugin must use it periodically in case it blocks main thread with some non-UI activity that may wait for other threads.
-
-* `void BackgroundTask(const wchar_t *Info, BOOL Started);`
-If plugin implements tasks running in background it may invoke this function to indicate about pending task in left-top corner.
-Info is a short description of task or just its owner and must be same string when invoked with Started TRUE or FALSE.
-
-* `size_t StrCellsCount(const wchar_t *Str, size_t CharsCount);`
-Returns count of console cells which will be used to display given string of CharsCount characters.
-
-* `size_t StrSizeOfCells(const wchar_t *Str, size_t CharsCount, size_t *CellsCount, BOOL RoundUp);`
-Returns count of characters which will be used to fill up to CellsCount cells from given string of CharsCount characters.
-RoundUp argument tells what to do with full-width characters that crossed by CellsCount.
-On return CellsCount contains cells count that will be filled by returned characters count, that:
- Can be smaller than initial value if string has too few characters to fill all CellsCount cells or if RoundUp was set to FALSE and last character would then overflow wanted amount.
- Can be larger by one than initial value if RoundUp was set to TRUE and last full-width character crossed initial value specified in *CellsCount.
-
-* `TruncStr and TruncPathStr`
- This two functions not added but changed to use console cells count as string limiting factor.
-
-
-### Added following commands into FILE_CONTROL_COMMANDS:
-* `FCTL_GETPANELPLUGINHANDLE`
-Can be used to interract with plugin that renders other panel.
-`hPlugin` can be set to `PANEL_ACTIVE` or `PANEL_PASSIVE`.
-`Param1` ignored.
-`Param2` points to value of type `HANDLE`, call sets that value to handle of plugin that renders specified panel or `INVALID_HANDLE_VALUE`.
-
-### Added following plugin-exported functions:
-* `int MayExitFARW();`
-far2l asks plugin if it can exit now. If plugin has some background tasks pending it may block exiting of far2l, however it highly recommended to give user choice using UI prompt.
-
-### Added following dialog messages:
-* `DM_GETCOLOR` - retrieves get current color attributes of selected dialog item
-* `DM_SETCOLOR` - changes current color attributes of selected dialog item
+## Notes on porting and FAR Plugin API changes
+ * See HACKING.md
## Known issues:
* Only valid translations are English, Russian and Ukrainian, all other languages require deep correction.
-* Characters that occupy more than single cell or diacritic-like characters are rendered buggy, that means Chinese and Japanese texts are hardly readable in some cases.
diff --git a/WinPort/WinCompat.h b/WinPort/WinCompat.h
index 17286f60..f30fd733 100644
--- a/WinPort/WinCompat.h
+++ b/WinPort/WinCompat.h
@@ -611,6 +611,7 @@ typedef struct _INPUT_RECORD {
#define BACKGROUND_TRUECOLOR 0x0200 // Use 24 bit RGB colors set by SET_RGB_BACK
#define COMMON_LVB_REVERSE_VIDEO 0x4000 // Reverse fore/back ground attribute.
#define COMMON_LVB_UNDERSCORE 0x8000 // Underscore.
+#define COMMON_LVB_STRIKEOUT 0x2000 // Strikeout.
// Constants below not implemented and their bit values are reserved and must be zero-inited
// #define COMMON_LVB_GRID_HORIZONTAL
diff --git a/WinPort/src/APIFiles.cpp b/WinPort/src/APIFiles.cpp
index b1907a26..46ea4cc4 100644
--- a/WinPort/src/APIFiles.cpp
+++ b/WinPort/src/APIFiles.cpp
@@ -79,7 +79,8 @@ extern "C"
{
bool out = (fd == -1 || os_call_int(sdc_close, fd) == 0);
if (!out) {
- fprintf(stderr, "WinPortHandleFile: error %u closing fd %d\n", errno, fd);
+ ErrnoSaver es;
+ fprintf(stderr, "WinPortHandleFile: error %u closing fd %d\n", es.Get(), fd);
}
fd = -1;
diff --git a/WinPort/src/Backend/TTY/TTYOutput.cpp b/WinPort/src/Backend/TTY/TTYOutput.cpp
index 8f8ca44b..e09739a3 100644
--- a/WinPort/src/Backend/TTY/TTYOutput.cpp
+++ b/WinPort/src/Backend/TTY/TTYOutput.cpp
@@ -22,7 +22,7 @@
#define ATTRIBUTES_AFFECTING_BACKGROUND \
(BACKGROUND_BLUE | BACKGROUND_GREEN | BACKGROUND_RED | BACKGROUND_INTENSITY \
- | BACKGROUND_TRUECOLOR | COMMON_LVB_REVERSE_VIDEO | COMMON_LVB_UNDERSCORE)
+ | BACKGROUND_TRUECOLOR | COMMON_LVB_UNDERSCORE | COMMON_LVB_STRIKEOUT)
void TTYOutput::TrueColors::AppendSuffix(std::string &out, DWORD rgb)
{
@@ -36,7 +36,7 @@ void TTYOutput::TrueColors::AppendSuffix(std::string &out, DWORD rgb)
char buf[64];
const auto &it = _colors256_lookup.find(rgb);
if (it != _colors256_lookup.end()) {
- sprintf(buf, "5;%u;", it->second + 16);
+ sprintf(buf, "5;%u;", ((unsigned int)it->second) + 16);
} else {
sprintf(buf, "2;%u;%u;%u;", rgb & 0xff, (rgb >> 8) & 0xff, (rgb >> 16) & 0xff);
}
@@ -62,7 +62,9 @@ void TTYOutput::WriteUpdatedAttributes(DWORD64 attr, bool is_space)
}
if (is_space && (xa & ATTRIBUTES_AFFECTING_BACKGROUND) == 0) {
if ((attr & BACKGROUND_TRUECOLOR) == 0 || GET_RGB_BACK(xa) == 0) {
- return;
+ if ( ((attr | _prev_attr) & COMMON_LVB_REVERSE_VIDEO) == 0) {
+ return;
+ }
}
}
@@ -73,10 +75,19 @@ void TTYOutput::WriteUpdatedAttributes(DWORD64 attr, bool is_space)
_tmp_attrs+= (attr & FOREGROUND_INTENSITY) ? "1;" : "22;";
}
+ bool emit_tc_fore =
+ ((attr & FOREGROUND_TRUECOLOR) != 0 && (GET_RGB_FORE(xa) != 0 || (xa & FOREGROUND_TRUECOLOR) != 0));
+
+ bool emit_tc_back =
+ ((attr & BACKGROUND_TRUECOLOR) != 0 && (GET_RGB_BACK(xa) != 0 || (xa & BACKGROUND_TRUECOLOR) != 0));
+
if ( ((xa & (FOREGROUND_BLUE | FOREGROUND_GREEN | FOREGROUND_RED | FOREGROUND_INTENSITY)) != 0)
|| ((_prev_attr & FOREGROUND_TRUECOLOR) != 0 && (attr & FOREGROUND_TRUECOLOR) == 0) ) {
_tmp_attrs+= (attr & FOREGROUND_INTENSITY) ? '9' : '3';
AppendAnsiColorSuffix<FOREGROUND_RED, FOREGROUND_GREEN, FOREGROUND_BLUE>(_tmp_attrs, attr);
+ if ((attr & FOREGROUND_TRUECOLOR) != 0) {
+ emit_tc_fore = true;
+ }
}
if ( ((xa & (BACKGROUND_BLUE | BACKGROUND_GREEN | BACKGROUND_RED | BACKGROUND_INTENSITY)) != 0)
@@ -87,18 +98,25 @@ void TTYOutput::WriteUpdatedAttributes(DWORD64 attr, bool is_space)
_tmp_attrs+= '4';
}
AppendAnsiColorSuffix<BACKGROUND_RED, BACKGROUND_GREEN, BACKGROUND_BLUE>(_tmp_attrs, attr);
+ if ((attr & BACKGROUND_TRUECOLOR) != 0) {
+ emit_tc_back = true;
+ }
}
- if ((attr & FOREGROUND_TRUECOLOR) != 0 && (GET_RGB_FORE(xa) != 0 || (xa & FOREGROUND_TRUECOLOR) != 0)) {
+ if (emit_tc_fore) {
_tmp_attrs+= "38;";
_true_colors.AppendSuffix(_tmp_attrs, GET_RGB_FORE(attr));
}
- if ((attr & BACKGROUND_TRUECOLOR) != 0 && (GET_RGB_BACK(xa) != 0 || (xa & BACKGROUND_TRUECOLOR) != 0)) {
+ if (emit_tc_back) {
_tmp_attrs+= "48;";
_true_colors.AppendSuffix(_tmp_attrs, GET_RGB_BACK(attr));
}
+ if ( (xa & COMMON_LVB_STRIKEOUT) != 0) {
+ _tmp_attrs+= (attr & COMMON_LVB_STRIKEOUT) ? "9;" : "29;";
+ }
+
if ( (xa & COMMON_LVB_UNDERSCORE) != 0) {
_tmp_attrs+= (attr & COMMON_LVB_UNDERSCORE) ? "4;" : "24;";
}
diff --git a/WinPort/src/Backend/WX/Paint.cpp b/WinPort/src/Backend/WX/Paint.cpp
index d6f47725..1eabb702 100644
--- a/WinPort/src/Backend/WX/Paint.cpp
+++ b/WinPort/src/Backend/WX/Paint.cpp
@@ -506,7 +506,8 @@ void CursorProps::Update()
ConsolePainter::ConsolePainter(ConsolePaintContext *context, wxPaintDC &dc, wxString &buffer, CursorProps &cursor_props) :
_context(context), _dc(dc), _buffer(buffer), _cursor_props(cursor_props),
- _start_cx((unsigned int)-1), _start_back_cx((unsigned int)-1), _prev_fit_font_index(0), _prev_underlined(false)
+ _start_cx((unsigned int)-1), _start_back_cx((unsigned int)-1), _prev_fit_font_index(0),
+ _prev_underlined(false), _prev_strikeout(false)
{
_dc.SetPen(context->GetTransparentPen());
_dc.SetBackgroundMode(wxPENSTYLE_TRANSPARENT);
@@ -575,20 +576,31 @@ void ConsolePainter::FlushText(unsigned int cx_end)
_dc.DrawText(_buffer, _start_cx * _context->FontWidth(), _start_y);
_buffer.Empty();
}
- FlushUnderline(cx_end);
+ FlushDecorations(cx_end);
_start_cx = (unsigned int)-1;
_prev_fit_font_index = 0;
}
-void ConsolePainter::FlushUnderline(unsigned int cx_end)
+void ConsolePainter::FlushDecorations(unsigned int cx_end)
{
+ if (!_prev_underlined && !_prev_strikeout) {
+ return;
+ }
+ _dc.SetPen(wxColour(_clr_text.r, _clr_text.g, _clr_text.b));
+
if (_prev_underlined) {
- _dc.SetPen(wxColour(_clr_text.r, _clr_text.g, _clr_text.b));
_dc.DrawLine(_start_cx * _context->FontWidth(), _start_y + _context->FontHeight() - 1,
cx_end * _context->FontWidth(), _start_y + _context->FontHeight() - 1);
- _dc.SetPen(_context->GetTransparentPen());
_prev_underlined = false;
}
+
+ if (_prev_strikeout) {
+ _dc.DrawLine(_start_cx * _context->FontWidth(), _start_y + (_context->FontHeight() / 2),
+ cx_end * _context->FontWidth(), _start_y + (_context->FontHeight() / 2));
+ _prev_strikeout = false;
+ }
+
+ _dc.SetPen(_context->GetTransparentPen());
}
static inline unsigned char CalcFadeColor(unsigned char bg, unsigned char fg)
@@ -710,7 +722,7 @@ void ConsolePainter::NextChar(unsigned int cx, DWORD64 attributes, const wchar_t
FlushBackground(cx + nx);
WXCustomDrawCharPainter cdp(*this, clr_text, clr_back);
custom_draw(cdp, _start_y, cx);
- FlushUnderline(cx);
+ FlushDecorations(cx);
_start_cx = (unsigned int)-1;
_prev_fit_font_index = 0;
return;
@@ -718,8 +730,9 @@ void ConsolePainter::NextChar(unsigned int cx, DWORD64 attributes, const wchar_t
uint8_t fit_font_index = _context->CharFitTest(_dc, wcz);
const bool underlined = (attributes & COMMON_LVB_UNDERSCORE) != 0;
+ const bool strikeout = (attributes & COMMON_LVB_STRIKEOUT) != 0;
- if (fit_font_index == _prev_fit_font_index && _prev_underlined == underlined
+ if (fit_font_index == _prev_fit_font_index && _prev_underlined == underlined && _prev_strikeout == strikeout
&& _start_cx != (unsigned int)-1 && _clr_text == clr_text && _context->IsPaintBuffered()) {
_buffer+= wcz;
return;
@@ -730,6 +743,7 @@ void ConsolePainter::NextChar(unsigned int cx, DWORD64 attributes, const wchar_t
_prev_fit_font_index = fit_font_index;
_prev_underlined = underlined;
+ _prev_strikeout = strikeout;
_start_cx = cx;
_buffer = wcz;
diff --git a/WinPort/src/Backend/WX/Paint.h b/WinPort/src/Backend/WX/Paint.h
index b61199be..5127e2ab 100644
--- a/WinPort/src/Backend/WX/Paint.h
+++ b/WinPort/src/Backend/WX/Paint.h
@@ -99,6 +99,7 @@ class ConsolePainter
unsigned int _start_y;
uint8_t _prev_fit_font_index;
bool _prev_underlined;
+ bool _prev_strikeout;
std::map<WinPortRGB, wxPen *> _custom_draw_pens;
friend struct WXCustomDrawCharPainter;
@@ -107,7 +108,7 @@ class ConsolePainter
void PrepareBackground(unsigned int cx, const WinPortRGB &clr, unsigned int nx);
void FlushBackground(unsigned int cx_end);
void FlushText(unsigned int cx_end);
- void FlushUnderline(unsigned int cx_end);
+ void FlushDecorations(unsigned int cx_end);
public:
ConsolePainter(ConsolePaintContext *context, wxPaintDC &dc, wxString &_buffer, CursorProps &cursor_props);
diff --git a/colorer/configs/base/hrc/base/c-unix.ent.hrc b/colorer/configs/base/hrc/base/c-unix.ent.hrc
new file mode 100644
index 00000000..63ab4873
--- /dev/null
+++ b/colorer/configs/base/hrc/base/c-unix.ent.hrc
@@ -0,0 +1,682 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!-- UNIX keywords for c/c++/etc. -->
+
+<scheme name="Keywords-unix" if="unix-specific">
+
+ <keywords region='KeywordConstant'>
+ <word name='COMMAND_LINE_SIZE'/>
+ <word name='TIOCM_CAR'/>
+ <word name='TIOCM_CD'/>
+ <word name='TIOCM_CTS'/>
+ <word name='TIOCM_DSR'/>
+ <word name='TIOCM_DTR'/>
+ <word name='TIOCM_LE'/>
+ <word name='TIOCM_OUT1'/>
+ <word name='TIOCM_OUT2'/>
+ <word name='TIOCM_RI'/>
+ <word name='TIOCM_RNG'/>
+ <word name='TIOCM_RTS'/>
+ <word name='TIOCM_SR'/>
+ <word name='TIOCM_ST'/>
+ <word name='EBADE'/>
+ <word name='EBADFD'/>
+ <word name='EBADR'/>
+ <word name='EBADRQC'/>
+ <word name='EBADSLT'/>
+ <word name='ECHRNG'/>
+ <word name='ECOMM'/>
+ <word name='EDEADLOCK'/>
+ <word name='EDQUOT'/>
+ <word name='EHOSTDOWN'/>
+ <word name='EHWPOISON'/>
+ <word name='EISNAM'/>
+ <word name='EKEYEXPIRED'/>
+ <word name='EKEYREJECTED'/>
+ <word name='EKEYREVOKED'/>
+ <word name='EL2HLT'/>
+ <word name='EL2NSYNC'/>
+ <word name='EL3HLT'/>
+ <word name='EL3RST'/>
+ <word name='ELIBACC'/>
+ <word name='ELIBBAD'/>
+ <word name='ELIBEXEC'/>
+ <word name='ELIBMAX'/>
+ <word name='ELIBSCN'/>
+ <word name='ELNRANGE'/>
+ <word name='EMEDIUMTYPE'/>
+ <word name='EMULTIHOP'/>
+ <word name='ENOANO'/>
+ <word name='ENOKEY'/>
+ <word name='ENOMEDIUM'/>
+ <word name='ENONET'/>
+ <word name='ENOPKG'/>
+ <word name='ENOTBLK'/>
+ <word name='ENOTUNIQ'/>
+ <word name='EPFNOSUPPORT'/>
+ <word name='EREMCHG'/>
+ <word name='EREMOTE'/>
+ <word name='EREMOTEIO'/>
+ <word name='ERESTART'/>
+ <word name='ERFKILL'/>
+ <word name='ESHUTDOWN'/>
+ <word name='ESOCKTNOSUPPORT'/>
+ <word name='ESTALE'/>
+ <word name='ESTRPIPE'/>
+ <word name='ETOOMANYREFS'/>
+ <word name='EUCLEAN'/>
+ <word name='EUNATCH'/>
+ <word name='EUSERS'/>
+ <word name='EXFULL'/>
+ <word name='FASYNC'/>
+ <word name='FD_CLOEXEC'/>
+ <word name='F_DUPFD'/>
+ <word name='F_EXLCK'/>
+ <word name='F_GETFD'/>
+ <word name='F_GETFL'/>
+ <word name='F_GETLK'/>
+ <word name='F_GETLK64'/>
+ <word name='F_GETOWN'/>
+ <word name='F_GETOWNER_UIDS'/>
+ <word name='F_GETOWN_EX'/>
+ <word name='F_GETSIG'/>
+ <word name='FIOASYNC'/>
+ <word name='FIOCLEX'/>
+ <word name='FIOGETOWN'/>
+ <word name='FIONBIO'/>
+ <word name='FIONCLEX'/>
+ <word name='FIONREAD'/>
+ <word name='FIOSETOWN'/>
+ <word name='F_LINUX_SPECIFIC_BASE'/>
+ <word name='F_LOCK'/>
+ <word name='F_OFD_GETLK'/>
+ <word name='F_OFD_SETLK'/>
+ <word name='F_OFD_SETLKW'/>
+ <word name='F_OWNER_PGRP'/>
+ <word name='F_OWNER_PID'/>
+ <word name='F_OWNER_TID'/>
+ <word name='F_RDLCK'/>
+ <word name='F_SETFD'/>
+ <word name='F_SETFL'/>
+ <word name='F_SETLK'/>
+ <word name='F_SETLK64'/>
+ <word name='F_SETLKW'/>
+ <word name='F_SETLKW64'/>
+ <word name='F_SETOWN'/>
+ <word name='F_SETOWN_EX'/>
+ <word name='F_SETSIG'/>
+ <word name='F_SHLCK'/>
+ <word name='F_TEST'/>
+ <word name='F_TLOCK'/>
+ <word name='F_ULOCK'/>
+ <word name='F_UNLCK'/>
+ <word name='F_WRLCK'/>
+ <word name='LOCK_EX'/>
+ <word name='LOCK_MAND'/>
+ <word name='LOCK_NB'/>
+ <word name='LOCK_READ'/>
+ <word name='LOCK_RW'/>
+ <word name='LOCK_SH'/>
+ <word name='LOCK_UN'/>
+ <word name='LOCK_WRITE'/>
+ <word name='MADV_DODUMP'/>
+ <word name='MADV_DOFORK'/>
+ <word name='MADV_DONTDUMP'/>
+ <word name='MADV_DONTFORK'/>
+ <word name='MADV_DONTNEED'/>
+ <word name='MADV_FREE'/>
+ <word name='MADV_HUGEPAGE'/>
+ <word name='MADV_HWPOISON'/>
+ <word name='MADV_KEEPONFORK'/>
+ <word name='MADV_MERGEABLE'/>
+ <word name='MADV_NOHUGEPAGE'/>
+ <word name='MADV_NORMAL'/>
+ <word name='MADV_RANDOM'/>
+ <word name='MADV_REMOVE'/>
+ <word name='MADV_SEQUENTIAL'/>
+ <word name='MADV_SOFT_OFFLINE'/>
+ <word name='MADV_UNMERGEABLE'/>
+ <word name='MADV_WILLNEED'/>
+ <word name='MADV_WIPEONFORK'/>
+ <word name='MAP_ANONYMOUS'/>
+ <word name='MAP_DENYWRITE'/>
+ <word name='MAP_EXECUTABLE'/>
+ <word name='MAP_FILE'/>
+ <word name='MAP_FIXED'/>
+ <word name='MAP_GROWSDOWN'/>
+ <word name='MAP_HUGETLB'/>
+ <word name='MAP_LOCKED'/>
+ <word name='MAP_NONBLOCK'/>
+ <word name='MAP_NORESERVE'/>
+ <word name='MAP_POPULATE'/>
+ <word name='MAP_PRIVATE'/>
+ <word name='MAP_SHARED'/>
+ <word name='MAP_SHARED_VALIDATE'/>
+ <word name='MAP_STACK'/>
+ <word name='MAP_SYNC'/>
+ <word name='MAP_TYPE'/>
+ <word name='MCL_CURRENT'/>
+ <word name='MCL_FUTURE'/>
+ <word name='MCL_ONFAULT'/>
+ <word name='MLOCK_ONFAULT'/>
+ <word name='MS_ASYNC'/>
+ <word name='MS_INVALIDATE'/>
+ <word name='MS_SYNC'/>
+ <word name='_NSIG'/>
+ <word name='O_ACCMODE'/>
+ <word name='O_APPEND'/>
+ <word name='O_CLOEXEC'/>
+ <word name='O_CREAT'/>
+ <word name='O_DIRECT'/>
+ <word name='O_DIRECTORY'/>
+ <word name='O_DSYNC'/>
+ <word name='O_EXCL'/>
+ <word name='O_LARGEFILE'/>
+ <word name='O_NDELAY'/>
+ <word name='O_NOATIME'/>
+ <word name='O_NOCTTY'/>
+ <word name='O_NOFOLLOW'/>
+ <word name='O_NONBLOCK'/>
+ <word name='O_PATH'/>
+ <word name='O_RDONLY'/>
+ <word name='O_RDWR'/>
+ <word name='__O_SYNC'/>
+ <word name='O_SYNC'/>
+ <word name='__O_TMPFILE'/>
+ <word name='O_TMPFILE'/>
+ <word name='O_TMPFILE_MASK'/>
+ <word name='O_TRUNC'/>
+ <word name='O_WRONLY'/>
+ <word name='PAGE_SIZE'/>
+ <word name='PKEY_ACCESS_MASK'/>
+ <word name='PKEY_DISABLE_ACCESS'/>
+ <word name='PKEY_DISABLE_WRITE'/>
+ <word name='POLL_BUSY_LOOP'/>
+ <word name='POLLERR'/>
+ <word name='POLLFREE'/>
+ <word name='POLLHUP'/>
+ <word name='POLLIN'/>
+ <word name='POLLMSG'/>
+ <word name='POLLNVAL'/>
+ <word name='POLLOUT'/>
+ <word name='POLLPRI'/>
+ <word name='POLLRDBAND'/>
+ <word name='POLLRDHUP'/>
+ <word name='POLLRDNORM'/>
+ <word name='POLLREMOVE'/>
+ <word name='POLLWRBAND'/>
+ <word name='POLLWRNORM'/>
+ <word name='POLLWRNORM'/>
+ <word name='PROT_EXEC'/>
+ <word name='PROT_GROWSDOWN'/>
+ <word name='PROT_GROWSUP'/>
+ <word name='PROT_NONE'/>
+ <word name='PROT_READ'/>
+ <word name='PROT_SEM'/>
+ <word name='PROT_WRITE'/>
+ <word name='RLIMIT_CORE'/>
+ <word name='RLIMIT_CPU'/>
+ <word name='RLIMIT_DATA'/>
+ <word name='RLIMIT_FSIZE'/>
+ <word name='RLIMIT_LOCKS'/>
+ <word name='RLIMIT_MSGQUEUE'/>
+ <word name='RLIMIT_NICE'/>
+ <word name='RLIMIT_RTPRIO'/>
+ <word name='RLIMIT_RTTIME'/>
+ <word name='RLIMIT_SIGPENDING'/>
+ <word name='RLIMIT_STACK'/>
+ <word name='RLIM_NLIMITS'/>
+ <word name='SCM_TIMESTAMP'/>
+ <word name='SCM_TIMESTAMPING'/>
+ <word name='SCM_TIMESTAMPING_OPT_STATS'/>
+ <word name='SCM_TIMESTAMPING_PKTINFO'/>
+ <word name='SCM_TIMESTAMPNS'/>
+ <word name='SCM_WIFI_STATUS'/>
+ <word name='SEEK_CUR'/>
+ <word name='SEEK_END'/>
+ <word name='SEEK_SET'/>
+ <word name='S_IFBLK'/>
+ <word name='S_IFCHR'/>
+ <word name='S_IFDIR'/>
+ <word name='S_IFMT'/>
+ <word name='S_IFREG'/>
+ <word name='SIGABRT'/>
+ <word name='SIGALRM'/>
+ <word name='SIG_BLOCK'/>
+ <word name='SIGBUS'/>
+ <word name='SIGCHLD'/>
+ <word name='SIGCONT'/>
+ <word name='SIG_DFL'/>
+ <word name='SIG_ERR'/>
+ <word name='SIGFPE'/>
+ <word name='SIGHUP'/>
+ <word name='SIG_IGN'/>
+ <word name='SIGILL'/>
+ <word name='SIGINT'/>
+ <word name='SIGIO'/>
+ <word name='SIGIOT'/>
+ <word name='SIGKILL'/>
+ <word name='SIGLOST'/>
+ <word name='SIGPIPE'/>
+ <word name='SIGPOLL'/>
+ <word name='SIGPROF'/>
+ <word name='SIGPWR'/>
+ <word name='SIGQUIT'/>
+ <word name='SIGRTMAX'/>
+ <word name='SIGRTMIN'/>
+ <word name='SIGSEGV'/>
+ <word name='SIG_SETMASK'/>
+ <word name='SIGSTKFLT'/>
+ <word name='SIGSTOP'/>
+ <word name='SIGSYS'/>
+ <word name='SIGTERM'/>
+ <word name='SIGTRAP'/>
+ <word name='SIGTSTP'/>
+ <word name='SIGTTIN'/>
+ <word name='SIGTTOU'/>
+ <word name='SIG_UNBLOCK'/>
+ <word name='SIGUNUSED'/>
+ <word name='SIGURG'/>
+ <word name='SIGUSR1'/>
+ <word name='SIGUSR2'/>
+ <word name='SIGVTALRM'/>
+ <word name='SIGWINCH'/>
+ <word name='SIGXCPU'/>
+ <word name='SIGXFSZ'/>
+ <word name='SIOCATMARK'/>
+ <word name='SIOCGPGRP'/>
+ <word name='SIOCGSTAMP'/>
+ <word name='SIOCGSTAMPNS'/>
+ <word name='SIOCSPGRP'/>
+ <word name='S_IRGRP'/>
+ <word name='S_IROTH'/>
+ <word name='S_IRUSR'/>
+ <word name='S_IRWXG'/>
+ <word name='S_IRWXO'/>
+ <word name='S_IRWXU'/>
+ <word name='S_ISGID'/>
+ <word name='S_ISUID'/>
+ <word name='S_IWGRP'/>
+ <word name='S_IWOTH'/>
+ <word name='S_IWUSR'/>
+ <word name='S_IXGRP'/>
+ <word name='S_IXOTH'/>
+ <word name='S_IXUSR'/>
+ <word name='SO_ACCEPTCONN'/>
+ <word name='SO_ATTACH_BPF'/>
+ <word name='SO_ATTACH_FILTER'/>
+ <word name='SO_ATTACH_REUSEPORT_CBPF'/>
+ <word name='SO_ATTACH_REUSEPORT_EBPF'/>
+ <word name='SO_BINDTODEVICE'/>
+ <word name='SO_BPF_EXTENSIONS'/>
+ <word name='SO_BROADCAST'/>
+ <word name='SO_BSDCOMPAT'/>
+ <word name='SO_BUSY_POLL'/>
+ <word name='SO_CNX_ADVICE'/>
+ <word name='SO_COOKIE'/>
+ <word name='SO_DEBUG'/>
+ <word name='SO_DETACH_BPF'/>
+ <word name='SO_DETACH_FILTER'/>
+ <word name='SO_DOMAIN'/>
+ <word name='SO_DONTROUTE'/>
+ <word name='SO_ERROR'/>
+ <word name='SO_GET_FILTER'/>
+ <word name='SO_INCOMING_CPU'/>
+ <word name='SO_INCOMING_NAPI_ID'/>
+ <word name='SO_KEEPALIVE'/>
+ <word name='SO_LINGER'/>
+ <word name='SO_LOCK_FILTER'/>
+ <word name='SOL_SOCKET'/>
+ <word name='SO_MARK'/>
+ <word name='SO_MAX_PACING_RATE'/>
+ <word name='SO_MEMINFO'/>
+ <word name='SO_NO_CHECK'/>
+ <word name='SO_NOFCS'/>
+ <word name='SO_OOBINLINE'/>
+ <word name='SO_PASSCRED'/>
+ <word name='SO_PASSSEC'/>
+ <word name='SO_PEEK_OFF'/>
+ <word name='SO_PEERCRED'/>
+ <word name='SO_PEERGROUPS'/>
+ <word name='SO_PEERNAME'/>
+ <word name='SO_PEERSEC'/>
+ <word name='SO_PRIORITY'/>
+ <word name='SO_PROTOCOL'/>
+ <word name='SO_RCVBUF'/>
+ <word name='SO_RCVBUFFORCE'/>
+ <word name='SO_RCVLOWAT'/>
+ <word name='SO_RCVTIMEO'/>
+ <word name='SO_REUSEADDR'/>
+ <word name='SO_REUSEPORT'/>
+ <word name='SO_RXQ_OVFL'/>
+ <word name='SO_SECURITY_AUTHENTICATION'/>
+ <word name='SO_SECURITY_ENCRYPTION_NETWORK'/>
+ <word name='SO_SECURITY_ENCRYPTION_TRANSPORT'/>
+ <word name='SO_SELECT_ERR_QUEUE'/>
+ <word name='SO_SNDBUF'/>
+ <word name='SO_SNDBUFFORCE'/>
+ <word name='SO_SNDLOWAT'/>
+ <word name='SO_SNDTIMEO'/>
+ <word name='SO_TIMESTAMP'/>
+ <word name='SO_TIMESTAMPING'/>
+ <word name='SO_TIMESTAMPNS'/>
+ <word name='SO_TYPE'/>
+ <word name='SO_WIFI_STATUS'/>
+ <word name='SO_ZEROCOPY'/>
+ <word name='STDERR_FILENO'/>
+ <word name='STDIN_FILENO'/>
+ <word name='STDOUT_FILENO'/>
+ <word name='TCFLSH'/>
+ <word name='TCGETA'/>
+ <word name='TCGETS'/>
+ <word name='TCGETS2'/>
+ <word name='TCGETX'/>
+ <word name='TCSBRK'/>
+ <word name='TCSBRKP'/>
+ <word name='TCSETA'/>
+ <word name='TCSETAF'/>
+ <word name='TCSETAW'/>
+ <word name='TCSETS'/>
+ <word name='TCSETS2'/>
+ <word name='TCSETSF'/>
+ <word name='TCSETSF2'/>
+ <word name='TCSETSW'/>
+ <word name='TCSETSW2'/>
+ <word name='TCSETX'/>
+ <word name='TCSETXF'/>
+ <word name='TCSETXW'/>
+ <word name='TCXONC'/>
+ <word name='TIOCCBRK'/>
+ <word name='TIOCCONS'/>
+ <word name='TIOCEXCL'/>
+ <word name='TIOCGDEV'/>
+ <word name='TIOCGETD'/>
+ <word name='TIOCGEXCL'/>
+ <word name='TIOCGICOUNT'/>
+ <word name='TIOCGLCKTRMIOS'/>
+ <word name='TIOCGPGRP'/>
+ <word name='TIOCGPKT'/>
+ <word name='TIOCGPTLCK'/>
+ <word name='TIOCGPTN'/>
+ <word name='TIOCGPTPEER'/>
+ <word name='TIOCGRS485'/>
+ <word name='TIOCGSERIAL'/>
+ <word name='TIOCGSID'/>
+ <word name='TIOCGSOFTCAR'/>
+ <word name='TIOCGWINSZ'/>
+ <word name='TIOCINQ'/>
+ <word name='TIOCLINUX'/>
+ <word name='TIOCMBIC'/>
+ <word name='TIOCMBIS'/>
+ <word name='TIOCMGET'/>
+ <word name='TIOCMIWAIT'/>
+ <word name='TIOCM_LOOP'/>
+ <word name='TIOCMSET'/>
+ <word name='TIOCNOTTY'/>
+ <word name='TIOCNXCL'/>
+ <word name='TIOCOUTQ'/>
+ <word name='TIOCPKT'/>
+ <word name='TIOCPKT_DATA'/>
+ <word name='TIOCPKT_DOSTOP'/>
+ <word name='TIOCPKT_FLUSHREAD'/>
+ <word name='TIOCPKT_FLUSHWRITE'/>
+ <word name='TIOCPKT_IOCTL'/>
+ <word name='TIOCPKT_NOSTOP'/>
+ <word name='TIOCPKT_START'/>
+ <word name='TIOCPKT_STOP'/>
+ <word name='TIOCSBRK'/>
+ <word name='TIOCSCTTY'/>
+ <word name='TIOCSERCONFIG'/>
+ <word name='TIOCSERGETLSR'/>
+ <word name='TIOCSERGETMULTI'/>
+ <word name='TIOCSERGSTRUCT'/>
+ <word name='TIOCSERGWILD'/>
+ <word name='TIOCSERSETMULTI'/>
+ <word name='TIOCSERSWILD'/>
+ <word name='TIOCSER_TEMT'/>
+ <word name='TIOCSETD'/>
+ <word name='TIOCSIG'/>
+ <word name='TIOCSLCKTRMIOS'/>
+ <word name='TIOCSPGRP'/>
+ <word name='TIOCSPTLCK'/>
+ <word name='TIOCSRS485'/>
+ <word name='TIOCSSERIAL'/>
+ <word name='TIOCSSOFTCAR'/>
+ <word name='TIOCSTI'/>
+ <word name='TIOCSWINSZ'/>
+ <word name='TIOCVHANGUP'/>
+ <word name='PTHREAD_BARRIER_SERIAL_THREAD'/>
+ <word name='PTHREAD_MUTEX_INITIALIZER'/>
+ <word name='PTHREAD_RWLOCK_INITIALIZER'/>
+ <word name='PTHREAD_CANCEL_ASYNCHRONOUS'/>
+ <word name='PTHREAD_CANCEL_DEFERRED'/>
+ <word name='PTHREAD_CANCEL_DISABLE'/>
+ <word name='PTHREAD_CANCEL_ENABLE'/>
+ <word name='PTHREAD_CREATE_DETACHED'/>
+ <word name='PTHREAD_CREATE_JOINABLE'/>
+ <word name='PTHREAD_EXPLICIT_SCHED'/>
+ <word name='PTHREAD_INHERIT_SCHED'/>
+ <word name='PTHREAD_PROCESS_PRIVATE'/>
+ <word name='PTHREAD_PROCESS_SHARED'/>
+ <word name='PTHREAD_SCOPE_PROCESS'/>
+ <word name='PTHREAD_SCOPE_SYSTEM'/>
+ <word name='FD_CLR'/>
+ <word name='FD_ISSET'/>
+ <word name='FD_SET'/>
+ <word name='FD_SETSIZE'/>
+ <word name='FD_ZERO'/>
+ <word name='RB_AUTOBOOT'/>
+ <word name='RB_DISABLE_CAD'/>
+ <word name='RB_ENABLE_CAD'/>
+ <word name='RB_HALT_SYSTEM'/>
+ <word name='RB_KEXEC'/>
+ <word name='RB_POWER_OFF'/>
+ <word name='RB_SW_SUSPEND'/>
+ </keywords>
+
+ <keywords region='KeywordUnixIO'>
+ <word name='access'/>
+ <word name='chdir'/>
+ <word name='chmod'/>
+ <word name='chown'/>
+ <word name='close'/>
+ <word name='closedir'/>
+ <word name='creat'/>
+ <word name='fcntl'/>
+ <word name='fdopen'/>
+ <word name='fileno'/>
+ <word name='fpathconf'/>
+ <word name='fstat'/>
+ <word name='getcwd'/>
+ <word name='link'/>
+ <word name='lstat'/>
+ <word name='mkdir'/>
+ <word name='mkfifo'/>
+ <word name='open'/>
+ <word name='opendir'/>
+ <word name='pipe'/>
+ <word name='pipe2'/>
+ <word name='poll'/>
+ <word name='pread'/>
+ <word name='pwrite'/>
+ <word name='read'/>
+ <word name='readdir'/>
+ <word name='rewinddir'/>
+ <word name='rmdir'/>
+ <word name='tmpnam'/>
+ <word name='unlink'/>
+ <word name='utime'/>
+ <word name='write'/>
+ </keywords>
+
+ <keywords region='KeywordUnix'>
+ <word name='dirent'/>
+ <word name='alarm'/>
+ <word name='cfgetispeed'/>
+ <word name='cfgetospeed'/>
+ <word name='cfsetispeed'/>
+ <word name='cfsetospeed'/>
+ <word name='ctermid'/>
+ <word name='cuserid'/>
+ <word name='execl'/>
+ <word name='execle'/>
+ <word name='execlp'/>
+ <word name='execv'/>
+ <word name='execve'/>
+ <word name='execvp'/>
+ <word name='fork'/>
+ <word name='vfork'/>
+ <word name='getegid'/>
+ <word name='geteuid'/>
+ <word name='getgid'/>
+ <word name='getgrgid'/>
+ <word name='getgrnam'/>
+ <word name='getgroups'/>
+ <word name='getlogin'/>
+ <word name='getpgrp'/>
+ <word name='getpid'/>
+ <word name='getppid'/>
+ <word name='getpwnam'/>
+ <word name='getpwuid'/>
+ <word name='getrandom'/>
+ <word name='getentropy'/>
+ <word name='getuid'/>
+ <word name='ioctl'/>
+ <word name='ioperm'/>
+ <word name='iopl'/>
+ <word name='isatty'/>
+ <word name='kexec'/>
+ <word name='kill'/>
+ <word name='killpg'/>
+ <word name='pause'/>
+ <word name='setgid'/>
+ <word name='setjmp'/>
+ <word name='setpgid'/>
+ <word name='setsid'/>
+ <word name='setuid'/>
+ <word name='sigaction'/>
+ <word name='sigaddset'/>
+ <word name='sigdelset'/>
+ <word name='sigemptyset'/>
+ <word name='sigfillset'/>
+ <word name='sigismember'/>
+ <word name='siglongjmp'/>
+ <word name='sigpending'/>
+ <word name='sigprocmask'/>
+ <word name='sigsetjmp'/>
+ <word name='sigsuspend'/>
+ <word name='sleep'/>
+ <word name='stat'/>
+ <word name='reboot'/>
+ <word name='spawn'/>
+ <word name='spawnp'/>
+ <word name='syscall'/>
+ <word name='sysconf'/>
+ <word name='tcdrain'/>
+ <word name='tcflow'/>
+ <word name='tcflush'/>
+ <word name='tcgetattr'/>
+ <word name='tcgetpgrp'/>
+ <word name='tcsendbreak'/>
+ <word name='tcsetattr'/>
+ <word name='tcsetpgrp'/>
+ <word name='times'/>
+ <word name='time'/>
+ <word name='ttyname'/>
+ <word name='tzset'/>
+ <word name='umask'/>
+ <word name='uname'/>
+ <word name='waitpid'/>
+ <word name='pthread_create'/>
+ <word name='pthread_cancel'/>
+ <word name='pthread_detach'/>
+ <word name='pthread_equal'/>
+ <word name='pthread_exit'/>
+ <word name='pthread_join'/>
+ <word name='pthread_kill'/>
+ <word name='pthread_once'/>
+ <word name='pthread_self'/>
+ <word name='pthread_setcancelstate'/>
+ <word name='pthread_setcanceltype'/>
+ <word name='pthread_testcancel'/>
+ <word name='pthread_yield'/>
+ <word name='pthread_attr_destroy'/>
+ <word name='pthread_attr_getinheritsched'/>
+ <word name='pthread_attr_getschedparam'/>
+ <word name='pthread_attr_getschedpolicy'/>
+ <word name='pthread_attr_getscope'/>
+ <word name='pthread_attr_getstacksize'/>
+ <word name='pthread_attr_getstackaddr'/>
+ <word name='pthread_attr_getdetachstate'/>
+ <word name='pthread_attr_init'/>
+ <word name='pthread_attr_setinheritsched'/>
+ <word name='pthread_attr_setschedparam'/>
+ <word name='pthread_attr_setschedpolicy'/>
+ <word name='pthread_attr_setscope'/>
+ <word name='pthread_attr_setstacksize'/>
+ <word name='pthread_attr_setstackaddr'/>
+ <word name='pthread_attr_setdetachstate'/>
+ <word name='pthread_mutexattr_destroy'/>
+ <word name='pthread_mutexattr_getprioceiling'/>
+ <word name='pthread_mutexattr_getprotocol'/>
+ <word name='pthread_mutexattr_gettype'/>
+ <word name='pthread_mutexattr_init'/>
+ <word name='pthread_mutexattr_setprioceiling'/>
+ <word name='pthread_mutexattr_setprotocol'/>
+ <word name='pthread_mutexattr_settype'/>
+ <word name='pthread_mutex_destroy'/>
+ <word name='pthread_mutex_init'/>
+ <word name='pthread_mutex_lock'/>
+ <word name='pthread_mutex_timedlock'/>
+ <word name='pthread_mutex_trylock'/>
+ <word name='pthread_mutex_unlock'/>
+ <word name='pthread_condattr_destroy'/>
+ <word name='pthread_condattr_init'/>
+ <word name='pthread_cond_broadcast'/>
+ <word name='pthread_cond_destroy'/>
+ <word name='pthread_cond_init'/>
+ <word name='pthread_cond_signal'/>
+ <word name='pthread_cond_timedwait'/>
+ <word name='pthread_cond_wait'/>
+ <word name='pthread_rwlock_destroy'/>
+ <word name='pthread_rwlock_init'/>
+ <word name='pthread_rwlock_rdlock'/>
+ <word name='pthread_rwlock_tryrdlock'/>
+ <word name='pthread_rwlock_trywrlock'/>
+ <word name='pthread_rwlock_unlock'/>
+ <word name='pthread_rwlock_wrlock'/>
+ <word name='pthread_rwlockattr_destroy'/>
+ <word name='pthread_rwlockattr_getpshared'/>
+ <word name='pthread_rwlockattr_init'/>
+ <word name='pthread_rwlockattr_setpshared'/>
+ <word name='pthread_key_create'/>
+ <word name='pthread_key_delete'/>
+ <word name='pthread_getspecific'/>
+ <word name='pthread_setspecific'/>
+ <word name='pthread_atfork'/>
+ <word name='pthread_cleanup_pop'/>
+ <word name='pthread_cleanup_push'/>
+ <word name='sysinfo'/>
+ <word name='get_nprocs'/>
+ <word name='get_nprocs_conf'/>
+ <word name='get_phys_pages'/>
+ <word name='get_avphys_pages'/>
+
+ <word name='mmap'/>
+ <word name='munmap'/>
+ <word name='mmap64'/>
+ <word name='munmap'/>
+ <word name='mprotect'/>
+ <word name='msync'/>
+ <word name='madvise'/>
+ <word name='posix_madvise'/>
+ <word name='mlock'/>
+ <word name='munlock'/>
+ <word name='mlockall'/>
+ <word name='munlockall'/>
+ <word name='mremap'/>
+ <word name='remap_file_pages'/>
+ <word name='shm_open'/>
+ <word name='shm_unlink'/>
+ </keywords>
+
+</scheme>
diff --git a/colorer/configs/base/hrc/base/c.hrc b/colorer/configs/base/hrc/base/c.hrc
index 1c925b07..029c2a4b 100644
--- a/colorer/configs/base/hrc/base/c.hrc
+++ b/colorer/configs/base/hrc/base/c.hrc
@@ -1,6 +1,7 @@
<?xml version="1.0" encoding='UTF-8'?>
<!DOCTYPE hrc PUBLIC "-//Cail Lomecb//DTD Colorer HRC take5//EN"
"http://colorer.sf.net/2003/hrc.dtd"[
+<!ENTITY c-unix SYSTEM "c-unix.ent.hrc">
<!ENTITY c-win32 SYSTEM "c-win32.ent.hrc">
]>
<hrc version="take5" xmlns="http://colorer.sf.net/2003/hrc"
@@ -59,6 +60,8 @@
<region name="KeywordLibFunctions" parent="def:FunctionKeyword"/>
<region name="KeywordStructure" parent="def:StructKeyword"/>
+ <region name="KeywordUnixIO" parent="KeywordStructure"/>
+ <region name="KeywordUnix" parent="KeywordLibFunctions"/>
<region name="KeywordWin32" parent="KeywordLibFunctions"/>
<region name="KeywordOpenGL" parent="KeywordLibFunctions"/>
<region name="KeywordNetwork" parent="KeywordLibFunctions"/>
@@ -648,6 +651,7 @@
<inherit scheme="Keyword-Stdlibs"/>
<inherit scheme="Keywords-win32"/>
+ <inherit scheme="Keywords-unix"/>
</scheme>
@@ -908,6 +912,7 @@
</keywords>
</scheme>
+ &c-unix;
&c-win32;
</type>
diff --git a/colorer/configs/base/hrc/base/cpp.hrc b/colorer/configs/base/hrc/base/cpp.hrc
index 295db971..623e514e 100644
--- a/colorer/configs/base/hrc/base/cpp.hrc
+++ b/colorer/configs/base/hrc/base/cpp.hrc
@@ -218,6 +218,7 @@
<!-- !!EE (add keywords): STL -->
<keywords region="KeywordSTL">
<word name="std" region="KeywordNamespace"/>
+ <word name="chrono" region="KeywordNamespace"/>
<!-- c++11 -->
<word name="move"/>
@@ -230,17 +231,25 @@
<word name="list"/>
<word name="set"/>
<word name="multiset"/>
+ <word name="unordered_set"/>
+ <word name="unordered_multiset"/>
<word name="map"/>
<word name="multimap"/>
+ <word name="unordered_map"/>
+ <word name="unordered_multimap"/>
<word name="hash_map"/>
<word name="deque"/>
<word name="stack"/>
<word name="queue"/>
<word name="priority_queue"/>
<word name="string"/>
+ <word name="wstring"/>
<word name="array"/>
<word name="valarray"/>
<word name="biset"/>
+ <word name="mutex"/>
+ <word name="unique_lock"/>
+ <word name="lock_guard"/>
<word name="char_traits"/>
<word name="basic_string"/>
@@ -295,6 +304,8 @@
<word name="raw_storage_iterator"/>
<word name="allocator"/>
<word name="auto_ptr"/>
+ <word name="shared_ptr"/>
+ <word name="unique_ptr"/>
<word name="pair"/>
<!-- alogoritms -->
diff --git a/colorer/configs/base/hrd/catalog-rgb.xml b/colorer/configs/base/hrd/catalog-rgb.xml
index 88978146..91382a6f 100644
--- a/colorer/configs/base/hrd/catalog-rgb.xml
+++ b/colorer/configs/base/hrd/catalog-rgb.xml
@@ -1,9 +1,12 @@
- <hrd class="rgb" name="default" description="White (crimsoned)">
- <location link="&hrd;/rgb/white.hrd"/>
- </hrd>
+ <hrd class="rgb" name="default" description="far2l default">
+ <location link="&hrd;/rgb/default.hrd"/>
+ </hrd>
<hrd class="rgb" name="blue" description="Blue (far-truemod)">
<location link="&hrd;/rgb/blue.hrd"/>
</hrd>
+ <hrd class="rgb" name="white" description="White (crimsoned)">
+ <location link="&hrd;/rgb/white.hrd"/>
+ </hrd>
<hrd class="rgb" name="navy" description="Navy (seashore)">
<location link="&hrd;/rgb/navy.hrd"/>
</hrd>
diff --git a/colorer/configs/base/hrd/reg.addons/conemu/ansi.reg b/colorer/configs/base/hrd/reg.addons/conemu/ansi.reg
deleted file mode 100644
index a9e26919..00000000
--- a/colorer/configs/base/hrd/reg.addons/conemu/ansi.reg
+++ /dev/null
@@ -1,19 +0,0 @@
-REGEDIT4
-
-[HKEY_CURRENT_USER\Software\ConEmu]
-"ColorTable00"=dword:00000000
-"ColorTable01"=dword:00AA0000
-"ColorTable02"=dword:0000AA00
-"ColorTable03"=dword:00AAAA00
-"ColorTable04"=dword:000000AA
-"ColorTable05"=dword:00AA00AA
-"ColorTable06"=dword:000055AA
-"ColorTable07"=dword:00AAAAAA
-"ColorTable08"=dword:00555555
-"ColorTable09"=dword:00FF5555
-"ColorTable10"=dword:0055FF55
-"ColorTable11"=dword:00FFFF55
-"ColorTable12"=dword:005555FF
-"ColorTable13"=dword:00FF55FF
-"ColorTable14"=dword:0055FFFF
-"ColorTable15"=dword:00FFFFFF
diff --git a/colorer/configs/base/hrd/reg.addons/conemu/mirice.reg b/colorer/configs/base/hrd/reg.addons/conemu/mirice.reg
deleted file mode 100644
index 39664ba8..00000000
--- a/colorer/configs/base/hrd/reg.addons/conemu/mirice.reg
+++ /dev/null
@@ -1,19 +0,0 @@
-REGEDIT4
-
-[HKEY_CURRENT_USER\Software\ConEmu]
-"ColorTable00"=dword:00a9a9a9
-"ColorTable01"=dword:00f5f5f5
-"ColorTable02"=dword:00609018
-"ColorTable03"=dword:00bca800
-"ColorTable04"=dword:009314ff
-"ColorTable05"=dword:00c00088
-"ColorTable06"=dword:001010BB
-"ColorTable07"=dword:00703530
-"ColorTable08"=dword:00d3d3d3
-"ColorTable09"=dword:00b07800
-"ColorTable10"=dword:00aacd66
-"ColorTable11"=dword:00ff8440
-"ColorTable12"=dword:007280fa
-"ColorTable13"=dword:00d670da
-"ColorTable14"=dword:0087b8de
-"ColorTable15"=dword:00ffbf00
diff --git a/colorer/configs/base/hrd/reg.addons/conemu/mirror.reg b/colorer/configs/base/hrd/reg.addons/conemu/mirror.reg
deleted file mode 100644
index 33535f03..00000000
--- a/colorer/configs/base/hrd/reg.addons/conemu/mirror.reg
+++ /dev/null
@@ -1,19 +0,0 @@
-REGEDIT4
-
-[HKEY_CURRENT_USER\Software\ConEmu]
-"ColorTable00"=dword:00000000
-"ColorTable01"=dword:00AA0000
-"ColorTable02"=dword:0000AA00
-"ColorTable03"=dword:00AAAA00
-"ColorTable04"=dword:000000AA
-"ColorTable05"=dword:00AA22CC
-"ColorTable06"=dword:00CC0000
-"ColorTable07"=dword:00AAAAAA
-"ColorTable08"=dword:00777777
-"ColorTable09"=dword:00FF5555
-"ColorTable10"=dword:0055FF55
-"ColorTable11"=dword:00FFFF55
-"ColorTable12"=dword:005555FF
-"ColorTable13"=dword:00FF55FF
-"ColorTable14"=dword:0055FFFF
-"ColorTable15"=dword:00FFFFFF
diff --git a/colorer/configs/base/hrd/reg.addons/console/ansi.reg b/colorer/configs/base/hrd/reg.addons/console/ansi.reg
deleted file mode 100644
index a86101f0..00000000
--- a/colorer/configs/base/hrd/reg.addons/console/ansi.reg
+++ /dev/null
@@ -1,19 +0,0 @@
-REGEDIT4
-
-[HKEY_CURRENT_USER\Console]
-"ColorTable00"=dword:00000000
-"ColorTable01"=dword:00AA0000
-"ColorTable02"=dword:0000AA00
-"ColorTable03"=dword:00AAAA00
-"ColorTable04"=dword:000000AA
-"ColorTable05"=dword:00AA00AA
-"ColorTable06"=dword:000055AA
-"ColorTable07"=dword:00AAAAAA
-"ColorTable08"=dword:00555555
-"ColorTable09"=dword:00FF5555
-"ColorTable10"=dword:0055FF55
-"ColorTable11"=dword:00FFFF55
-"ColorTable12"=dword:005555FF
-"ColorTable13"=dword:00FF55FF
-"ColorTable14"=dword:0055FFFF
-"ColorTable15"=dword:00FFFFFF
diff --git a/colorer/configs/base/hrd/reg.addons/console/mirice.reg b/colorer/configs/base/hrd/reg.addons/console/mirice.reg
deleted file mode 100644
index e5acfebc..00000000
--- a/colorer/configs/base/hrd/reg.addons/console/mirice.reg
+++ /dev/null
@@ -1,19 +0,0 @@
-REGEDIT4
-
-[HKEY_CURRENT_USER\Console]
-"ColorTable00"=dword:00a9a9a9
-"ColorTable01"=dword:00f5f5f5
-"ColorTable02"=dword:00609018
-"ColorTable03"=dword:00bca800
-"ColorTable04"=dword:009314ff
-"ColorTable05"=dword:00c00088
-"ColorTable06"=dword:001010BB
-"ColorTable07"=dword:00703530
-"ColorTable08"=dword:00d3d3d3
-"ColorTable09"=dword:00b07800
-"ColorTable10"=dword:00aacd66
-"ColorTable11"=dword:00ff8440
-"ColorTable12"=dword:007280fa
-"ColorTable13"=dword:00d670da
-"ColorTable14"=dword:0087b8de
-"ColorTable15"=dword:00ffbf00
diff --git a/colorer/configs/base/hrd/reg.addons/console/mirror.reg b/colorer/configs/base/hrd/reg.addons/console/mirror.reg
deleted file mode 100644
index ecf092fe..00000000
--- a/colorer/configs/base/hrd/reg.addons/console/mirror.reg
+++ /dev/null
@@ -1,19 +0,0 @@
-REGEDIT4
-
-[HKEY_CURRENT_USER\Console]
-"ColorTable00"=dword:00000000
-"ColorTable01"=dword:00AA0000
-"ColorTable02"=dword:0000AA00
-"ColorTable03"=dword:00AAAA00
-"ColorTable04"=dword:000000AA
-"ColorTable05"=dword:00AA22CC
-"ColorTable06"=dword:00CC0000
-"ColorTable07"=dword:00AAAAAA
-"ColorTable08"=dword:00777777
-"ColorTable09"=dword:00FF5555
-"ColorTable10"=dword:0055FF55
-"ColorTable11"=dword:00FFFF55
-"ColorTable12"=dword:005555FF
-"ColorTable13"=dword:00FF55FF
-"ColorTable14"=dword:0055FFFF
-"ColorTable15"=dword:00FFFFFF
diff --git a/colorer/configs/base/hrd/rgb/blue.hrd b/colorer/configs/base/hrd/rgb/blue.hrd
index 8a1fc730..ac7e97e7 100644
--- a/colorer/configs/base/hrd/rgb/blue.hrd
+++ b/colorer/configs/base/hrd/rgb/blue.hrd
@@ -4,8 +4,8 @@
<hrd xmlns="http://colorer.sf.net/2003/hrd">
<assign name="def:Text" fore="#DADAFF" back="#000060"/>
- <assign name="def:HorzCross" fore="#000000" back="#e8e7f8"/>
- <assign name="def:VertCross" fore="#000000" back="#e1e0f2"/>
+ <assign name="def:HorzCross" fore="#DADAFF" back="#202080"/>
+ <assign name="def:VertCross" fore="#DADAFF" back="#202080"/>
<assign name="def:Number" fore="#00E000"/>
<assign name="def:NumberDec" fore="#00D000"/>
diff --git a/colorer/configs/base/hrd/rgb/default.hrd b/colorer/configs/base/hrd/rgb/default.hrd
new file mode 100644
index 00000000..1a916823
--- /dev/null
+++ b/colorer/configs/base/hrd/rgb/default.hrd
@@ -0,0 +1,90 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE hrd PUBLIC "-//Cail Lomecb//DTD Colorer HRD take5//EN"
+ "http://colorer.sf.net/2003/hrd.dtd">
+<hrd xmlns="http://colorer.sf.net/2003/hrd">
+
+ <assign name="def:Text" fore="#99EEEE" back="#000060"/>
+ <assign name="def:HorzCross" fore="#99EEEE" back="#000020"/>
+ <assign name="def:VertCross" fore="#99EEEE" back="#000020"/>
+
+ <assign name="def:Number" fore="#00E000"/>
+ <assign name="def:NumberDec" fore="#00E020"/>
+ <assign name="def:NumberHex" fore="#00E040"/>
+ <assign name="def:NumberOct" fore="#00E060"/>
+ <assign name="def:NumberBin" fore="#40E040"/>
+ <assign name="def:NumberFloat" fore="#00E080"/>
+ <assign name="def:NumberSuffix" fore="#008000"/>
+
+ <assign name="def:String" fore="#88ee88"/>
+ <assign name="def:StringContent" fore="#ee8888" style='1'/>
+ <assign name="def:StringEdge" fore="#40a000"/>
+ <assign name="def:Character" fore="#aaff88"/>
+ <assign name="def:CharacterContent" fore="#aaff88"/>
+
+ <assign name="def:Comment" fore="#696989"/>
+ <assign name="def:CommentContent" fore="#7F9FBF" style='1'/>
+ <assign name="def:CommentEdge" fore="#606060"/>
+ <assign name="def:CommentDoc" fore="#7FBFDF"/>
+ <assign name="def:CommentDocEdge" fore="#606060"/>
+
+ <assign name="def:Symbol" fore="#E0E0C0"/>
+ <assign name="def:SymbolStrong" fore="#F0E080"/>
+ <assign name="def:Prefix" fore="#6699ff"/>
+ <assign name="def:PrefixStrong" fore="#4466ee"/>
+
+ <assign name="def:Operator" fore="#44aadd"/>
+
+ <assign name="def:Keyword" fore="#FFFFFF"/>
+ <assign name="def:KeywordStrong" fore="#bb7977" style='1'/>
+ <assign name="def:TypeKeyword" fore="#ffaf9f"/>
+
+ <assign name="def:FunctionKeyword" fore="#DDDD00"/>
+ <assign name="def:DeprecatedKeyword" fore="#DD6622"/>
+ <assign name="def:InterfaceKeyword" fore="#DD8800"/>
+ <assign name="def:ClassKeyword" fore="#EFB977" style='1'/>
+ <assign name="def:StructKeyword" fore="#EEEEA0"/>
+ <assign name="def:TypeKeyword" fore="#FFE977"/>
+
+ <assign name="def:Function" fore="#6060F0"/>
+ <assign name="def:Register" fore="#60F060"/>
+ <assign name="def:Constant" fore="#a00065"/>
+ <assign name="def:Var" fore="#95b9d7"/>
+ <assign name="def:VarStrong" fore="#f089A7"/>
+ <assign name="def:Identifier" fore="#005fd2"/>
+ <assign name="def:BooleanConstant" fore="#a030ff"/>
+
+ <assign name="def:Directive" fore="#40ff00"/>
+ <assign name="def:Parameter" fore="#e7a6e7"/>
+ <assign name="def:ParameterStrong" fore="#00eedd"/>
+ <assign name="def:ParameterUnknown" fore="#ee8822"/>
+
+ <assign name="def:Tag" fore="#F0F0D0"/>
+ <assign name="def:OpenTag" fore="#A697F0"/>
+ <assign name="def:CloseTag" fore="#A697F0"/>
+
+ <assign name="def:Label" fore="#5aa3ff" style='4'/>
+ <assign name="def:LabelStrong" fore="#5aa3ff" back="#200050"/>
+
+ <assign name="def:Insertion" fore="#DADAFF" back="#200050"/>
+ <assign name="def:InsertionStart" fore="#DADAFF" back="#200050"/>
+ <assign name="def:InsertionEnd" fore="#DADAFF" back="#200050"/>
+
+ <assign name="def:Error" fore="#EEEEFF" back="#883355" style='3'/>
+ <assign name="def:ErrorText" fore="#ee00ee" style='1'/>
+
+ <assign name="def:TODO" fore="#ffffff" back="#606000" style='4'/>
+ <assign name="def:Debug" fore="#80abfd" back="#007084"/>
+
+ <assign name="def:Path" fore="#f040e0"/>
+ <assign name="def:URI" fore="#5555DD"/>
+ <assign name="def:EMail" fore="#7144c4"/>
+
+ <assign name="def:Date" fore="#009797"/>
+ <assign name="def:Time" fore="#8745a0"/>
+
+ <assign name="def:PairStart" fore="#FF4040" back='#000050' style='4'/>
+ <assign name="def:PairEnd" fore="#FF4040" back='#000050' style='4'/>
+ <assign name="def:PairStrongStart" fore="#EE00EE"/>
+ <assign name="def:PairStrongEnd" fore="#EE00EE"/>
+
+</hrd>
diff --git a/colorer/configs/plug/colorere.hlf b/colorer/configs/plug/colorere.hlf
index 16e50706..fcf7cf0a 100644
--- a/colorer/configs/plug/colorere.hlf
+++ b/colorer/configs/plug/colorere.hlf
@@ -68,13 +68,13 @@ $^#FarColorer's settings.#
#Log file#
Full path in this field specifies the file that will store diagnostic messages
- #--------------------------TrueMod Settings------------------------#
+ #--------------------------TrueColor Settings------------------------#
#Enabled#
- Enable/Disable TrueMod in plugin.
+ Enable/Disable the use of TrueColor (24-bit RGB) coloring.
#Color style:#
- Choose a color style, which will be used for coloring text in TrueMod.
- The choice does not work if the plugin is disabled.
+ Choose a color style, which will be used for coloring text in TrueColor.
+ The choice does not work if TrueColor is disabled or unavailable.
#------------------------------------------------------------------#
#[ Test schema library ]#
diff --git a/colorer/configs/plug/colorere.lng b/colorer/configs/plug/colorere.lng
index 1e718ff3..02d42484 100644
--- a/colorer/configs/plug/colorere.lng
+++ b/colorer/configs/plug/colorere.lng
@@ -48,7 +48,7 @@
"Fatal error loading the library schemes"
"Select color style"
"Change Editor &background"
-"TrueMod settings"
+"TrueColor settings"
"&Users file of color styles"
"Users &file of schemes"
"Sc&heme settings"
diff --git a/colorer/configs/plug/colorerr.hlf b/colorer/configs/plug/colorerr.hlf
index 95efe54d..d2fa991d 100644
--- a/colorer/configs/plug/colorerr.hlf
+++ b/colorer/configs/plug/colorerr.hlf
@@ -73,13 +73,13 @@ $^#Настройки плагина.#
#Log файл#
Полный путь в этом поле задает файл, в который будут записываться диагностические сообщения.
- #--------------------------Настройки TrueMod-----------------------#
+ #--------------------------Настройки TrueColor-----------------------#
#Включить#
- Включает/Отключает TrueMod режим работы плагина.
+ Включает/Отключает TrueColor (24-битные цвета) режим работы плагина.
#Цветовой стиль#
Выбор цветового стиля, который будет использоваться при раскраске текста в
- режиме TrueMod. Выбор не работает, если плагин отключен или недоступен режим TrueMod.
+ режиме TrueColor. Выбор не работает, если TrueColor отключен или недоступен.
#------------------------------------------------------------------#
#[ Тест библиотеки схем ]#
diff --git a/colorer/configs/plug/colorerr.lng b/colorer/configs/plug/colorerr.lng
index 7d25bc86..61e57158 100644
--- a/colorer/configs/plug/colorerr.lng
+++ b/colorer/configs/plug/colorerr.lng
@@ -48,7 +48,7 @@
"Ошибка загрузки библиотеки схем"
"Выбор цветового стиля"
"И&зменять цвет фона редактора"
-"Настройки TrueMod"
+"Настройки TrueColor"
"&Файл цветовых стилей пользователя"
"Фай&л списка схем пользователя"
"&Настройки схем"
diff --git a/colorer/src/Colorer-library/src/colorer/parsers/FileTypeImpl.cpp b/colorer/src/Colorer-library/src/colorer/parsers/FileTypeImpl.cpp
index 026d7ff2..e5e14b8f 100644
--- a/colorer/src/Colorer-library/src/colorer/parsers/FileTypeImpl.cpp
+++ b/colorer/src/Colorer-library/src/colorer/parsers/FileTypeImpl.cpp
@@ -13,14 +13,7 @@ FileTypeImpl::FileTypeImpl(HRCParserImpl* hrcParser): name(nullptr), group(nullp
}
FileTypeImpl::~FileTypeImpl(){
- for(auto it : chooserVector){
- delete it;
- }
chooserVector.clear();
-
- for(const auto& it: paramsHash){
- delete it.second;
- }
paramsHash.clear();
importVector.clear();
@@ -135,7 +128,7 @@ size_t FileTypeImpl::getParamUserValueCount() const{
double FileTypeImpl::getPriority(const String *fileName, const String *fileContent) const{
SMatches match;
double cur_prior = 0;
- for(auto ftc : chooserVector){
+ for(const auto &ftc : chooserVector){
if (fileName != nullptr && ftc->isFileName() && ftc->getRE()->parse(fileName, &match))
cur_prior += ftc->getPriority();
if (fileContent != nullptr && ftc->isFileContent() && ftc->getRE()->parse(fileContent, &match))
diff --git a/colorer/src/Colorer-library/src/colorer/parsers/FileTypeImpl.h b/colorer/src/Colorer-library/src/colorer/parsers/FileTypeImpl.h
index 5c7fa8a2..12e5c85c 100644
--- a/colorer/src/Colorer-library/src/colorer/parsers/FileTypeImpl.h
+++ b/colorer/src/Colorer-library/src/colorer/parsers/FileTypeImpl.h
@@ -91,8 +91,8 @@ protected:
HRCParserImpl *hrcParser;
SchemeImpl *baseScheme;
- std::vector<FileTypeChooser*> chooserVector;
- std::unordered_map<SString, TypeParameter*> paramsHash;
+ std::vector<std::unique_ptr<FileTypeChooser> > chooserVector;
+ std::unordered_map<SString, std::unique_ptr<TypeParameter> > paramsHash;
std::vector<UString> importVector;
uXmlInputSource inputSource;
diff --git a/colorer/src/Colorer-library/src/colorer/parsers/HRCParserImpl.cpp b/colorer/src/Colorer-library/src/colorer/parsers/HRCParserImpl.cpp
index 66a49a27..d318817c 100644
--- a/colorer/src/Colorer-library/src/colorer/parsers/HRCParserImpl.cpp
+++ b/colorer/src/Colorer-library/src/colorer/parsers/HRCParserImpl.cpp
@@ -368,7 +368,7 @@ void HRCParserImpl::addPrototypeDetectParam(const xercesc::DOMElement* elem)
CString weight = CString(elem->getAttribute(hrcFilenameAttrWeight));
UnicodeTools::getNumber(&weight, &prior);
auto* ftc = new FileTypeChooser(ctype, prior, matchRE);
- parseProtoType->chooserVector.push_back(ftc);
+ parseProtoType->chooserVector.emplace_back(ftc);
}
void HRCParserImpl::addPrototypeParameters(const xercesc::DOMElement* elem)
diff --git a/colorer/src/pcolorer2/FarEditor.cpp b/colorer/src/pcolorer2/FarEditor.cpp
index a4a7542a..b5a5caf9 100644
--- a/colorer/src/pcolorer2/FarEditor.cpp
+++ b/colorer/src/pcolorer2/FarEditor.cpp
@@ -606,23 +606,21 @@ int FarEditor::editorEvent(int event, void *param)
// fills back
if (lno == ei.CurLine && showHorizontalCross){
- if (!TrueMod){
- addFARColor(lno, 0, ei.LeftPos + ei.WindowSizeX, horzCrossColor);
- }
- else{
- addFARColor(lno, 0, ei.LeftPos + ei.WindowSizeX, convert(nullptr));
- }
+ addFARColor(lno, 0, ei.LeftPos + ei.WindowSizeX, horzCrossColor);
}
else{
addFARColor(lno, 0, ei.LeftPos + ei.WindowSizeX, convert(nullptr));
}
- if (showVerticalCross && !TrueMod){
+ if (showVerticalCross){
+ auto col = vertCrossColor;
ecp_cl.StringNumber = lno;
ecp_cl.SrcPos = ecp.DestPos;
info->EditorControl(ECTL_TABTOREAL, &ecp_cl);
- vertCrossColor.concolor |= 0x10000;
- addFARColor(lno, ecp_cl.DestPos, ecp_cl.DestPos+1, vertCrossColor);
+ if (!TrueMod) {
+ col.concolor |= 0x10000;
+ }
+ addFARColor(lno, ecp_cl.DestPos, ecp_cl.DestPos+1, col);
};
bool vertCrossDone = false;
@@ -645,17 +643,22 @@ int FarEditor::editorEvent(int event, void *param)
if ((lno != ei.CurLine || !showHorizontalCross || crossZOrder == 0)){
color col = convert(l1->styled());
- //TODO
if (lno == ei.CurLine && showHorizontalCross){
- if (!TrueMod){
if (foreDefault(col)){
- col.concolor = (col.concolor&0xF0) + (horzCrossColor.concolor&0xF);
+ if (!TrueMod){
+ col.concolor = (col.concolor&0xF0) + (horzCrossColor.concolor&0xF);
+ } else {
+ col.fg = horzCrossColor.fg;
+ }
}
if (backDefault(col)){
- col.concolor = (col.concolor&0xF) + (horzCrossColor.concolor&0xF0);
+ if (!TrueMod){
+ col.concolor = (col.concolor&0xF) + (horzCrossColor.concolor&0xF0);
+ } else {
+ col.bk = horzCrossColor.bk;
+ }
}
- }
};
if (!col.concolor){
continue;
@@ -675,29 +678,44 @@ int FarEditor::editorEvent(int event, void *param)
};
// column
- if (!TrueMod && showVerticalCross && crossZOrder == 0 && l1->start <= ecp_cl.DestPos && ecp_cl.DestPos < lend){
+ if (showVerticalCross && crossZOrder == 0 && l1->start <= ecp_cl.DestPos && ecp_cl.DestPos < lend){
col = convert(l1->styled());
+ if (foreDefault(col)) {
+ if (!TrueMod){
+ col.concolor = (col.concolor&0xF0) + (vertCrossColor.concolor&0xF);
+ } else {
+ col.fg = vertCrossColor.fg; // vertical cross column: use the vertical cross colour, like the branch above
+ }
+ }
- if (foreDefault(col)) col.concolor = (col.concolor&0xF0) + (vertCrossColor.concolor&0xF);
-
- if (backDefault(col)) col.concolor = (col.concolor&0xF) + (vertCrossColor.concolor&0xF0);
+ if (backDefault(col)) {
+ if (!TrueMod){
+ col.concolor = (col.concolor&0xF) + (vertCrossColor.concolor&0xF0);
+ } else {
+ col.bk = vertCrossColor.bk; // vertical cross column: use the vertical cross colour, like the branch above
+ }
+ }
ecp_cl.StringNumber = lno;
ecp_cl.SrcPos = ecp.DestPos;
info->EditorControl(ECTL_TABTOREAL, &ecp_cl);
- col.concolor|=0x10000;
+ if (!TrueMod) {
+ col.concolor|=0x10000;
+ }
addFARColor(lno, ecp_cl.DestPos, ecp_cl.DestPos+1, col);
vertCrossDone = true;
};
};
};
};
- if (!TrueMod && showVerticalCross && !vertCrossDone){
+ if (showVerticalCross && !vertCrossDone){
ecp_cl.StringNumber = lno;
ecp_cl.SrcPos = ecp.DestPos;
info->EditorControl(ECTL_TABTOREAL, &ecp_cl);
- vertCrossColor.concolor |= 0x10000;
+ if (!TrueMod) {
+ vertCrossColor.concolor |= 0x10000;
+ }
addFARColor(lno, ecp_cl.DestPos, ecp_cl.DestPos+1, vertCrossColor);
};
};
@@ -712,29 +730,44 @@ int FarEditor::editorEvent(int event, void *param)
if (pm != nullptr){
color col = convert(pm->start->styled());
- // TODO
- if (!TrueMod && showHorizontalCross){
+ if (showHorizontalCross){
if (foreDefault(col)){
- col.concolor = (col.concolor&0xF0) + (horzCrossColor.concolor&0xF);
+ if (!TrueMod) {
+ col.concolor = (col.concolor&0xF0) + (horzCrossColor.concolor&0xF);
+ } else {
+ col.fg = horzCrossColor.fg;
+ }
}
if (backDefault(col)){
- col.concolor = (col.concolor&0xF) + (horzCrossColor.concolor&0xF0);
+ if (!TrueMod) {
+ col.concolor = (col.concolor&0xF) + (horzCrossColor.concolor&0xF0);
+ } else {
+ col.bk = horzCrossColor.bk;
+ }
}
};
//
addFARColor(ei.CurLine, pm->start->start, pm->start->end, col);
// TODO
- if (!TrueMod && showVerticalCross && !showHorizontalCross && pm->start->start <= ei.CurPos && ei.CurPos < pm->start->end){
+ if (showVerticalCross && !showHorizontalCross && pm->start->start <= ei.CurPos && ei.CurPos < pm->start->end){
col = convert(pm->start->styled());
if (foreDefault(col)){
- col.concolor = (col.concolor&0xF0) + (vertCrossColor.concolor&0xF);
+ if (!TrueMod) {
+ col.concolor = (col.concolor&0xF0) + (vertCrossColor.concolor&0xF);
+ } else {
+ col.fg = vertCrossColor.fg;
+ }
}
if (backDefault(col)){
- col.concolor = (col.concolor&0xF) + (vertCrossColor.concolor&0xF0);
+ if (!TrueMod) {
+ col.concolor = (col.concolor&0xF) + (vertCrossColor.concolor&0xF0);
+ } else {
+ col.bk = vertCrossColor.bk;
+ }
}
col.concolor|=0x10000;
@@ -745,13 +778,21 @@ int FarEditor::editorEvent(int event, void *param)
col = convert(pm->end->styled());
//
- if (!TrueMod && showHorizontalCross && pm->eline == ei.CurLine){
+ if (showHorizontalCross && pm->eline == ei.CurLine){
if (foreDefault(col)){
- col.concolor = (col.concolor&0xF0) + (horzCrossColor.concolor&0xF);
+ if (!TrueMod) {
+ col.concolor = (col.concolor&0xF0) + (horzCrossColor.concolor&0xF);
+ } else {
+ col.fg = horzCrossColor.fg;
+ }
}
if (backDefault(col)){
- col.concolor = (col.concolor&0xF) + (horzCrossColor.concolor&0xF0);
+ if (!TrueMod) {
+ col.concolor = (col.concolor&0xF) + (horzCrossColor.concolor&0xF0);
+ } else {
+ col.bk = horzCrossColor.bk;
+ }
}
};
//
@@ -761,15 +802,23 @@ int FarEditor::editorEvent(int event, void *param)
info->EditorControl(ECTL_TABTOREAL, &ecp);
//
- if (!TrueMod && showVerticalCross && pm->end->start <= ecp.DestPos && ecp.DestPos < pm->end->end){
+ if (showVerticalCross && pm->end->start <= ecp.DestPos && ecp.DestPos < pm->end->end){
col = convert(pm->end->styled());
if (foreDefault(col)){
- col.concolor = (col.concolor&0xF0) + (vertCrossColor.concolor&0xF);
+ if (!TrueMod) {
+ col.concolor = (col.concolor&0xF0) + (vertCrossColor.concolor&0xF);
+ } else {
+ col.fg = vertCrossColor.fg;
+ }
}
if (backDefault(col)){
- col.concolor = (col.concolor&0xF) + (vertCrossColor.concolor&0xF0);
+ if (!TrueMod) {
+ col.concolor = (col.concolor&0xF) + (vertCrossColor.concolor&0xF0); // default background: keep own low nibble, take the cross colour's high nibble (same masks as the removed line)
+ } else {
+ col.bk = vertCrossColor.bk;
+ }
}
col.concolor|=0x10000;
@@ -1249,12 +1298,58 @@ bool FarEditor::backDefault(color col)
void FarEditor::addFARColor(int lno, int s, int e, color col)
{
if (TrueMod){
+/*
AnnotationInfo ai;
ai.fg_color = ((col.fg>>16)&0xFF) + (col.fg&0x00FF00) + ((col.fg&0xFF)<<16);
ai.bk_color = ((col.bk>>16)&0xFF) + (col.bk&0x00FF00) + ((col.bk&0xFF)<<16);
ai.bk_valid = ai.fg_valid = 1;
ai.style = col.style;
addAnnotation(lno, s, e, ai);
+*/
+ EditorTrueColor ec{};
+ ec.Base.StringNumber = lno;
+ ec.Base.StartPos = s;
+ ec.Base.EndPos = e-1;
+ if (col.fg || col.bk) {
+ if (col.style & AI_STYLE_UNDERLINE) {
+ ec.Base.Color|= COMMON_LVB_UNDERSCORE;
+ }
+ if (col.style & AI_STYLE_STRIKEOUT) {
+ ec.Base.Color|= COMMON_LVB_STRIKEOUT;
+ }
+ ec.TrueFore.R = ((col.fg >> 16) & 0xFF);
+ ec.TrueFore.G = ((col.fg >> 8) & 0xFF);
+ ec.TrueFore.B = ((col.fg) & 0xFF);
+ ec.TrueFore.Flags = 1;
+ ec.TrueBack.R = ((col.bk >> 16) & 0xFF);
+ ec.TrueBack.G = ((col.bk >> 8) & 0xFF);
+ ec.TrueBack.B = ((col.bk) & 0xFF);
+ ec.TrueBack.Flags = 1;
+
+ if (ec.TrueFore.R > 0x10) ec.Base.Color|= FOREGROUND_RED;
+ if (ec.TrueFore.G > 0x10) ec.Base.Color|= FOREGROUND_GREEN;
+ if (ec.TrueFore.B > 0x10) ec.Base.Color|= FOREGROUND_BLUE;
+
+ if (ec.TrueBack.R > 0x10) ec.Base.Color|= BACKGROUND_RED;
+ if (ec.TrueBack.G > 0x10) ec.Base.Color|= BACKGROUND_GREEN;
+ if (ec.TrueBack.B > 0x10) ec.Base.Color|= BACKGROUND_BLUE;
+
+ if (ec.TrueFore.R > 0x80 || ec.TrueFore.G > 0x80 || ec.TrueFore.B > 0x80) {
+ ec.Base.Color|= FOREGROUND_INTENSITY; // OR in: plain assignment would discard the RGB/underline/strikeout bits set above
+ }
+
+ if (ec.Base.Color == 0 || ec.TrueBack.R > 0x80 || ec.TrueBack.G > 0x80 || ec.TrueBack.B > 0x80) {
+ ec.Base.Color|= BACKGROUND_INTENSITY; // OR in; the == 0 test also makes sure Color ends up non-zero
+ }
+ }
+
+#if 0
+ CLR_TRACE("FarEditor", "line:%d, %d-%d, color:%x", lno, s, e, col);
+#endif // if 0
+ info->EditorControl(ECTL_ADDTRUECOLOR, &ec);
+#if 0
+ CLR_TRACE("FarEditor", "line %d: %d-%d: color=%x", lno, s, e, col);
+#endif // if 0
}else{
EditorColor ec;
ec.StringNumber = lno;
diff --git a/colorer/src/pcolorer2/FarEditorSet.cpp b/colorer/src/pcolorer2/FarEditorSet.cpp
index 4a23725c..ff144d6a 100644
--- a/colorer/src/pcolorer2/FarEditorSet.cpp
+++ b/colorer/src/pcolorer2/FarEditorSet.cpp
@@ -1350,19 +1350,14 @@ bool FarEditorSet::checkConEmu()
return conemu;*/
}
-bool FarEditorSet::checkFarTrueMod() //TODO
+bool FarEditorSet::checkFarTrueMod()
{
- return false;
- /*EditorAnnotation ea;
- ea.StringNumber = 1;
- ea.StartPos = 1;
- ea.EndPos = 2;
- return !!Info.EditorControl(ECTL_ADDANNOTATION, &ea);*/
+ return WINPORT(GetConsoleColorPalette)() >= 24;
}
bool FarEditorSet::checkConsoleAnnotationAvailable()
{
- return checkConEmu()&&checkFarTrueMod();
+ return checkFarTrueMod();
}
bool FarEditorSet::SetBgEditor()
@@ -1753,18 +1748,15 @@ void FarEditorSet::SaveChangedValueParam(HANDLE hDlg)
//если его изменили
if (!v.equals(def_value)){
if (type->getParamValue(p)==nullptr){
- ((FileTypeImpl*)type)->addParam(&p);
+ type->addParam(&p);
}
type->setParamValue(p,&v);
}
}else{//было пользовательское значение
if (!v.equals(value)){//changed
if (v.equals(def_value)){
- //delete value
- delete type->getParamUserValue(p);
- ((FileTypeImpl*)type)->removeParamValue(p);
+ type->removeParamValue(p);
}else{
- delete type->getParamUserValue(p);
type->setParamValue(p,&v);
}
}
diff --git a/far2l/far2sdk/farplug-mb.h b/far2l/far2sdk/farplug-mb.h
index 5adb2399..94dcf10c 100644
--- a/far2l/far2sdk/farplug-mb.h
+++ b/far2l/far2sdk/farplug-mb.h
@@ -1232,6 +1232,8 @@ namespace oldfar
ECTL_DELETESTACKBOOKMARK,
ECTL_GETSTACKBOOKMARKS,
ECTL_SERVICEREGION,
+ ECTL_ADDTRUECOLOR,
+ ECTL_GETTRUECOLOR,
};
enum EDITOR_SETPARAMETER_TYPES
@@ -1406,6 +1408,21 @@ namespace oldfar
int Color;
};
+ struct FarTrueColor
+ {
+ unsigned char R;
+ unsigned char G;
+ unsigned char B;
+ unsigned char Flags; // bit one - 'active' flag, others - ignored and must be set to zero
+ };
+
+ struct EditorTrueColor
+ {
+ struct EditorColor Base;
+ struct FarTrueColor TrueFore;
+ struct FarTrueColor TrueBack;
+ };
+
struct EditorSaveFile
{
char FileName[NM];
diff --git a/far2l/far2sdk/farplug-wide.h b/far2l/far2sdk/farplug-wide.h
index 09bc01ac..7561f803 100644
--- a/far2l/far2sdk/farplug-wide.h
+++ b/far2l/far2sdk/farplug-wide.h
@@ -1568,10 +1568,12 @@ enum EDITOR_CONTROL_COMMANDS
ECTL_GETSTACKBOOKMARKS,
ECTL_UNDOREDO,
ECTL_GETFILENAME,
-#ifdef FAR_USE_INTERNALS
- ECTL_SERVICEREGION,
-#endif // END FAR_USE_INTERNALS
+ ECTL_ADDTRUECOLOR,
+ ECTL_GETTRUECOLOR,
};
+//#ifdef FAR_USE_INTERNALS
+// ECTL_SERVICEREGION, // WTF
+//#endif // END FAR_USE_INTERNALS
enum EDITOR_SETPARAMETER_TYPES
{
@@ -1763,6 +1765,21 @@ struct EditorColor
int Color;
};
+struct FarTrueColor
+{
+ unsigned char R;
+ unsigned char G;
+ unsigned char B;
+ unsigned char Flags; // bit one - 'active' flag, others - ignored and must be set to zero
+};
+
+struct EditorTrueColor
+{
+ struct EditorColor Base;
+ struct FarTrueColor TrueFore;
+ struct FarTrueColor TrueBack;
+};
+
struct EditorSaveFile
{
const wchar_t *FileName;
diff --git a/far2l/src/cfg/config.cpp b/far2l/src/cfg/config.cpp
index bbada1c2..be022f91 100644
--- a/far2l/src/cfg/config.cpp
+++ b/far2l/src/cfg/config.cpp
@@ -856,7 +856,8 @@ static struct FARConfig
{1, REG_DWORD, NKeySystem, "DriveMenuMode2",&Opt.ChangeDriveMode,(DWORD)-1, 0},
{1, REG_DWORD, NKeySystem, "DriveDisconnetMode",&Opt.ChangeDriveDisconnetMode,1, 0},
- {1, REG_SZ, NKeySystem, "DriveExceptions",&Opt.ChangeDriveExceptions, 0, L"/System/*;/proc;/proc/*;/sys;/sys/*;/dev;/dev/*;/run;/run/*;/tmp;/snap;/snap/*;/private;/private/*"},
+ {1, REG_SZ, NKeySystem, "DriveExceptions",&Opt.ChangeDriveExceptions, 0,
+ L"/System/*;/proc;/proc/*;/sys;/sys/*;/dev;/dev/*;/run;/run/*;/tmp;/snap;/snap/*;/private;/private/*;/var/lib/lxcfs;/var/snap/*;/var/spool/cron"},
{1, REG_SZ, NKeySystem, "DriveColumn2",&Opt.ChangeDriveColumn2, 0, L"$U/$T"},
{1, REG_SZ, NKeySystem, "DriveColumn3",&Opt.ChangeDriveColumn3, 0, L"$S$D"},
diff --git a/far2l/src/console/AnsiEsc.cpp b/far2l/src/console/AnsiEsc.cpp
index 1980552e..606aa8f3 100644
--- a/far2l/src/console/AnsiEsc.cpp
+++ b/far2l/src/console/AnsiEsc.cpp
@@ -148,6 +148,7 @@ void FontState::ParseSuffixM(const int *args, int argc)
concealed = false;
bold = false;
underline = false;
+ strikeout = false;
case 39:
case 49: {
@@ -177,6 +178,9 @@ void FontState::ParseSuffixM(const int *args, int argc)
case 8:
concealed = 1;
break;
+ case 9:
+ strikeout = true;
+ break;
case 21: // oops, this actually turns on double underline
// but xterm turns off bold too, so that's alright
case 22:
@@ -192,6 +196,9 @@ void FontState::ParseSuffixM(const int *args, int argc)
case 28:
concealed = 0;
break;
+ case 29:
+ strikeout = false;
+ break;
}
}
}
@@ -200,6 +207,7 @@ void FontState::FromConsoleAttributes(DWORD64 qAttributes)
{
bold = (qAttributes & FOREGROUND_INTENSITY) != 0;
underline = (qAttributes & COMMON_LVB_UNDERSCORE) != 0;
+ strikeout = (qAttributes & COMMON_LVB_STRIKEOUT) != 0;
rvideo = (qAttributes & COMMON_LVB_REVERSE_VIDEO) != 0;
foreground = Attr2Ansi[qAttributes & 7];
background = Attr2Ansi[(qAttributes >> 4) & 7];
@@ -250,6 +258,10 @@ DWORD64 FontState::ToConsoleAttributes()
attribut|= COMMON_LVB_UNDERSCORE;
}
+ if (strikeout) {
+ attribut|= COMMON_LVB_STRIKEOUT;
+ }
+
return attribut;
}
diff --git a/far2l/src/console/AnsiEsc.hpp b/far2l/src/console/AnsiEsc.hpp
index f2278682..576bace8 100644
--- a/far2l/src/console/AnsiEsc.hpp
+++ b/far2l/src/console/AnsiEsc.hpp
@@ -14,6 +14,7 @@ namespace AnsiEsc
BYTE background = 0; // ANSI base color (0 to 7; add 40)
bool bold = false; //
bool underline = false; //
+ bool strikeout = false; //
bool rvideo = false; // swap console foreground & background attributes
bool concealed = false; // set foreground/bold to background/underline
diff --git a/far2l/src/console/keyboard.cpp b/far2l/src/console/keyboard.cpp
index 3b398280..bac76058 100644
--- a/far2l/src/console/keyboard.cpp
+++ b/far2l/src/console/keyboard.cpp
@@ -797,7 +797,7 @@ DWORD GetInputRecord(INPUT_RECORD *rec,bool ExcludeMacro,bool ProcessMouse,bool
return(KEY_NONE);
}
- if (!WaitInMainLoop && LoopCount == 4)
+ if (!WaitInMainLoop && LoopCount == 3)
{
LastEventIdle = TRUE;
ZeroFill(*rec);
diff --git a/far2l/src/console/scrbuf.cpp b/far2l/src/console/scrbuf.cpp
index 831856a9..f256692e 100644
--- a/far2l/src/console/scrbuf.cpp
+++ b/far2l/src/console/scrbuf.cpp
@@ -186,7 +186,7 @@ void ScreenBuf::Read(int X1,int Y1,int X2,int Y2,CHAR_INFO *Text,int MaxTextLeng
/* Изменить значение цветовых атрибутов в соответствии с маской
(в основном применяется для "создания" тени)
*/
-void ScreenBuf::ApplyColorMask(int X1,int Y1,int X2,int Y2,WORD ColorMask)
+void ScreenBuf::ApplyColorMask(int X1,int Y1,int X2,int Y2,DWORD64 ColorMask)
{
CriticalSectionLock Lock(CS);
int Width=X2-X1+1;
@@ -216,7 +216,7 @@ void ScreenBuf::ApplyColorMask(int X1,int Y1,int X2,int Y2,WORD ColorMask)
/* Непосредственное изменение цветовых атрибутов
*/
-void ScreenBuf::ApplyColor(int X1,int Y1,int X2,int Y2,WORD Color)
+void ScreenBuf::ApplyColor(int X1,int Y1,int X2,int Y2,DWORD64 Color)
{
CriticalSectionLock Lock(CS);
if(X1<=ScrX && Y1<=ScrY && X2>=0 && Y2>=0)
@@ -253,7 +253,7 @@ void ScreenBuf::ApplyColor(int X1,int Y1,int X2,int Y2,WORD Color)
/* Непосредственное изменение цветовых атрибутов с заданым цетом исключением
*/
-void ScreenBuf::ApplyColor(int X1,int Y1,int X2,int Y2,int Color,WORD ExceptColor)
+void ScreenBuf::ApplyColor(int X1,int Y1,int X2,int Y2,DWORD64 Color,DWORD64 ExceptColor)
{
CriticalSectionLock Lock(CS);
if(X1<=ScrX && Y1<=ScrY && X2>=0 && Y2>=0)
@@ -285,7 +285,7 @@ void ScreenBuf::ApplyColor(int X1,int Y1,int X2,int Y2,int Color,WORD ExceptColo
/* Закрасить прямоугольник символом Ch и цветом Color
*/
-void ScreenBuf::FillRect(int X1,int Y1,int X2,int Y2,WCHAR Ch,WORD Color)
+void ScreenBuf::FillRect(int X1,int Y1,int X2,int Y2,WCHAR Ch,DWORD64 Color)
{
CriticalSectionLock Lock(CS);
int Width=X2-X1+1;
diff --git a/far2l/src/console/scrbuf.hpp b/far2l/src/console/scrbuf.hpp
index 497c01a9..f7ad4b4f 100644
--- a/far2l/src/console/scrbuf.hpp
+++ b/far2l/src/console/scrbuf.hpp
@@ -81,10 +81,10 @@ class ScreenBuf
void RestoreMacroChar();
void RestoreElevationChar();
- void ApplyColorMask(int X1,int Y1,int X2,int Y2,WORD ColorMask);
- void ApplyColor(int X1,int Y1,int X2,int Y2,WORD Color);
- void ApplyColor(int X1,int Y1,int X2,int Y2,int Color,WORD ExceptColor);
- void FillRect(int X1,int Y1,int X2,int Y2,WCHAR Ch,WORD Color);
+ void ApplyColorMask(int X1,int Y1,int X2,int Y2,DWORD64 ColorMask);
+ void ApplyColor(int X1,int Y1,int X2,int Y2,DWORD64 Color);
+ void ApplyColor(int X1,int Y1,int X2,int Y2,DWORD64 Color,DWORD64 ExceptColor);
+ void FillRect(int X1,int Y1,int X2,int Y2,WCHAR Ch,DWORD64 Color);
void Scroll(int);
void Flush();
diff --git a/far2l/src/copy.cpp b/far2l/src/copy.cpp
index 5487f4ec..eca74951 100644
--- a/far2l/src/copy.cpp
+++ b/far2l/src/copy.cpp
@@ -2975,14 +2975,17 @@ DWORD ShellFileTransfer::PieceCopy()
if (BytesWritten > BytesRead)
{ // likely we written bit more due to no_buffering requires aligned io
// move backward and correct file size
- _DestFile.SetPointer((INT64)BytesRead - (INT64)WriteSize, nullptr, FILE_CURRENT);
- _DestFile.SetEnd();
+ if (!_DestFile.SetPointer((INT64)BytesRead - (INT64)WriteSize, nullptr, FILE_CURRENT))
+ throw ErrnoSaver();
+ if (!_DestFile.SetEnd())
+ throw ErrnoSaver();
return BytesRead;
}
if (BytesWritten < BytesRead)
{ // if written less than read then need to rewind source file by difference
- _SrcFile.SetPointer((INT64)BytesWritten - (INT64)BytesRead, nullptr, FILE_CURRENT);
+ if (!_SrcFile.SetPointer((INT64)BytesWritten - (INT64)BytesRead, nullptr, FILE_CURRENT))
+ throw ErrnoSaver();
}
return BytesWritten;
diff --git a/far2l/src/edit.cpp b/far2l/src/edit.cpp
index e58c26ee..01efa5be 100644
--- a/far2l/src/edit.cpp
+++ b/far2l/src/edit.cpp
@@ -58,6 +58,7 @@ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "history.hpp"
#include "vmenu.hpp"
#include "chgmmode.hpp"
+#include "VT256ColorTable.h"
#include <cwctype>
static int Recurse=0;
@@ -115,8 +116,6 @@ Edit::Edit(ScreenObject *pOwner, Callback* aCallback, bool bAllocateData):
SelColor=F_WHITE|B_BLACK;
ColorUnChanged=COL_DIALOGEDITUNCHANGED;
EndType=EOL_NONE;
- ColorList=nullptr;
- ColorCount=0;
TabSize=Opt.EdOpt.TabSize;
TabExpandMode = EXPAND_NOTABS;
Flags.Change(FEDITLINE_DELREMOVESBLOCKS,Opt.EdOpt.DelRemovesBlocks);
@@ -128,9 +127,6 @@ Edit::Edit(ScreenObject *pOwner, Callback* aCallback, bool bAllocateData):
Edit::~Edit()
{
- if (ColorList)
- free(ColorList);
-
if (Mask)
free(Mask);
@@ -2453,53 +2449,42 @@ void Edit::DeleteBlock()
}
-void Edit::AddColor(ColorItem *col)
+void Edit::AddColor(const ColorItem *col)
{
- if (!(ColorCount & 15))
- ColorList=(ColorItem *)realloc(ColorList,(ColorCount+16)*sizeof(*ColorList));
-
- ColorList[ColorCount++]=*col;
+ ColorList.emplace_back(*col);
}
-int Edit::DeleteColor(int ColorPos)
+size_t Edit::DeleteColor(int ColorPos)
{
- int Src;
-
- if (!ColorCount)
- return FALSE;
+ if (ColorList.empty())
+ return 0;
- int Dest=0;
+ size_t Dest, Src;
- for (Src=0; Src<ColorCount; Src++)
- if (ColorPos!=-1 && ColorList[Src].StartPos!=ColorPos)
+ for (Src = Dest = 0; Src < ColorList.size(); ++Src)
+ if (ColorPos != -1 && ColorList[Src].StartPos != ColorPos)
{
- if (Dest!=Src)
- ColorList[Dest]=ColorList[Src];
+ if (Dest != Src)
+ ColorList[Dest] = ColorList[Src];
- Dest++;
+ ++Dest;
}
- int DelCount=ColorCount-Dest;
- ColorCount=Dest;
-
- if (!ColorCount)
- {
- free(ColorList);
- ColorList=nullptr;
- }
+ const size_t DelCount = ColorList.size() - Dest;
+ ColorList.resize(Dest);
- return(DelCount);
+ return DelCount;
}
-int Edit::GetColor(ColorItem *col,int Item)
+bool Edit::GetColor(ColorItem *col, int Item)
{
- if (Item >= ColorCount)
- return FALSE;
+ if (Item < 0 || Item >= (int)ColorList.size()) // reject negative indexes as well as past-the-end ones
+ return false;
- *col=ColorList[Item];
- return TRUE;
+ *col = ColorList[Item];
+ return true;
}
@@ -2509,50 +2494,57 @@ void Edit::ApplyColor()
int Pos = INT_MIN, TabPos = INT_MIN, TabEditorPos = INT_MIN;
// Обрабатываем элементы ракраски
- for (int Col = 0; Col < ColorCount; Col++)
+ for (auto &CurItem : ColorList)
{
- ColorItem *CurItem = ColorList+Col;
-
// Пропускаем элементы у которых начало больше конца
- if (CurItem->StartPos > CurItem->EndPos)
+ if (CurItem.StartPos > CurItem.EndPos)
continue;
// Отсекаем элементы заведомо не попадающие на экран
- if (CurItem->StartPos-LeftPos > X2 && CurItem->EndPos-LeftPos < X1)
+ if (CurItem.StartPos-LeftPos > X2 && CurItem.EndPos-LeftPos < X1)
continue;
- int Attr = CurItem->Color;
- int Length = CurItem->EndPos-CurItem->StartPos+1;
+ DWORD64 Attr = CurItem.Color;
+ if (CurItem.TrueFore.Flags & 1)
+ {
+ SET_RGB_FORE(Attr, COMPOSE_RGB(CurItem.TrueFore.R, CurItem.TrueFore.G, CurItem.TrueFore.B));
+ }
+ if (CurItem.TrueBack.Flags & 1)
+ {
+ SET_RGB_BACK(Attr, COMPOSE_RGB(CurItem.TrueBack.R, CurItem.TrueBack.G, CurItem.TrueBack.B));
+ }
+
+ int Length = CurItem.EndPos - CurItem.StartPos+1;
- if (CurItem->StartPos+Length >= StrSize)
- Length = StrSize-CurItem->StartPos;
+ if (CurItem.StartPos + Length >= StrSize)
+ Length = StrSize - CurItem.StartPos;
// Получаем начальную позицию
int RealStart, Start;
// Если предыдущая позиция равна текущей, то ничего не вычисляем
// и сразу берём ранее вычисленное значение
- if (Pos == CurItem->StartPos)
+ if (Pos == CurItem.StartPos)
{
RealStart = TabPos;
Start = TabEditorPos;
}
// Если вычисление идёт первый раз или предыдущая позиция больше текущей,
// то производим вычисление с начала строки
- else if (Pos == INT_MIN || CurItem->StartPos < Pos)
+ else if (Pos == INT_MIN || CurItem.StartPos < Pos)
{
- RealStart = RealPosToCell(CurItem->StartPos);
+ RealStart = RealPosToCell(CurItem.StartPos);
Start = RealStart-LeftPos;
}
// Для отптимизации делаем вычисление относительно предыдущей позиции
else
{
- RealStart = RealPosToCell(TabPos, Pos, CurItem->StartPos, nullptr);
+ RealStart = RealPosToCell(TabPos, Pos, CurItem.StartPos, nullptr);
Start = RealStart-LeftPos;
}
// Запоминаем вычисленные значения для их дальнейшего повторного использования
- Pos = CurItem->StartPos;
+ Pos = CurItem.StartPos;
TabPos = RealStart;
TabEditorPos = Start;
@@ -2567,7 +2559,7 @@ void Edit::ApplyColor()
Attr &= ~ECF_TAB1;
// Получаем конечную позицию
- int EndPos = CurItem->EndPos;
+ int EndPos = CurItem.EndPos;
int RealEnd, End;
// Обрабатываем случай, когда предыдущая позиция равна текущей, то есть
diff --git a/far2l/src/edit.hpp b/far2l/src/edit.hpp
index ec531f16..2ba16dc0 100644
--- a/far2l/src/edit.hpp
+++ b/far2l/src/edit.hpp
@@ -38,6 +38,7 @@ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "bitflags.hpp"
#include "FilesSuggestor.hpp"
#include <memory>
+#include <vector>
// Младший байт (маска 0xFF) юзается классом ScreenObject!!!
enum FLAGS_CLASS_EDITLINE
@@ -66,6 +67,9 @@ struct ColorItem
int StartPos;
int EndPos;
int Color;
+ FarTrueColor TrueFore;
+ FarTrueColor TrueBack;
+ bool TrueColorDefined;
};
enum SetCPFlags
@@ -144,8 +148,7 @@ class Edit:public ScreenObject
wchar_t *Mask;
- ColorItem *ColorList;
- int ColorCount;
+ std::vector<ColorItem> ColorList;
int Color;
int SelColor;
@@ -297,9 +300,9 @@ class Edit:public ScreenObject
void InsertTab();
- void AddColor(ColorItem *col);
- int DeleteColor(int ColorPos);
- int GetColor(ColorItem *col,int Item);
+ void AddColor(const ColorItem *col);
+ size_t DeleteColor(int ColorPos);
+ bool GetColor(ColorItem *col,int Item);
void Xlat(bool All=false);
diff --git a/far2l/src/editor.cpp b/far2l/src/editor.cpp
index b7ed7eba..7c0d7b49 100644
--- a/far2l/src/editor.cpp
+++ b/far2l/src/editor.cpp
@@ -5747,11 +5747,12 @@ int Editor::EditorControl(int Command,void *Param)
return TRUE;
}
// TODO: Если DI_MEMOEDIT не будет юзать раскаску, то должно выполняется в FileEditor::EditorControl(), в диалоге - нафиг ненать
+ case ECTL_ADDTRUECOLOR:
case ECTL_ADDCOLOR:
{
if (Param)
{
- EditorColor *col=(EditorColor *)Param;
+ const EditorColor *col=(EditorColor *)Param;
_ECTLLOG(SysLog(L"EditorColor{"));
_ECTLLOG(SysLog(L" StringNumber=%d",col->StringNumber));
_ECTLLOG(SysLog(L" ColorItem =%d (0x%08X)",col->ColorItem,col->ColorItem));
@@ -5759,7 +5760,7 @@ int Editor::EditorControl(int Command,void *Param)
_ECTLLOG(SysLog(L" EndPos =%d",col->EndPos));
_ECTLLOG(SysLog(L" Color =%d (0x%08X)",col->Color,col->Color));
_ECTLLOG(SysLog(L"}"));
- ColorItem newcol;
+ ColorItem newcol{0};
newcol.StartPos=col->StartPos+(col->StartPos!=-1?X1:0);
newcol.EndPos=col->EndPos+X1;
newcol.Color=col->Color;
@@ -5774,6 +5775,12 @@ int Editor::EditorControl(int Command,void *Param)
if (!col->Color)
return(CurPtr->DeleteColor(newcol.StartPos));
+ if (Command == ECTL_ADDTRUECOLOR)
+ {
+ const EditorTrueColor *tcol = (EditorTrueColor *)Param;
+ newcol.TrueFore = tcol->TrueFore;
+ newcol.TrueBack = tcol->TrueBack;
+ }
CurPtr->AddColor(&newcol);
return TRUE;
}
@@ -5781,6 +5788,7 @@ int Editor::EditorControl(int Command,void *Param)
break;
}
// TODO: Если DI_MEMOEDIT не будет юзать раскаску, то должно выполняется в FileEditor::EditorControl(), в диалоге - нафиг ненать
+ case ECTL_GETTRUECOLOR:
case ECTL_GETCOLOR:
{
if (Param)
@@ -5805,6 +5813,12 @@ int Editor::EditorControl(int Command,void *Param)
col->StartPos=curcol.StartPos-X1;
col->EndPos=curcol.EndPos-X1;
col->Color=curcol.Color;
+ if (Command == ECTL_GETTRUECOLOR) // GET case: ECTL_ADDTRUECOLOR never reaches this branch, so the old test was always false
+ {
+ EditorTrueColor *tcol = (EditorTrueColor *)Param;
+ tcol->TrueFore = curcol.TrueFore;
+ tcol->TrueBack = curcol.TrueBack;
+ }
_ECTLLOG(SysLog(L"EditorColor{"));
_ECTLLOG(SysLog(L" StringNumber=%d",col->StringNumber));
_ECTLLOG(SysLog(L" ColorItem =%d (0x%08X)",col->ColorItem,col->ColorItem));
diff --git a/far2l/src/plug/wrap.cpp b/far2l/src/plug/wrap.cpp
index e08ef7fb..40ddd218 100644
--- a/far2l/src/plug/wrap.cpp
+++ b/far2l/src/plug/wrap.cpp
@@ -3514,11 +3514,13 @@ int WINAPI FarEditorControlA(int Command,void* Param)
switch (Command)
{
case oldfar::ECTL_ADDCOLOR: Command = ECTL_ADDCOLOR; break;
+ case oldfar::ECTL_ADDTRUECOLOR: Command = ECTL_ADDTRUECOLOR; break;
case oldfar::ECTL_DELETEBLOCK: Command = ECTL_DELETEBLOCK; break;
case oldfar::ECTL_DELETECHAR: Command = ECTL_DELETECHAR; break;
case oldfar::ECTL_DELETESTRING: Command = ECTL_DELETESTRING; break;
case oldfar::ECTL_EXPANDTABS: Command = ECTL_EXPANDTABS; break;
case oldfar::ECTL_GETCOLOR: Command = ECTL_GETCOLOR; break;
+ case oldfar::ECTL_GETTRUECOLOR: Command = ECTL_GETTRUECOLOR; break;
case oldfar::ECTL_GETBOOKMARKS: Command = ECTL_GETBOOKMARKS; break;
case oldfar::ECTL_INSERTSTRING: Command = ECTL_INSERTSTRING; break;
case oldfar::ECTL_QUIT: Command = ECTL_QUIT; break;
diff --git a/far2l/src/setcolor.cpp b/far2l/src/setcolor.cpp
index 82c4dffb..085f75ba 100644
--- a/far2l/src/setcolor.cpp
+++ b/far2l/src/setcolor.cpp
@@ -531,6 +531,56 @@ void GetColor(int PaletteIndex)
}
}
+// Post-paint fix-up for the color dialog (called from GetColorDlgProc on DN_DRAWDIALOGDONE).
+// For dialog items 2..17 it reads the attributes of the item's top-left screen cell, builds an
+// attribute with the two low color nibbles swapped plus COMMON_LVB_REVERSE_VIDEO, and writes it
+// to (ItemRect.Right - ItemRect.Left) cells starting at that top-left cell.
+static void GetColorDlgProc_OnDrawn(HANDLE hDlg)
+{
+ // Trick to fix #1392:
+ // For foreground-colored boxes invert Fg&Bg colors and add COMMON_LVB_REVERSE_VIDEO attribute
+ // this will put real colors on them if mapping of colors is different for Fg and Bg indexes
+
+ // Ensure everything is on screen and will use console API then
+ ScrBuf.Flush();
+
+ // Dialog rectangle; its Left/Top are added to every item rectangle below
+ SMALL_RECT DlgRect{};
+ SendDlgMessage(hDlg, DM_GETDLGRECT, 0, (LONG_PTR)&DlgRect);
+
+ // NOTE(review): IDs 2..17 are presumably the 16 foreground color boxes - confirm against ColorDlgData
+ for (int ID = 2; ID <= 17; ++ID)
+ {
+ SMALL_RECT ItemRect{};
+ if (SendDlgMessage(hDlg, DM_GETITEMPOSITION, ID, (LONG_PTR)&ItemRect))
+ {
+ // Offset the item rectangle by the dialog origin
+ // (presumably dialog-relative -> screen coordinates; verify)
+ ItemRect.Left+= DlgRect.Left;
+ ItemRect.Right+= DlgRect.Left;
+ ItemRect.Top+= DlgRect.Top;
+ ItemRect.Bottom+= DlgRect.Top;
+
+ // Read exactly one cell (1x1 buffer) at the item's top-left corner.
+ // NOTE(review): the call result is not checked; on failure ci keeps its zero-initialized value.
+ CHAR_INFO ci{};
+ SMALL_RECT Rect = {ItemRect.Left, ItemRect.Top, ItemRect.Left, ItemRect.Top};
+ WINPORT(ReadConsoleOutput)(0, &ci, COORD{1, 1}, COORD{0, 0}, &Rect);
+ if (ci.Attributes & COMMON_LVB_REVERSE_VIDEO)
+ continue; // this cell is already tweaked during prev paint
+
+ DWORD64 InvColors = COMMON_LVB_REVERSE_VIDEO;
+
+ // Swap bits 0-3 with bits 4-7 of the original attributes
+ InvColors|= ((ci.Attributes & 0x0f) << 4) | ((ci.Attributes & 0xf0) >> 4);
+
+ // Carry over the underscore/strikeout flags unchanged
+ InvColors|= (ci.Attributes & (COMMON_LVB_UNDERSCORE | COMMON_LVB_STRIKEOUT));
+
+ // True-color values are swapped as well: original RGB fore goes to back...
+ if (ci.Attributes & FOREGROUND_TRUECOLOR)
+ {
+ SET_RGB_BACK(InvColors, GET_RGB_FORE(ci.Attributes));
+ }
+
+ // ...and original RGB back goes to fore
+ if (ci.Attributes & BACKGROUND_TRUECOLOR)
+ {
+ SET_RGB_FORE(InvColors, GET_RGB_BACK(ci.Attributes));
+ }
+
+ // Apply the rebuilt attribute along the item's top row, starting at its left edge
+ DWORD NumberOfAttrsWritten{};
+ WINPORT(FillConsoleOutputAttribute)(0, InvColors, ItemRect.Right - ItemRect.Left,
+ COORD{ItemRect.Left, ItemRect.Top}, &NumberOfAttrsWritten);
+ }
+ }
+}
static LONG_PTR WINAPI GetColorDlgProc(HANDLE hDlg, int Msg, int Param1, LONG_PTR Param2)
{
@@ -575,12 +625,17 @@ static LONG_PTR WINAPI GetColorDlgProc(HANDLE hDlg, int Msg, int Param1, LONG_PT
}
break;
+
+ case DN_DRAWDIALOGDONE:
+ GetColorDlgProc_OnDrawn(hDlg);
+ break;
}
return DefDlgProc(hDlg, Msg, Param1, Param2);
}
+
int GetColorDialog(WORD& Color,bool bCentered,bool bAddTransparent)
{
DialogDataEx ColorDlgData[]=
diff --git a/multiarc/CMakeLists.txt b/multiarc/CMakeLists.txt
index 375e01a8..e8860b73 100644
--- a/multiarc/CMakeLists.txt
+++ b/multiarc/CMakeLists.txt
@@ -119,6 +119,10 @@ set(SOURCES
src/formats/7z/C/7zArcIn.c
src/formats/7z/C/Bra.c
src/formats/7z/C/Blake2s.c
+ src/formats/7z/C/LzFindOpt.c
+ src/formats/7z/C/Ppmd7aDec.c
+ src/formats/7z/C/Sha1Opt.c
+ src/formats/7z/C/Sha256Opt.c
src/formats/ha/ha/acoder.c
src/formats/ha/ha/archive.c
diff --git a/multiarc/src/formats/7z/C/7z.h b/multiarc/src/formats/7z/C/7z.h
index 6c7886e3..304f75ff 100644..100755
--- a/multiarc/src/formats/7z/C/7z.h
+++ b/multiarc/src/formats/7z/C/7z.h
@@ -1,5 +1,5 @@
/* 7z.h -- 7z interface
-2017-04-03 : Igor Pavlov : Public domain */
+2018-07-02 : Igor Pavlov : Public domain */
#ifndef __7Z_H
#define __7Z_H
@@ -91,6 +91,8 @@ typedef struct
UInt64 *CoderUnpackSizes; // for all coders in all folders
Byte *CodersData;
+
+ UInt64 RangeLimit;
} CSzAr;
UInt64 SzAr_GetFolderUnpackSize(const CSzAr *p, UInt32 folderIndex);
diff --git a/multiarc/src/formats/7z/C/7zAlloc.c b/multiarc/src/formats/7z/C/7zAlloc.c
index c924a529..c924a529 100644..100755
--- a/multiarc/src/formats/7z/C/7zAlloc.c
+++ b/multiarc/src/formats/7z/C/7zAlloc.c
diff --git a/multiarc/src/formats/7z/C/7zAlloc.h b/multiarc/src/formats/7z/C/7zAlloc.h
index 44778f9b..44778f9b 100644..100755
--- a/multiarc/src/formats/7z/C/7zAlloc.h
+++ b/multiarc/src/formats/7z/C/7zAlloc.h
diff --git a/multiarc/src/formats/7z/C/7zArcIn.c b/multiarc/src/formats/7z/C/7zArcIn.c
index f74d0fad..0d9dec41 100644..100755
--- a/multiarc/src/formats/7z/C/7zArcIn.c
+++ b/multiarc/src/formats/7z/C/7zArcIn.c
@@ -1,5 +1,5 @@
/* 7zArcIn.c -- 7z Input functions
-2018-12-31 : Igor Pavlov : Public domain */
+2021-02-09 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -75,7 +75,7 @@ static SRes SzBitUi32s_Alloc(CSzBitUi32s *p, size_t num, ISzAllocPtr alloc)
return SZ_OK;
}
-void SzBitUi32s_Free(CSzBitUi32s *p, ISzAllocPtr alloc)
+static void SzBitUi32s_Free(CSzBitUi32s *p, ISzAllocPtr alloc)
{
ISzAlloc_Free(alloc, p->Defs); p->Defs = NULL;
ISzAlloc_Free(alloc, p->Vals); p->Vals = NULL;
@@ -83,7 +83,7 @@ void SzBitUi32s_Free(CSzBitUi32s *p, ISzAllocPtr alloc)
#define SzBitUi64s_Init(p) { (p)->Defs = NULL; (p)->Vals = NULL; }
-void SzBitUi64s_Free(CSzBitUi64s *p, ISzAllocPtr alloc)
+static void SzBitUi64s_Free(CSzBitUi64s *p, ISzAllocPtr alloc)
{
ISzAlloc_Free(alloc, p->Defs); p->Defs = NULL;
ISzAlloc_Free(alloc, p->Vals); p->Vals = NULL;
@@ -105,6 +105,8 @@ static void SzAr_Init(CSzAr *p)
p->CoderUnpackSizes = NULL;
p->CodersData = NULL;
+
+ p->RangeLimit = 0;
}
static void SzAr_Free(CSzAr *p, ISzAllocPtr alloc)
@@ -502,7 +504,7 @@ SRes SzGetNextFolderItem(CSzFolder *f, CSzData *sd)
return SZ_ERROR_ARCHIVE;
if (propsSize >= 0x80)
return SZ_ERROR_UNSUPPORTED;
- coder->PropsOffset = sd->Data - dataStart;
+ coder->PropsOffset = (size_t)(sd->Data - dataStart);
coder->PropsSize = (Byte)propsSize;
sd->Data += (size_t)propsSize;
sd->Size -= (size_t)propsSize;
@@ -677,7 +679,7 @@ static SRes ReadUnpackInfo(CSzAr *p,
{
UInt32 numCoders, ci, numInStreams = 0;
- p->FoCodersOffsets[fo] = sd.Data - startBufPtr;
+ p->FoCodersOffsets[fo] = (size_t)(sd.Data - startBufPtr);
RINOK(SzReadNumber32(&sd, &numCoders));
if (numCoders == 0 || numCoders > k_Scan_NumCoders_MAX)
@@ -797,7 +799,7 @@ static SRes ReadUnpackInfo(CSzAr *p,
p->FoToCoderUnpackSizes[fo] = numCodersOutStreams;
{
- size_t dataSize = sd.Data - startBufPtr;
+ const size_t dataSize = (size_t)(sd.Data - startBufPtr);
p->FoStartPackStreamIndex[fo] = packStreamIndex;
p->FoCodersOffsets[fo] = dataSize;
MY_ALLOC_ZE_AND_CPY(p->CodersData, dataSize, startBufPtr, alloc);
@@ -885,7 +887,7 @@ static SRes ReadSubStreamsInfo(CSzAr *p, CSzData *sd, CSubStreamInfo *ssi)
if (numStreams != 1 || !SzBitWithVals_Check(&p->FolderCRCs, i))
numSubDigests += numStreams;
}
- ssi->sdNumSubStreams.Size = sd->Data - ssi->sdNumSubStreams.Data;
+ ssi->sdNumSubStreams.Size = (size_t)(sd->Data - ssi->sdNumSubStreams.Data);
continue;
}
if (type == k7zIdCRC || type == k7zIdSize || type == k7zIdEnd)
@@ -907,7 +909,7 @@ static SRes ReadSubStreamsInfo(CSzAr *p, CSzData *sd, CSubStreamInfo *ssi)
{
ssi->sdSizes.Data = sd->Data;
RINOK(SkipNumbers(sd, numUnpackSizesInData));
- ssi->sdSizes.Size = sd->Data - ssi->sdSizes.Data;
+ ssi->sdSizes.Size = (size_t)(sd->Data - ssi->sdSizes.Data);
RINOK(ReadID(sd, &type));
}
@@ -919,7 +921,7 @@ static SRes ReadSubStreamsInfo(CSzAr *p, CSzData *sd, CSubStreamInfo *ssi)
{
ssi->sdCRCs.Data = sd->Data;
RINOK(SkipBitUi32s(sd, numSubDigests));
- ssi->sdCRCs.Size = sd->Data - ssi->sdCRCs.Data;
+ ssi->sdCRCs.Size = (size_t)(sd->Data - ssi->sdCRCs.Data);
}
else
{
@@ -947,7 +949,11 @@ static SRes SzReadStreamsInfo(CSzAr *p,
if (type == k7zIdPackInfo)
{
RINOK(ReadNumber(sd, dataOffset));
+ if (*dataOffset > p->RangeLimit)
+ return SZ_ERROR_ARCHIVE;
RINOK(ReadPackInfo(p, sd, alloc));
+ if (p->PackPositions[p->NumPackStreams] > p->RangeLimit - *dataOffset)
+ return SZ_ERROR_ARCHIVE;
RINOK(ReadID(sd, &type));
}
if (type == k7zIdUnpackInfo)
@@ -1028,12 +1034,12 @@ static SRes SzReadFileNames(const Byte *data, size_t size, UInt32 numFiles, size
return SZ_ERROR_ARCHIVE;
for (p = data + pos;
#ifdef _WIN32
- *(const UInt16 *)p != 0
+ *(const UInt16 *)(const void *)p != 0
#else
p[0] != 0 || p[1] != 0
#endif
; p += 2);
- pos = p - data + 2;
+ pos = (size_t)(p - data) + 2;
*offsets++ = (pos >> 1);
}
while (--numFiles);
@@ -1133,6 +1139,8 @@ static SRes SzReadHeader2(
SRes res;
SzAr_Init(&tempAr);
+ tempAr.RangeLimit = p->db.RangeLimit;
+
res = SzReadAndDecodePackedStreams(inStream, sd, tempBufs, NUM_ADDITIONAL_STREAMS_MAX,
p->startPosAfterHeader, &tempAr, allocTemp);
*numTempBufs = tempAr.NumFolders;
@@ -1526,11 +1534,13 @@ static SRes SzArEx_Open2(
nextHeaderSize = GetUi64(header + 20);
nextHeaderCRC = GetUi32(header + 28);
- p->startPosAfterHeader = startArcPos + k7zStartHeaderSize;
+ p->startPosAfterHeader = (UInt64)startArcPos + k7zStartHeaderSize;
if (CrcCalc(header + 12, 20) != GetUi32(header + 8))
return SZ_ERROR_CRC;
+ p->db.RangeLimit = nextHeaderOffset;
+
nextHeaderSizeT = (size_t)nextHeaderSize;
if (nextHeaderSizeT != nextHeaderSize)
return SZ_ERROR_MEM;
@@ -1543,13 +1553,13 @@ static SRes SzArEx_Open2(
{
Int64 pos = 0;
RINOK(ILookInStream_Seek(inStream, &pos, SZ_SEEK_END));
- if ((UInt64)pos < startArcPos + nextHeaderOffset ||
- (UInt64)pos < startArcPos + k7zStartHeaderSize + nextHeaderOffset ||
- (UInt64)pos < startArcPos + k7zStartHeaderSize + nextHeaderOffset + nextHeaderSize)
+ if ((UInt64)pos < (UInt64)startArcPos + nextHeaderOffset ||
+ (UInt64)pos < (UInt64)startArcPos + k7zStartHeaderSize + nextHeaderOffset ||
+ (UInt64)pos < (UInt64)startArcPos + k7zStartHeaderSize + nextHeaderOffset + nextHeaderSize)
return SZ_ERROR_INPUT_EOF;
}
- RINOK(LookInStream_SeekTo(inStream, startArcPos + k7zStartHeaderSize + nextHeaderOffset));
+ RINOK(LookInStream_SeekTo(inStream, (UInt64)startArcPos + k7zStartHeaderSize + nextHeaderOffset));
if (!Buf_Create(&buf, nextHeaderSizeT, allocTemp))
return SZ_ERROR_MEM;
@@ -1575,6 +1585,8 @@ static SRes SzArEx_Open2(
Buf_Init(&tempBuf);
SzAr_Init(&tempAr);
+ tempAr.RangeLimit = p->db.RangeLimit;
+
res = SzReadAndDecodePackedStreams(inStream, &sd, &tempBuf, 1, p->startPosAfterHeader, &tempAr, allocTemp);
SzAr_Free(&tempAr, allocTemp);
diff --git a/multiarc/src/formats/7z/C/7zBuf.c b/multiarc/src/formats/7z/C/7zBuf.c
index 8865c32a..8865c32a 100644..100755
--- a/multiarc/src/formats/7z/C/7zBuf.c
+++ b/multiarc/src/formats/7z/C/7zBuf.c
diff --git a/multiarc/src/formats/7z/C/7zBuf.h b/multiarc/src/formats/7z/C/7zBuf.h
index 81d1b5b6..81d1b5b6 100644..100755
--- a/multiarc/src/formats/7z/C/7zBuf.h
+++ b/multiarc/src/formats/7z/C/7zBuf.h
diff --git a/multiarc/src/formats/7z/C/7zBuf2.c b/multiarc/src/formats/7z/C/7zBuf2.c
index 20834741..20834741 100644..100755
--- a/multiarc/src/formats/7z/C/7zBuf2.c
+++ b/multiarc/src/formats/7z/C/7zBuf2.c
diff --git a/multiarc/src/formats/7z/C/7zCrc.c b/multiarc/src/formats/7z/C/7zCrc.c
index b4d84f02..f186324d 100644..100755
--- a/multiarc/src/formats/7z/C/7zCrc.c
+++ b/multiarc/src/formats/7z/C/7zCrc.c
@@ -1,5 +1,5 @@
/* 7zCrc.c -- CRC32 init
-2017-06-06 : Igor Pavlov : Public domain */
+2021-04-01 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -26,8 +26,20 @@
typedef UInt32 (MY_FAST_CALL *CRC_FUNC)(UInt32 v, const void *data, size_t size, const UInt32 *table);
+extern
CRC_FUNC g_CrcUpdateT4;
+CRC_FUNC g_CrcUpdateT4;
+extern
+CRC_FUNC g_CrcUpdateT8;
CRC_FUNC g_CrcUpdateT8;
+extern
+CRC_FUNC g_CrcUpdateT0_32;
+CRC_FUNC g_CrcUpdateT0_32;
+extern
+CRC_FUNC g_CrcUpdateT0_64;
+CRC_FUNC g_CrcUpdateT0_64;
+extern
+CRC_FUNC g_CrcUpdate;
CRC_FUNC g_CrcUpdate;
UInt32 g_CrcTable[256 * CRC_NUM_TABLES];
@@ -44,6 +56,7 @@ UInt32 MY_FAST_CALL CrcCalc(const void *data, size_t size)
#define CRC_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8))
+UInt32 MY_FAST_CALL CrcUpdateT1(UInt32 v, const void *data, size_t size, const UInt32 *table);
UInt32 MY_FAST_CALL CrcUpdateT1(UInt32 v, const void *data, size_t size, const UInt32 *table)
{
const Byte *p = (const Byte *)data;
@@ -53,6 +66,166 @@ UInt32 MY_FAST_CALL CrcUpdateT1(UInt32 v, const void *data, size_t size, const U
return v;
}
+
+/* ---------- hardware CRC ---------- */
+
+#ifdef MY_CPU_LE
+
+#if defined(MY_CPU_ARM_OR_ARM64)
+
+// #pragma message("ARM*")
+
+ #if defined(_MSC_VER)
+ #if defined(MY_CPU_ARM64)
+ #if (_MSC_VER >= 1910)
+ #define USE_ARM64_CRC
+ #endif
+ #endif
+ #elif (defined(__clang__) && (__clang_major__ >= 3)) \
+ || (defined(__GNUC__) && (__GNUC__ > 4))
+ #if !defined(__ARM_FEATURE_CRC32)
+ #define __ARM_FEATURE_CRC32 1
+ #if (!defined(__clang__) || (__clang_major__ > 3)) // fix these numbers
+ #define ATTRIB_CRC __attribute__((__target__("arch=armv8-a+crc")))
+ #endif
+ #endif
+ #if defined(__ARM_FEATURE_CRC32)
+ #define USE_ARM64_CRC
+ #include <arm_acle.h>
+ #endif
+ #endif
+
+#else
+
+// no hardware CRC
+
+// #define USE_CRC_EMU
+
+#ifdef USE_CRC_EMU
+
+#pragma message("ARM64 CRC emulation")
+
+// USE_CRC_EMU emulation of __crc32b: folds the low byte of `data` into the
+// running CRC value `v` through g_CrcTable.
+MY_FORCE_INLINE
+UInt32 __crc32b(UInt32 v, UInt32 data)
+{
+ // CRC_UPDATE_BYTE_2 expands to an expression that indexes a local named `table`
+ const UInt32 *table = g_CrcTable;
+ return CRC_UPDATE_BYTE_2(v, (Byte)data);
+}
+
+// USE_CRC_EMU emulation of __crc32w: folds the four bytes of `data` into the
+// running CRC value `v`, least-significant byte first.
+MY_FORCE_INLINE
+UInt32 __crc32w(UInt32 v, UInt32 data)
+{
+ // CRC_UPDATE_BYTE_2 expands to an expression that indexes a local named `table`
+ const UInt32 *table = g_CrcTable;
+ unsigned k;
+ for (k = 0; k < 4; k++, data >>= 8)
+ v = CRC_UPDATE_BYTE_2(v, (Byte)data);
+ return v;
+}
+
+// USE_CRC_EMU emulation of __crc32d: folds the eight bytes of `data` into the
+// running CRC value `v`, least-significant byte first.
+MY_FORCE_INLINE
+UInt32 __crc32d(UInt32 v, UInt64 data)
+{
+ // CRC_UPDATE_BYTE_2 expands to an expression that indexes a local named `table`
+ const UInt32 *table = g_CrcTable;
+ unsigned k;
+ for (k = 0; k < 8; k++, data >>= 8)
+ v = CRC_UPDATE_BYTE_2(v, (Byte)data);
+ return v;
+}
+
+#endif // USE_CRC_EMU
+
+#endif // defined(MY_CPU_ARM_OR_ARM64)
+
+
+
+#if defined(USE_ARM64_CRC) || defined(USE_CRC_EMU)
+
+#define T0_32_UNROLL_BYTES (4 * 4)
+#define T0_64_UNROLL_BYTES (4 * 8)
+
+#ifndef ATTRIB_CRC
+#define ATTRIB_CRC
+#endif
+// #pragma message("USE ARM HW CRC")
+
+// CRC32 update built on __crc32b / __crc32w, consuming 32-bit words in the main loop.
+// v     - running CRC value; the updated value is returned
+// data  - input bytes
+// size  - number of input bytes
+// table - unused here (kept so the signature matches CRC_FUNC)
+ATTRIB_CRC
+UInt32 MY_FAST_CALL CrcUpdateT0_32(UInt32 v, const void *data, size_t size, const UInt32 *table);
+ATTRIB_CRC
+UInt32 MY_FAST_CALL CrcUpdateT0_32(UInt32 v, const void *data, size_t size, const UInt32 *table)
+{
+ const Byte *p = (const Byte *)data;
+ UNUSED_VAR(table);
+
+ // Head: byte-at-a-time until p is a multiple of T0_32_UNROLL_BYTES (16) or the input runs out
+ for (; size != 0 && ((unsigned)(ptrdiff_t)p & (T0_32_UNROLL_BYTES - 1)) != 0; size--)
+ v = __crc32b(v, *p++);
+
+ if (size >= T0_32_UNROLL_BYTES)
+ {
+ // lim = end of the last whole 16-byte group; size is reduced to the leftover tail length
+ const Byte *lim = p + size;
+ size &= (T0_32_UNROLL_BYTES - 1);
+ lim -= size;
+ // Four 32-bit words per iteration. The guard above guarantees at least one whole group,
+ // and (lim - p) is a multiple of 16, so p reaches lim exactly.
+ do
+ {
+ v = __crc32w(v, *(const UInt32 *)(const void *)(p));
+ v = __crc32w(v, *(const UInt32 *)(const void *)(p + 4)); p += 2 * 4;
+ v = __crc32w(v, *(const UInt32 *)(const void *)(p));
+ v = __crc32w(v, *(const UInt32 *)(const void *)(p + 4)); p += 2 * 4;
+ }
+ while (p != lim);
+ }
+
+ // Tail: remaining bytes (fewer than 16 after the main loop)
+ for (; size != 0; size--)
+ v = __crc32b(v, *p++);
+
+ return v;
+}
+
+// CRC32 update built on __crc32b / __crc32d, consuming 64-bit words in the main loop.
+// v     - running CRC value; the updated value is returned
+// data  - input bytes
+// size  - number of input bytes
+// table - unused here (kept so the signature matches CRC_FUNC)
+ATTRIB_CRC
+UInt32 MY_FAST_CALL CrcUpdateT0_64(UInt32 v, const void *data, size_t size, const UInt32 *table);
+ATTRIB_CRC
+UInt32 MY_FAST_CALL CrcUpdateT0_64(UInt32 v, const void *data, size_t size, const UInt32 *table)
+{
+ const Byte *p = (const Byte *)data;
+ UNUSED_VAR(table);
+
+ // Head: byte-at-a-time until p is a multiple of T0_64_UNROLL_BYTES (32) or the input runs out
+ for (; size != 0 && ((unsigned)(ptrdiff_t)p & (T0_64_UNROLL_BYTES - 1)) != 0; size--)
+ v = __crc32b(v, *p++);
+
+ if (size >= T0_64_UNROLL_BYTES)
+ {
+ // lim = end of the last whole 32-byte group; size is reduced to the leftover tail length
+ const Byte *lim = p + size;
+ size &= (T0_64_UNROLL_BYTES - 1);
+ lim -= size;
+ // Four 64-bit words per iteration. The guard above guarantees at least one whole group,
+ // and (lim - p) is a multiple of 32, so p reaches lim exactly.
+ do
+ {
+ v = __crc32d(v, *(const UInt64 *)(const void *)(p));
+ v = __crc32d(v, *(const UInt64 *)(const void *)(p + 8)); p += 2 * 8;
+ v = __crc32d(v, *(const UInt64 *)(const void *)(p));
+ v = __crc32d(v, *(const UInt64 *)(const void *)(p + 8)); p += 2 * 8;
+ }
+ while (p != lim);
+ }
+
+ // Tail: remaining bytes (fewer than 32 after the main loop)
+ for (; size != 0; size--)
+ v = __crc32b(v, *p++);
+
+ return v;
+}
+
+#endif // defined(USE_ARM64_CRC) || defined(USE_CRC_EMU)
+
+#endif // MY_CPU_LE
+
+
+
+
void MY_FAST_CALL CrcGenerateTable()
{
UInt32 i;
@@ -123,6 +296,27 @@ void MY_FAST_CALL CrcGenerateTable()
}
}
#endif
+ #endif
+ #ifdef MY_CPU_LE
+ #ifdef USE_ARM64_CRC
+ if (CPU_IsSupported_CRC32())
+ {
+ g_CrcUpdateT0_32 = CrcUpdateT0_32;
+ g_CrcUpdateT0_64 = CrcUpdateT0_64;
+ g_CrcUpdate =
+ #if defined(MY_CPU_ARM)
+ CrcUpdateT0_32;
+ #else
+ CrcUpdateT0_64;
+ #endif
+ }
+ #endif
+
+ #ifdef USE_CRC_EMU
+ g_CrcUpdateT0_32 = CrcUpdateT0_32;
+ g_CrcUpdateT0_64 = CrcUpdateT0_64;
+ g_CrcUpdate = CrcUpdateT0_64;
+ #endif
#endif
}
diff --git a/multiarc/src/formats/7z/C/7zCrc.h b/multiarc/src/formats/7z/C/7zCrc.h
index 8fd57958..8fd57958 100644..100755
--- a/multiarc/src/formats/7z/C/7zCrc.h
+++ b/multiarc/src/formats/7z/C/7zCrc.h
diff --git a/multiarc/src/formats/7z/C/7zCrcOpt.c b/multiarc/src/formats/7z/C/7zCrcOpt.c
index 73beba29..69fad9ca 100644..100755
--- a/multiarc/src/formats/7z/C/7zCrcOpt.c
+++ b/multiarc/src/formats/7z/C/7zCrcOpt.c
@@ -1,5 +1,5 @@
/* 7zCrcOpt.c -- CRC32 calculation
-2017-04-03 : Igor Pavlov : Public domain */
+2021-02-09 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -9,6 +9,7 @@
#define CRC_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8))
+UInt32 MY_FAST_CALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const UInt32 *table);
UInt32 MY_FAST_CALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const UInt32 *table)
{
const Byte *p = (const Byte *)data;
@@ -16,7 +17,7 @@ UInt32 MY_FAST_CALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const U
v = CRC_UPDATE_BYTE_2(v, *p);
for (; size >= 4; size -= 4, p += 4)
{
- v ^= *(const UInt32 *)p;
+ v ^= *(const UInt32 *)(const void *)p;
v =
(table + 0x300)[((v ) & 0xFF)]
^ (table + 0x200)[((v >> 8) & 0xFF)]
@@ -28,6 +29,7 @@ UInt32 MY_FAST_CALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const U
return v;
}
+UInt32 MY_FAST_CALL CrcUpdateT8(UInt32 v, const void *data, size_t size, const UInt32 *table);
UInt32 MY_FAST_CALL CrcUpdateT8(UInt32 v, const void *data, size_t size, const UInt32 *table)
{
const Byte *p = (const Byte *)data;
@@ -36,13 +38,13 @@ UInt32 MY_FAST_CALL CrcUpdateT8(UInt32 v, const void *data, size_t size, const U
for (; size >= 8; size -= 8, p += 8)
{
UInt32 d;
- v ^= *(const UInt32 *)p;
+ v ^= *(const UInt32 *)(const void *)p;
v =
(table + 0x700)[((v ) & 0xFF)]
^ (table + 0x600)[((v >> 8) & 0xFF)]
^ (table + 0x500)[((v >> 16) & 0xFF)]
^ (table + 0x400)[((v >> 24))];
- d = *((const UInt32 *)p + 1);
+ d = *((const UInt32 *)(const void *)p + 1);
v ^=
(table + 0x300)[((d ) & 0xFF)]
^ (table + 0x200)[((d >> 8) & 0xFF)]
@@ -72,7 +74,7 @@ UInt32 MY_FAST_CALL CrcUpdateT1_BeT4(UInt32 v, const void *data, size_t size, co
v = CRC_UPDATE_BYTE_2_BE(v, *p);
for (; size >= 4; size -= 4, p += 4)
{
- v ^= *(const UInt32 *)p;
+ v ^= *(const UInt32 *)(const void *)p;
v =
(table + 0x000)[((v ) & 0xFF)]
^ (table + 0x100)[((v >> 8) & 0xFF)]
@@ -94,13 +96,13 @@ UInt32 MY_FAST_CALL CrcUpdateT1_BeT8(UInt32 v, const void *data, size_t size, co
for (; size >= 8; size -= 8, p += 8)
{
UInt32 d;
- v ^= *(const UInt32 *)p;
+ v ^= *(const UInt32 *)(const void *)p;
v =
(table + 0x400)[((v ) & 0xFF)]
^ (table + 0x500)[((v >> 8) & 0xFF)]
^ (table + 0x600)[((v >> 16) & 0xFF)]
^ (table + 0x700)[((v >> 24))];
- d = *((const UInt32 *)p + 1);
+ d = *((const UInt32 *)(const void *)p + 1);
v ^=
(table + 0x000)[((d ) & 0xFF)]
^ (table + 0x100)[((d >> 8) & 0xFF)]
diff --git a/multiarc/src/formats/7z/C/7zDec.c b/multiarc/src/formats/7z/C/7zDec.c
index 7c463521..fbfd016e 100644..100755
--- a/multiarc/src/formats/7z/C/7zDec.c
+++ b/multiarc/src/formats/7z/C/7zDec.c
@@ -1,5 +1,5 @@
/* 7zDec.c -- Decoding from 7z folder
-2019-02-02 : Igor Pavlov : Public domain */
+2021-02-09 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -21,17 +21,20 @@
#endif
#define k_Copy 0
-#define k_Delta 3
+#ifndef _7Z_NO_METHOD_LZMA2
#define k_LZMA2 0x21
+#endif
#define k_LZMA 0x30101
-#define k_BCJ 0x3030103
#define k_BCJ2 0x303011B
+#ifndef _7Z_NO_METHODS_FILTERS
+#define k_Delta 3
+#define k_BCJ 0x3030103
#define k_PPC 0x3030205
#define k_IA64 0x3030401
#define k_ARM 0x3030501
#define k_ARMT 0x3030701
#define k_SPARC 0x3030805
-
+#endif
#ifdef _7ZIP_PPMD_SUPPPORT
@@ -56,7 +59,7 @@ static Byte ReadByte(const IByteIn *pp)
return *p->cur++;
if (p->res == SZ_OK)
{
- size_t size = p->cur - p->begin;
+ size_t size = (size_t)(p->cur - p->begin);
p->processed += size;
p->res = ILookInStream_Skip(p->inStream, size);
size = (1 << 25);
@@ -101,28 +104,32 @@ static SRes SzDecodePpmd(const Byte *props, unsigned propsSize, UInt64 inSize, c
Ppmd7_Init(&ppmd, order);
}
{
- CPpmd7z_RangeDec rc;
- Ppmd7z_RangeDec_CreateVTable(&rc);
- rc.Stream = &s.vt;
- if (!Ppmd7z_RangeDec_Init(&rc))
+ ppmd.rc.dec.Stream = &s.vt;
+ if (!Ppmd7z_RangeDec_Init(&ppmd.rc.dec))
res = SZ_ERROR_DATA;
- else if (s.extra)
- res = (s.res != SZ_OK ? s.res : SZ_ERROR_DATA);
- else
+ else if (!s.extra)
{
- SizeT i;
- for (i = 0; i < outSize; i++)
+ Byte *buf = outBuffer;
+ const Byte *lim = buf + outSize;
+ for (; buf != lim; buf++)
{
- int sym = Ppmd7_DecodeSymbol(&ppmd, &rc.vt);
+ int sym = Ppmd7z_DecodeSymbol(&ppmd);
if (s.extra || sym < 0)
break;
- outBuffer[i] = (Byte)sym;
+ *buf = (Byte)sym;
}
- if (i != outSize)
- res = (s.res != SZ_OK ? s.res : SZ_ERROR_DATA);
- else if (s.processed + (s.cur - s.begin) != inSize || !Ppmd7z_RangeDec_IsFinishedOK(&rc))
+ if (buf != lim)
+ res = SZ_ERROR_DATA;
+ else if (!Ppmd7z_RangeDec_IsFinishedOK(&ppmd.rc.dec))
+ {
+ /* if (Ppmd7z_DecodeSymbol(&ppmd) != PPMD7_SYM_END || !Ppmd7z_RangeDec_IsFinishedOK(&ppmd.rc.dec)) */
res = SZ_ERROR_DATA;
+ }
}
+ if (s.extra)
+ res = (s.res != SZ_OK ? s.res : SZ_ERROR_DATA);
+ else if (s.processed + (size_t)(s.cur - s.begin) != inSize)
+ res = SZ_ERROR_DATA;
}
Ppmd7_Free(&ppmd, allocMain);
return res;
@@ -365,7 +372,9 @@ static SRes CheckSupportedFolder(const CSzFolder *f)
return SZ_ERROR_UNSUPPORTED;
}
+#ifndef _7Z_NO_METHODS_FILTERS
#define CASE_BRA_CONV(isa) case k_ ## isa: isa ## _Convert(outBuffer, outSize, 0, 0); break;
+#endif
static SRes SzFolder_Decode2(const CSzFolder *folder,
const Byte *propsData,
diff --git a/multiarc/src/formats/7z/C/7zFile.c b/multiarc/src/formats/7z/C/7zFile.c
index 8992fb1c..13d2efa4 100644..100755
--- a/multiarc/src/formats/7z/C/7zFile.c
+++ b/multiarc/src/formats/7z/C/7zFile.c
@@ -1,5 +1,5 @@
/* 7zFile.c -- File IO
-2017-04-03 : Igor Pavlov : Public domain */
+2021-04-29 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -7,9 +7,19 @@
#ifndef USE_WINDOWS_FILE
-#ifndef UNDER_CE
-#include <errno.h>
-#endif
+ #include <errno.h>
+
+ #ifndef USE_FOPEN
+ #include <stdio.h>
+ #include <fcntl.h>
+ #ifdef _WIN32
+ #include <io.h>
+ typedef int ssize_t;
+ typedef int off_t;
+ #else
+ #include <unistd.h>
+ #endif
+ #endif
#else
@@ -23,30 +33,36 @@
And message can be "Network connection was lost"
*/
-#define kChunkSizeMax (1 << 22)
-
#endif
+#define kChunkSizeMax (1 << 22)
+
void File_Construct(CSzFile *p)
{
#ifdef USE_WINDOWS_FILE
p->handle = INVALID_HANDLE_VALUE;
- #else
+ #elif defined(USE_FOPEN)
p->file = NULL;
+ #else
+ p->fd = -1;
#endif
}
#if !defined(UNDER_CE) || !defined(USE_WINDOWS_FILE)
+
static WRes File_Open(CSzFile *p, const char *name, int writeMode)
{
#ifdef USE_WINDOWS_FILE
+
p->handle = CreateFileA(name,
writeMode ? GENERIC_WRITE : GENERIC_READ,
FILE_SHARE_READ, NULL,
writeMode ? CREATE_ALWAYS : OPEN_EXISTING,
FILE_ATTRIBUTE_NORMAL, NULL);
return (p->handle != INVALID_HANDLE_VALUE) ? 0 : GetLastError();
- #else
+
+ #elif defined(USE_FOPEN)
+
p->file = fopen(name, writeMode ? "wb+" : "rb");
return (p->file != 0) ? 0 :
#ifdef UNDER_CE
@@ -54,13 +70,34 @@ static WRes File_Open(CSzFile *p, const char *name, int writeMode)
#else
errno;
#endif
+
+ #else
+
+ int flags = (writeMode ? (O_CREAT | O_EXCL | O_WRONLY) : O_RDONLY);
+ #ifdef O_BINARY
+ flags |= O_BINARY;
+ #endif
+ p->fd = open(name, flags, 0666);
+ return (p->fd != -1) ? 0 : errno;
+
#endif
}
WRes InFile_Open(CSzFile *p, const char *name) { return File_Open(p, name, 0); }
-WRes OutFile_Open(CSzFile *p, const char *name) { return File_Open(p, name, 1); }
+
+// Opens `name` for output and stores the resulting handle in `p`.
+// Returns 0 on success; otherwise the File_Open() result or, in the
+// file-descriptor build, the errno left by creat().
+WRes OutFile_Open(CSzFile *p, const char *name)
+{
+ #if !defined(USE_WINDOWS_FILE) && !defined(USE_FOPEN)
+ const int fd = creat(name, 0666);
+ p->fd = fd;
+ if (fd == -1)
+ return errno;
+ return 0;
+ #else
+ return File_Open(p, name, 1);
+ #endif
+}
+
#endif
+
#ifdef USE_WINDOWS_FILE
static WRes File_OpenW(CSzFile *p, const WCHAR *name, int writeMode)
{
@@ -78,74 +115,124 @@ WRes OutFile_OpenW(CSzFile *p, const WCHAR *name) { return File_OpenW(p, name, 1
WRes File_Close(CSzFile *p)
{
#ifdef USE_WINDOWS_FILE
+
if (p->handle != INVALID_HANDLE_VALUE)
{
if (!CloseHandle(p->handle))
return GetLastError();
p->handle = INVALID_HANDLE_VALUE;
}
- #else
+
+ #elif defined(USE_FOPEN)
+
if (p->file != NULL)
{
int res = fclose(p->file);
if (res != 0)
+ {
+ if (res == EOF)
+ return errno;
return res;
+ }
p->file = NULL;
}
+
+ #else
+
+ if (p->fd != -1)
+ {
+ if (close(p->fd) != 0)
+ return errno;
+ p->fd = -1;
+ }
+
#endif
+
return 0;
}
+
WRes File_Read(CSzFile *p, void *data, size_t *size)
{
size_t originalSize = *size;
+ *size = 0;
if (originalSize == 0)
return 0;
#ifdef USE_WINDOWS_FILE
- *size = 0;
do
{
- DWORD curSize = (originalSize > kChunkSizeMax) ? kChunkSizeMax : (DWORD)originalSize;
+ const DWORD curSize = (originalSize > kChunkSizeMax) ? kChunkSizeMax : (DWORD)originalSize;
DWORD processed = 0;
- BOOL res = ReadFile(p->handle, data, curSize, &processed, NULL);
+ const BOOL res = ReadFile(p->handle, data, curSize, &processed, NULL);
data = (void *)((Byte *)data + processed);
originalSize -= processed;
*size += processed;
if (!res)
return GetLastError();
+ // debug : we can break here for partial reading mode
+ if (processed == 0)
+ break;
+ }
+ while (originalSize > 0);
+
+ #elif defined(USE_FOPEN)
+
+ do
+ {
+ const size_t curSize = (originalSize > kChunkSizeMax) ? kChunkSizeMax : originalSize;
+ const size_t processed = fread(data, 1, curSize, p->file);
+ data = (void *)((Byte *)data + (size_t)processed);
+ originalSize -= processed;
+ *size += processed;
+ if (processed != curSize)
+ return ferror(p->file);
+ // debug : we can break here for partial reading mode
if (processed == 0)
break;
}
while (originalSize > 0);
- return 0;
#else
-
- *size = fread(data, 1, originalSize, p->file);
- if (*size == originalSize)
- return 0;
- return ferror(p->file);
-
+
+ do
+ {
+ const size_t curSize = (originalSize > kChunkSizeMax) ? kChunkSizeMax : originalSize;
+ const ssize_t processed = read(p->fd, data, curSize);
+ if (processed == -1)
+ return errno;
+ if (processed == 0)
+ break;
+ data = (void *)((Byte *)data + (size_t)processed);
+ originalSize -= (size_t)processed;
+ *size += (size_t)processed;
+ // debug : we can break here for partial reading mode
+ // break;
+ }
+ while (originalSize > 0);
+
#endif
+
+ return 0;
}
+
WRes File_Write(CSzFile *p, const void *data, size_t *size)
{
size_t originalSize = *size;
+ *size = 0;
if (originalSize == 0)
return 0;
#ifdef USE_WINDOWS_FILE
- *size = 0;
do
{
- DWORD curSize = (originalSize > kChunkSizeMax) ? kChunkSizeMax : (DWORD)originalSize;
+ const DWORD curSize = (originalSize > kChunkSizeMax) ? kChunkSizeMax : (DWORD)originalSize;
DWORD processed = 0;
- BOOL res = WriteFile(p->handle, data, curSize, &processed, NULL);
- data = (void *)((Byte *)data + processed);
+ const BOOL res = WriteFile(p->handle, data, curSize, &processed, NULL);
+ data = (const void *)((const Byte *)data + processed);
originalSize -= processed;
*size += processed;
if (!res)
@@ -154,26 +241,52 @@ WRes File_Write(CSzFile *p, const void *data, size_t *size)
break;
}
while (originalSize > 0);
- return 0;
+
+ #elif defined(USE_FOPEN)
+
+ do
+ {
+ const size_t curSize = (originalSize > kChunkSizeMax) ? kChunkSizeMax : originalSize;
+ const size_t processed = fwrite(data, 1, curSize, p->file);
+ data = (void *)((Byte *)data + (size_t)processed);
+ originalSize -= processed;
+ *size += processed;
+ if (processed != curSize)
+ return ferror(p->file);
+ if (processed == 0)
+ break;
+ }
+ while (originalSize > 0);
#else
- *size = fwrite(data, 1, originalSize, p->file);
- if (*size == originalSize)
- return 0;
- return ferror(p->file);
-
+ do
+ {
+ const size_t curSize = (originalSize > kChunkSizeMax) ? kChunkSizeMax : originalSize;
+ const ssize_t processed = write(p->fd, data, curSize);
+ if (processed == -1)
+ return errno;
+ if (processed == 0)
+ break;
+ data = (void *)((Byte *)data + (size_t)processed);
+ originalSize -= (size_t)processed;
+ *size += (size_t)processed;
+ }
+ while (originalSize > 0);
+
#endif
+
+ return 0;
}
+
WRes File_Seek(CSzFile *p, Int64 *pos, ESzSeek origin)
{
#ifdef USE_WINDOWS_FILE
- LARGE_INTEGER value;
DWORD moveMethod;
- value.LowPart = (DWORD)*pos;
- value.HighPart = (LONG)((UInt64)*pos >> 16 >> 16); /* for case when UInt64 is 32-bit only */
+ UInt32 low = (UInt32)*pos;
+ LONG high = (LONG)((UInt64)*pos >> 16 >> 16); /* for case when UInt64 is 32-bit only */
switch (origin)
{
case SZ_SEEK_SET: moveMethod = FILE_BEGIN; break;
@@ -181,34 +294,52 @@ WRes File_Seek(CSzFile *p, Int64 *pos, ESzSeek origin)
case SZ_SEEK_END: moveMethod = FILE_END; break;
default: return ERROR_INVALID_PARAMETER;
}
- value.LowPart = SetFilePointer(p->handle, value.LowPart, &value.HighPart, moveMethod);
- if (value.LowPart == 0xFFFFFFFF)
+ low = SetFilePointer(p->handle, (LONG)low, &high, moveMethod);
+ if (low == (UInt32)0xFFFFFFFF)
{
WRes res = GetLastError();
if (res != NO_ERROR)
return res;
}
- *pos = ((Int64)value.HighPart << 32) | value.LowPart;
+ *pos = ((Int64)high << 32) | low;
return 0;
#else
- int moveMethod;
- int res;
+ int moveMethod; // = origin;
+
switch (origin)
{
case SZ_SEEK_SET: moveMethod = SEEK_SET; break;
case SZ_SEEK_CUR: moveMethod = SEEK_CUR; break;
case SZ_SEEK_END: moveMethod = SEEK_END; break;
- default: return 1;
+ default: return EINVAL;
}
- res = fseek(p->file, (long)*pos, moveMethod);
- *pos = ftell(p->file);
- return res;
- #endif
+ #if defined(USE_FOPEN)
+ {
+ int res = fseek(p->file, (long)*pos, moveMethod);
+ if (res == -1)
+ return errno;
+ *pos = ftell(p->file);
+ if (*pos == -1)
+ return errno;
+ return 0;
+ }
+ #else
+ {
+ off_t res = lseek(p->fd, (off_t)*pos, moveMethod);
+ if (res == -1)
+ return errno;
+ *pos = res;
+ return 0;
+ }
+
+ #endif // USE_FOPEN
+ #endif // USE_WINDOWS_FILE
}
+
WRes File_GetLength(CSzFile *p, UInt64 *length)
{
#ifdef USE_WINDOWS_FILE
@@ -224,13 +355,31 @@ WRes File_GetLength(CSzFile *p, UInt64 *length)
*length = (((UInt64)sizeHigh) << 32) + sizeLow;
return 0;
- #else
+ #elif defined(USE_FOPEN)
long pos = ftell(p->file);
int res = fseek(p->file, 0, SEEK_END);
*length = ftell(p->file);
fseek(p->file, pos, SEEK_SET);
return res;
+
+ #else
+
+ off_t pos;
+ *length = 0;
+ pos = lseek(p->fd, 0, SEEK_CUR);
+ if (pos != -1)
+ {
+ const off_t len2 = lseek(p->fd, 0, SEEK_END);
+ const off_t res2 = lseek(p->fd, pos, SEEK_SET);
+ if (len2 != -1)
+ {
+ *length = (UInt64)len2;
+ if (res2 != -1)
+ return 0;
+ }
+ }
+ return errno;
#endif
}
@@ -241,7 +390,9 @@ WRes File_GetLength(CSzFile *p, UInt64 *length)
static SRes FileSeqInStream_Read(const ISeqInStream *pp, void *buf, size_t *size)
{
CFileSeqInStream *p = CONTAINER_FROM_VTBL(pp, CFileSeqInStream, vt);
- return File_Read(&p->file, buf, size) == 0 ? SZ_OK : SZ_ERROR_READ;
+ WRes wres = File_Read(&p->file, buf, size);
+ p->wres = wres;
+ return (wres == 0) ? SZ_OK : SZ_ERROR_READ;
}
void FileSeqInStream_CreateVTable(CFileSeqInStream *p)
@@ -255,13 +406,17 @@ void FileSeqInStream_CreateVTable(CFileSeqInStream *p)
static SRes FileInStream_Read(const ISeekInStream *pp, void *buf, size_t *size)
{
CFileInStream *p = CONTAINER_FROM_VTBL(pp, CFileInStream, vt);
- return (File_Read(&p->file, buf, size) == 0) ? SZ_OK : SZ_ERROR_READ;
+ WRes wres = File_Read(&p->file, buf, size);
+ p->wres = wres;
+ return (wres == 0) ? SZ_OK : SZ_ERROR_READ;
}
static SRes FileInStream_Seek(const ISeekInStream *pp, Int64 *pos, ESzSeek origin)
{
CFileInStream *p = CONTAINER_FROM_VTBL(pp, CFileInStream, vt);
- return File_Seek(&p->file, pos, origin);
+ WRes wres = File_Seek(&p->file, pos, origin);
+ p->wres = wres;
+ return (wres == 0) ? SZ_OK : SZ_ERROR_READ;
}
void FileInStream_CreateVTable(CFileInStream *p)
@@ -276,7 +431,8 @@ void FileInStream_CreateVTable(CFileInStream *p)
static size_t FileOutStream_Write(const ISeqOutStream *pp, const void *data, size_t size)
{
CFileOutStream *p = CONTAINER_FROM_VTBL(pp, CFileOutStream, vt);
- File_Write(&p->file, data, &size);
+ WRes wres = File_Write(&p->file, data, &size);
+ p->wres = wres;
return size;
}
diff --git a/multiarc/src/formats/7z/C/7zFile.h b/multiarc/src/formats/7z/C/7zFile.h
index 0e792538..788abb6b 100644..100755
--- a/multiarc/src/formats/7z/C/7zFile.h
+++ b/multiarc/src/formats/7z/C/7zFile.h
@@ -1,17 +1,20 @@
/* 7zFile.h -- File IO
-2017-04-03 : Igor Pavlov : Public domain */
+2021-02-15 : Igor Pavlov : Public domain */
#ifndef __7Z_FILE_H
#define __7Z_FILE_H
#ifdef _WIN32
#define USE_WINDOWS_FILE
+// #include <windows.h>
#endif
#ifdef USE_WINDOWS_FILE
#include <windows.h>
#else
-#include <stdio.h>
+// note: USE_FOPEN mode is limited to 32-bit file size
+// #define USE_FOPEN
+// #include <stdio.h>
#endif
#include "7zTypes.h"
@@ -24,8 +27,10 @@ typedef struct
{
#ifdef USE_WINDOWS_FILE
HANDLE handle;
- #else
+ #elif defined(USE_FOPEN)
FILE *file;
+ #else
+ int fd;
#endif
} CSzFile;
@@ -56,6 +61,7 @@ typedef struct
{
ISeqInStream vt;
CSzFile file;
+ WRes wres;
} CFileSeqInStream;
void FileSeqInStream_CreateVTable(CFileSeqInStream *p);
@@ -65,6 +71,7 @@ typedef struct
{
ISeekInStream vt;
CSzFile file;
+ WRes wres;
} CFileInStream;
void FileInStream_CreateVTable(CFileInStream *p);
@@ -74,6 +81,7 @@ typedef struct
{
ISeqOutStream vt;
CSzFile file;
+ WRes wres;
} CFileOutStream;
void FileOutStream_CreateVTable(CFileOutStream *p);
diff --git a/multiarc/src/formats/7z/C/7zStream.c b/multiarc/src/formats/7z/C/7zStream.c
index 6b5aa162..28a14604 100644..100755
--- a/multiarc/src/formats/7z/C/7zStream.c
+++ b/multiarc/src/formats/7z/C/7zStream.c
@@ -1,5 +1,5 @@
/* 7zStream.c -- 7z Stream functions
-2017-04-03 : Igor Pavlov : Public domain */
+2021-02-09 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -37,7 +37,7 @@ SRes SeqInStream_ReadByte(const ISeqInStream *stream, Byte *buf)
SRes LookInStream_SeekTo(const ILookInStream *stream, UInt64 offset)
{
- Int64 t = offset;
+ Int64 t = (Int64)offset;
return ILookInStream_Seek(stream, &t, SZ_SEEK_SET);
}
diff --git a/multiarc/src/formats/7z/C/7zTypes.h b/multiarc/src/formats/7z/C/7zTypes.h
index 65b3af63..c5065a7c 100644..100755
--- a/multiarc/src/formats/7z/C/7zTypes.h
+++ b/multiarc/src/formats/7z/C/7zTypes.h
@@ -1,11 +1,13 @@
/* 7zTypes.h -- Basic types
-2018-08-04 : Igor Pavlov : Public domain */
+2022-04-01 : Igor Pavlov : Public domain */
#ifndef __7Z_TYPES_H
#define __7Z_TYPES_H
#ifdef _WIN32
/* #include <windows.h> */
+#else
+#include <errno.h>
#endif
#include <stddef.h>
@@ -43,18 +45,76 @@ EXTERN_C_BEGIN
typedef int SRes;
+#ifdef _MSC_VER
+ #if _MSC_VER > 1200
+ #define MY_ALIGN(n) __declspec(align(n))
+ #else
+ #define MY_ALIGN(n)
+ #endif
+#else
+ #define MY_ALIGN(n) __attribute__ ((aligned(n)))
+#endif
+
+
#ifdef _WIN32
/* typedef DWORD WRes; */
typedef unsigned WRes;
#define MY_SRes_HRESULT_FROM_WRes(x) HRESULT_FROM_WIN32(x)
-#else
+// #define MY_HRES_ERROR__INTERNAL_ERROR MY_SRes_HRESULT_FROM_WRes(ERROR_INTERNAL_ERROR)
+#else // _WIN32
+
+// #define ENV_HAVE_LSTAT
typedef int WRes;
-#define MY__FACILITY_WIN32 7
-#define MY__FACILITY__WRes MY__FACILITY_WIN32
-#define MY_SRes_HRESULT_FROM_WRes(x) ((HRESULT)(x) <= 0 ? ((HRESULT)(x)) : ((HRESULT) (((x) & 0x0000FFFF) | (MY__FACILITY__WRes << 16) | 0x80000000)))
+
+// (FACILITY_ERRNO = 0x800) is 7zip's FACILITY constant to represent (errno) errors in HRESULT
+#define MY__FACILITY_ERRNO 0x800
+#define MY__FACILITY_WIN32 7
+#define MY__FACILITY__WRes MY__FACILITY_ERRNO
+
+#define MY_HRESULT_FROM_errno_CONST_ERROR(x) ((HRESULT)( \
+ ( (HRESULT)(x) & 0x0000FFFF) \
+ | (MY__FACILITY__WRes << 16) \
+ | (HRESULT)0x80000000 ))
+
+#define MY_SRes_HRESULT_FROM_WRes(x) \
+ ((HRESULT)(x) <= 0 ? ((HRESULT)(x)) : MY_HRESULT_FROM_errno_CONST_ERROR(x))
+
+// on non-Windows builds the macro HRESULT_FROM_WIN32 is applied to system errors (WRes), which are (errno) values
+#define HRESULT_FROM_WIN32(x) MY_SRes_HRESULT_FROM_WRes(x)
+
+/*
+#define ERROR_FILE_NOT_FOUND 2L
+#define ERROR_ACCESS_DENIED 5L
+#define ERROR_NO_MORE_FILES 18L
+#define ERROR_LOCK_VIOLATION 33L
+#define ERROR_FILE_EXISTS 80L
+#define ERROR_DISK_FULL 112L
+#define ERROR_NEGATIVE_SEEK 131L
+#define ERROR_ALREADY_EXISTS 183L
+#define ERROR_DIRECTORY 267L
+#define ERROR_TOO_MANY_POSTS 298L
+
+#define ERROR_INTERNAL_ERROR 1359L
+#define ERROR_INVALID_REPARSE_DATA 4392L
+#define ERROR_REPARSE_TAG_INVALID 4393L
+#define ERROR_REPARSE_TAG_MISMATCH 4394L
+*/
+
+// if (MY__FACILITY__WRes != FACILITY_WIN32),
+// we use FACILITY_WIN32 for COM errors:
+#define E_OUTOFMEMORY ((HRESULT)0x8007000EL)
+#define E_INVALIDARG ((HRESULT)0x80070057L)
+#define MY__E_ERROR_NEGATIVE_SEEK ((HRESULT)0x80070083L)
+
+/*
+// we can use FACILITY_ERRNO for some COM errors, that have errno equivalents:
+#define E_OUTOFMEMORY MY_HRESULT_FROM_errno_CONST_ERROR(ENOMEM)
+#define E_INVALIDARG MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL)
+#define MY__E_ERROR_NEGATIVE_SEEK MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL)
+*/
#endif
@@ -63,6 +123,10 @@ typedef int WRes;
#define RINOK(x) { int __result__ = (x); if (__result__ != 0) return __result__; }
#endif
+#ifndef RINOK_WRes
+#define RINOK_WRes(x) { WRes __result__ = (x); if (__result__ != 0) return __result__; }
+#endif
+
typedef unsigned char Byte;
typedef short Int16;
typedef unsigned short UInt16;
@@ -75,6 +139,12 @@ typedef int Int32;
typedef unsigned int UInt32;
#endif
+
+
+
+#define MY_HRES_ERROR__INTERNAL_ERROR ((HRESULT)0x8007054FL)
+
+
#ifdef _SZ_NO_INT_64
/* define _SZ_NO_INT_64, if your compiler doesn't support 64-bit integers.
@@ -128,25 +198,37 @@ typedef int BoolInt;
#define MY_CDECL __cdecl
#define MY_FAST_CALL __fastcall
-#else
+#else // _MSC_VER
+#if (defined(__GNUC__) && (__GNUC__ >= 4)) \
+ || (defined(__clang__) && (__clang_major__ >= 4)) \
+ || defined(__INTEL_COMPILER) \
+ || defined(__xlC__)
+#define MY_NO_INLINE __attribute__((noinline))
+// #define MY_FORCE_INLINE __attribute__((always_inline)) inline
+#else
#define MY_NO_INLINE
+#endif
+
#define MY_FORCE_INLINE
-#define MY_CDECL
-#define MY_FAST_CALL
-/* inline keyword : for C++ / C99 */
-/* GCC, clang: */
-/*
-#if defined (__GNUC__) && (__GNUC__ >= 4)
-#define MY_FORCE_INLINE __attribute__((always_inline))
-#define MY_NO_INLINE __attribute__((noinline))
-#endif
-*/
+#define MY_CDECL
+#if defined(_M_IX86) \
+ || defined(__i386__)
+// #define MY_FAST_CALL __attribute__((fastcall))
+// #define MY_FAST_CALL __attribute__((cdecl))
+#define MY_FAST_CALL
+#elif defined(MY_CPU_AMD64)
+// #define MY_FAST_CALL __attribute__((ms_abi))
+#define MY_FAST_CALL
+#else
+#define MY_FAST_CALL
#endif
+#endif // _MSC_VER
+
/* The following interfaces use first parameter as pointer to structure */
@@ -335,12 +417,11 @@ struct ISzAlloc
GCC 4.8.1 : classes with non-public variable members"
*/
-#define MY_container_of(ptr, type, m) ((type *)((char *)(1 ? (ptr) : &((type *)0)->m) - MY_offsetof(type, m)))
-
+#define MY_container_of(ptr, type, m) ((type *)(void *)((char *)(void *)(1 ? (ptr) : &((type *)0)->m) - MY_offsetof(type, m)))
#endif
-#define CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) ((type *)(ptr))
+#define CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) ((type *)(void *)(ptr))
/*
#define CONTAINER_FROM_VTBL(ptr, type, m) CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m)
@@ -353,6 +434,7 @@ struct ISzAlloc
*/
+#define MY_memset_0_ARRAY(a) memset((a), 0, sizeof(a))
#ifdef _WIN32
@@ -370,6 +452,14 @@ struct ISzAlloc
#endif
+#define k_PropVar_TimePrec_0 0
+#define k_PropVar_TimePrec_Unix 1
+#define k_PropVar_TimePrec_DOS 2
+#define k_PropVar_TimePrec_HighPrec 3
+#define k_PropVar_TimePrec_Base 16
+#define k_PropVar_TimePrec_100ns (k_PropVar_TimePrec_Base + 7)
+#define k_PropVar_TimePrec_1ns (k_PropVar_TimePrec_Base + 9)
+
EXTERN_C_END
#endif
diff --git a/multiarc/src/formats/7z/C/7zVersion.h b/multiarc/src/formats/7z/C/7zVersion.h
index c176823a..49ea81dd 100644..100755
--- a/multiarc/src/formats/7z/C/7zVersion.h
+++ b/multiarc/src/formats/7z/C/7zVersion.h
@@ -1,7 +1,7 @@
-#define MY_VER_MAJOR 19
-#define MY_VER_MINOR 00
+#define MY_VER_MAJOR 22
+#define MY_VER_MINOR 01
#define MY_VER_BUILD 0
-#define MY_VERSION_NUMBERS "19.00"
+#define MY_VERSION_NUMBERS "22.01"
#define MY_VERSION MY_VERSION_NUMBERS
#ifdef MY_CPU_NAME
@@ -10,12 +10,12 @@
#define MY_VERSION_CPU MY_VERSION
#endif
-#define MY_DATE "2019-02-21"
+#define MY_DATE "2022-07-15"
#undef MY_COPYRIGHT
#undef MY_VERSION_COPYRIGHT_DATE
#define MY_AUTHOR_NAME "Igor Pavlov"
#define MY_COPYRIGHT_PD "Igor Pavlov : Public domain"
-#define MY_COPYRIGHT_CR "Copyright (c) 1999-2018 Igor Pavlov"
+#define MY_COPYRIGHT_CR "Copyright (c) 1999-2022 Igor Pavlov"
#ifdef USE_COPYRIGHT_CR
#define MY_COPYRIGHT MY_COPYRIGHT_CR
diff --git a/multiarc/src/formats/7z/C/7zVersion.rc b/multiarc/src/formats/7z/C/7zVersion.rc
index e520995d..e520995d 100644..100755
--- a/multiarc/src/formats/7z/C/7zVersion.rc
+++ b/multiarc/src/formats/7z/C/7zVersion.rc
diff --git a/multiarc/src/formats/7z/C/Aes.c b/multiarc/src/formats/7z/C/Aes.c
index 1cdd0e78..27e32e62 100644..100755
--- a/multiarc/src/formats/7z/C/Aes.c
+++ b/multiarc/src/formats/7z/C/Aes.c
@@ -1,10 +1,17 @@
/* Aes.c -- AES encryption / decryption
-2017-01-24 : Igor Pavlov : Public domain */
+2021-05-13 : Igor Pavlov : Public domain */
#include "Precomp.h"
-#include "Aes.h"
#include "CpuArch.h"
+#include "Aes.h"
+
+AES_CODE_FUNC g_AesCbc_Decode;
+#ifndef _SFX
+AES_CODE_FUNC g_AesCbc_Encode;
+AES_CODE_FUNC g_AesCtr_Code;
+UInt32 g_Aes_SupportedFunctions_Flags;
+#endif
static UInt32 T[256 * 4];
static const Byte Sbox[256] = {
@@ -25,23 +32,10 @@ static const Byte Sbox[256] = {
0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16};
-void MY_FAST_CALL AesCbc_Encode(UInt32 *ivAes, Byte *data, size_t numBlocks);
-void MY_FAST_CALL AesCbc_Decode(UInt32 *ivAes, Byte *data, size_t numBlocks);
-void MY_FAST_CALL AesCtr_Code(UInt32 *ivAes, Byte *data, size_t numBlocks);
-
-void MY_FAST_CALL AesCbc_Encode_Intel(UInt32 *ivAes, Byte *data, size_t numBlocks);
-void MY_FAST_CALL AesCbc_Decode_Intel(UInt32 *ivAes, Byte *data, size_t numBlocks);
-void MY_FAST_CALL AesCtr_Code_Intel(UInt32 *ivAes, Byte *data, size_t numBlocks);
-
-AES_CODE_FUNC g_AesCbc_Encode;
-AES_CODE_FUNC g_AesCbc_Decode;
-AES_CODE_FUNC g_AesCtr_Code;
static UInt32 D[256 * 4];
static Byte InvS[256];
-static const Byte Rcon[11] = { 0x00, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36 };
-
#define xtime(x) ((((x) << 1) ^ (((x) & 0x80) != 0 ? 0x1B : 0)) & 0xFF)
#define Ui32(a0, a1, a2, a3) ((UInt32)(a0) | ((UInt32)(a1) << 8) | ((UInt32)(a2) << 16) | ((UInt32)(a3) << 24))
@@ -57,6 +51,36 @@ static const Byte Rcon[11] = { 0x00, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0
#define DD(x) (D + (x << 8))
+// #define _SHOW_AES_STATUS
+
+#ifdef MY_CPU_X86_OR_AMD64
+ #define USE_HW_AES
+#elif defined(MY_CPU_ARM_OR_ARM64) && defined(MY_CPU_LE)
+ #if defined(__clang__)
+ #if (__clang_major__ >= 8) // fix that check
+ #define USE_HW_AES
+ #endif
+ #elif defined(__GNUC__)
+ #if (__GNUC__ >= 6) // fix that check
+ #define USE_HW_AES
+ #endif
+ #elif defined(_MSC_VER)
+ #if _MSC_VER >= 1910
+ #define USE_HW_AES
+ #endif
+ #endif
+#endif
+
+#ifdef USE_HW_AES
+#ifdef _SHOW_AES_STATUS
+#include <stdio.h>
+#define _PRF(x) x
+#else
+#define _PRF(x)
+#endif
+#endif
+
+
void AesGenTables(void)
{
unsigned i;
@@ -90,18 +114,48 @@ void AesGenTables(void)
}
}
- g_AesCbc_Encode = AesCbc_Encode;
- g_AesCbc_Decode = AesCbc_Decode;
- g_AesCtr_Code = AesCtr_Code;
+ {
+ AES_CODE_FUNC d = AesCbc_Decode;
+ #ifndef _SFX
+ AES_CODE_FUNC e = AesCbc_Encode;
+ AES_CODE_FUNC c = AesCtr_Code;
+ UInt32 flags = 0;
+ #endif
- #ifdef MY_CPU_X86_OR_AMD64
- if (CPU_Is_Aes_Supported())
+ #ifdef USE_HW_AES
+ if (CPU_IsSupported_AES())
{
- g_AesCbc_Encode = AesCbc_Encode_Intel;
- g_AesCbc_Decode = AesCbc_Decode_Intel;
- g_AesCtr_Code = AesCtr_Code_Intel;
+ // #pragma message ("AES HW")
+ _PRF(printf("\n===AES HW\n"));
+ d = AesCbc_Decode_HW;
+
+ #ifndef _SFX
+ e = AesCbc_Encode_HW;
+ c = AesCtr_Code_HW;
+ flags = k_Aes_SupportedFunctions_HW;
+ #endif
+
+ #ifdef MY_CPU_X86_OR_AMD64
+ if (CPU_IsSupported_VAES_AVX2())
+ {
+ _PRF(printf("\n===vaes avx2\n"));
+ d = AesCbc_Decode_HW_256;
+ #ifndef _SFX
+ c = AesCtr_Code_HW_256;
+ flags |= k_Aes_SupportedFunctions_HW_256;
+ #endif
+ }
+ #endif
}
#endif
+
+ g_AesCbc_Decode = d;
+ #ifndef _SFX
+ g_AesCbc_Encode = e;
+ g_AesCtr_Code = c;
+ g_Aes_SupportedFunctions_Flags = flags;
+ #endif
+ }
}
@@ -142,8 +196,11 @@ void AesGenTables(void)
void MY_FAST_CALL Aes_SetKey_Enc(UInt32 *w, const Byte *key, unsigned keySize)
{
- unsigned i, wSize;
- wSize = keySize + 28;
+ unsigned i, m;
+ const UInt32 *wLim;
+ UInt32 t;
+ UInt32 rcon = 1;
+
keySize /= 4;
w[0] = ((UInt32)keySize / 2) + 3;
w += 4;
@@ -151,16 +208,26 @@ void MY_FAST_CALL Aes_SetKey_Enc(UInt32 *w, const Byte *key, unsigned keySize)
for (i = 0; i < keySize; i++, key += 4)
w[i] = GetUi32(key);
- for (; i < wSize; i++)
+ t = w[(size_t)keySize - 1];
+ wLim = w + (size_t)keySize * 3 + 28;
+ m = 0;
+ do
{
- UInt32 t = w[(size_t)i - 1];
- unsigned rem = i % keySize;
- if (rem == 0)
- t = Ui32(Sbox[gb1(t)] ^ Rcon[i / keySize], Sbox[gb2(t)], Sbox[gb3(t)], Sbox[gb0(t)]);
- else if (keySize > 6 && rem == 4)
+ if (m == 0)
+ {
+ t = Ui32(Sbox[gb1(t)] ^ rcon, Sbox[gb2(t)], Sbox[gb3(t)], Sbox[gb0(t)]);
+ rcon <<= 1;
+ if (rcon & 0x100)
+ rcon = 0x1b;
+ m = keySize;
+ }
+ else if (m == 4 && keySize > 6)
t = Ui32(Sbox[gb0(t)], Sbox[gb1(t)], Sbox[gb2(t)], Sbox[gb3(t)]);
- w[i] = w[i - keySize] ^ t;
+ m--;
+ t ^= w[0];
+ w[keySize] = t;
}
+ while (++w != wLim);
}
void MY_FAST_CALL Aes_SetKey_Dec(UInt32 *w, const Byte *key, unsigned keySize)
@@ -184,6 +251,7 @@ void MY_FAST_CALL Aes_SetKey_Dec(UInt32 *w, const Byte *key, unsigned keySize)
src and dest are pointers to 4 UInt32 words.
src and dest can point to same block */
+// MY_FORCE_INLINE
static void Aes_Encode(const UInt32 *w, UInt32 *dest, const UInt32 *src)
{
UInt32 s[4];
@@ -207,6 +275,7 @@ static void Aes_Encode(const UInt32 *w, UInt32 *dest, const UInt32 *src)
FT4(0); FT4(1); FT4(2); FT4(3);
}
+MY_FORCE_INLINE
static void Aes_Decode(const UInt32 *w, UInt32 *dest, const UInt32 *src)
{
UInt32 s[4];
@@ -294,12 +363,12 @@ void MY_FAST_CALL AesCtr_Code(UInt32 *p, Byte *data, size_t numBlocks)
UInt32 t = temp[i];
#ifdef MY_CPU_LE_UNALIGN
- *((UInt32 *)data) ^= t;
+ *((UInt32 *)(void *)data) ^= t;
#else
- data[0] ^= (t & 0xFF);
- data[1] ^= ((t >> 8) & 0xFF);
- data[2] ^= ((t >> 16) & 0xFF);
- data[3] ^= ((t >> 24));
+ data[0] = (Byte)(data[0] ^ (t & 0xFF));
+ data[1] = (Byte)(data[1] ^ ((t >> 8) & 0xFF));
+ data[2] = (Byte)(data[2] ^ ((t >> 16) & 0xFF));
+ data[3] = (Byte)(data[3] ^ ((t >> 24)));
#endif
}
}
diff --git a/multiarc/src/formats/7z/C/Aes.h b/multiarc/src/formats/7z/C/Aes.h
index 64979b5b..2aa22564 100644..100755
--- a/multiarc/src/formats/7z/C/Aes.h
+++ b/multiarc/src/formats/7z/C/Aes.h
@@ -1,5 +1,5 @@
/* Aes.h -- AES encryption / decryption
-2013-01-18 : Igor Pavlov : Public domain */
+2018-04-28 : Igor Pavlov : Public domain */
#ifndef __AES_H
#define __AES_H
@@ -26,12 +26,34 @@ void MY_FAST_CALL Aes_SetKey_Dec(UInt32 *aes, const Byte *key, unsigned keySize)
/* ivAes - 16-byte aligned pointer to iv+keyMode+roundKeys sequence: UInt32[AES_NUM_IVMRK_WORDS] */
void AesCbc_Init(UInt32 *ivAes, const Byte *iv); /* iv size is AES_BLOCK_SIZE */
+
/* data - 16-byte aligned pointer to data */
/* numBlocks - the number of 16-byte blocks in data array */
typedef void (MY_FAST_CALL *AES_CODE_FUNC)(UInt32 *ivAes, Byte *data, size_t numBlocks);
-extern AES_CODE_FUNC g_AesCbc_Encode;
+
extern AES_CODE_FUNC g_AesCbc_Decode;
+#ifndef _SFX
+extern AES_CODE_FUNC g_AesCbc_Encode;
extern AES_CODE_FUNC g_AesCtr_Code;
+#define k_Aes_SupportedFunctions_HW (1 << 2)
+#define k_Aes_SupportedFunctions_HW_256 (1 << 3)
+extern UInt32 g_Aes_SupportedFunctions_Flags;
+#endif
+
+
+#define DECLARE__AES_CODE_FUNC(funcName) \
+ void MY_FAST_CALL funcName(UInt32 *ivAes, Byte *data, size_t numBlocks);
+
+DECLARE__AES_CODE_FUNC (AesCbc_Encode)
+DECLARE__AES_CODE_FUNC (AesCbc_Decode)
+DECLARE__AES_CODE_FUNC (AesCtr_Code)
+
+DECLARE__AES_CODE_FUNC (AesCbc_Encode_HW)
+DECLARE__AES_CODE_FUNC (AesCbc_Decode_HW)
+DECLARE__AES_CODE_FUNC (AesCtr_Code_HW)
+
+DECLARE__AES_CODE_FUNC (AesCbc_Decode_HW_256)
+DECLARE__AES_CODE_FUNC (AesCtr_Code_HW_256)
EXTERN_C_END
diff --git a/multiarc/src/formats/7z/C/AesOpt.c b/multiarc/src/formats/7z/C/AesOpt.c
index 9571c467..8be8ff69 100644..100755
--- a/multiarc/src/formats/7z/C/AesOpt.c
+++ b/multiarc/src/formats/7z/C/AesOpt.c
@@ -1,184 +1,776 @@
-/* AesOpt.c -- Intel's AES
-2017-06-08 : Igor Pavlov : Public domain */
+/* AesOpt.c -- AES optimized code for x86 / ARM AES hardware instructions
+2021-04-01 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include "CpuArch.h"
#ifdef MY_CPU_X86_OR_AMD64
-#if (_MSC_VER > 1500) || (_MSC_FULL_VER >= 150030729)
-#define USE_INTEL_AES
+
+ #if defined(__clang__)
+ #if __clang_major__ > 3 || (__clang_major__ == 3 && __clang_minor__ >= 8)
+ #define USE_INTEL_AES
+ #define ATTRIB_AES __attribute__((__target__("aes")))
+ #if (__clang_major__ >= 8)
+ #define USE_INTEL_VAES
+ #define ATTRIB_VAES __attribute__((__target__("aes,vaes,avx2")))
+ #endif
+ #endif
+ #elif defined(__GNUC__)
+ #if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4)
+ #define USE_INTEL_AES
+ #ifndef __AES__
+ #define ATTRIB_AES __attribute__((__target__("aes")))
+ #endif
+ #if (__GNUC__ >= 8)
+ #define USE_INTEL_VAES
+ #define ATTRIB_VAES __attribute__((__target__("aes,vaes,avx2")))
+ #endif
+ #endif
+ #elif defined(__INTEL_COMPILER)
+ #if (__INTEL_COMPILER >= 1110)
+ #define USE_INTEL_AES
+ #if (__INTEL_COMPILER >= 1900)
+ #define USE_INTEL_VAES
+ #endif
+ #endif
+ #elif defined(_MSC_VER)
+ #if (_MSC_VER > 1500) || (_MSC_FULL_VER >= 150030729)
+ #define USE_INTEL_AES
+ #if (_MSC_VER >= 1910)
+ #define USE_INTEL_VAES
+ #endif
+ #endif
+ #endif
+
+#ifndef ATTRIB_AES
+ #define ATTRIB_AES
#endif
+#ifndef ATTRIB_VAES
+ #define ATTRIB_VAES
#endif
+
#ifdef USE_INTEL_AES
#include <wmmintrin.h>
-void MY_FAST_CALL AesCbc_Encode_Intel(__m128i *p, __m128i *data, size_t numBlocks)
+#ifndef USE_INTEL_VAES
+#define AES_TYPE_keys __m128i
+#define AES_TYPE_data __m128i
+#endif
+
+#define AES_FUNC_START(name) \
+ void MY_FAST_CALL name(__m128i *p, __m128i *data, size_t numBlocks)
+
+#define AES_FUNC_START2(name) \
+AES_FUNC_START (name); \
+ATTRIB_AES \
+AES_FUNC_START (name)
+
+#define MM_OP(op, dest, src) dest = op(dest, src);
+#define MM_OP_m(op, src) MM_OP(op, m, src);
+
+#define MM_XOR( dest, src) MM_OP(_mm_xor_si128, dest, src);
+#define AVX_XOR(dest, src) MM_OP(_mm256_xor_si256, dest, src);
+
+
+AES_FUNC_START2 (AesCbc_Encode_HW)
{
__m128i m = *p;
+ const __m128i k0 = p[2];
+ const __m128i k1 = p[3];
+ const UInt32 numRounds2 = *(const UInt32 *)(p + 1) - 1;
for (; numBlocks != 0; numBlocks--, data++)
{
- UInt32 numRounds2 = *(const UInt32 *)(p + 1) - 1;
- const __m128i *w = p + 3;
- m = _mm_xor_si128(m, *data);
- m = _mm_xor_si128(m, p[2]);
+ UInt32 r = numRounds2;
+ const __m128i *w = p + 4;
+ __m128i temp = *data;
+ MM_XOR (temp, k0);
+ MM_XOR (m, temp);
+ MM_OP_m (_mm_aesenc_si128, k1);
do
{
- m = _mm_aesenc_si128(m, w[0]);
- m = _mm_aesenc_si128(m, w[1]);
+ MM_OP_m (_mm_aesenc_si128, w[0]);
+ MM_OP_m (_mm_aesenc_si128, w[1]);
w += 2;
}
- while (--numRounds2 != 0);
- m = _mm_aesenc_si128(m, w[0]);
- m = _mm_aesenclast_si128(m, w[1]);
+ while (--r);
+ MM_OP_m (_mm_aesenclast_si128, w[0]);
*data = m;
}
*p = m;
}
-#define NUM_WAYS 3
-#define AES_OP_W(op, n) { \
- const __m128i t = w[n]; \
- m0 = op(m0, t); \
- m1 = op(m1, t); \
- m2 = op(m2, t); \
- }
+#define WOP_1(op)
+#define WOP_2(op) WOP_1 (op) op (m1, 1);
+#define WOP_3(op) WOP_2 (op) op (m2, 2);
+#define WOP_4(op) WOP_3 (op) op (m3, 3);
+#ifdef MY_CPU_AMD64
+#define WOP_5(op) WOP_4 (op) op (m4, 4);
+#define WOP_6(op) WOP_5 (op) op (m5, 5);
+#define WOP_7(op) WOP_6 (op) op (m6, 6);
+#define WOP_8(op) WOP_7 (op) op (m7, 7);
+#endif
+/*
+#define WOP_9(op) WOP_8 (op) op (m8, 8);
+#define WOP_10(op) WOP_9 (op) op (m9, 9);
+#define WOP_11(op) WOP_10(op) op (m10, 10);
+#define WOP_12(op) WOP_11(op) op (m11, 11);
+#define WOP_13(op) WOP_12(op) op (m12, 12);
+#define WOP_14(op) WOP_13(op) op (m13, 13);
+*/
+
+#ifdef MY_CPU_AMD64
+ #define NUM_WAYS 8
+ #define WOP_M1 WOP_8
+#else
+ #define NUM_WAYS 4
+ #define WOP_M1 WOP_4
+#endif
+
+#define WOP(op) op (m0, 0); WOP_M1(op)
+
+
+#define DECLARE_VAR(reg, ii) __m128i reg
+#define LOAD_data( reg, ii) reg = data[ii];
+#define STORE_data( reg, ii) data[ii] = reg;
+#if (NUM_WAYS > 1)
+#define XOR_data_M1(reg, ii) MM_XOR (reg, data[ii- 1]);
+#endif
+
+#define AVX__DECLARE_VAR(reg, ii) __m256i reg
+#define AVX__LOAD_data( reg, ii) reg = ((const __m256i *)(const void *)data)[ii];
+#define AVX__STORE_data( reg, ii) ((__m256i *)(void *)data)[ii] = reg;
+#define AVX__XOR_data_M1(reg, ii) AVX_XOR (reg, (((const __m256i *)(const void *)(data - 1))[ii]));
+
+#define MM_OP_key(op, reg) MM_OP(op, reg, key);
+
+#define AES_DEC( reg, ii) MM_OP_key (_mm_aesdec_si128, reg)
+#define AES_DEC_LAST( reg, ii) MM_OP_key (_mm_aesdeclast_si128, reg)
+#define AES_ENC( reg, ii) MM_OP_key (_mm_aesenc_si128, reg)
+#define AES_ENC_LAST( reg, ii) MM_OP_key (_mm_aesenclast_si128, reg)
+#define AES_XOR( reg, ii) MM_OP_key (_mm_xor_si128, reg)
+
-#define AES_DEC(n) AES_OP_W(_mm_aesdec_si128, n)
-#define AES_DEC_LAST(n) AES_OP_W(_mm_aesdeclast_si128, n)
-#define AES_ENC(n) AES_OP_W(_mm_aesenc_si128, n)
-#define AES_ENC_LAST(n) AES_OP_W(_mm_aesenclast_si128, n)
+#define AVX__AES_DEC( reg, ii) MM_OP_key (_mm256_aesdec_epi128, reg)
+#define AVX__AES_DEC_LAST( reg, ii) MM_OP_key (_mm256_aesdeclast_epi128, reg)
+#define AVX__AES_ENC( reg, ii) MM_OP_key (_mm256_aesenc_epi128, reg)
+#define AVX__AES_ENC_LAST( reg, ii) MM_OP_key (_mm256_aesenclast_epi128, reg)
+#define AVX__AES_XOR( reg, ii) MM_OP_key (_mm256_xor_si256, reg)
-void MY_FAST_CALL AesCbc_Decode_Intel(__m128i *p, __m128i *data, size_t numBlocks)
+#define CTR_START(reg, ii) MM_OP (_mm_add_epi64, ctr, one); reg = ctr;
+#define CTR_END( reg, ii) MM_XOR (data[ii], reg);
+
+#define AVX__CTR_START(reg, ii) MM_OP (_mm256_add_epi64, ctr2, two); reg = _mm256_xor_si256(ctr2, key);
+#define AVX__CTR_END( reg, ii) AVX_XOR (((__m256i *)(void *)data)[ii], reg);
+
+#define WOP_KEY(op, n) { \
+ const __m128i key = w[n]; \
+ WOP(op); }
+
+#define AVX__WOP_KEY(op, n) { \
+ const __m256i key = w[n]; \
+ WOP(op); }
+
+
+#define WIDE_LOOP_START \
+ dataEnd = data + numBlocks; \
+ if (numBlocks >= NUM_WAYS) \
+ { dataEnd -= NUM_WAYS; do { \
+
+
+#define WIDE_LOOP_END \
+ data += NUM_WAYS; \
+ } while (data <= dataEnd); \
+ dataEnd += NUM_WAYS; } \
+
+
+#define SINGLE_LOOP \
+ for (; data < dataEnd; data++)
+
+
+#define NUM_AES_KEYS_MAX 15
+
+#define WIDE_LOOP_START_AVX(OP) \
+ dataEnd = data + numBlocks; \
+ if (numBlocks >= NUM_WAYS * 2) \
+ { __m256i keys[NUM_AES_KEYS_MAX]; \
+ UInt32 ii; \
+ OP \
+ for (ii = 0; ii < numRounds; ii++) \
+ keys[ii] = _mm256_broadcastsi128_si256(p[ii]); \
+ dataEnd -= NUM_WAYS * 2; do { \
+
+
+#define WIDE_LOOP_END_AVX(OP) \
+ data += NUM_WAYS * 2; \
+ } while (data <= dataEnd); \
+ dataEnd += NUM_WAYS * 2; \
+ OP \
+ _mm256_zeroupper(); \
+ } \
+
+/* MSVC for x86: if we don't call _mm256_zeroupper() and -arch:IA32 is not specified,
+ MSVC can still insert a vzeroupper instruction. */
+
+
+AES_FUNC_START2 (AesCbc_Decode_HW)
{
__m128i iv = *p;
- for (; numBlocks >= NUM_WAYS; numBlocks -= NUM_WAYS, data += NUM_WAYS)
+ const __m128i *wStart = p + *(const UInt32 *)(p + 1) * 2 + 2 - 1;
+ const __m128i *dataEnd;
+ p += 2;
+
+ WIDE_LOOP_START
{
- UInt32 numRounds2 = *(const UInt32 *)(p + 1);
- const __m128i *w = p + numRounds2 * 2;
- __m128i m0, m1, m2;
+ const __m128i *w = wStart;
+
+ WOP (DECLARE_VAR)
+ WOP (LOAD_data);
+ WOP_KEY (AES_XOR, 1)
+
+ do
{
- const __m128i t = w[2];
- m0 = _mm_xor_si128(t, data[0]);
- m1 = _mm_xor_si128(t, data[1]);
- m2 = _mm_xor_si128(t, data[2]);
+ WOP_KEY (AES_DEC, 0)
+ w--;
}
- numRounds2--;
+ while (w != p);
+ WOP_KEY (AES_DEC_LAST, 0)
+
+ MM_XOR (m0, iv);
+ WOP_M1 (XOR_data_M1)
+ iv = data[NUM_WAYS - 1];
+ WOP (STORE_data);
+ }
+ WIDE_LOOP_END
+
+ SINGLE_LOOP
+ {
+ const __m128i *w = wStart - 1;
+ __m128i m = _mm_xor_si128 (w[2], *data);
do
{
- AES_DEC(1)
- AES_DEC(0)
+ MM_OP_m (_mm_aesdec_si128, w[1]);
+ MM_OP_m (_mm_aesdec_si128, w[0]);
w -= 2;
}
- while (--numRounds2 != 0);
- AES_DEC(1)
- AES_DEC_LAST(0)
+ while (w != p);
+ MM_OP_m (_mm_aesdec_si128, w[1]);
+ MM_OP_m (_mm_aesdeclast_si128, w[0]);
+ MM_XOR (m, iv);
+ iv = *data;
+ *data = m;
+ }
+
+ p[-2] = iv;
+}
+
+
+AES_FUNC_START2 (AesCtr_Code_HW)
+{
+ __m128i ctr = *p;
+ UInt32 numRoundsMinus2 = *(const UInt32 *)(p + 1) * 2 - 1;
+ const __m128i *dataEnd;
+ __m128i one = _mm_cvtsi32_si128(1);
+
+ p += 2;
+
+ WIDE_LOOP_START
+ {
+ const __m128i *w = p;
+ UInt32 r = numRoundsMinus2;
+ WOP (DECLARE_VAR)
+ WOP (CTR_START);
+ WOP_KEY (AES_XOR, 0)
+ w += 1;
+ do
{
- __m128i t;
- t = _mm_xor_si128(m0, iv); iv = data[0]; data[0] = t;
- t = _mm_xor_si128(m1, iv); iv = data[1]; data[1] = t;
- t = _mm_xor_si128(m2, iv); iv = data[2]; data[2] = t;
+ WOP_KEY (AES_ENC, 0)
+ w += 1;
}
+ while (--r);
+ WOP_KEY (AES_ENC_LAST, 0)
+
+ WOP (CTR_END);
}
- for (; numBlocks != 0; numBlocks--, data++)
+ WIDE_LOOP_END
+
+ SINGLE_LOOP
+ {
+ UInt32 numRounds2 = *(const UInt32 *)(p - 2 + 1) - 1;
+ const __m128i *w = p;
+ __m128i m;
+ MM_OP (_mm_add_epi64, ctr, one);
+ m = _mm_xor_si128 (ctr, p[0]);
+ w += 1;
+ do
+ {
+ MM_OP_m (_mm_aesenc_si128, w[0]);
+ MM_OP_m (_mm_aesenc_si128, w[1]);
+ w += 2;
+ }
+ while (--numRounds2);
+ MM_OP_m (_mm_aesenc_si128, w[0]);
+ MM_OP_m (_mm_aesenclast_si128, w[1]);
+ MM_XOR (*data, m);
+ }
+
+ p[-2] = ctr;
+}
+
+
+
+#ifdef USE_INTEL_VAES
+
+#if defined(__clang__) && defined(_MSC_VER)
+#define __SSE4_2__
+#define __AES__
+#define __AVX__
+#define __AVX2__
+#define __VAES__
+#define __AVX512F__
+#define __AVX512VL__
+#endif
+
+#include <immintrin.h>
+
+#define VAES_FUNC_START2(name) \
+AES_FUNC_START (name); \
+ATTRIB_VAES \
+AES_FUNC_START (name)
+
+VAES_FUNC_START2 (AesCbc_Decode_HW_256)
+{
+ __m128i iv = *p;
+ const __m128i *dataEnd;
+ UInt32 numRounds = *(const UInt32 *)(p + 1) * 2 + 1;
+ p += 2;
+
+ WIDE_LOOP_START_AVX(;)
{
- UInt32 numRounds2 = *(const UInt32 *)(p + 1);
- const __m128i *w = p + numRounds2 * 2;
- __m128i m = _mm_xor_si128(w[2], *data);
- numRounds2--;
+ const __m256i *w = keys + numRounds - 2;
+
+ WOP (AVX__DECLARE_VAR)
+ WOP (AVX__LOAD_data);
+ AVX__WOP_KEY (AVX__AES_XOR, 1)
+
do
{
- m = _mm_aesdec_si128(m, w[1]);
- m = _mm_aesdec_si128(m, w[0]);
+ AVX__WOP_KEY (AVX__AES_DEC, 0)
+ w--;
+ }
+ while (w != keys);
+ AVX__WOP_KEY (AVX__AES_DEC_LAST, 0)
+
+ AVX_XOR (m0, _mm256_setr_m128i(iv, data[0]));
+ WOP_M1 (AVX__XOR_data_M1)
+ iv = data[NUM_WAYS * 2 - 1];
+ WOP (AVX__STORE_data);
+ }
+ WIDE_LOOP_END_AVX(;)
+
+ SINGLE_LOOP
+ {
+ const __m128i *w = p + *(const UInt32 *)(p + 1 - 2) * 2 + 1 - 3;
+ __m128i m = _mm_xor_si128 (w[2], *data);
+ do
+ {
+ MM_OP_m (_mm_aesdec_si128, w[1]);
+ MM_OP_m (_mm_aesdec_si128, w[0]);
w -= 2;
}
- while (--numRounds2 != 0);
- m = _mm_aesdec_si128(m, w[1]);
- m = _mm_aesdeclast_si128(m, w[0]);
+ while (w != p);
+ MM_OP_m (_mm_aesdec_si128, w[1]);
+ MM_OP_m (_mm_aesdeclast_si128, w[0]);
- m = _mm_xor_si128(m, iv);
+ MM_XOR (m, iv);
iv = *data;
*data = m;
}
- *p = iv;
+
+ p[-2] = iv;
}
-void MY_FAST_CALL AesCtr_Code_Intel(__m128i *p, __m128i *data, size_t numBlocks)
+
+/*
+SSE2: _mm_cvtsi32_si128 : movd
+AVX: _mm256_setr_m128i : vinsertf128
+AVX2: _mm256_add_epi64 : vpaddq ymm, ymm, ymm
+ _mm256_extracti128_si256 : vextracti128
+ _mm256_broadcastsi128_si256 : vbroadcasti128
+*/
+
+#define AVX__CTR_LOOP_START \
+ ctr2 = _mm256_setr_m128i(_mm_sub_epi64(ctr, one), ctr); \
+ two = _mm256_setr_m128i(one, one); \
+ two = _mm256_add_epi64(two, two); \
+
+// two = _mm256_setr_epi64x(2, 0, 2, 0);
+
+#define AVX__CTR_LOOP_ENC \
+ ctr = _mm256_extracti128_si256 (ctr2, 1); \
+
+VAES_FUNC_START2 (AesCtr_Code_HW_256)
{
__m128i ctr = *p;
- __m128i one;
- one.m128i_u64[0] = 1;
- one.m128i_u64[1] = 0;
- for (; numBlocks >= NUM_WAYS; numBlocks -= NUM_WAYS, data += NUM_WAYS)
+ UInt32 numRounds = *(const UInt32 *)(p + 1) * 2 + 1;
+ const __m128i *dataEnd;
+ __m128i one = _mm_cvtsi32_si128(1);
+ __m256i ctr2, two;
+ p += 2;
+
+ WIDE_LOOP_START_AVX (AVX__CTR_LOOP_START)
{
- UInt32 numRounds2 = *(const UInt32 *)(p + 1) - 1;
- const __m128i *w = p;
- __m128i m0, m1, m2;
- {
- const __m128i t = w[2];
- ctr = _mm_add_epi64(ctr, one); m0 = _mm_xor_si128(ctr, t);
- ctr = _mm_add_epi64(ctr, one); m1 = _mm_xor_si128(ctr, t);
- ctr = _mm_add_epi64(ctr, one); m2 = _mm_xor_si128(ctr, t);
- }
- w += 3;
+ const __m256i *w = keys;
+ UInt32 r = numRounds - 2;
+ WOP (AVX__DECLARE_VAR)
+ AVX__WOP_KEY (AVX__CTR_START, 0);
+
+ w += 1;
do
{
- AES_ENC(0)
- AES_ENC(1)
- w += 2;
+ AVX__WOP_KEY (AVX__AES_ENC, 0)
+ w += 1;
}
- while (--numRounds2 != 0);
- AES_ENC(0)
- AES_ENC_LAST(1)
- data[0] = _mm_xor_si128(data[0], m0);
- data[1] = _mm_xor_si128(data[1], m1);
- data[2] = _mm_xor_si128(data[2], m2);
+ while (--r);
+ AVX__WOP_KEY (AVX__AES_ENC_LAST, 0)
+
+ WOP (AVX__CTR_END);
}
- for (; numBlocks != 0; numBlocks--, data++)
+ WIDE_LOOP_END_AVX (AVX__CTR_LOOP_ENC)
+
+ SINGLE_LOOP
{
- UInt32 numRounds2 = *(const UInt32 *)(p + 1) - 1;
+ UInt32 numRounds2 = *(const UInt32 *)(p - 2 + 1) - 1;
const __m128i *w = p;
__m128i m;
- ctr = _mm_add_epi64(ctr, one);
- m = _mm_xor_si128(ctr, p[2]);
- w += 3;
+ MM_OP (_mm_add_epi64, ctr, one);
+ m = _mm_xor_si128 (ctr, p[0]);
+ w += 1;
do
{
- m = _mm_aesenc_si128(m, w[0]);
- m = _mm_aesenc_si128(m, w[1]);
+ MM_OP_m (_mm_aesenc_si128, w[0]);
+ MM_OP_m (_mm_aesenc_si128, w[1]);
w += 2;
}
- while (--numRounds2 != 0);
- m = _mm_aesenc_si128(m, w[0]);
- m = _mm_aesenclast_si128(m, w[1]);
- *data = _mm_xor_si128(*data, m);
+ while (--numRounds2);
+ MM_OP_m (_mm_aesenc_si128, w[0]);
+ MM_OP_m (_mm_aesenclast_si128, w[1]);
+ MM_XOR (*data, m);
}
- *p = ctr;
+
+ p[-2] = ctr;
}
+#endif // USE_INTEL_VAES
+
+#else // USE_INTEL_AES
+
+/* no USE_INTEL_AES */
+
+#pragma message("AES HW_SW stub was used")
+
+#define AES_TYPE_keys UInt32
+#define AES_TYPE_data Byte
+
+#define AES_FUNC_START(name) \
+ void MY_FAST_CALL name(UInt32 *p, Byte *data, size_t numBlocks) \
+
+#define AES_COMPAT_STUB(name) \
+ AES_FUNC_START(name); \
+ AES_FUNC_START(name ## _HW) \
+ { name(p, data, numBlocks); }
+
+AES_COMPAT_STUB (AesCbc_Encode)
+AES_COMPAT_STUB (AesCbc_Decode)
+AES_COMPAT_STUB (AesCtr_Code)
+
+#endif // USE_INTEL_AES
+
+
+#ifndef USE_INTEL_VAES
+
+#pragma message("VAES HW_SW stub was used")
+
+#define VAES_COMPAT_STUB(name) \
+ void MY_FAST_CALL name ## _256(UInt32 *p, Byte *data, size_t numBlocks); \
+ void MY_FAST_CALL name ## _256(UInt32 *p, Byte *data, size_t numBlocks) \
+ { name((AES_TYPE_keys *)(void *)p, (AES_TYPE_data *)(void *)data, numBlocks); }
+
+VAES_COMPAT_STUB (AesCbc_Decode_HW)
+VAES_COMPAT_STUB (AesCtr_Code_HW)
+
+#endif // ! USE_INTEL_VAES
+
+
+#elif defined(MY_CPU_ARM_OR_ARM64) && defined(MY_CPU_LE)
+
+ #if defined(__clang__)
+ #if (__clang_major__ >= 8) // fix that check
+ #define USE_HW_AES
+ #endif
+ #elif defined(__GNUC__)
+ #if (__GNUC__ >= 6) // fix that check
+ #define USE_HW_AES
+ #endif
+ #elif defined(_MSC_VER)
+ #if _MSC_VER >= 1910
+ #define USE_HW_AES
+ #endif
+ #endif
+
+#ifdef USE_HW_AES
+
+// #pragma message("=== AES HW === ")
+
+#if defined(__clang__) || defined(__GNUC__)
+ #ifdef MY_CPU_ARM64
+ #define ATTRIB_AES __attribute__((__target__("+crypto")))
+ #else
+ #define ATTRIB_AES __attribute__((__target__("fpu=crypto-neon-fp-armv8")))
+ #endif
+#else
+ // _MSC_VER
+ // for arm32
+ #define _ARM_USE_NEW_NEON_INTRINSICS
+#endif
+
+#ifndef ATTRIB_AES
+ #define ATTRIB_AES
+#endif
+
+#if defined(_MSC_VER) && defined(MY_CPU_ARM64)
+#include <arm64_neon.h>
#else
+#include <arm_neon.h>
+#endif
+
+typedef uint8x16_t v128;
+
+#define AES_FUNC_START(name) \
+ void MY_FAST_CALL name(v128 *p, v128 *data, size_t numBlocks)
+
+#define AES_FUNC_START2(name) \
+AES_FUNC_START (name); \
+ATTRIB_AES \
+AES_FUNC_START (name)
+
+#define MM_OP(op, dest, src) dest = op(dest, src);
+#define MM_OP_m(op, src) MM_OP(op, m, src);
+#define MM_OP1_m(op) m = op(m);
-void MY_FAST_CALL AesCbc_Encode(UInt32 *ivAes, Byte *data, size_t numBlocks);
-void MY_FAST_CALL AesCbc_Decode(UInt32 *ivAes, Byte *data, size_t numBlocks);
-void MY_FAST_CALL AesCtr_Code(UInt32 *ivAes, Byte *data, size_t numBlocks);
+#define MM_XOR( dest, src) MM_OP(veorq_u8, dest, src);
+#define MM_XOR_m( src) MM_XOR(m, src);
-void MY_FAST_CALL AesCbc_Encode_Intel(UInt32 *p, Byte *data, size_t numBlocks)
+#define AES_E_m(k) MM_OP_m (vaeseq_u8, k);
+#define AES_E_MC_m(k) AES_E_m (k); MM_OP1_m(vaesmcq_u8);
+
+
+AES_FUNC_START2 (AesCbc_Encode_HW)
{
- AesCbc_Encode(p, data, numBlocks);
+ v128 m = *p;
+ const v128 k0 = p[2];
+ const v128 k1 = p[3];
+ const v128 k2 = p[4];
+ const v128 k3 = p[5];
+ const v128 k4 = p[6];
+ const v128 k5 = p[7];
+ const v128 k6 = p[8];
+ const v128 k7 = p[9];
+ const v128 k8 = p[10];
+ const v128 k9 = p[11];
+ const UInt32 numRounds2 = *(const UInt32 *)(p + 1);
+ const v128 *w = p + ((size_t)numRounds2 * 2);
+ const v128 k_z1 = w[1];
+ const v128 k_z0 = w[2];
+ for (; numBlocks != 0; numBlocks--, data++)
+ {
+ MM_XOR_m (*data);
+ AES_E_MC_m (k0)
+ AES_E_MC_m (k1)
+ AES_E_MC_m (k2)
+ AES_E_MC_m (k3)
+ AES_E_MC_m (k4)
+ AES_E_MC_m (k5)
+ AES_E_MC_m (k6)
+ AES_E_MC_m (k7)
+ AES_E_MC_m (k8)
+ if (numRounds2 >= 6)
+ {
+ AES_E_MC_m (k9)
+ AES_E_MC_m (p[12])
+ if (numRounds2 != 6)
+ {
+ AES_E_MC_m (p[13])
+ AES_E_MC_m (p[14])
+ }
+ }
+ AES_E_m (k_z1);
+ MM_XOR_m (k_z0);
+ *data = m;
+ }
+ *p = m;
}
-void MY_FAST_CALL AesCbc_Decode_Intel(UInt32 *p, Byte *data, size_t numBlocks)
+
+#define WOP_1(op)
+#define WOP_2(op) WOP_1 (op) op (m1, 1);
+#define WOP_3(op) WOP_2 (op) op (m2, 2);
+#define WOP_4(op) WOP_3 (op) op (m3, 3);
+#define WOP_5(op) WOP_4 (op) op (m4, 4);
+#define WOP_6(op) WOP_5 (op) op (m5, 5);
+#define WOP_7(op) WOP_6 (op) op (m6, 6);
+#define WOP_8(op) WOP_7 (op) op (m7, 7);
+
+ #define NUM_WAYS 8
+ #define WOP_M1 WOP_8
+
+#define WOP(op) op (m0, 0); WOP_M1(op)
+
+#define DECLARE_VAR(reg, ii) v128 reg
+#define LOAD_data( reg, ii) reg = data[ii];
+#define STORE_data( reg, ii) data[ii] = reg;
+#if (NUM_WAYS > 1)
+#define XOR_data_M1(reg, ii) MM_XOR (reg, data[ii- 1]);
+#endif
+
+#define MM_OP_key(op, reg) MM_OP (op, reg, key);
+
+#define AES_D_m(k) MM_OP_m (vaesdq_u8, k);
+#define AES_D_IMC_m(k) AES_D_m (k); MM_OP1_m (vaesimcq_u8);
+
+#define AES_XOR( reg, ii) MM_OP_key (veorq_u8, reg)
+#define AES_D( reg, ii) MM_OP_key (vaesdq_u8, reg)
+#define AES_E( reg, ii) MM_OP_key (vaeseq_u8, reg)
+
+#define AES_D_IMC( reg, ii) AES_D (reg, ii); reg = vaesimcq_u8(reg)
+#define AES_E_MC( reg, ii) AES_E (reg, ii); reg = vaesmcq_u8(reg)
+
+#define CTR_START(reg, ii) MM_OP (vaddq_u64, ctr, one); reg = vreinterpretq_u8_u64(ctr);
+#define CTR_END( reg, ii) MM_XOR (data[ii], reg);
+
+#define WOP_KEY(op, n) { \
+ const v128 key = w[n]; \
+ WOP(op); }
+
+#define WIDE_LOOP_START \
+ dataEnd = data + numBlocks; \
+ if (numBlocks >= NUM_WAYS) \
+ { dataEnd -= NUM_WAYS; do { \
+
+#define WIDE_LOOP_END \
+ data += NUM_WAYS; \
+ } while (data <= dataEnd); \
+ dataEnd += NUM_WAYS; } \
+
+#define SINGLE_LOOP \
+ for (; data < dataEnd; data++)
+
+
+AES_FUNC_START2 (AesCbc_Decode_HW)
{
- AesCbc_Decode(p, data, numBlocks);
+ v128 iv = *p;
+ const v128 *wStart = p + ((size_t)*(const UInt32 *)(p + 1)) * 2;
+ const v128 *dataEnd;
+ p += 2;
+
+ WIDE_LOOP_START
+ {
+ const v128 *w = wStart;
+ WOP (DECLARE_VAR)
+ WOP (LOAD_data);
+ WOP_KEY (AES_D_IMC, 2)
+ do
+ {
+ WOP_KEY (AES_D_IMC, 1)
+ WOP_KEY (AES_D_IMC, 0)
+ w -= 2;
+ }
+ while (w != p);
+ WOP_KEY (AES_D, 1)
+ WOP_KEY (AES_XOR, 0)
+ MM_XOR (m0, iv);
+ WOP_M1 (XOR_data_M1)
+ iv = data[NUM_WAYS - 1];
+ WOP (STORE_data);
+ }
+ WIDE_LOOP_END
+
+ SINGLE_LOOP
+ {
+ const v128 *w = wStart;
+ v128 m = *data;
+ AES_D_IMC_m (w[2])
+ do
+ {
+ AES_D_IMC_m (w[1]);
+ AES_D_IMC_m (w[0]);
+ w -= 2;
+ }
+ while (w != p);
+ AES_D_m (w[1]);
+ MM_XOR_m (w[0]);
+ MM_XOR_m (iv);
+ iv = *data;
+ *data = m;
+ }
+
+ p[-2] = iv;
}
-void MY_FAST_CALL AesCtr_Code_Intel(UInt32 *p, Byte *data, size_t numBlocks)
+
+AES_FUNC_START2 (AesCtr_Code_HW)
{
- AesCtr_Code(p, data, numBlocks);
+ uint64x2_t ctr = vreinterpretq_u64_u8(*p);
+ const v128 *wEnd = p + ((size_t)*(const UInt32 *)(p + 1)) * 2;
+ const v128 *dataEnd;
+ uint64x2_t one = vdupq_n_u64(0);
+ one = vsetq_lane_u64(1, one, 0);
+ p += 2;
+
+ WIDE_LOOP_START
+ {
+ const v128 *w = p;
+ WOP (DECLARE_VAR)
+ WOP (CTR_START);
+ do
+ {
+ WOP_KEY (AES_E_MC, 0)
+ WOP_KEY (AES_E_MC, 1)
+ w += 2;
+ }
+ while (w != wEnd);
+ WOP_KEY (AES_E_MC, 0)
+ WOP_KEY (AES_E, 1)
+ WOP_KEY (AES_XOR, 2)
+ WOP (CTR_END);
+ }
+ WIDE_LOOP_END
+
+ SINGLE_LOOP
+ {
+ const v128 *w = p;
+ v128 m;
+ CTR_START (m, 0);
+ do
+ {
+ AES_E_MC_m (w[0]);
+ AES_E_MC_m (w[1]);
+ w += 2;
+ }
+ while (w != wEnd);
+ AES_E_MC_m (w[0]);
+ AES_E_m (w[1]);
+ MM_XOR_m (w[2]);
+ CTR_END (m, 0);
+ }
+
+ p[-2] = vreinterpretq_u8_u64(ctr);
}
-#endif
+#endif // USE_HW_AES
+
+#endif // MY_CPU_ARM_OR_ARM64
diff --git a/multiarc/src/formats/7z/C/Alloc.c b/multiarc/src/formats/7z/C/Alloc.c
index bcede4b8..d1af76c5 100644..100755
--- a/multiarc/src/formats/7z/C/Alloc.c
+++ b/multiarc/src/formats/7z/C/Alloc.c
@@ -1,12 +1,12 @@
/* Alloc.c -- Memory allocation functions
-2018-04-27 : Igor Pavlov : Public domain */
+2021-07-13 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include <stdio.h>
#ifdef _WIN32
-#include <windows.h>
+#include <Windows.h>
#endif
#include <stdlib.h>
@@ -122,7 +122,6 @@ static void PrintAddr(void *p)
#define Print(s)
#define PrintLn()
#define PrintHex(v, align)
-#define PrintDec(v, align)
#define PrintAddr(p)
#endif
@@ -133,10 +132,11 @@ void *MyAlloc(size_t size)
{
if (size == 0)
return NULL;
+ PRINT_ALLOC("Alloc ", g_allocCount, size, NULL);
#ifdef _SZ_ALLOC_DEBUG
{
void *p = malloc(size);
- PRINT_ALLOC("Alloc ", g_allocCount, size, p);
+ // PRINT_ALLOC("Alloc ", g_allocCount, size, p);
return p;
}
#else
@@ -172,14 +172,20 @@ void MidFree(void *address)
VirtualFree(address, 0, MEM_RELEASE);
}
-#ifndef MEM_LARGE_PAGES
-#undef _7ZIP_LARGE_PAGES
+#ifdef _7ZIP_LARGE_PAGES
+
+#ifdef MEM_LARGE_PAGES
+ #define MY__MEM_LARGE_PAGES MEM_LARGE_PAGES
+#else
+ #define MY__MEM_LARGE_PAGES 0x20000000
#endif
-#ifdef _7ZIP_LARGE_PAGES
+extern
+SIZE_T g_LargePageSize;
SIZE_T g_LargePageSize = 0;
-typedef SIZE_T (WINAPI *GetLargePageMinimumP)();
-#endif
+typedef SIZE_T (WINAPI *GetLargePageMinimumP)(VOID);
+
+#endif // _7ZIP_LARGE_PAGES
void SetLargePageSize()
{
@@ -214,7 +220,7 @@ void *BigAlloc(size_t size)
size2 = (size + ps) & ~ps;
if (size2 >= size)
{
- void *res = VirtualAlloc(NULL, size2, MEM_COMMIT | MEM_LARGE_PAGES, PAGE_READWRITE);
+ void *res = VirtualAlloc(NULL, size2, MEM_COMMIT | MY__MEM_LARGE_PAGES, PAGE_READWRITE);
if (res)
return res;
}
@@ -241,14 +247,14 @@ static void *SzAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p); return MyAlloc
static void SzFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p); MyFree(address); }
const ISzAlloc g_Alloc = { SzAlloc, SzFree };
+#ifdef _WIN32
static void *SzMidAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p); return MidAlloc(size); }
static void SzMidFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p); MidFree(address); }
-const ISzAlloc g_MidAlloc = { SzMidAlloc, SzMidFree };
-
static void *SzBigAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p); return BigAlloc(size); }
static void SzBigFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p); BigFree(address); }
+const ISzAlloc g_MidAlloc = { SzMidAlloc, SzMidFree };
const ISzAlloc g_BigAlloc = { SzBigAlloc, SzBigFree };
-
+#endif
/*
uintptr_t : <stdint.h> C99 (optional)
@@ -280,13 +286,15 @@ const ISzAlloc g_BigAlloc = { SzBigAlloc, SzBigFree };
*/
#define MY_ALIGN_PTR_DOWN(p, align) ((void *)((((UIntPtr)(p)) & ~((UIntPtr)(align) - 1))))
-#define MY_ALIGN_PTR_UP_PLUS(p, align) MY_ALIGN_PTR_DOWN(((char *)(p) + (align) + ADJUST_ALLOC_SIZE), align)
-
-#if (_POSIX_C_SOURCE >= 200112L) && !defined(_WIN32)
+#if !defined(_WIN32) && defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE >= 200112L)
#define USE_posix_memalign
#endif
+#ifndef USE_posix_memalign
+#define MY_ALIGN_PTR_UP_PLUS(p, align) MY_ALIGN_PTR_DOWN(((char *)(p) + (align) + ADJUST_ALLOC_SIZE), align)
+#endif
+
/*
This posix_memalign() is for test purposes only.
We also need special Free() function instead of free(),
diff --git a/multiarc/src/formats/7z/C/Alloc.h b/multiarc/src/formats/7z/C/Alloc.h
index 64823764..3be2041e 100644..100755
--- a/multiarc/src/formats/7z/C/Alloc.h
+++ b/multiarc/src/formats/7z/C/Alloc.h
@@ -1,5 +1,5 @@
/* Alloc.h -- Memory allocation functions
-2018-02-19 : Igor Pavlov : Public domain */
+2021-07-13 : Igor Pavlov : Public domain */
#ifndef __COMMON_ALLOC_H
#define __COMMON_ALLOC_H
@@ -13,7 +13,7 @@ void MyFree(void *address);
#ifdef _WIN32
-void SetLargePageSize();
+void SetLargePageSize(void);
void *MidAlloc(size_t size);
void MidFree(void *address);
@@ -30,8 +30,15 @@ void BigFree(void *address);
#endif
extern const ISzAlloc g_Alloc;
+
+#ifdef _WIN32
extern const ISzAlloc g_BigAlloc;
extern const ISzAlloc g_MidAlloc;
+#else
+#define g_BigAlloc g_AlignedAlloc
+#define g_MidAlloc g_AlignedAlloc
+#endif
+
extern const ISzAlloc g_AlignedAlloc;
diff --git a/multiarc/src/formats/7z/C/Bcj2.c b/multiarc/src/formats/7z/C/Bcj2.c
index 9a0046a6..c7b95670 100644..100755
--- a/multiarc/src/formats/7z/C/Bcj2.c
+++ b/multiarc/src/formats/7z/C/Bcj2.c
@@ -1,5 +1,5 @@
/* Bcj2.c -- BCJ2 Decoder (Converter for x86 code)
-2018-04-28 : Igor Pavlov : Public domain */
+2021-02-09 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -123,7 +123,7 @@ SRes Bcj2Dec_Decode(CBcj2Dec *p)
const Byte *src = p->bufs[BCJ2_STREAM_MAIN];
const Byte *srcLim;
Byte *dest;
- SizeT num = p->lims[BCJ2_STREAM_MAIN] - src;
+ SizeT num = (SizeT)(p->lims[BCJ2_STREAM_MAIN] - src);
if (num == 0)
{
@@ -134,7 +134,7 @@ SRes Bcj2Dec_Decode(CBcj2Dec *p)
dest = p->dest;
if (num > (SizeT)(p->destLim - dest))
{
- num = p->destLim - dest;
+ num = (SizeT)(p->destLim - dest);
if (num == 0)
{
p->state = BCJ2_DEC_STATE_ORIG;
@@ -168,7 +168,7 @@ SRes Bcj2Dec_Decode(CBcj2Dec *p)
break;
}
- num = src - p->bufs[BCJ2_STREAM_MAIN];
+ num = (SizeT)(src - p->bufs[BCJ2_STREAM_MAIN]);
if (src == srcLim)
{
@@ -228,7 +228,7 @@ SRes Bcj2Dec_Decode(CBcj2Dec *p)
p->ip += 4;
val -= p->ip;
dest = p->dest;
- rem = p->destLim - dest;
+ rem = (SizeT)(p->destLim - dest);
if (rem < 4)
{
diff --git a/multiarc/src/formats/7z/C/Bcj2.h b/multiarc/src/formats/7z/C/Bcj2.h
index 8824080a..8824080a 100644..100755
--- a/multiarc/src/formats/7z/C/Bcj2.h
+++ b/multiarc/src/formats/7z/C/Bcj2.h
diff --git a/multiarc/src/formats/7z/C/Bcj2Enc.c b/multiarc/src/formats/7z/C/Bcj2Enc.c
index bfbeb8e4..682362a1 100644..100755
--- a/multiarc/src/formats/7z/C/Bcj2Enc.c
+++ b/multiarc/src/formats/7z/C/Bcj2Enc.c
@@ -1,5 +1,5 @@
/* Bcj2Enc.c -- BCJ2 Encoder (Converter for x86 code)
-2019-02-02 : Igor Pavlov : Public domain */
+2021-02-09 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -104,7 +104,7 @@ static void Bcj2Enc_Encode_2(CBcj2Enc *p)
const Byte *src = p->src;
const Byte *srcLim;
Byte *dest;
- SizeT num = p->srcLim - src;
+ SizeT num = (SizeT)(p->srcLim - src);
if (p->finishMode == BCJ2_ENC_FINISH_MODE_CONTINUE)
{
@@ -118,7 +118,7 @@ static void Bcj2Enc_Encode_2(CBcj2Enc *p)
dest = p->bufs[BCJ2_STREAM_MAIN];
if (num > (SizeT)(p->lims[BCJ2_STREAM_MAIN] - dest))
{
- num = p->lims[BCJ2_STREAM_MAIN] - dest;
+ num = (SizeT)(p->lims[BCJ2_STREAM_MAIN] - dest);
if (num == 0)
{
p->state = BCJ2_STREAM_MAIN;
@@ -152,7 +152,7 @@ static void Bcj2Enc_Encode_2(CBcj2Enc *p)
break;
}
- num = src - p->src;
+ num = (SizeT)(src - p->src);
if (src == srcLim)
{
diff --git a/multiarc/src/formats/7z/C/Blake2.h b/multiarc/src/formats/7z/C/Blake2.h
index 14f3cb64..14f3cb64 100644..100755
--- a/multiarc/src/formats/7z/C/Blake2.h
+++ b/multiarc/src/formats/7z/C/Blake2.h
diff --git a/multiarc/src/formats/7z/C/Blake2s.c b/multiarc/src/formats/7z/C/Blake2s.c
index 6527415e..3c56a8b8 100644..100755
--- a/multiarc/src/formats/7z/C/Blake2s.c
+++ b/multiarc/src/formats/7z/C/Blake2s.c
@@ -1,5 +1,5 @@
/* Blake2s.c -- BLAKE2s and BLAKE2sp Hash
-2015-06-30 : Igor Pavlov : Public domain
+2021-02-09 : Igor Pavlov : Public domain
2015 : Samuel Neves : Public domain */
#include <string.h>
@@ -34,7 +34,7 @@ static const Byte k_Blake2s_Sigma[BLAKE2S_NUM_ROUNDS][16] =
};
-void Blake2s_Init0(CBlake2s *p)
+static void Blake2s_Init0(CBlake2s *p)
{
unsigned i;
for (i = 0; i < 8; i++)
diff --git a/multiarc/src/formats/7z/C/Bra.c b/multiarc/src/formats/7z/C/Bra.c
index aed17e33..3b854d9c 100644..100755
--- a/multiarc/src/formats/7z/C/Bra.c
+++ b/multiarc/src/formats/7z/C/Bra.c
@@ -1,5 +1,5 @@
/* Bra.c -- Converters for RISC code
-2017-04-04 : Igor Pavlov : Public domain */
+2021-02-09 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -22,7 +22,7 @@ SizeT ARM_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
for (;;)
{
if (p >= lim)
- return p - data;
+ return (SizeT)(p - data);
p += 4;
if (p[-1] == 0xEB)
break;
@@ -43,7 +43,7 @@ SizeT ARM_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
for (;;)
{
if (p >= lim)
- return p - data;
+ return (SizeT)(p - data);
p += 4;
if (p[-1] == 0xEB)
break;
@@ -78,7 +78,7 @@ SizeT ARMT_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
{
UInt32 b3;
if (p > lim)
- return p - data;
+ return (SizeT)(p - data);
b1 = p[1];
b3 = p[3];
p += 2;
@@ -113,7 +113,7 @@ SizeT ARMT_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
{
UInt32 b3;
if (p > lim)
- return p - data;
+ return (SizeT)(p - data);
b1 = p[1];
b3 = p[3];
p += 2;
@@ -162,7 +162,7 @@ SizeT PPC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
for (;;)
{
if (p >= lim)
- return p - data;
+ return (SizeT)(p - data);
p += 4;
/* if ((v & 0xFC000003) == 0x48000001) */
if ((p[-4] & 0xFC) == 0x48 && (p[-1] & 3) == 1)
@@ -196,7 +196,7 @@ SizeT SPARC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
for (;;)
{
if (p >= lim)
- return p - data;
+ return (SizeT)(p - data);
/*
v = GetBe32(p);
p += 4;
diff --git a/multiarc/src/formats/7z/C/Bra.h b/multiarc/src/formats/7z/C/Bra.h
index 855e37a6..855e37a6 100644..100755
--- a/multiarc/src/formats/7z/C/Bra.h
+++ b/multiarc/src/formats/7z/C/Bra.h
diff --git a/multiarc/src/formats/7z/C/Bra86.c b/multiarc/src/formats/7z/C/Bra86.c
index 93ed4d76..10a0fbd1 100644..100755
--- a/multiarc/src/formats/7z/C/Bra86.c
+++ b/multiarc/src/formats/7z/C/Bra86.c
@@ -1,5 +1,5 @@
/* Bra86.c -- Converter for x86 code (BCJ)
-2017-04-03 : Igor Pavlov : Public domain */
+2021-02-09 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -25,7 +25,7 @@ SizeT x86_Convert(Byte *data, SizeT size, UInt32 ip, UInt32 *state, int encoding
break;
{
- SizeT d = (SizeT)(p - data - pos);
+ SizeT d = (SizeT)(p - data) - pos;
pos = (SizeT)(p - data);
if (p >= limit)
{
diff --git a/multiarc/src/formats/7z/C/BraIA64.c b/multiarc/src/formats/7z/C/BraIA64.c
index d1dbc62c..d1dbc62c 100644..100755
--- a/multiarc/src/formats/7z/C/BraIA64.c
+++ b/multiarc/src/formats/7z/C/BraIA64.c
diff --git a/multiarc/src/formats/7z/C/BwtSort.c b/multiarc/src/formats/7z/C/BwtSort.c
index cc2f4b29..3eb57efa 100644..100755
--- a/multiarc/src/formats/7z/C/BwtSort.c
+++ b/multiarc/src/formats/7z/C/BwtSort.c
@@ -1,5 +1,5 @@
/* BwtSort.c -- BWT block sorting
-2018-07-04 : Igor Pavlov : Public domain */
+2021-04-01 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -60,7 +60,7 @@ SortGroup - is recursive Range-Sort function with HeapSort optimization for smal
returns: 1 - if there are groups, 0 - no more groups
*/
-UInt32 NO_INLINE SortGroup(UInt32 BlockSize, UInt32 NumSortedBytes, UInt32 groupOffset, UInt32 groupSize, int NumRefBits, UInt32 *Indices
+static UInt32 NO_INLINE SortGroup(UInt32 BlockSize, UInt32 NumSortedBytes, UInt32 groupOffset, UInt32 groupSize, int NumRefBits, UInt32 *Indices
#ifndef BLOCK_SORT_USE_HEAP_SORT
, UInt32 left, UInt32 range
#endif
@@ -116,7 +116,7 @@ UInt32 NO_INLINE SortGroup(UInt32 BlockSize, UInt32 NumSortedBytes, UInt32 group
}
HeapSort(temp, groupSize);
- mask = ((1 << NumRefBits) - 1);
+ mask = (((UInt32)1 << NumRefBits) - 1);
thereAreGroups = 0;
group = groupOffset;
diff --git a/multiarc/src/formats/7z/C/BwtSort.h b/multiarc/src/formats/7z/C/BwtSort.h
index 7e989a99..7e989a99 100644..100755
--- a/multiarc/src/formats/7z/C/BwtSort.h
+++ b/multiarc/src/formats/7z/C/BwtSort.h
diff --git a/multiarc/src/formats/7z/C/Compiler.h b/multiarc/src/formats/7z/C/Compiler.h
index 0cc409d8..a9816fa5 100644..100755
--- a/multiarc/src/formats/7z/C/Compiler.h
+++ b/multiarc/src/formats/7z/C/Compiler.h
@@ -1,9 +1,13 @@
/* Compiler.h
-2017-04-03 : Igor Pavlov : Public domain */
+2021-01-05 : Igor Pavlov : Public domain */
#ifndef __7Z_COMPILER_H
#define __7Z_COMPILER_H
+ #ifdef __clang__
+ #pragma clang diagnostic ignored "-Wunused-private-field"
+ #endif
+
#ifdef _MSC_VER
#ifdef UNDER_CE
@@ -25,6 +29,12 @@
#pragma warning(disable : 4786) // identifier was truncated to '255' characters in the debug information
#endif
+ #ifdef __clang__
+ #pragma clang diagnostic ignored "-Wdeprecated-declarations"
+ #pragma clang diagnostic ignored "-Wmicrosoft-exception-spec"
+ // #pragma clang diagnostic ignored "-Wreserved-id-macro"
+ #endif
+
#endif
#define UNUSED_VAR(x) (void)x;
diff --git a/multiarc/src/formats/7z/C/CpuArch.c b/multiarc/src/formats/7z/C/CpuArch.c
index 02e482e0..fa9afe39 100644..100755
--- a/multiarc/src/formats/7z/C/CpuArch.c
+++ b/multiarc/src/formats/7z/C/CpuArch.c
@@ -1,5 +1,5 @@
/* CpuArch.c -- CPU specific code
-2018-02-18: Igor Pavlov : Public domain */
+2021-07-13 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -55,6 +55,47 @@ static UInt32 CheckFlag(UInt32 flag)
#define CHECK_CPUID_IS_SUPPORTED
#endif
+#ifndef USE_ASM
+ #ifdef _MSC_VER
+ #if _MSC_VER >= 1600
+ #define MY__cpuidex __cpuidex
+ #else
+
+/*
+ __cpuid (function == 4) requires subfunction number in ECX.
+ MSDN: The __cpuid intrinsic clears the ECX register before calling the cpuid instruction.
+ __cpuid() in new MSVC clears ECX.
+ __cpuid() in old MSVC (14.00) doesn't clear ECX
+ We still can use __cpuid for low (function) values that don't require ECX,
+ but __cpuid() in old MSVC will be incorrect for some function values: (function == 4).
+ So here we use the hack for old MSVC to send (subFunction) in ECX register to cpuid instruction,
+ where ECX value is first parameter for FAST_CALL / NO_INLINE function,
+ So the caller of MY__cpuidex_HACK() sets ECX as subFunction, and
+ old MSVC for __cpuid() doesn't change ECX and cpuid instruction gets (subFunction) value.
+
+ DON'T remove MY_NO_INLINE and MY_FAST_CALL for MY__cpuidex_HACK() !!!
+*/
+
+static
+MY_NO_INLINE
+void MY_FAST_CALL MY__cpuidex_HACK(UInt32 subFunction, int *CPUInfo, UInt32 function)
+{
+ UNUSED_VAR(subFunction);
+ __cpuid(CPUInfo, function);
+}
+
+ #define MY__cpuidex(info, func, func2) MY__cpuidex_HACK(func2, info, func)
+ #pragma message("======== MY__cpuidex_HACK WAS USED ========")
+ #endif
+ #else
+ #define MY__cpuidex(info, func, func2) __cpuid(info, func)
+ #pragma message("======== (INCORRECT ?) cpuid WAS USED ========")
+ #endif
+#endif
+
+
+
+
void MyCPUID(UInt32 function, UInt32 *a, UInt32 *b, UInt32 *c, UInt32 *d)
{
#ifdef USE_ASM
@@ -99,18 +140,20 @@ void MyCPUID(UInt32 function, UInt32 *a, UInt32 *b, UInt32 *c, UInt32 *d)
#endif
"=c" (*c) ,
"=d" (*d)
- : "0" (function)) ;
+ : "0" (function), "c"(0) ) ;
#endif
#else
int CPUInfo[4];
- __cpuid(CPUInfo, function);
- *a = CPUInfo[0];
- *b = CPUInfo[1];
- *c = CPUInfo[2];
- *d = CPUInfo[3];
+
+ MY__cpuidex(CPUInfo, (int)function, 0);
+
+ *a = (UInt32)CPUInfo[0];
+ *b = (UInt32)CPUInfo[1];
+ *c = (UInt32)CPUInfo[2];
+ *d = (UInt32)CPUInfo[3];
#endif
}
@@ -174,7 +217,7 @@ BoolInt CPU_Is_InOrder()
}
#if !defined(MY_CPU_AMD64) && defined(_WIN32)
-#include <windows.h>
+#include <Windows.h>
static BoolInt CPU_Sys_Is_SSE_Supported()
{
OSVERSIONINFO vi;
@@ -188,13 +231,101 @@ static BoolInt CPU_Sys_Is_SSE_Supported()
#define CHECK_SYS_SSE_SUPPORT
#endif
-BoolInt CPU_Is_Aes_Supported()
+
+static UInt32 X86_CPUID_ECX_Get_Flags()
+{
+ Cx86cpuid p;
+ CHECK_SYS_SSE_SUPPORT
+ if (!x86cpuid_CheckAndRead(&p))
+ return 0;
+ return p.c;
+}
+
+BoolInt CPU_IsSupported_AES()
+{
+ return (X86_CPUID_ECX_Get_Flags() >> 25) & 1;
+}
+
+BoolInt CPU_IsSupported_SSSE3()
+{
+ return (X86_CPUID_ECX_Get_Flags() >> 9) & 1;
+}
+
+BoolInt CPU_IsSupported_SSE41()
+{
+ return (X86_CPUID_ECX_Get_Flags() >> 19) & 1;
+}
+
+BoolInt CPU_IsSupported_SHA()
+{
+ Cx86cpuid p;
+ CHECK_SYS_SSE_SUPPORT
+ if (!x86cpuid_CheckAndRead(&p))
+ return False;
+
+ if (p.maxFunc < 7)
+ return False;
+ {
+ UInt32 d[4] = { 0 };
+ MyCPUID(7, &d[0], &d[1], &d[2], &d[3]);
+ return (d[1] >> 29) & 1;
+ }
+}
+
+// #include <stdio.h>
+
+#ifdef _WIN32
+#include <Windows.h>
+#endif
+
+BoolInt CPU_IsSupported_AVX2()
+{
+ Cx86cpuid p;
+ CHECK_SYS_SSE_SUPPORT
+
+ #ifdef _WIN32
+ #define MY__PF_XSAVE_ENABLED 17
+ if (!IsProcessorFeaturePresent(MY__PF_XSAVE_ENABLED))
+ return False;
+ #endif
+
+ if (!x86cpuid_CheckAndRead(&p))
+ return False;
+ if (p.maxFunc < 7)
+ return False;
+ {
+ UInt32 d[4] = { 0 };
+ MyCPUID(7, &d[0], &d[1], &d[2], &d[3]);
+ // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]);
+ return 1
+ & (d[1] >> 5); // avx2
+ }
+}
+
+BoolInt CPU_IsSupported_VAES_AVX2()
{
Cx86cpuid p;
CHECK_SYS_SSE_SUPPORT
+
+ #ifdef _WIN32
+ #define MY__PF_XSAVE_ENABLED 17
+ if (!IsProcessorFeaturePresent(MY__PF_XSAVE_ENABLED))
+ return False;
+ #endif
+
if (!x86cpuid_CheckAndRead(&p))
return False;
- return (p.c >> 25) & 1;
+ if (p.maxFunc < 7)
+ return False;
+ {
+ UInt32 d[4] = { 0 };
+ MyCPUID(7, &d[0], &d[1], &d[2], &d[3]);
+ // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]);
+ return 1
+ & (d[1] >> 5) // avx2
+ // & (d[1] >> 31) // avx512vl
+ & (d[2] >> 9); // vaes // VEX-256/EVEX
+ }
}
BoolInt CPU_IsSupported_PageGB()
@@ -215,4 +346,133 @@ BoolInt CPU_IsSupported_PageGB()
}
}
+
+#elif defined(MY_CPU_ARM_OR_ARM64)
+
+#ifdef _WIN32
+
+#include <Windows.h>
+
+BoolInt CPU_IsSupported_CRC32() { return IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
+BoolInt CPU_IsSupported_CRYPTO() { return IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
+BoolInt CPU_IsSupported_NEON() { return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
+
+#else
+
+#if defined(__APPLE__)
+
+/*
+#include <stdio.h>
+#include <string.h>
+static void Print_sysctlbyname(const char *name)
+{
+ size_t bufSize = 256;
+ char buf[256];
+ int res = sysctlbyname(name, &buf, &bufSize, NULL, 0);
+ {
+ int i;
+ printf("\nres = %d : %s : '%s' : bufSize = %d, numeric", res, name, buf, (unsigned)bufSize);
+ for (i = 0; i < 20; i++)
+ printf(" %2x", (unsigned)(Byte)buf[i]);
+
+ }
+}
+*/
+
+static BoolInt My_sysctlbyname_Get_BoolInt(const char *name)
+{
+ UInt32 val = 0;
+ if (My_sysctlbyname_Get_UInt32(name, &val) == 0 && val == 1)
+ return 1;
+ return 0;
+}
+
+ /*
+ Print_sysctlbyname("hw.pagesize");
+ Print_sysctlbyname("machdep.cpu.brand_string");
+ */
+
+BoolInt CPU_IsSupported_CRC32(void)
+{
+ return My_sysctlbyname_Get_BoolInt("hw.optional.armv8_crc32");
+}
+
+BoolInt CPU_IsSupported_NEON(void)
+{
+ return My_sysctlbyname_Get_BoolInt("hw.optional.neon");
+}
+
+#ifdef MY_CPU_ARM64
+#define APPLE_CRYPTO_SUPPORT_VAL 1
+#else
+#define APPLE_CRYPTO_SUPPORT_VAL 0
+#endif
+
+BoolInt CPU_IsSupported_SHA1(void) { return APPLE_CRYPTO_SUPPORT_VAL; }
+BoolInt CPU_IsSupported_SHA2(void) { return APPLE_CRYPTO_SUPPORT_VAL; }
+BoolInt CPU_IsSupported_AES (void) { return APPLE_CRYPTO_SUPPORT_VAL; }
+
+
+#else // __APPLE__
+
+#include <sys/auxv.h>
+
+#define USE_HWCAP
+
+#ifdef USE_HWCAP
+
+#include <asm/hwcap.h>
+
+ #define MY_HWCAP_CHECK_FUNC_2(name1, name2) \
+ BoolInt CPU_IsSupported_ ## name1() { return (getauxval(AT_HWCAP) & (HWCAP_ ## name2)) ? 1 : 0; }
+
+#ifdef MY_CPU_ARM64
+ #define MY_HWCAP_CHECK_FUNC(name) \
+ MY_HWCAP_CHECK_FUNC_2(name, name)
+ MY_HWCAP_CHECK_FUNC_2(NEON, ASIMD)
+// MY_HWCAP_CHECK_FUNC (ASIMD)
+#elif defined(MY_CPU_ARM)
+ #define MY_HWCAP_CHECK_FUNC(name) \
+ BoolInt CPU_IsSupported_ ## name() { return (getauxval(AT_HWCAP2) & (HWCAP2_ ## name)) ? 1 : 0; }
+ MY_HWCAP_CHECK_FUNC_2(NEON, NEON)
+#endif
+
+#else // USE_HWCAP
+
+ #define MY_HWCAP_CHECK_FUNC(name) \
+ BoolInt CPU_IsSupported_ ## name() { return 0; }
+ MY_HWCAP_CHECK_FUNC(NEON)
+
+#endif // USE_HWCAP
+
+MY_HWCAP_CHECK_FUNC (CRC32)
+MY_HWCAP_CHECK_FUNC (SHA1)
+MY_HWCAP_CHECK_FUNC (SHA2)
+MY_HWCAP_CHECK_FUNC (AES)
+
+#endif // __APPLE__
+#endif // _WIN32
+
+#endif // MY_CPU_ARM_OR_ARM64
+
+
+
+#ifdef __APPLE__
+
+#include <sys/sysctl.h>
+
+int My_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize)
+{
+ return sysctlbyname(name, buf, bufSize, NULL, 0);
+}
+
+int My_sysctlbyname_Get_UInt32(const char *name, UInt32 *val)
+{
+ size_t bufSize = sizeof(*val);
+ int res = My_sysctlbyname_Get(name, val, &bufSize);
+ if (res == 0 && bufSize != sizeof(*val))
+ return EFAULT;
+ return res;
+}
+
#endif
diff --git a/multiarc/src/formats/7z/C/CpuArch.h b/multiarc/src/formats/7z/C/CpuArch.h
index bd429388..4856fbb1 100644..100755
--- a/multiarc/src/formats/7z/C/CpuArch.h
+++ b/multiarc/src/formats/7z/C/CpuArch.h
@@ -1,5 +1,5 @@
/* CpuArch.h -- CPU specific code
-2018-02-18 : Igor Pavlov : Public domain */
+2022-07-15 : Igor Pavlov : Public domain */
#ifndef __CPU_ARCH_H
#define __CPU_ARCH_H
@@ -14,6 +14,10 @@ MY_CPU_BE means that CPU is BIG ENDIAN.
If MY_CPU_LE and MY_CPU_BE are not defined, we don't know about ENDIANNESS of platform.
MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned memory accesses.
+
+MY_CPU_64BIT means that processor can work with 64-bit registers.
+ MY_CPU_64BIT can be used to select fast code branch
+ MY_CPU_64BIT doesn't mean that (sizeof(void *) == 8)
*/
#if defined(_M_X64) \
@@ -24,8 +28,10 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem
#define MY_CPU_AMD64
#ifdef __ILP32__
#define MY_CPU_NAME "x32"
+ #define MY_CPU_SIZEOF_POINTER 4
#else
#define MY_CPU_NAME "x64"
+ #define MY_CPU_SIZEOF_POINTER 8
#endif
#define MY_CPU_64BIT
#endif
@@ -35,7 +41,8 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem
|| defined(__i386__)
#define MY_CPU_X86
#define MY_CPU_NAME "x86"
- #define MY_CPU_32BIT
+ /* #define MY_CPU_32BIT */
+ #define MY_CPU_SIZEOF_POINTER 4
#endif
@@ -59,8 +66,14 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem
|| defined(__THUMBEL__) \
|| defined(__THUMBEB__)
#define MY_CPU_ARM
- #define MY_CPU_NAME "arm"
- #define MY_CPU_32BIT
+
+ #if defined(__thumb__) || defined(__THUMBEL__) || defined(_M_ARMT)
+ #define MY_CPU_NAME "armt"
+ #else
+ #define MY_CPU_NAME "arm"
+ #endif
+ /* #define MY_CPU_32BIT */
+ #define MY_CPU_SIZEOF_POINTER 4
#endif
@@ -84,26 +97,41 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem
#if defined(__ppc64__) \
- || defined(__powerpc64__)
+ || defined(__powerpc64__) \
+ || defined(__ppc__) \
+ || defined(__powerpc__) \
+ || defined(__PPC__) \
+ || defined(_POWER)
+
+#if defined(__ppc64__) \
+ || defined(__powerpc64__) \
+ || defined(_LP64) \
+ || defined(__64BIT__)
#ifdef __ILP32__
#define MY_CPU_NAME "ppc64-32"
+ #define MY_CPU_SIZEOF_POINTER 4
#else
#define MY_CPU_NAME "ppc64"
+ #define MY_CPU_SIZEOF_POINTER 8
#endif
#define MY_CPU_64BIT
-#elif defined(__ppc__) \
- || defined(__powerpc__)
+#else
#define MY_CPU_NAME "ppc"
- #define MY_CPU_32BIT
+ #define MY_CPU_SIZEOF_POINTER 4
+ /* #define MY_CPU_32BIT */
+#endif
#endif
-#if defined(__sparc64__)
- #define MY_CPU_NAME "sparc64"
- #define MY_CPU_64BIT
-#elif defined(__sparc__)
- #define MY_CPU_NAME "sparc"
- /* #define MY_CPU_32BIT */
+#if defined(__riscv) \
+ || defined(__riscv__)
+ #if __riscv_xlen == 32
+ #define MY_CPU_NAME "riscv32"
+ #elif __riscv_xlen == 64
+ #define MY_CPU_NAME "riscv64"
+ #else
+ #define MY_CPU_NAME "riscv"
+ #endif
#endif
@@ -111,6 +139,10 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem
#define MY_CPU_X86_OR_AMD64
#endif
+#if defined(MY_CPU_ARM) || defined(MY_CPU_ARM64)
+#define MY_CPU_ARM_OR_ARM64
+#endif
+
#ifdef _WIN32
@@ -170,6 +202,40 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem
#error Stop_Compiling_Bad_32_64_BIT
#endif
+#ifdef __SIZEOF_POINTER__
+ #ifdef MY_CPU_SIZEOF_POINTER
+ #if MY_CPU_SIZEOF_POINTER != __SIZEOF_POINTER__
+ #error Stop_Compiling_Bad_MY_CPU_PTR_SIZE
+ #endif
+ #else
+ #define MY_CPU_SIZEOF_POINTER __SIZEOF_POINTER__
+ #endif
+#endif
+
+#if defined(MY_CPU_SIZEOF_POINTER) && (MY_CPU_SIZEOF_POINTER == 4)
+#if defined (_LP64)
+ #error Stop_Compiling_Bad_MY_CPU_PTR_SIZE
+#endif
+#endif
+
+#ifdef _MSC_VER
+ #if _MSC_VER >= 1300
+ #define MY_CPU_pragma_pack_push_1 __pragma(pack(push, 1))
+ #define MY_CPU_pragma_pop __pragma(pack(pop))
+ #else
+ #define MY_CPU_pragma_pack_push_1
+ #define MY_CPU_pragma_pop
+ #endif
+#else
+ #ifdef __xlC__
+ #define MY_CPU_pragma_pack_push_1 _Pragma("pack(1)")
+ #define MY_CPU_pragma_pop _Pragma("pack()")
+ #else
+ #define MY_CPU_pragma_pack_push_1 _Pragma("pack(push, 1)")
+ #define MY_CPU_pragma_pop _Pragma("pack(pop)")
+ #endif
+#endif
+
#ifndef MY_CPU_NAME
#ifdef MY_CPU_LE
@@ -189,8 +255,12 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem
#ifdef MY_CPU_LE
#if defined(MY_CPU_X86_OR_AMD64) \
- || defined(MY_CPU_ARM64) \
- || defined(__ARM_FEATURE_UNALIGNED)
+ || defined(MY_CPU_ARM64)
+ #define MY_CPU_LE_UNALIGN
+ #define MY_CPU_LE_UNALIGN_64
+ #elif defined(__ARM_FEATURE_UNALIGNED)
+ /* gcc9 for 32-bit arm can use LDRD instruction that requires 32-bit alignment.
+ So we can't use unaligned 64-bit operations. */
#define MY_CPU_LE_UNALIGN
#endif
#endif
@@ -200,11 +270,15 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem
#define GetUi16(p) (*(const UInt16 *)(const void *)(p))
#define GetUi32(p) (*(const UInt32 *)(const void *)(p))
+#ifdef MY_CPU_LE_UNALIGN_64
#define GetUi64(p) (*(const UInt64 *)(const void *)(p))
+#endif
-#define SetUi16(p, v) { *(UInt16 *)(p) = (v); }
-#define SetUi32(p, v) { *(UInt32 *)(p) = (v); }
-#define SetUi64(p, v) { *(UInt64 *)(p) = (v); }
+#define SetUi16(p, v) { *(UInt16 *)(void *)(p) = (v); }
+#define SetUi32(p, v) { *(UInt32 *)(void *)(p) = (v); }
+#ifdef MY_CPU_LE_UNALIGN_64
+#define SetUi64(p, v) { *(UInt64 *)(void *)(p) = (v); }
+#endif
#else
@@ -218,8 +292,6 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem
((UInt32)((const Byte *)(p))[2] << 16) | \
((UInt32)((const Byte *)(p))[3] << 24))
-#define GetUi64(p) (GetUi32(p) | ((UInt64)GetUi32(((const Byte *)(p)) + 4) << 32))
-
#define SetUi16(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \
_ppp_[0] = (Byte)_vvv_; \
_ppp_[1] = (Byte)(_vvv_ >> 8); }
@@ -230,19 +302,29 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem
_ppp_[2] = (Byte)(_vvv_ >> 16); \
_ppp_[3] = (Byte)(_vvv_ >> 24); }
+#endif
+
+
+#ifndef MY_CPU_LE_UNALIGN_64
+
+#define GetUi64(p) (GetUi32(p) | ((UInt64)GetUi32(((const Byte *)(p)) + 4) << 32))
+
#define SetUi64(p, v) { Byte *_ppp2_ = (Byte *)(p); UInt64 _vvv2_ = (v); \
SetUi32(_ppp2_ , (UInt32)_vvv2_); \
SetUi32(_ppp2_ + 4, (UInt32)(_vvv2_ >> 32)); }
#endif
+
+
+
#ifdef __has_builtin
#define MY__has_builtin(x) __has_builtin(x)
#else
#define MY__has_builtin(x) 0
#endif
-#if defined(MY_CPU_LE_UNALIGN) && /* defined(_WIN64) && */ (_MSC_VER >= 1300)
+#if defined(MY_CPU_LE_UNALIGN) && /* defined(_WIN64) && */ defined(_MSC_VER) && (_MSC_VER >= 1300)
/* Note: we use bswap instruction, that is unsupported in 386 cpu */
@@ -253,8 +335,8 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem
#pragma intrinsic(_byteswap_uint64)
/* #define GetBe16(p) _byteswap_ushort(*(const UInt16 *)(const Byte *)(p)) */
-#define GetBe32(p) _byteswap_ulong(*(const UInt32 *)(const Byte *)(p))
-#define GetBe64(p) _byteswap_uint64(*(const UInt64 *)(const Byte *)(p))
+#define GetBe32(p) _byteswap_ulong (*(const UInt32 *)(const void *)(p))
+#define GetBe64(p) _byteswap_uint64(*(const UInt64 *)(const void *)(p))
#define SetBe32(p, v) (*(UInt32 *)(void *)(p)) = _byteswap_ulong(v)
@@ -262,9 +344,9 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem
(defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) \
|| (defined(__clang__) && MY__has_builtin(__builtin_bswap16)) )
-/* #define GetBe16(p) __builtin_bswap16(*(const UInt16 *)(const Byte *)(p)) */
-#define GetBe32(p) __builtin_bswap32(*(const UInt32 *)(const Byte *)(p))
-#define GetBe64(p) __builtin_bswap64(*(const UInt64 *)(const Byte *)(p))
+/* #define GetBe16(p) __builtin_bswap16(*(const UInt16 *)(const void *)(p)) */
+#define GetBe32(p) __builtin_bswap32(*(const UInt32 *)(const void *)(p))
+#define GetBe64(p) __builtin_bswap64(*(const UInt64 *)(const void *)(p))
#define SetBe32(p, v) (*(UInt32 *)(void *)(p)) = __builtin_bswap32(v)
@@ -325,10 +407,37 @@ int x86cpuid_GetFirm(const Cx86cpuid *p);
#define x86cpuid_GetModel(ver) (((ver >> 12) & 0xF0) | ((ver >> 4) & 0xF))
#define x86cpuid_GetStepping(ver) (ver & 0xF)
-BoolInt CPU_Is_InOrder();
-BoolInt CPU_Is_Aes_Supported();
-BoolInt CPU_IsSupported_PageGB();
+BoolInt CPU_Is_InOrder(void);
+
+BoolInt CPU_IsSupported_AES(void);
+BoolInt CPU_IsSupported_AVX2(void);
+BoolInt CPU_IsSupported_VAES_AVX2(void);
+BoolInt CPU_IsSupported_SSSE3(void);
+BoolInt CPU_IsSupported_SSE41(void);
+BoolInt CPU_IsSupported_SHA(void);
+BoolInt CPU_IsSupported_PageGB(void);
+
+#elif defined(MY_CPU_ARM_OR_ARM64)
+
+BoolInt CPU_IsSupported_CRC32(void);
+BoolInt CPU_IsSupported_NEON(void);
+
+#if defined(_WIN32)
+BoolInt CPU_IsSupported_CRYPTO(void);
+#define CPU_IsSupported_SHA1 CPU_IsSupported_CRYPTO
+#define CPU_IsSupported_SHA2 CPU_IsSupported_CRYPTO
+#define CPU_IsSupported_AES CPU_IsSupported_CRYPTO
+#else
+BoolInt CPU_IsSupported_SHA1(void);
+BoolInt CPU_IsSupported_SHA2(void);
+BoolInt CPU_IsSupported_AES(void);
+#endif
+
+#endif
+#if defined(__APPLE__)
+int My_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize);
+int My_sysctlbyname_Get_UInt32(const char *name, UInt32 *val);
#endif
EXTERN_C_END
diff --git a/multiarc/src/formats/7z/C/Delta.c b/multiarc/src/formats/7z/C/Delta.c
index e3edd21e..c4a4499f 100644..100755
--- a/multiarc/src/formats/7z/C/Delta.c
+++ b/multiarc/src/formats/7z/C/Delta.c
@@ -1,5 +1,5 @@
/* Delta.c -- Delta converter
-2009-05-26 : Igor Pavlov : Public domain */
+2021-02-09 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -12,53 +12,158 @@ void Delta_Init(Byte *state)
state[i] = 0;
}
-static void MyMemCpy(Byte *dest, const Byte *src, unsigned size)
-{
- unsigned i;
- for (i = 0; i < size; i++)
- dest[i] = src[i];
-}
void Delta_Encode(Byte *state, unsigned delta, Byte *data, SizeT size)
{
- Byte buf[DELTA_STATE_SIZE];
- unsigned j = 0;
- MyMemCpy(buf, state, delta);
+ Byte temp[DELTA_STATE_SIZE];
+
+ if (size == 0)
+ return;
+
+ {
+ unsigned i = 0;
+ do
+ temp[i] = state[i];
+ while (++i != delta);
+ }
+
+ if (size <= delta)
+ {
+ unsigned i = 0, k;
+ do
+ {
+ Byte b = *data;
+ *data++ = (Byte)(b - temp[i]);
+ temp[i] = b;
+ }
+ while (++i != size);
+
+ k = 0;
+
+ do
+ {
+ if (i == delta)
+ i = 0;
+ state[k] = temp[i++];
+ }
+ while (++k != delta);
+
+ return;
+ }
+
{
- SizeT i;
- for (i = 0; i < size;)
+ Byte *p = data + size - delta;
+ {
+ unsigned i = 0;
+ do
+ state[i] = *p++;
+ while (++i != delta);
+ }
{
- for (j = 0; j < delta && i < size; i++, j++)
+ const Byte *lim = data + delta;
+ ptrdiff_t dif = -(ptrdiff_t)delta;
+
+ if (((ptrdiff_t)size + dif) & 1)
{
- Byte b = data[i];
- data[i] = (Byte)(b - buf[j]);
- buf[j] = b;
+ --p; *p = (Byte)(*p - p[dif]);
}
+
+ while (p != lim)
+ {
+ --p; *p = (Byte)(*p - p[dif]);
+ --p; *p = (Byte)(*p - p[dif]);
+ }
+
+ dif = -dif;
+
+ do
+ {
+ --p; *p = (Byte)(*p - temp[--dif]);
+ }
+ while (dif != 0);
}
}
- if (j == delta)
- j = 0;
- MyMemCpy(state, buf + j, delta - j);
- MyMemCpy(state + delta - j, buf, j);
}
+
void Delta_Decode(Byte *state, unsigned delta, Byte *data, SizeT size)
{
- Byte buf[DELTA_STATE_SIZE];
- unsigned j = 0;
- MyMemCpy(buf, state, delta);
+ unsigned i;
+ const Byte *lim;
+
+ if (size == 0)
+ return;
+
+ i = 0;
+ lim = data + size;
+
+ if (size <= delta)
+ {
+ do
+ *data = (Byte)(*data + state[i++]);
+ while (++data != lim);
+
+ for (; delta != i; state++, delta--)
+ *state = state[i];
+ data -= i;
+ }
+ else
{
- SizeT i;
- for (i = 0; i < size;)
+ /*
+ #define B(n) b ## n
+ #define I(n) Byte B(n) = state[n];
+ #define U(n) { B(n) = (Byte)((B(n)) + *data++); data[-1] = (B(n)); }
+ #define F(n) if (data != lim) { U(n) }
+
+ if (delta == 1)
+ {
+ I(0)
+ if ((lim - data) & 1) { U(0) }
+ while (data != lim) { U(0) U(0) }
+ data -= 1;
+ }
+ else if (delta == 2)
{
- for (j = 0; j < delta && i < size; i++, j++)
+ I(0) I(1)
+ lim -= 1; while (data < lim) { U(0) U(1) }
+ lim += 1; F(0)
+ data -= 2;
+ }
+ else if (delta == 3)
+ {
+ I(0) I(1) I(2)
+ lim -= 2; while (data < lim) { U(0) U(1) U(2) }
+ lim += 2; F(0) F(1)
+ data -= 3;
+ }
+ else if (delta == 4)
+ {
+ I(0) I(1) I(2) I(3)
+ lim -= 3; while (data < lim) { U(0) U(1) U(2) U(3) }
+ lim += 3; F(0) F(1) F(2)
+ data -= 4;
+ }
+ else
+ */
+ {
+ do
+ {
+ *data = (Byte)(*data + state[i++]);
+ data++;
+ }
+ while (i != delta);
+
{
- buf[j] = data[i] = (Byte)(buf[j] + data[i]);
+ ptrdiff_t dif = -(ptrdiff_t)delta;
+ do
+ *data = (Byte)(*data + data[dif]);
+ while (++data != lim);
+ data += dif;
}
}
}
- if (j == delta)
- j = 0;
- MyMemCpy(state, buf + j, delta - j);
- MyMemCpy(state + delta - j, buf, j);
+
+ do
+ *state++ = *data;
+ while (++data != lim);
}
diff --git a/multiarc/src/formats/7z/C/Delta.h b/multiarc/src/formats/7z/C/Delta.h
index 2fa54ad6..2fa54ad6 100644..100755
--- a/multiarc/src/formats/7z/C/Delta.h
+++ b/multiarc/src/formats/7z/C/Delta.h
diff --git a/multiarc/src/formats/7z/C/DllSecur.c b/multiarc/src/formats/7z/C/DllSecur.c
index 5ea108ab..dce0c96c 100644..100755
--- a/multiarc/src/formats/7z/C/DllSecur.c
+++ b/multiarc/src/formats/7z/C/DllSecur.c
@@ -1,16 +1,20 @@
/* DllSecur.c -- DLL loading security
-2018-02-21 : Igor Pavlov : Public domain */
+2022-07-15 : Igor Pavlov : Public domain */
#include "Precomp.h"
#ifdef _WIN32
-#include <windows.h>
+#include <Windows.h>
#include "DllSecur.h"
#ifndef UNDER_CE
+#if defined(__GNUC__) && (__GNUC__ >= 8)
+ #pragma GCC diagnostic ignored "-Wcast-function-type"
+#endif
+
typedef BOOL (WINAPI *Func_SetDefaultDllDirectories)(DWORD DirectoryFlags);
#define MY_LOAD_LIBRARY_SEARCH_USER_DIRS 0x400
@@ -33,17 +37,19 @@ static const char * const g_Dlls =
#endif
+// #define MY_CAST_FUNC (void(*)())
+#define MY_CAST_FUNC
+
void My_SetDefaultDllDirectories()
{
#ifndef UNDER_CE
OSVERSIONINFO vi;
vi.dwOSVersionInfoSize = sizeof(vi);
- GetVersionEx(&vi);
if (!GetVersionEx(&vi) || vi.dwMajorVersion != 6 || vi.dwMinorVersion != 0)
{
Func_SetDefaultDllDirectories setDllDirs = (Func_SetDefaultDllDirectories)
- GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), "SetDefaultDllDirectories");
+ MY_CAST_FUNC GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), "SetDefaultDllDirectories");
if (setDllDirs)
if (setDllDirs(MY_LOAD_LIBRARY_SEARCH_SYSTEM32 | MY_LOAD_LIBRARY_SEARCH_USER_DIRS))
return;
@@ -66,7 +72,7 @@ void LoadSecurityDlls()
if (!GetVersionEx(&vi) || vi.dwMajorVersion != 6 || vi.dwMinorVersion != 0)
{
Func_SetDefaultDllDirectories setDllDirs = (Func_SetDefaultDllDirectories)
- GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), "SetDefaultDllDirectories");
+ MY_CAST_FUNC GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), "SetDefaultDllDirectories");
if (setDllDirs)
if (setDllDirs(MY_LOAD_LIBRARY_SEARCH_SYSTEM32 | MY_LOAD_LIBRARY_SEARCH_USER_DIRS))
return;
diff --git a/multiarc/src/formats/7z/C/DllSecur.h b/multiarc/src/formats/7z/C/DllSecur.h
index e2a049ad..64ff26cd 100644..100755
--- a/multiarc/src/formats/7z/C/DllSecur.h
+++ b/multiarc/src/formats/7z/C/DllSecur.h
@@ -10,8 +10,8 @@ EXTERN_C_BEGIN
#ifdef _WIN32
-void My_SetDefaultDllDirectories();
-void LoadSecurityDlls();
+void My_SetDefaultDllDirectories(void);
+void LoadSecurityDlls(void);
#endif
diff --git a/multiarc/src/formats/7z/C/HuffEnc.c b/multiarc/src/formats/7z/C/HuffEnc.c
index a54b3d87..f3c2996d 100644..100755
--- a/multiarc/src/formats/7z/C/HuffEnc.c
+++ b/multiarc/src/formats/7z/C/HuffEnc.c
@@ -1,5 +1,5 @@
/* HuffEnc.c -- functions for Huffman encoding
-2017-04-03 : Igor Pavlov : Public domain */
+2021-02-09 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -8,7 +8,7 @@
#define kMaxLen 16
#define NUM_BITS 10
-#define MASK ((1 << NUM_BITS) - 1)
+#define MASK (((unsigned)1 << NUM_BITS) - 1)
#define NUM_COUNTERS 64
diff --git a/multiarc/src/formats/7z/C/HuffEnc.h b/multiarc/src/formats/7z/C/HuffEnc.h
index 92b6878d..92b6878d 100644..100755
--- a/multiarc/src/formats/7z/C/HuffEnc.h
+++ b/multiarc/src/formats/7z/C/HuffEnc.h
diff --git a/multiarc/src/formats/7z/C/LzFind.c b/multiarc/src/formats/7z/C/LzFind.c
index df55e86c..1b73c284 100644..100755
--- a/multiarc/src/formats/7z/C/LzFind.c
+++ b/multiarc/src/formats/7z/C/LzFind.c
@@ -1,20 +1,69 @@
/* LzFind.c -- Match finder for LZ algorithms
-2018-07-08 : Igor Pavlov : Public domain */
+2021-11-29 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include <string.h>
+// #include <stdio.h>
+#include "CpuArch.h"
#include "LzFind.h"
#include "LzHash.h"
+#define kBlockMoveAlign (1 << 7) // alignment for memmove()
+#define kBlockSizeAlign (1 << 16) // alignment for block allocation
+#define kBlockSizeReserveMin (1 << 24) // it's 1/256 from 4 GB dictionary
+
#define kEmptyHashValue 0
-#define kMaxValForNormalize ((UInt32)0xFFFFFFFF)
-#define kNormalizeStepMin (1 << 10) /* it must be power of 2 */
-#define kNormalizeMask (~(UInt32)(kNormalizeStepMin - 1))
-#define kMaxHistorySize ((UInt32)7 << 29)
-#define kStartMaxLen 3
+#define kMaxValForNormalize ((UInt32)0)
+// #define kMaxValForNormalize ((UInt32)(1 << 20) + 0xFFF) // for debug
+
+// #define kNormalizeAlign (1 << 7) // alignment for speculated accesses
+
+#define GET_AVAIL_BYTES(p) \
+ Inline_MatchFinder_GetNumAvailableBytes(p)
+
+
+// #define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size)
+#define kFix5HashSize kFix4HashSize
+
+/*
+ HASH2_CALC:
+ if (hv) match, then cur[0] and cur[1] also match
+*/
+#define HASH2_CALC hv = GetUi16(cur);
+
+// (crc[0 ... 255] & 0xFF) provides one-to-one correspondence to [0 ... 255]
+
+/*
+ HASH3_CALC:
+ if (cur[0]) and (h2) match, then cur[1] also match
+ if (cur[0]) and (hv) match, then cur[1] and cur[2] also match
+*/
+#define HASH3_CALC { \
+ UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
+ h2 = temp & (kHash2Size - 1); \
+ hv = (temp ^ ((UInt32)cur[2] << 8)) & p->hashMask; }
+
+#define HASH4_CALC { \
+ UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
+ h2 = temp & (kHash2Size - 1); \
+ temp ^= ((UInt32)cur[2] << 8); \
+ h3 = temp & (kHash3Size - 1); \
+ hv = (temp ^ (p->crc[cur[3]] << kLzHash_CrcShift_1)) & p->hashMask; }
+
+#define HASH5_CALC { \
+ UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
+ h2 = temp & (kHash2Size - 1); \
+ temp ^= ((UInt32)cur[2] << 8); \
+ h3 = temp & (kHash3Size - 1); \
+ temp ^= (p->crc[cur[3]] << kLzHash_CrcShift_1); \
+ /* h4 = temp & p->hash4Mask; */ /* (kHash4Size - 1); */ \
+ hv = (temp ^ (p->crc[cur[4]] << kLzHash_CrcShift_2)) & p->hashMask; }
+
+#define HASH_ZIP_CALC hv = ((cur[2] | ((UInt32)cur[0] << 8)) ^ p->crc[cur[1]]) & 0xFFFF;
+
static void LzInWindow_Free(CMatchFinder *p, ISzAllocPtr alloc)
{
@@ -25,46 +74,57 @@ static void LzInWindow_Free(CMatchFinder *p, ISzAllocPtr alloc)
}
}
-/* keepSizeBefore + keepSizeAfter + keepSizeReserv must be < 4G) */
-static int LzInWindow_Create(CMatchFinder *p, UInt32 keepSizeReserv, ISzAllocPtr alloc)
+static int LzInWindow_Create2(CMatchFinder *p, UInt32 blockSize, ISzAllocPtr alloc)
{
- UInt32 blockSize = p->keepSizeBefore + p->keepSizeAfter + keepSizeReserv;
- if (p->directInput)
- {
- p->blockSize = blockSize;
- return 1;
- }
+ if (blockSize == 0)
+ return 0;
if (!p->bufferBase || p->blockSize != blockSize)
{
+ // size_t blockSizeT;
LzInWindow_Free(p, alloc);
p->blockSize = blockSize;
- p->bufferBase = (Byte *)ISzAlloc_Alloc(alloc, (size_t)blockSize);
+ // blockSizeT = blockSize;
+
+ // printf("\nblockSize = 0x%x\n", blockSize);
+ /*
+ #if defined _WIN64
+ // we can allocate 4GiB, but still use UInt32 for (p->blockSize)
+ // we use UInt32 type for (p->blockSize), because
+ // we don't want to wrap over 4 GiB,
+ // when we use (p->streamPos - p->pos) that is UInt32.
+ if (blockSize >= (UInt32)0 - (UInt32)kBlockSizeAlign)
+ {
+ blockSizeT = ((size_t)1 << 32);
+ printf("\nchanged to blockSizeT = 4GiB\n");
+ }
+ #endif
+ */
+
+ p->bufferBase = (Byte *)ISzAlloc_Alloc(alloc, blockSize);
+ // printf("\nbufferBase = %p\n", p->bufferBase);
+ // return 0; // for debug
}
return (p->bufferBase != NULL);
}
-Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return p->buffer; }
+static const Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return p->buffer; }
-UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p) { return p->streamPos - p->pos; }
+static UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p) { return GET_AVAIL_BYTES(p); }
-void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue)
-{
- p->posLimit -= subValue;
- p->pos -= subValue;
- p->streamPos -= subValue;
-}
+MY_NO_INLINE
static void MatchFinder_ReadBlock(CMatchFinder *p)
{
if (p->streamEndWasReached || p->result != SZ_OK)
return;
- /* We use (p->streamPos - p->pos) value. (p->streamPos < p->pos) is allowed. */
+ /* We use (p->streamPos - p->pos) value.
+ (p->streamPos < p->pos) is allowed. */
if (p->directInput)
{
- UInt32 curSize = 0xFFFFFFFF - (p->streamPos - p->pos);
+ UInt32 curSize = 0xFFFFFFFF - GET_AVAIL_BYTES(p);
if (curSize > p->directInputRem)
curSize = (UInt32)p->directInputRem;
p->directInputRem -= curSize;
@@ -76,10 +136,22 @@ static void MatchFinder_ReadBlock(CMatchFinder *p)
for (;;)
{
- Byte *dest = p->buffer + (p->streamPos - p->pos);
- size_t size = (p->bufferBase + p->blockSize - dest);
+ Byte *dest = p->buffer + GET_AVAIL_BYTES(p);
+ size_t size = (size_t)(p->bufferBase + p->blockSize - dest);
if (size == 0)
+ {
+ /* we call ReadBlock() after NeedMove() and MoveBlock().
+ NeedMove() and MoveBlock() provide more than (keepSizeAfter)
+ to the end of (blockSize).
+ So we don't execute this branch in normal code flow.
+ We can go here, if we will call ReadBlock() before NeedMove(), MoveBlock().
+ */
+ // p->result = SZ_ERROR_FAIL; // we can show error here
return;
+ }
+
+ // #define kRead 3
+ // if (size > kRead) size = kRead; // for debug
p->result = ISeqInStream_Read(p->stream, dest, &size);
if (p->result != SZ_OK)
@@ -90,41 +162,52 @@ static void MatchFinder_ReadBlock(CMatchFinder *p)
return;
}
p->streamPos += (UInt32)size;
- if (p->streamPos - p->pos > p->keepSizeAfter)
+ if (GET_AVAIL_BYTES(p) > p->keepSizeAfter)
return;
+ /* here and in another (p->keepSizeAfter) checks we keep on 1 byte more than was requested by Create() function
+ (GET_AVAIL_BYTES(p) >= p->keepSizeAfter) - minimal required size */
}
+
+ // on exit: (p->result != SZ_OK || p->streamEndWasReached || GET_AVAIL_BYTES(p) > p->keepSizeAfter)
}
+
+
+MY_NO_INLINE
void MatchFinder_MoveBlock(CMatchFinder *p)
{
+ const size_t offset = (size_t)(p->buffer - p->bufferBase) - p->keepSizeBefore;
+ const size_t keepBefore = (offset & (kBlockMoveAlign - 1)) + p->keepSizeBefore;
+ p->buffer = p->bufferBase + keepBefore;
memmove(p->bufferBase,
- p->buffer - p->keepSizeBefore,
- (size_t)(p->streamPos - p->pos) + p->keepSizeBefore);
- p->buffer = p->bufferBase + p->keepSizeBefore;
+ p->bufferBase + (offset & ~((size_t)kBlockMoveAlign - 1)),
+ keepBefore + (size_t)GET_AVAIL_BYTES(p));
}
+/* We call MoveBlock() before ReadBlock().
+ So MoveBlock() can be a wasteful operation, if the whole input data
+ can fit in the current block even without calling MoveBlock().
+ In the important case where (dataSize <= historySize), the
+ condition (p->blockSize > dataSize + p->keepSizeAfter) is met,
+ so there is no MoveBlock() in that case.
+*/
+
int MatchFinder_NeedMove(CMatchFinder *p)
{
if (p->directInput)
return 0;
- /* if (p->streamEndWasReached) return 0; */
+ if (p->streamEndWasReached || p->result != SZ_OK)
+ return 0;
return ((size_t)(p->bufferBase + p->blockSize - p->buffer) <= p->keepSizeAfter);
}
void MatchFinder_ReadIfRequired(CMatchFinder *p)
{
- if (p->streamEndWasReached)
- return;
- if (p->keepSizeAfter >= p->streamPos - p->pos)
+ if (p->keepSizeAfter >= GET_AVAIL_BYTES(p))
MatchFinder_ReadBlock(p);
}
-static void MatchFinder_CheckAndMoveAndRead(CMatchFinder *p)
-{
- if (MatchFinder_NeedMove(p))
- MatchFinder_MoveBlock(p);
- MatchFinder_ReadBlock(p);
-}
+
static void MatchFinder_SetDefaultSettings(CMatchFinder *p)
{
@@ -175,39 +258,74 @@ static CLzRef* AllocRefs(size_t num, ISzAllocPtr alloc)
return (CLzRef *)ISzAlloc_Alloc(alloc, sizeInBytes);
}
-int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
- UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter,
- ISzAllocPtr alloc)
+#if (kBlockSizeReserveMin < kBlockSizeAlign * 2)
+ #error Stop_Compiling_Bad_Reserve
+#endif
+
+
+
+static UInt32 GetBlockSize(CMatchFinder *p, UInt32 historySize)
{
- UInt32 sizeReserv;
-
+ UInt32 blockSize = (p->keepSizeBefore + p->keepSizeAfter);
+ /*
if (historySize > kMaxHistorySize)
- {
- MatchFinder_Free(p, alloc);
return 0;
- }
+ */
+ // printf("\nhistorySize == 0x%x\n", historySize);
- sizeReserv = historySize >> 1;
- if (historySize >= ((UInt32)3 << 30)) sizeReserv = historySize >> 3;
- else if (historySize >= ((UInt32)2 << 30)) sizeReserv = historySize >> 2;
+ if (p->keepSizeBefore < historySize || blockSize < p->keepSizeBefore) // if 32-bit overflow
+ return 0;
- sizeReserv += (keepAddBufferBefore + matchMaxLen + keepAddBufferAfter) / 2 + (1 << 19);
+ {
+ const UInt32 kBlockSizeMax = (UInt32)0 - (UInt32)kBlockSizeAlign;
+ const UInt32 rem = kBlockSizeMax - blockSize;
+ const UInt32 reserve = (blockSize >> (blockSize < ((UInt32)1 << 30) ? 1 : 2))
+ + (1 << 12) + kBlockMoveAlign + kBlockSizeAlign; // do not overflow 32-bit here
+ if (blockSize >= kBlockSizeMax
+ || rem < kBlockSizeReserveMin) // we reject settings that will be slow
+ return 0;
+ if (reserve >= rem)
+ blockSize = kBlockSizeMax;
+ else
+ {
+ blockSize += reserve;
+ blockSize &= ~(UInt32)(kBlockSizeAlign - 1);
+ }
+ }
+ // printf("\n LzFind_blockSize = %x\n", blockSize);
+ // printf("\n LzFind_blockSize = %d\n", blockSize >> 20);
+ return blockSize;
+}
+
+int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
+ UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter,
+ ISzAllocPtr alloc)
+{
+ /* we need one additional byte in (p->keepSizeBefore),
+ since we use MoveBlock() after (p->pos++) and before dictionary using */
+ // keepAddBufferBefore = (UInt32)0xFFFFFFFF - (1 << 22); // for debug
p->keepSizeBefore = historySize + keepAddBufferBefore + 1;
- p->keepSizeAfter = matchMaxLen + keepAddBufferAfter;
-
- /* we need one additional byte, since we use MoveBlock after pos++ and before dictionary using */
-
- if (LzInWindow_Create(p, sizeReserv, alloc))
+
+ keepAddBufferAfter += matchMaxLen;
+ /* we need (p->keepSizeAfter >= p->numHashBytes) */
+ if (keepAddBufferAfter < p->numHashBytes)
+ keepAddBufferAfter = p->numHashBytes;
+ // keepAddBufferAfter -= 2; // for debug
+ p->keepSizeAfter = keepAddBufferAfter;
+
+ if (p->directInput)
+ p->blockSize = 0;
+ if (p->directInput || LzInWindow_Create2(p, GetBlockSize(p, historySize), alloc))
{
- UInt32 newCyclicBufferSize = historySize + 1;
+ const UInt32 newCyclicBufferSize = historySize + 1; // do not change it
UInt32 hs;
p->matchMaxLen = matchMaxLen;
{
+ // UInt32 hs4;
p->fixedHashSize = 0;
- if (p->numHashBytes == 2)
- hs = (1 << 16) - 1;
- else
+ hs = (1 << 16) - 1;
+ if (p->numHashBytes != 2)
{
hs = historySize;
if (hs > p->expectedDataSize)
@@ -218,9 +336,9 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
hs |= (hs >> 2);
hs |= (hs >> 4);
hs |= (hs >> 8);
+ // we propagated 16 bits in (hs). Low 16 bits must be set later
hs >>= 1;
- hs |= 0xFFFF; /* don't change it! It's required for Deflate */
- if (hs > (1 << 24))
+ if (hs >= (1 << 24))
{
if (p->numHashBytes == 3)
hs = (1 << 24) - 1;
@@ -228,12 +346,30 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
hs >>= 1;
/* if (bigHash) mode, GetHeads4b() in LzFindMt.c needs (hs >= ((1 << 24) - 1))) */
}
+
+ // hs = ((UInt32)1 << 25) - 1; // for test
+
+ // (hash_size >= (1 << 16)) : Required for (numHashBytes > 2)
+ hs |= (1 << 16) - 1; /* don't change it! */
+
+ // bt5: we adjust the size with recommended minimum size
+ if (p->numHashBytes >= 5)
+ hs |= (256 << kLzHash_CrcShift_2) - 1;
}
p->hashMask = hs;
hs++;
+
+ /*
+ hs4 = (1 << 20);
+ if (hs4 > hs)
+ hs4 = hs;
+ // hs4 = (1 << 16); // for test
+ p->hash4Mask = hs4 - 1;
+ */
+
if (p->numHashBytes > 2) p->fixedHashSize += kHash2Size;
if (p->numHashBytes > 3) p->fixedHashSize += kHash3Size;
- if (p->numHashBytes > 4) p->fixedHashSize += kHash4Size;
+ // if (p->numHashBytes > 4) p->fixedHashSize += hs4; // kHash4Size;
hs += p->fixedHashSize;
}
@@ -242,13 +378,17 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
size_t numSons;
p->historySize = historySize;
p->hashSizeSum = hs;
- p->cyclicBufferSize = newCyclicBufferSize;
+ p->cyclicBufferSize = newCyclicBufferSize; // it must be = (historySize + 1)
numSons = newCyclicBufferSize;
if (p->btMode)
numSons <<= 1;
newSize = hs + numSons;
+ // aligned size is not required here, but it can be better for some loops
+ #define NUM_REFS_ALIGN_MASK 0xF
+ newSize = (newSize + NUM_REFS_ALIGN_MASK) & ~(size_t)NUM_REFS_ALIGN_MASK;
+
if (p->hash && p->numRefs == newSize)
return 1;
@@ -268,33 +408,43 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
return 0;
}
+
static void MatchFinder_SetLimits(CMatchFinder *p)
{
- UInt32 limit = kMaxValForNormalize - p->pos;
- UInt32 limit2 = p->cyclicBufferSize - p->cyclicBufferPos;
-
- if (limit2 < limit)
- limit = limit2;
- limit2 = p->streamPos - p->pos;
+ UInt32 k;
+ UInt32 n = kMaxValForNormalize - p->pos;
+ if (n == 0)
+ n = (UInt32)(Int32)-1; // we allow (pos == 0) at start even with (kMaxValForNormalize == 0)
- if (limit2 <= p->keepSizeAfter)
+ k = p->cyclicBufferSize - p->cyclicBufferPos;
+ if (k < n)
+ n = k;
+
+ k = GET_AVAIL_BYTES(p);
{
- if (limit2 > 0)
- limit2 = 1;
+ const UInt32 ksa = p->keepSizeAfter;
+ UInt32 mm = p->matchMaxLen;
+ if (k > ksa)
+ k -= ksa; // we must limit exactly to keepSizeAfter for ReadBlock
+ else if (k >= mm)
+ {
+ // the limitation for (p->lenLimit) update
+ k -= mm; // optimization : to reduce the number of checks
+ k++;
+ // k = 1; // non-optimized version : for debug
+ }
+ else
+ {
+ mm = k;
+ if (k != 0)
+ k = 1;
+ }
+ p->lenLimit = mm;
}
- else
- limit2 -= p->keepSizeAfter;
-
- if (limit2 < limit)
- limit = limit2;
+ if (k < n)
+ n = k;
- {
- UInt32 lenLimit = p->streamPos - p->pos;
- if (lenLimit > p->matchMaxLen)
- lenLimit = p->matchMaxLen;
- p->lenLimit = lenLimit;
- }
- p->posLimit = p->pos + limit;
+ p->posLimit = p->pos + n;
}
@@ -302,7 +452,7 @@ void MatchFinder_Init_LowHash(CMatchFinder *p)
{
size_t i;
CLzRef *items = p->hash;
- size_t numItems = p->fixedHashSize;
+ const size_t numItems = p->fixedHashSize;
for (i = 0; i < numItems; i++)
items[i] = kEmptyHashValue;
}
@@ -312,72 +462,322 @@ void MatchFinder_Init_HighHash(CMatchFinder *p)
{
size_t i;
CLzRef *items = p->hash + p->fixedHashSize;
- size_t numItems = (size_t)p->hashMask + 1;
+ const size_t numItems = (size_t)p->hashMask + 1;
for (i = 0; i < numItems; i++)
items[i] = kEmptyHashValue;
}
-void MatchFinder_Init_3(CMatchFinder *p, int readData)
+void MatchFinder_Init_4(CMatchFinder *p)
{
- p->cyclicBufferPos = 0;
p->buffer = p->bufferBase;
- p->pos =
- p->streamPos = p->cyclicBufferSize;
+ {
+ /* kEmptyHashValue = 0 (Zero) is used in hash tables as NO-VALUE marker.
+ the code in CMatchFinderMt expects (pos = 1) */
+ p->pos =
+ p->streamPos =
+ 1; // it's smallest optimal value. do not change it
+ // 0; // for debug
+ }
p->result = SZ_OK;
p->streamEndWasReached = 0;
-
- if (readData)
- MatchFinder_ReadBlock(p);
-
- MatchFinder_SetLimits(p);
}
+// (CYC_TO_POS_OFFSET == 0) is expected by some optimized code
+#define CYC_TO_POS_OFFSET 0
+// #define CYC_TO_POS_OFFSET 1 // for debug
+
void MatchFinder_Init(CMatchFinder *p)
{
MatchFinder_Init_HighHash(p);
MatchFinder_Init_LowHash(p);
- MatchFinder_Init_3(p, True);
+ MatchFinder_Init_4(p);
+ // if (readData)
+ MatchFinder_ReadBlock(p);
+
+ /* if we init (cyclicBufferPos = pos), then we can use one variable
+ instead of both (cyclicBufferPos) and (pos) : only before (cyclicBufferPos) wrapping */
+ p->cyclicBufferPos = (p->pos - CYC_TO_POS_OFFSET); // init with relation to (pos)
+ // p->cyclicBufferPos = 0; // smallest value
+ // p->son[0] = p->son[1] = 0; // unused: we can init skipped record for speculated accesses.
+ MatchFinder_SetLimits(p);
}
-
-static UInt32 MatchFinder_GetSubValue(CMatchFinder *p)
+
+
+#ifdef MY_CPU_X86_OR_AMD64
+ #if defined(__clang__) && (__clang_major__ >= 8) \
+ || defined(__GNUC__) && (__GNUC__ >= 8) \
+ || defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1900)
+ #define USE_SATUR_SUB_128
+ #define USE_AVX2
+ #define ATTRIB_SSE41 __attribute__((__target__("sse4.1")))
+ #define ATTRIB_AVX2 __attribute__((__target__("avx2")))
+ #elif defined(_MSC_VER)
+ #if (_MSC_VER >= 1600)
+ #define USE_SATUR_SUB_128
+ #if (_MSC_VER >= 1900)
+ #define USE_AVX2
+ #include <immintrin.h> // avx
+ #endif
+ #endif
+ #endif
+
+// #elif defined(MY_CPU_ARM_OR_ARM64)
+#elif defined(MY_CPU_ARM64)
+
+ #if defined(__clang__) && (__clang_major__ >= 8) \
+ || defined(__GNUC__) && (__GNUC__ >= 8)
+ #define USE_SATUR_SUB_128
+ #ifdef MY_CPU_ARM64
+ // #define ATTRIB_SSE41 __attribute__((__target__("")))
+ #else
+ // #define ATTRIB_SSE41 __attribute__((__target__("fpu=crypto-neon-fp-armv8")))
+ #endif
+
+ #elif defined(_MSC_VER)
+ #if (_MSC_VER >= 1910)
+ #define USE_SATUR_SUB_128
+ #endif
+ #endif
+
+ #if defined(_MSC_VER) && defined(MY_CPU_ARM64)
+ #include <arm64_neon.h>
+ #else
+ #include <arm_neon.h>
+ #endif
+
+#endif
+
+/*
+#ifndef ATTRIB_SSE41
+ #define ATTRIB_SSE41
+#endif
+#ifndef ATTRIB_AVX2
+ #define ATTRIB_AVX2
+#endif
+*/
+
+#ifdef USE_SATUR_SUB_128
+
+// #define _SHOW_HW_STATUS
+
+#ifdef _SHOW_HW_STATUS
+#include <stdio.h>
+#define _PRF(x) x
+_PRF(;)
+#else
+#define _PRF(x)
+#endif
+
+#ifdef MY_CPU_ARM_OR_ARM64
+
+#ifdef MY_CPU_ARM64
+// #define FORCE_SATUR_SUB_128
+#endif
+
+typedef uint32x4_t v128;
+#define SASUB_128(i) \
+ *(v128 *)(void *)(items + (i) * 4) = \
+ vsubq_u32(vmaxq_u32(*(const v128 *)(const void *)(items + (i) * 4), sub2), sub2);
+
+#else
+
+#include <smmintrin.h> // sse4.1
+
+typedef __m128i v128;
+#define SASUB_128(i) \
+ *(v128 *)(void *)(items + (i) * 4) = \
+ _mm_sub_epi32(_mm_max_epu32(*(const v128 *)(const void *)(items + (i) * 4), sub2), sub2); // SSE 4.1
+
+#endif
+
+
+
+MY_NO_INLINE
+static
+#ifdef ATTRIB_SSE41
+ATTRIB_SSE41
+#endif
+void
+MY_FAST_CALL
+LzFind_SaturSub_128(UInt32 subValue, CLzRef *items, const CLzRef *lim)
{
- return (p->pos - p->historySize - 1) & kNormalizeMask;
+ v128 sub2 =
+ #ifdef MY_CPU_ARM_OR_ARM64
+ vdupq_n_u32(subValue);
+ #else
+ _mm_set_epi32((Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue);
+ #endif
+ do
+ {
+ SASUB_128(0)
+ SASUB_128(1)
+ SASUB_128(2)
+ SASUB_128(3)
+ items += 4 * 4;
+ }
+ while (items != lim);
}
-void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems)
+
+
+#ifdef USE_AVX2
+
+#include <immintrin.h> // avx
+
+#define SASUB_256(i) *(__m256i *)(void *)(items + (i) * 8) = _mm256_sub_epi32(_mm256_max_epu32(*(const __m256i *)(const void *)(items + (i) * 8), sub2), sub2); // AVX2
+
+MY_NO_INLINE
+static
+#ifdef ATTRIB_AVX2
+ATTRIB_AVX2
+#endif
+void
+MY_FAST_CALL
+LzFind_SaturSub_256(UInt32 subValue, CLzRef *items, const CLzRef *lim)
{
- size_t i;
- for (i = 0; i < numItems; i++)
+ __m256i sub2 = _mm256_set_epi32(
+ (Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue,
+ (Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue);
+ do
{
- UInt32 value = items[i];
- if (value <= subValue)
- value = kEmptyHashValue;
- else
- value -= subValue;
- items[i] = value;
+ SASUB_256(0)
+ SASUB_256(1)
+ items += 2 * 8;
+ }
+ while (items != lim);
+}
+#endif // USE_AVX2
+
+#ifndef FORCE_SATUR_SUB_128
+typedef void (MY_FAST_CALL *LZFIND_SATUR_SUB_CODE_FUNC)(
+ UInt32 subValue, CLzRef *items, const CLzRef *lim);
+static LZFIND_SATUR_SUB_CODE_FUNC g_LzFind_SaturSub;
+#endif // FORCE_SATUR_SUB_128
+
+#endif // USE_SATUR_SUB_128
+
+
+// kEmptyHashValue must be zero
+// #define SASUB_32(i) v = items[i]; m = v - subValue; if (v < subValue) m = kEmptyHashValue; items[i] = m;
+#define SASUB_32(i) v = items[i]; if (v < subValue) v = subValue; items[i] = v - subValue;
+
+#ifdef FORCE_SATUR_SUB_128
+
+#define DEFAULT_SaturSub LzFind_SaturSub_128
+
+#else
+
+#define DEFAULT_SaturSub LzFind_SaturSub_32
+
+MY_NO_INLINE
+static
+void
+MY_FAST_CALL
+LzFind_SaturSub_32(UInt32 subValue, CLzRef *items, const CLzRef *lim)
+{
+ do
+ {
+ UInt32 v;
+ SASUB_32(0)
+ SASUB_32(1)
+ SASUB_32(2)
+ SASUB_32(3)
+ SASUB_32(4)
+ SASUB_32(5)
+ SASUB_32(6)
+ SASUB_32(7)
+ items += 8;
}
+ while (items != lim);
}
-static void MatchFinder_Normalize(CMatchFinder *p)
+#endif
+
+
+MY_NO_INLINE
+void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems)
{
- UInt32 subValue = MatchFinder_GetSubValue(p);
- MatchFinder_Normalize3(subValue, p->hash, p->numRefs);
- MatchFinder_ReduceOffsets(p, subValue);
+ #define K_NORM_ALIGN_BLOCK_SIZE (1 << 6)
+
+ CLzRef *lim;
+
+ for (; numItems != 0 && ((unsigned)(ptrdiff_t)items & (K_NORM_ALIGN_BLOCK_SIZE - 1)) != 0; numItems--)
+ {
+ UInt32 v;
+ SASUB_32(0);
+ items++;
+ }
+
+ {
+ #define K_NORM_ALIGN_MASK (K_NORM_ALIGN_BLOCK_SIZE / 4 - 1)
+ lim = items + (numItems & ~(size_t)K_NORM_ALIGN_MASK);
+ numItems &= K_NORM_ALIGN_MASK;
+ if (items != lim)
+ {
+ #if defined(USE_SATUR_SUB_128) && !defined(FORCE_SATUR_SUB_128)
+ if (g_LzFind_SaturSub)
+ g_LzFind_SaturSub(subValue, items, lim);
+ else
+ #endif
+ DEFAULT_SaturSub(subValue, items, lim);
+ }
+ items = lim;
+ }
+
+
+ for (; numItems != 0; numItems--)
+ {
+ UInt32 v;
+ SASUB_32(0);
+ items++;
+ }
}
+
+// call MatchFinder_CheckLimits() only after (p->pos++) update
+
MY_NO_INLINE
static void MatchFinder_CheckLimits(CMatchFinder *p)
{
+ if (// !p->streamEndWasReached && p->result == SZ_OK &&
+ p->keepSizeAfter == GET_AVAIL_BYTES(p))
+ {
+ // we try to read only in exact state (p->keepSizeAfter == GET_AVAIL_BYTES(p))
+ if (MatchFinder_NeedMove(p))
+ MatchFinder_MoveBlock(p);
+ MatchFinder_ReadBlock(p);
+ }
+
if (p->pos == kMaxValForNormalize)
- MatchFinder_Normalize(p);
- if (!p->streamEndWasReached && p->keepSizeAfter == p->streamPos - p->pos)
- MatchFinder_CheckAndMoveAndRead(p);
+ if (GET_AVAIL_BYTES(p) >= p->numHashBytes) // optional optimization for last bytes of data.
+ /*
+ if we disable normalization for last bytes of data, and
+ if (data_size == 4 GiB), we don't call wastfull normalization,
+ but (pos) will be wrapped over Zero (0) in that case.
+ And we cannot resume later to normal operation
+ */
+ {
+ // MatchFinder_Normalize(p);
+ /* after normalization we need (p->pos >= p->historySize + 1); */
+ /* we can reduce subValue to aligned value, if want to keep alignment
+ of (p->pos) and (p->buffer) for speculated accesses. */
+ const UInt32 subValue = (p->pos - p->historySize - 1) /* & ~(UInt32)(kNormalizeAlign - 1) */;
+ // const UInt32 subValue = (1 << 15); // for debug
+ // printf("\nMatchFinder_Normalize() subValue == 0x%x\n", subValue);
+ size_t numSonRefs = p->cyclicBufferSize;
+ if (p->btMode)
+ numSonRefs <<= 1;
+ Inline_MatchFinder_ReduceOffsets(p, subValue);
+ MatchFinder_Normalize3(subValue, p->hash, (size_t)p->hashSizeSum + numSonRefs);
+ }
+
if (p->cyclicBufferPos == p->cyclicBufferSize)
p->cyclicBufferPos = 0;
+
MatchFinder_SetLimits(p);
}
@@ -386,9 +786,9 @@ static void MatchFinder_CheckLimits(CMatchFinder *p)
(lenLimit > maxLen)
*/
MY_FORCE_INLINE
-static UInt32 * Hc_GetMatchesSpec(unsigned lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
- UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
- UInt32 *distances, unsigned maxLen)
+static UInt32 * Hc_GetMatchesSpec(size_t lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
+ size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
+ UInt32 *d, unsigned maxLen)
{
/*
son[_cyclicBufferPos] = curMatch;
@@ -396,7 +796,7 @@ static UInt32 * Hc_GetMatchesSpec(unsigned lenLimit, UInt32 curMatch, UInt32 pos
{
UInt32 delta = pos - curMatch;
if (cutValue-- == 0 || delta >= _cyclicBufferSize)
- return distances;
+ return d;
{
const Byte *pb = cur - delta;
curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)];
@@ -409,10 +809,10 @@ static UInt32 * Hc_GetMatchesSpec(unsigned lenLimit, UInt32 curMatch, UInt32 pos
if (maxLen < len)
{
maxLen = len;
- *distances++ = len;
- *distances++ = delta - 1;
+ *d++ = len;
+ *d++ = delta - 1;
if (len == lenLimit)
- return distances;
+ return d;
}
}
}
@@ -421,35 +821,41 @@ static UInt32 * Hc_GetMatchesSpec(unsigned lenLimit, UInt32 curMatch, UInt32 pos
const Byte *lim = cur + lenLimit;
son[_cyclicBufferPos] = curMatch;
+
do
{
- UInt32 delta = pos - curMatch;
+ UInt32 delta;
+
+ if (curMatch == 0)
+ break;
+ // if (curMatch2 >= curMatch) return NULL;
+ delta = pos - curMatch;
if (delta >= _cyclicBufferSize)
break;
{
ptrdiff_t diff;
curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)];
- diff = (ptrdiff_t)0 - delta;
- if (cur[maxLen] == cur[maxLen + diff])
+ diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
+ if (cur[maxLen] == cur[(ptrdiff_t)maxLen + diff])
{
const Byte *c = cur;
while (*c == c[diff])
{
if (++c == lim)
{
- distances[0] = (UInt32)(lim - cur);
- distances[1] = delta - 1;
- return distances + 2;
+ d[0] = (UInt32)(lim - cur);
+ d[1] = delta - 1;
+ return d + 2;
}
}
{
- unsigned len = (unsigned)(c - cur);
+ const unsigned len = (unsigned)(c - cur);
if (maxLen < len)
{
maxLen = len;
- distances[0] = (UInt32)len;
- distances[1] = delta - 1;
- distances += 2;
+ d[0] = (UInt32)len;
+ d[1] = delta - 1;
+ d += 2;
}
}
}
@@ -457,31 +863,36 @@ static UInt32 * Hc_GetMatchesSpec(unsigned lenLimit, UInt32 curMatch, UInt32 pos
}
while (--cutValue);
- return distances;
+ return d;
}
MY_FORCE_INLINE
UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
- UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
- UInt32 *distances, UInt32 maxLen)
+ size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
+ UInt32 *d, UInt32 maxLen)
{
CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
unsigned len0 = 0, len1 = 0;
- for (;;)
+
+ UInt32 cmCheck;
+
+ // if (curMatch >= pos) { *ptr0 = *ptr1 = kEmptyHashValue; return NULL; }
+
+ cmCheck = (UInt32)(pos - _cyclicBufferSize);
+ if ((UInt32)pos <= _cyclicBufferSize)
+ cmCheck = 0;
+
+ if (cmCheck < curMatch)
+ do
{
- UInt32 delta = pos - curMatch;
- if (cutValue-- == 0 || delta >= _cyclicBufferSize)
- {
- *ptr0 = *ptr1 = kEmptyHashValue;
- return distances;
- }
+ const UInt32 delta = pos - curMatch;
{
CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1);
const Byte *pb = cur - delta;
unsigned len = (len0 < len1 ? len0 : len1);
- UInt32 pair0 = pair[0];
+ const UInt32 pair0 = pair[0];
if (pb[len] == cur[len])
{
if (++len != lenLimit && pb[len] == cur[len])
@@ -491,48 +902,60 @@ UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byt
if (maxLen < len)
{
maxLen = (UInt32)len;
- *distances++ = (UInt32)len;
- *distances++ = delta - 1;
+ *d++ = (UInt32)len;
+ *d++ = delta - 1;
if (len == lenLimit)
{
*ptr1 = pair0;
*ptr0 = pair[1];
- return distances;
+ return d;
}
}
}
if (pb[len] < cur[len])
{
*ptr1 = curMatch;
+ // const UInt32 curMatch2 = pair[1];
+ // if (curMatch2 >= curMatch) { *ptr0 = *ptr1 = kEmptyHashValue; return NULL; }
+ // curMatch = curMatch2;
+ curMatch = pair[1];
ptr1 = pair + 1;
- curMatch = *ptr1;
len1 = len;
}
else
{
*ptr0 = curMatch;
+ curMatch = pair[0];
ptr0 = pair;
- curMatch = *ptr0;
len0 = len;
}
}
}
+ while(--cutValue && cmCheck < curMatch);
+
+ *ptr0 = *ptr1 = kEmptyHashValue;
+ return d;
}
+
static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
- UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue)
+ size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue)
{
CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
unsigned len0 = 0, len1 = 0;
- for (;;)
+
+ UInt32 cmCheck;
+
+ cmCheck = (UInt32)(pos - _cyclicBufferSize);
+ if ((UInt32)pos <= _cyclicBufferSize)
+ cmCheck = 0;
+
+ if (// curMatch >= pos || // failure
+ cmCheck < curMatch)
+ do
{
- UInt32 delta = pos - curMatch;
- if (cutValue-- == 0 || delta >= _cyclicBufferSize)
- {
- *ptr0 = *ptr1 = kEmptyHashValue;
- return;
- }
+ const UInt32 delta = pos - curMatch;
{
CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1);
const Byte *pb = cur - delta;
@@ -554,80 +977,108 @@ static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const
if (pb[len] < cur[len])
{
*ptr1 = curMatch;
+ curMatch = pair[1];
ptr1 = pair + 1;
- curMatch = *ptr1;
len1 = len;
}
else
{
*ptr0 = curMatch;
+ curMatch = pair[0];
ptr0 = pair;
- curMatch = *ptr0;
len0 = len;
}
}
}
+ while(--cutValue && cmCheck < curMatch);
+
+ *ptr0 = *ptr1 = kEmptyHashValue;
+ return;
}
+
#define MOVE_POS \
++p->cyclicBufferPos; \
p->buffer++; \
- if (++p->pos == p->posLimit) MatchFinder_CheckLimits(p);
+ { const UInt32 pos1 = p->pos + 1; p->pos = pos1; if (pos1 == p->posLimit) MatchFinder_CheckLimits(p); }
-#define MOVE_POS_RET MOVE_POS return (UInt32)offset;
+#define MOVE_POS_RET MOVE_POS return distances;
-static void MatchFinder_MovePos(CMatchFinder *p) { MOVE_POS; }
+MY_NO_INLINE
+static void MatchFinder_MovePos(CMatchFinder *p)
+{
+ /* we go here at the end of stream data, when (avail < num_hash_bytes)
+ We don't update sons[cyclicBufferPos << btMode].
+ So (sons) record will contain junk. And we cannot resume match searching
+ to normal operation, even if we will provide more input data in buffer.
+ p->sons[p->cyclicBufferPos << p->btMode] = 0; // kEmptyHashValue
+ if (p->btMode)
+ p->sons[(p->cyclicBufferPos << p->btMode) + 1] = 0; // kEmptyHashValue
+ */
+ MOVE_POS;
+}
#define GET_MATCHES_HEADER2(minLen, ret_op) \
- unsigned lenLimit; UInt32 hv; const Byte *cur; UInt32 curMatch; \
+ unsigned lenLimit; UInt32 hv; Byte *cur; UInt32 curMatch; \
lenLimit = (unsigned)p->lenLimit; { if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; }} \
cur = p->buffer;
-#define GET_MATCHES_HEADER(minLen) GET_MATCHES_HEADER2(minLen, return 0)
-#define SKIP_HEADER(minLen) GET_MATCHES_HEADER2(minLen, continue)
+#define GET_MATCHES_HEADER(minLen) GET_MATCHES_HEADER2(minLen, return distances)
+#define SKIP_HEADER(minLen) do { GET_MATCHES_HEADER2(minLen, continue)
+
+#define MF_PARAMS(p) lenLimit, curMatch, p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue
+
+#define SKIP_FOOTER SkipMatchesSpec(MF_PARAMS(p)); MOVE_POS; } while (--num);
-#define MF_PARAMS(p) p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue
+#define GET_MATCHES_FOOTER_BASE(_maxLen_, func) \
+ distances = func(MF_PARAMS(p), \
+ distances, (UInt32)_maxLen_); MOVE_POS_RET;
+
+#define GET_MATCHES_FOOTER_BT(_maxLen_) \
+ GET_MATCHES_FOOTER_BASE(_maxLen_, GetMatchesSpec1)
+
+#define GET_MATCHES_FOOTER_HC(_maxLen_) \
+ GET_MATCHES_FOOTER_BASE(_maxLen_, Hc_GetMatchesSpec)
-#define GET_MATCHES_FOOTER(offset, maxLen) \
- offset = (unsigned)(GetMatchesSpec1((UInt32)lenLimit, curMatch, MF_PARAMS(p), \
- distances + offset, (UInt32)maxLen) - distances); MOVE_POS_RET;
-#define SKIP_FOOTER \
- SkipMatchesSpec((UInt32)lenLimit, curMatch, MF_PARAMS(p)); MOVE_POS;
#define UPDATE_maxLen { \
- ptrdiff_t diff = (ptrdiff_t)0 - d2; \
+ const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)d2; \
const Byte *c = cur + maxLen; \
const Byte *lim = cur + lenLimit; \
for (; c != lim; c++) if (*(c + diff) != *c) break; \
maxLen = (unsigned)(c - cur); }
-static UInt32 Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+static UInt32* Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
{
- unsigned offset;
GET_MATCHES_HEADER(2)
HASH2_CALC;
curMatch = p->hash[hv];
p->hash[hv] = p->pos;
- offset = 0;
- GET_MATCHES_FOOTER(offset, 1)
+ GET_MATCHES_FOOTER_BT(1)
}
-UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+UInt32* Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
{
- unsigned offset;
GET_MATCHES_HEADER(3)
HASH_ZIP_CALC;
curMatch = p->hash[hv];
p->hash[hv] = p->pos;
- offset = 0;
- GET_MATCHES_FOOTER(offset, 2)
+ GET_MATCHES_FOOTER_BT(2)
}
-static UInt32 Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+
+#define SET_mmm \
+ mmm = p->cyclicBufferSize; \
+ if (pos < mmm) \
+ mmm = pos;
+
+
+static UInt32* Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
{
+ UInt32 mmm;
UInt32 h2, d2, pos;
- unsigned maxLen, offset;
+ unsigned maxLen;
UInt32 *hash;
GET_MATCHES_HEADER(3)
@@ -643,29 +1094,32 @@ static UInt32 Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
hash[h2] = pos;
(hash + kFix3HashSize)[hv] = pos;
+ SET_mmm
+
maxLen = 2;
- offset = 0;
- if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur)
+ if (d2 < mmm && *(cur - d2) == *cur)
{
UPDATE_maxLen
distances[0] = (UInt32)maxLen;
distances[1] = d2 - 1;
- offset = 2;
+ distances += 2;
if (maxLen == lenLimit)
{
- SkipMatchesSpec((UInt32)lenLimit, curMatch, MF_PARAMS(p));
+ SkipMatchesSpec(MF_PARAMS(p));
MOVE_POS_RET;
}
}
- GET_MATCHES_FOOTER(offset, maxLen)
+ GET_MATCHES_FOOTER_BT(maxLen)
}
-static UInt32 Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+
+static UInt32* Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
{
+ UInt32 mmm;
UInt32 h2, h3, d2, d3, pos;
- unsigned maxLen, offset;
+ unsigned maxLen;
UInt32 *hash;
GET_MATCHES_HEADER(4)
@@ -676,53 +1130,63 @@ static UInt32 Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
d2 = pos - hash [h2];
d3 = pos - (hash + kFix3HashSize)[h3];
-
curMatch = (hash + kFix4HashSize)[hv];
hash [h2] = pos;
(hash + kFix3HashSize)[h3] = pos;
(hash + kFix4HashSize)[hv] = pos;
- maxLen = 0;
- offset = 0;
-
- if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur)
- {
- maxLen = 2;
- distances[0] = 2;
- distances[1] = d2 - 1;
- offset = 2;
- }
+ SET_mmm
+
+ maxLen = 3;
- if (d2 != d3 && d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
+ for (;;)
{
- maxLen = 3;
- distances[(size_t)offset + 1] = d3 - 1;
- offset += 2;
- d2 = d3;
- }
+ if (d2 < mmm && *(cur - d2) == *cur)
+ {
+ distances[0] = 2;
+ distances[1] = d2 - 1;
+ distances += 2;
+ if (*(cur - d2 + 2) == cur[2])
+ {
+ // distances[-2] = 3;
+ }
+ else if (d3 < mmm && *(cur - d3) == *cur)
+ {
+ d2 = d3;
+ distances[1] = d3 - 1;
+ distances += 2;
+ }
+ else
+ break;
+ }
+ else if (d3 < mmm && *(cur - d3) == *cur)
+ {
+ d2 = d3;
+ distances[1] = d3 - 1;
+ distances += 2;
+ }
+ else
+ break;
- if (offset != 0)
- {
UPDATE_maxLen
- distances[(size_t)offset - 2] = (UInt32)maxLen;
+ distances[-2] = (UInt32)maxLen;
if (maxLen == lenLimit)
{
- SkipMatchesSpec((UInt32)lenLimit, curMatch, MF_PARAMS(p));
- MOVE_POS_RET;
+ SkipMatchesSpec(MF_PARAMS(p));
+ MOVE_POS_RET
}
+ break;
}
- if (maxLen < 3)
- maxLen = 3;
-
- GET_MATCHES_FOOTER(offset, maxLen)
+ GET_MATCHES_FOOTER_BT(maxLen)
}
-/*
-static UInt32 Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+
+static UInt32* Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
{
- UInt32 h2, h3, h4, d2, d3, d4, maxLen, offset, pos;
+ UInt32 mmm;
+ UInt32 h2, h3, d2, d3, maxLen, pos;
UInt32 *hash;
GET_MATCHES_HEADER(5)
@@ -733,73 +1197,69 @@ static UInt32 Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
d2 = pos - hash [h2];
d3 = pos - (hash + kFix3HashSize)[h3];
- d4 = pos - (hash + kFix4HashSize)[h4];
+ // d4 = pos - (hash + kFix4HashSize)[h4];
curMatch = (hash + kFix5HashSize)[hv];
hash [h2] = pos;
(hash + kFix3HashSize)[h3] = pos;
- (hash + kFix4HashSize)[h4] = pos;
+ // (hash + kFix4HashSize)[h4] = pos;
(hash + kFix5HashSize)[hv] = pos;
- maxLen = 0;
- offset = 0;
+ SET_mmm
- if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur)
+ maxLen = 4;
+
+ for (;;)
{
- distances[0] = maxLen = 2;
- distances[1] = d2 - 1;
- offset = 2;
- if (*(cur - d2 + 2) == cur[2])
- distances[0] = maxLen = 3;
- else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
+ if (d2 < mmm && *(cur - d2) == *cur)
+ {
+ distances[0] = 2;
+ distances[1] = d2 - 1;
+ distances += 2;
+ if (*(cur - d2 + 2) == cur[2])
+ {
+ }
+ else if (d3 < mmm && *(cur - d3) == *cur)
+ {
+ distances[1] = d3 - 1;
+ distances += 2;
+ d2 = d3;
+ }
+ else
+ break;
+ }
+ else if (d3 < mmm && *(cur - d3) == *cur)
{
- distances[2] = maxLen = 3;
- distances[3] = d3 - 1;
- offset = 4;
+ distances[1] = d3 - 1;
+ distances += 2;
d2 = d3;
}
- }
- else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
- {
- distances[0] = maxLen = 3;
- distances[1] = d3 - 1;
- offset = 2;
- d2 = d3;
- }
-
- if (d2 != d4 && d4 < p->cyclicBufferSize
- && *(cur - d4) == *cur
- && *(cur - d4 + 3) == *(cur + 3))
- {
- maxLen = 4;
- distances[(size_t)offset + 1] = d4 - 1;
- offset += 2;
- d2 = d4;
- }
-
- if (offset != 0)
- {
+ else
+ break;
+
+ distances[-2] = 3;
+ if (*(cur - d2 + 3) != cur[3])
+ break;
UPDATE_maxLen
- distances[(size_t)offset - 2] = maxLen;
+ distances[-2] = (UInt32)maxLen;
if (maxLen == lenLimit)
{
- SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p));
+ SkipMatchesSpec(MF_PARAMS(p));
MOVE_POS_RET;
}
+ break;
}
-
- if (maxLen < 4)
- maxLen = 4;
- GET_MATCHES_FOOTER(offset, maxLen)
+ GET_MATCHES_FOOTER_BT(maxLen)
}
-*/
-static UInt32 Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+
+static UInt32* Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
{
+ UInt32 mmm;
UInt32 h2, h3, d2, d3, pos;
- unsigned maxLen, offset;
+ unsigned maxLen;
UInt32 *hash;
GET_MATCHES_HEADER(4)
@@ -816,48 +1276,57 @@ static UInt32 Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
(hash + kFix3HashSize)[h3] = pos;
(hash + kFix4HashSize)[hv] = pos;
- maxLen = 0;
- offset = 0;
+ SET_mmm
- if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur)
- {
- maxLen = 2;
- distances[0] = 2;
- distances[1] = d2 - 1;
- offset = 2;
- }
-
- if (d2 != d3 && d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
- {
- maxLen = 3;
- distances[(size_t)offset + 1] = d3 - 1;
- offset += 2;
- d2 = d3;
- }
-
- if (offset != 0)
+ maxLen = 3;
+
+ for (;;)
{
+ if (d2 < mmm && *(cur - d2) == *cur)
+ {
+ distances[0] = 2;
+ distances[1] = d2 - 1;
+ distances += 2;
+ if (*(cur - d2 + 2) == cur[2])
+ {
+ // distances[-2] = 3;
+ }
+ else if (d3 < mmm && *(cur - d3) == *cur)
+ {
+ d2 = d3;
+ distances[1] = d3 - 1;
+ distances += 2;
+ }
+ else
+ break;
+ }
+ else if (d3 < mmm && *(cur - d3) == *cur)
+ {
+ d2 = d3;
+ distances[1] = d3 - 1;
+ distances += 2;
+ }
+ else
+ break;
+
UPDATE_maxLen
- distances[(size_t)offset - 2] = (UInt32)maxLen;
+ distances[-2] = (UInt32)maxLen;
if (maxLen == lenLimit)
{
p->son[p->cyclicBufferPos] = curMatch;
MOVE_POS_RET;
}
+ break;
}
- if (maxLen < 3)
- maxLen = 3;
-
- offset = (unsigned)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p),
- distances + offset, maxLen) - (distances));
- MOVE_POS_RET
+ GET_MATCHES_FOOTER_HC(maxLen);
}
-/*
-static UInt32 Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+
+static UInt32 * Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
{
- UInt32 h2, h3, h4, d2, d3, d4, maxLen, offset, pos
+ UInt32 mmm;
+ UInt32 h2, h3, d2, d3, maxLen, pos;
UInt32 *hash;
GET_MATCHES_HEADER(5)
@@ -865,242 +1334,237 @@ static UInt32 Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
hash = p->hash;
pos = p->pos;
-
+
d2 = pos - hash [h2];
d3 = pos - (hash + kFix3HashSize)[h3];
- d4 = pos - (hash + kFix4HashSize)[h4];
+ // d4 = pos - (hash + kFix4HashSize)[h4];
curMatch = (hash + kFix5HashSize)[hv];
hash [h2] = pos;
(hash + kFix3HashSize)[h3] = pos;
- (hash + kFix4HashSize)[h4] = pos;
+ // (hash + kFix4HashSize)[h4] = pos;
(hash + kFix5HashSize)[hv] = pos;
- maxLen = 0;
- offset = 0;
+ SET_mmm
+
+ maxLen = 4;
- if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur)
+ for (;;)
{
- distances[0] = maxLen = 2;
- distances[1] = d2 - 1;
- offset = 2;
- if (*(cur - d2 + 2) == cur[2])
- distances[0] = maxLen = 3;
- else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
+ if (d2 < mmm && *(cur - d2) == *cur)
+ {
+ distances[0] = 2;
+ distances[1] = d2 - 1;
+ distances += 2;
+ if (*(cur - d2 + 2) == cur[2])
+ {
+ }
+ else if (d3 < mmm && *(cur - d3) == *cur)
+ {
+ distances[1] = d3 - 1;
+ distances += 2;
+ d2 = d3;
+ }
+ else
+ break;
+ }
+ else if (d3 < mmm && *(cur - d3) == *cur)
{
- distances[2] = maxLen = 3;
- distances[3] = d3 - 1;
- offset = 4;
+ distances[1] = d3 - 1;
+ distances += 2;
d2 = d3;
}
- }
- else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
- {
- distances[0] = maxLen = 3;
- distances[1] = d3 - 1;
- offset = 2;
- d2 = d3;
- }
-
- if (d2 != d4 && d4 < p->cyclicBufferSize
- && *(cur - d4) == *cur
- && *(cur - d4 + 3) == *(cur + 3))
- {
- maxLen = 4;
- distances[(size_t)offset + 1] = d4 - 1;
- offset += 2;
- d2 = d4;
- }
-
- if (offset != 0)
- {
+ else
+ break;
+
+ distances[-2] = 3;
+ if (*(cur - d2 + 3) != cur[3])
+ break;
UPDATE_maxLen
- distances[(size_t)offset - 2] = maxLen;
+ distances[-2] = maxLen;
if (maxLen == lenLimit)
{
p->son[p->cyclicBufferPos] = curMatch;
MOVE_POS_RET;
}
+ break;
}
- if (maxLen < 4)
- maxLen = 4;
-
- offset = (UInt32)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p),
- distances + offset, maxLen) - (distances));
- MOVE_POS_RET
+ GET_MATCHES_FOOTER_HC(maxLen);
}
-*/
-UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+
+UInt32* Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
{
- unsigned offset;
GET_MATCHES_HEADER(3)
HASH_ZIP_CALC;
curMatch = p->hash[hv];
p->hash[hv] = p->pos;
- offset = (unsigned)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p),
- distances, 2) - (distances));
- MOVE_POS_RET
+ GET_MATCHES_FOOTER_HC(2)
}
+
static void Bt2_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
{
- do
+ SKIP_HEADER(2)
{
- SKIP_HEADER(2)
HASH2_CALC;
curMatch = p->hash[hv];
p->hash[hv] = p->pos;
- SKIP_FOOTER
}
- while (--num != 0);
+ SKIP_FOOTER
}
void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
{
- do
+ SKIP_HEADER(3)
{
- SKIP_HEADER(3)
HASH_ZIP_CALC;
curMatch = p->hash[hv];
p->hash[hv] = p->pos;
- SKIP_FOOTER
}
- while (--num != 0);
+ SKIP_FOOTER
}
static void Bt3_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
{
- do
+ SKIP_HEADER(3)
{
UInt32 h2;
UInt32 *hash;
- SKIP_HEADER(3)
HASH3_CALC;
hash = p->hash;
curMatch = (hash + kFix3HashSize)[hv];
hash[h2] =
(hash + kFix3HashSize)[hv] = p->pos;
- SKIP_FOOTER
}
- while (--num != 0);
+ SKIP_FOOTER
}
static void Bt4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
{
- do
+ SKIP_HEADER(4)
{
UInt32 h2, h3;
UInt32 *hash;
- SKIP_HEADER(4)
HASH4_CALC;
hash = p->hash;
curMatch = (hash + kFix4HashSize)[hv];
hash [h2] =
(hash + kFix3HashSize)[h3] =
(hash + kFix4HashSize)[hv] = p->pos;
- SKIP_FOOTER
}
- while (--num != 0);
+ SKIP_FOOTER
}
-/*
static void Bt5_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
{
- do
+ SKIP_HEADER(5)
{
- UInt32 h2, h3, h4;
+ UInt32 h2, h3;
UInt32 *hash;
- SKIP_HEADER(5)
HASH5_CALC;
hash = p->hash;
curMatch = (hash + kFix5HashSize)[hv];
hash [h2] =
(hash + kFix3HashSize)[h3] =
- (hash + kFix4HashSize)[h4] =
+ // (hash + kFix4HashSize)[h4] =
(hash + kFix5HashSize)[hv] = p->pos;
- SKIP_FOOTER
}
- while (--num != 0);
+ SKIP_FOOTER
}
-*/
+
+
+#define HC_SKIP_HEADER(minLen) \
+ do { if (p->lenLimit < minLen) { MatchFinder_MovePos(p); num--; continue; } { \
+ Byte *cur; \
+ UInt32 *hash; \
+ UInt32 *son; \
+ UInt32 pos = p->pos; \
+ UInt32 num2 = num; \
+ /* (p->pos == p->posLimit) is not allowed here !!! */ \
+ { const UInt32 rem = p->posLimit - pos; if (num2 > rem) num2 = rem; } \
+ num -= num2; \
+ { const UInt32 cycPos = p->cyclicBufferPos; \
+ son = p->son + cycPos; \
+ p->cyclicBufferPos = cycPos + num2; } \
+ cur = p->buffer; \
+ hash = p->hash; \
+ do { \
+ UInt32 curMatch; \
+ UInt32 hv;
+
+
+#define HC_SKIP_FOOTER \
+ cur++; pos++; *son++ = curMatch; \
+ } while (--num2); \
+ p->buffer = cur; \
+ p->pos = pos; \
+ if (pos == p->posLimit) MatchFinder_CheckLimits(p); \
+ }} while(num); \
+
static void Hc4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
{
- do
- {
+ HC_SKIP_HEADER(4)
+
UInt32 h2, h3;
- UInt32 *hash;
- SKIP_HEADER(4)
HASH4_CALC;
- hash = p->hash;
curMatch = (hash + kFix4HashSize)[hv];
hash [h2] =
(hash + kFix3HashSize)[h3] =
- (hash + kFix4HashSize)[hv] = p->pos;
- p->son[p->cyclicBufferPos] = curMatch;
- MOVE_POS
- }
- while (--num != 0);
+ (hash + kFix4HashSize)[hv] = pos;
+
+ HC_SKIP_FOOTER
}
-/*
+
static void Hc5_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
{
- do
- {
- UInt32 h2, h3, h4;
- UInt32 *hash;
- SKIP_HEADER(5)
- HASH5_CALC;
- hash = p->hash;
- curMatch = hash + kFix5HashSize)[hv];
+ HC_SKIP_HEADER(5)
+
+ UInt32 h2, h3;
+ HASH5_CALC
+ curMatch = (hash + kFix5HashSize)[hv];
hash [h2] =
(hash + kFix3HashSize)[h3] =
- (hash + kFix4HashSize)[h4] =
- (hash + kFix5HashSize)[hv] = p->pos;
- p->son[p->cyclicBufferPos] = curMatch;
- MOVE_POS
- }
- while (--num != 0);
+ // (hash + kFix4HashSize)[h4] =
+ (hash + kFix5HashSize)[hv] = pos;
+
+ HC_SKIP_FOOTER
}
-*/
+
void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
{
- do
- {
- SKIP_HEADER(3)
+ HC_SKIP_HEADER(3)
+
HASH_ZIP_CALC;
- curMatch = p->hash[hv];
- p->hash[hv] = p->pos;
- p->son[p->cyclicBufferPos] = curMatch;
- MOVE_POS
- }
- while (--num != 0);
+ curMatch = hash[hv];
+ hash[hv] = pos;
+
+ HC_SKIP_FOOTER
}
-void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable)
+
+void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder2 *vTable)
{
vTable->Init = (Mf_Init_Func)MatchFinder_Init;
vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinder_GetNumAvailableBytes;
vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinder_GetPointerToCurrentPos;
if (!p->btMode)
{
- /* if (p->numHashBytes <= 4) */
+ if (p->numHashBytes <= 4)
{
vTable->GetMatches = (Mf_GetMatches_Func)Hc4_MatchFinder_GetMatches;
vTable->Skip = (Mf_Skip_Func)Hc4_MatchFinder_Skip;
}
- /*
else
{
vTable->GetMatches = (Mf_GetMatches_Func)Hc5_MatchFinder_GetMatches;
vTable->Skip = (Mf_Skip_Func)Hc5_MatchFinder_Skip;
}
- */
}
else if (p->numHashBytes == 2)
{
@@ -1112,16 +1576,53 @@ void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable)
vTable->GetMatches = (Mf_GetMatches_Func)Bt3_MatchFinder_GetMatches;
vTable->Skip = (Mf_Skip_Func)Bt3_MatchFinder_Skip;
}
- else /* if (p->numHashBytes == 4) */
+ else if (p->numHashBytes == 4)
{
vTable->GetMatches = (Mf_GetMatches_Func)Bt4_MatchFinder_GetMatches;
vTable->Skip = (Mf_Skip_Func)Bt4_MatchFinder_Skip;
}
- /*
else
{
vTable->GetMatches = (Mf_GetMatches_Func)Bt5_MatchFinder_GetMatches;
vTable->Skip = (Mf_Skip_Func)Bt5_MatchFinder_Skip;
}
- */
+}
+
+
+
+void LzFindPrepare()
+{
+ #ifndef FORCE_SATUR_SUB_128
+ #ifdef USE_SATUR_SUB_128
+ LZFIND_SATUR_SUB_CODE_FUNC f = NULL;
+ #ifdef MY_CPU_ARM_OR_ARM64
+ {
+ if (CPU_IsSupported_NEON())
+ {
+ // #pragma message ("=== LzFind NEON")
+ _PRF(printf("\n=== LzFind NEON\n"));
+ f = LzFind_SaturSub_128;
+ }
+ // f = 0; // for debug
+ }
+ #else // MY_CPU_ARM_OR_ARM64
+ if (CPU_IsSupported_SSE41())
+ {
+ // #pragma message ("=== LzFind SSE41")
+ _PRF(printf("\n=== LzFind SSE41\n"));
+ f = LzFind_SaturSub_128;
+
+ #ifdef USE_AVX2
+ if (CPU_IsSupported_AVX2())
+ {
+ // #pragma message ("=== LzFind AVX2")
+ _PRF(printf("\n=== LzFind AVX2\n"));
+ f = LzFind_SaturSub_256;
+ }
+ #endif
+ }
+ #endif // MY_CPU_ARM_OR_ARM64
+ g_LzFind_SaturSub = f;
+ #endif // USE_SATUR_SUB_128
+ #endif // FORCE_SATUR_SUB_128
}
diff --git a/multiarc/src/formats/7z/C/LzFind.h b/multiarc/src/formats/7z/C/LzFind.h
index 42c13be1..eea873ff 100644..100755
--- a/multiarc/src/formats/7z/C/LzFind.h
+++ b/multiarc/src/formats/7z/C/LzFind.h
@@ -1,5 +1,5 @@
/* LzFind.h -- Match finder for LZ algorithms
-2017-06-10 : Igor Pavlov : Public domain */
+2021-07-13 : Igor Pavlov : Public domain */
#ifndef __LZ_FIND_H
#define __LZ_FIND_H
@@ -15,7 +15,7 @@ typedef struct _CMatchFinder
Byte *buffer;
UInt32 pos;
UInt32 posLimit;
- UInt32 streamPos;
+ UInt32 streamPos; /* wrap over Zero is allowed (streamPos < pos). Use (UInt32)(streamPos - pos) */
UInt32 lenLimit;
UInt32 cyclicBufferPos;
@@ -51,17 +51,19 @@ typedef struct _CMatchFinder
UInt64 expectedDataSize;
} CMatchFinder;
-#define Inline_MatchFinder_GetPointerToCurrentPos(p) ((p)->buffer)
+#define Inline_MatchFinder_GetPointerToCurrentPos(p) ((const Byte *)(p)->buffer)
-#define Inline_MatchFinder_GetNumAvailableBytes(p) ((p)->streamPos - (p)->pos)
+#define Inline_MatchFinder_GetNumAvailableBytes(p) ((UInt32)((p)->streamPos - (p)->pos))
+/*
#define Inline_MatchFinder_IsFinishedOK(p) \
((p)->streamEndWasReached \
&& (p)->streamPos == (p)->pos \
&& (!(p)->directInput || (p)->directInputRem == 0))
+*/
int MatchFinder_NeedMove(CMatchFinder *p);
-Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p);
+/* Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p); */
void MatchFinder_MoveBlock(CMatchFinder *p);
void MatchFinder_ReadIfRequired(CMatchFinder *p);
@@ -76,10 +78,21 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
ISzAllocPtr alloc);
void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc);
void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems);
-void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue);
+// void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue);
+
+/*
+#define Inline_MatchFinder_InitPos(p, val) \
+ (p)->pos = (val); \
+ (p)->streamPos = (val);
+*/
+
+#define Inline_MatchFinder_ReduceOffsets(p, subValue) \
+ (p)->pos -= (subValue); \
+ (p)->streamPos -= (subValue);
+
UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *buffer, CLzRef *son,
- UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue,
+ size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue,
UInt32 *distances, UInt32 maxLen);
/*
@@ -91,7 +104,7 @@ Conditions:
typedef void (*Mf_Init_Func)(void *object);
typedef UInt32 (*Mf_GetNumAvailableBytes_Func)(void *object);
typedef const Byte * (*Mf_GetPointerToCurrentPos_Func)(void *object);
-typedef UInt32 (*Mf_GetMatches_Func)(void *object, UInt32 *distances);
+typedef UInt32 * (*Mf_GetMatches_Func)(void *object, UInt32 *distances);
typedef void (*Mf_Skip_Func)(void *object, UInt32);
typedef struct _IMatchFinder
@@ -101,21 +114,23 @@ typedef struct _IMatchFinder
Mf_GetPointerToCurrentPos_Func GetPointerToCurrentPos;
Mf_GetMatches_Func GetMatches;
Mf_Skip_Func Skip;
-} IMatchFinder;
+} IMatchFinder2;
-void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable);
+void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder2 *vTable);
void MatchFinder_Init_LowHash(CMatchFinder *p);
void MatchFinder_Init_HighHash(CMatchFinder *p);
-void MatchFinder_Init_3(CMatchFinder *p, int readData);
+void MatchFinder_Init_4(CMatchFinder *p);
void MatchFinder_Init(CMatchFinder *p);
-UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);
-UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);
+UInt32* Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);
+UInt32* Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);
void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num);
void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num);
+void LzFindPrepare(void);
+
EXTERN_C_END
#endif
diff --git a/multiarc/src/formats/7z/C/LzFindMt.c b/multiarc/src/formats/7z/C/LzFindMt.c
index bb0f42c3..4e67fc3f 100644..100755
--- a/multiarc/src/formats/7z/C/LzFindMt.c
+++ b/multiarc/src/formats/7z/C/LzFindMt.c
@@ -1,97 +1,215 @@
/* LzFindMt.c -- multithreaded Match finder for LZ algorithms
-2018-12-29 : Igor Pavlov : Public domain */
+2021-12-21 : Igor Pavlov : Public domain */
#include "Precomp.h"
-#include "LzHash.h"
+// #include <stdio.h>
+
+#include "CpuArch.h"
+#include "LzHash.h"
#include "LzFindMt.h"
+// #define LOG_ITERS
+
+// #define LOG_THREAD
+
+#ifdef LOG_THREAD
+#include <stdio.h>
+#define PRF(x) x
+#else
+#define PRF(x)
+#endif
+
+#ifdef LOG_ITERS
+#include <stdio.h>
+extern UInt64 g_NumIters_Tree;
+extern UInt64 g_NumIters_Loop;
+extern UInt64 g_NumIters_Bytes;
+#define LOG_ITER(x) x
+#else
+#define LOG_ITER(x)
+#endif
+
+#define kMtHashBlockSize ((UInt32)1 << 17)
+#define kMtHashNumBlocks (1 << 1)
+
+#define GET_HASH_BLOCK_OFFSET(i) (((i) & (kMtHashNumBlocks - 1)) * kMtHashBlockSize)
+
+#define kMtBtBlockSize ((UInt32)1 << 16)
+#define kMtBtNumBlocks (1 << 4)
+
+#define GET_BT_BLOCK_OFFSET(i) (((i) & (kMtBtNumBlocks - 1)) * (size_t)kMtBtBlockSize)
+
+/*
+ HASH functions:
+ We use raw 8/16 bits from a[1] and a[2],
+ xored with crc(a[0]) and crc(a[3]).
+ We check a[0], a[3] only. We don't need to compare a[1] and a[2] in matches.
+ our crc() function provides one-to-one correspondence for low 8-bit values:
+ (crc[0...0xFF] & 0xFF) <-> [0...0xFF]
+*/
+
+#define MF(mt) ((mt)->MatchFinder)
+#define MF_CRC (p->crc)
+
+// #define MF(mt) (&(mt)->MatchFinder)
+// #define MF_CRC (p->MatchFinder.crc)
+
+#define MT_HASH2_CALC \
+ h2 = (MF_CRC[cur[0]] ^ cur[1]) & (kHash2Size - 1);
+
+#define MT_HASH3_CALC { \
+ UInt32 temp = MF_CRC[cur[0]] ^ cur[1]; \
+ h2 = temp & (kHash2Size - 1); \
+ h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); }
+
+/*
+#define MT_HASH3_CALC__NO_2 { \
+ UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
+ h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); }
+
+#define __MT_HASH4_CALC { \
+ UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
+ h2 = temp & (kHash2Size - 1); \
+ temp ^= ((UInt32)cur[2] << 8); \
+ h3 = temp & (kHash3Size - 1); \
+ h4 = (temp ^ (p->crc[cur[3]] << kLzHash_CrcShift_1)) & p->hash4Mask; }
+ // (kHash4Size - 1);
+*/
+
+
+MY_NO_INLINE
static void MtSync_Construct(CMtSync *p)
{
+ p->affinity = 0;
p->wasCreated = False;
p->csWasInitialized = False;
p->csWasEntered = False;
Thread_Construct(&p->thread);
Event_Construct(&p->canStart);
- Event_Construct(&p->wasStarted);
Event_Construct(&p->wasStopped);
Semaphore_Construct(&p->freeSemaphore);
Semaphore_Construct(&p->filledSemaphore);
}
-static void MtSync_GetNextBlock(CMtSync *p)
+
+#define DEBUG_BUFFER_LOCK // define it to debug lock state
+
+#ifdef DEBUG_BUFFER_LOCK
+#include <stdlib.h>
+#define BUFFER_MUST_BE_LOCKED(p) if (!(p)->csWasEntered) exit(1);
+#define BUFFER_MUST_BE_UNLOCKED(p) if ( (p)->csWasEntered) exit(1);
+#else
+#define BUFFER_MUST_BE_LOCKED(p)
+#define BUFFER_MUST_BE_UNLOCKED(p)
+#endif
+
+#define LOCK_BUFFER(p) { \
+ BUFFER_MUST_BE_UNLOCKED(p); \
+ CriticalSection_Enter(&(p)->cs); \
+ (p)->csWasEntered = True; }
+
+#define UNLOCK_BUFFER(p) { \
+ BUFFER_MUST_BE_LOCKED(p); \
+ CriticalSection_Leave(&(p)->cs); \
+ (p)->csWasEntered = False; }
+
+
+MY_NO_INLINE
+static UInt32 MtSync_GetNextBlock(CMtSync *p)
{
+ UInt32 numBlocks = 0;
if (p->needStart)
{
+ BUFFER_MUST_BE_UNLOCKED(p)
p->numProcessedBlocks = 1;
p->needStart = False;
p->stopWriting = False;
p->exit = False;
- Event_Reset(&p->wasStarted);
Event_Reset(&p->wasStopped);
-
Event_Set(&p->canStart);
- Event_Wait(&p->wasStarted);
-
- // if (mt) MatchFinder_Init_LowHash(mt->MatchFinder);
}
else
{
- CriticalSection_Leave(&p->cs);
- p->csWasEntered = False;
- p->numProcessedBlocks++;
+ UNLOCK_BUFFER(p)
+ // we free current block
+ numBlocks = p->numProcessedBlocks++;
Semaphore_Release1(&p->freeSemaphore);
}
+
+ // buffer is UNLOCKED here
Semaphore_Wait(&p->filledSemaphore);
- CriticalSection_Enter(&p->cs);
- p->csWasEntered = True;
+ LOCK_BUFFER(p);
+ return numBlocks;
}
-/* MtSync_StopWriting must be called if Writing was started */
+/* if Writing (Processing) thread was started, we must call MtSync_StopWriting() */
+
+MY_NO_INLINE
static void MtSync_StopWriting(CMtSync *p)
{
- UInt32 myNumBlocks = p->numProcessedBlocks;
if (!Thread_WasCreated(&p->thread) || p->needStart)
return;
- p->stopWriting = True;
+
+ PRF(printf("\nMtSync_StopWriting %p\n", p));
+
if (p->csWasEntered)
{
- CriticalSection_Leave(&p->cs);
- p->csWasEntered = False;
+ /* we don't use buffer in this thread after StopWriting().
+ So we UNLOCK buffer.
+ And we restore default UNLOCKED state for stopped thread */
+ UNLOCK_BUFFER(p)
}
- Semaphore_Release1(&p->freeSemaphore);
-
+
+ /* We send (p->stopWriting) message and release freeSemaphore
+ to free current block.
+ So the thread will see (p->stopWriting) at some
+ iteration after Wait(freeSemaphore).
+ The thread doesn't need to fill all avail free blocks,
+ so we can get fast thread stop.
+ */
+
+ p->stopWriting = True;
+ Semaphore_Release1(&p->freeSemaphore); // check semaphore count !!!
+
+ PRF(printf("\nMtSync_StopWriting %p : Event_Wait(&p->wasStopped)\n", p));
Event_Wait(&p->wasStopped);
+ PRF(printf("\nMtSync_StopWriting %p : Event_Wait() finsihed\n", p));
+
+ /* 21.03 : we don't restore semaphore counters here.
+ We will recreate and reinit semaphores in next start */
- while (myNumBlocks++ != p->numProcessedBlocks)
- {
- Semaphore_Wait(&p->filledSemaphore);
- Semaphore_Release1(&p->freeSemaphore);
- }
p->needStart = True;
}
+
+MY_NO_INLINE
static void MtSync_Destruct(CMtSync *p)
{
+ PRF(printf("\nMtSync_Destruct %p\n", p));
+
if (Thread_WasCreated(&p->thread))
{
+ /* we want thread to be in Stopped state before sending EXIT command.
+ note: stop(btSync) will stop (htSync) also */
MtSync_StopWriting(p);
+ /* thread in Stopped state here : (p->needStart == true) */
p->exit = True;
- if (p->needStart)
- Event_Set(&p->canStart);
- Thread_Wait(&p->thread);
- Thread_Close(&p->thread);
+ // if (p->needStart) // it's (true)
+ Event_Set(&p->canStart); // we send EXIT command to thread
+ Thread_Wait_Close(&p->thread); // we wait thread finishing
}
+
if (p->csWasInitialized)
{
CriticalSection_Delete(&p->cs);
p->csWasInitialized = False;
}
+ p->csWasEntered = False;
Event_Close(&p->canStart);
- Event_Close(&p->wasStarted);
Event_Close(&p->wasStopped);
Semaphore_Close(&p->freeSemaphore);
Semaphore_Close(&p->filledSemaphore);
@@ -99,80 +217,251 @@ static void MtSync_Destruct(CMtSync *p)
p->wasCreated = False;
}
-#define RINOK_THREAD(x) { if ((x) != 0) return SZ_ERROR_THREAD; }
-static SRes MtSync_Create2(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj, UInt32 numBlocks)
+// #define RINOK_THREAD(x) { if ((x) != 0) return SZ_ERROR_THREAD; }
+// we want to get real system error codes here instead of SZ_ERROR_THREAD
+#define RINOK_THREAD(x) RINOK(x)
+
+
+// call it before each new file (when new starting is required):
+MY_NO_INLINE
+static SRes MtSync_Init(CMtSync *p, UInt32 numBlocks)
+{
+ WRes wres;
+ // BUFFER_MUST_BE_UNLOCKED(p)
+ if (!p->needStart || p->csWasEntered)
+ return SZ_ERROR_FAIL;
+ wres = Semaphore_OptCreateInit(&p->freeSemaphore, numBlocks, numBlocks);
+ if (wres == 0)
+ wres = Semaphore_OptCreateInit(&p->filledSemaphore, 0, numBlocks);
+ return MY_SRes_HRESULT_FROM_WRes(wres);
+}
+
+
+static WRes MtSync_Create_WRes(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj)
{
+ WRes wres;
+
if (p->wasCreated)
return SZ_OK;
RINOK_THREAD(CriticalSection_Init(&p->cs));
p->csWasInitialized = True;
+ p->csWasEntered = False;
RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->canStart));
- RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->wasStarted));
RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->wasStopped));
-
- RINOK_THREAD(Semaphore_Create(&p->freeSemaphore, numBlocks, numBlocks));
- RINOK_THREAD(Semaphore_Create(&p->filledSemaphore, 0, numBlocks));
p->needStart = True;
-
- RINOK_THREAD(Thread_Create(&p->thread, startAddress, obj));
+ p->exit = True; /* p->exit is unused before (canStart) Event.
+ But in case of some unexpected code failure we will get fast exit from thread */
+
+ // return ERROR_TOO_MANY_POSTS; // for debug
+ // return EINVAL; // for debug
+
+ if (p->affinity != 0)
+ wres = Thread_Create_With_Affinity(&p->thread, startAddress, obj, (CAffinityMask)p->affinity);
+ else
+ wres = Thread_Create(&p->thread, startAddress, obj);
+
+ RINOK_THREAD(wres);
p->wasCreated = True;
return SZ_OK;
}
-static SRes MtSync_Create(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj, UInt32 numBlocks)
+
+MY_NO_INLINE
+static SRes MtSync_Create(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj)
{
- SRes res = MtSync_Create2(p, startAddress, obj, numBlocks);
- if (res != SZ_OK)
- MtSync_Destruct(p);
- return res;
+ const WRes wres = MtSync_Create_WRes(p, startAddress, obj);
+ if (wres == 0)
+ return 0;
+ MtSync_Destruct(p);
+ return MY_SRes_HRESULT_FROM_WRes(wres);
}
-void MtSync_Init(CMtSync *p) { p->needStart = True; }
+
+// ---------- HASH THREAD ----------
#define kMtMaxValForNormalize 0xFFFFFFFF
+// #define kMtMaxValForNormalize ((1 << 21)) // for debug
+// #define kNormalizeAlign (1 << 7) // alignment for speculated accesses
-#define DEF_GetHeads2(name, v, action) \
- static void GetHeads ## name(const Byte *p, UInt32 pos, \
- UInt32 *hash, UInt32 hashMask, UInt32 *heads, UInt32 numHeads, const UInt32 *crc) \
- { action; for (; numHeads != 0; numHeads--) { \
- const UInt32 value = (v); p++; *heads++ = pos - hash[value]; hash[value] = pos++; } }
+#ifdef MY_CPU_LE_UNALIGN
+ #define GetUi24hi_from32(p) ((UInt32)GetUi32(p) >> 8)
+#else
+ #define GetUi24hi_from32(p) ((p)[1] ^ ((UInt32)(p)[2] << 8) ^ ((UInt32)(p)[3] << 16))
+#endif
+
+#define GetHeads_DECL(name) \
+ static void GetHeads ## name(const Byte *p, UInt32 pos, \
+ UInt32 *hash, UInt32 hashMask, UInt32 *heads, UInt32 numHeads, const UInt32 *crc)
+
+#define GetHeads_LOOP(v) \
+ for (; numHeads != 0; numHeads--) { \
+ const UInt32 value = (v); \
+ p++; \
+ *heads++ = pos - hash[value]; \
+ hash[value] = pos++; }
+#define DEF_GetHeads2(name, v, action) \
+ GetHeads_DECL(name) { action \
+ GetHeads_LOOP(v) }
+
#define DEF_GetHeads(name, v) DEF_GetHeads2(name, v, ;)
-DEF_GetHeads2(2, (p[0] | ((UInt32)p[1] << 8)), UNUSED_VAR(hashMask); UNUSED_VAR(crc); )
-DEF_GetHeads(3, (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8)) & hashMask)
-DEF_GetHeads(4, (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8) ^ (crc[p[3]] << 5)) & hashMask)
-DEF_GetHeads(4b, (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8) ^ ((UInt32)p[3] << 16)) & hashMask)
-/* DEF_GetHeads(5, (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8) ^ (crc[p[3]] << 5) ^ (crc[p[4]] << 3)) & hashMask) */
+DEF_GetHeads2(2, GetUi16(p), UNUSED_VAR(hashMask); UNUSED_VAR(crc); )
+DEF_GetHeads(3, (crc[p[0]] ^ GetUi16(p + 1)) & hashMask)
+DEF_GetHeads2(3b, GetUi16(p) ^ ((UInt32)(p)[2] << 16), UNUSED_VAR(hashMask); UNUSED_VAR(crc); )
+// BT3 is not good for crc collisions for big hashMask values.
+
+/*
+GetHeads_DECL(3b)
+{
+ UNUSED_VAR(hashMask);
+ UNUSED_VAR(crc);
+ {
+ const Byte *pLim = p + numHeads;
+ if (numHeads == 0)
+ return;
+ pLim--;
+ while (p < pLim)
+ {
+ UInt32 v1 = GetUi32(p);
+ UInt32 v0 = v1 & 0xFFFFFF;
+ UInt32 h0, h1;
+ p += 2;
+ v1 >>= 8;
+ h0 = hash[v0]; hash[v0] = pos; heads[0] = pos - h0; pos++;
+ h1 = hash[v1]; hash[v1] = pos; heads[1] = pos - h1; pos++;
+ heads += 2;
+ }
+ if (p == pLim)
+ {
+ UInt32 v0 = GetUi16(p) ^ ((UInt32)(p)[2] << 16);
+ *heads = pos - hash[v0];
+ hash[v0] = pos;
+ }
+ }
+}
+*/
+
+/*
+GetHeads_DECL(4)
+{
+ unsigned sh = 0;
+ UNUSED_VAR(crc)
+ while ((hashMask & 0x80000000) == 0)
+ {
+ hashMask <<= 1;
+ sh++;
+ }
+ GetHeads_LOOP((GetUi32(p) * 0xa54a1) >> sh)
+}
+#define GetHeads4b GetHeads4
+*/
+
+#define USE_GetHeads_LOCAL_CRC
+
+#ifdef USE_GetHeads_LOCAL_CRC
+
+GetHeads_DECL(4)
+{
+ UInt32 crc0[256];
+ UInt32 crc1[256];
+ {
+ unsigned i;
+ for (i = 0; i < 256; i++)
+ {
+ UInt32 v = crc[i];
+ crc0[i] = v & hashMask;
+ crc1[i] = (v << kLzHash_CrcShift_1) & hashMask;
+ // crc1[i] = rotlFixed(v, 8) & hashMask;
+ }
+ }
+ GetHeads_LOOP(crc0[p[0]] ^ crc1[p[3]] ^ (UInt32)GetUi16(p+1))
+}
+
+GetHeads_DECL(4b)
+{
+ UInt32 crc0[256];
+ {
+ unsigned i;
+ for (i = 0; i < 256; i++)
+ crc0[i] = crc[i] & hashMask;
+ }
+ GetHeads_LOOP(crc0[p[0]] ^ GetUi24hi_from32(p))
+}
+
+GetHeads_DECL(5)
+{
+ UInt32 crc0[256];
+ UInt32 crc1[256];
+ UInt32 crc2[256];
+ {
+ unsigned i;
+ for (i = 0; i < 256; i++)
+ {
+ UInt32 v = crc[i];
+ crc0[i] = v & hashMask;
+ crc1[i] = (v << kLzHash_CrcShift_1) & hashMask;
+ crc2[i] = (v << kLzHash_CrcShift_2) & hashMask;
+ }
+ }
+ GetHeads_LOOP(crc0[p[0]] ^ crc1[p[3]] ^ crc2[p[4]] ^ (UInt32)GetUi16(p+1))
+}
+
+GetHeads_DECL(5b)
+{
+ UInt32 crc0[256];
+ UInt32 crc1[256];
+ {
+ unsigned i;
+ for (i = 0; i < 256; i++)
+ {
+ UInt32 v = crc[i];
+ crc0[i] = v & hashMask;
+ crc1[i] = (v << kLzHash_CrcShift_1) & hashMask;
+ }
+ }
+ GetHeads_LOOP(crc0[p[0]] ^ crc1[p[4]] ^ GetUi24hi_from32(p))
+}
+
+#else
+
+DEF_GetHeads(4, (crc[p[0]] ^ (crc[p[3]] << kLzHash_CrcShift_1) ^ (UInt32)GetUi16(p+1)) & hashMask)
+DEF_GetHeads(4b, (crc[p[0]] ^ GetUi24hi_from32(p)) & hashMask)
+DEF_GetHeads(5, (crc[p[0]] ^ (crc[p[3]] << kLzHash_CrcShift_1) ^ (crc[p[4]] << kLzHash_CrcShift_2) ^ (UInt32)GetUi16(p + 1)) & hashMask)
+DEF_GetHeads(5b, (crc[p[0]] ^ (crc[p[4]] << kLzHash_CrcShift_1) ^ GetUi24hi_from32(p)) & hashMask)
+
+#endif
+
static void HashThreadFunc(CMatchFinderMt *mt)
{
CMtSync *p = &mt->hashSync;
+ PRF(printf("\nHashThreadFunc\n"));
+
for (;;)
{
- UInt32 numProcessedBlocks = 0;
+ UInt32 blockIndex = 0;
+ PRF(printf("\nHashThreadFunc : Event_Wait(&p->canStart)\n"));
Event_Wait(&p->canStart);
- Event_Set(&p->wasStarted);
+ PRF(printf("\nHashThreadFunc : Event_Wait(&p->canStart) : after \n"));
+ if (p->exit)
+ {
+ PRF(printf("\nHashThreadFunc : exit \n"));
+ return;
+ }
- MatchFinder_Init_HighHash(mt->MatchFinder);
+ MatchFinder_Init_HighHash(MF(mt));
for (;;)
{
- if (p->exit)
- return;
- if (p->stopWriting)
- {
- p->numProcessedBlocks = numProcessedBlocks;
- Event_Set(&p->wasStopped);
- break;
- }
+ PRF(printf("Hash thread block = %d pos = %d\n", (unsigned)blockIndex, mt->MatchFinder->pos));
{
- CMatchFinder *mf = mt->MatchFinder;
+ CMatchFinder *mf = MF(mt);
if (MatchFinder_NeedMove(mf))
{
CriticalSection_Enter(&mt->btSync.cs);
@@ -185,194 +474,178 @@ static void HashThreadFunc(CMatchFinderMt *mt)
mt->pointerToCurPos -= offset;
mt->buffer -= offset;
}
- CriticalSection_Leave(&mt->btSync.cs);
CriticalSection_Leave(&mt->hashSync.cs);
+ CriticalSection_Leave(&mt->btSync.cs);
continue;
}
Semaphore_Wait(&p->freeSemaphore);
+ if (p->exit) // exit is unexpected here. But we check it here for some failure case
+ return;
+
+ // for faster stop : we check (p->stopWriting) after Wait(freeSemaphore)
+ if (p->stopWriting)
+ break;
+
MatchFinder_ReadIfRequired(mf);
- if (mf->pos > (kMtMaxValForNormalize - kMtHashBlockSize))
- {
- UInt32 subValue = (mf->pos - mf->historySize - 1);
- MatchFinder_ReduceOffsets(mf, subValue);
- MatchFinder_Normalize3(subValue, mf->hash + mf->fixedHashSize, (size_t)mf->hashMask + 1);
- }
{
- UInt32 *heads = mt->hashBuf + ((numProcessedBlocks++) & kMtHashNumBlocksMask) * kMtHashBlockSize;
- UInt32 num = mf->streamPos - mf->pos;
+ UInt32 *heads = mt->hashBuf + GET_HASH_BLOCK_OFFSET(blockIndex++);
+ UInt32 num = Inline_MatchFinder_GetNumAvailableBytes(mf);
heads[0] = 2;
heads[1] = num;
+
+ /* heads[1] contains the number of avail bytes:
+ if (avail < mf->numHashBytes) :
+ {
+ it means that stream was finished
+ HASH_THREAD and BT_THREAD must move position for heads[1] (avail) bytes.
+ HASH_THREAD doesn't stop,
+ HASH_THREAD fills only the header (2 numbers) for all next blocks:
+ {2, NumHashBytes - 1}, {2,0}, {2,0}, ... , {2,0}
+ }
+ else
+ {
+ HASH_THREAD and BT_THREAD must move position for (heads[0] - 2) bytes;
+ }
+ */
+
if (num >= mf->numHashBytes)
{
num = num - mf->numHashBytes + 1;
if (num > kMtHashBlockSize - 2)
num = kMtHashBlockSize - 2;
- mt->GetHeadsFunc(mf->buffer, mf->pos, mf->hash + mf->fixedHashSize, mf->hashMask, heads + 2, num, mf->crc);
+
+ if (mf->pos > (UInt32)kMtMaxValForNormalize - num)
+ {
+ const UInt32 subValue = (mf->pos - mf->historySize - 1); // & ~(UInt32)(kNormalizeAlign - 1);
+ Inline_MatchFinder_ReduceOffsets(mf, subValue);
+ MatchFinder_Normalize3(subValue, mf->hash + mf->fixedHashSize, (size_t)mf->hashMask + 1);
+ }
+
heads[0] = 2 + num;
+ mt->GetHeadsFunc(mf->buffer, mf->pos, mf->hash + mf->fixedHashSize, mf->hashMask, heads + 2, num, mf->crc);
}
- mf->pos += num;
+
+ mf->pos += num; // wrap over zero is allowed at the end of stream
mf->buffer += num;
}
}
Semaphore_Release1(&p->filledSemaphore);
- }
- }
-}
+ } // for() processing end
-static void MatchFinderMt_GetNextBlock_Hash(CMatchFinderMt *p)
-{
- MtSync_GetNextBlock(&p->hashSync);
- p->hashBufPosLimit = p->hashBufPos = ((p->hashSync.numProcessedBlocks - 1) & kMtHashNumBlocksMask) * kMtHashBlockSize;
- p->hashBufPosLimit += p->hashBuf[p->hashBufPos++];
- p->hashNumAvail = p->hashBuf[p->hashBufPos++];
+ // p->numBlocks_Sent = blockIndex;
+ Event_Set(&p->wasStopped);
+ } // for() thread end
}
-#define kEmptyHashValue 0
+
+
+
+// ---------- BT THREAD ----------
+
+/* we use one variable instead of two (cyclicBufferPos == pos) before CyclicBuf wrap.
+ here we define fixed offset of (p->pos) from (p->cyclicBufferPos) */
+#define CYC_TO_POS_OFFSET 0
+// #define CYC_TO_POS_OFFSET 1 // for debug
#define MFMT_GM_INLINE
#ifdef MFMT_GM_INLINE
/*
- we use size_t for _cyclicBufferPos instead of UInt32
+ we use size_t for (pos) instead of UInt32
to eliminate "movsx" BUG in old MSVC x64 compiler.
*/
-MY_NO_INLINE
-static UInt32 *GetMatchesSpecN(UInt32 lenLimit, UInt32 pos, const Byte *cur, CLzRef *son,
- size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue,
- UInt32 *distances, UInt32 _maxLen, const UInt32 *hash, const UInt32 *limit, UInt32 size, UInt32 *posRes)
-{
- do
- {
- UInt32 *_distances = ++distances;
- UInt32 delta = *hash++;
- CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
- CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
- unsigned len0 = 0, len1 = 0;
- UInt32 cutValue = _cutValue;
- unsigned maxLen = (unsigned)_maxLen;
-
- /*
- if (size > 1)
- {
- UInt32 delta = *hash;
- if (delta < _cyclicBufferSize)
- {
- UInt32 cyc1 = _cyclicBufferPos + 1;
- CLzRef *pair = son + ((size_t)(cyc1 - delta + ((delta > cyc1) ? _cyclicBufferSize : 0)) << 1);
- Byte b = *(cur + 1 - delta);
- _distances[0] = pair[0];
- _distances[1] = b;
- }
- }
- */
- if (cutValue == 0 || delta >= _cyclicBufferSize)
- {
- *ptr0 = *ptr1 = kEmptyHashValue;
- }
- else
- for(;;)
- {
- {
- CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((_cyclicBufferPos < delta) ? _cyclicBufferSize : 0)) << 1);
- const Byte *pb = cur - delta;
- unsigned len = (len0 < len1 ? len0 : len1);
- UInt32 pair0 = *pair;
- if (pb[len] == cur[len])
- {
- if (++len != lenLimit && pb[len] == cur[len])
- while (++len != lenLimit)
- if (pb[len] != cur[len])
- break;
- if (maxLen < len)
- {
- maxLen = len;
- *distances++ = (UInt32)len;
- *distances++ = delta - 1;
- if (len == lenLimit)
- {
- UInt32 pair1 = pair[1];
- *ptr1 = pair0;
- *ptr0 = pair1;
- break;
- }
- }
- }
- {
- UInt32 curMatch = pos - delta;
- // delta = pos - *pair;
- // delta = pos - pair[((UInt32)pb[len] - (UInt32)cur[len]) >> 31];
- if (pb[len] < cur[len])
- {
- delta = pos - pair[1];
- *ptr1 = curMatch;
- ptr1 = pair + 1;
- len1 = len;
- }
- else
- {
- delta = pos - *pair;
- *ptr0 = curMatch;
- ptr0 = pair;
- len0 = len;
- }
- }
- }
- if (--cutValue == 0 || delta >= _cyclicBufferSize)
- {
- *ptr0 = *ptr1 = kEmptyHashValue;
- break;
- }
- }
- pos++;
- _cyclicBufferPos++;
- cur++;
- {
- UInt32 num = (UInt32)(distances - _distances);
- _distances[-1] = num;
- }
- }
- while (distances < limit && --size != 0);
- *posRes = pos;
- return distances;
-}
+UInt32 * MY_FAST_CALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
+ UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
+ size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
+ UInt32 *posRes);
#endif
-
-static void BtGetMatches(CMatchFinderMt *p, UInt32 *distances)
+static void BtGetMatches(CMatchFinderMt *p, UInt32 *d)
{
UInt32 numProcessed = 0;
UInt32 curPos = 2;
- UInt32 limit = kMtBtBlockSize - (p->matchMaxLen * 2); // * 2
- distances[1] = p->hashNumAvail;
+ /* GetMatchesSpec() functions don't create (len = 1)
+ in [len, dist] match pairs, if (p->numHashBytes >= 2)
+ Also we suppose here that (matchMaxLen >= 2).
+ So the following code for (reserve) is not required
+ UInt32 reserve = (p->matchMaxLen * 2);
+ const UInt32 kNumHashBytes_Max = 5; // BT_HASH_BYTES_MAX
+ if (reserve < kNumHashBytes_Max - 1)
+ reserve = kNumHashBytes_Max - 1;
+ const UInt32 limit = kMtBtBlockSize - (reserve);
+ */
+
+ const UInt32 limit = kMtBtBlockSize - (p->matchMaxLen * 2);
+
+ d[1] = p->hashNumAvail;
+
+ if (p->failure_BT)
+ {
+ // printf("\n == 1 BtGetMatches() p->failure_BT\n");
+ d[0] = 0;
+ // d[1] = 0;
+ return;
+ }
while (curPos < limit)
{
if (p->hashBufPos == p->hashBufPosLimit)
{
- MatchFinderMt_GetNextBlock_Hash(p);
- distances[1] = numProcessed + p->hashNumAvail;
- if (p->hashNumAvail >= p->numHashBytes)
+ // MatchFinderMt_GetNextBlock_Hash(p);
+ UInt32 avail;
+ {
+ const UInt32 bi = MtSync_GetNextBlock(&p->hashSync);
+ const UInt32 k = GET_HASH_BLOCK_OFFSET(bi);
+ const UInt32 *h = p->hashBuf + k;
+ avail = h[1];
+ p->hashBufPosLimit = k + h[0];
+ p->hashNumAvail = avail;
+ p->hashBufPos = k + 2;
+ }
+
+ {
+ /* we must prevent UInt32 overflow for avail total value,
+ if avail was increased with new hash block */
+ UInt32 availSum = numProcessed + avail;
+ if (availSum < numProcessed)
+ availSum = (UInt32)(Int32)-1;
+ d[1] = availSum;
+ }
+
+ if (avail >= p->numHashBytes)
continue;
- distances[0] = curPos + p->hashNumAvail;
- distances += curPos;
- for (; p->hashNumAvail != 0; p->hashNumAvail--)
- *distances++ = 0;
+
+ // if (p->hashBufPos != p->hashBufPosLimit) exit(1);
+
+ /* (avail < p->numHashBytes)
+ It means that stream was finished.
+ And (avail) - is a number of remaining bytes,
+ we fill (d) for (avail) bytes for LZ_THREAD (receiver).
+ but we don't update (p->pos) and (p->cyclicBufferPos) here in BT_THREAD */
+
+ /* here we suppose that we have space enough:
+ (kMtBtBlockSize - curPos >= p->hashNumAvail) */
+ p->hashNumAvail = 0;
+ d[0] = curPos + avail;
+ d += curPos;
+ for (; avail != 0; avail--)
+ *d++ = 0;
return;
}
{
UInt32 size = p->hashBufPosLimit - p->hashBufPos;
- UInt32 lenLimit = p->matchMaxLen;
UInt32 pos = p->pos;
UInt32 cyclicBufferPos = p->cyclicBufferPos;
+ UInt32 lenLimit = p->matchMaxLen;
if (lenLimit >= p->hashNumAvail)
lenLimit = p->hashNumAvail;
{
@@ -384,10 +657,18 @@ static void BtGetMatches(CMatchFinderMt *p, UInt32 *distances)
size = size2;
}
+ if (pos > (UInt32)kMtMaxValForNormalize - size)
+ {
+ const UInt32 subValue = (pos - p->cyclicBufferSize); // & ~(UInt32)(kNormalizeAlign - 1);
+ pos -= subValue;
+ p->pos = pos;
+ MatchFinder_Normalize3(subValue, p->son, (size_t)p->cyclicBufferSize * 2);
+ }
+
#ifndef MFMT_GM_INLINE
while (curPos < limit && size-- != 0)
{
- UInt32 *startDistances = distances + curPos;
+ UInt32 *startDistances = d + curPos;
UInt32 num = (UInt32)(GetMatchesSpec1(lenLimit, pos - p->hashBuf[p->hashBufPos++],
pos, p->buffer, p->son, cyclicBufferPos, p->cyclicBufferSize, p->cutValue,
startDistances + 1, p->numHashBytes - 1) - startDistances);
@@ -399,81 +680,112 @@ static void BtGetMatches(CMatchFinderMt *p, UInt32 *distances)
}
#else
{
- UInt32 posRes;
- curPos = (UInt32)(GetMatchesSpecN(lenLimit, pos, p->buffer, p->son, cyclicBufferPos, p->cyclicBufferSize, p->cutValue,
- distances + curPos, p->numHashBytes - 1, p->hashBuf + p->hashBufPos,
- distances + limit,
- size, &posRes) - distances);
- p->hashBufPos += posRes - pos;
- cyclicBufferPos += posRes - pos;
- p->buffer += posRes - pos;
- pos = posRes;
+ UInt32 posRes = pos;
+ const UInt32 *d_end;
+ {
+ d_end = GetMatchesSpecN_2(
+ p->buffer + lenLimit - 1,
+ pos, p->buffer, p->son, p->cutValue, d + curPos,
+ p->numHashBytes - 1, p->hashBuf + p->hashBufPos,
+ d + limit, p->hashBuf + p->hashBufPos + size,
+ cyclicBufferPos, p->cyclicBufferSize,
+ &posRes);
+ }
+ {
+ if (!d_end)
+ {
+ // printf("\n == 2 BtGetMatches() p->failure_BT\n");
+ // internal data failure
+ p->failure_BT = True;
+ d[0] = 0;
+ // d[1] = 0;
+ return;
+ }
+ }
+ curPos = (UInt32)(d_end - d);
+ {
+ const UInt32 processed = posRes - pos;
+ pos = posRes;
+ p->hashBufPos += processed;
+ cyclicBufferPos += processed;
+ p->buffer += processed;
+ }
}
#endif
- numProcessed += pos - p->pos;
- p->hashNumAvail -= pos - p->pos;
- p->pos = pos;
+ {
+ const UInt32 processed = pos - p->pos;
+ numProcessed += processed;
+ p->hashNumAvail -= processed;
+ p->pos = pos;
+ }
if (cyclicBufferPos == p->cyclicBufferSize)
cyclicBufferPos = 0;
p->cyclicBufferPos = cyclicBufferPos;
}
}
- distances[0] = curPos;
+ d[0] = curPos;
}
+
static void BtFillBlock(CMatchFinderMt *p, UInt32 globalBlockIndex)
{
CMtSync *sync = &p->hashSync;
+
+ BUFFER_MUST_BE_UNLOCKED(sync)
+
if (!sync->needStart)
{
- CriticalSection_Enter(&sync->cs);
- sync->csWasEntered = True;
+ LOCK_BUFFER(sync)
}
- BtGetMatches(p, p->btBuf + (globalBlockIndex & kMtBtNumBlocksMask) * kMtBtBlockSize);
-
- if (p->pos > kMtMaxValForNormalize - kMtBtBlockSize)
- {
- UInt32 subValue = p->pos - p->cyclicBufferSize;
- MatchFinder_Normalize3(subValue, p->son, (size_t)p->cyclicBufferSize * 2);
- p->pos -= subValue;
- }
+ BtGetMatches(p, p->btBuf + GET_BT_BLOCK_OFFSET(globalBlockIndex));
+
+ /* We suppose that we have called GetNextBlock() from start.
+ So buffer is LOCKED */
- if (!sync->needStart)
- {
- CriticalSection_Leave(&sync->cs);
- sync->csWasEntered = False;
- }
+ UNLOCK_BUFFER(sync)
}
-void BtThreadFunc(CMatchFinderMt *mt)
+
+MY_NO_INLINE
+static void BtThreadFunc(CMatchFinderMt *mt)
{
CMtSync *p = &mt->btSync;
for (;;)
{
UInt32 blockIndex = 0;
Event_Wait(&p->canStart);
- Event_Set(&p->wasStarted);
+
for (;;)
{
+ PRF(printf(" BT thread block = %d pos = %d\n", (unsigned)blockIndex, mt->pos));
+ /* (p->exit == true) is possible after (p->canStart) at first loop iteration
+ and is unexpected after more Wait(freeSemaphore) iterations */
if (p->exit)
return;
+
+ Semaphore_Wait(&p->freeSemaphore);
+
+ // for faster stop : we check (p->stopWriting) after Wait(freeSemaphore)
if (p->stopWriting)
- {
- p->numProcessedBlocks = blockIndex;
- MtSync_StopWriting(&mt->hashSync);
- Event_Set(&p->wasStopped);
break;
- }
- Semaphore_Wait(&p->freeSemaphore);
+
BtFillBlock(mt, blockIndex++);
+
Semaphore_Release1(&p->filledSemaphore);
}
+
+ // we stop HASH_THREAD here
+ MtSync_StopWriting(&mt->hashSync);
+
+ // p->numBlocks_Sent = blockIndex;
+ Event_Set(&p->wasStopped);
}
}
+
void MatchFinderMt_Construct(CMatchFinderMt *p)
{
p->hashBuf = NULL;
@@ -489,16 +801,39 @@ static void MatchFinderMt_FreeMem(CMatchFinderMt *p, ISzAllocPtr alloc)
void MatchFinderMt_Destruct(CMatchFinderMt *p, ISzAllocPtr alloc)
{
- MtSync_Destruct(&p->hashSync);
+ /*
+ HASH_THREAD can use CriticalSection(s) btSync.cs and hashSync.cs.
+ So we must be sure that HASH_THREAD will not use CriticalSection(s)
+ after deleting CriticalSection here.
+
+ we call ReleaseStream(p)
+ that calls StopWriting(btSync)
+ that calls StopWriting(hashSync), if it's required to stop HASH_THREAD.
+ after StopWriting() it's safe to destruct MtSync(s) in any order */
+
+ MatchFinderMt_ReleaseStream(p);
+
MtSync_Destruct(&p->btSync);
+ MtSync_Destruct(&p->hashSync);
+
+ LOG_ITER(
+ printf("\nTree %9d * %7d iter = %9d = sum : bytes = %9d\n",
+ (UInt32)(g_NumIters_Tree / 1000),
+ (UInt32)(((UInt64)g_NumIters_Loop * 1000) / (g_NumIters_Tree + 1)),
+ (UInt32)(g_NumIters_Loop / 1000),
+ (UInt32)(g_NumIters_Bytes / 1000)
+ ));
+
MatchFinderMt_FreeMem(p, alloc);
}
+
#define kHashBufferSize (kMtHashBlockSize * kMtHashNumBlocks)
#define kBtBufferSize (kMtBtBlockSize * kMtBtNumBlocks)
-static THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE HashThreadFunc2(void *p) { HashThreadFunc((CMatchFinderMt *)p); return 0; }
-static THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE BtThreadFunc2(void *p)
+
+static THREAD_FUNC_DECL HashThreadFunc2(void *p) { HashThreadFunc((CMatchFinderMt *)p); return 0; }
+static THREAD_FUNC_DECL BtThreadFunc2(void *p)
{
Byte allocaDummy[0x180];
unsigned i = 0;
@@ -509,16 +844,17 @@ static THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE BtThreadFunc2(void *p)
return 0;
}
+
SRes MatchFinderMt_Create(CMatchFinderMt *p, UInt32 historySize, UInt32 keepAddBufferBefore,
UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAllocPtr alloc)
{
- CMatchFinder *mf = p->MatchFinder;
+ CMatchFinder *mf = MF(p);
p->historySize = historySize;
if (kMtBtBlockSize <= matchMaxLen * 4)
return SZ_ERROR_PARAM;
if (!p->hashBuf)
{
- p->hashBuf = (UInt32 *)ISzAlloc_Alloc(alloc, (kHashBufferSize + kBtBufferSize) * sizeof(UInt32));
+ p->hashBuf = (UInt32 *)ISzAlloc_Alloc(alloc, ((size_t)kHashBufferSize + (size_t)kBtBufferSize) * sizeof(UInt32));
if (!p->hashBuf)
return SZ_ERROR_MEM;
p->btBuf = p->hashBuf + kHashBufferSize;
@@ -528,253 +864,457 @@ SRes MatchFinderMt_Create(CMatchFinderMt *p, UInt32 historySize, UInt32 keepAddB
if (!MatchFinder_Create(mf, historySize, keepAddBufferBefore, matchMaxLen, keepAddBufferAfter, alloc))
return SZ_ERROR_MEM;
- RINOK(MtSync_Create(&p->hashSync, HashThreadFunc2, p, kMtHashNumBlocks));
- RINOK(MtSync_Create(&p->btSync, BtThreadFunc2, p, kMtBtNumBlocks));
+ RINOK(MtSync_Create(&p->hashSync, HashThreadFunc2, p));
+ RINOK(MtSync_Create(&p->btSync, BtThreadFunc2, p));
return SZ_OK;
}
-/* Call it after ReleaseStream / SetStream */
+
+SRes MatchFinderMt_InitMt(CMatchFinderMt *p)
+{
+ RINOK(MtSync_Init(&p->hashSync, kMtHashNumBlocks));
+ return MtSync_Init(&p->btSync, kMtBtNumBlocks);
+}
+
+
static void MatchFinderMt_Init(CMatchFinderMt *p)
{
- CMatchFinder *mf = p->MatchFinder;
+ CMatchFinder *mf = MF(p);
p->btBufPos =
- p->btBufPosLimit = 0;
+ p->btBufPosLimit = NULL;
p->hashBufPos =
p->hashBufPosLimit = 0;
+ p->hashNumAvail = 0; // 21.03
+
+ p->failure_BT = False;
/* Init without data reading. We don't want to read data in this thread */
- MatchFinder_Init_3(mf, False);
+ MatchFinder_Init_4(mf);
+
MatchFinder_Init_LowHash(mf);
p->pointerToCurPos = Inline_MatchFinder_GetPointerToCurrentPos(mf);
p->btNumAvailBytes = 0;
- p->lzPos = p->historySize + 1;
+ p->failure_LZ_BT = False;
+ // p->failure_LZ_LZ = False;
+
+ p->lzPos =
+ 1; // optimal smallest value
+ // 0; // for debug: ignores match to start
+ // kNormalizeAlign; // for debug
p->hash = mf->hash;
p->fixedHashSize = mf->fixedHashSize;
+ // p->hash4Mask = mf->hash4Mask;
p->crc = mf->crc;
+ // memcpy(p->crc, mf->crc, sizeof(mf->crc));
p->son = mf->son;
p->matchMaxLen = mf->matchMaxLen;
p->numHashBytes = mf->numHashBytes;
- p->pos = mf->pos;
- p->buffer = mf->buffer;
- p->cyclicBufferPos = mf->cyclicBufferPos;
+
+ /* (mf->pos) and (mf->streamPos) were already initialized to 1 in MatchFinder_Init_4() */
+ // mf->streamPos = mf->pos = 1; // optimal smallest value
+ // 0; // for debug: ignores match to start
+ // kNormalizeAlign; // for debug
+
+ /* we must init (p->pos = mf->pos) for BT, because
+ BT code needs (p->pos == delta_value_for_empty_hash_record == mf->pos) */
+ p->pos = mf->pos; // do not change it
+
+ p->cyclicBufferPos = (p->pos - CYC_TO_POS_OFFSET);
p->cyclicBufferSize = mf->cyclicBufferSize;
+ p->buffer = mf->buffer;
p->cutValue = mf->cutValue;
+ // p->son[0] = p->son[1] = 0; // unused: to init skipped record for speculated accesses.
}
+
/* ReleaseStream is required to finish multithreading */
void MatchFinderMt_ReleaseStream(CMatchFinderMt *p)
{
+ // Sleep(1); // for debug
MtSync_StopWriting(&p->btSync);
+ // Sleep(200); // for debug
/* p->MatchFinder->ReleaseStream(); */
}
-static void MatchFinderMt_Normalize(CMatchFinderMt *p)
-{
- MatchFinder_Normalize3(p->lzPos - p->historySize - 1, p->hash, p->fixedHashSize);
- p->lzPos = p->historySize + 1;
-}
-static void MatchFinderMt_GetNextBlock_Bt(CMatchFinderMt *p)
+MY_NO_INLINE
+static UInt32 MatchFinderMt_GetNextBlock_Bt(CMatchFinderMt *p)
{
- UInt32 blockIndex;
- MtSync_GetNextBlock(&p->btSync);
- blockIndex = ((p->btSync.numProcessedBlocks - 1) & kMtBtNumBlocksMask);
- p->btBufPosLimit = p->btBufPos = blockIndex * kMtBtBlockSize;
- p->btBufPosLimit += p->btBuf[p->btBufPos++];
- p->btNumAvailBytes = p->btBuf[p->btBufPos++];
- if (p->lzPos >= kMtMaxValForNormalize - kMtBtBlockSize)
- MatchFinderMt_Normalize(p);
+ if (p->failure_LZ_BT)
+ p->btBufPos = p->failureBuf;
+ else
+ {
+ const UInt32 bi = MtSync_GetNextBlock(&p->btSync);
+ const UInt32 *bt = p->btBuf + GET_BT_BLOCK_OFFSET(bi);
+ {
+ const UInt32 numItems = bt[0];
+ p->btBufPosLimit = bt + numItems;
+ p->btNumAvailBytes = bt[1];
+ p->btBufPos = bt + 2;
+ if (numItems < 2 || numItems > kMtBtBlockSize)
+ {
+ p->failureBuf[0] = 0;
+ p->btBufPos = p->failureBuf;
+ p->btBufPosLimit = p->failureBuf + 1;
+ p->failure_LZ_BT = True;
+ // p->btNumAvailBytes = 0;
+ /* we don't want to decrease AvailBytes, that was loaded before.
+ that can be unexpected for the code that has loaded another value before */
+ }
+ }
+
+ if (p->lzPos >= (UInt32)kMtMaxValForNormalize - (UInt32)kMtBtBlockSize)
+ {
+ /* we don't check (lzPos) over exact avail bytes in (btBuf).
+ (fixedHashSize) is small, so normalization is fast */
+ const UInt32 subValue = (p->lzPos - p->historySize - 1); // & ~(UInt32)(kNormalizeAlign - 1);
+ p->lzPos -= subValue;
+ MatchFinder_Normalize3(subValue, p->hash, p->fixedHashSize);
+ }
+ }
+ return p->btNumAvailBytes;
}
+
+
static const Byte * MatchFinderMt_GetPointerToCurrentPos(CMatchFinderMt *p)
{
return p->pointerToCurPos;
}
+
#define GET_NEXT_BLOCK_IF_REQUIRED if (p->btBufPos == p->btBufPosLimit) MatchFinderMt_GetNextBlock_Bt(p);
+
static UInt32 MatchFinderMt_GetNumAvailableBytes(CMatchFinderMt *p)
{
- GET_NEXT_BLOCK_IF_REQUIRED;
- return p->btNumAvailBytes;
+ if (p->btBufPos != p->btBufPosLimit)
+ return p->btNumAvailBytes;
+ return MatchFinderMt_GetNextBlock_Bt(p);
}
-static UInt32 * MixMatches2(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *distances)
+
+// #define CHECK_FAILURE_LZ(_match_, _pos_) if (_match_ >= _pos_) { p->failure_LZ_LZ = True; return d; }
+#define CHECK_FAILURE_LZ(_match_, _pos_)
+
+static UInt32 * MixMatches2(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
{
- UInt32 h2, curMatch2;
+ UInt32 h2, c2;
UInt32 *hash = p->hash;
const Byte *cur = p->pointerToCurPos;
- UInt32 lzPos = p->lzPos;
+ const UInt32 m = p->lzPos;
MT_HASH2_CALC
- curMatch2 = hash[h2];
- hash[h2] = lzPos;
+ c2 = hash[h2];
+ hash[h2] = m;
- if (curMatch2 >= matchMinPos)
- if (cur[(ptrdiff_t)curMatch2 - lzPos] == cur[0])
+ if (c2 >= matchMinPos)
+ {
+ CHECK_FAILURE_LZ(c2, m)
+ if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0])
{
- *distances++ = 2;
- *distances++ = lzPos - curMatch2 - 1;
+ *d++ = 2;
+ *d++ = m - c2 - 1;
}
+ }
- return distances;
+ return d;
}
-static UInt32 * MixMatches3(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *distances)
+static UInt32 * MixMatches3(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
{
- UInt32 h2, h3, curMatch2, curMatch3;
+ UInt32 h2, h3, c2, c3;
UInt32 *hash = p->hash;
const Byte *cur = p->pointerToCurPos;
- UInt32 lzPos = p->lzPos;
+ const UInt32 m = p->lzPos;
MT_HASH3_CALC
- curMatch2 = hash[ h2];
- curMatch3 = (hash + kFix3HashSize)[h3];
+ c2 = hash[h2];
+ c3 = (hash + kFix3HashSize)[h3];
- hash[ h2] = lzPos;
- (hash + kFix3HashSize)[h3] = lzPos;
+ hash[h2] = m;
+ (hash + kFix3HashSize)[h3] = m;
- if (curMatch2 >= matchMinPos && cur[(ptrdiff_t)curMatch2 - lzPos] == cur[0])
+ if (c2 >= matchMinPos)
{
- distances[1] = lzPos - curMatch2 - 1;
- if (cur[(ptrdiff_t)curMatch2 - lzPos + 2] == cur[2])
+ CHECK_FAILURE_LZ(c2, m)
+ if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0])
{
- distances[0] = 3;
- return distances + 2;
+ d[1] = m - c2 - 1;
+ if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 2] == cur[2])
+ {
+ d[0] = 3;
+ return d + 2;
+ }
+ d[0] = 2;
+ d += 2;
}
- distances[0] = 2;
- distances += 2;
}
- if (curMatch3 >= matchMinPos && cur[(ptrdiff_t)curMatch3 - lzPos] == cur[0])
+ if (c3 >= matchMinPos)
{
- *distances++ = 3;
- *distances++ = lzPos - curMatch3 - 1;
+ CHECK_FAILURE_LZ(c3, m)
+ if (cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0])
+ {
+ *d++ = 3;
+ *d++ = m - c3 - 1;
+ }
}
- return distances;
+ return d;
}
+
+#define INCREASE_LZ_POS p->lzPos++; p->pointerToCurPos++;
+
/*
-static UInt32 *MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *distances)
+static
+UInt32* MatchFinderMt_GetMatches_Bt4(CMatchFinderMt *p, UInt32 *d)
{
- UInt32 h2, h3, h4, curMatch2, curMatch3, curMatch4;
+ const UInt32 *bt = p->btBufPos;
+ const UInt32 len = *bt++;
+ const UInt32 *btLim = bt + len;
+ UInt32 matchMinPos;
+ UInt32 avail = p->btNumAvailBytes - 1;
+ p->btBufPos = btLim;
+
+ {
+ p->btNumAvailBytes = avail;
+
+ #define BT_HASH_BYTES_MAX 5
+
+ matchMinPos = p->lzPos;
+
+ if (len != 0)
+ matchMinPos -= bt[1];
+ else if (avail < (BT_HASH_BYTES_MAX - 1) - 1)
+ {
+ INCREASE_LZ_POS
+ return d;
+ }
+ else
+ {
+ const UInt32 hs = p->historySize;
+ if (matchMinPos > hs)
+ matchMinPos -= hs;
+ else
+ matchMinPos = 1;
+ }
+ }
+
+ for (;;)
+ {
+
+ UInt32 h2, h3, c2, c3;
UInt32 *hash = p->hash;
const Byte *cur = p->pointerToCurPos;
- UInt32 lzPos = p->lzPos;
- MT_HASH4_CALC
-
- curMatch2 = hash[ h2];
- curMatch3 = (hash + kFix3HashSize)[h3];
- curMatch4 = (hash + kFix4HashSize)[h4];
+ UInt32 m = p->lzPos;
+ MT_HASH3_CALC
+
+ c2 = hash[h2];
+ c3 = (hash + kFix3HashSize)[h3];
+
+ hash[h2] = m;
+ (hash + kFix3HashSize)[h3] = m;
+
+ if (c2 >= matchMinPos && cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0])
+ {
+ d[1] = m - c2 - 1;
+ if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 2] == cur[2])
+ {
+ d[0] = 3;
+ d += 2;
+ break;
+ }
+ // else
+ {
+ d[0] = 2;
+ d += 2;
+ }
+ }
+ if (c3 >= matchMinPos && cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0])
+ {
+ *d++ = 3;
+ *d++ = m - c3 - 1;
+ }
+ break;
+ }
+
+ if (len != 0)
+ {
+ do
+ {
+ const UInt32 v0 = bt[0];
+ const UInt32 v1 = bt[1];
+ bt += 2;
+ d[0] = v0;
+ d[1] = v1;
+ d += 2;
+ }
+ while (bt != btLim);
+ }
+ INCREASE_LZ_POS
+ return d;
+}
+*/
+
+
+static UInt32 *MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
+{
+ UInt32 h2, h3, /* h4, */ c2, c3 /* , c4 */;
+ UInt32 *hash = p->hash;
+ const Byte *cur = p->pointerToCurPos;
+ const UInt32 m = p->lzPos;
+ MT_HASH3_CALC
+ // MT_HASH4_CALC
+ c2 = hash[h2];
+ c3 = (hash + kFix3HashSize)[h3];
+ // c4 = (hash + kFix4HashSize)[h4];
- hash[ h2] = lzPos;
- (hash + kFix3HashSize)[h3] = lzPos;
- (hash + kFix4HashSize)[h4] = lzPos;
+ hash[h2] = m;
+ (hash + kFix3HashSize)[h3] = m;
+ // (hash + kFix4HashSize)[h4] = m;
+
+ #define _USE_H2
- if (curMatch2 >= matchMinPos && cur[(ptrdiff_t)curMatch2 - lzPos] == cur[0])
+ #ifdef _USE_H2
+ if (c2 >= matchMinPos && cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0])
{
- distances[1] = lzPos - curMatch2 - 1;
- if (cur[(ptrdiff_t)curMatch2 - lzPos + 2] == cur[2])
+ d[1] = m - c2 - 1;
+ if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 2] == cur[2])
{
- distances[0] = (cur[(ptrdiff_t)curMatch2 - lzPos + 3] == cur[3]) ? 4 : 3;
- return distances + 2;
+ // d[0] = (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 3] == cur[3]) ? 4 : 3;
+ // return d + 2;
+
+ if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 3] == cur[3])
+ {
+ d[0] = 4;
+ return d + 2;
+ }
+ d[0] = 3;
+ d += 2;
+
+ #ifdef _USE_H4
+ if (c4 >= matchMinPos)
+ if (
+ cur[(ptrdiff_t)c4 - (ptrdiff_t)m] == cur[0] &&
+ cur[(ptrdiff_t)c4 - (ptrdiff_t)m + 3] == cur[3]
+ )
+ {
+ *d++ = 4;
+ *d++ = m - c4 - 1;
+ }
+ #endif
+ return d;
}
- distances[0] = 2;
- distances += 2;
+ d[0] = 2;
+ d += 2;
}
+ #endif
- if (curMatch3 >= matchMinPos && cur[(ptrdiff_t)curMatch3 - lzPos] == cur[0])
+ if (c3 >= matchMinPos && cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0])
{
- distances[1] = lzPos - curMatch3 - 1;
- if (cur[(ptrdiff_t)curMatch3 - lzPos + 3] == cur[3])
+ d[1] = m - c3 - 1;
+ if (cur[(ptrdiff_t)c3 - (ptrdiff_t)m + 3] == cur[3])
{
- distances[0] = 4;
- return distances + 2;
+ d[0] = 4;
+ return d + 2;
}
- distances[0] = 3;
- distances += 2;
+ d[0] = 3;
+ d += 2;
}
- if (curMatch4 >= matchMinPos)
+ #ifdef _USE_H4
+ if (c4 >= matchMinPos)
if (
- cur[(ptrdiff_t)curMatch4 - lzPos] == cur[0] &&
- cur[(ptrdiff_t)curMatch4 - lzPos + 3] == cur[3]
+ cur[(ptrdiff_t)c4 - (ptrdiff_t)m] == cur[0] &&
+ cur[(ptrdiff_t)c4 - (ptrdiff_t)m + 3] == cur[3]
)
{
- *distances++ = 4;
- *distances++ = lzPos - curMatch4 - 1;
+ *d++ = 4;
+ *d++ = m - c4 - 1;
}
+ #endif
- return distances;
+ return d;
}
-*/
-#define INCREASE_LZ_POS p->lzPos++; p->pointerToCurPos++;
-static UInt32 MatchFinderMt2_GetMatches(CMatchFinderMt *p, UInt32 *distances)
+static UInt32* MatchFinderMt2_GetMatches(CMatchFinderMt *p, UInt32 *d)
{
- const UInt32 *btBuf = p->btBuf + p->btBufPos;
- UInt32 len = *btBuf++;
- p->btBufPos += 1 + len;
+ const UInt32 *bt = p->btBufPos;
+ const UInt32 len = *bt++;
+ const UInt32 *btLim = bt + len;
+ p->btBufPos = btLim;
p->btNumAvailBytes--;
+ INCREASE_LZ_POS
{
- UInt32 i;
- for (i = 0; i < len; i += 2)
+ while (bt != btLim)
{
- UInt32 v0 = btBuf[0];
- UInt32 v1 = btBuf[1];
- btBuf += 2;
- distances[0] = v0;
- distances[1] = v1;
- distances += 2;
+ const UInt32 v0 = bt[0];
+ const UInt32 v1 = bt[1];
+ bt += 2;
+ d[0] = v0;
+ d[1] = v1;
+ d += 2;
}
}
- INCREASE_LZ_POS
- return len;
+ return d;
}
-static UInt32 MatchFinderMt_GetMatches(CMatchFinderMt *p, UInt32 *distances)
-{
- const UInt32 *btBuf = p->btBuf + p->btBufPos;
- UInt32 len = *btBuf++;
- p->btBufPos += 1 + len;
+
+static UInt32* MatchFinderMt_GetMatches(CMatchFinderMt *p, UInt32 *d)
+{
+ const UInt32 *bt = p->btBufPos;
+ UInt32 len = *bt++;
+ const UInt32 avail = p->btNumAvailBytes - 1;
+ p->btNumAvailBytes = avail;
+ p->btBufPos = bt + len;
if (len == 0)
{
- /* change for bt5 ! */
- if (p->btNumAvailBytes-- >= 4)
- len = (UInt32)(p->MixMatchesFunc(p, p->lzPos - p->historySize, distances) - (distances));
+ #define BT_HASH_BYTES_MAX 5
+ if (avail >= (BT_HASH_BYTES_MAX - 1) - 1)
+ {
+ UInt32 m = p->lzPos;
+ if (m > p->historySize)
+ m -= p->historySize;
+ else
+ m = 1;
+ d = p->MixMatchesFunc(p, m, d);
+ }
}
else
{
- /* Condition: there are matches in btBuf with length < p->numHashBytes */
- UInt32 *distances2;
- p->btNumAvailBytes--;
- distances2 = p->MixMatchesFunc(p, p->lzPos - btBuf[1], distances);
+ /*
+ first match pair from BinTree: (match_len, match_dist),
+ (match_len >= numHashBytes).
+ MixMatchesFunc() inserts only hash matches that are nearer than (match_dist)
+ */
+ d = p->MixMatchesFunc(p, p->lzPos - bt[1], d);
+ // if (d) // check for failure
do
{
- UInt32 v0 = btBuf[0];
- UInt32 v1 = btBuf[1];
- btBuf += 2;
- distances2[0] = v0;
- distances2[1] = v1;
- distances2 += 2;
+ const UInt32 v0 = bt[0];
+ const UInt32 v1 = bt[1];
+ bt += 2;
+ d[0] = v0;
+ d[1] = v1;
+ d += 2;
}
- while ((len -= 2) != 0);
- len = (UInt32)(distances2 - (distances));
+ while (len -= 2);
}
INCREASE_LZ_POS
- return len;
+ return d;
}
#define SKIP_HEADER2_MT do { GET_NEXT_BLOCK_IF_REQUIRED
#define SKIP_HEADER_MT(n) SKIP_HEADER2_MT if (p->btNumAvailBytes-- >= (n)) { const Byte *cur = p->pointerToCurPos; UInt32 *hash = p->hash;
-#define SKIP_FOOTER_MT } INCREASE_LZ_POS p->btBufPos += p->btBuf[p->btBufPos] + 1; } while (--num != 0);
+#define SKIP_FOOTER_MT } INCREASE_LZ_POS p->btBufPos += (size_t)*p->btBufPos + 1; } while (--num != 0);
static void MatchFinderMt0_Skip(CMatchFinderMt *p, UInt32 num)
{
@@ -803,12 +1343,16 @@ static void MatchFinderMt3_Skip(CMatchFinderMt *p, UInt32 num)
}
/*
+// MatchFinderMt4_Skip() is similar to MatchFinderMt3_Skip().
+// The difference is that MatchFinderMt3_Skip() updates hash for last 3 bytes of stream.
+
static void MatchFinderMt4_Skip(CMatchFinderMt *p, UInt32 num)
{
SKIP_HEADER_MT(4)
- UInt32 h2, h3, h4;
- MT_HASH4_CALC
- (hash + kFix4HashSize)[h4] =
+ UInt32 h2, h3; // h4
+ MT_HASH3_CALC
+ // MT_HASH4_CALC
+ // (hash + kFix4HashSize)[h4] =
(hash + kFix3HashSize)[h3] =
hash[ h2] =
p->lzPos;
@@ -816,14 +1360,14 @@ static void MatchFinderMt4_Skip(CMatchFinderMt *p, UInt32 num)
}
*/
-void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder *vTable)
+void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder2 *vTable)
{
vTable->Init = (Mf_Init_Func)MatchFinderMt_Init;
vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinderMt_GetNumAvailableBytes;
vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinderMt_GetPointerToCurrentPos;
vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt_GetMatches;
- switch (p->MatchFinder->numHashBytes)
+ switch (MF(p)->numHashBytes)
{
case 2:
p->GetHeadsFunc = GetHeads2;
@@ -832,22 +1376,25 @@ void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder *vTable)
vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt2_GetMatches;
break;
case 3:
- p->GetHeadsFunc = GetHeads3;
+ p->GetHeadsFunc = MF(p)->bigHash ? GetHeads3b : GetHeads3;
p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches2;
vTable->Skip = (Mf_Skip_Func)MatchFinderMt2_Skip;
break;
- default:
- /* case 4: */
- p->GetHeadsFunc = p->MatchFinder->bigHash ? GetHeads4b : GetHeads4;
+ case 4:
+ p->GetHeadsFunc = MF(p)->bigHash ? GetHeads4b : GetHeads4;
+
+ // it's fast inline version of GetMatches()
+ // vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt_GetMatches_Bt4;
+
p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches3;
vTable->Skip = (Mf_Skip_Func)MatchFinderMt3_Skip;
break;
- /*
default:
- p->GetHeadsFunc = GetHeads5;
+ p->GetHeadsFunc = MF(p)->bigHash ? GetHeads5b : GetHeads5;
p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches4;
- vTable->Skip = (Mf_Skip_Func)MatchFinderMt4_Skip;
+ vTable->Skip =
+ (Mf_Skip_Func)MatchFinderMt3_Skip;
+ // (Mf_Skip_Func)MatchFinderMt4_Skip;
break;
- */
}
}
diff --git a/multiarc/src/formats/7z/C/LzFindMt.h b/multiarc/src/formats/7z/C/LzFindMt.h
index ef431e3f..660b7244 100644..100755
--- a/multiarc/src/formats/7z/C/LzFindMt.h
+++ b/multiarc/src/formats/7z/C/LzFindMt.h
@@ -1,5 +1,5 @@
/* LzFindMt.h -- multithreaded Match finder for LZ algorithms
-2018-07-04 : Igor Pavlov : Public domain */
+2021-07-12 : Igor Pavlov : Public domain */
#ifndef __LZ_FIND_MT_H
#define __LZ_FIND_MT_H
@@ -9,31 +9,26 @@
EXTERN_C_BEGIN
-#define kMtHashBlockSize (1 << 13)
-#define kMtHashNumBlocks (1 << 3)
-#define kMtHashNumBlocksMask (kMtHashNumBlocks - 1)
-
-#define kMtBtBlockSize (1 << 14)
-#define kMtBtNumBlocks (1 << 6)
-#define kMtBtNumBlocksMask (kMtBtNumBlocks - 1)
-
typedef struct _CMtSync
{
+ UInt32 numProcessedBlocks;
+ CThread thread;
+ UInt64 affinity;
+
BoolInt wasCreated;
BoolInt needStart;
+ BoolInt csWasInitialized;
+ BoolInt csWasEntered;
+
BoolInt exit;
BoolInt stopWriting;
- CThread thread;
CAutoResetEvent canStart;
- CAutoResetEvent wasStarted;
CAutoResetEvent wasStopped;
CSemaphore freeSemaphore;
CSemaphore filledSemaphore;
- BoolInt csWasInitialized;
- BoolInt csWasEntered;
CCriticalSection cs;
- UInt32 numProcessedBlocks;
+ // UInt32 numBlocks_Sent;
} CMtSync;
typedef UInt32 * (*Mf_Mix_Matches)(void *p, UInt32 matchMinPos, UInt32 *distances);
@@ -49,18 +44,23 @@ typedef struct _CMatchFinderMt
/* LZ */
const Byte *pointerToCurPos;
UInt32 *btBuf;
- UInt32 btBufPos;
- UInt32 btBufPosLimit;
+ const UInt32 *btBufPos;
+ const UInt32 *btBufPosLimit;
UInt32 lzPos;
UInt32 btNumAvailBytes;
UInt32 *hash;
UInt32 fixedHashSize;
+ // UInt32 hash4Mask;
UInt32 historySize;
const UInt32 *crc;
Mf_Mix_Matches MixMatchesFunc;
-
+ UInt32 failure_LZ_BT; // failure in BT transferred to LZ
+ // UInt32 failure_LZ_LZ; // failure in LZ tables
+ UInt32 failureBuf[1];
+ // UInt32 crc[256];
+
/* LZ + BT */
CMtSync btSync;
Byte btDummy[kMtCacheLineDummy];
@@ -70,6 +70,8 @@ typedef struct _CMatchFinderMt
UInt32 hashBufPos;
UInt32 hashBufPosLimit;
UInt32 hashNumAvail;
+ UInt32 failure_BT;
+
CLzRef *son;
UInt32 matchMaxLen;
@@ -77,7 +79,7 @@ typedef struct _CMatchFinderMt
UInt32 pos;
const Byte *buffer;
UInt32 cyclicBufferPos;
- UInt32 cyclicBufferSize; /* it must be historySize + 1 */
+ UInt32 cyclicBufferSize; /* it must be = (historySize + 1) */
UInt32 cutValue;
/* BT + Hash */
@@ -87,13 +89,19 @@ typedef struct _CMatchFinderMt
/* Hash */
Mf_GetHeads GetHeadsFunc;
CMatchFinder *MatchFinder;
+ // CMatchFinder MatchFinder;
} CMatchFinderMt;
+// only for Mt part
void MatchFinderMt_Construct(CMatchFinderMt *p);
void MatchFinderMt_Destruct(CMatchFinderMt *p, ISzAllocPtr alloc);
+
SRes MatchFinderMt_Create(CMatchFinderMt *p, UInt32 historySize, UInt32 keepAddBufferBefore,
UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAllocPtr alloc);
-void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder *vTable);
+void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder2 *vTable);
+
+/* call MatchFinderMt_InitMt() before IMatchFinder::Init() */
+SRes MatchFinderMt_InitMt(CMatchFinderMt *p);
void MatchFinderMt_ReleaseStream(CMatchFinderMt *p);
EXTERN_C_END
diff --git a/multiarc/src/formats/7z/C/LzFindOpt.c b/multiarc/src/formats/7z/C/LzFindOpt.c
new file mode 100755
index 00000000..8ff006e0
--- /dev/null
+++ b/multiarc/src/formats/7z/C/LzFindOpt.c
@@ -0,0 +1,578 @@
+/* LzFindOpt.c -- multithreaded Match finder for LZ algorithms
+2021-07-13 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#include "CpuArch.h"
+#include "LzFind.h"
+
+// #include "LzFindMt.h"
+
+// #define LOG_ITERS
+
+// #define LOG_THREAD
+
+#ifdef LOG_THREAD
+#include <stdio.h>
+#define PRF(x) x
+#else
+// #define PRF(x)
+#endif
+
+#ifdef LOG_ITERS
+#include <stdio.h>
+UInt64 g_NumIters_Tree;
+UInt64 g_NumIters_Loop;
+UInt64 g_NumIters_Bytes;
+#define LOG_ITER(x) x
+#else
+#define LOG_ITER(x)
+#endif
+
+// ---------- BT THREAD ----------
+
+#define USE_SON_PREFETCH
+#define USE_LONG_MATCH_OPT
+
+#define kEmptyHashValue 0
+
+// #define CYC_TO_POS_OFFSET 0
+
+// #define CYC_TO_POS_OFFSET 1 // for debug
+
+/*
+MY_NO_INLINE
+UInt32 * MY_FAST_CALL GetMatchesSpecN_1(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
+ UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size, UInt32 *posRes)
+{
+ do
+ {
+ UInt32 delta;
+ if (hash == size)
+ break;
+ delta = *hash++;
+
+ if (delta == 0 || delta > (UInt32)pos)
+ return NULL;
+
+ lenLimit++;
+
+ if (delta == (UInt32)pos)
+ {
+ CLzRef *ptr1 = son + ((size_t)pos << 1) - CYC_TO_POS_OFFSET * 2;
+ *d++ = 0;
+ ptr1[0] = kEmptyHashValue;
+ ptr1[1] = kEmptyHashValue;
+ }
+else
+{
+ UInt32 *_distances = ++d;
+
+ CLzRef *ptr0 = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2 + 1;
+ CLzRef *ptr1 = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2;
+
+ const Byte *len0 = cur, *len1 = cur;
+ UInt32 cutValue = _cutValue;
+ const Byte *maxLen = cur + _maxLen;
+
+ for (LOG_ITER(g_NumIters_Tree++);;)
+ {
+ LOG_ITER(g_NumIters_Loop++);
+ {
+ const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
+ CLzRef *pair = son + ((size_t)(((ptrdiff_t)pos - CYC_TO_POS_OFFSET) + diff) << 1);
+ const Byte *len = (len0 < len1 ? len0 : len1);
+
+ #ifdef USE_SON_PREFETCH
+ const UInt32 pair0 = *pair;
+ #endif
+
+ if (len[diff] == len[0])
+ {
+ if (++len != lenLimit && len[diff] == len[0])
+ while (++len != lenLimit)
+ {
+ LOG_ITER(g_NumIters_Bytes++);
+ if (len[diff] != len[0])
+ break;
+ }
+ if (maxLen < len)
+ {
+ maxLen = len;
+ *d++ = (UInt32)(len - cur);
+ *d++ = delta - 1;
+
+ if (len == lenLimit)
+ {
+ const UInt32 pair1 = pair[1];
+ *ptr1 =
+ #ifdef USE_SON_PREFETCH
+ pair0;
+ #else
+ pair[0];
+ #endif
+ *ptr0 = pair1;
+
+ _distances[-1] = (UInt32)(d - _distances);
+
+ #ifdef USE_LONG_MATCH_OPT
+
+ if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)
+ break;
+
+ {
+ for (;;)
+ {
+ hash++;
+ pos++;
+ cur++;
+ lenLimit++;
+ {
+ CLzRef *ptr = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2;
+ #if 0
+ *(UInt64 *)(void *)ptr = ((const UInt64 *)(const void *)ptr)[diff];
+ #else
+ const UInt32 p0 = ptr[0 + (diff * 2)];
+ const UInt32 p1 = ptr[1 + (diff * 2)];
+ ptr[0] = p0;
+ ptr[1] = p1;
+ // ptr[0] = ptr[0 + (diff * 2)];
+ // ptr[1] = ptr[1 + (diff * 2)];
+ #endif
+ }
+ // PrintSon(son + 2, pos - 1);
+ // printf("\npos = %x delta = %x\n", pos, delta);
+ len++;
+ *d++ = 2;
+ *d++ = (UInt32)(len - cur);
+ *d++ = delta - 1;
+ if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)
+ break;
+ }
+ }
+ #endif
+
+ break;
+ }
+ }
+ }
+
+ {
+ const UInt32 curMatch = (UInt32)pos - delta; // (UInt32)(pos + diff);
+ if (len[diff] < len[0])
+ {
+ delta = pair[1];
+ if (delta >= curMatch)
+ return NULL;
+ *ptr1 = curMatch;
+ ptr1 = pair + 1;
+ len1 = len;
+ }
+ else
+ {
+ delta = *pair;
+ if (delta >= curMatch)
+ return NULL;
+ *ptr0 = curMatch;
+ ptr0 = pair;
+ len0 = len;
+ }
+
+ delta = (UInt32)pos - delta;
+
+ if (--cutValue == 0 || delta >= pos)
+ {
+ *ptr0 = *ptr1 = kEmptyHashValue;
+ _distances[-1] = (UInt32)(d - _distances);
+ break;
+ }
+ }
+ }
+ } // for (tree iterations)
+}
+ pos++;
+ cur++;
+ }
+ while (d < limit);
+ *posRes = (UInt32)pos;
+ return d;
+}
+*/
+
+/* define cbs if you use 2 functions.
+ GetMatchesSpecN_1() : (pos < _cyclicBufferSize)
+ GetMatchesSpecN_2() : (pos >= _cyclicBufferSize)
+
+ do not define cbs if you use 1 function:
+ GetMatchesSpecN_2()
+*/
+
+// #define cbs _cyclicBufferSize
+
+/*
+ we use size_t for (pos) and (_cyclicBufferPos) instead of UInt32
+ to eliminate "movsx" BUG in old MSVC x64 compiler.
+*/
+
+UInt32 * MY_FAST_CALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
+ UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
+ size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
+ UInt32 *posRes);
+
+MY_NO_INLINE
+UInt32 * MY_FAST_CALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
+ UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
+ size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
+ UInt32 *posRes)
+{
+ do // while (hash != size)
+ {
+ UInt32 delta;
+
+ #ifndef cbs
+ UInt32 cbs;
+ #endif
+
+ if (hash == size)
+ break;
+
+ delta = *hash++;
+
+ if (delta == 0)
+ return NULL;
+
+ lenLimit++;
+
+ #ifndef cbs
+ cbs = _cyclicBufferSize;
+ if ((UInt32)pos < cbs)
+ {
+ if (delta > (UInt32)pos)
+ return NULL;
+ cbs = (UInt32)pos;
+ }
+ #endif
+
+ if (delta >= cbs)
+ {
+ CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
+ *d++ = 0;
+ ptr1[0] = kEmptyHashValue;
+ ptr1[1] = kEmptyHashValue;
+ }
+else
+{
+ UInt32 *_distances = ++d;
+
+ CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
+ CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
+
+ UInt32 cutValue = _cutValue;
+ const Byte *len0 = cur, *len1 = cur;
+ const Byte *maxLen = cur + _maxLen;
+
+ // if (cutValue == 0) { *ptr0 = *ptr1 = kEmptyHashValue; } else
+ for (LOG_ITER(g_NumIters_Tree++);;)
+ {
+ LOG_ITER(g_NumIters_Loop++);
+ {
+ // SPEC code
+ CLzRef *pair = son + ((size_t)((ptrdiff_t)_cyclicBufferPos - (ptrdiff_t)delta
+ + (ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0)
+ ) << 1);
+
+ const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
+ const Byte *len = (len0 < len1 ? len0 : len1);
+
+ #ifdef USE_SON_PREFETCH
+ const UInt32 pair0 = *pair;
+ #endif
+
+ if (len[diff] == len[0])
+ {
+ if (++len != lenLimit && len[diff] == len[0])
+ while (++len != lenLimit)
+ {
+ LOG_ITER(g_NumIters_Bytes++);
+ if (len[diff] != len[0])
+ break;
+ }
+ if (maxLen < len)
+ {
+ maxLen = len;
+ *d++ = (UInt32)(len - cur);
+ *d++ = delta - 1;
+
+ if (len == lenLimit)
+ {
+ const UInt32 pair1 = pair[1];
+ *ptr1 =
+ #ifdef USE_SON_PREFETCH
+ pair0;
+ #else
+ pair[0];
+ #endif
+ *ptr0 = pair1;
+
+ _distances[-1] = (UInt32)(d - _distances);
+
+ #ifdef USE_LONG_MATCH_OPT
+
+ if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)
+ break;
+
+ {
+ for (;;)
+ {
+ *d++ = 2;
+ *d++ = (UInt32)(lenLimit - cur);
+ *d++ = delta - 1;
+ cur++;
+ lenLimit++;
+ // SPEC
+ _cyclicBufferPos++;
+ {
+ // SPEC code
+ CLzRef *dest = son + ((size_t)(_cyclicBufferPos) << 1);
+ const CLzRef *src = dest + ((diff
+ + (ptrdiff_t)(UInt32)((_cyclicBufferPos < delta) ? cbs : 0)) << 1);
+ // CLzRef *ptr = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2;
+ #if 0
+ *(UInt64 *)(void *)dest = *((const UInt64 *)(const void *)src);
+ #else
+ const UInt32 p0 = src[0];
+ const UInt32 p1 = src[1];
+ dest[0] = p0;
+ dest[1] = p1;
+ #endif
+ }
+ pos++;
+ hash++;
+ if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)
+ break;
+ } // for() end for long matches
+ }
+ #endif
+
+ break; // break from TREE iterations
+ }
+ }
+ }
+ {
+ const UInt32 curMatch = (UInt32)pos - delta; // (UInt32)(pos + diff);
+ if (len[diff] < len[0])
+ {
+ delta = pair[1];
+ *ptr1 = curMatch;
+ ptr1 = pair + 1;
+ len1 = len;
+ if (delta >= curMatch)
+ return NULL;
+ }
+ else
+ {
+ delta = *pair;
+ *ptr0 = curMatch;
+ ptr0 = pair;
+ len0 = len;
+ if (delta >= curMatch)
+ return NULL;
+ }
+ delta = (UInt32)pos - delta;
+
+ if (--cutValue == 0 || delta >= cbs)
+ {
+ *ptr0 = *ptr1 = kEmptyHashValue;
+ _distances[-1] = (UInt32)(d - _distances);
+ break;
+ }
+ }
+ }
+ } // for (tree iterations)
+}
+ pos++;
+ _cyclicBufferPos++;
+ cur++;
+ }
+ while (d < limit);
+ *posRes = (UInt32)pos;
+ return d;
+}
+
+
+
+/*
+typedef UInt32 uint32plus; // size_t
+
+UInt32 * MY_FAST_CALL GetMatchesSpecN_3(uint32plus lenLimit, size_t pos, const Byte *cur, CLzRef *son,
+ UInt32 _cutValue, UInt32 *d, uint32plus _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
+ size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
+ UInt32 *posRes)
+{
+ do // while (hash != size)
+ {
+ UInt32 delta;
+
+ #ifndef cbs
+ UInt32 cbs;
+ #endif
+
+ if (hash == size)
+ break;
+
+ delta = *hash++;
+
+ if (delta == 0)
+ return NULL;
+
+ #ifndef cbs
+ cbs = _cyclicBufferSize;
+ if ((UInt32)pos < cbs)
+ {
+ if (delta > (UInt32)pos)
+ return NULL;
+ cbs = (UInt32)pos;
+ }
+ #endif
+
+ if (delta >= cbs)
+ {
+ CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
+ *d++ = 0;
+ ptr1[0] = kEmptyHashValue;
+ ptr1[1] = kEmptyHashValue;
+ }
+else
+{
+ CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
+ CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
+ UInt32 *_distances = ++d;
+ uint32plus len0 = 0, len1 = 0;
+ UInt32 cutValue = _cutValue;
+ uint32plus maxLen = _maxLen;
+ // lenLimit++; // const Byte *lenLimit = cur + _lenLimit;
+
+ for (LOG_ITER(g_NumIters_Tree++);;)
+ {
+ LOG_ITER(g_NumIters_Loop++);
+ {
+ // const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
+ CLzRef *pair = son + ((size_t)((ptrdiff_t)_cyclicBufferPos - delta
+ + (ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0)
+ ) << 1);
+ const Byte *pb = cur - delta;
+ uint32plus len = (len0 < len1 ? len0 : len1);
+
+ #ifdef USE_SON_PREFETCH
+ const UInt32 pair0 = *pair;
+ #endif
+
+ if (pb[len] == cur[len])
+ {
+ if (++len != lenLimit && pb[len] == cur[len])
+ while (++len != lenLimit)
+ if (pb[len] != cur[len])
+ break;
+ if (maxLen < len)
+ {
+ maxLen = len;
+ *d++ = (UInt32)len;
+ *d++ = delta - 1;
+ if (len == lenLimit)
+ {
+ {
+ const UInt32 pair1 = pair[1];
+ *ptr0 = pair1;
+ *ptr1 =
+ #ifdef USE_SON_PREFETCH
+ pair0;
+ #else
+ pair[0];
+ #endif
+ }
+
+ _distances[-1] = (UInt32)(d - _distances);
+
+ #ifdef USE_LONG_MATCH_OPT
+
+ if (hash == size || *hash != delta || pb[lenLimit] != cur[lenLimit] || d >= limit)
+ break;
+
+ {
+ const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
+ for (;;)
+ {
+ *d++ = 2;
+ *d++ = (UInt32)lenLimit;
+ *d++ = delta - 1;
+ _cyclicBufferPos++;
+ {
+ CLzRef *dest = son + ((size_t)_cyclicBufferPos << 1);
+ const CLzRef *src = dest + ((diff +
+ (ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0)) << 1);
+ #if 0
+ *(UInt64 *)(void *)dest = *((const UInt64 *)(const void *)src);
+ #else
+ const UInt32 p0 = src[0];
+ const UInt32 p1 = src[1];
+ dest[0] = p0;
+ dest[1] = p1;
+ #endif
+ }
+ hash++;
+ pos++;
+ cur++;
+ pb++;
+ if (hash == size || *hash != delta || pb[lenLimit] != cur[lenLimit] || d >= limit)
+ break;
+ }
+ }
+ #endif
+
+ break;
+ }
+ }
+ }
+ {
+ const UInt32 curMatch = (UInt32)pos - delta;
+ if (pb[len] < cur[len])
+ {
+ delta = pair[1];
+ *ptr1 = curMatch;
+ ptr1 = pair + 1;
+ len1 = len;
+ }
+ else
+ {
+ delta = *pair;
+ *ptr0 = curMatch;
+ ptr0 = pair;
+ len0 = len;
+ }
+
+ {
+ if (delta >= curMatch)
+ return NULL;
+ delta = (UInt32)pos - delta;
+ if (delta >= cbs
+ // delta >= _cyclicBufferSize || delta >= pos
+ || --cutValue == 0)
+ {
+ *ptr0 = *ptr1 = kEmptyHashValue;
+ _distances[-1] = (UInt32)(d - _distances);
+ break;
+ }
+ }
+ }
+ }
+ } // for (tree iterations)
+}
+ pos++;
+ _cyclicBufferPos++;
+ cur++;
+ }
+ while (d < limit);
+ *posRes = (UInt32)pos;
+ return d;
+}
+*/
diff --git a/multiarc/src/formats/7z/C/LzHash.h b/multiarc/src/formats/7z/C/LzHash.h
index e7c94230..77b898cf 100644..100755
--- a/multiarc/src/formats/7z/C/LzHash.h
+++ b/multiarc/src/formats/7z/C/LzHash.h
@@ -1,57 +1,34 @@
/* LzHash.h -- HASH functions for LZ algorithms
-2015-04-12 : Igor Pavlov : Public domain */
+2019-10-30 : Igor Pavlov : Public domain */
#ifndef __LZ_HASH_H
#define __LZ_HASH_H
+/*
+ (kHash2Size >= (1 << 8)) : Required
+ (kHash3Size >= (1 << 16)) : Required
+*/
+
#define kHash2Size (1 << 10)
#define kHash3Size (1 << 16)
-#define kHash4Size (1 << 20)
+// #define kHash4Size (1 << 20)
#define kFix3HashSize (kHash2Size)
#define kFix4HashSize (kHash2Size + kHash3Size)
-#define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size)
-
-#define HASH2_CALC hv = cur[0] | ((UInt32)cur[1] << 8);
-
-#define HASH3_CALC { \
- UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
- h2 = temp & (kHash2Size - 1); \
- hv = (temp ^ ((UInt32)cur[2] << 8)) & p->hashMask; }
-
-#define HASH4_CALC { \
- UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
- h2 = temp & (kHash2Size - 1); \
- temp ^= ((UInt32)cur[2] << 8); \
- h3 = temp & (kHash3Size - 1); \
- hv = (temp ^ (p->crc[cur[3]] << 5)) & p->hashMask; }
-
-#define HASH5_CALC { \
- UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
- h2 = temp & (kHash2Size - 1); \
- temp ^= ((UInt32)cur[2] << 8); \
- h3 = temp & (kHash3Size - 1); \
- temp ^= (p->crc[cur[3]] << 5); \
- h4 = temp & (kHash4Size - 1); \
- hv = (temp ^ (p->crc[cur[4]] << 3)) & p->hashMask; }
-
-/* #define HASH_ZIP_CALC hv = ((cur[0] | ((UInt32)cur[1] << 8)) ^ p->crc[cur[2]]) & 0xFFFF; */
-#define HASH_ZIP_CALC hv = ((cur[2] | ((UInt32)cur[0] << 8)) ^ p->crc[cur[1]]) & 0xFFFF;
-
-
-#define MT_HASH2_CALC \
- h2 = (p->crc[cur[0]] ^ cur[1]) & (kHash2Size - 1);
-
-#define MT_HASH3_CALC { \
- UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
- h2 = temp & (kHash2Size - 1); \
- h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); }
-
-#define MT_HASH4_CALC { \
- UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
- h2 = temp & (kHash2Size - 1); \
- temp ^= ((UInt32)cur[2] << 8); \
- h3 = temp & (kHash3Size - 1); \
- h4 = (temp ^ (p->crc[cur[3]] << 5)) & (kHash4Size - 1); }
+// #define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size)
+
+/*
+ We use up to 3 crc values for hash:
+ crc0
+ crc1 << Shift_1
+ crc2 << Shift_2
+ (Shift_1 = 5) and (Shift_2 = 10) is good tradeoff.
+ Small values for Shift are not good for collision rate.
+ Big value for Shift_2 increases the minimum size
+ of hash table, that will be slow for small files.
+*/
+
+#define kLzHash_CrcShift_1 5
+#define kLzHash_CrcShift_2 10
#endif
diff --git a/multiarc/src/formats/7z/C/Lzma2Dec.c b/multiarc/src/formats/7z/C/Lzma2Dec.c
index 4e138a4a..ac970a84 100644..100755
--- a/multiarc/src/formats/7z/C/Lzma2Dec.c
+++ b/multiarc/src/formats/7z/C/Lzma2Dec.c
@@ -1,5 +1,5 @@
/* Lzma2Dec.c -- LZMA2 Decoder
-2019-02-02 : Igor Pavlov : Public domain */
+2021-02-09 : Igor Pavlov : Public domain */
/* #define SHOW_DEBUG_INFO */
@@ -93,7 +93,8 @@ void Lzma2Dec_Init(CLzma2Dec *p)
LzmaDec_Init(&p->decoder);
}
-static ELzma2State Lzma2Dec_UpdateState(CLzma2Dec *p, Byte b)
+// ELzma2State
+static unsigned Lzma2Dec_UpdateState(CLzma2Dec *p, Byte b)
{
switch (p->state)
{
diff --git a/multiarc/src/formats/7z/C/Lzma2Dec.h b/multiarc/src/formats/7z/C/Lzma2Dec.h
index b8ddeac8..b8ddeac8 100644..100755
--- a/multiarc/src/formats/7z/C/Lzma2Dec.h
+++ b/multiarc/src/formats/7z/C/Lzma2Dec.h
diff --git a/multiarc/src/formats/7z/C/Lzma2DecMt.c b/multiarc/src/formats/7z/C/Lzma2DecMt.c
index 988643d9..9f1dc52b 100644..100755
--- a/multiarc/src/formats/7z/C/Lzma2DecMt.c
+++ b/multiarc/src/formats/7z/C/Lzma2DecMt.c
@@ -1,25 +1,25 @@
/* Lzma2DecMt.c -- LZMA2 Decoder Multi-thread
-2019-02-02 : Igor Pavlov : Public domain */
+2021-04-01 : Igor Pavlov : Public domain */
#include "Precomp.h"
// #define SHOW_DEBUG_INFO
+// #define _7ZIP_ST
+
#ifdef SHOW_DEBUG_INFO
#include <stdio.h>
#endif
+#ifndef _7ZIP_ST
#ifdef SHOW_DEBUG_INFO
#define PRF(x) x
#else
#define PRF(x)
#endif
-
#define PRF_STR(s) PRF(printf("\n" s "\n"))
-#define PRF_STR_INT(s, d) PRF(printf("\n" s " %d\n", (unsigned)d))
#define PRF_STR_INT_2(s, d1, d2) PRF(printf("\n" s " %d %d\n", (unsigned)d1, (unsigned)d2))
-
-// #define _7ZIP_ST
+#endif
#include "Alloc.h"
@@ -28,10 +28,10 @@
#ifndef _7ZIP_ST
#include "MtDec.h"
-#endif
-
#define LZMA2DECMT_OUT_BLOCK_MAX_DEFAULT (1 << 28)
+#endif
+
void Lzma2DecMtProps_Init(CLzma2DecMtProps *p)
{
@@ -255,7 +255,7 @@ static void Lzma2DecMt_MtCallback_Parse(void *obj, unsigned coderIndex, CMtDecCa
const unsigned kNumAlignBits = 12;
const unsigned kNumCacheLineBits = 7; /* <= kNumAlignBits */
t->alloc.numAlignBits = kNumAlignBits;
- t->alloc.offset = ((UInt32)coderIndex * ((1 << 11) + (1 << 8) + (1 << 6))) & ((1 << kNumAlignBits) - (1 << kNumCacheLineBits));
+ t->alloc.offset = ((UInt32)coderIndex * (((unsigned)1 << 11) + (1 << 8) + (1 << 6))) & (((unsigned)1 << kNumAlignBits) - ((unsigned)1 << kNumCacheLineBits));
t->alloc.baseAlloc = me->alignOffsetAlloc.baseAlloc;
}
}
@@ -527,7 +527,7 @@ static SRes Lzma2DecMt_MtCallback_Code(void *pp, unsigned coderIndex,
static SRes Lzma2DecMt_MtCallback_Write(void *pp, unsigned coderIndex,
BoolInt needWriteToStream,
- const Byte *src, size_t srcSize,
+ const Byte *src, size_t srcSize, BoolInt isCross,
BoolInt *needContinue, BoolInt *canRecode)
{
CLzma2DecMt *me = (CLzma2DecMt *)pp;
@@ -536,12 +536,14 @@ static SRes Lzma2DecMt_MtCallback_Write(void *pp, unsigned coderIndex,
const Byte *data = t->outBuf;
BoolInt needContinue2 = True;
+ UNUSED_VAR(src)
+ UNUSED_VAR(srcSize)
+ UNUSED_VAR(isCross)
+
PRF_STR_INT_2("Write", coderIndex, srcSize);
*needContinue = False;
*canRecode = True;
- UNUSED_VAR(src)
- UNUSED_VAR(srcSize)
if (
// t->parseStatus == LZMA_STATUS_FINISHED_WITH_MARK
@@ -696,7 +698,7 @@ static SRes Lzma2Dec_Decode_ST(CLzma2DecMt *p
inPos = 0;
inLim = p->inBufSize;
inData = p->inBuf;
- p->readRes = ISeqInStream_Read(p->inStream, (void *)inData, &inLim);
+ p->readRes = ISeqInStream_Read(p->inStream, (void *)(p->inBuf), &inLim);
// p->readProcessed += inLim;
// inLim -= 5; p->readWasFinished = True; // for test
if (inLim == 0 || p->readRes != SZ_OK)
@@ -838,6 +840,7 @@ SRes Lzma2DecMt_Decode(CLzma2DecMtHandle pp,
p->inProcessed = 0;
p->readWasFinished = False;
+ p->readRes = SZ_OK;
*isMT = False;
@@ -856,7 +859,7 @@ SRes Lzma2DecMt_Decode(CLzma2DecMtHandle pp,
if (p->props.numThreads > 1)
{
- IMtDecCallback vt;
+ IMtDecCallback2 vt;
Lzma2DecMt_FreeSt(p);
@@ -955,7 +958,12 @@ SRes Lzma2DecMt_Decode(CLzma2DecMtHandle pp,
*inProcessed = p->inProcessed;
// res = SZ_OK; // for test
- if (res == SZ_OK && p->readRes != SZ_OK)
+ if (res == SZ_ERROR_INPUT_EOF)
+ {
+ if (p->readRes != SZ_OK)
+ res = p->readRes;
+ }
+ else if (res == SZ_OK && p->readRes != SZ_OK)
res = p->readRes;
/*
diff --git a/multiarc/src/formats/7z/C/Lzma2DecMt.h b/multiarc/src/formats/7z/C/Lzma2DecMt.h
index 7791c310..7791c310 100644..100755
--- a/multiarc/src/formats/7z/C/Lzma2DecMt.h
+++ b/multiarc/src/formats/7z/C/Lzma2DecMt.h
diff --git a/multiarc/src/formats/7z/C/Lzma2Enc.c b/multiarc/src/formats/7z/C/Lzma2Enc.c
index 5c1ad493..e61a5dfe 100644..100755
--- a/multiarc/src/formats/7z/C/Lzma2Enc.c
+++ b/multiarc/src/formats/7z/C/Lzma2Enc.c
@@ -1,5 +1,5 @@
/* Lzma2Enc.c -- LZMA2 Encoder
-2018-07-04 : Igor Pavlov : Public domain */
+2021-02-09 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -330,7 +330,7 @@ void Lzma2EncProps_Normalize(CLzma2EncProps *p)
numBlocks++;
if (numBlocks < (unsigned)t2)
{
- t2r = (unsigned)numBlocks;
+ t2r = (int)numBlocks;
if (t2r == 0)
t2r = 1;
t3 = t1 * t2r;
@@ -632,15 +632,15 @@ static SRes Lzma2Enc_EncodeMt1(
{
if (outBuf)
{
- size_t destPos = *outBufSize;
+ const size_t destPos = *outBufSize;
if (destPos >= outLim)
return SZ_ERROR_OUTPUT_EOF;
- outBuf[destPos] = 0;
+ outBuf[destPos] = LZMA2_CONTROL_EOF; // 0
*outBufSize = destPos + 1;
}
else
{
- Byte b = 0;
+ const Byte b = LZMA2_CONTROL_EOF; // 0;
if (ISeqOutStream_Write(outStream, &b, 1) != 1)
return SZ_ERROR_WRITE;
}
@@ -780,13 +780,13 @@ SRes Lzma2Enc_Encode2(CLzma2EncHandle pp,
p->outBufSize = destBlockSize;
}
- p->mtCoder.numThreadsMax = p->props.numBlockThreads_Max;
+ p->mtCoder.numThreadsMax = (unsigned)p->props.numBlockThreads_Max;
p->mtCoder.expectedDataSize = p->expectedDataSize;
{
SRes res = MtCoder_Code(&p->mtCoder);
if (!outStream)
- *outBufSize = p->outBuf - outBuf;
+ *outBufSize = (size_t)(p->outBuf - outBuf);
return res;
}
}
diff --git a/multiarc/src/formats/7z/C/Lzma2Enc.h b/multiarc/src/formats/7z/C/Lzma2Enc.h
index 6a6110ff..6a6110ff 100644..100755
--- a/multiarc/src/formats/7z/C/Lzma2Enc.h
+++ b/multiarc/src/formats/7z/C/Lzma2Enc.h
diff --git a/multiarc/src/formats/7z/C/Lzma86.h b/multiarc/src/formats/7z/C/Lzma86.h
index bebed5cb..bebed5cb 100644..100755
--- a/multiarc/src/formats/7z/C/Lzma86.h
+++ b/multiarc/src/formats/7z/C/Lzma86.h
diff --git a/multiarc/src/formats/7z/C/Lzma86Dec.c b/multiarc/src/formats/7z/C/Lzma86Dec.c
index 21031745..21031745 100644..100755
--- a/multiarc/src/formats/7z/C/Lzma86Dec.c
+++ b/multiarc/src/formats/7z/C/Lzma86Dec.c
diff --git a/multiarc/src/formats/7z/C/Lzma86Enc.c b/multiarc/src/formats/7z/C/Lzma86Enc.c
index 2617bab8..14fcd65c 100644..100755
--- a/multiarc/src/formats/7z/C/Lzma86Enc.c
+++ b/multiarc/src/formats/7z/C/Lzma86Enc.c
@@ -11,8 +11,6 @@
#include "Bra.h"
#include "LzmaEnc.h"
-#define SZE_OUT_OVERFLOW SZE_DATA_ERROR
-
int Lzma86_Encode(Byte *dest, size_t *destLen, const Byte *src, size_t srcLen,
int level, UInt32 dictSize, int filterMode)
{
diff --git a/multiarc/src/formats/7z/C/LzmaDec.c b/multiarc/src/formats/7z/C/LzmaDec.c
index ba3e1dd5..d6742e5a 100644..100755
--- a/multiarc/src/formats/7z/C/LzmaDec.c
+++ b/multiarc/src/formats/7z/C/LzmaDec.c
@@ -1,5 +1,5 @@
/* LzmaDec.c -- LZMA Decoder
-2018-07-04 : Igor Pavlov : Public domain */
+2021-04-01 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -13,10 +13,12 @@
#define kNumBitModelTotalBits 11
#define kBitModelTotal (1 << kNumBitModelTotalBits)
-#define kNumMoveBits 5
#define RC_INIT_SIZE 5
+#ifndef _LZMA_DEC_OPT
+
+#define kNumMoveBits 5
#define NORMALIZE if (range < kTopValue) { range <<= 8; code = (code << 8) | (*buf++); }
#define IF_BIT_0(p) ttt = *(p); NORMALIZE; bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound)
@@ -62,9 +64,10 @@
probLit = prob + (offs + bit + symbol); \
GET_BIT2(probLit, symbol, offs ^= bit; , ;)
+#endif // _LZMA_DEC_OPT
-#define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_ERROR; range <<= 8; code = (code << 8) | (*buf++); }
+#define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_INPUT_EOF; range <<= 8; code = (code << 8) | (*buf++); }
#define IF_BIT_0_CHECK(p) ttt = *(p); NORMALIZE_CHECK; bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound)
#define UPDATE_0_CHECK range = bound;
@@ -114,6 +117,9 @@
#define kMatchMinLen 2
#define kMatchSpecLenStart (kMatchMinLen + kLenNumLowSymbols * 2 + kLenNumHighSymbols)
+#define kMatchSpecLen_Error_Data (1 << 9)
+#define kMatchSpecLen_Error_Fail (kMatchSpecLen_Error_Data - 1)
+
/* External ASM code needs same CLzmaProb array layout. So don't change it. */
/* (probs_1664) is faster and better for code size at some platforms */
@@ -166,10 +172,12 @@
/*
p->remainLen : shows status of LZMA decoder:
- < kMatchSpecLenStart : normal remain
- = kMatchSpecLenStart : finished
- = kMatchSpecLenStart + 1 : need init range coder
- = kMatchSpecLenStart + 2 : need init range coder and state
+ < kMatchSpecLenStart : the number of bytes to be copied with (p->rep0) offset
+ = kMatchSpecLenStart : the LZMA stream was finished with end mark
+ = kMatchSpecLenStart + 1 : need init range coder
+ = kMatchSpecLenStart + 2 : need init range coder and state
+ = kMatchSpecLen_Error_Fail : Internal Code Failure
+ = kMatchSpecLen_Error_Data + [0 ... 273] : LZMA Data Error
*/
/* ---------- LZMA_DECODE_REAL ---------- */
@@ -188,23 +196,31 @@ In:
{
LzmaDec_TryDummy() was called before to exclude LITERAL and MATCH-REP cases.
So first symbol can be only MATCH-NON-REP. And if that MATCH-NON-REP symbol
- is not END_OF_PAYALOAD_MARKER, then function returns error code.
+ is not END_OF_PAYLOAD_MARKER, then the function doesn't write any byte to dictionary,
+ the function returns SZ_OK, and the caller can use (p->remainLen) and (p->reps[0]) later.
}
Processing:
- first LZMA symbol will be decoded in any case
- All checks for limits are at the end of main loop,
- It will decode new LZMA-symbols while (p->buf < bufLimit && dicPos < limit),
+ The first LZMA symbol will be decoded in any case.
+ All main checks for limits are at the end of main loop,
+ It decodes additional LZMA-symbols while (p->buf < bufLimit && dicPos < limit),
RangeCoder is still without last normalization when (p->buf < bufLimit) is being checked.
+ But if (p->buf < bufLimit), the caller provided at least (LZMA_REQUIRED_INPUT_MAX + 1) bytes for
+ next iteration before limit (bufLimit + LZMA_REQUIRED_INPUT_MAX),
+ that is enough for worst case LZMA symbol with one additional RangeCoder normalization for one bit.
+ So that function never reads bufLimit [LZMA_REQUIRED_INPUT_MAX] byte.
Out:
RangeCoder is normalized
Result:
SZ_OK - OK
- SZ_ERROR_DATA - Error
- p->remainLen:
- < kMatchSpecLenStart : normal remain
- = kMatchSpecLenStart : finished
+ p->remainLen:
+ < kMatchSpecLenStart : the number of bytes to be copied with (p->reps[0]) offset
+ = kMatchSpecLenStart : the LZMA stream was finished with end mark
+
+ SZ_ERROR_DATA - error, when the MATCH-Symbol refers out of dictionary
+ p->remainLen : undefined
+ p->reps[*] : undefined
*/
@@ -316,11 +332,6 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
else
{
UPDATE_1(prob);
- /*
- // that case was checked before with kBadRepCode
- if (checkDicSize == 0 && processedPos == 0)
- return SZ_ERROR_DATA;
- */
prob = probs + IsRepG0 + state;
IF_BIT_0(prob)
{
@@ -329,6 +340,13 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
IF_BIT_0(prob)
{
UPDATE_0(prob);
+
+ // that case was checked before with kBadRepCode
+ // if (checkDicSize == 0 && processedPos == 0) { len = kMatchSpecLen_Error_Data + 1; break; }
+ // The caller doesn't allow (dicPos == limit) case here
+ // so we don't need the following check:
+ // if (dicPos == limit) { state = state < kNumLitStates ? 9 : 11; len = 1; break; }
+
dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];
dicPos++;
processedPos++;
@@ -518,8 +536,10 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3;
if (distance >= (checkDicSize == 0 ? processedPos: checkDicSize))
{
- p->dicPos = dicPos;
- return SZ_ERROR_DATA;
+ len += kMatchSpecLen_Error_Data + kMatchMinLen;
+ // len = kMatchSpecLen_Error_Data;
+ // len += kMatchMinLen;
+ break;
}
}
@@ -532,8 +552,13 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
if ((rem = limit - dicPos) == 0)
{
- p->dicPos = dicPos;
- return SZ_ERROR_DATA;
+ /*
+ We stop decoding and return SZ_OK, and we can resume decoding later.
+ Any error conditions can be tested later in caller code.
+ For more strict mode we can stop decoding with error
+ // len += kMatchSpecLen_Error_Data;
+ */
+ break;
}
curLen = ((rem < len) ? (unsigned)rem : len);
@@ -572,7 +597,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
p->buf = buf;
p->range = range;
p->code = code;
- p->remainLen = (UInt32)len;
+ p->remainLen = (UInt32)len; // & (kMatchSpecLen_Error_Data - 1); // we can write real length for error matches too.
p->dicPos = dicPos;
p->processedPos = processedPos;
p->reps[0] = rep0;
@@ -580,40 +605,61 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
p->reps[2] = rep2;
p->reps[3] = rep3;
p->state = (UInt32)state;
-
+ if (len >= kMatchSpecLen_Error_Data)
+ return SZ_ERROR_DATA;
return SZ_OK;
}
#endif
+
+
static void MY_FAST_CALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit)
{
- if (p->remainLen != 0 && p->remainLen < kMatchSpecLenStart)
+ unsigned len = (unsigned)p->remainLen;
+ if (len == 0 /* || len >= kMatchSpecLenStart */)
+ return;
{
- Byte *dic = p->dic;
SizeT dicPos = p->dicPos;
- SizeT dicBufSize = p->dicBufSize;
- unsigned len = (unsigned)p->remainLen;
- SizeT rep0 = p->reps[0]; /* we use SizeT to avoid the BUG of VC14 for AMD64 */
- SizeT rem = limit - dicPos;
- if (rem < len)
- len = (unsigned)(rem);
+ Byte *dic;
+ SizeT dicBufSize;
+ SizeT rep0; /* we use SizeT to avoid the BUG of VC14 for AMD64 */
+ {
+ SizeT rem = limit - dicPos;
+ if (rem < len)
+ {
+ len = (unsigned)(rem);
+ if (len == 0)
+ return;
+ }
+ }
if (p->checkDicSize == 0 && p->prop.dicSize - p->processedPos <= len)
p->checkDicSize = p->prop.dicSize;
p->processedPos += (UInt32)len;
p->remainLen -= (UInt32)len;
- while (len != 0)
+ dic = p->dic;
+ rep0 = p->reps[0];
+ dicBufSize = p->dicBufSize;
+ do
{
- len--;
dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];
dicPos++;
}
+ while (--len);
p->dicPos = dicPos;
}
}
+/*
+At the start of a new stream we have one of the following symbols:
+ - Literal - is allowed
+ - Non-Rep-Match - is allowed only if it's end marker symbol
+ - Rep-Match - is not allowed
+We use early check of (RangeCoder:Code) over kBadRepCode to simplify main decoding code
+*/
+
#define kRange0 0xFFFFFFFF
#define kBound0 ((kRange0 >> kNumBitModelTotalBits) << (kNumBitModelTotalBits - 1))
#define kBadRepCode (kBound0 + (((kRange0 - kBound0) >> kNumBitModelTotalBits) << (kNumBitModelTotalBits - 1)))
@@ -621,69 +667,77 @@ static void MY_FAST_CALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit)
#error Stop_Compiling_Bad_LZMA_Check
#endif
+
+/*
+LzmaDec_DecodeReal2():
+ It calls LZMA_DECODE_REAL() and it adjusts limit according to (p->checkDicSize).
+
+We correct (p->checkDicSize) after LZMA_DECODE_REAL() and in LzmaDec_WriteRem(),
+and we support the following state of (p->checkDicSize):
+ if (total_processed < p->prop.dicSize) then
+ {
+ (total_processed == p->processedPos)
+ (p->checkDicSize == 0)
+ }
+ else
+ (p->checkDicSize == p->prop.dicSize)
+*/
+
static int MY_FAST_CALL LzmaDec_DecodeReal2(CLzmaDec *p, SizeT limit, const Byte *bufLimit)
{
- do
+ if (p->checkDicSize == 0)
{
- SizeT limit2 = limit;
- if (p->checkDicSize == 0)
- {
- UInt32 rem = p->prop.dicSize - p->processedPos;
- if (limit - p->dicPos > rem)
- limit2 = p->dicPos + rem;
-
- if (p->processedPos == 0)
- if (p->code >= kBadRepCode)
- return SZ_ERROR_DATA;
- }
-
- RINOK(LZMA_DECODE_REAL(p, limit2, bufLimit));
-
+ UInt32 rem = p->prop.dicSize - p->processedPos;
+ if (limit - p->dicPos > rem)
+ limit = p->dicPos + rem;
+ }
+ {
+ int res = LZMA_DECODE_REAL(p, limit, bufLimit);
if (p->checkDicSize == 0 && p->processedPos >= p->prop.dicSize)
p->checkDicSize = p->prop.dicSize;
-
- LzmaDec_WriteRem(p, limit);
+ return res;
}
- while (p->dicPos < limit && p->buf < bufLimit && p->remainLen < kMatchSpecLenStart);
-
- return 0;
}
+
+
typedef enum
{
- DUMMY_ERROR, /* unexpected end of input stream */
+ DUMMY_INPUT_EOF, /* need more input data */
DUMMY_LIT,
DUMMY_MATCH,
DUMMY_REP
} ELzmaDummy;
-static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inSize)
+
+#define IS_DUMMY_END_MARKER_POSSIBLE(dummyRes) ((dummyRes) == DUMMY_MATCH)
+
+static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byte **bufOut)
{
UInt32 range = p->range;
UInt32 code = p->code;
- const Byte *bufLimit = buf + inSize;
+ const Byte *bufLimit = *bufOut;
const CLzmaProb *probs = GET_PROBS;
unsigned state = (unsigned)p->state;
ELzmaDummy res;
+ for (;;)
{
const CLzmaProb *prob;
UInt32 bound;
unsigned ttt;
- unsigned posState = CALC_POS_STATE(p->processedPos, (1 << p->prop.pb) - 1);
+ unsigned posState = CALC_POS_STATE(p->processedPos, ((unsigned)1 << p->prop.pb) - 1);
prob = probs + IsMatch + COMBINED_PS_STATE;
IF_BIT_0_CHECK(prob)
{
UPDATE_0_CHECK
- /* if (bufLimit - buf >= 7) return DUMMY_LIT; */
-
prob = probs + Literal;
if (p->checkDicSize != 0 || p->processedPos != 0)
prob += ((UInt32)LZMA_LIT_SIZE *
- ((((p->processedPos) & ((1 << (p->prop.lp)) - 1)) << p->prop.lc) +
- (p->dic[(p->dicPos == 0 ? p->dicBufSize : p->dicPos) - 1] >> (8 - p->prop.lc))));
+ ((((p->processedPos) & (((unsigned)1 << (p->prop.lp)) - 1)) << p->prop.lc) +
+ ((unsigned)p->dic[(p->dicPos == 0 ? p->dicBufSize : p->dicPos) - 1] >> (8 - p->prop.lc))));
if (state < kNumLitStates)
{
@@ -735,8 +789,7 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inS
IF_BIT_0_CHECK(prob)
{
UPDATE_0_CHECK;
- NORMALIZE_CHECK;
- return DUMMY_REP;
+ break;
}
else
{
@@ -812,8 +865,6 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inS
{
unsigned numDirectBits = ((posSlot >> 1) - 1);
- /* if (bufLimit - buf >= 8) return DUMMY_MATCH; */
-
if (posSlot < kEndPosModelIndex)
{
prob = probs + SpecPos + ((2 | (posSlot & 1)) << numDirectBits);
@@ -844,12 +895,15 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inS
}
}
}
+ break;
}
NORMALIZE_CHECK;
+
+ *bufOut = buf;
return res;
}
-
+void LzmaDec_InitDicAndState(CLzmaDec *p, BoolInt initDic, BoolInt initState);
void LzmaDec_InitDicAndState(CLzmaDec *p, BoolInt initDic, BoolInt initState)
{
p->remainLen = kMatchSpecLenStart + 1;
@@ -872,16 +926,41 @@ void LzmaDec_Init(CLzmaDec *p)
}
+/*
+LZMA supports optional end_marker.
+So the decoder can look ahead for one additional LZMA-Symbol to check end_marker.
+That additional LZMA-Symbol can require up to LZMA_REQUIRED_INPUT_MAX bytes in input stream.
+When the decoder reaches dicLimit, it looks at the (finishMode) parameter:
+ if (finishMode == LZMA_FINISH_ANY), the decoder doesn't look ahead
+ if (finishMode != LZMA_FINISH_ANY), the decoder looks ahead, if end_marker is possible for current position
+
+When the decoder looks ahead, and the lookahead symbol is not end_marker, we have two ways:
+ 1) Strict mode (default) : the decoder returns SZ_ERROR_DATA.
+ 2) The relaxed mode (alternative mode) : we could return SZ_OK, and the caller
+ must check (status) value. The caller can show the error,
+ if the end of stream is expected, and the (status) is not
+ LZMA_STATUS_FINISHED_WITH_MARK or LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK.
+*/
+
+
+#define RETURN__NOT_FINISHED__FOR_FINISH \
+ *status = LZMA_STATUS_NOT_FINISHED; \
+ return SZ_ERROR_DATA; // for strict mode
+ // return SZ_OK; // for relaxed mode
+
+
SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *srcLen,
ELzmaFinishMode finishMode, ELzmaStatus *status)
{
SizeT inSize = *srcLen;
(*srcLen) = 0;
-
*status = LZMA_STATUS_NOT_SPECIFIED;
if (p->remainLen > kMatchSpecLenStart)
{
+ if (p->remainLen > kMatchSpecLenStart + 2)
+ return p->remainLen == kMatchSpecLen_Error_Fail ? SZ_ERROR_FAIL : SZ_ERROR_DATA;
+
for (; inSize > 0 && p->tempBufSize < RC_INIT_SIZE; (*srcLen)++, inSize--)
p->tempBuf[p->tempBufSize++] = *src++;
if (p->tempBufSize != 0 && p->tempBuf[0] != 0)
@@ -896,6 +975,12 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *sr
| ((UInt32)p->tempBuf[2] << 16)
| ((UInt32)p->tempBuf[3] << 8)
| ((UInt32)p->tempBuf[4]);
+
+ if (p->checkDicSize == 0
+ && p->processedPos == 0
+ && p->code >= kBadRepCode)
+ return SZ_ERROR_DATA;
+
p->range = 0xFFFFFFFF;
p->tempBufSize = 0;
@@ -913,10 +998,21 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *sr
p->remainLen = 0;
}
- LzmaDec_WriteRem(p, dicLimit);
-
- while (p->remainLen != kMatchSpecLenStart)
+ for (;;)
{
+ if (p->remainLen == kMatchSpecLenStart)
+ {
+ if (p->code != 0)
+ return SZ_ERROR_DATA;
+ *status = LZMA_STATUS_FINISHED_WITH_MARK;
+ return SZ_OK;
+ }
+
+ LzmaDec_WriteRem(p, dicLimit);
+
+ {
+ // (p->remainLen == 0 || p->dicPos == dicLimit)
+
int checkEndMarkNow = 0;
if (p->dicPos >= dicLimit)
@@ -933,92 +1029,174 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *sr
}
if (p->remainLen != 0)
{
- *status = LZMA_STATUS_NOT_FINISHED;
- return SZ_ERROR_DATA;
+ RETURN__NOT_FINISHED__FOR_FINISH;
}
checkEndMarkNow = 1;
}
+ // (p->remainLen == 0)
+
if (p->tempBufSize == 0)
{
- SizeT processed;
const Byte *bufLimit;
+ int dummyProcessed = -1;
+
if (inSize < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow)
{
- int dummyRes = LzmaDec_TryDummy(p, src, inSize);
- if (dummyRes == DUMMY_ERROR)
+ const Byte *bufOut = src + inSize;
+
+ ELzmaDummy dummyRes = LzmaDec_TryDummy(p, src, &bufOut);
+
+ if (dummyRes == DUMMY_INPUT_EOF)
{
- memcpy(p->tempBuf, src, inSize);
- p->tempBufSize = (unsigned)inSize;
+ size_t i;
+ if (inSize >= LZMA_REQUIRED_INPUT_MAX)
+ break;
(*srcLen) += inSize;
+ p->tempBufSize = (unsigned)inSize;
+ for (i = 0; i < inSize; i++)
+ p->tempBuf[i] = src[i];
*status = LZMA_STATUS_NEEDS_MORE_INPUT;
return SZ_OK;
}
- if (checkEndMarkNow && dummyRes != DUMMY_MATCH)
+
+ dummyProcessed = (int)(bufOut - src);
+ if ((unsigned)dummyProcessed > LZMA_REQUIRED_INPUT_MAX)
+ break;
+
+ if (checkEndMarkNow && !IS_DUMMY_END_MARKER_POSSIBLE(dummyRes))
{
- *status = LZMA_STATUS_NOT_FINISHED;
- return SZ_ERROR_DATA;
+ unsigned i;
+ (*srcLen) += (unsigned)dummyProcessed;
+ p->tempBufSize = (unsigned)dummyProcessed;
+ for (i = 0; i < (unsigned)dummyProcessed; i++)
+ p->tempBuf[i] = src[i];
+ // p->remainLen = kMatchSpecLen_Error_Data;
+ RETURN__NOT_FINISHED__FOR_FINISH;
}
+
bufLimit = src;
+ // we will decode only one iteration
}
else
bufLimit = src + inSize - LZMA_REQUIRED_INPUT_MAX;
+
p->buf = src;
- if (LzmaDec_DecodeReal2(p, dicLimit, bufLimit) != 0)
- return SZ_ERROR_DATA;
- processed = (SizeT)(p->buf - src);
- (*srcLen) += processed;
- src += processed;
- inSize -= processed;
+
+ {
+ int res = LzmaDec_DecodeReal2(p, dicLimit, bufLimit);
+
+ SizeT processed = (SizeT)(p->buf - src);
+
+ if (dummyProcessed < 0)
+ {
+ if (processed > inSize)
+ break;
+ }
+ else if ((unsigned)dummyProcessed != processed)
+ break;
+
+ src += processed;
+ inSize -= processed;
+ (*srcLen) += processed;
+
+ if (res != SZ_OK)
+ {
+ p->remainLen = kMatchSpecLen_Error_Data;
+ return SZ_ERROR_DATA;
+ }
+ }
+ continue;
}
- else
+
{
- unsigned rem = p->tempBufSize, lookAhead = 0;
- while (rem < LZMA_REQUIRED_INPUT_MAX && lookAhead < inSize)
- p->tempBuf[rem++] = src[lookAhead++];
- p->tempBufSize = rem;
+ // we have some data in (p->tempBuf)
+ // in strict mode: tempBufSize is not enough for one Symbol decoding.
+ // in relaxed mode: tempBufSize not larger than required for one Symbol decoding.
+
+ unsigned rem = p->tempBufSize;
+ unsigned ahead = 0;
+ int dummyProcessed = -1;
+
+ while (rem < LZMA_REQUIRED_INPUT_MAX && ahead < inSize)
+ p->tempBuf[rem++] = src[ahead++];
+
+ // ahead - the size of new data copied from (src) to (p->tempBuf)
+ // rem - the size of temp buffer including new data from (src)
+
if (rem < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow)
{
- int dummyRes = LzmaDec_TryDummy(p, p->tempBuf, (SizeT)rem);
- if (dummyRes == DUMMY_ERROR)
+ const Byte *bufOut = p->tempBuf + rem;
+
+ ELzmaDummy dummyRes = LzmaDec_TryDummy(p, p->tempBuf, &bufOut);
+
+ if (dummyRes == DUMMY_INPUT_EOF)
{
- (*srcLen) += (SizeT)lookAhead;
+ if (rem >= LZMA_REQUIRED_INPUT_MAX)
+ break;
+ p->tempBufSize = rem;
+ (*srcLen) += (SizeT)ahead;
*status = LZMA_STATUS_NEEDS_MORE_INPUT;
return SZ_OK;
}
- if (checkEndMarkNow && dummyRes != DUMMY_MATCH)
+
+ dummyProcessed = (int)(bufOut - p->tempBuf);
+
+ if ((unsigned)dummyProcessed < p->tempBufSize)
+ break;
+
+ if (checkEndMarkNow && !IS_DUMMY_END_MARKER_POSSIBLE(dummyRes))
{
- *status = LZMA_STATUS_NOT_FINISHED;
- return SZ_ERROR_DATA;
+ (*srcLen) += (unsigned)dummyProcessed - p->tempBufSize;
+ p->tempBufSize = (unsigned)dummyProcessed;
+ // p->remainLen = kMatchSpecLen_Error_Data;
+ RETURN__NOT_FINISHED__FOR_FINISH;
}
}
+
p->buf = p->tempBuf;
- if (LzmaDec_DecodeReal2(p, dicLimit, p->buf) != 0)
- return SZ_ERROR_DATA;
{
- unsigned kkk = (unsigned)(p->buf - p->tempBuf);
- if (rem < kkk)
- return SZ_ERROR_FAIL; /* some internal error */
- rem -= kkk;
- if (lookAhead < rem)
- return SZ_ERROR_FAIL; /* some internal error */
- lookAhead -= rem;
+ // we decode one symbol from (p->tempBuf) here, so the (bufLimit) is equal to (p->buf)
+ int res = LzmaDec_DecodeReal2(p, dicLimit, p->buf);
+
+ SizeT processed = (SizeT)(p->buf - p->tempBuf);
+ rem = p->tempBufSize;
+
+ if (dummyProcessed < 0)
+ {
+ if (processed > LZMA_REQUIRED_INPUT_MAX)
+ break;
+ if (processed < rem)
+ break;
+ }
+ else if ((unsigned)dummyProcessed != processed)
+ break;
+
+ processed -= rem;
+
+ src += processed;
+ inSize -= processed;
+ (*srcLen) += processed;
+ p->tempBufSize = 0;
+
+ if (res != SZ_OK)
+ {
+ p->remainLen = kMatchSpecLen_Error_Data;
+ return SZ_ERROR_DATA;
+ }
}
- (*srcLen) += (SizeT)lookAhead;
- src += lookAhead;
- inSize -= (SizeT)lookAhead;
- p->tempBufSize = 0;
}
+ }
}
-
- if (p->code != 0)
- return SZ_ERROR_DATA;
- *status = LZMA_STATUS_FINISHED_WITH_MARK;
- return SZ_OK;
+
+ /* Some unexpected error: internal error of code, memory corruption or hardware failure */
+ p->remainLen = kMatchSpecLen_Error_Fail;
+ return SZ_ERROR_FAIL;
}
+
SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status)
{
SizeT outSize = *destLen;
diff --git a/multiarc/src/formats/7z/C/LzmaDec.h b/multiarc/src/formats/7z/C/LzmaDec.h
index 1f0927ab..6f129625 100644..100755
--- a/multiarc/src/formats/7z/C/LzmaDec.h
+++ b/multiarc/src/formats/7z/C/LzmaDec.h
@@ -1,5 +1,5 @@
/* LzmaDec.h -- LZMA Decoder
-2018-04-21 : Igor Pavlov : Public domain */
+2020-03-19 : Igor Pavlov : Public domain */
#ifndef __LZMA_DEC_H
#define __LZMA_DEC_H
@@ -181,6 +181,7 @@ Returns:
LZMA_STATUS_NEEDS_MORE_INPUT
LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK
SZ_ERROR_DATA - Data error
+ SZ_ERROR_FAIL - Some unexpected error: internal error of code, memory corruption or hardware failure
*/
SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit,
@@ -223,6 +224,7 @@ Returns:
SZ_ERROR_MEM - Memory allocation error
SZ_ERROR_UNSUPPORTED - Unsupported properties
SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src).
+ SZ_ERROR_FAIL - Some unexpected error: internal error of code, memory corruption or hardware failure
*/
SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
diff --git a/multiarc/src/formats/7z/C/LzmaEnc.c b/multiarc/src/formats/7z/C/LzmaEnc.c
index 46a0db00..c8b31a19 100644..100755
--- a/multiarc/src/formats/7z/C/LzmaEnc.c
+++ b/multiarc/src/formats/7z/C/LzmaEnc.c
@@ -1,5 +1,5 @@
/* LzmaEnc.c -- LZMA Encoder
-2019-01-10: Igor Pavlov : Public domain */
+2022-07-15: Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -12,6 +12,7 @@
#include <stdio.h>
#endif
+#include "CpuArch.h"
#include "LzmaEnc.h"
#include "LzFind.h"
@@ -19,12 +20,25 @@
#include "LzFindMt.h"
#endif
+/* the following LzmaEnc_* declarations are the internal LZMA interface for the LZMA2 encoder */
+
+SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle pp, ISeqInStream *inStream, UInt32 keepWindowSize,
+ ISzAllocPtr alloc, ISzAllocPtr allocBig);
+SRes LzmaEnc_MemPrepare(CLzmaEncHandle pp, const Byte *src, SizeT srcLen,
+ UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig);
+SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, BoolInt reInit,
+ Byte *dest, size_t *destLen, UInt32 desiredPackSize, UInt32 *unpackSize);
+const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle pp);
+void LzmaEnc_Finish(CLzmaEncHandle pp);
+void LzmaEnc_SaveState(CLzmaEncHandle pp);
+void LzmaEnc_RestoreState(CLzmaEncHandle pp);
+
#ifdef SHOW_STAT
static unsigned g_STAT_OFFSET = 0;
#endif
-#define kLzmaMaxHistorySize ((UInt32)3 << 29)
-/* #define kLzmaMaxHistorySize ((UInt32)7 << 29) */
+/* for good normalization speed we still reserve 256 MB before 4 GB range */
+#define kLzmaMaxHistorySize ((UInt32)15 << 28)
#define kNumTopBits 24
#define kTopValue ((UInt32)1 << kNumTopBits)
@@ -36,7 +50,7 @@ static unsigned g_STAT_OFFSET = 0;
#define kNumMoveReducingBits 4
#define kNumBitPriceShiftBits 4
-#define kBitPrice (1 << kNumBitPriceShiftBits)
+// #define kBitPrice (1 << kNumBitPriceShiftBits)
#define REP_LEN_COUNT 64
@@ -47,6 +61,7 @@ void LzmaEncProps_Init(CLzmaEncProps *p)
p->reduceSize = (UInt64)(Int64)-1;
p->lc = p->lp = p->pb = p->algo = p->fb = p->btMode = p->numHashBytes = p->numThreads = -1;
p->writeEndMark = 0;
+ p->affinity = 0;
}
void LzmaEncProps_Normalize(CLzmaEncProps *p)
@@ -55,16 +70,21 @@ void LzmaEncProps_Normalize(CLzmaEncProps *p)
if (level < 0) level = 5;
p->level = level;
- if (p->dictSize == 0) p->dictSize = (level <= 5 ? (1 << (level * 2 + 14)) : (level <= 7 ? (1 << 25) : (1 << 26)));
+ if (p->dictSize == 0)
+ p->dictSize =
+ ( level <= 3 ? ((UInt32)1 << (level * 2 + 16)) :
+ ( level <= 6 ? ((UInt32)1 << (level + 19)) :
+ ( level <= 7 ? ((UInt32)1 << 25) : ((UInt32)1 << 26)
+ )));
+
if (p->dictSize > p->reduceSize)
{
- unsigned i;
- UInt32 reduceSize = (UInt32)p->reduceSize;
- for (i = 11; i <= 30; i++)
- {
- if (reduceSize <= ((UInt32)2 << i)) { p->dictSize = ((UInt32)2 << i); break; }
- if (reduceSize <= ((UInt32)3 << i)) { p->dictSize = ((UInt32)3 << i); break; }
- }
+ UInt32 v = (UInt32)p->reduceSize;
+ const UInt32 kReduceMin = ((UInt32)1 << 12);
+ if (v < kReduceMin)
+ v = kReduceMin;
+ if (p->dictSize > v)
+ p->dictSize = v;
}
if (p->lc < 0) p->lc = 3;
@@ -74,8 +94,8 @@ void LzmaEncProps_Normalize(CLzmaEncProps *p)
if (p->algo < 0) p->algo = (level < 5 ? 0 : 1);
if (p->fb < 0) p->fb = (level < 7 ? 32 : 64);
if (p->btMode < 0) p->btMode = (p->algo == 0 ? 0 : 1);
- if (p->numHashBytes < 0) p->numHashBytes = 4;
- if (p->mc == 0) p->mc = (16 + (p->fb >> 1)) >> (p->btMode ? 0 : 1);
+ if (p->numHashBytes < 0) p->numHashBytes = (p->btMode ? 4 : 5);
+ if (p->mc == 0) p->mc = (16 + ((unsigned)p->fb >> 1)) >> (p->btMode ? 0 : 1);
if (p->numThreads < 0)
p->numThreads =
@@ -93,18 +113,85 @@ UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps *props2)
return props.dictSize;
}
-#if (_MSC_VER >= 1400)
-/* BSR code is fast for some new CPUs */
-/* #define LZMA_LOG_BSR */
+
+/*
+x86/x64:
+
+BSR:
+ IF (SRC == 0) ZF = 1, DEST is undefined;
+ AMD : DEST is unchanged;
+ IF (SRC != 0) ZF = 0; DEST is index of top non-zero bit
+ BSR is slow in some processors
+
+LZCNT:
+ IF (SRC == 0) CF = 1, DEST is size_in_bits_of_register(src) (32 or 64)
+ IF (SRC != 0) CF = 0, DEST = num_lead_zero_bits
+ IF (DEST == 0) ZF = 1;
+
+LZCNT works only in new processors starting from Haswell.
+if LZCNT is not supported by processor, then it's executed as BSR.
+LZCNT can be faster than BSR, if supported.
+*/
+
+// #define LZMA_LOG_BSR
+
+#if defined(MY_CPU_ARM_OR_ARM64) /* || defined(MY_CPU_X86_OR_AMD64) */
+
+ #if (defined(__clang__) && (__clang_major__ >= 6)) \
+ || (defined(__GNUC__) && (__GNUC__ >= 6))
+ #define LZMA_LOG_BSR
+ #elif defined(_MSC_VER) && (_MSC_VER >= 1300)
+ // #if defined(MY_CPU_ARM_OR_ARM64)
+ #define LZMA_LOG_BSR
+ // #endif
+ #endif
#endif
+// #include <intrin.h>
+
#ifdef LZMA_LOG_BSR
-#define kDicLogSizeMaxCompress 32
+#if defined(__clang__) \
+ || defined(__GNUC__)
+
+/*
+ C code: : (30 - __builtin_clz(x))
+ gcc9/gcc10 for x64 /x86 : 30 - (bsr(x) xor 31)
+ clang10 for x64 : 31 + (bsr(x) xor -32)
+*/
+
+ #define MY_clz(x) ((unsigned)__builtin_clz(x))
+ // __lzcnt32
+ // __builtin_ia32_lzcnt_u32
+
+#else // #if defined(_MSC_VER)
+
+ #ifdef MY_CPU_ARM_OR_ARM64
+
+ #define MY_clz _CountLeadingZeros
+
+ #else // if defined(MY_CPU_X86_OR_AMD64)
+
+ // #define MY_clz __lzcnt // we can use lzcnt (unsupported by old CPU)
+ // _BitScanReverse code is not optimal for some MSVC compilers
+ #define BSR2_RET(pos, res) { unsigned long zz; _BitScanReverse(&zz, (pos)); zz--; \
+ res = (zz + zz) + (pos >> zz); }
+
+ #endif // MY_CPU_X86_OR_AMD64
+
+#endif // _MSC_VER
+
+
+#ifndef BSR2_RET
-#define BSR2_RET(pos, res) { unsigned long zz; _BitScanReverse(&zz, (pos)); res = (zz + zz) + ((pos >> (zz - 1)) & 1); }
+ #define BSR2_RET(pos, res) { unsigned zz = 30 - MY_clz(pos); \
+ res = (zz + zz) + (pos >> zz); }
-static unsigned GetPosSlot1(UInt32 pos)
+#endif
+
+
+unsigned GetPosSlot1(UInt32 pos);
+unsigned GetPosSlot1(UInt32 pos)
{
unsigned res;
BSR2_RET(pos, res);
@@ -113,10 +200,10 @@ static unsigned GetPosSlot1(UInt32 pos)
#define GetPosSlot2(pos, res) { BSR2_RET(pos, res); }
#define GetPosSlot(pos, res) { if (pos < 2) res = pos; else BSR2_RET(pos, res); }
-#else
-#define kNumLogBits (9 + sizeof(size_t) / 2)
-/* #define kNumLogBits (11 + sizeof(size_t) / 8 * 3) */
+#else // ! LZMA_LOG_BSR
+
+#define kNumLogBits (11 + sizeof(size_t) / 8 * 3)
#define kDicLogSizeMaxCompress ((kNumLogBits - 1) * 2 + 7)
@@ -163,7 +250,7 @@ static void LzmaEnc_FastPosInit(Byte *g_FastPos)
#define GetPosSlot2(pos, res) { BSR2_RET(pos, res); }
#define GetPosSlot(pos, res) { if (pos < kNumFullDistances) res = p->g_FastPos[pos & (kNumFullDistances - 1)]; else BSR2_RET(pos, res); }
-#endif
+#endif // LZMA_LOG_BSR
#define LZMA_NUM_REPS 4
@@ -193,7 +280,7 @@ typedef struct
#define kNumLenToPosStates 4
#define kNumPosSlotBits 6
-#define kDicLogSizeMin 0
+// #define kDicLogSizeMin 0
#define kDicLogSizeMax 32
#define kDistTableSizeMax (kDicLogSizeMax * 2)
@@ -299,7 +386,7 @@ typedef UInt32 CProbPrice;
typedef struct
{
void *matchFinderObj;
- IMatchFinder matchFinder;
+ IMatchFinder2 matchFinder;
unsigned optCur;
unsigned optEnd;
@@ -344,10 +431,14 @@ typedef struct
// begin of CMatchFinderMt is used in LZ thread
CMatchFinderMt matchFinderMt;
// end of CMatchFinderMt is used in BT and HASH threads
+ // #else
+ // CMatchFinder matchFinderBase;
#endif
-
CMatchFinder matchFinderBase;
+
+ // we suppose that we have 8-bytes alignment after CMatchFinder
+
#ifndef _7ZIP_ST
Byte pad[128];
#endif
@@ -355,8 +446,10 @@ typedef struct
// LZ thread
CProbPrice ProbPrices[kBitModelTotal >> kNumMoveReducingBits];
- UInt32 matches[LZMA_MATCH_LEN_MAX * 2 + 2 + 1];
+ // we want {len , dist} pairs to be 8-bytes aligned in matches array
+ UInt32 matches[LZMA_MATCH_LEN_MAX * 2 + 2];
+ // we want 8-bytes alignment here
UInt32 alignPrices[kAlignTableSize];
UInt32 posSlotPrices[kNumLenToPosStates][kDistTableSizeMax];
UInt32 distancesPrices[kNumLenToPosStates][kNumFullDistances];
@@ -385,12 +478,19 @@ typedef struct
CSaveState saveState;
+ // BoolInt mf_Failure;
#ifndef _7ZIP_ST
Byte pad2[128];
#endif
} CLzmaEnc;
+#define MFB (p->matchFinderBase)
+/*
+#ifndef _7ZIP_ST
+#define MFB (p->matchFinderMt.MatchFinder)
+#endif
+*/
#define COPY_ARR(dest, src, arr) memcpy(dest->arr, src->arr, sizeof(src->arr));
@@ -455,41 +555,51 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2)
if (props.lc > LZMA_LC_MAX
|| props.lp > LZMA_LP_MAX
- || props.pb > LZMA_PB_MAX
- || props.dictSize > ((UInt64)1 << kDicLogSizeMaxCompress)
- || props.dictSize > kLzmaMaxHistorySize)
+ || props.pb > LZMA_PB_MAX)
return SZ_ERROR_PARAM;
+
+ if (props.dictSize > kLzmaMaxHistorySize)
+ props.dictSize = kLzmaMaxHistorySize;
+
+ #ifndef LZMA_LOG_BSR
+ {
+ const UInt64 dict64 = props.dictSize;
+ if (dict64 > ((UInt64)1 << kDicLogSizeMaxCompress))
+ return SZ_ERROR_PARAM;
+ }
+ #endif
+
p->dictSize = props.dictSize;
{
- unsigned fb = props.fb;
+ unsigned fb = (unsigned)props.fb;
if (fb < 5)
fb = 5;
if (fb > LZMA_MATCH_LEN_MAX)
fb = LZMA_MATCH_LEN_MAX;
p->numFastBytes = fb;
}
- p->lc = props.lc;
- p->lp = props.lp;
- p->pb = props.pb;
+ p->lc = (unsigned)props.lc;
+ p->lp = (unsigned)props.lp;
+ p->pb = (unsigned)props.pb;
p->fastMode = (props.algo == 0);
// p->_maxMode = True;
- p->matchFinderBase.btMode = (Byte)(props.btMode ? 1 : 0);
+ MFB.btMode = (Byte)(props.btMode ? 1 : 0);
{
unsigned numHashBytes = 4;
if (props.btMode)
{
- if (props.numHashBytes < 2)
- numHashBytes = 2;
- else if (props.numHashBytes < 4)
- numHashBytes = props.numHashBytes;
+ if (props.numHashBytes < 2) numHashBytes = 2;
+ else if (props.numHashBytes < 4) numHashBytes = (unsigned)props.numHashBytes;
}
- p->matchFinderBase.numHashBytes = numHashBytes;
+ if (props.numHashBytes >= 5) numHashBytes = 5;
+
+ MFB.numHashBytes = numHashBytes;
}
- p->matchFinderBase.cutValue = props.mc;
+ MFB.cutValue = props.mc;
- p->writeEndMark = props.writeEndMark;
+ p->writeEndMark = (BoolInt)props.writeEndMark;
#ifndef _7ZIP_ST
/*
@@ -500,6 +610,8 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2)
}
*/
p->multiThread = (props.numThreads > 1);
+ p->matchFinderMt.btSync.affinity =
+ p->matchFinderMt.hashSync.affinity = props.affinity;
#endif
return SZ_OK;
@@ -509,7 +621,7 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2)
void LzmaEnc_SetDataSize(CLzmaEncHandle pp, UInt64 expectedDataSiize)
{
CLzmaEnc *p = (CLzmaEnc *)pp;
- p->matchFinderBase.expectedDataSize = expectedDataSiize;
+ MFB.expectedDataSize = expectedDataSiize;
}
@@ -536,8 +648,8 @@ static void RangeEnc_Construct(CRangeEnc *p)
p->bufBase = NULL;
}
-#define RangeEnc_GetProcessed(p) ((p)->processed + ((p)->buf - (p)->bufBase) + (p)->cacheSize)
-#define RangeEnc_GetProcessed_sizet(p) ((size_t)(p)->processed + ((p)->buf - (p)->bufBase) + (size_t)(p)->cacheSize)
+#define RangeEnc_GetProcessed(p) ( (p)->processed + (size_t)((p)->buf - (p)->bufBase) + (p)->cacheSize)
+#define RangeEnc_GetProcessed_sizet(p) ((size_t)(p)->processed + (size_t)((p)->buf - (p)->bufBase) + (size_t)(p)->cacheSize)
#define RC_BUF_SIZE (1 << 16)
@@ -556,12 +668,11 @@ static int RangeEnc_Alloc(CRangeEnc *p, ISzAllocPtr alloc)
static void RangeEnc_Free(CRangeEnc *p, ISzAllocPtr alloc)
{
ISzAlloc_Free(alloc, p->bufBase);
- p->bufBase = 0;
+ p->bufBase = NULL;
}
static void RangeEnc_Init(CRangeEnc *p)
{
- /* Stream.Init(); */
p->range = 0xFFFFFFFF;
p->cache = 0;
p->low = 0;
@@ -575,12 +686,12 @@ static void RangeEnc_Init(CRangeEnc *p)
MY_NO_INLINE static void RangeEnc_FlushStream(CRangeEnc *p)
{
- size_t num;
- if (p->res != SZ_OK)
- return;
- num = p->buf - p->bufBase;
- if (num != ISeqOutStream_Write(p->outStream, p->bufBase, num))
- p->res = SZ_ERROR_WRITE;
+ const size_t num = (size_t)(p->buf - p->bufBase);
+ if (p->res == SZ_OK)
+ {
+ if (num != ISeqOutStream_Write(p->outStream, p->bufBase, num))
+ p->res = SZ_ERROR_WRITE;
+ }
p->processed += num;
p->buf = p->bufBase;
}
@@ -656,7 +767,7 @@ static void RangeEnc_FlushData(CRangeEnc *p)
range += newBound & mask; \
mask &= (kBitModelTotal - ((1 << kNumMoveBits) - 1)); \
mask += ((1 << kNumMoveBits) - 1); \
- ttt += (Int32)(mask - ttt) >> kNumMoveBits; \
+ ttt += (UInt32)((Int32)(mask - ttt) >> kNumMoveBits); \
*(prob) = (CLzmaProb)ttt; \
RC_NORM(p) \
}
@@ -749,7 +860,7 @@ static void LzmaEnc_InitPriceTables(CProbPrice *ProbPrices)
bitCount++;
}
}
- ProbPrices[i] = (CProbPrice)((kNumBitModelTotalBits << kCyclesBits) - 15 - bitCount);
+ ProbPrices[i] = (CProbPrice)(((unsigned)kNumBitModelTotalBits << kCyclesBits) - 15 - bitCount);
// printf("\n%3d: %5d", i, ProbPrices[i]);
}
}
@@ -985,7 +1096,11 @@ static unsigned ReadMatchDistances(CLzmaEnc *p, unsigned *numPairsRes)
p->additionalOffset++;
p->numAvail = p->matchFinder.GetNumAvailableBytes(p->matchFinderObj);
- numPairs = p->matchFinder.GetMatches(p->matchFinderObj, p->matches);
+ {
+ const UInt32 *d = p->matchFinder.GetMatches(p->matchFinderObj, p->matches);
+ // if (!d) { p->mf_Failure = True; *numPairsRes = 0; return 0; }
+ numPairs = (unsigned)(d - p->matches);
+ }
*numPairsRes = numPairs;
#ifdef SHOW_STAT
@@ -1001,7 +1116,7 @@ static unsigned ReadMatchDistances(CLzmaEnc *p, unsigned *numPairsRes)
if (numPairs == 0)
return 0;
{
- unsigned len = p->matches[(size_t)numPairs - 2];
+ const unsigned len = p->matches[(size_t)numPairs - 2];
if (len != p->numFastBytes)
return len;
{
@@ -1011,7 +1126,7 @@ static unsigned ReadMatchDistances(CLzmaEnc *p, unsigned *numPairsRes)
{
const Byte *p1 = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;
const Byte *p2 = p1 + len;
- ptrdiff_t dif = (ptrdiff_t)-1 - p->matches[(size_t)numPairs - 1];
+ const ptrdiff_t dif = (ptrdiff_t)-1 - (ptrdiff_t)p->matches[(size_t)numPairs - 1];
const Byte *lim = p1 + numAvail;
for (; p2 != lim && *p2 == p2[dif]; p2++)
{}
@@ -1167,6 +1282,8 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
repLens[i] = len;
if (len > repLens[repMaxIndex])
repMaxIndex = i;
+ if (len == LZMA_MATCH_LEN_MAX) // 21.03 : optimization
+ break;
}
if (repLens[repMaxIndex] >= p->numFastBytes)
@@ -1179,10 +1296,12 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
}
matches = p->matches;
+ #define MATCHES matches
+ // #define MATCHES p->matches
if (mainLen >= p->numFastBytes)
{
- p->backRes = matches[(size_t)numPairs - 1] + LZMA_NUM_REPS;
+ p->backRes = MATCHES[(size_t)numPairs - 1] + LZMA_NUM_REPS;
MOVE_POS(p, mainLen - 1)
return mainLen;
}
@@ -1276,13 +1395,13 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
if (len < 2)
len = 2;
else
- while (len > matches[offs])
+ while (len > MATCHES[offs])
offs += 2;
for (; ; len++)
{
COptimal *opt;
- UInt32 dist = matches[(size_t)offs + 1];
+ UInt32 dist = MATCHES[(size_t)offs + 1];
UInt32 price = normalMatchPrice + GET_PRICE_LEN(&p->lenEnc, posState, len);
unsigned lenToPosState = GetLenToPosState(len);
@@ -1306,7 +1425,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
opt->extra = 0;
}
- if (len == matches[offs])
+ if (len == MATCHES[offs])
{
offs += 2;
if (offs == numPairs)
@@ -1727,8 +1846,8 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
if (newLen > numAvail)
{
newLen = numAvail;
- for (numPairs = 0; newLen > matches[numPairs]; numPairs += 2);
- matches[numPairs] = (UInt32)newLen;
+ for (numPairs = 0; newLen > MATCHES[numPairs]; numPairs += 2);
+ MATCHES[numPairs] = (UInt32)newLen;
numPairs += 2;
}
@@ -1747,9 +1866,9 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
}
offs = 0;
- while (startLen > matches[offs])
+ while (startLen > MATCHES[offs])
offs += 2;
- dist = matches[(size_t)offs + 1];
+ dist = MATCHES[(size_t)offs + 1];
// if (dist >= kNumFullDistances)
GetPosSlot2(dist, posSlot);
@@ -1776,7 +1895,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
}
}
- if (len == matches[offs])
+ if (len == MATCHES[offs])
{
// if (p->_maxMode) {
// MATCH : LIT : REP_0
@@ -1841,7 +1960,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
offs += 2;
if (offs == numPairs)
break;
- dist = matches[(size_t)offs + 1];
+ dist = MATCHES[(size_t)offs + 1];
// if (dist >= kNumFullDistances)
GetPosSlot2(dist, posSlot);
}
@@ -2059,8 +2178,23 @@ static SRes CheckErrors(CLzmaEnc *p)
return p->result;
if (p->rc.res != SZ_OK)
p->result = SZ_ERROR_WRITE;
- if (p->matchFinderBase.result != SZ_OK)
+
+ #ifndef _7ZIP_ST
+ if (
+ // p->mf_Failure ||
+ (p->mtMode &&
+ ( // p->matchFinderMt.failure_LZ_LZ ||
+ p->matchFinderMt.failure_LZ_BT))
+ )
+ {
+ p->result = MY_HRES_ERROR__INTERNAL_ERROR;
+ // printf("\nCheckErrors p->matchFinderMt.failureLZ\n");
+ }
+ #endif
+
+ if (MFB.result != SZ_OK)
p->result = SZ_ERROR_READ;
+
if (p->result != SZ_OK)
p->finished = True;
return p->result;
@@ -2198,14 +2332,14 @@ MY_NO_INLINE static void FillDistancesPrices(CLzmaEnc *p)
-void LzmaEnc_Construct(CLzmaEnc *p)
+static void LzmaEnc_Construct(CLzmaEnc *p)
{
RangeEnc_Construct(&p->rc);
- MatchFinder_Construct(&p->matchFinderBase);
+ MatchFinder_Construct(&MFB);
#ifndef _7ZIP_ST
+ p->matchFinderMt.MatchFinder = &MFB;
MatchFinderMt_Construct(&p->matchFinderMt);
- p->matchFinderMt.MatchFinder = &p->matchFinderBase;
#endif
{
@@ -2221,7 +2355,6 @@ void LzmaEnc_Construct(CLzmaEnc *p)
LzmaEnc_InitPriceTables(p->ProbPrices);
p->litProbs = NULL;
p->saveState.litProbs = NULL;
-
}
CLzmaEncHandle LzmaEnc_Create(ISzAllocPtr alloc)
@@ -2233,7 +2366,7 @@ CLzmaEncHandle LzmaEnc_Create(ISzAllocPtr alloc)
return p;
}
-void LzmaEnc_FreeLits(CLzmaEnc *p, ISzAllocPtr alloc)
+static void LzmaEnc_FreeLits(CLzmaEnc *p, ISzAllocPtr alloc)
{
ISzAlloc_Free(alloc, p->litProbs);
ISzAlloc_Free(alloc, p->saveState.litProbs);
@@ -2241,13 +2374,13 @@ void LzmaEnc_FreeLits(CLzmaEnc *p, ISzAllocPtr alloc)
p->saveState.litProbs = NULL;
}
-void LzmaEnc_Destruct(CLzmaEnc *p, ISzAllocPtr alloc, ISzAllocPtr allocBig)
+static void LzmaEnc_Destruct(CLzmaEnc *p, ISzAllocPtr alloc, ISzAllocPtr allocBig)
{
#ifndef _7ZIP_ST
MatchFinderMt_Destruct(&p->matchFinderMt, allocBig);
#endif
- MatchFinder_Free(&p->matchFinderBase, allocBig);
+ MatchFinder_Free(&MFB, allocBig);
LzmaEnc_FreeLits(p, alloc);
RangeEnc_Free(&p->rc, alloc);
}
@@ -2259,11 +2392,18 @@ void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig)
}
+MY_NO_INLINE
static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpackSize)
{
UInt32 nowPos32, startPos32;
if (p->needInit)
{
+ #ifndef _7ZIP_ST
+ if (p->mtMode)
+ {
+ RINOK(MatchFinderMt_InitMt(&p->matchFinderMt));
+ }
+ #endif
p->matchFinder.Init(p->matchFinderObj);
p->needInit = 0;
}
@@ -2521,12 +2661,12 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpa
// { int y; for (y = 0; y < 100; y++) {
FillDistancesPrices(p);
// }}
- LenPriceEnc_UpdateTables(&p->lenEnc, 1 << p->pb, &p->lenProbs, p->ProbPrices);
+ LenPriceEnc_UpdateTables(&p->lenEnc, (unsigned)1 << p->pb, &p->lenProbs, p->ProbPrices);
}
if (p->repLenEncCounter <= 0)
{
p->repLenEncCounter = REP_LEN_COUNT;
- LenPriceEnc_UpdateTables(&p->repLenEnc, 1 << p->pb, &p->repLenProbs, p->ProbPrices);
+ LenPriceEnc_UpdateTables(&p->repLenEnc, (unsigned)1 << p->pb, &p->repLenProbs, p->ProbPrices);
}
}
@@ -2559,11 +2699,13 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpa
static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig)
{
UInt32 beforeSize = kNumOpts;
+ UInt32 dictSize;
+
if (!RangeEnc_Alloc(&p->rc, alloc))
return SZ_ERROR_MEM;
#ifndef _7ZIP_ST
- p->mtMode = (p->multiThread && !p->fastMode && (p->matchFinderBase.btMode != 0));
+ p->mtMode = (p->multiThread && !p->fastMode && (MFB.btMode != 0));
#endif
{
@@ -2582,36 +2724,56 @@ static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc,
}
}
- p->matchFinderBase.bigHash = (Byte)(p->dictSize > kBigHashDicLimit ? 1 : 0);
+ MFB.bigHash = (Byte)(p->dictSize > kBigHashDicLimit ? 1 : 0);
- if (beforeSize + p->dictSize < keepWindowSize)
- beforeSize = keepWindowSize - p->dictSize;
+
+ dictSize = p->dictSize;
+ if (dictSize == ((UInt32)2 << 30) ||
+ dictSize == ((UInt32)3 << 30))
+ {
+ /* 21.03 : here we reduce the dictionary for 2 reasons:
+ 1) we don't want 32-bit back_distance matches in decoder for 2 GB dictionary.
+ 2) we want to eliminate the useless last MatchFinder_Normalize3() for corner cases,
+ where data size is aligned for 1 GB: 5/6/8 GB.
+ That reduction must be >= 1 for such corner cases. */
+ dictSize -= 1;
+ }
+
+ if (beforeSize + dictSize < keepWindowSize)
+ beforeSize = keepWindowSize - dictSize;
+
+ /* in worst case we can look ahead for
+ max(LZMA_MATCH_LEN_MAX, numFastBytes + 1 + numFastBytes) bytes.
+ we send larger value for (keepAfter) to MatchFinder_Create():
+ (numFastBytes + LZMA_MATCH_LEN_MAX + 1)
+ */
#ifndef _7ZIP_ST
if (p->mtMode)
{
- RINOK(MatchFinderMt_Create(&p->matchFinderMt, p->dictSize, beforeSize, p->numFastBytes,
- LZMA_MATCH_LEN_MAX
- + 1 /* 18.04 */
+ RINOK(MatchFinderMt_Create(&p->matchFinderMt, dictSize, beforeSize,
+ p->numFastBytes, LZMA_MATCH_LEN_MAX + 1 /* 18.04 */
, allocBig));
p->matchFinderObj = &p->matchFinderMt;
- p->matchFinderBase.bigHash = (Byte)(
- (p->dictSize > kBigHashDicLimit && p->matchFinderBase.hashMask >= 0xFFFFFF) ? 1 : 0);
+ MFB.bigHash = (Byte)(
+ (p->dictSize > kBigHashDicLimit && MFB.hashMask >= 0xFFFFFF) ? 1 : 0);
MatchFinderMt_CreateVTable(&p->matchFinderMt, &p->matchFinder);
}
else
#endif
{
- if (!MatchFinder_Create(&p->matchFinderBase, p->dictSize, beforeSize, p->numFastBytes, LZMA_MATCH_LEN_MAX, allocBig))
+ if (!MatchFinder_Create(&MFB, dictSize, beforeSize,
+ p->numFastBytes, LZMA_MATCH_LEN_MAX + 1 /* 21.03 */
+ , allocBig))
return SZ_ERROR_MEM;
- p->matchFinderObj = &p->matchFinderBase;
- MatchFinder_CreateVTable(&p->matchFinderBase, &p->matchFinder);
+ p->matchFinderObj = &MFB;
+ MatchFinder_CreateVTable(&MFB, &p->matchFinder);
}
return SZ_OK;
}
-void LzmaEnc_Init(CLzmaEnc *p)
+static void LzmaEnc_Init(CLzmaEnc *p)
{
unsigned i;
p->state = 0;
@@ -2675,12 +2837,14 @@ void LzmaEnc_Init(CLzmaEnc *p)
p->additionalOffset = 0;
- p->pbMask = (1 << p->pb) - 1;
+ p->pbMask = ((unsigned)1 << p->pb) - 1;
p->lpMask = ((UInt32)0x100 << p->lp) - ((unsigned)0x100 >> p->lc);
+
+ // p->mf_Failure = False;
}
-void LzmaEnc_InitPrices(CLzmaEnc *p)
+static void LzmaEnc_InitPrices(CLzmaEnc *p)
{
if (!p->fastMode)
{
@@ -2694,8 +2858,8 @@ void LzmaEnc_InitPrices(CLzmaEnc *p)
p->repLenEncCounter = REP_LEN_COUNT;
- LenPriceEnc_UpdateTables(&p->lenEnc, 1 << p->pb, &p->lenProbs, p->ProbPrices);
- LenPriceEnc_UpdateTables(&p->repLenEnc, 1 << p->pb, &p->repLenProbs, p->ProbPrices);
+ LenPriceEnc_UpdateTables(&p->lenEnc, (unsigned)1 << p->pb, &p->lenProbs, p->ProbPrices);
+ LenPriceEnc_UpdateTables(&p->repLenEnc, (unsigned)1 << p->pb, &p->repLenProbs, p->ProbPrices);
}
static SRes LzmaEnc_AllocAndInit(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig)
@@ -2719,7 +2883,7 @@ static SRes LzmaEnc_Prepare(CLzmaEncHandle pp, ISeqOutStream *outStream, ISeqInS
ISzAllocPtr alloc, ISzAllocPtr allocBig)
{
CLzmaEnc *p = (CLzmaEnc *)pp;
- p->matchFinderBase.stream = inStream;
+ MFB.stream = inStream;
p->needInit = 1;
p->rc.outStream = outStream;
return LzmaEnc_AllocAndInit(p, 0, alloc, allocBig);
@@ -2730,16 +2894,16 @@ SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle pp,
ISzAllocPtr alloc, ISzAllocPtr allocBig)
{
CLzmaEnc *p = (CLzmaEnc *)pp;
- p->matchFinderBase.stream = inStream;
+ MFB.stream = inStream;
p->needInit = 1;
return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig);
}
static void LzmaEnc_SetInputBuf(CLzmaEnc *p, const Byte *src, SizeT srcLen)
{
- p->matchFinderBase.directInput = 1;
- p->matchFinderBase.bufferBase = (Byte *)src;
- p->matchFinderBase.directInputRem = srcLen;
+ MFB.directInput = 1;
+ MFB.bufferBase = (Byte *)src;
+ MFB.directInputRem = srcLen;
}
SRes LzmaEnc_MemPrepare(CLzmaEncHandle pp, const Byte *src, SizeT srcLen,
@@ -2781,19 +2945,23 @@ static size_t SeqOutStreamBuf_Write(const ISeqOutStream *pp, const void *data, s
size = p->rem;
p->overflow = True;
}
- memcpy(p->data, data, size);
- p->rem -= size;
- p->data += size;
+ if (size != 0)
+ {
+ memcpy(p->data, data, size);
+ p->rem -= size;
+ p->data += size;
+ }
return size;
}
+/*
UInt32 LzmaEnc_GetNumAvailableBytes(CLzmaEncHandle pp)
{
const CLzmaEnc *p = (CLzmaEnc *)pp;
return p->matchFinder.GetNumAvailableBytes(p->matchFinderObj);
}
-
+*/
const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle pp)
{
@@ -2802,6 +2970,7 @@ const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle pp)
}
+// (desiredPackSize == 0) is not allowed
SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, BoolInt reInit,
Byte *dest, size_t *destLen, UInt32 desiredPackSize, UInt32 *unpackSize)
{
@@ -2822,14 +2991,10 @@ SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, BoolInt reInit,
if (reInit)
LzmaEnc_Init(p);
LzmaEnc_InitPrices(p);
-
- nowPos64 = p->nowPos64;
RangeEnc_Init(&p->rc);
p->rc.outStream = &outStream.vt;
-
- if (desiredPackSize == 0)
- return SZ_ERROR_OUTPUT_EOF;
-
+ nowPos64 = p->nowPos64;
+
res = LzmaEnc_CodeOneBlock(p, desiredPackSize, *unpackSize);
*unpackSize = (UInt32)(p->nowPos64 - nowPos64);
@@ -2841,6 +3006,7 @@ SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, BoolInt reInit,
}
+MY_NO_INLINE
static SRes LzmaEnc_Encode2(CLzmaEnc *p, ICompressProgress *progress)
{
SRes res = SZ_OK;
@@ -2870,7 +3036,7 @@ static SRes LzmaEnc_Encode2(CLzmaEnc *p, ICompressProgress *progress)
LzmaEnc_Finish(p);
/*
- if (res == SZ_OK && !Inline_MatchFinder_IsFinishedOK(&p->matchFinderBase))
+ if (res == SZ_OK && !Inline_MatchFinder_IsFinishedOK(&MFB))
res = SZ_ERROR_FAIL;
}
*/
@@ -2889,35 +3055,43 @@ SRes LzmaEnc_Encode(CLzmaEncHandle pp, ISeqOutStream *outStream, ISeqInStream *i
SRes LzmaEnc_WriteProperties(CLzmaEncHandle pp, Byte *props, SizeT *size)
{
- CLzmaEnc *p = (CLzmaEnc *)pp;
- unsigned i;
- UInt32 dictSize = p->dictSize;
if (*size < LZMA_PROPS_SIZE)
return SZ_ERROR_PARAM;
*size = LZMA_PROPS_SIZE;
- props[0] = (Byte)((p->pb * 5 + p->lp) * 9 + p->lc);
-
- if (dictSize >= ((UInt32)1 << 22))
- {
- UInt32 kDictMask = ((UInt32)1 << 20) - 1;
- if (dictSize < (UInt32)0xFFFFFFFF - kDictMask)
- dictSize = (dictSize + kDictMask) & ~kDictMask;
- }
- else for (i = 11; i <= 30; i++)
{
- if (dictSize <= ((UInt32)2 << i)) { dictSize = (2 << i); break; }
- if (dictSize <= ((UInt32)3 << i)) { dictSize = (3 << i); break; }
- }
+ const CLzmaEnc *p = (const CLzmaEnc *)pp;
+ const UInt32 dictSize = p->dictSize;
+ UInt32 v;
+ props[0] = (Byte)((p->pb * 5 + p->lp) * 9 + p->lc);
+
+ // we write aligned dictionary value to properties for lzma decoder
+ if (dictSize >= ((UInt32)1 << 21))
+ {
+ const UInt32 kDictMask = ((UInt32)1 << 20) - 1;
+ v = (dictSize + kDictMask) & ~kDictMask;
+ if (v < dictSize)
+ v = dictSize;
+ }
+ else
+ {
+ unsigned i = 11 * 2;
+ do
+ {
+ v = (UInt32)(2 + (i & 1)) << (i >> 1);
+ i++;
+ }
+ while (v < dictSize);
+ }
- for (i = 0; i < 4; i++)
- props[1 + i] = (Byte)(dictSize >> (8 * i));
- return SZ_OK;
+ SetUi32(props + 1, v);
+ return SZ_OK;
+ }
}
unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle pp)
{
- return ((CLzmaEnc *)pp)->writeEndMark;
+ return (unsigned)((CLzmaEnc *)pp)->writeEndMark;
}
@@ -2974,3 +3148,15 @@ SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
LzmaEnc_Destroy(p, alloc, allocBig);
return res;
}
+
+
+/*
+#ifndef _7ZIP_ST
+void LzmaEnc_GetLzThreads(CLzmaEncHandle pp, HANDLE lz_threads[2])
+{
+ const CLzmaEnc *p = (CLzmaEnc *)pp;
+ lz_threads[0] = p->matchFinderMt.hashSync.thread;
+ lz_threads[1] = p->matchFinderMt.btSync.thread;
+}
+#endif
+*/
diff --git a/multiarc/src/formats/7z/C/LzmaEnc.h b/multiarc/src/formats/7z/C/LzmaEnc.h
index 9194ee57..bc2ed504 100644..100755
--- a/multiarc/src/formats/7z/C/LzmaEnc.h
+++ b/multiarc/src/formats/7z/C/LzmaEnc.h
@@ -1,5 +1,5 @@
/* LzmaEnc.h -- LZMA Encoder
-2017-07-27 : Igor Pavlov : Public domain */
+2019-10-30 : Igor Pavlov : Public domain */
#ifndef __LZMA_ENC_H
#define __LZMA_ENC_H
@@ -29,6 +29,8 @@ typedef struct _CLzmaEncProps
UInt64 reduceSize; /* estimated size of data that will be compressed. default = (UInt64)(Int64)-1.
Encoder uses this value to reduce dictionary size */
+
+ UInt64 affinity;
} CLzmaEncProps;
void LzmaEncProps_Init(CLzmaEncProps *p);
diff --git a/multiarc/src/formats/7z/C/LzmaLib.c b/multiarc/src/formats/7z/C/LzmaLib.c
index 706e9e58..706e9e58 100644..100755
--- a/multiarc/src/formats/7z/C/LzmaLib.c
+++ b/multiarc/src/formats/7z/C/LzmaLib.c
diff --git a/multiarc/src/formats/7z/C/LzmaLib.h b/multiarc/src/formats/7z/C/LzmaLib.h
index 88fa87d3..c343a859 100644..100755
--- a/multiarc/src/formats/7z/C/LzmaLib.h
+++ b/multiarc/src/formats/7z/C/LzmaLib.h
@@ -1,5 +1,5 @@
/* LzmaLib.h -- LZMA library interface
-2013-01-18 : Igor Pavlov : Public domain */
+2021-04-03 : Igor Pavlov : Public domain */
#ifndef __LZMA_LIB_H
#define __LZMA_LIB_H
@@ -40,14 +40,16 @@ outPropsSize -
level - compression level: 0 <= level <= 9;
level dictSize algo fb
- 0: 16 KB 0 32
- 1: 64 KB 0 32
- 2: 256 KB 0 32
- 3: 1 MB 0 32
- 4: 4 MB 0 32
+ 0: 64 KB 0 32
+ 1: 256 KB 0 32
+ 2: 1 MB 0 32
+ 3: 4 MB 0 32
+ 4: 16 MB 0 32
5: 16 MB 1 32
6: 32 MB 1 32
- 7+: 64 MB 1 64
+ 7: 32 MB 1 64
+ 8: 64 MB 1 64
+ 9: 64 MB 1 64
The default value for "level" is 5.
@@ -83,6 +85,11 @@ fb - Word size (the number of fast bytes).
numThreads - The number of thereads. 1 or 2. The default value is 2.
Fast mode (algo = 0) can use only 1 thread.
+In:
+ dest - output data buffer
+ destLen - output data buffer size
+ src - input data
+ srcLen - input data size
Out:
destLen - processed output size
Returns:
@@ -108,8 +115,8 @@ MY_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char
LzmaUncompress
--------------
In:
- dest - output data
- destLen - output data size
+ dest - output data buffer
+ destLen - output data buffer size
src - input data
srcLen - input data size
Out:
diff --git a/multiarc/src/formats/7z/C/MtCoder.c b/multiarc/src/formats/7z/C/MtCoder.c
index 95359857..99dc9090 100644..100755
--- a/multiarc/src/formats/7z/C/MtCoder.c
+++ b/multiarc/src/formats/7z/C/MtCoder.c
@@ -1,5 +1,5 @@
/* MtCoder.c -- Multi-thread Coder
-2018-07-04 : Igor Pavlov : Public domain */
+2021-12-21 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -7,7 +7,7 @@
#ifndef _7ZIP_ST
-SRes MtProgressThunk_Progress(const ICompressProgress *pp, UInt64 inSize, UInt64 outSize)
+static SRes MtProgressThunk_Progress(const ICompressProgress *pp, UInt64 inSize, UInt64 outSize)
{
CMtProgressThunk *thunk = CONTAINER_FROM_VTBL(pp, CMtProgressThunk, vt);
UInt64 inSize2 = 0;
@@ -44,7 +44,7 @@ static WRes ArEvent_OptCreate_And_Reset(CEvent *p)
}
-static THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE ThreadFunc(void *pp);
+static THREAD_FUNC_DECL ThreadFunc(void *pp);
static SRes MtCoderThread_CreateAndStart(CMtCoderThread *t)
@@ -70,8 +70,7 @@ static void MtCoderThread_Destruct(CMtCoderThread *t)
{
t->stop = 1;
Event_Set(&t->startEvent);
- Thread_Wait(&t->thread);
- Thread_Close(&t->thread);
+ Thread_Wait_Close(&t->thread);
}
Event_Close(&t->startEvent);
@@ -336,13 +335,13 @@ static SRes ThreadFunc2(CMtCoderThread *t)
}
-static THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE ThreadFunc(void *pp)
+static THREAD_FUNC_DECL ThreadFunc(void *pp)
{
CMtCoderThread *t = (CMtCoderThread *)pp;
for (;;)
{
if (Event_Wait(&t->startEvent) != 0)
- return SZ_ERROR_THREAD;
+ return (THREAD_FUNC_RET_TYPE)SZ_ERROR_THREAD;
if (t->stop)
return 0;
{
@@ -358,7 +357,7 @@ static THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE ThreadFunc(void *pp)
unsigned numFinished = (unsigned)InterlockedIncrement(&mtc->numFinishedThreads);
if (numFinished == mtc->numStartedThreads)
if (Event_Set(&mtc->finishedEvent) != 0)
- return SZ_ERROR_THREAD;
+ return (THREAD_FUNC_RET_TYPE)SZ_ERROR_THREAD;
}
#endif
}
@@ -496,12 +495,7 @@ SRes MtCoder_Code(CMtCoder *p)
{
RINOK_THREAD(ArEvent_OptCreate_And_Reset(&p->readEvent));
-
- if (Semaphore_IsCreated(&p->blocksSemaphore))
- {
- RINOK_THREAD(Semaphore_Close(&p->blocksSemaphore));
- }
- RINOK_THREAD(Semaphore_Create(&p->blocksSemaphore, numBlocksMax, numBlocksMax));
+ RINOK_THREAD(Semaphore_OptCreateInit(&p->blocksSemaphore, numBlocksMax, numBlocksMax));
}
for (i = 0; i < MTCODER__BLOCKS_MAX - 1; i++)
diff --git a/multiarc/src/formats/7z/C/MtCoder.h b/multiarc/src/formats/7z/C/MtCoder.h
index 5a5f4d11..5a5f4d11 100644..100755
--- a/multiarc/src/formats/7z/C/MtCoder.h
+++ b/multiarc/src/formats/7z/C/MtCoder.h
diff --git a/multiarc/src/formats/7z/C/MtDec.c b/multiarc/src/formats/7z/C/MtDec.c
index 7803bf2a..45a67139 100644..100755
--- a/multiarc/src/formats/7z/C/MtDec.c
+++ b/multiarc/src/formats/7z/C/MtDec.c
@@ -1,16 +1,21 @@
/* MtDec.c -- Multi-thread Decoder
-2019-02-02 : Igor Pavlov : Public domain */
+2021-12-21 : Igor Pavlov : Public domain */
#include "Precomp.h"
// #define SHOW_DEBUG_INFO
// #include <stdio.h>
+#include <string.h>
#ifdef SHOW_DEBUG_INFO
#include <stdio.h>
#endif
+#include "MtDec.h"
+
+#ifndef _7ZIP_ST
+
#ifdef SHOW_DEBUG_INFO
#define PRF(x) x
#else
@@ -19,10 +24,6 @@
#define PRF_STR_INT(s, d) PRF(printf("\n" s " %d\n", (unsigned)d))
-#include "MtDec.h"
-
-#ifndef _7ZIP_ST
-
void MtProgress_Init(CMtProgress *p, ICompressProgress *progress)
{
p->progress = progress;
@@ -77,7 +78,7 @@ void MtProgress_SetError(CMtProgress *p, SRes res)
}
-#define RINOK_THREAD(x) RINOK(x)
+#define RINOK_THREAD(x) RINOK_WRes(x)
static WRes ArEvent_OptCreate_And_Reset(CEvent *p)
@@ -101,7 +102,7 @@ typedef struct __CMtDecBufLink CMtDecBufLink;
-static THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE ThreadFunc(void *pp);
+static THREAD_FUNC_DECL ThreadFunc(void *pp);
static WRes MtDecThread_CreateEvents(CMtDecThread *t)
@@ -156,8 +157,7 @@ static void MtDecThread_CloseThread(CMtDecThread *t)
{
Event_Set(&t->canWrite); /* we can disable it. There are no threads waiting canWrite in normal cases */
Event_Set(&t->canRead);
- Thread_Wait(&t->thread);
- Thread_Close(&t->thread);
+ Thread_Wait_Close(&t->thread);
}
Event_Close(&t->canRead);
@@ -289,12 +289,13 @@ static WRes ThreadFunc2(CMtDecThread *t)
Byte *afterEndData = NULL;
size_t afterEndData_Size = 0;
+ BoolInt afterEndData_IsCross = False;
BoolInt canCreateNewThread = False;
// CMtDecCallbackInfo parse;
CMtDecThread *nextThread;
- PRF_STR_INT("Event_Wait(&t->canRead)", t->index);
+ PRF_STR_INT("=============== Event_Wait(&t->canRead)", t->index);
RINOK_THREAD(Event_Wait(&t->canRead));
if (p->exitThread)
@@ -418,10 +419,12 @@ static WRes ThreadFunc2(CMtDecThread *t)
parse.srcFinished = finish;
parse.canCreateNewThread = True;
- // PRF(printf("\nParse size = %d\n", (unsigned)size))
+ PRF(printf("\nParse size = %d\n", (unsigned)size));
p->mtCallback->Parse(p->mtCallbackObject, t->index, &parse);
+ PRF(printf(" Parse processed = %d, state = %d \n", (unsigned)parse.srcSize, (unsigned)parse.state));
+
needWrite = True;
canCreateNewThread = parse.canCreateNewThread;
@@ -478,16 +481,12 @@ static WRes ThreadFunc2(CMtDecThread *t)
if (parse.state == MTDEC_PARSE_END)
{
- p->crossStart = 0;
- p->crossEnd = 0;
-
- if (crossSize != 0)
- memcpy(data + parse.srcSize, parseData + parse.srcSize, size - parse.srcSize); // we need all data
- afterEndData_Size = size - parse.srcSize;
afterEndData = parseData + parse.srcSize;
-
+ afterEndData_Size = size - parse.srcSize;
+ if (crossSize != 0)
+ afterEndData_IsCross = True;
// we reduce data size to required bytes (parsed only)
- inDataSize -= (size - parse.srcSize);
+ inDataSize -= afterEndData_Size;
if (!prev)
inDataSize_Start = parse.srcSize;
break;
@@ -752,13 +751,15 @@ static WRes ThreadFunc2(CMtDecThread *t)
{
// p->inProcessed += inCodePos;
+ PRF(printf("\n--Write afterSize = %d\n", (unsigned)afterEndData_Size));
+
res = p->mtCallback->Write(p->mtCallbackObject, t->index,
res == SZ_OK && needWriteToStream && !wasInterrupted, // needWrite
- afterEndData, afterEndData_Size,
+ afterEndData, afterEndData_Size, afterEndData_IsCross,
&needContinue,
&canRecode);
-
- // res= E_INVALIDARG; // for test
+
+ // res = SZ_ERROR_FAIL; // for test
PRF(printf("\nAfter Write needContinue = %d\n", (unsigned)needContinue));
PRF(printf("\nprocessed = %d\n", (unsigned)p->inProcessed));
@@ -835,7 +836,7 @@ static WRes ThreadFunc2(CMtDecThread *t)
#endif
-static THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE ThreadFunc1(void *pp)
+static THREAD_FUNC_DECL ThreadFunc1(void *pp)
{
WRes res;
@@ -847,7 +848,7 @@ static THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE ThreadFunc1(void *pp)
res = ThreadFunc2(t);
p = t->mtDec;
if (res == 0)
- return p->exitThreadWRes;
+ return (THREAD_FUNC_RET_TYPE)(UINT_PTR)p->exitThreadWRes;
{
// it's unexpected situation for some threading function error
if (p->exitThreadWRes == 0)
@@ -858,15 +859,14 @@ static THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE ThreadFunc1(void *pp)
Event_Set(&p->threads[0].canWrite);
MtProgress_SetError(&p->mtProgress, MY_SRes_HRESULT_FROM_WRes(res));
}
- return res;
+ return (THREAD_FUNC_RET_TYPE)(UINT_PTR)res;
}
-static MY_NO_INLINE THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE ThreadFunc(void *pp)
+static MY_NO_INLINE THREAD_FUNC_DECL ThreadFunc(void *pp)
{
+ #ifdef USE_ALLOCA
CMtDecThread *t = (CMtDecThread *)pp;
-
// fprintf(stderr, "\n%d = %p - before", t->index, &t);
- #ifdef USE_ALLOCA
t->allocaPtr = alloca(t->index * 128);
#endif
return ThreadFunc1(pp);
@@ -1092,13 +1092,14 @@ SRes MtDec_Code(CMtDec *p)
{
WRes wres;
- WRes sres;
+ SRes sres;
CMtDecThread *nextThread = &p->threads[p->numStartedThreads++];
// wres = MtDecThread_CreateAndStart(nextThread);
wres = MtDecThread_CreateEvents(nextThread);
if (wres == 0) { wres = Event_Set(&nextThread->canWrite);
if (wres == 0) { wres = Event_Set(&nextThread->canRead);
- if (wres == 0) { wres = ThreadFunc(nextThread);
+ if (wres == 0) { THREAD_FUNC_RET_TYPE res = ThreadFunc(nextThread);
+ wres = (WRes)(UINT_PTR)res;
if (wres != 0)
{
p->needContinue = False;
@@ -1130,8 +1131,8 @@ SRes MtDec_Code(CMtDec *p)
return SZ_OK;
// if (sres != SZ_OK)
- return sres;
- // return E_FAIL;
+ return sres;
+ // return SZ_ERROR_FAIL;
}
}
diff --git a/multiarc/src/formats/7z/C/MtDec.h b/multiarc/src/formats/7z/C/MtDec.h
index 9b577667..c2da46ae 100644..100755
--- a/multiarc/src/formats/7z/C/MtDec.h
+++ b/multiarc/src/formats/7z/C/MtDec.h
@@ -1,5 +1,5 @@
/* MtDec.h -- Multi-thread Decoder
-2018-07-04 : Igor Pavlov : Public domain */
+2020-03-05 : Igor Pavlov : Public domain */
#ifndef __MT_DEC_H
#define __MT_DEC_H
@@ -108,11 +108,12 @@ typedef struct
*/
SRes (*Write)(void *p, unsigned coderIndex,
BoolInt needWriteToStream,
- const Byte *src, size_t srcSize,
+ const Byte *src, size_t srcSize, BoolInt isCross,
// int srcFinished,
BoolInt *needContinue,
BoolInt *canRecode);
-} IMtDecCallback;
+
+} IMtDecCallback2;
@@ -132,7 +133,7 @@ typedef struct _CMtDec
ICompressProgress *progress;
ISzAllocPtr alloc;
- IMtDecCallback *mtCallback;
+ IMtDecCallback2 *mtCallback;
void *mtCallbackObject;
diff --git a/multiarc/src/formats/7z/C/Ppmd.h b/multiarc/src/formats/7z/C/Ppmd.h
index a5c1e3ef..b1987920 100644..100755
--- a/multiarc/src/formats/7z/C/Ppmd.h
+++ b/multiarc/src/formats/7z/C/Ppmd.h
@@ -1,5 +1,5 @@
/* Ppmd.h -- PPMD codec common code
-2017-04-03 : Igor Pavlov : Public domain
+2021-04-13 : Igor Pavlov : Public domain
This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
#ifndef __PPMD_H
@@ -9,7 +9,16 @@ This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
EXTERN_C_BEGIN
-#ifdef MY_CPU_32BIT
+#if defined(MY_CPU_SIZEOF_POINTER) && (MY_CPU_SIZEOF_POINTER == 4)
+/*
+ PPMD code always uses 32-bit internal fields in PPMD structures to store internal references in main block.
+ if (PPMD_32BIT is defined), the PPMD code stores internal pointers to 32-bit reference fields.
+ if (PPMD_32BIT is NOT defined), the PPMD code stores internal UInt32 offsets to reference fields.
+ if (pointer size is 64-bit), then (PPMD_32BIT) mode is not allowed,
+ if (pointer size is 32-bit), then (PPMD_32BIT) mode is optional,
+ and it's allowed to disable PPMD_32BIT mode even if pointer is 32-bit.
+ PPMD code works slightly faster in (PPMD_32BIT) mode.
+*/
#define PPMD_32BIT
#endif
@@ -28,7 +37,7 @@ EXTERN_C_BEGIN
#define PPMD_N4 ((128 + 3 - 1 * PPMD_N1 - 2 * PPMD_N2 - 3 * PPMD_N3) / 4)
#define PPMD_NUM_INDEXES (PPMD_N1 + PPMD_N2 + PPMD_N3 + PPMD_N4)
-#pragma pack(push, 1)
+MY_CPU_pragma_pack_push_1
/* Most compilers works OK here even without #pragma pack(push, 1), but some GCC compilers need it. */
/* SEE-contexts for PPM-contexts with masked symbols */
@@ -40,41 +49,114 @@ typedef struct
} CPpmd_See;
#define Ppmd_See_Update(p) if ((p)->Shift < PPMD_PERIOD_BITS && --(p)->Count == 0) \
- { (p)->Summ <<= 1; (p)->Count = (Byte)(3 << (p)->Shift++); }
+ { (p)->Summ = (UInt16)((p)->Summ << 1); (p)->Count = (Byte)(3 << (p)->Shift++); }
+
typedef struct
{
Byte Symbol;
Byte Freq;
- UInt16 SuccessorLow;
- UInt16 SuccessorHigh;
+ UInt16 Successor_0;
+ UInt16 Successor_1;
} CPpmd_State;
-#pragma pack(pop)
-
-typedef
- #ifdef PPMD_32BIT
- CPpmd_State *
- #else
- UInt32
- #endif
- CPpmd_State_Ref;
-
-typedef
- #ifdef PPMD_32BIT
- void *
- #else
- UInt32
- #endif
- CPpmd_Void_Ref;
-
-typedef
- #ifdef PPMD_32BIT
- Byte *
- #else
- UInt32
- #endif
- CPpmd_Byte_Ref;
+typedef struct CPpmd_State2_
+{
+ Byte Symbol;
+ Byte Freq;
+} CPpmd_State2;
+
+typedef struct CPpmd_State4_
+{
+ UInt16 Successor_0;
+ UInt16 Successor_1;
+} CPpmd_State4;
+
+MY_CPU_pragma_pop
+
+/*
+ PPMD code can write full CPpmd_State structure data to CPpmd*_Context
+ at (byte offset = 2) instead of some fields of original CPpmd*_Context structure.
+
+ If we use pointers to different types, but that point to shared
+ memory space, we can have aliasing problem (strict aliasing).
+
+ XLC compiler in -O2 mode can change the order of memory write instructions
+ in relation to read instructions, if we use pointers to different types.
+
+ To solve that aliasing problem we use combined CPpmd*_Context structure
+ with unions that contain the fields from both structures:
+ the original CPpmd*_Context and CPpmd_State.
+ So we can access the fields from both structures via one pointer,
+ and the compiler doesn't change the order of write instructions
+ in relation to read instructions.
+
+ If we don't use memory write instructions to shared memory in
+ some local code, and we use only reading instructions (read only),
+ then probably it's safe to use pointers to different types for reading.
+*/
+
+
+
+#ifdef PPMD_32BIT
+
+ #define Ppmd_Ref_Type(type) type *
+ #define Ppmd_GetRef(p, ptr) (ptr)
+ #define Ppmd_GetPtr(p, ptr) (ptr)
+ #define Ppmd_GetPtr_Type(p, ptr, note_type) (ptr)
+
+#else
+
+ #define Ppmd_Ref_Type(type) UInt32
+ #define Ppmd_GetRef(p, ptr) ((UInt32)((Byte *)(ptr) - (p)->Base))
+ #define Ppmd_GetPtr(p, offs) ((void *)((p)->Base + (offs)))
+ #define Ppmd_GetPtr_Type(p, offs, type) ((type *)Ppmd_GetPtr(p, offs))
+
+#endif // PPMD_32BIT
+
+
+typedef Ppmd_Ref_Type(CPpmd_State) CPpmd_State_Ref;
+typedef Ppmd_Ref_Type(void) CPpmd_Void_Ref;
+typedef Ppmd_Ref_Type(Byte) CPpmd_Byte_Ref;
+
+
+/*
+#ifdef MY_CPU_LE_UNALIGN
+// the unaligned 32-bit access latency can be too large, if the data is not in L1 cache.
+#define Ppmd_GET_SUCCESSOR(p) ((CPpmd_Void_Ref)*(const UInt32 *)(const void *)&(p)->Successor_0)
+#define Ppmd_SET_SUCCESSOR(p, v) *(UInt32 *)(void *)(void *)&(p)->Successor_0 = (UInt32)(v)
+
+#else
+*/
+
+/*
+ We can write 16-bit halves to 32-bit (Successor) field in any selected order.
+ But the native order is more consistent way.
+ So we use the native order, if LE/BE order can be detected here at compile time.
+*/
+
+#ifdef MY_CPU_BE
+
+ #define Ppmd_GET_SUCCESSOR(p) \
+ ( (CPpmd_Void_Ref) (((UInt32)(p)->Successor_0 << 16) | (p)->Successor_1) )
+
+ #define Ppmd_SET_SUCCESSOR(p, v) { \
+ (p)->Successor_0 = (UInt16)(((UInt32)(v) >> 16) /* & 0xFFFF */); \
+ (p)->Successor_1 = (UInt16)((UInt32)(v) /* & 0xFFFF */); }
+
+#else
+
+ #define Ppmd_GET_SUCCESSOR(p) \
+ ( (CPpmd_Void_Ref) ((p)->Successor_0 | ((UInt32)(p)->Successor_1 << 16)) )
+
+ #define Ppmd_SET_SUCCESSOR(p, v) { \
+ (p)->Successor_0 = (UInt16)((UInt32)(v) /* & 0xFFFF */); \
+ (p)->Successor_1 = (UInt16)(((UInt32)(v) >> 16) /* & 0xFFFF */); }
+
+#endif
+
+// #endif
+
#define PPMD_SetAllBitsIn256Bytes(p) \
{ size_t z; for (z = 0; z < 256 / sizeof(p[0]); z += 8) { \
diff --git a/multiarc/src/formats/7z/C/Ppmd7.c b/multiarc/src/formats/7z/C/Ppmd7.c
index 470aadcc..cf401cb3 100644..100755
--- a/multiarc/src/formats/7z/C/Ppmd7.c
+++ b/multiarc/src/formats/7z/C/Ppmd7.c
@@ -1,5 +1,5 @@
/* Ppmd7.c -- PPMdH codec
-2018-07-04 : Igor Pavlov : Public domain
+2021-04-13 : Igor Pavlov : Public domain
This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
#include "Precomp.h"
@@ -8,7 +8,12 @@ This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
#include "Ppmd7.h"
-const Byte PPMD7_kExpEscape[16] = { 25, 14, 9, 7, 5, 5, 4, 4, 4, 3, 3, 3, 2, 2, 2, 2 };
+/* define PPMD7_ORDER_0_SUPPPORT to support order-0 mode, unsupported by original PPMd var.H. code */
+// #define PPMD7_ORDER_0_SUPPPORT
+
+MY_ALIGN(16)
+static const Byte PPMD7_kExpEscape[16] = { 25, 14, 9, 7, 5, 5, 4, 4, 4, 3, 3, 3, 2, 2, 2, 2 };
+MY_ALIGN(16)
static const UInt16 kInitBinEsc[] = { 0x3CDD, 0x1F3F, 0x59BF, 0x48F3, 0x64A1, 0x5ABC, 0x6632, 0x6051};
#define MAX_FREQ 124
@@ -16,13 +21,10 @@ static const UInt16 kInitBinEsc[] = { 0x3CDD, 0x1F3F, 0x59BF, 0x48F3, 0x64A1, 0x
#define U2B(nu) ((UInt32)(nu) * UNIT_SIZE)
#define U2I(nu) (p->Units2Indx[(size_t)(nu) - 1])
-#define I2U(indx) (p->Indx2Units[indx])
+#define I2U(indx) ((unsigned)p->Indx2Units[indx])
+#define I2U_UInt16(indx) ((UInt16)p->Indx2Units[indx])
-#ifdef PPMD_32BIT
- #define REF(ptr) (ptr)
-#else
- #define REF(ptr) ((UInt32)((Byte *)(ptr) - (p)->Base))
-#endif
+#define REF(ptr) Ppmd_GetRef(p, ptr)
#define STATS_REF(ptr) ((CPpmd_State_Ref)REF(ptr))
@@ -35,13 +37,7 @@ typedef CPpmd7_Context * CTX_PTR;
struct CPpmd7_Node_;
-typedef
- #ifdef PPMD_32BIT
- struct CPpmd7_Node_ *
- #else
- UInt32
- #endif
- CPpmd7_Node_Ref;
+typedef Ppmd_Ref_Type(struct CPpmd7_Node_) CPpmd7_Node_Ref;
typedef struct CPpmd7_Node_
{
@@ -51,17 +47,13 @@ typedef struct CPpmd7_Node_
CPpmd7_Node_Ref Prev;
} CPpmd7_Node;
-#ifdef PPMD_32BIT
- #define NODE(ptr) (ptr)
-#else
- #define NODE(offs) ((CPpmd7_Node *)(p->Base + (offs)))
-#endif
+#define NODE(r) Ppmd_GetPtr_Type(p, r, CPpmd7_Node)
void Ppmd7_Construct(CPpmd7 *p)
{
unsigned i, k, m;
- p->Base = 0;
+ p->Base = NULL;
for (i = 0, k = 0; i < PPMD_NUM_INDEXES; i++)
{
@@ -77,6 +69,7 @@ void Ppmd7_Construct(CPpmd7 *p)
for (i = 0; i < 3; i++)
p->NS2Indx[i] = (Byte)i;
+
for (m = i, k = 1; i < 256; i++)
{
p->NS2Indx[i] = (Byte)m;
@@ -84,54 +77,63 @@ void Ppmd7_Construct(CPpmd7 *p)
k = (++m) - 2;
}
- memset(p->HB2Flag, 0, 0x40);
- memset(p->HB2Flag + 0x40, 8, 0x100 - 0x40);
+ memcpy(p->ExpEscape, PPMD7_kExpEscape, 16);
}
+
void Ppmd7_Free(CPpmd7 *p, ISzAllocPtr alloc)
{
ISzAlloc_Free(alloc, p->Base);
p->Size = 0;
- p->Base = 0;
+ p->Base = NULL;
}
+
BoolInt Ppmd7_Alloc(CPpmd7 *p, UInt32 size, ISzAllocPtr alloc)
{
if (!p->Base || p->Size != size)
{
- size_t size2;
Ppmd7_Free(p, alloc);
- size2 = 0
- #ifndef PPMD_32BIT
- + UNIT_SIZE
- #endif
- ;
- p->AlignOffset =
- #ifdef PPMD_32BIT
- (4 - size) & 3;
- #else
- 4 - (size & 3);
- #endif
- if ((p->Base = (Byte *)ISzAlloc_Alloc(alloc, p->AlignOffset + size + size2)) == 0)
+ p->AlignOffset = (4 - size) & 3;
+ if ((p->Base = (Byte *)ISzAlloc_Alloc(alloc, p->AlignOffset + size)) == NULL)
return False;
p->Size = size;
}
return True;
}
+
+
+// ---------- Internal Memory Allocator ----------
+
+/* We can use CPpmd7_Node in list of free units (as in Ppmd8)
+ But we still need one additional list walk pass in GlueFreeBlocks().
+ So we use simple CPpmd_Void_Ref instead of CPpmd7_Node in InsertNode() / RemoveNode()
+*/
+
+#define EMPTY_NODE 0
+
+
static void InsertNode(CPpmd7 *p, void *node, unsigned indx)
{
*((CPpmd_Void_Ref *)node) = p->FreeList[indx];
+ // ((CPpmd7_Node *)node)->Next = (CPpmd7_Node_Ref)p->FreeList[indx];
+
p->FreeList[indx] = REF(node);
+
}
+
static void *RemoveNode(CPpmd7 *p, unsigned indx)
{
CPpmd_Void_Ref *node = (CPpmd_Void_Ref *)Ppmd7_GetPtr(p, p->FreeList[indx]);
p->FreeList[indx] = *node;
+ // CPpmd7_Node *node = NODE((CPpmd7_Node_Ref)p->FreeList[indx]);
+ // p->FreeList[indx] = node->Next;
return node;
}
+
static void SplitBlock(CPpmd7 *p, void *ptr, unsigned oldIndx, unsigned newIndx)
{
unsigned i, nu = I2U(oldIndx) - I2U(newIndx);
@@ -144,123 +146,167 @@ static void SplitBlock(CPpmd7 *p, void *ptr, unsigned oldIndx, unsigned newIndx)
InsertNode(p, ptr, i);
}
-static void GlueFreeBlocks(CPpmd7 *p)
+
+/* we use CPpmd7_Node_Union union to solve XLC -O2 strict pointer aliasing problem */
+
+typedef union _CPpmd7_Node_Union
{
- #ifdef PPMD_32BIT
- CPpmd7_Node headItem;
- CPpmd7_Node_Ref head = &headItem;
- #else
- CPpmd7_Node_Ref head = p->AlignOffset + p->Size;
- #endif
-
- CPpmd7_Node_Ref n = head;
- unsigned i;
+ CPpmd7_Node Node;
+ CPpmd7_Node_Ref NextRef;
+} CPpmd7_Node_Union;
+
+/* Original PPmdH (Ppmd7) code uses doubly linked list in GlueFreeBlocks()
+ we use single linked list similar to Ppmd8 code */
+
+static void GlueFreeBlocks(CPpmd7 *p)
+{
+ /*
+ we use first UInt16 field of 12-bytes UNITs as record type stamp
+ CPpmd_State { Byte Symbol; Byte Freq; : Freq != 0
+ CPpmd7_Context { UInt16 NumStats; : NumStats != 0
+ CPpmd7_Node { UInt16 Stamp : Stamp == 0 for free record
+ : Stamp == 1 for head record and guard
+ Last 12-bytes UNIT in array always contains 12-bytes order-0 CPpmd7_Context record.
+ */
+ CPpmd7_Node_Ref head, n = 0;
+
p->GlueCount = 255;
- /* create doubly-linked list of free blocks */
- for (i = 0; i < PPMD_NUM_INDEXES; i++)
+
+ /* we set guard NODE at LoUnit */
+ if (p->LoUnit != p->HiUnit)
+ ((CPpmd7_Node *)(void *)p->LoUnit)->Stamp = 1;
+
{
- UInt16 nu = I2U(i);
- CPpmd7_Node_Ref next = (CPpmd7_Node_Ref)p->FreeList[i];
- p->FreeList[i] = 0;
- while (next != 0)
+ /* Create list of free blocks.
+ We still need one additional list walk pass before Glue. */
+ unsigned i;
+ for (i = 0; i < PPMD_NUM_INDEXES; i++)
{
- CPpmd7_Node *node = NODE(next);
- node->Next = n;
- n = NODE(n)->Prev = next;
- next = *(const CPpmd7_Node_Ref *)node;
- node->Stamp = 0;
- node->NU = (UInt16)nu;
+ const UInt16 nu = I2U_UInt16(i);
+ CPpmd7_Node_Ref next = (CPpmd7_Node_Ref)p->FreeList[i];
+ p->FreeList[i] = 0;
+ while (next != 0)
+ {
+ /* Don't change the order of the following commands: */
+ CPpmd7_Node_Union *un = (CPpmd7_Node_Union *)NODE(next);
+ const CPpmd7_Node_Ref tmp = next;
+ next = un->NextRef;
+ un->Node.Stamp = EMPTY_NODE;
+ un->Node.NU = nu;
+ un->Node.Next = n;
+ n = tmp;
+ }
}
}
- NODE(head)->Stamp = 1;
- NODE(head)->Next = n;
- NODE(n)->Prev = head;
- if (p->LoUnit != p->HiUnit)
- ((CPpmd7_Node *)p->LoUnit)->Stamp = 1;
-
- /* Glue free blocks */
- while (n != head)
+
+ head = n;
+ /* Glue and Fill must walk the list in same direction */
{
- CPpmd7_Node *node = NODE(n);
- UInt32 nu = (UInt32)node->NU;
- for (;;)
+ /* Glue free blocks */
+ CPpmd7_Node_Ref *prev = &head;
+ while (n)
{
- CPpmd7_Node *node2 = NODE(n) + nu;
- nu += node2->NU;
- if (node2->Stamp != 0 || nu >= 0x10000)
- break;
- NODE(node2->Prev)->Next = node2->Next;
- NODE(node2->Next)->Prev = node2->Prev;
- node->NU = (UInt16)nu;
+ CPpmd7_Node *node = NODE(n);
+ UInt32 nu = node->NU;
+ n = node->Next;
+ if (nu == 0)
+ {
+ *prev = n;
+ continue;
+ }
+ prev = &node->Next;
+ for (;;)
+ {
+ CPpmd7_Node *node2 = node + nu;
+ nu += node2->NU;
+ if (node2->Stamp != EMPTY_NODE || nu >= 0x10000)
+ break;
+ node->NU = (UInt16)nu;
+ node2->NU = 0;
+ }
}
- n = node->Next;
}
-
+
/* Fill lists of free blocks */
- for (n = NODE(head)->Next; n != head;)
+ for (n = head; n != 0;)
{
CPpmd7_Node *node = NODE(n);
- unsigned nu;
- CPpmd7_Node_Ref next = node->Next;
- for (nu = node->NU; nu > 128; nu -= 128, node += 128)
+ UInt32 nu = node->NU;
+ unsigned i;
+ n = node->Next;
+ if (nu == 0)
+ continue;
+ for (; nu > 128; nu -= 128, node += 128)
InsertNode(p, node, PPMD_NUM_INDEXES - 1);
if (I2U(i = U2I(nu)) != nu)
{
unsigned k = I2U(--i);
- InsertNode(p, node + k, nu - k - 1);
+ InsertNode(p, node + k, (unsigned)nu - k - 1);
}
InsertNode(p, node, i);
- n = next;
}
}
+
+MY_NO_INLINE
static void *AllocUnitsRare(CPpmd7 *p, unsigned indx)
{
unsigned i;
- void *retVal;
+
if (p->GlueCount == 0)
{
GlueFreeBlocks(p);
if (p->FreeList[indx] != 0)
return RemoveNode(p, indx);
}
+
i = indx;
+
do
{
if (++i == PPMD_NUM_INDEXES)
{
UInt32 numBytes = U2B(I2U(indx));
+ Byte *us = p->UnitsStart;
p->GlueCount--;
- return ((UInt32)(p->UnitsStart - p->Text) > numBytes) ? (p->UnitsStart -= numBytes) : (NULL);
+ return ((UInt32)(us - p->Text) > numBytes) ? (p->UnitsStart = us - numBytes) : NULL;
}
}
while (p->FreeList[i] == 0);
- retVal = RemoveNode(p, i);
- SplitBlock(p, retVal, i, indx);
- return retVal;
+
+ {
+ void *block = RemoveNode(p, i);
+ SplitBlock(p, block, i, indx);
+ return block;
+ }
}
+
static void *AllocUnits(CPpmd7 *p, unsigned indx)
{
- UInt32 numBytes;
if (p->FreeList[indx] != 0)
return RemoveNode(p, indx);
- numBytes = U2B(I2U(indx));
- if (numBytes <= (UInt32)(p->HiUnit - p->LoUnit))
{
- void *retVal = p->LoUnit;
- p->LoUnit += numBytes;
- return retVal;
+ UInt32 numBytes = U2B(I2U(indx));
+ Byte *lo = p->LoUnit;
+ if ((UInt32)(p->HiUnit - lo) >= numBytes)
+ {
+ p->LoUnit = lo + numBytes;
+ return lo;
+ }
}
return AllocUnitsRare(p, indx);
}
+
#define MyMem12Cpy(dest, src, num) \
- { UInt32 *d = (UInt32 *)dest; const UInt32 *s = (const UInt32 *)src; UInt32 n = num; \
- do { d[0] = s[0]; d[1] = s[1]; d[2] = s[2]; s += 3; d += 3; } while (--n); }
+ { UInt32 *d = (UInt32 *)dest; const UInt32 *z = (const UInt32 *)src; UInt32 n = num; \
+ do { d[0] = z[0]; d[1] = z[1]; d[2] = z[2]; z += 3; d += 3; } while (--n); }
+
+/*
static void *ShrinkUnits(CPpmd7 *p, void *oldPtr, unsigned oldNU, unsigned newNU)
{
unsigned i0 = U2I(oldNU);
@@ -277,20 +323,25 @@ static void *ShrinkUnits(CPpmd7 *p, void *oldPtr, unsigned oldNU, unsigned newNU
SplitBlock(p, oldPtr, i0, i1);
return oldPtr;
}
+*/
-#define SUCCESSOR(p) ((CPpmd_Void_Ref)((p)->SuccessorLow | ((UInt32)(p)->SuccessorHigh << 16)))
+#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p)
static void SetSuccessor(CPpmd_State *p, CPpmd_Void_Ref v)
{
- (p)->SuccessorLow = (UInt16)((UInt32)(v) & 0xFFFF);
- (p)->SuccessorHigh = (UInt16)(((UInt32)(v) >> 16) & 0xFFFF);
+ Ppmd_SET_SUCCESSOR(p, v);
}
-static void RestartModel(CPpmd7 *p)
+
+
+MY_NO_INLINE
+static
+void RestartModel(CPpmd7 *p)
{
- unsigned i, k, m;
+ unsigned i, k;
memset(p->FreeList, 0, sizeof(p->FreeList));
+
p->Text = p->Base + p->AlignOffset;
p->HiUnit = p->Text + p->Size;
p->LoUnit = p->UnitsStart = p->HiUnit - p->Size / 8 / UNIT_SIZE * 7 * UNIT_SIZE;
@@ -300,57 +351,110 @@ static void RestartModel(CPpmd7 *p)
p->RunLength = p->InitRL = -(Int32)((p->MaxOrder < 12) ? p->MaxOrder : 12) - 1;
p->PrevSuccess = 0;
- p->MinContext = p->MaxContext = (CTX_PTR)(p->HiUnit -= UNIT_SIZE); /* AllocContext(p); */
- p->MinContext->Suffix = 0;
- p->MinContext->NumStats = 256;
- p->MinContext->SummFreq = 256 + 1;
- p->FoundState = (CPpmd_State *)p->LoUnit; /* AllocUnits(p, PPMD_NUM_INDEXES - 1); */
- p->LoUnit += U2B(256 / 2);
- p->MinContext->Stats = REF(p->FoundState);
- for (i = 0; i < 256; i++)
{
- CPpmd_State *s = &p->FoundState[i];
- s->Symbol = (Byte)i;
- s->Freq = 1;
- SetSuccessor(s, 0);
+ CPpmd7_Context *mc = (CTX_PTR)(void *)(p->HiUnit -= UNIT_SIZE); /* AllocContext(p); */
+ CPpmd_State *s = (CPpmd_State *)p->LoUnit; /* AllocUnits(p, PPMD_NUM_INDEXES - 1); */
+
+ p->LoUnit += U2B(256 / 2);
+ p->MaxContext = p->MinContext = mc;
+ p->FoundState = s;
+
+ mc->NumStats = 256;
+ mc->Union2.SummFreq = 256 + 1;
+ mc->Union4.Stats = REF(s);
+ mc->Suffix = 0;
+
+ for (i = 0; i < 256; i++, s++)
+ {
+ s->Symbol = (Byte)i;
+ s->Freq = 1;
+ SetSuccessor(s, 0);
+ }
+
+ #ifdef PPMD7_ORDER_0_SUPPPORT
+ if (p->MaxOrder == 0)
+ {
+ CPpmd_Void_Ref r = REF(mc);
+ s = p->FoundState;
+ for (i = 0; i < 256; i++, s++)
+ SetSuccessor(s, r);
+ return;
+ }
+ #endif
}
for (i = 0; i < 128; i++)
+
+
+
for (k = 0; k < 8; k++)
{
+ unsigned m;
UInt16 *dest = p->BinSumm[i] + k;
UInt16 val = (UInt16)(PPMD_BIN_SCALE - kInitBinEsc[k] / (i + 2));
for (m = 0; m < 64; m += 8)
dest[m] = val;
}
-
+
+
for (i = 0; i < 25; i++)
- for (k = 0; k < 16; k++)
+ {
+
+ CPpmd_See *s = p->See[i];
+
+
+
+ unsigned summ = ((5 * i + 10) << (PPMD_PERIOD_BITS - 4));
+ for (k = 0; k < 16; k++, s++)
{
- CPpmd_See *s = &p->See[i][k];
- s->Summ = (UInt16)((5 * i + 10) << (s->Shift = PPMD_PERIOD_BITS - 4));
+ s->Summ = (UInt16)summ;
+ s->Shift = (PPMD_PERIOD_BITS - 4);
s->Count = 4;
}
+ }
+
+ p->DummySee.Summ = 0; /* unused */
+ p->DummySee.Shift = PPMD_PERIOD_BITS;
+ p->DummySee.Count = 64; /* unused */
}
+
void Ppmd7_Init(CPpmd7 *p, unsigned maxOrder)
{
p->MaxOrder = maxOrder;
+
RestartModel(p);
- p->DummySee.Shift = PPMD_PERIOD_BITS;
- p->DummySee.Summ = 0; /* unused */
- p->DummySee.Count = 64; /* unused */
}
-static CTX_PTR CreateSuccessors(CPpmd7 *p, BoolInt skip)
+
+
+/*
+ CreateSuccessors()
+ It's called when (FoundState->Successor) is RAW-Successor,
+ that is the link to position in Raw text.
+ So we create Context records and write the links to
+ FoundState->Successor and to identical RAW-Successors in suffix
+ contexts of MinContext.
+
+ The function returns:
+ if (OrderFall == 0) then MinContext is already at MAX order,
+ { return pointer to new or existing context of same MAX order }
+ else
+ { return pointer to new real context that will be (Order+1) in comparison with MinContext
+
+ also it can return pointer to real context of same order,
+*/
+
+MY_NO_INLINE
+static CTX_PTR CreateSuccessors(CPpmd7 *p)
{
- CPpmd_State upState;
CTX_PTR c = p->MinContext;
CPpmd_Byte_Ref upBranch = (CPpmd_Byte_Ref)SUCCESSOR(p->FoundState);
- CPpmd_State *ps[PPMD7_MAX_ORDER];
+ Byte newSym, newFreq;
unsigned numPs = 0;
-
- if (!skip)
+ CPpmd_State *ps[PPMD7_MAX_ORDER];
+
+ if (p->OrderFall != 0)
ps[numPs++] = p->FoundState;
while (c->Suffix)
@@ -358,44 +462,70 @@ static CTX_PTR CreateSuccessors(CPpmd7 *p, BoolInt skip)
CPpmd_Void_Ref successor;
CPpmd_State *s;
c = SUFFIX(c);
+
+
if (c->NumStats != 1)
{
- for (s = STATS(c); s->Symbol != p->FoundState->Symbol; s++);
+ Byte sym = p->FoundState->Symbol;
+ for (s = STATS(c); s->Symbol != sym; s++);
+
}
else
+ {
s = ONE_STATE(c);
+
+ }
successor = SUCCESSOR(s);
if (successor != upBranch)
{
+ // (c) is real record Context here,
c = CTX(successor);
if (numPs == 0)
+ {
+ // (c) is real record MAX Order Context here,
+ // So we don't need to create any new contexts.
return c;
+ }
break;
}
ps[numPs++] = s;
}
- upState.Symbol = *(const Byte *)Ppmd7_GetPtr(p, upBranch);
- SetSuccessor(&upState, upBranch + 1);
+ // All created contexts will have single-symbol with new RAW-Successor
+ // All new RAW-Successors will point to next position in RAW text
+ // after FoundState->Successor
+
+ newSym = *(const Byte *)Ppmd7_GetPtr(p, upBranch);
+ upBranch++;
+
if (c->NumStats == 1)
- upState.Freq = ONE_STATE(c)->Freq;
+ newFreq = ONE_STATE(c)->Freq;
else
{
UInt32 cf, s0;
CPpmd_State *s;
- for (s = STATS(c); s->Symbol != upState.Symbol; s++);
- cf = s->Freq - 1;
- s0 = c->SummFreq - c->NumStats - cf;
- upState.Freq = (Byte)(1 + ((2 * cf <= s0) ? (5 * cf > s0) : ((2 * cf + 3 * s0 - 1) / (2 * s0))));
+ for (s = STATS(c); s->Symbol != newSym; s++);
+ cf = (UInt32)s->Freq - 1;
+ s0 = (UInt32)c->Union2.SummFreq - c->NumStats - cf;
+ /*
+ cf - is frequency of symbol that will be Successor in new context records.
+ s0 - is cumulative frequency sum of the other symbols from parent context.
+ max(newFreq)= (s->Freq + 1), when (s0 == 1)
+ we have requirement (Ppmd7Context_OneState()->Freq <= 128) in BinSumm[]
+ so (s->Freq < 128) - is requirement for multi-symbol contexts
+ */
+ newFreq = (Byte)(1 + ((2 * cf <= s0) ? (5 * cf > s0) : (2 * cf + s0 - 1) / (2 * s0) + 1));
}
+ // Create new single-symbol contexts from low order to high order in loop
+
do
{
- /* Create Child */
- CTX_PTR c1; /* = AllocContext(p); */
+ CTX_PTR c1;
+ /* = AllocContext(p); */
if (p->HiUnit != p->LoUnit)
- c1 = (CTX_PTR)(p->HiUnit -= UNIT_SIZE);
+ c1 = (CTX_PTR)(void *)(p->HiUnit -= UNIT_SIZE);
else if (p->FreeList[0] != 0)
c1 = (CTX_PTR)RemoveNode(p, 0);
else
@@ -404,8 +534,11 @@ static CTX_PTR CreateSuccessors(CPpmd7 *p, BoolInt skip)
if (!c1)
return NULL;
}
+
c1->NumStats = 1;
- *ONE_STATE(c1) = upState;
+ ONE_STATE(c1)->Symbol = newSym;
+ ONE_STATE(c1)->Freq = newFreq;
+ SetSuccessor(ONE_STATE(c1), upBranch);
c1->Suffix = REF(c);
SetSuccessor(ps[--numPs], REF(c1));
c = c1;
@@ -415,21 +548,26 @@ static CTX_PTR CreateSuccessors(CPpmd7 *p, BoolInt skip)
return c;
}
-static void SwapStates(CPpmd_State *t1, CPpmd_State *t2)
-{
- CPpmd_State tmp = *t1;
- *t1 = *t2;
- *t2 = tmp;
-}
-static void UpdateModel(CPpmd7 *p)
+
+#define SwapStates(s) \
+ { CPpmd_State tmp = s[0]; s[0] = s[-1]; s[-1] = tmp; }
+
+
+void Ppmd7_UpdateModel(CPpmd7 *p);
+MY_NO_INLINE
+void Ppmd7_UpdateModel(CPpmd7 *p)
{
- CPpmd_Void_Ref successor, fSuccessor = SUCCESSOR(p->FoundState);
- CTX_PTR c;
+ CPpmd_Void_Ref maxSuccessor, minSuccessor;
+ CTX_PTR c, mc;
unsigned s0, ns;
-
+
+
+
if (p->FoundState->Freq < MAX_FREQ / 4 && p->MinContext->Suffix != 0)
{
+ /* Update Freqs in Suffix Context */
+
c = SUFFIX(p->MinContext);
if (c->NumStats == 1)
@@ -441,27 +579,39 @@ static void UpdateModel(CPpmd7 *p)
else
{
CPpmd_State *s = STATS(c);
- if (s->Symbol != p->FoundState->Symbol)
+ Byte sym = p->FoundState->Symbol;
+
+ if (s->Symbol != sym)
{
- do { s++; } while (s->Symbol != p->FoundState->Symbol);
+ do
+ {
+ // s++; if (s->Symbol == sym) break;
+ s++;
+ }
+ while (s->Symbol != sym);
+
if (s[0].Freq >= s[-1].Freq)
{
- SwapStates(&s[0], &s[-1]);
+ SwapStates(s);
s--;
}
}
+
if (s->Freq < MAX_FREQ - 9)
{
- s->Freq += 2;
- c->SummFreq += 2;
+ s->Freq = (Byte)(s->Freq + 2);
+ c->Union2.SummFreq = (UInt16)(c->Union2.SummFreq + 2);
}
}
}
+
if (p->OrderFall == 0)
{
- p->MinContext = p->MaxContext = CreateSuccessors(p, True);
- if (p->MinContext == 0)
+ /* MAX ORDER context */
+ /* (FoundState->Successor) is RAW-Successor. */
+ p->MaxContext = p->MinContext = CreateSuccessors(p);
+ if (!p->MinContext)
{
RestartModel(p);
return;
@@ -469,45 +619,93 @@ static void UpdateModel(CPpmd7 *p)
SetSuccessor(p->FoundState, REF(p->MinContext));
return;
}
+
+
+ /* NON-MAX ORDER context */
- *p->Text++ = p->FoundState->Symbol;
- successor = REF(p->Text);
- if (p->Text >= p->UnitsStart)
{
- RestartModel(p);
- return;
+ Byte *text = p->Text;
+ *text++ = p->FoundState->Symbol;
+ p->Text = text;
+ if (text >= p->UnitsStart)
+ {
+ RestartModel(p);
+ return;
+ }
+ maxSuccessor = REF(text);
}
- if (fSuccessor)
+ minSuccessor = SUCCESSOR(p->FoundState);
+
+ if (minSuccessor)
{
- if (fSuccessor <= successor)
+ // there is Successor for FoundState in MinContext.
+ // So the next context will be one order higher than MinContext.
+
+ if (minSuccessor <= maxSuccessor)
{
- CTX_PTR cs = CreateSuccessors(p, False);
- if (cs == NULL)
+ // minSuccessor is RAW-Successor. So we will create real contexts records:
+ CTX_PTR cs = CreateSuccessors(p);
+ if (!cs)
{
RestartModel(p);
return;
}
- fSuccessor = REF(cs);
+ minSuccessor = REF(cs);
}
+
+ // minSuccessor now is real Context pointer that points to existing (Order+1) context
+
if (--p->OrderFall == 0)
{
- successor = fSuccessor;
+ /*
+ if we move to MaxOrder context, then minSuccessor will be common Successor for both:
+ MinContext that is (MaxOrder - 1)
+ MaxContext that is (MaxOrder)
+ so we don't need new RAW-Successor, and we can use real minSuccessor
+ as successors for both MinContext and MaxContext.
+ */
+ maxSuccessor = minSuccessor;
+
+ /*
+ if (MaxContext != MinContext)
+ {
+ there was order fall from MaxOrder and we don't need current symbol
+ to transfer some RAW-Successors to real contexts.
+ So we roll back pointer in raw data for one position.
+ }
+ */
p->Text -= (p->MaxContext != p->MinContext);
}
}
else
{
- SetSuccessor(p->FoundState, successor);
- fSuccessor = REF(p->MinContext);
+ /*
+ FoundState has NULL-Successor here.
+ And only root 0-order context can contain NULL-Successors.
+ We change Successor in FoundState to RAW-Successor,
+ And next context will be same 0-order root Context.
+ */
+ SetSuccessor(p->FoundState, maxSuccessor);
+ minSuccessor = REF(p->MinContext);
}
-
- s0 = p->MinContext->SummFreq - (ns = p->MinContext->NumStats) - (p->FoundState->Freq - 1);
-
- for (c = p->MaxContext; c != p->MinContext; c = SUFFIX(c))
+
+ mc = p->MinContext;
+ c = p->MaxContext;
+
+ p->MaxContext = p->MinContext = CTX(minSuccessor);
+
+ if (c == mc)
+ return;
+
+ // s0 : is pure Escape Freq
+ s0 = mc->Union2.SummFreq - (ns = mc->NumStats) - ((unsigned)p->FoundState->Freq - 1);
+
+ do
{
unsigned ns1;
- UInt32 cf, sf;
+ UInt32 sum;
+
if ((ns1 = c->NumStats) != 1)
{
if ((ns1 & 1) == 0)
@@ -527,80 +725,127 @@ static void UpdateModel(CPpmd7 *p)
oldPtr = STATS(c);
MyMem12Cpy(ptr, oldPtr, oldNU);
InsertNode(p, oldPtr, i);
- c->Stats = STATS_REF(ptr);
+ c->Union4.Stats = STATS_REF(ptr);
}
}
- c->SummFreq = (UInt16)(c->SummFreq + (2 * ns1 < ns) + 2 * ((4 * ns1 <= ns) & (c->SummFreq <= 8 * ns1)));
+ sum = c->Union2.SummFreq;
+ /* max increase of Escape_Freq is 3 here.
+ total increase of Union2.SummFreq for all symbols is less than 256 here */
+ sum += (UInt32)(2 * ns1 < ns) + 2 * ((unsigned)(4 * ns1 <= ns) & (sum <= 8 * ns1));
+ /* original PPMdH uses 16-bit variable for (sum) here.
+ But (sum < 0x9000). So we don't truncate (sum) to 16-bit */
+ // sum = (UInt16)sum;
}
else
{
+ // instead of One-symbol context we create 2-symbol context
CPpmd_State *s = (CPpmd_State*)AllocUnits(p, 0);
if (!s)
{
RestartModel(p);
return;
}
- *s = *ONE_STATE(c);
- c->Stats = REF(s);
- if (s->Freq < MAX_FREQ / 4 - 1)
- s->Freq <<= 1;
- else
- s->Freq = MAX_FREQ - 4;
- c->SummFreq = (UInt16)(s->Freq + p->InitEsc + (ns > 3));
- }
- cf = 2 * (UInt32)p->FoundState->Freq * (c->SummFreq + 6);
- sf = (UInt32)s0 + c->SummFreq;
- if (cf < 6 * sf)
- {
- cf = 1 + (cf > sf) + (cf >= 4 * sf);
- c->SummFreq += 3;
- }
- else
- {
- cf = 4 + (cf >= 9 * sf) + (cf >= 12 * sf) + (cf >= 15 * sf);
- c->SummFreq = (UInt16)(c->SummFreq + cf);
+ {
+ unsigned freq = c->Union2.State2.Freq;
+ // s = *ONE_STATE(c);
+ s->Symbol = c->Union2.State2.Symbol;
+ s->Successor_0 = c->Union4.State4.Successor_0;
+ s->Successor_1 = c->Union4.State4.Successor_1;
+ // SetSuccessor(s, c->Union4.Stats); // call it only for debug purposes to check the order of
+ // (Successor_0 and Successor_1) in LE/BE.
+ c->Union4.Stats = REF(s);
+ if (freq < MAX_FREQ / 4 - 1)
+ freq <<= 1;
+ else
+ freq = MAX_FREQ - 4;
+ // (max(s->freq) == 120), when we convert from 1-symbol into 2-symbol context
+ s->Freq = (Byte)freq;
+ // max(InitEsc = PPMD7_kExpEscape[*]) is 25. So the max(escapeFreq) is 26 here
+ sum = freq + p->InitEsc + (ns > 3);
+ }
}
+
{
CPpmd_State *s = STATS(c) + ns1;
- SetSuccessor(s, successor);
+ UInt32 cf = 2 * (sum + 6) * (UInt32)p->FoundState->Freq;
+ UInt32 sf = (UInt32)s0 + sum;
s->Symbol = p->FoundState->Symbol;
- s->Freq = (Byte)cf;
c->NumStats = (UInt16)(ns1 + 1);
+ SetSuccessor(s, maxSuccessor);
+
+ if (cf < 6 * sf)
+ {
+ cf = (UInt32)1 + (cf > sf) + (cf >= 4 * sf);
+ sum += 3;
+ /* It can add (0, 1, 2) to Escape_Freq */
+ }
+ else
+ {
+ cf = (UInt32)4 + (cf >= 9 * sf) + (cf >= 12 * sf) + (cf >= 15 * sf);
+ sum += cf;
+ }
+
+ c->Union2.SummFreq = (UInt16)sum;
+ s->Freq = (Byte)cf;
}
+ c = SUFFIX(c);
}
- p->MaxContext = p->MinContext = CTX(fSuccessor);
+ while (c != mc);
}
+
+
+MY_NO_INLINE
static void Rescale(CPpmd7 *p)
{
unsigned i, adder, sumFreq, escFreq;
CPpmd_State *stats = STATS(p->MinContext);
CPpmd_State *s = p->FoundState;
+
+ /* Sort the list by Freq */
+ if (s != stats)
{
CPpmd_State tmp = *s;
- for (; s != stats; s--)
+ do
s[0] = s[-1];
+ while (--s != stats);
*s = tmp;
}
- escFreq = p->MinContext->SummFreq - s->Freq;
- s->Freq += 4;
- adder = (p->OrderFall != 0);
- s->Freq = (Byte)((s->Freq + adder) >> 1);
+
sumFreq = s->Freq;
+ escFreq = p->MinContext->Union2.SummFreq - sumFreq;
+
+ /*
+ if (p->OrderFall == 0), adder = 0 : it's allowed to remove symbol from MAX Order context
+ if (p->OrderFall != 0), adder = 1 : it's NOT allowed to remove symbol from NON-MAX Order context
+ */
+
+ adder = (p->OrderFall != 0);
+
+ #ifdef PPMD7_ORDER_0_SUPPPORT
+ adder |= (p->MaxOrder == 0); // we don't remove symbols from order-0 context
+ #endif
+
+ sumFreq = (sumFreq + 4 + adder) >> 1;
+ i = (unsigned)p->MinContext->NumStats - 1;
+ s->Freq = (Byte)sumFreq;
- i = p->MinContext->NumStats - 1;
do
{
- escFreq -= (++s)->Freq;
- s->Freq = (Byte)((s->Freq + adder) >> 1);
- sumFreq += s->Freq;
- if (s[0].Freq > s[-1].Freq)
+ unsigned freq = (++s)->Freq;
+ escFreq -= freq;
+ freq = (freq + adder) >> 1;
+ sumFreq += freq;
+ s->Freq = (Byte)freq;
+ if (freq > s[-1].Freq)
{
+ CPpmd_State tmp = *s;
CPpmd_State *s1 = s;
- CPpmd_State tmp = *s1;
do
+ {
s1[0] = s1[-1];
- while (--s1 != stats && tmp.Freq > s1[-1].Freq);
+ }
+ while (--s1 != stats && freq > s1[-1].Freq);
*s1 = tmp;
}
}
@@ -608,47 +853,89 @@ static void Rescale(CPpmd7 *p)
if (s->Freq == 0)
{
- unsigned numStats = p->MinContext->NumStats;
- unsigned n0, n1;
- do { i++; } while ((--s)->Freq == 0);
+ /* Remove all items with Freq == 0 */
+ CPpmd7_Context *mc;
+ unsigned numStats, numStatsNew, n0, n1;
+
+ i = 0; do { i++; } while ((--s)->Freq == 0);
+
+ /* We increase (escFreq) for the number of removed symbols.
+ So we will have (0.5) increase for Escape_Freq on average per
+ removed symbol after Escape_Freq halving */
escFreq += i;
- p->MinContext->NumStats = (UInt16)(p->MinContext->NumStats - i);
- if (p->MinContext->NumStats == 1)
+ mc = p->MinContext;
+ numStats = mc->NumStats;
+ numStatsNew = numStats - i;
+ mc->NumStats = (UInt16)(numStatsNew);
+ n0 = (numStats + 1) >> 1;
+
+ if (numStatsNew == 1)
{
- CPpmd_State tmp = *stats;
+ /* Create Single-Symbol context */
+ unsigned freq = stats->Freq;
+
do
{
- tmp.Freq = (Byte)(tmp.Freq - (tmp.Freq >> 1));
escFreq >>= 1;
+ freq = (freq + 1) >> 1;
}
while (escFreq > 1);
- InsertNode(p, stats, U2I(((numStats + 1) >> 1)));
- *(p->FoundState = ONE_STATE(p->MinContext)) = tmp;
+
+ s = ONE_STATE(mc);
+ *s = *stats;
+ s->Freq = (Byte)freq; // (freq <= 260 / 4)
+ p->FoundState = s;
+ InsertNode(p, stats, U2I(n0));
return;
}
- n0 = (numStats + 1) >> 1;
- n1 = (p->MinContext->NumStats + 1) >> 1;
+
+ n1 = (numStatsNew + 1) >> 1;
if (n0 != n1)
- p->MinContext->Stats = STATS_REF(ShrinkUnits(p, stats, n0, n1));
+ {
+ // p->MinContext->Union4.Stats = STATS_REF(ShrinkUnits(p, stats, n0, n1));
+ unsigned i0 = U2I(n0);
+ unsigned i1 = U2I(n1);
+ if (i0 != i1)
+ {
+ if (p->FreeList[i1] != 0)
+ {
+ void *ptr = RemoveNode(p, i1);
+ p->MinContext->Union4.Stats = STATS_REF(ptr);
+ MyMem12Cpy(ptr, (const void *)stats, n1);
+ InsertNode(p, stats, i0);
+ }
+ else
+ SplitBlock(p, stats, i0, i1);
+ }
+ }
+ }
+ {
+ CPpmd7_Context *mc = p->MinContext;
+ mc->Union2.SummFreq = (UInt16)(sumFreq + escFreq - (escFreq >> 1));
+ // Escape_Freq halving here
+ p->FoundState = STATS(mc);
}
- p->MinContext->SummFreq = (UInt16)(sumFreq + escFreq - (escFreq >> 1));
- p->FoundState = STATS(p->MinContext);
}
+
CPpmd_See *Ppmd7_MakeEscFreq(CPpmd7 *p, unsigned numMasked, UInt32 *escFreq)
{
CPpmd_See *see;
- unsigned nonMasked = p->MinContext->NumStats - numMasked;
- if (p->MinContext->NumStats != 256)
+ const CPpmd7_Context *mc = p->MinContext;
+ unsigned numStats = mc->NumStats;
+ if (numStats != 256)
{
- see = p->See[(unsigned)p->NS2Indx[(size_t)nonMasked - 1]] +
- (nonMasked < (unsigned)SUFFIX(p->MinContext)->NumStats - p->MinContext->NumStats) +
- 2 * (unsigned)(p->MinContext->SummFreq < 11 * p->MinContext->NumStats) +
- 4 * (unsigned)(numMasked > nonMasked) +
+ unsigned nonMasked = numStats - numMasked;
+ see = p->See[(unsigned)p->NS2Indx[(size_t)nonMasked - 1]]
+ + (nonMasked < (unsigned)SUFFIX(mc)->NumStats - numStats)
+ + 2 * (unsigned)(mc->Union2.SummFreq < 11 * numStats)
+ + 4 * (unsigned)(numMasked > nonMasked) +
p->HiBitsFlag;
{
- unsigned r = (see->Summ >> see->Shift);
- see->Summ = (UInt16)(see->Summ - r);
+ // if (see->Summ) field is larger than 16-bit, we need only low 16 bits of Summ
+ unsigned summ = (UInt16)see->Summ; // & 0xFFFF
+ unsigned r = (summ >> see->Shift);
+ see->Summ = (UInt16)(summ - r);
*escFreq = r + (r == 0);
}
}
@@ -660,53 +947,158 @@ CPpmd_See *Ppmd7_MakeEscFreq(CPpmd7 *p, unsigned numMasked, UInt32 *escFreq)
return see;
}
+
static void NextContext(CPpmd7 *p)
{
CTX_PTR c = CTX(SUCCESSOR(p->FoundState));
- if (p->OrderFall == 0 && (Byte *)c > p->Text)
- p->MinContext = p->MaxContext = c;
+ if (p->OrderFall == 0 && (const Byte *)c > p->Text)
+ p->MaxContext = p->MinContext = c;
else
- UpdateModel(p);
+ Ppmd7_UpdateModel(p);
}
+
void Ppmd7_Update1(CPpmd7 *p)
{
CPpmd_State *s = p->FoundState;
- s->Freq += 4;
- p->MinContext->SummFreq += 4;
- if (s[0].Freq > s[-1].Freq)
+ unsigned freq = s->Freq;
+ freq += 4;
+ p->MinContext->Union2.SummFreq = (UInt16)(p->MinContext->Union2.SummFreq + 4);
+ s->Freq = (Byte)freq;
+ if (freq > s[-1].Freq)
{
- SwapStates(&s[0], &s[-1]);
+ SwapStates(s);
p->FoundState = --s;
- if (s->Freq > MAX_FREQ)
+ if (freq > MAX_FREQ)
Rescale(p);
}
NextContext(p);
}
+
void Ppmd7_Update1_0(CPpmd7 *p)
{
- p->PrevSuccess = (2 * p->FoundState->Freq > p->MinContext->SummFreq);
- p->RunLength += p->PrevSuccess;
- p->MinContext->SummFreq += 4;
- if ((p->FoundState->Freq += 4) > MAX_FREQ)
+ CPpmd_State *s = p->FoundState;
+ CPpmd7_Context *mc = p->MinContext;
+ unsigned freq = s->Freq;
+ unsigned summFreq = mc->Union2.SummFreq;
+ p->PrevSuccess = (2 * freq > summFreq);
+ p->RunLength += (int)p->PrevSuccess;
+ mc->Union2.SummFreq = (UInt16)(summFreq + 4);
+ freq += 4;
+ s->Freq = (Byte)freq;
+ if (freq > MAX_FREQ)
Rescale(p);
NextContext(p);
}
+
+/*
void Ppmd7_UpdateBin(CPpmd7 *p)
{
- p->FoundState->Freq = (Byte)(p->FoundState->Freq + (p->FoundState->Freq < 128 ? 1: 0));
+ unsigned freq = p->FoundState->Freq;
+ p->FoundState->Freq = (Byte)(freq + (freq < 128));
p->PrevSuccess = 1;
p->RunLength++;
NextContext(p);
}
+*/
void Ppmd7_Update2(CPpmd7 *p)
{
- p->MinContext->SummFreq += 4;
- if ((p->FoundState->Freq += 4) > MAX_FREQ)
- Rescale(p);
+ CPpmd_State *s = p->FoundState;
+ unsigned freq = s->Freq;
+ freq += 4;
p->RunLength = p->InitRL;
- UpdateModel(p);
+ p->MinContext->Union2.SummFreq = (UInt16)(p->MinContext->Union2.SummFreq + 4);
+ s->Freq = (Byte)freq;
+ if (freq > MAX_FREQ)
+ Rescale(p);
+ Ppmd7_UpdateModel(p);
+}
+
+
+
+/*
+PPMd Memory Map:
+{
+ [ 0 ] contains the subset of the original raw text that is required to create context
+ records. Some symbols are not written when the max order context was reached
+ [ Text ] free area
+ [ UnitsStart ] CPpmd_State vectors and CPpmd7_Context records
+ [ LoUnit ] free area for CPpmd_State and CPpmd7_Context items
+[ HiUnit ] CPpmd7_Context records
+ [ Size ] end of array
}
+
+These addresses don't cross at any time.
+And the following conditions are true for addresses:
+ (0 <= Text < UnitsStart <= LoUnit <= HiUnit <= Size)
+
+Raw text is BYTE-aligned.
+The data in block [ UnitsStart ... Size ] contains 12-byte aligned UNITs.
+
+Last UNIT of array at offset (Size - 12) is root order-0 CPpmd7_Context record.
+The code can free UNITs memory blocks that were allocated to store CPpmd_State vectors.
+The code doesn't free UNITs allocated for CPpmd7_Context records.
+
+The code calls RestartModel(), when there is no free memory for allocation.
+And RestartModel() changes the state to the original start state, with a full free block.
+
+
+The code allocates UNITs with the following order:
+
+Allocation of 1 UNIT for Context record
+ - from free space (HiUnit) down to (LoUnit)
+ - from FreeList[0]
+ - AllocUnitsRare()
+
+AllocUnits() for CPpmd_State vectors:
+ - from FreeList[i]
+ - from free space (LoUnit) up to (HiUnit)
+ - AllocUnitsRare()
+
+AllocUnitsRare()
+ - if (GlueCount == 0)
+ { Glue lists, GlueCount = 255, allocate from FreeList[i] }
+ - loop for all higher sized FreeList[...] lists
+ - from (UnitsStart - Text), GlueCount--
+ - ERROR
+
+
+Each Record with Context contains the CPpmd_State vector, where each
+CPpmd_State contains the link to Successor.
+There are 3 types of Successor:
+ 1) NULL-Successor - NULL pointer. NULL-Successor links can be stored
+ only in 0-order Root Context Record.
+ We use 0 value as NULL-Successor
+ 2) RAW-Successor - the link to position in raw text,
+ that "RAW-Successor" is being created after first
+ occurrence of new symbol for some existing context record.
+ (RAW-Successor > 0).
+ 3) RECORD-Successor - the link to CPpmd7_Context record of (Order+1),
+ that record is being created when we go via RAW-Successor again.
+
+For any successors at any time: the following conditions are true for Successor links:
+(NULL-Successor < RAW-Successor < UnitsStart <= RECORD-Successor)
+
+
+---------- Symbol Frequency, SummFreq and Range in Range_Coder ----------
+
+CPpmd7_Context::SummFreq = Sum(Stats[].Freq) + Escape_Freq
+
+The PPMd code tries to fulfill the condition:
+ (SummFreq <= (256 * 128 = RC::kBot))
+
+We have (Sum(Stats[].Freq) <= 256 * 124), because of (MAX_FREQ = 124)
+So (4 = 128 - 124) is average reserve for Escape_Freq for each symbol.
+If (CPpmd_State::Freq) is not aligned for 4, the reserve can be 5, 6 or 7.
+SummFreq and Escape_Freq can be changed in Rescale() and *Update*() functions.
+Rescale() can remove symbols only from max-order contexts. So Escape_Freq can increase after multiple calls of Rescale() for
+max-order context.
+
+When the PPMd code still breaks the (Total <= RC::Range) condition in the range coder,
+we have two ways to resolve that problem:
+ 1) we can report error, if we want to keep compatibility with original PPMd code that has no fix for such cases.
+ 2) we can reduce (Total) value to (RC::Range) by reducing (Escape_Freq) part of (Total) value.
+*/
diff --git a/multiarc/src/formats/7z/C/Ppmd7.h b/multiarc/src/formats/7z/C/Ppmd7.h
index 610539a0..d31809ae 100644..100755
--- a/multiarc/src/formats/7z/C/Ppmd7.h
+++ b/multiarc/src/formats/7z/C/Ppmd7.h
@@ -1,10 +1,8 @@
-/* Ppmd7.h -- PPMdH compression codec
-2018-07-04 : Igor Pavlov : Public domain
-This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
-
-/* This code supports virtual RangeDecoder and includes the implementation
-of RangeCoder from 7z, instead of RangeCoder from original PPMd var.H.
-If you need the compatibility with original PPMd var.H, you can use external RangeDecoder */
+/* Ppmd7.h -- Ppmd7 (PPMdH) compression codec
+2021-04-13 : Igor Pavlov : Public domain
+This code is based on:
+ PPMd var.H (2001): Dmitry Shkarin : Public domain */
+
#ifndef __PPMD7_H
#define __PPMD7_H
@@ -21,23 +19,56 @@ EXTERN_C_BEGIN
struct CPpmd7_Context_;
-typedef
- #ifdef PPMD_32BIT
- struct CPpmd7_Context_ *
- #else
- UInt32
- #endif
- CPpmd7_Context_Ref;
+typedef Ppmd_Ref_Type(struct CPpmd7_Context_) CPpmd7_Context_Ref;
+
+// MY_CPU_pragma_pack_push_1
typedef struct CPpmd7_Context_
{
UInt16 NumStats;
- UInt16 SummFreq;
- CPpmd_State_Ref Stats;
+
+
+ union
+ {
+ UInt16 SummFreq;
+ CPpmd_State2 State2;
+ } Union2;
+
+ union
+ {
+ CPpmd_State_Ref Stats;
+ CPpmd_State4 State4;
+ } Union4;
+
CPpmd7_Context_Ref Suffix;
} CPpmd7_Context;
-#define Ppmd7Context_OneState(p) ((CPpmd_State *)&(p)->SummFreq)
+// MY_CPU_pragma_pop
+
+#define Ppmd7Context_OneState(p) ((CPpmd_State *)&(p)->Union2)
+
+
+
+
+typedef struct
+{
+ UInt32 Range;
+ UInt32 Code;
+ UInt32 Low;
+ IByteIn *Stream;
+} CPpmd7_RangeDec;
+
+
+typedef struct
+{
+ UInt32 Range;
+ Byte Cache;
+ // Byte _dummy_[3];
+ UInt64 Low;
+ UInt64 CacheSize;
+ IByteOut *Stream;
+} CPpmd7z_RangeEnc;
+
typedef struct
{
@@ -48,17 +79,30 @@ typedef struct
UInt32 Size;
UInt32 GlueCount;
- Byte *Base, *LoUnit, *HiUnit, *Text, *UnitsStart;
UInt32 AlignOffset;
+ Byte *Base, *LoUnit, *HiUnit, *Text, *UnitsStart;
- Byte Indx2Units[PPMD_NUM_INDEXES];
+
+
+
+ union
+ {
+ CPpmd7_RangeDec dec;
+ CPpmd7z_RangeEnc enc;
+ } rc;
+
+ Byte Indx2Units[PPMD_NUM_INDEXES + 2]; // +2 for alignment
Byte Units2Indx[128];
CPpmd_Void_Ref FreeList[PPMD_NUM_INDEXES];
- Byte NS2Indx[256], NS2BSIndx[256], HB2Flag[256];
+
+ Byte NS2BSIndx[256], NS2Indx[256];
+ Byte ExpEscape[16];
CPpmd_See DummySee, See[25][16];
UInt16 BinSumm[128][64];
+ // int LastSymbol;
} CPpmd7;
+
void Ppmd7_Construct(CPpmd7 *p);
BoolInt Ppmd7_Alloc(CPpmd7 *p, UInt32 size, ISzAllocPtr alloc);
void Ppmd7_Free(CPpmd7 *p, ISzAllocPtr alloc);
@@ -68,74 +112,69 @@ void Ppmd7_Init(CPpmd7 *p, unsigned maxOrder);
/* ---------- Internal Functions ---------- */
-extern const Byte PPMD7_kExpEscape[16];
-
-#ifdef PPMD_32BIT
- #define Ppmd7_GetPtr(p, ptr) (ptr)
- #define Ppmd7_GetContext(p, ptr) (ptr)
- #define Ppmd7_GetStats(p, ctx) ((ctx)->Stats)
-#else
- #define Ppmd7_GetPtr(p, offs) ((void *)((p)->Base + (offs)))
- #define Ppmd7_GetContext(p, offs) ((CPpmd7_Context *)Ppmd7_GetPtr((p), (offs)))
- #define Ppmd7_GetStats(p, ctx) ((CPpmd_State *)Ppmd7_GetPtr((p), ((ctx)->Stats)))
-#endif
+#define Ppmd7_GetPtr(p, ptr) Ppmd_GetPtr(p, ptr)
+#define Ppmd7_GetContext(p, ptr) Ppmd_GetPtr_Type(p, ptr, CPpmd7_Context)
+#define Ppmd7_GetStats(p, ctx) Ppmd_GetPtr_Type(p, (ctx)->Union4.Stats, CPpmd_State)
void Ppmd7_Update1(CPpmd7 *p);
void Ppmd7_Update1_0(CPpmd7 *p);
void Ppmd7_Update2(CPpmd7 *p);
-void Ppmd7_UpdateBin(CPpmd7 *p);
+
+#define PPMD7_HiBitsFlag_3(sym) ((((unsigned)sym + 0xC0) >> (8 - 3)) & (1 << 3))
+#define PPMD7_HiBitsFlag_4(sym) ((((unsigned)sym + 0xC0) >> (8 - 4)) & (1 << 4))
+// #define PPMD7_HiBitsFlag_3(sym) ((sym) < 0x40 ? 0 : (1 << 3))
+// #define PPMD7_HiBitsFlag_4(sym) ((sym) < 0x40 ? 0 : (1 << 4))
#define Ppmd7_GetBinSumm(p) \
- &p->BinSumm[(size_t)(unsigned)Ppmd7Context_OneState(p->MinContext)->Freq - 1][p->PrevSuccess + \
- p->NS2BSIndx[(size_t)Ppmd7_GetContext(p, p->MinContext->Suffix)->NumStats - 1] + \
- (p->HiBitsFlag = p->HB2Flag[p->FoundState->Symbol]) + \
- 2 * p->HB2Flag[(unsigned)Ppmd7Context_OneState(p->MinContext)->Symbol] + \
- ((p->RunLength >> 26) & 0x20)]
+ &p->BinSumm[(size_t)(unsigned)Ppmd7Context_OneState(p->MinContext)->Freq - 1] \
+ [ p->PrevSuccess + ((p->RunLength >> 26) & 0x20) \
+ + p->NS2BSIndx[(size_t)Ppmd7_GetContext(p, p->MinContext->Suffix)->NumStats - 1] \
+ + PPMD7_HiBitsFlag_4(Ppmd7Context_OneState(p->MinContext)->Symbol) \
+ + (p->HiBitsFlag = PPMD7_HiBitsFlag_3(p->FoundState->Symbol)) ]
CPpmd_See *Ppmd7_MakeEscFreq(CPpmd7 *p, unsigned numMasked, UInt32 *scale);
+/*
+We support two versions of Ppmd7 (PPMdH) methods that use same CPpmd7 structure:
+ 1) Ppmd7a_*: original PPMdH
+ 2) Ppmd7z_*: modified PPMdH with 7z Range Coder
+Ppmd7_*: the structures and functions that are common for both versions of PPMd7 (PPMdH)
+*/
+
/* ---------- Decode ---------- */
-typedef struct IPpmd7_RangeDec IPpmd7_RangeDec;
+#define PPMD7_SYM_END (-1)
+#define PPMD7_SYM_ERROR (-2)
-struct IPpmd7_RangeDec
-{
- UInt32 (*GetThreshold)(const IPpmd7_RangeDec *p, UInt32 total);
- void (*Decode)(const IPpmd7_RangeDec *p, UInt32 start, UInt32 size);
- UInt32 (*DecodeBit)(const IPpmd7_RangeDec *p, UInt32 size0);
-};
+/*
+You must set (CPpmd7::rc.dec.Stream) before Ppmd7*_RangeDec_Init()
-typedef struct
-{
- IPpmd7_RangeDec vt;
- UInt32 Range;
- UInt32 Code;
- IByteIn *Stream;
-} CPpmd7z_RangeDec;
+Ppmd7*_DecodeSymbol()
+out:
+ >= 0 : decoded byte
+ -1 : PPMD7_SYM_END : End of payload marker
+ -2 : PPMD7_SYM_ERROR : Data error
+*/
-void Ppmd7z_RangeDec_CreateVTable(CPpmd7z_RangeDec *p);
-BoolInt Ppmd7z_RangeDec_Init(CPpmd7z_RangeDec *p);
-#define Ppmd7z_RangeDec_IsFinishedOK(p) ((p)->Code == 0)
+/* Ppmd7a_* : original PPMdH */
+BoolInt Ppmd7a_RangeDec_Init(CPpmd7_RangeDec *p);
+#define Ppmd7a_RangeDec_IsFinishedOK(p) ((p)->Code == 0)
+int Ppmd7a_DecodeSymbol(CPpmd7 *p);
-int Ppmd7_DecodeSymbol(CPpmd7 *p, const IPpmd7_RangeDec *rc);
+/* Ppmd7z_* : modified PPMdH with 7z Range Coder */
+BoolInt Ppmd7z_RangeDec_Init(CPpmd7_RangeDec *p);
+#define Ppmd7z_RangeDec_IsFinishedOK(p) ((p)->Code == 0)
+int Ppmd7z_DecodeSymbol(CPpmd7 *p);
+// Byte *Ppmd7z_DecodeSymbols(CPpmd7 *p, Byte *buf, const Byte *lim);
/* ---------- Encode ---------- */
-typedef struct
-{
- UInt64 Low;
- UInt32 Range;
- Byte Cache;
- UInt64 CacheSize;
- IByteOut *Stream;
-} CPpmd7z_RangeEnc;
-
-void Ppmd7z_RangeEnc_Init(CPpmd7z_RangeEnc *p);
-void Ppmd7z_RangeEnc_FlushData(CPpmd7z_RangeEnc *p);
-
-void Ppmd7_EncodeSymbol(CPpmd7 *p, CPpmd7z_RangeEnc *rc, int symbol);
+void Ppmd7z_Init_RangeEnc(CPpmd7 *p);
+void Ppmd7z_Flush_RangeEnc(CPpmd7 *p);
+// void Ppmd7z_EncodeSymbol(CPpmd7 *p, int symbol);
+void Ppmd7z_EncodeSymbols(CPpmd7 *p, const Byte *buf, const Byte *lim);
EXTERN_C_END
diff --git a/multiarc/src/formats/7z/C/Ppmd7Dec.c b/multiarc/src/formats/7z/C/Ppmd7Dec.c
index 311e9f9d..55d74ff9 100644..100755
--- a/multiarc/src/formats/7z/C/Ppmd7Dec.c
+++ b/multiarc/src/formats/7z/C/Ppmd7Dec.c
@@ -1,6 +1,8 @@
-/* Ppmd7Dec.c -- PPMdH Decoder
-2018-07-04 : Igor Pavlov : Public domain
-This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
+/* Ppmd7Dec.c -- Ppmd7z (PPMdH with 7z Range Coder) Decoder
+2021-04-13 : Igor Pavlov : Public domain
+This code is based on:
+ PPMd var.H (2001): Dmitry Shkarin : Public domain */
+
#include "Precomp.h"
@@ -8,184 +10,288 @@ This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
#define kTopValue (1 << 24)
-BoolInt Ppmd7z_RangeDec_Init(CPpmd7z_RangeDec *p)
+
+#define READ_BYTE(p) IByteIn_Read((p)->Stream)
+
+BoolInt Ppmd7z_RangeDec_Init(CPpmd7_RangeDec *p)
{
unsigned i;
p->Code = 0;
p->Range = 0xFFFFFFFF;
- if (IByteIn_Read(p->Stream) != 0)
+ if (READ_BYTE(p) != 0)
return False;
for (i = 0; i < 4; i++)
- p->Code = (p->Code << 8) | IByteIn_Read(p->Stream);
+ p->Code = (p->Code << 8) | READ_BYTE(p);
return (p->Code < 0xFFFFFFFF);
}
-#define GET_Ppmd7z_RangeDec CPpmd7z_RangeDec *p = CONTAINER_FROM_VTBL(pp, CPpmd7z_RangeDec, vt);
-
-static UInt32 Range_GetThreshold(const IPpmd7_RangeDec *pp, UInt32 total)
-{
- GET_Ppmd7z_RangeDec
- return p->Code / (p->Range /= total);
-}
+#define RC_NORM_BASE(p) if ((p)->Range < kTopValue) \
+ { (p)->Code = ((p)->Code << 8) | READ_BYTE(p); (p)->Range <<= 8;
-static void Range_Normalize(CPpmd7z_RangeDec *p)
-{
- if (p->Range < kTopValue)
- {
- p->Code = (p->Code << 8) | IByteIn_Read(p->Stream);
- p->Range <<= 8;
- if (p->Range < kTopValue)
- {
- p->Code = (p->Code << 8) | IByteIn_Read(p->Stream);
- p->Range <<= 8;
- }
- }
-}
+#define RC_NORM_1(p) RC_NORM_BASE(p) }
+#define RC_NORM(p) RC_NORM_BASE(p) RC_NORM_BASE(p) }}
-static void Range_Decode(const IPpmd7_RangeDec *pp, UInt32 start, UInt32 size)
-{
- GET_Ppmd7z_RangeDec
- p->Code -= start * p->Range;
- p->Range *= size;
- Range_Normalize(p);
-}
+// we must use only one type of Normalization from two: LOCAL or REMOTE
+#define RC_NORM_LOCAL(p) // RC_NORM(p)
+#define RC_NORM_REMOTE(p) RC_NORM(p)
-static UInt32 Range_DecodeBit(const IPpmd7_RangeDec *pp, UInt32 size0)
-{
- GET_Ppmd7z_RangeDec
- UInt32 newBound = (p->Range >> 14) * size0;
- UInt32 symbol;
- if (p->Code < newBound)
- {
- symbol = 0;
- p->Range = newBound;
- }
- else
- {
- symbol = 1;
- p->Code -= newBound;
- p->Range -= newBound;
- }
- Range_Normalize(p);
- return symbol;
-}
+#define R (&p->rc.dec)
-void Ppmd7z_RangeDec_CreateVTable(CPpmd7z_RangeDec *p)
+MY_FORCE_INLINE
+// MY_NO_INLINE
+static void RangeDec_Decode(CPpmd7 *p, UInt32 start, UInt32 size)
{
- p->vt.GetThreshold = Range_GetThreshold;
- p->vt.Decode = Range_Decode;
- p->vt.DecodeBit = Range_DecodeBit;
+
+
+ R->Code -= start * R->Range;
+ R->Range *= size;
+ RC_NORM_LOCAL(R)
}
+#define RC_Decode(start, size) RangeDec_Decode(p, start, size);
+#define RC_DecodeFinal(start, size) RC_Decode(start, size) RC_NORM_REMOTE(R)
+#define RC_GetThreshold(total) (R->Code / (R->Range /= (total)))
+
-#define MASK(sym) ((signed char *)charMask)[sym]
+#define CTX(ref) ((CPpmd7_Context *)Ppmd7_GetContext(p, ref))
+typedef CPpmd7_Context * CTX_PTR;
+#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p)
+void Ppmd7_UpdateModel(CPpmd7 *p);
-int Ppmd7_DecodeSymbol(CPpmd7 *p, const IPpmd7_RangeDec *rc)
+#define MASK(sym) ((unsigned char *)charMask)[sym]
+// MY_FORCE_INLINE
+// static
+int Ppmd7z_DecodeSymbol(CPpmd7 *p)
{
size_t charMask[256 / sizeof(size_t)];
+
if (p->MinContext->NumStats != 1)
{
CPpmd_State *s = Ppmd7_GetStats(p, p->MinContext);
unsigned i;
UInt32 count, hiCnt;
- if ((count = rc->GetThreshold(rc, p->MinContext->SummFreq)) < (hiCnt = s->Freq))
+ UInt32 summFreq = p->MinContext->Union2.SummFreq;
+
+
+
+
+ count = RC_GetThreshold(summFreq);
+ hiCnt = count;
+
+ if ((Int32)(count -= s->Freq) < 0)
{
- Byte symbol;
- rc->Decode(rc, 0, s->Freq);
+ Byte sym;
+ RC_DecodeFinal(0, s->Freq);
p->FoundState = s;
- symbol = s->Symbol;
+ sym = s->Symbol;
Ppmd7_Update1_0(p);
- return symbol;
+ return sym;
}
+
p->PrevSuccess = 0;
- i = p->MinContext->NumStats - 1;
+ i = (unsigned)p->MinContext->NumStats - 1;
+
do
{
- if ((hiCnt += (++s)->Freq) > count)
+ if ((Int32)(count -= (++s)->Freq) < 0)
{
- Byte symbol;
- rc->Decode(rc, hiCnt - s->Freq, s->Freq);
+ Byte sym;
+ RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq);
p->FoundState = s;
- symbol = s->Symbol;
+ sym = s->Symbol;
Ppmd7_Update1(p);
- return symbol;
+ return sym;
}
}
while (--i);
- if (count >= p->MinContext->SummFreq)
- return -2;
- p->HiBitsFlag = p->HB2Flag[p->FoundState->Symbol];
- rc->Decode(rc, hiCnt, p->MinContext->SummFreq - hiCnt);
+
+ if (hiCnt >= summFreq)
+ return PPMD7_SYM_ERROR;
+
+ hiCnt -= count;
+ RC_Decode(hiCnt, summFreq - hiCnt);
+
+ p->HiBitsFlag = PPMD7_HiBitsFlag_3(p->FoundState->Symbol);
PPMD_SetAllBitsIn256Bytes(charMask);
- MASK(s->Symbol) = 0;
- i = p->MinContext->NumStats - 1;
- do { MASK((--s)->Symbol) = 0; } while (--i);
+ // i = p->MinContext->NumStats - 1;
+ // do { MASK((--s)->Symbol) = 0; } while (--i);
+ {
+ CPpmd_State *s2 = Ppmd7_GetStats(p, p->MinContext);
+ MASK(s->Symbol) = 0;
+ do
+ {
+ unsigned sym0 = s2[0].Symbol;
+ unsigned sym1 = s2[1].Symbol;
+ s2 += 2;
+ MASK(sym0) = 0;
+ MASK(sym1) = 0;
+ }
+ while (s2 < s);
+ }
}
else
{
+ CPpmd_State *s = Ppmd7Context_OneState(p->MinContext);
UInt16 *prob = Ppmd7_GetBinSumm(p);
- if (rc->DecodeBit(rc, *prob) == 0)
+ UInt32 pr = *prob;
+ UInt32 size0 = (R->Range >> 14) * pr;
+ pr = PPMD_UPDATE_PROB_1(pr);
+
+ if (R->Code < size0)
{
- Byte symbol;
- *prob = (UInt16)PPMD_UPDATE_PROB_0(*prob);
- symbol = (p->FoundState = Ppmd7Context_OneState(p->MinContext))->Symbol;
- Ppmd7_UpdateBin(p);
- return symbol;
+ Byte sym;
+ *prob = (UInt16)(pr + (1 << PPMD_INT_BITS));
+
+ // RangeDec_DecodeBit0(size0);
+ R->Range = size0;
+ RC_NORM_1(R)
+ /* we can use single byte normalization here because of
+ (min(BinSumm[][]) = 95) > (1 << (14 - 8)) */
+
+ // sym = (p->FoundState = Ppmd7Context_OneState(p->MinContext))->Symbol;
+ // Ppmd7_UpdateBin(p);
+ {
+ unsigned freq = s->Freq;
+ CTX_PTR c = CTX(SUCCESSOR(s));
+ sym = s->Symbol;
+ p->FoundState = s;
+ p->PrevSuccess = 1;
+ p->RunLength++;
+ s->Freq = (Byte)(freq + (freq < 128));
+ // NextContext(p);
+ if (p->OrderFall == 0 && (const Byte *)c > p->Text)
+ p->MaxContext = p->MinContext = c;
+ else
+ Ppmd7_UpdateModel(p);
+ }
+ return sym;
}
- *prob = (UInt16)PPMD_UPDATE_PROB_1(*prob);
- p->InitEsc = PPMD7_kExpEscape[*prob >> 10];
+
+ *prob = (UInt16)pr;
+ p->InitEsc = p->ExpEscape[pr >> 10];
+
+ // RangeDec_DecodeBit1(size0);
+
+ R->Code -= size0;
+ R->Range -= size0;
+ RC_NORM_LOCAL(R)
+
PPMD_SetAllBitsIn256Bytes(charMask);
MASK(Ppmd7Context_OneState(p->MinContext)->Symbol) = 0;
p->PrevSuccess = 0;
}
+
for (;;)
{
- CPpmd_State *ps[256], *s;
+ CPpmd_State *s, *s2;
UInt32 freqSum, count, hiCnt;
+
CPpmd_See *see;
- unsigned i, num, numMasked = p->MinContext->NumStats;
+ CPpmd7_Context *mc;
+ unsigned numMasked;
+ RC_NORM_REMOTE(R)
+ mc = p->MinContext;
+ numMasked = mc->NumStats;
+
do
{
p->OrderFall++;
- if (!p->MinContext->Suffix)
- return -1;
- p->MinContext = Ppmd7_GetContext(p, p->MinContext->Suffix);
+ if (!mc->Suffix)
+ return PPMD7_SYM_END;
+ mc = Ppmd7_GetContext(p, mc->Suffix);
}
- while (p->MinContext->NumStats == numMasked);
- hiCnt = 0;
- s = Ppmd7_GetStats(p, p->MinContext);
- i = 0;
- num = p->MinContext->NumStats - numMasked;
- do
+ while (mc->NumStats == numMasked);
+
+ s = Ppmd7_GetStats(p, mc);
+
{
- int k = (int)(MASK(s->Symbol));
- hiCnt += (s->Freq & k);
- ps[i] = s++;
- i -= k;
+ unsigned num = mc->NumStats;
+ unsigned num2 = num / 2;
+
+ num &= 1;
+ hiCnt = (s->Freq & (unsigned)(MASK(s->Symbol))) & (0 - (UInt32)num);
+ s += num;
+ p->MinContext = mc;
+
+ do
+ {
+ unsigned sym0 = s[0].Symbol;
+ unsigned sym1 = s[1].Symbol;
+ s += 2;
+ hiCnt += (s[-2].Freq & (unsigned)(MASK(sym0)));
+ hiCnt += (s[-1].Freq & (unsigned)(MASK(sym1)));
+ }
+ while (--num2);
}
- while (i != num);
-
+
see = Ppmd7_MakeEscFreq(p, numMasked, &freqSum);
freqSum += hiCnt;
- count = rc->GetThreshold(rc, freqSum);
+
+
+
+
+ count = RC_GetThreshold(freqSum);
if (count < hiCnt)
{
- Byte symbol;
- CPpmd_State **pps = ps;
- for (hiCnt = 0; (hiCnt += (*pps)->Freq) <= count; pps++);
- s = *pps;
- rc->Decode(rc, hiCnt - s->Freq, s->Freq);
+ Byte sym;
+
+ s = Ppmd7_GetStats(p, p->MinContext);
+ hiCnt = count;
+ // count -= s->Freq & (unsigned)(MASK(s->Symbol));
+ // if ((Int32)count >= 0)
+ {
+ for (;;)
+ {
+ count -= s->Freq & (unsigned)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break;
+ // count -= s->Freq & (unsigned)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break;
+ };
+ }
+ s--;
+ RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq);
+
+ // new (see->Summ) value can overflow over 16-bits in some rare cases
Ppmd_See_Update(see);
p->FoundState = s;
- symbol = s->Symbol;
+ sym = s->Symbol;
Ppmd7_Update2(p);
- return symbol;
+ return sym;
}
+
if (count >= freqSum)
- return -2;
- rc->Decode(rc, hiCnt, freqSum - hiCnt);
+ return PPMD7_SYM_ERROR;
+
+ RC_Decode(hiCnt, freqSum - hiCnt);
+
+ // We increase (see->Summ) for sum of Freqs of all non_Masked symbols.
+ // new (see->Summ) value can overflow over 16-bits in some rare cases
see->Summ = (UInt16)(see->Summ + freqSum);
- do { MASK(ps[--i]->Symbol) = 0; } while (i != 0);
+
+ s = Ppmd7_GetStats(p, p->MinContext);
+ s2 = s + p->MinContext->NumStats;
+ do
+ {
+ MASK(s->Symbol) = 0;
+ s++;
+ }
+ while (s != s2);
+ }
+}
+
+/*
+Byte *Ppmd7z_DecodeSymbols(CPpmd7 *p, Byte *buf, const Byte *lim)
+{
+ int sym = 0;
+ if (buf != lim)
+ do
+ {
+ sym = Ppmd7z_DecodeSymbol(p);
+ if (sym < 0)
+ break;
+ *buf = (Byte)sym;
}
+ while (++buf < lim);
+ p->LastSymbol = sym;
+ return buf;
}
+*/
diff --git a/multiarc/src/formats/7z/C/Ppmd7Enc.c b/multiarc/src/formats/7z/C/Ppmd7Enc.c
index 286b8712..62139c5b 100644..100755
--- a/multiarc/src/formats/7z/C/Ppmd7Enc.c
+++ b/multiarc/src/formats/7z/C/Ppmd7Enc.c
@@ -1,6 +1,8 @@
-/* Ppmd7Enc.c -- PPMdH Encoder
-2017-04-03 : Igor Pavlov : Public domain
-This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
+/* Ppmd7Enc.c -- Ppmd7z (PPMdH with 7z Range Coder) Encoder
+2021-04-13 : Igor Pavlov : Public domain
+This code is based on:
+ PPMd var.H (2001): Dmitry Shkarin : Public domain */
+
#include "Precomp.h"
@@ -8,65 +10,60 @@ This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
#define kTopValue (1 << 24)
-void Ppmd7z_RangeEnc_Init(CPpmd7z_RangeEnc *p)
+#define R (&p->rc.enc)
+
+void Ppmd7z_Init_RangeEnc(CPpmd7 *p)
{
- p->Low = 0;
- p->Range = 0xFFFFFFFF;
- p->Cache = 0;
- p->CacheSize = 1;
+ R->Low = 0;
+ R->Range = 0xFFFFFFFF;
+ R->Cache = 0;
+ R->CacheSize = 1;
}
-static void RangeEnc_ShiftLow(CPpmd7z_RangeEnc *p)
+MY_NO_INLINE
+static void RangeEnc_ShiftLow(CPpmd7 *p)
{
- if ((UInt32)p->Low < (UInt32)0xFF000000 || (unsigned)(p->Low >> 32) != 0)
+ if ((UInt32)R->Low < (UInt32)0xFF000000 || (unsigned)(R->Low >> 32) != 0)
{
- Byte temp = p->Cache;
+ Byte temp = R->Cache;
do
{
- IByteOut_Write(p->Stream, (Byte)(temp + (Byte)(p->Low >> 32)));
+ IByteOut_Write(R->Stream, (Byte)(temp + (Byte)(R->Low >> 32)));
temp = 0xFF;
}
- while (--p->CacheSize != 0);
- p->Cache = (Byte)((UInt32)p->Low >> 24);
+ while (--R->CacheSize != 0);
+ R->Cache = (Byte)((UInt32)R->Low >> 24);
}
- p->CacheSize++;
- p->Low = (UInt32)p->Low << 8;
+ R->CacheSize++;
+ R->Low = (UInt32)((UInt32)R->Low << 8);
}
-static void RangeEnc_Encode(CPpmd7z_RangeEnc *p, UInt32 start, UInt32 size, UInt32 total)
-{
- p->Low += start * (p->Range /= total);
- p->Range *= size;
- while (p->Range < kTopValue)
- {
- p->Range <<= 8;
- RangeEnc_ShiftLow(p);
- }
-}
+#define RC_NORM_BASE(p) if (R->Range < kTopValue) { R->Range <<= 8; RangeEnc_ShiftLow(p);
+#define RC_NORM_1(p) RC_NORM_BASE(p) }
+#define RC_NORM(p) RC_NORM_BASE(p) RC_NORM_BASE(p) }}
-static void RangeEnc_EncodeBit_0(CPpmd7z_RangeEnc *p, UInt32 size0)
-{
- p->Range = (p->Range >> 14) * size0;
- while (p->Range < kTopValue)
- {
- p->Range <<= 8;
- RangeEnc_ShiftLow(p);
- }
-}
+// we must use only one type of Normalization from two: LOCAL or REMOTE
+#define RC_NORM_LOCAL(p) // RC_NORM(p)
+#define RC_NORM_REMOTE(p) RC_NORM(p)
+
+/*
+#define RangeEnc_Encode(p, start, _size_) \
+ { UInt32 size = _size_; \
+ R->Low += start * R->Range; \
+ R->Range *= size; \
+ RC_NORM_LOCAL(p); }
+*/
-static void RangeEnc_EncodeBit_1(CPpmd7z_RangeEnc *p, UInt32 size0)
+MY_FORCE_INLINE
+// MY_NO_INLINE
+static void RangeEnc_Encode(CPpmd7 *p, UInt32 start, UInt32 size)
{
- UInt32 newBound = (p->Range >> 14) * size0;
- p->Low += newBound;
- p->Range -= newBound;
- while (p->Range < kTopValue)
- {
- p->Range <<= 8;
- RangeEnc_ShiftLow(p);
- }
+ R->Low += start * R->Range;
+ R->Range *= size;
+ RC_NORM_LOCAL(p);
}
-void Ppmd7z_RangeEnc_FlushData(CPpmd7z_RangeEnc *p)
+void Ppmd7z_Flush_RangeEnc(CPpmd7 *p)
{
unsigned i;
for (i = 0; i < 5; i++)
@@ -74,31 +71,53 @@ void Ppmd7z_RangeEnc_FlushData(CPpmd7z_RangeEnc *p)
}
-#define MASK(sym) ((signed char *)charMask)[sym]
-void Ppmd7_EncodeSymbol(CPpmd7 *p, CPpmd7z_RangeEnc *rc, int symbol)
+#define RC_Encode(start, size) RangeEnc_Encode(p, start, size);
+#define RC_EncodeFinal(start, size) RC_Encode(start, size); RC_NORM_REMOTE(p);
+
+#define CTX(ref) ((CPpmd7_Context *)Ppmd7_GetContext(p, ref))
+#define SUFFIX(ctx) CTX((ctx)->Suffix)
+typedef CPpmd7_Context * CTX_PTR;
+#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p)
+
+void Ppmd7_UpdateModel(CPpmd7 *p);
+
+#define MASK(sym) ((unsigned char *)charMask)[sym]
+
+MY_FORCE_INLINE
+static
+void Ppmd7z_EncodeSymbol(CPpmd7 *p, int symbol)
{
size_t charMask[256 / sizeof(size_t)];
+
if (p->MinContext->NumStats != 1)
{
CPpmd_State *s = Ppmd7_GetStats(p, p->MinContext);
UInt32 sum;
unsigned i;
+
+
+
+
+ R->Range /= p->MinContext->Union2.SummFreq;
+
if (s->Symbol == symbol)
{
- RangeEnc_Encode(rc, 0, s->Freq, p->MinContext->SummFreq);
+ // R->Range /= p->MinContext->Union2.SummFreq;
+ RC_EncodeFinal(0, s->Freq);
p->FoundState = s;
Ppmd7_Update1_0(p);
return;
}
p->PrevSuccess = 0;
sum = s->Freq;
- i = p->MinContext->NumStats - 1;
+ i = (unsigned)p->MinContext->NumStats - 1;
do
{
if ((++s)->Symbol == symbol)
{
- RangeEnc_Encode(rc, sum, s->Freq, p->MinContext->SummFreq);
+ // R->Range /= p->MinContext->Union2.SummFreq;
+ RC_EncodeFinal(sum, s->Freq);
p->FoundState = s;
Ppmd7_Update1(p);
return;
@@ -106,82 +125,199 @@ void Ppmd7_EncodeSymbol(CPpmd7 *p, CPpmd7z_RangeEnc *rc, int symbol)
sum += s->Freq;
}
while (--i);
+
+ // R->Range /= p->MinContext->Union2.SummFreq;
+ RC_Encode(sum, p->MinContext->Union2.SummFreq - sum);
- p->HiBitsFlag = p->HB2Flag[p->FoundState->Symbol];
+ p->HiBitsFlag = PPMD7_HiBitsFlag_3(p->FoundState->Symbol);
PPMD_SetAllBitsIn256Bytes(charMask);
- MASK(s->Symbol) = 0;
- i = p->MinContext->NumStats - 1;
- do { MASK((--s)->Symbol) = 0; } while (--i);
- RangeEnc_Encode(rc, sum, p->MinContext->SummFreq - sum, p->MinContext->SummFreq);
+ // MASK(s->Symbol) = 0;
+ // i = p->MinContext->NumStats - 1;
+ // do { MASK((--s)->Symbol) = 0; } while (--i);
+ {
+ CPpmd_State *s2 = Ppmd7_GetStats(p, p->MinContext);
+ MASK(s->Symbol) = 0;
+ do
+ {
+ unsigned sym0 = s2[0].Symbol;
+ unsigned sym1 = s2[1].Symbol;
+ s2 += 2;
+ MASK(sym0) = 0;
+ MASK(sym1) = 0;
+ }
+ while (s2 < s);
+ }
}
else
{
UInt16 *prob = Ppmd7_GetBinSumm(p);
CPpmd_State *s = Ppmd7Context_OneState(p->MinContext);
+ UInt32 pr = *prob;
+ UInt32 bound = (R->Range >> 14) * pr;
+ pr = PPMD_UPDATE_PROB_1(pr);
if (s->Symbol == symbol)
{
- RangeEnc_EncodeBit_0(rc, *prob);
- *prob = (UInt16)PPMD_UPDATE_PROB_0(*prob);
- p->FoundState = s;
- Ppmd7_UpdateBin(p);
+ *prob = (UInt16)(pr + (1 << PPMD_INT_BITS));
+ // RangeEnc_EncodeBit_0(p, bound);
+ R->Range = bound;
+ RC_NORM_1(p);
+
+ // p->FoundState = s;
+ // Ppmd7_UpdateBin(p);
+ {
+ unsigned freq = s->Freq;
+ CTX_PTR c = CTX(SUCCESSOR(s));
+ p->FoundState = s;
+ p->PrevSuccess = 1;
+ p->RunLength++;
+ s->Freq = (Byte)(freq + (freq < 128));
+ // NextContext(p);
+ if (p->OrderFall == 0 && (const Byte *)c > p->Text)
+ p->MaxContext = p->MinContext = c;
+ else
+ Ppmd7_UpdateModel(p);
+ }
return;
}
- else
- {
- RangeEnc_EncodeBit_1(rc, *prob);
- *prob = (UInt16)PPMD_UPDATE_PROB_1(*prob);
- p->InitEsc = PPMD7_kExpEscape[*prob >> 10];
- PPMD_SetAllBitsIn256Bytes(charMask);
- MASK(s->Symbol) = 0;
- p->PrevSuccess = 0;
- }
+
+ *prob = (UInt16)pr;
+ p->InitEsc = p->ExpEscape[pr >> 10];
+ // RangeEnc_EncodeBit_1(p, bound);
+ R->Low += bound;
+ R->Range -= bound;
+ RC_NORM_LOCAL(p)
+
+ PPMD_SetAllBitsIn256Bytes(charMask);
+ MASK(s->Symbol) = 0;
+ p->PrevSuccess = 0;
}
+
for (;;)
{
- UInt32 escFreq;
CPpmd_See *see;
CPpmd_State *s;
- UInt32 sum;
- unsigned i, numMasked = p->MinContext->NumStats;
+ UInt32 sum, escFreq;
+ CPpmd7_Context *mc;
+ unsigned i, numMasked;
+
+ RC_NORM_REMOTE(p)
+
+ mc = p->MinContext;
+ numMasked = mc->NumStats;
+
do
{
p->OrderFall++;
- if (!p->MinContext->Suffix)
+ if (!mc->Suffix)
return; /* EndMarker (symbol = -1) */
- p->MinContext = Ppmd7_GetContext(p, p->MinContext->Suffix);
+ mc = Ppmd7_GetContext(p, mc->Suffix);
+ i = mc->NumStats;
}
- while (p->MinContext->NumStats == numMasked);
+ while (i == numMasked);
+
+ p->MinContext = mc;
- see = Ppmd7_MakeEscFreq(p, numMasked, &escFreq);
- s = Ppmd7_GetStats(p, p->MinContext);
+ // see = Ppmd7_MakeEscFreq(p, numMasked, &escFreq);
+ {
+ if (i != 256)
+ {
+ unsigned nonMasked = i - numMasked;
+ see = p->See[(unsigned)p->NS2Indx[(size_t)nonMasked - 1]]
+ + p->HiBitsFlag
+ + (nonMasked < (unsigned)SUFFIX(mc)->NumStats - i)
+ + 2 * (unsigned)(mc->Union2.SummFreq < 11 * i)
+ + 4 * (unsigned)(numMasked > nonMasked);
+ {
+ // if (see->Summ) field is larger than 16-bit, we need only low 16 bits of Summ
+ unsigned summ = (UInt16)see->Summ; // & 0xFFFF
+ unsigned r = (summ >> see->Shift);
+ see->Summ = (UInt16)(summ - r);
+ escFreq = r + (r == 0);
+ }
+ }
+ else
+ {
+ see = &p->DummySee;
+ escFreq = 1;
+ }
+ }
+
+ s = Ppmd7_GetStats(p, mc);
sum = 0;
- i = p->MinContext->NumStats;
+ // i = mc->NumStats;
+
do
{
- int cur = s->Symbol;
- if (cur == symbol)
+ unsigned cur = s->Symbol;
+ if ((int)cur == symbol)
{
UInt32 low = sum;
- CPpmd_State *s1 = s;
- do
+ UInt32 freq = s->Freq;
+ unsigned num2;
+
+ Ppmd_See_Update(see);
+ p->FoundState = s;
+ sum += escFreq;
+
+ num2 = i / 2;
+ i &= 1;
+ sum += freq & (0 - (UInt32)i);
+ if (num2 != 0)
{
- sum += (s->Freq & (int)(MASK(s->Symbol)));
- s++;
+ s += i;
+ for (;;)
+ {
+ unsigned sym0 = s[0].Symbol;
+ unsigned sym1 = s[1].Symbol;
+ s += 2;
+ sum += (s[-2].Freq & (unsigned)(MASK(sym0)));
+ sum += (s[-1].Freq & (unsigned)(MASK(sym1)));
+ if (--num2 == 0)
+ break;
+ }
}
- while (--i);
- RangeEnc_Encode(rc, low, s1->Freq, sum + escFreq);
- Ppmd_See_Update(see);
- p->FoundState = s1;
+
+
+ R->Range /= sum;
+ RC_EncodeFinal(low, freq);
Ppmd7_Update2(p);
return;
}
- sum += (s->Freq & (int)(MASK(cur)));
- MASK(cur) = 0;
+ sum += (s->Freq & (unsigned)(MASK(cur)));
s++;
}
while (--i);
- RangeEnc_Encode(rc, sum, escFreq, sum + escFreq);
- see->Summ = (UInt16)(see->Summ + sum + escFreq);
+ {
+ UInt32 total = sum + escFreq;
+ see->Summ = (UInt16)(see->Summ + total);
+
+ R->Range /= total;
+ RC_Encode(sum, escFreq);
+ }
+
+ {
+ CPpmd_State *s2 = Ppmd7_GetStats(p, p->MinContext);
+ s--;
+ MASK(s->Symbol) = 0;
+ do
+ {
+ unsigned sym0 = s2[0].Symbol;
+ unsigned sym1 = s2[1].Symbol;
+ s2 += 2;
+ MASK(sym0) = 0;
+ MASK(sym1) = 0;
+ }
+ while (s2 < s);
+ }
+ }
+}
+
+
+void Ppmd7z_EncodeSymbols(CPpmd7 *p, const Byte *buf, const Byte *lim)
+{
+ for (; buf < lim; buf++)
+ {
+ Ppmd7z_EncodeSymbol(p, *buf);
}
}
diff --git a/multiarc/src/formats/7z/C/Ppmd7aDec.c b/multiarc/src/formats/7z/C/Ppmd7aDec.c
new file mode 100755
index 00000000..c4245784
--- /dev/null
+++ b/multiarc/src/formats/7z/C/Ppmd7aDec.c
@@ -0,0 +1,279 @@
+/* Ppmd7aDec.c -- PPMd7a (PPMdH) Decoder
+2021-04-13 : Igor Pavlov : Public domain
+This code is based on:
+ PPMd var.H (2001): Dmitry Shkarin : Public domain
+ Carryless rangecoder (1999): Dmitry Subbotin : Public domain */
+
+#include "Precomp.h"
+
+#include "Ppmd7.h"
+
+#define kTop (1 << 24)
+#define kBot (1 << 15)
+
+#define READ_BYTE(p) IByteIn_Read((p)->Stream)
+
+BoolInt Ppmd7a_RangeDec_Init(CPpmd7_RangeDec *p)
+{
+ unsigned i;
+ p->Code = 0;
+ p->Range = 0xFFFFFFFF;
+ p->Low = 0;
+
+ for (i = 0; i < 4; i++)
+ p->Code = (p->Code << 8) | READ_BYTE(p);
+ return (p->Code < 0xFFFFFFFF);
+}
+
+#define RC_NORM(p) \
+ while ((p->Low ^ (p->Low + p->Range)) < kTop \
+ || (p->Range < kBot && ((p->Range = (0 - p->Low) & (kBot - 1)), 1))) { \
+ p->Code = (p->Code << 8) | READ_BYTE(p); \
+ p->Range <<= 8; p->Low <<= 8; }
+
+// we must use only one type of Normalization from two: LOCAL or REMOTE
+#define RC_NORM_LOCAL(p) // RC_NORM(p)
+#define RC_NORM_REMOTE(p) RC_NORM(p)
+
+#define R (&p->rc.dec)
+
+MY_FORCE_INLINE
+// MY_NO_INLINE
+static void RangeDec_Decode(CPpmd7 *p, UInt32 start, UInt32 size)
+{
+ start *= R->Range;
+ R->Low += start;
+ R->Code -= start;
+ R->Range *= size;
+ RC_NORM_LOCAL(R)
+}
+
+#define RC_Decode(start, size) RangeDec_Decode(p, start, size);
+#define RC_DecodeFinal(start, size) RC_Decode(start, size) RC_NORM_REMOTE(R)
+#define RC_GetThreshold(total) (R->Code / (R->Range /= (total)))
+
+
+#define CTX(ref) ((CPpmd7_Context *)Ppmd7_GetContext(p, ref))
+typedef CPpmd7_Context * CTX_PTR;
+#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p)
+void Ppmd7_UpdateModel(CPpmd7 *p);
+
+#define MASK(sym) ((unsigned char *)charMask)[sym]
+
+
+int Ppmd7a_DecodeSymbol(CPpmd7 *p)
+{
+ size_t charMask[256 / sizeof(size_t)];
+
+ if (p->MinContext->NumStats != 1)
+ {
+ CPpmd_State *s = Ppmd7_GetStats(p, p->MinContext);
+ unsigned i;
+ UInt32 count, hiCnt;
+ UInt32 summFreq = p->MinContext->Union2.SummFreq;
+
+ if (summFreq > R->Range)
+ return PPMD7_SYM_ERROR;
+
+ count = RC_GetThreshold(summFreq);
+ hiCnt = count;
+
+ if ((Int32)(count -= s->Freq) < 0)
+ {
+ Byte sym;
+ RC_DecodeFinal(0, s->Freq);
+ p->FoundState = s;
+ sym = s->Symbol;
+ Ppmd7_Update1_0(p);
+ return sym;
+ }
+
+ p->PrevSuccess = 0;
+ i = (unsigned)p->MinContext->NumStats - 1;
+
+ do
+ {
+ if ((Int32)(count -= (++s)->Freq) < 0)
+ {
+ Byte sym;
+ RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq);
+ p->FoundState = s;
+ sym = s->Symbol;
+ Ppmd7_Update1(p);
+ return sym;
+ }
+ }
+ while (--i);
+
+ if (hiCnt >= summFreq)
+ return PPMD7_SYM_ERROR;
+
+ hiCnt -= count;
+ RC_Decode(hiCnt, summFreq - hiCnt);
+
+ p->HiBitsFlag = PPMD7_HiBitsFlag_3(p->FoundState->Symbol);
+ PPMD_SetAllBitsIn256Bytes(charMask);
+ // i = p->MinContext->NumStats - 1;
+ // do { MASK((--s)->Symbol) = 0; } while (--i);
+ {
+ CPpmd_State *s2 = Ppmd7_GetStats(p, p->MinContext);
+ MASK(s->Symbol) = 0;
+ do
+ {
+ unsigned sym0 = s2[0].Symbol;
+ unsigned sym1 = s2[1].Symbol;
+ s2 += 2;
+ MASK(sym0) = 0;
+ MASK(sym1) = 0;
+ }
+ while (s2 < s);
+ }
+ }
+ else
+ {
+ CPpmd_State *s = Ppmd7Context_OneState(p->MinContext);
+ UInt16 *prob = Ppmd7_GetBinSumm(p);
+ UInt32 pr = *prob;
+ UInt32 size0 = (R->Range >> 14) * pr;
+ pr = PPMD_UPDATE_PROB_1(pr);
+
+ if (R->Code < size0)
+ {
+ Byte sym;
+ *prob = (UInt16)(pr + (1 << PPMD_INT_BITS));
+
+ // RangeDec_DecodeBit0(size0);
+ R->Range = size0;
+ RC_NORM(R)
+
+
+
+ // sym = (p->FoundState = Ppmd7Context_OneState(p->MinContext))->Symbol;
+ // Ppmd7_UpdateBin(p);
+ {
+ unsigned freq = s->Freq;
+ CTX_PTR c = CTX(SUCCESSOR(s));
+ sym = s->Symbol;
+ p->FoundState = s;
+ p->PrevSuccess = 1;
+ p->RunLength++;
+ s->Freq = (Byte)(freq + (freq < 128));
+ // NextContext(p);
+ if (p->OrderFall == 0 && (const Byte *)c > p->Text)
+ p->MaxContext = p->MinContext = c;
+ else
+ Ppmd7_UpdateModel(p);
+ }
+ return sym;
+ }
+
+ *prob = (UInt16)pr;
+ p->InitEsc = p->ExpEscape[pr >> 10];
+
+ // RangeDec_DecodeBit1(size0);
+ R->Low += size0;
+ R->Code -= size0;
+ R->Range = (R->Range & ~((UInt32)PPMD_BIN_SCALE - 1)) - size0;
+ RC_NORM_LOCAL(R)
+
+ PPMD_SetAllBitsIn256Bytes(charMask);
+ MASK(Ppmd7Context_OneState(p->MinContext)->Symbol) = 0;
+ p->PrevSuccess = 0;
+ }
+
+ for (;;)
+ {
+ CPpmd_State *s, *s2;
+ UInt32 freqSum, count, hiCnt;
+
+ CPpmd_See *see;
+ CPpmd7_Context *mc;
+ unsigned numMasked;
+ RC_NORM_REMOTE(R)
+ mc = p->MinContext;
+ numMasked = mc->NumStats;
+
+ do
+ {
+ p->OrderFall++;
+ if (!mc->Suffix)
+ return PPMD7_SYM_END;
+ mc = Ppmd7_GetContext(p, mc->Suffix);
+ }
+ while (mc->NumStats == numMasked);
+
+ s = Ppmd7_GetStats(p, mc);
+
+ {
+ unsigned num = mc->NumStats;
+ unsigned num2 = num / 2;
+
+ num &= 1;
+ hiCnt = (s->Freq & (unsigned)(MASK(s->Symbol))) & (0 - (UInt32)num);
+ s += num;
+ p->MinContext = mc;
+
+ do
+ {
+ unsigned sym0 = s[0].Symbol;
+ unsigned sym1 = s[1].Symbol;
+ s += 2;
+ hiCnt += (s[-2].Freq & (unsigned)(MASK(sym0)));
+ hiCnt += (s[-1].Freq & (unsigned)(MASK(sym1)));
+ }
+ while (--num2);
+ }
+
+ see = Ppmd7_MakeEscFreq(p, numMasked, &freqSum);
+ freqSum += hiCnt;
+
+ if (freqSum > R->Range)
+ return PPMD7_SYM_ERROR;
+
+ count = RC_GetThreshold(freqSum);
+
+ if (count < hiCnt)
+ {
+ Byte sym;
+
+ s = Ppmd7_GetStats(p, p->MinContext);
+ hiCnt = count;
+ // count -= s->Freq & (unsigned)(MASK(s->Symbol));
+ // if ((Int32)count >= 0)
+ {
+ for (;;)
+ {
+ count -= s->Freq & (unsigned)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break;
+ // count -= s->Freq & (unsigned)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break;
+ };
+ }
+ s--;
+ RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq);
+
+ // new (see->Summ) value can overflow over 16-bits in some rare cases
+ Ppmd_See_Update(see);
+ p->FoundState = s;
+ sym = s->Symbol;
+ Ppmd7_Update2(p);
+ return sym;
+ }
+
+ if (count >= freqSum)
+ return PPMD7_SYM_ERROR;
+
+ RC_Decode(hiCnt, freqSum - hiCnt);
+
+ // We increase (see->Summ) for sum of Freqs of all non_Masked symbols.
+ // new (see->Summ) value can overflow over 16-bits in some rare cases
+ see->Summ = (UInt16)(see->Summ + freqSum);
+
+ s = Ppmd7_GetStats(p, p->MinContext);
+ s2 = s + p->MinContext->NumStats;
+ do
+ {
+ MASK(s->Symbol) = 0;
+ s++;
+ }
+ while (s != s2);
+ }
+}
diff --git a/multiarc/src/formats/7z/C/Ppmd8.c b/multiarc/src/formats/7z/C/Ppmd8.c
index 58141633..fda8b88a 100644..100755
--- a/multiarc/src/formats/7z/C/Ppmd8.c
+++ b/multiarc/src/formats/7z/C/Ppmd8.c
@@ -1,5 +1,5 @@
/* Ppmd8.c -- PPMdI codec
-2018-07-04 : Igor Pavlov : Public domain
+2021-04-13 : Igor Pavlov : Public domain
This code is based on PPMd var.I (2002): Dmitry Shkarin : Public domain */
#include "Precomp.h"
@@ -8,7 +8,12 @@ This code is based on PPMd var.I (2002): Dmitry Shkarin : Public domain */
#include "Ppmd8.h"
-const Byte PPMD8_kExpEscape[16] = { 25, 14, 9, 7, 5, 5, 4, 4, 4, 3, 3, 3, 2, 2, 2, 2 };
+
+
+
+MY_ALIGN(16)
+static const Byte PPMD8_kExpEscape[16] = { 25, 14, 9, 7, 5, 5, 4, 4, 4, 3, 3, 3, 2, 2, 2, 2 };
+MY_ALIGN(16)
static const UInt16 kInitBinEsc[] = { 0x3CDD, 0x1F3F, 0x59BF, 0x48F3, 0x64A1, 0x5ABC, 0x6632, 0x6051};
#define MAX_FREQ 124
@@ -16,13 +21,10 @@ static const UInt16 kInitBinEsc[] = { 0x3CDD, 0x1F3F, 0x59BF, 0x48F3, 0x64A1, 0x
#define U2B(nu) ((UInt32)(nu) * UNIT_SIZE)
#define U2I(nu) (p->Units2Indx[(size_t)(nu) - 1])
-#define I2U(indx) (p->Indx2Units[indx])
+#define I2U(indx) ((unsigned)p->Indx2Units[indx])
-#ifdef PPMD_32BIT
- #define REF(ptr) (ptr)
-#else
- #define REF(ptr) ((UInt32)((Byte *)(ptr) - (p)->Base))
-#endif
+
+#define REF(ptr) Ppmd_GetRef(p, ptr)
#define STATS_REF(ptr) ((CPpmd_State_Ref)REF(ptr))
@@ -35,34 +37,23 @@ typedef CPpmd8_Context * CTX_PTR;
struct CPpmd8_Node_;
-typedef
- #ifdef PPMD_32BIT
- struct CPpmd8_Node_ *
- #else
- UInt32
- #endif
- CPpmd8_Node_Ref;
+typedef Ppmd_Ref_Type(struct CPpmd8_Node_) CPpmd8_Node_Ref;
typedef struct CPpmd8_Node_
{
UInt32 Stamp;
+
CPpmd8_Node_Ref Next;
UInt32 NU;
} CPpmd8_Node;
-#ifdef PPMD_32BIT
- #define NODE(ptr) (ptr)
-#else
- #define NODE(offs) ((CPpmd8_Node *)(p->Base + (offs)))
-#endif
-
-#define EMPTY_NODE 0xFFFFFFFF
+#define NODE(r) Ppmd_GetPtr_Type(p, r, CPpmd8_Node)
void Ppmd8_Construct(CPpmd8 *p)
{
unsigned i, k, m;
- p->Base = 0;
+ p->Base = NULL;
for (i = 0, k = 0; i < PPMD_NUM_INDEXES; i++)
{
@@ -78,39 +69,51 @@ void Ppmd8_Construct(CPpmd8 *p)
for (i = 0; i < 5; i++)
p->NS2Indx[i] = (Byte)i;
+
for (m = i, k = 1; i < 260; i++)
{
p->NS2Indx[i] = (Byte)m;
if (--k == 0)
k = (++m) - 4;
}
+
+ memcpy(p->ExpEscape, PPMD8_kExpEscape, 16);
}
+
void Ppmd8_Free(CPpmd8 *p, ISzAllocPtr alloc)
{
ISzAlloc_Free(alloc, p->Base);
p->Size = 0;
- p->Base = 0;
+ p->Base = NULL;
}
+
BoolInt Ppmd8_Alloc(CPpmd8 *p, UInt32 size, ISzAllocPtr alloc)
{
if (!p->Base || p->Size != size)
{
Ppmd8_Free(p, alloc);
- p->AlignOffset =
- #ifdef PPMD_32BIT
- (4 - size) & 3;
- #else
- 4 - (size & 3);
- #endif
- if ((p->Base = (Byte *)ISzAlloc_Alloc(alloc, p->AlignOffset + size)) == 0)
+ p->AlignOffset = (4 - size) & 3;
+ if ((p->Base = (Byte *)ISzAlloc_Alloc(alloc, p->AlignOffset + size)) == NULL)
return False;
p->Size = size;
}
return True;
}
+
+
+// ---------- Internal Memory Allocator ----------
+
+
+
+
+
+
+#define EMPTY_NODE 0xFFFFFFFF
+
+
static void InsertNode(CPpmd8 *p, void *node, unsigned indx)
{
((CPpmd8_Node *)node)->Stamp = EMPTY_NODE;
@@ -120,14 +123,17 @@ static void InsertNode(CPpmd8 *p, void *node, unsigned indx)
p->Stamps[indx]++;
}
+
static void *RemoveNode(CPpmd8 *p, unsigned indx)
{
CPpmd8_Node *node = NODE((CPpmd8_Node_Ref)p->FreeList[indx]);
p->FreeList[indx] = node->Next;
p->Stamps[indx]--;
+
return node;
}
+
static void SplitBlock(CPpmd8 *p, void *ptr, unsigned oldIndx, unsigned newIndx)
{
unsigned i, nu = I2U(oldIndx) - I2U(newIndx);
@@ -140,51 +146,96 @@ static void SplitBlock(CPpmd8 *p, void *ptr, unsigned oldIndx, unsigned newIndx)
InsertNode(p, ptr, i);
}
+
+
+
+
+
+
+
+
+
+
+
+
+
static void GlueFreeBlocks(CPpmd8 *p)
{
- CPpmd8_Node_Ref head = 0;
- CPpmd8_Node_Ref *prev = &head;
- unsigned i;
+ /*
+ we use first UInt32 field of 12-bytes UNITs as record type stamp
+ CPpmd_State { Byte Symbol; Byte Freq; : Freq != 0xFF
+ CPpmd8_Context { Byte NumStats; Byte Flags; UInt16 SummFreq; : Flags != 0xFF ???
+ CPpmd8_Node { UInt32 Stamp : Stamp == 0xFFFFFFFF for free record
+ : Stamp == 0 for guard
+ The last 12-byte UNIT in the array always contains the 12-byte order-0 CPpmd8_Context record
+ */
+ CPpmd8_Node_Ref n;
p->GlueCount = 1 << 13;
memset(p->Stamps, 0, sizeof(p->Stamps));
- /* Order-0 context is always at top UNIT, so we don't need guard NODE at the end.
- All blocks up to p->LoUnit can be free, so we need guard NODE at LoUnit. */
+ /* we set guard NODE at LoUnit */
if (p->LoUnit != p->HiUnit)
- ((CPpmd8_Node *)p->LoUnit)->Stamp = 0;
+ ((CPpmd8_Node *)(void *)p->LoUnit)->Stamp = 0;
- /* Glue free blocks */
- for (i = 0; i < PPMD_NUM_INDEXES; i++)
{
- CPpmd8_Node_Ref next = (CPpmd8_Node_Ref)p->FreeList[i];
- p->FreeList[i] = 0;
- while (next != 0)
+ /* Glue free blocks */
+ CPpmd8_Node_Ref *prev = &n;
+ unsigned i;
+ for (i = 0; i < PPMD_NUM_INDEXES; i++)
{
- CPpmd8_Node *node = NODE(next);
- if (node->NU != 0)
+
+ CPpmd8_Node_Ref next = (CPpmd8_Node_Ref)p->FreeList[i];
+ p->FreeList[i] = 0;
+ while (next != 0)
{
- CPpmd8_Node *node2;
+ CPpmd8_Node *node = NODE(next);
+ UInt32 nu = node->NU;
*prev = next;
- prev = &(node->Next);
- while ((node2 = node + node->NU)->Stamp == EMPTY_NODE)
+ next = node->Next;
+ if (nu != 0)
{
- node->NU += node2->NU;
- node2->NU = 0;
+ CPpmd8_Node *node2;
+ prev = &(node->Next);
+ while ((node2 = node + nu)->Stamp == EMPTY_NODE)
+ {
+ nu += node2->NU;
+ node2->NU = 0;
+ node->NU = nu;
+ }
}
}
- next = node->Next;
}
+
+ *prev = 0;
}
- *prev = 0;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
/* Fill lists of free blocks */
- while (head != 0)
+ while (n != 0)
{
- CPpmd8_Node *node = NODE(head);
- unsigned nu;
- head = node->Next;
- nu = node->NU;
+ CPpmd8_Node *node = NODE(n);
+ UInt32 nu = node->NU;
+ unsigned i;
+ n = node->Next;
if (nu == 0)
continue;
for (; nu > 128; nu -= 128, node += 128)
@@ -192,57 +243,70 @@ static void GlueFreeBlocks(CPpmd8 *p)
if (I2U(i = U2I(nu)) != nu)
{
unsigned k = I2U(--i);
- InsertNode(p, node + k, nu - k - 1);
+ InsertNode(p, node + k, (unsigned)nu - k - 1);
}
InsertNode(p, node, i);
}
}
+
+MY_NO_INLINE
static void *AllocUnitsRare(CPpmd8 *p, unsigned indx)
{
unsigned i;
- void *retVal;
+
if (p->GlueCount == 0)
{
GlueFreeBlocks(p);
if (p->FreeList[indx] != 0)
return RemoveNode(p, indx);
}
+
i = indx;
+
do
{
if (++i == PPMD_NUM_INDEXES)
{
UInt32 numBytes = U2B(I2U(indx));
+ Byte *us = p->UnitsStart;
p->GlueCount--;
- return ((UInt32)(p->UnitsStart - p->Text) > numBytes) ? (p->UnitsStart -= numBytes) : (NULL);
+ return ((UInt32)(us - p->Text) > numBytes) ? (p->UnitsStart = us - numBytes) : (NULL);
}
}
while (p->FreeList[i] == 0);
- retVal = RemoveNode(p, i);
- SplitBlock(p, retVal, i, indx);
- return retVal;
+
+ {
+ void *block = RemoveNode(p, i);
+ SplitBlock(p, block, i, indx);
+ return block;
+ }
}
+
static void *AllocUnits(CPpmd8 *p, unsigned indx)
{
- UInt32 numBytes;
if (p->FreeList[indx] != 0)
return RemoveNode(p, indx);
- numBytes = U2B(I2U(indx));
- if (numBytes <= (UInt32)(p->HiUnit - p->LoUnit))
{
- void *retVal = p->LoUnit;
- p->LoUnit += numBytes;
- return retVal;
+ UInt32 numBytes = U2B(I2U(indx));
+ Byte *lo = p->LoUnit;
+ if ((UInt32)(p->HiUnit - lo) >= numBytes)
+ {
+ p->LoUnit = lo + numBytes;
+ return lo;
+ }
}
return AllocUnitsRare(p, indx);
}
+
#define MyMem12Cpy(dest, src, num) \
{ UInt32 *d = (UInt32 *)dest; const UInt32 *z = (const UInt32 *)src; UInt32 n = num; \
do { d[0] = z[0]; d[1] = z[1]; d[2] = z[2]; z += 3; d += 3; } while (--n); }
+
+
static void *ShrinkUnits(CPpmd8 *p, void *oldPtr, unsigned oldNU, unsigned newNU)
{
unsigned i0 = U2I(oldNU);
@@ -260,11 +324,13 @@ static void *ShrinkUnits(CPpmd8 *p, void *oldPtr, unsigned oldNU, unsigned newNU
return oldPtr;
}
+
static void FreeUnits(CPpmd8 *p, void *ptr, unsigned nu)
{
InsertNode(p, ptr, U2I(nu));
}
+
static void SpecialFreeUnit(CPpmd8 *p, void *ptr)
{
if ((Byte *)ptr != p->UnitsStart)
@@ -272,77 +338,91 @@ static void SpecialFreeUnit(CPpmd8 *p, void *ptr)
else
{
#ifdef PPMD8_FREEZE_SUPPORT
- *(UInt32 *)ptr = EMPTY_NODE; /* it's used for (Flags == 0xFF) check in RemoveBinContexts */
+ *(UInt32 *)ptr = EMPTY_NODE; /* it's used for (Flags == 0xFF) check in RemoveBinContexts() */
#endif
p->UnitsStart += UNIT_SIZE;
}
}
+
+/*
static void *MoveUnitsUp(CPpmd8 *p, void *oldPtr, unsigned nu)
{
unsigned indx = U2I(nu);
void *ptr;
- if ((Byte *)oldPtr > p->UnitsStart + 16 * 1024 || REF(oldPtr) > p->FreeList[indx])
+ if ((Byte *)oldPtr > p->UnitsStart + (1 << 14) || REF(oldPtr) > p->FreeList[indx])
return oldPtr;
ptr = RemoveNode(p, indx);
MyMem12Cpy(ptr, oldPtr, nu);
- if ((Byte*)oldPtr != p->UnitsStart)
+ if ((Byte *)oldPtr != p->UnitsStart)
InsertNode(p, oldPtr, indx);
else
p->UnitsStart += U2B(I2U(indx));
return ptr;
}
+*/
static void ExpandTextArea(CPpmd8 *p)
{
UInt32 count[PPMD_NUM_INDEXES];
unsigned i;
+
memset(count, 0, sizeof(count));
if (p->LoUnit != p->HiUnit)
- ((CPpmd8_Node *)p->LoUnit)->Stamp = 0;
+ ((CPpmd8_Node *)(void *)p->LoUnit)->Stamp = 0;
{
- CPpmd8_Node *node = (CPpmd8_Node *)p->UnitsStart;
- for (; node->Stamp == EMPTY_NODE; node += node->NU)
+ CPpmd8_Node *node = (CPpmd8_Node *)(void *)p->UnitsStart;
+ while (node->Stamp == EMPTY_NODE)
{
+ UInt32 nu = node->NU;
node->Stamp = 0;
- count[U2I(node->NU)]++;
+ count[U2I(nu)]++;
+ node += nu;
}
p->UnitsStart = (Byte *)node;
}
for (i = 0; i < PPMD_NUM_INDEXES; i++)
{
- CPpmd8_Node_Ref *next = (CPpmd8_Node_Ref *)&p->FreeList[i];
- while (count[i] != 0)
+ UInt32 cnt = count[i];
+ if (cnt == 0)
+ continue;
{
- CPpmd8_Node *node = NODE(*next);
- while (node->Stamp == 0)
+ CPpmd8_Node_Ref *prev = (CPpmd8_Node_Ref *)&p->FreeList[i];
+ CPpmd8_Node_Ref n = *prev;
+ p->Stamps[i] -= cnt;
+ for (;;)
{
- *next = node->Next;
- node = NODE(*next);
- p->Stamps[i]--;
- if (--count[i] == 0)
+ CPpmd8_Node *node = NODE(n);
+ n = node->Next;
+ if (node->Stamp != 0)
+ {
+ prev = &node->Next;
+ continue;
+ }
+ *prev = n;
+ if (--cnt == 0)
break;
}
- next = &node->Next;
}
}
}
-#define SUCCESSOR(p) ((CPpmd_Void_Ref)((p)->SuccessorLow | ((UInt32)(p)->SuccessorHigh << 16)))
+#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p)
static void SetSuccessor(CPpmd_State *p, CPpmd_Void_Ref v)
{
- (p)->SuccessorLow = (UInt16)((UInt32)(v) & 0xFFFF);
- (p)->SuccessorHigh = (UInt16)(((UInt32)(v) >> 16) & 0xFFFF);
+ Ppmd_SET_SUCCESSOR(p, v);
}
#define RESET_TEXT(offs) { p->Text = p->Base + p->AlignOffset + (offs); }
-static void RestartModel(CPpmd8 *p)
+MY_NO_INLINE
+static
+void RestartModel(CPpmd8 *p)
{
- unsigned i, k, m, r;
+ unsigned i, k, m;
memset(p->FreeList, 0, sizeof(p->FreeList));
memset(p->Stamps, 0, sizeof(p->Stamps));
@@ -355,30 +435,47 @@ static void RestartModel(CPpmd8 *p)
p->RunLength = p->InitRL = -(Int32)((p->MaxOrder < 12) ? p->MaxOrder : 12) - 1;
p->PrevSuccess = 0;
- p->MinContext = p->MaxContext = (CTX_PTR)(p->HiUnit -= UNIT_SIZE); /* AllocContext(p); */
- p->MinContext->Suffix = 0;
- p->MinContext->NumStats = 255;
- p->MinContext->Flags = 0;
- p->MinContext->SummFreq = 256 + 1;
- p->FoundState = (CPpmd_State *)p->LoUnit; /* AllocUnits(p, PPMD_NUM_INDEXES - 1); */
- p->LoUnit += U2B(256 / 2);
- p->MinContext->Stats = REF(p->FoundState);
- for (i = 0; i < 256; i++)
{
- CPpmd_State *s = &p->FoundState[i];
- s->Symbol = (Byte)i;
- s->Freq = 1;
- SetSuccessor(s, 0);
+ CPpmd8_Context *mc = (CTX_PTR)(void *)(p->HiUnit -= UNIT_SIZE); /* AllocContext(p); */
+ CPpmd_State *s = (CPpmd_State *)p->LoUnit; /* AllocUnits(p, PPMD_NUM_INDEXES - 1); */
+
+ p->LoUnit += U2B(256 / 2);
+ p->MaxContext = p->MinContext = mc;
+ p->FoundState = s;
+ mc->Flags = 0;
+ mc->NumStats = 256 - 1;
+ mc->Union2.SummFreq = 256 + 1;
+ mc->Union4.Stats = REF(s);
+ mc->Suffix = 0;
+
+ for (i = 0; i < 256; i++, s++)
+ {
+ s->Symbol = (Byte)i;
+ s->Freq = 1;
+ SetSuccessor(s, 0);
+ }
}
+
+
+
+
+
+
+
+
+
+
+
for (i = m = 0; m < 25; m++)
{
while (p->NS2Indx[i] == m)
i++;
for (k = 0; k < 8; k++)
{
- UInt16 val = (UInt16)(PPMD_BIN_SCALE - kInitBinEsc[k] / (i + 1));
+ unsigned r;
UInt16 *dest = p->BinSumm[m] + k;
+ UInt16 val = (UInt16)(PPMD_BIN_SCALE - kInitBinEsc[k] / (i + 1));
for (r = 0; r < 64; r += 8)
dest[r] = val;
}
@@ -386,50 +483,104 @@ static void RestartModel(CPpmd8 *p)
for (i = m = 0; m < 24; m++)
{
+ unsigned summ;
+ CPpmd_See *s;
while (p->NS2Indx[(size_t)i + 3] == m + 3)
i++;
- for (k = 0; k < 32; k++)
+ s = p->See[m];
+ summ = ((2 * i + 5) << (PPMD_PERIOD_BITS - 4));
+ for (k = 0; k < 32; k++, s++)
{
- CPpmd_See *s = &p->See[m][k];
- s->Summ = (UInt16)((2 * i + 5) << (s->Shift = PPMD_PERIOD_BITS - 4));
+ s->Summ = (UInt16)summ;
+ s->Shift = (PPMD_PERIOD_BITS - 4);
s->Count = 7;
}
}
+
+ p->DummySee.Summ = 0; /* unused */
+ p->DummySee.Shift = PPMD_PERIOD_BITS;
+ p->DummySee.Count = 64; /* unused */
}
+
void Ppmd8_Init(CPpmd8 *p, unsigned maxOrder, unsigned restoreMethod)
{
p->MaxOrder = maxOrder;
p->RestoreMethod = restoreMethod;
RestartModel(p);
- p->DummySee.Shift = PPMD_PERIOD_BITS;
- p->DummySee.Summ = 0; /* unused */
- p->DummySee.Count = 64; /* unused */
}
+
+#define FLAG_RESCALED (1 << 2)
+// #define FLAG_SYM_HIGH (1 << 3)
+#define FLAG_PREV_HIGH (1 << 4)
+
+#define HiBits_Prepare(sym) ((unsigned)(sym) + 0xC0)
+
+#define HiBits_Convert_3(flags) (((flags) >> (8 - 3)) & (1 << 3))
+#define HiBits_Convert_4(flags) (((flags) >> (8 - 4)) & (1 << 4))
+
+#define PPMD8_HiBitsFlag_3(sym) HiBits_Convert_3(HiBits_Prepare(sym))
+#define PPMD8_HiBitsFlag_4(sym) HiBits_Convert_4(HiBits_Prepare(sym))
+
+// #define PPMD8_HiBitsFlag_3(sym) (0x08 * ((sym) >= 0x40))
+// #define PPMD8_HiBitsFlag_4(sym) (0x10 * ((sym) >= 0x40))
+
+/*
+Refresh() is called when we remove some symbols (successors) in context.
+It increases Escape_Freq for sum of all removed symbols.
+*/
+
static void Refresh(CPpmd8 *p, CTX_PTR ctx, unsigned oldNU, unsigned scale)
{
unsigned i = ctx->NumStats, escFreq, sumFreq, flags;
CPpmd_State *s = (CPpmd_State *)ShrinkUnits(p, STATS(ctx), oldNU, (i + 2) >> 1);
- ctx->Stats = REF(s);
- #ifdef PPMD8_FREEZE_SUPPORT
- /* fixed over Shkarin's code. Fixed code is not compatible with original code for some files in FREEZE mode. */
- scale |= (ctx->SummFreq >= ((UInt32)1 << 15));
- #endif
- flags = (ctx->Flags & (0x10 + 0x04 * scale)) + 0x08 * (s->Symbol >= 0x40);
- escFreq = ctx->SummFreq - s->Freq;
- sumFreq = (s->Freq = (Byte)((s->Freq + scale) >> scale));
+ ctx->Union4.Stats = REF(s);
+
+ // #ifdef PPMD8_FREEZE_SUPPORT
+ /*
+ (ctx->Union2.SummFreq >= ((UInt32)1 << 15)) can be in FREEZE mode for some files.
+ It's not good for range coder. So new versions of support fix:
+ - original PPMdI code rev.1
+ + original PPMdI code rev.2
+ - 7-Zip default (PPMD8_FREEZE_SUPPORT is not defined)
+ + 7-Zip (p->RestoreMethod >= PPMD8_RESTORE_METHOD_FREEZE)
+ if we use that fixed line, we can lose compatibility with some files created before fix
+ if we don't use that fixed line, the program can work incorrectly in FREEZE mode in rare case.
+ */
+ // if (p->RestoreMethod >= PPMD8_RESTORE_METHOD_FREEZE)
+ {
+ scale |= (ctx->Union2.SummFreq >= ((UInt32)1 << 15));
+ }
+ // #endif
+
+
+
+ flags = HiBits_Prepare(s->Symbol);
+ {
+ unsigned freq = s->Freq;
+ escFreq = ctx->Union2.SummFreq - freq;
+ freq = (freq + scale) >> scale;
+ sumFreq = freq;
+ s->Freq = (Byte)freq;
+ }
+
do
{
- escFreq -= (++s)->Freq;
- sumFreq += (s->Freq = (Byte)((s->Freq + scale) >> scale));
- flags |= 0x08 * (s->Symbol >= 0x40);
+ unsigned freq = (++s)->Freq;
+ escFreq -= freq;
+ freq = (freq + scale) >> scale;
+ sumFreq += freq;
+ s->Freq = (Byte)freq;
+ flags |= HiBits_Prepare(s->Symbol);
}
while (--i);
- ctx->SummFreq = (UInt16)(sumFreq + ((escFreq + scale) >> scale));
- ctx->Flags = (Byte)flags;
+
+ ctx->Union2.SummFreq = (UInt16)(sumFreq + ((escFreq + scale) >> scale));
+ ctx->Flags = (Byte)((ctx->Flags & (FLAG_PREV_HIGH + FLAG_RESCALED * scale)) + HiBits_Convert_3(flags));
}
+
static void SwapStates(CPpmd_State *t1, CPpmd_State *t2)
{
CPpmd_State tmp = *t1;
@@ -437,98 +588,169 @@ static void SwapStates(CPpmd_State *t1, CPpmd_State *t2)
*t2 = tmp;
}
+
+/*
+CutOff() reduces contexts:
+ It converts Successors at MaxOrder that point to other Contexts into NULL-Successors
+ It removes RAW-Successors and NULL-Successors that are not Order-0
+ and it removes contexts when it has no Successors.
+ if the (Union4.Stats) is close to (UnitsStart), it moves it up.
+*/
+
static CPpmd_Void_Ref CutOff(CPpmd8 *p, CTX_PTR ctx, unsigned order)
{
- int i;
- unsigned tmp;
- CPpmd_State *s;
+ int ns = ctx->NumStats;
+ unsigned nu;
+ CPpmd_State *stats;
- if (!ctx->NumStats)
+ if (ns == 0)
{
- s = ONE_STATE(ctx);
- if ((Byte *)Ppmd8_GetPtr(p, SUCCESSOR(s)) >= p->UnitsStart)
+ CPpmd_State *s = ONE_STATE(ctx);
+ CPpmd_Void_Ref successor = SUCCESSOR(s);
+ if ((Byte *)Ppmd8_GetPtr(p, successor) >= p->UnitsStart)
{
if (order < p->MaxOrder)
- SetSuccessor(s, CutOff(p, CTX(SUCCESSOR(s)), order + 1));
+ successor = CutOff(p, CTX(successor), order + 1);
else
- SetSuccessor(s, 0);
- if (SUCCESSOR(s) || order <= 9) /* O_BOUND */
+ successor = 0;
+ SetSuccessor(s, successor);
+ if (successor || order <= 9) /* O_BOUND */
return REF(ctx);
}
SpecialFreeUnit(p, ctx);
return 0;
}
- ctx->Stats = STATS_REF(MoveUnitsUp(p, STATS(ctx), tmp = ((unsigned)ctx->NumStats + 2) >> 1));
+ nu = ((unsigned)ns + 2) >> 1;
+ // ctx->Union4.Stats = STATS_REF(MoveUnitsUp(p, STATS(ctx), nu));
+ {
+ unsigned indx = U2I(nu);
+ stats = STATS(ctx);
- for (s = STATS(ctx) + (i = ctx->NumStats); s >= STATS(ctx); s--)
- if ((Byte *)Ppmd8_GetPtr(p, SUCCESSOR(s)) < p->UnitsStart)
+ if ((UInt32)((Byte *)stats - p->UnitsStart) <= (1 << 14)
+ && (CPpmd_Void_Ref)ctx->Union4.Stats <= p->FreeList[indx])
{
- CPpmd_State *s2 = STATS(ctx) + (i--);
- SetSuccessor(s, 0);
- SwapStates(s, s2);
+ void *ptr = RemoveNode(p, indx);
+ ctx->Union4.Stats = STATS_REF(ptr);
+ MyMem12Cpy(ptr, (const void *)stats, nu);
+ if ((Byte *)stats != p->UnitsStart)
+ InsertNode(p, stats, indx);
+ else
+ p->UnitsStart += U2B(I2U(indx));
+ stats = ptr;
}
- else if (order < p->MaxOrder)
- SetSuccessor(s, CutOff(p, CTX(SUCCESSOR(s)), order + 1));
- else
- SetSuccessor(s, 0);
-
- if (i != ctx->NumStats && order)
+ }
+
+ {
+ CPpmd_State *s = stats + (unsigned)ns;
+ do
+ {
+ CPpmd_Void_Ref successor = SUCCESSOR(s);
+ if ((Byte *)Ppmd8_GetPtr(p, successor) < p->UnitsStart)
+ {
+ CPpmd_State *s2 = stats + (unsigned)(ns--);
+ if (order)
+ {
+ if (s != s2)
+ *s = *s2;
+ }
+ else
+ {
+ SwapStates(s, s2);
+ SetSuccessor(s2, 0);
+ }
+ }
+ else
+ {
+ if (order < p->MaxOrder)
+ SetSuccessor(s, CutOff(p, CTX(successor), order + 1));
+ else
+ SetSuccessor(s, 0);
+ }
+ }
+ while (--s >= stats);
+ }
+
+ if (ns != ctx->NumStats && order)
{
- ctx->NumStats = (Byte)i;
- s = STATS(ctx);
- if (i < 0)
+ if (ns < 0)
{
- FreeUnits(p, s, tmp);
+ FreeUnits(p, stats, nu);
SpecialFreeUnit(p, ctx);
return 0;
}
- if (i == 0)
+ ctx->NumStats = (Byte)ns;
+ if (ns == 0)
{
- ctx->Flags = (Byte)((ctx->Flags & 0x10) + 0x08 * (s->Symbol >= 0x40));
- *ONE_STATE(ctx) = *s;
- FreeUnits(p, s, tmp);
- /* 9.31: the code was fixed. It's was not BUG, if Freq <= MAX_FREQ = 124 */
- ONE_STATE(ctx)->Freq = (Byte)(((unsigned)ONE_STATE(ctx)->Freq + 11) >> 3);
+ const Byte sym = stats->Symbol;
+ ctx->Flags = (Byte)((ctx->Flags & FLAG_PREV_HIGH) + PPMD8_HiBitsFlag_3(sym));
+ // *ONE_STATE(ctx) = *stats;
+ ctx->Union2.State2.Symbol = sym;
+ ctx->Union2.State2.Freq = (Byte)(((unsigned)stats->Freq + 11) >> 3);
+ ctx->Union4.State4.Successor_0 = stats->Successor_0;
+ ctx->Union4.State4.Successor_1 = stats->Successor_1;
+ FreeUnits(p, stats, nu);
}
else
- Refresh(p, ctx, tmp, ctx->SummFreq > 16 * i);
+ {
+ Refresh(p, ctx, nu, ctx->Union2.SummFreq > 16 * (unsigned)ns);
+ }
}
+
return REF(ctx);
}
+
+
#ifdef PPMD8_FREEZE_SUPPORT
+
+/*
+RemoveBinContexts()
+ It converts Successors at MaxOrder that point to other Contexts into NULL-Successors
+ It changes RAW-Successors to NULL-Successors
+ removes Bin Context without Successor, if suffix of that context is also binary.
+*/
+
static CPpmd_Void_Ref RemoveBinContexts(CPpmd8 *p, CTX_PTR ctx, unsigned order)
{
- CPpmd_State *s;
if (!ctx->NumStats)
{
- s = ONE_STATE(ctx);
- if ((Byte *)Ppmd8_GetPtr(p, SUCCESSOR(s)) >= p->UnitsStart && order < p->MaxOrder)
- SetSuccessor(s, RemoveBinContexts(p, CTX(SUCCESSOR(s)), order + 1));
+ CPpmd_State *s = ONE_STATE(ctx);
+ CPpmd_Void_Ref successor = SUCCESSOR(s);
+ if ((Byte *)Ppmd8_GetPtr(p, successor) >= p->UnitsStart && order < p->MaxOrder)
+ successor = RemoveBinContexts(p, CTX(successor), order + 1);
else
- SetSuccessor(s, 0);
+ successor = 0;
+ SetSuccessor(s, successor);
/* Suffix context can be removed already, since different (high-order)
Successors may refer to same context. So we check Flags == 0xFF (Stamp == EMPTY_NODE) */
- if (!SUCCESSOR(s) && (!SUFFIX(ctx)->NumStats || SUFFIX(ctx)->Flags == 0xFF))
+ if (!successor && (!SUFFIX(ctx)->NumStats || SUFFIX(ctx)->Flags == 0xFF))
{
FreeUnits(p, ctx, 1);
return 0;
}
- else
- return REF(ctx);
}
-
- for (s = STATS(ctx) + ctx->NumStats; s >= STATS(ctx); s--)
- if ((Byte *)Ppmd8_GetPtr(p, SUCCESSOR(s)) >= p->UnitsStart && order < p->MaxOrder)
- SetSuccessor(s, RemoveBinContexts(p, CTX(SUCCESSOR(s)), order + 1));
- else
- SetSuccessor(s, 0);
+ else
+ {
+ CPpmd_State *s = STATS(ctx) + ctx->NumStats;
+ do
+ {
+ CPpmd_Void_Ref successor = SUCCESSOR(s);
+ if ((Byte *)Ppmd8_GetPtr(p, successor) >= p->UnitsStart && order < p->MaxOrder)
+ SetSuccessor(s, RemoveBinContexts(p, CTX(successor), order + 1));
+ else
+ SetSuccessor(s, 0);
+ }
+ while (--s >= STATS(ctx));
+ }
return REF(ctx);
}
+
#endif
+
+
static UInt32 GetUsedMemory(const CPpmd8 *p)
{
UInt32 v = 0;
@@ -544,7 +766,8 @@ static UInt32 GetUsedMemory(const CPpmd8 *p)
#define RESTORE_MODEL(c1, fSuccessor) RestoreModel(p, c1)
#endif
-static void RestoreModel(CPpmd8 *p, CTX_PTR c1
+
+static void RestoreModel(CPpmd8 *p, CTX_PTR ctxError
#ifdef PPMD8_FREEZE_SUPPORT
, CTX_PTR fSuccessor
#endif
@@ -553,36 +776,55 @@ static void RestoreModel(CPpmd8 *p, CTX_PTR c1
CTX_PTR c;
CPpmd_State *s;
RESET_TEXT(0);
- for (c = p->MaxContext; c != c1; c = SUFFIX(c))
+
+ // we go here in cases of error of allocation for context (c1)
+ // Order(MinContext) < Order(ctxError) <= Order(MaxContext)
+
+ // We remove the last symbol from each of the contexts in [p->MaxContext ... ctxError)
+ // So we rollback all created (symbols) before error.
+ for (c = p->MaxContext; c != ctxError; c = SUFFIX(c))
if (--(c->NumStats) == 0)
{
s = STATS(c);
- c->Flags = (Byte)((c->Flags & 0x10) + 0x08 * (s->Symbol >= 0x40));
- *ONE_STATE(c) = *s;
+ c->Flags = (Byte)((c->Flags & FLAG_PREV_HIGH) + PPMD8_HiBitsFlag_3(s->Symbol));
+ // *ONE_STATE(c) = *s;
+ c->Union2.State2.Symbol = s->Symbol;
+ c->Union2.State2.Freq = (Byte)(((unsigned)s->Freq + 11) >> 3);
+ c->Union4.State4.Successor_0 = s->Successor_0;
+ c->Union4.State4.Successor_1 = s->Successor_1;
+
SpecialFreeUnit(p, s);
- ONE_STATE(c)->Freq = (Byte)(((unsigned)ONE_STATE(c)->Freq + 11) >> 3);
}
else
- Refresh(p, c, (c->NumStats+3) >> 1, 0);
+ {
+ /* Refresh() can increase Escape_Freq by the Freq of the last symbol that was added before the error,
+ so the largest possible increase of Escape_Freq is (8) relative to its value before the model update */
+ Refresh(p, c, ((unsigned)c->NumStats + 3) >> 1, 0);
+ }
+ // increase Escape Freq for context [ctxError ... p->MinContext)
for (; c != p->MinContext; c = SUFFIX(c))
- if (!c->NumStats)
- ONE_STATE(c)->Freq = (Byte)(ONE_STATE(c)->Freq - (ONE_STATE(c)->Freq >> 1));
- else if ((c->SummFreq += 4) > 128 + 4 * c->NumStats)
- Refresh(p, c, (c->NumStats + 2) >> 1, 1);
+ if (c->NumStats == 0)
+ {
+ // ONE_STATE(c)
+ c->Union2.State2.Freq = (Byte)(((unsigned)c->Union2.State2.Freq + 1) >> 1);
+ }
+ else if ((c->Union2.SummFreq = (UInt16)(c->Union2.SummFreq + 4)) > 128 + 4 * c->NumStats)
+ Refresh(p, c, ((unsigned)c->NumStats + 2) >> 1, 1);
#ifdef PPMD8_FREEZE_SUPPORT
if (p->RestoreMethod > PPMD8_RESTORE_METHOD_FREEZE)
{
p->MaxContext = fSuccessor;
- p->GlueCount += !(p->Stamps[1] & 1);
+ p->GlueCount += !(p->Stamps[1] & 1); // why?
}
else if (p->RestoreMethod == PPMD8_RESTORE_METHOD_FREEZE)
{
while (p->MaxContext->Suffix)
p->MaxContext = SUFFIX(p->MaxContext);
RemoveBinContexts(p, p->MaxContext, 0);
- p->RestoreMethod++;
+ // we change the current mode to (PPMD8_RESTORE_METHOD_FREEZE + 1)
+ p->RestoreMethod = PPMD8_RESTORE_METHOD_FREEZE + 1;
p->GlueCount = 0;
p->OrderFall = p->MaxOrder;
}
@@ -603,16 +845,19 @@ static void RestoreModel(CPpmd8 *p, CTX_PTR c1
p->GlueCount = 0;
p->OrderFall = p->MaxOrder;
}
+ p->MinContext = p->MaxContext;
}
+
+
+MY_NO_INLINE
static CTX_PTR CreateSuccessors(CPpmd8 *p, BoolInt skip, CPpmd_State *s1, CTX_PTR c)
{
- CPpmd_State upState;
- Byte flags;
+
CPpmd_Byte_Ref upBranch = (CPpmd_Byte_Ref)SUCCESSOR(p->FoundState);
- /* fixed over Shkarin's code. Maybe it could work without + 1 too. */
- CPpmd_State *ps[PPMD8_MAX_ORDER + 1];
+ Byte newSym, newFreq, flags;
unsigned numPs = 0;
+ CPpmd_State *ps[PPMD8_MAX_ORDER + 1]; /* fixed over Shkarin's code. Maybe it could work without + 1 too. */
if (!skip)
ps[numPs++] = p->FoundState;
@@ -622,19 +867,13 @@ static CTX_PTR CreateSuccessors(CPpmd8 *p, BoolInt skip, CPpmd_State *s1, CTX_PT
CPpmd_Void_Ref successor;
CPpmd_State *s;
c = SUFFIX(c);
- if (s1)
- {
- s = s1;
- s1 = NULL;
- }
+
+ if (s1) { s = s1; s1 = NULL; }
else if (c->NumStats != 0)
{
- for (s = STATS(c); s->Symbol != p->FoundState->Symbol; s++);
- if (s->Freq < MAX_FREQ - 9)
- {
- s->Freq++;
- c->SummFreq++;
- }
+ Byte sym = p->FoundState->Symbol;
+ for (s = STATS(c); s->Symbol != sym; s++);
+ if (s->Freq < MAX_FREQ - 9) { s->Freq++; c->Union2.SummFreq++; }
}
else
{
@@ -644,36 +883,54 @@ static CTX_PTR CreateSuccessors(CPpmd8 *p, BoolInt skip, CPpmd_State *s1, CTX_PT
successor = SUCCESSOR(s);
if (successor != upBranch)
{
+
c = CTX(successor);
if (numPs == 0)
+ {
+
+
return c;
+ }
break;
}
ps[numPs++] = s;
}
- upState.Symbol = *(const Byte *)Ppmd8_GetPtr(p, upBranch);
- SetSuccessor(&upState, upBranch + 1);
- flags = (Byte)(0x10 * (p->FoundState->Symbol >= 0x40) + 0x08 * (upState.Symbol >= 0x40));
-
+
+
+
+
+ newSym = *(const Byte *)Ppmd8_GetPtr(p, upBranch);
+ upBranch++;
+ flags = (Byte)(PPMD8_HiBitsFlag_4(p->FoundState->Symbol) + PPMD8_HiBitsFlag_3(newSym));
+
if (c->NumStats == 0)
- upState.Freq = ONE_STATE(c)->Freq;
+ newFreq = c->Union2.State2.Freq;
else
{
UInt32 cf, s0;
CPpmd_State *s;
- for (s = STATS(c); s->Symbol != upState.Symbol; s++);
- cf = s->Freq - 1;
- s0 = c->SummFreq - c->NumStats - cf;
- upState.Freq = (Byte)(1 + ((2 * cf <= s0) ? (5 * cf > s0) : ((cf + 2 * s0 - 3) / s0)));
+ for (s = STATS(c); s->Symbol != newSym; s++);
+ cf = (UInt32)s->Freq - 1;
+ s0 = (UInt32)c->Union2.SummFreq - c->NumStats - cf;
+ /*
+
+
+ max(newFreq)= (s->Freq - 1), when (s0 == 1)
+
+
+ */
+ newFreq = (Byte)(1 + ((2 * cf <= s0) ? (5 * cf > s0) : ((cf + 2 * s0 - 3) / s0)));
}
+
+
do
{
- /* Create Child */
- CTX_PTR c1; /* = AllocContext(p); */
+ CTX_PTR c1;
+ /* = AllocContext(p); */
if (p->HiUnit != p->LoUnit)
- c1 = (CTX_PTR)(p->HiUnit -= UNIT_SIZE);
+ c1 = (CTX_PTR)(void *)(p->HiUnit -= UNIT_SIZE);
else if (p->FreeList[0] != 0)
c1 = (CTX_PTR)RemoveNode(p, 0);
else
@@ -682,9 +939,11 @@ static CTX_PTR CreateSuccessors(CPpmd8 *p, BoolInt skip, CPpmd_State *s1, CTX_PT
if (!c1)
return NULL;
}
- c1->NumStats = 0;
c1->Flags = flags;
- *ONE_STATE(c1) = upState;
+ c1->NumStats = 0;
+ c1->Union2.State2.Symbol = newSym;
+ c1->Union2.State2.Freq = newFreq;
+ SetSuccessor(ONE_STATE(c1), upBranch);
c1->Suffix = REF(c);
SetSuccessor(ps[--numPs], REF(c1));
c = c1;
@@ -694,6 +953,7 @@ static CTX_PTR CreateSuccessors(CPpmd8 *p, BoolInt skip, CPpmd_State *s1, CTX_PT
return c;
}
+
static CTX_PTR ReduceOrder(CPpmd8 *p, CPpmd_State *s1, CTX_PTR c)
{
CPpmd_State *s = NULL;
@@ -739,8 +999,8 @@ static CTX_PTR ReduceOrder(CPpmd8 *p, CPpmd_State *s1, CTX_PTR c)
do { s++; } while (s->Symbol != p->FoundState->Symbol);
if (s->Freq < MAX_FREQ - 9)
{
- s->Freq += 2;
- c->SummFreq += 2;
+ s->Freq = (Byte)(s->Freq + 2);
+ c->Union2.SummFreq = (UInt16)(c->Union2.SummFreq + 2);
}
}
else
@@ -776,33 +1036,42 @@ static CTX_PTR ReduceOrder(CPpmd8 *p, CPpmd_State *s1, CTX_PTR c)
p->FoundState = s;
successor = CreateSuccessors(p, False, NULL, c);
- if (successor == NULL)
+ if (!successor)
SetSuccessor(s, 0);
else
SetSuccessor(s, REF(successor));
p->FoundState = s2;
}
- if (p->OrderFall == 1 && c1 == p->MaxContext)
{
- SetSuccessor(p->FoundState, SUCCESSOR(s));
- p->Text--;
+ CPpmd_Void_Ref successor = SUCCESSOR(s);
+ if (p->OrderFall == 1 && c1 == p->MaxContext)
+ {
+ SetSuccessor(p->FoundState, successor);
+ p->Text--;
+ }
+ if (successor == 0)
+ return NULL;
+ return CTX(successor);
}
- if (SUCCESSOR(s) == 0)
- return NULL;
- return CTX(SUCCESSOR(s));
}
-static void UpdateModel(CPpmd8 *p)
+
+
+void Ppmd8_UpdateModel(CPpmd8 *p);
+MY_NO_INLINE
+void Ppmd8_UpdateModel(CPpmd8 *p)
{
- CPpmd_Void_Ref successor, fSuccessor = SUCCESSOR(p->FoundState);
+ CPpmd_Void_Ref maxSuccessor, minSuccessor = SUCCESSOR(p->FoundState);
CTX_PTR c;
unsigned s0, ns, fFreq = p->FoundState->Freq;
Byte flag, fSymbol = p->FoundState->Symbol;
+ {
CPpmd_State *s = NULL;
-
if (p->FoundState->Freq < MAX_FREQ / 4 && p->MinContext->Suffix != 0)
{
+ /* Update Freqs in Suffix Context */
+
c = SUFFIX(p->MinContext);
if (c->NumStats == 0)
@@ -813,91 +1082,134 @@ static void UpdateModel(CPpmd8 *p)
}
else
{
+ Byte sym = p->FoundState->Symbol;
s = STATS(c);
- if (s->Symbol != p->FoundState->Symbol)
+
+ if (s->Symbol != sym)
{
- do { s++; } while (s->Symbol != p->FoundState->Symbol);
+ do
+ {
+
+ s++;
+ }
+ while (s->Symbol != sym);
+
if (s[0].Freq >= s[-1].Freq)
{
SwapStates(&s[0], &s[-1]);
s--;
}
}
+
if (s->Freq < MAX_FREQ - 9)
{
- s->Freq += 2;
- c->SummFreq += 2;
+ s->Freq = (Byte)(s->Freq + 2);
+ c->Union2.SummFreq = (UInt16)(c->Union2.SummFreq + 2);
}
}
}
c = p->MaxContext;
- if (p->OrderFall == 0 && fSuccessor)
+ if (p->OrderFall == 0 && minSuccessor)
{
CTX_PTR cs = CreateSuccessors(p, True, s, p->MinContext);
- if (cs == 0)
+ if (!cs)
{
SetSuccessor(p->FoundState, 0);
- RESTORE_MODEL(c, CTX(fSuccessor));
- }
- else
- {
- SetSuccessor(p->FoundState, REF(cs));
- p->MaxContext = cs;
+ RESTORE_MODEL(c, CTX(minSuccessor));
+ return;
}
+ SetSuccessor(p->FoundState, REF(cs));
+ p->MinContext = p->MaxContext = cs;
return;
}
- *p->Text++ = p->FoundState->Symbol;
- successor = REF(p->Text);
- if (p->Text >= p->UnitsStart)
+
+
+
{
- RESTORE_MODEL(c, CTX(fSuccessor)); /* check it */
- return;
+ Byte *text = p->Text;
+ *text++ = p->FoundState->Symbol;
+ p->Text = text;
+ if (text >= p->UnitsStart)
+ {
+ RESTORE_MODEL(c, CTX(minSuccessor)); /* check it */
+ return;
+ }
+ maxSuccessor = REF(text);
}
-
- if (!fSuccessor)
+
+ if (!minSuccessor)
{
CTX_PTR cs = ReduceOrder(p, s, p->MinContext);
- if (cs == NULL)
+ if (!cs)
{
- RESTORE_MODEL(c, 0);
+ RESTORE_MODEL(c, NULL);
return;
}
- fSuccessor = REF(cs);
+ minSuccessor = REF(cs);
}
- else if ((Byte *)Ppmd8_GetPtr(p, fSuccessor) < p->UnitsStart)
+ else if ((Byte *)Ppmd8_GetPtr(p, minSuccessor) < p->UnitsStart)
{
CTX_PTR cs = CreateSuccessors(p, False, s, p->MinContext);
- if (cs == NULL)
+ if (!cs)
{
- RESTORE_MODEL(c, 0);
+ RESTORE_MODEL(c, NULL);
return;
}
- fSuccessor = REF(cs);
+ minSuccessor = REF(cs);
}
if (--p->OrderFall == 0)
{
- successor = fSuccessor;
+ maxSuccessor = minSuccessor;
p->Text -= (p->MaxContext != p->MinContext);
}
#ifdef PPMD8_FREEZE_SUPPORT
else if (p->RestoreMethod > PPMD8_RESTORE_METHOD_FREEZE)
{
- successor = fSuccessor;
+ maxSuccessor = minSuccessor;
RESET_TEXT(0);
p->OrderFall = 0;
}
#endif
+ }
+
+
+
+
+
+
+
+
+
+
- s0 = p->MinContext->SummFreq - (ns = p->MinContext->NumStats) - fFreq;
- flag = (Byte)(0x08 * (fSymbol >= 0x40));
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ flag = (Byte)(PPMD8_HiBitsFlag_3(fSymbol));
+ s0 = p->MinContext->Union2.SummFreq - (ns = p->MinContext->NumStats) - fFreq;
for (; c != p->MinContext; c = SUFFIX(c))
{
unsigned ns1;
- UInt32 cf, sf;
+ UInt32 sum;
+
if ((ns1 = c->NumStats) != 0)
{
if ((ns1 & 1) != 0)
@@ -911,91 +1223,133 @@ static void UpdateModel(CPpmd8 *p)
void *oldPtr;
if (!ptr)
{
- RESTORE_MODEL(c, CTX(fSuccessor));
+ RESTORE_MODEL(c, CTX(minSuccessor));
return;
}
oldPtr = STATS(c);
MyMem12Cpy(ptr, oldPtr, oldNU);
InsertNode(p, oldPtr, i);
- c->Stats = STATS_REF(ptr);
+ c->Union4.Stats = STATS_REF(ptr);
}
}
- c->SummFreq = (UInt16)(c->SummFreq + (3 * ns1 + 1 < ns));
+ sum = c->Union2.SummFreq;
+ /* max increase of Escape_Freq is 1 here.
+ an average increase is 1/3 per symbol */
+ sum += (3 * ns1 + 1 < ns);
+ /* original PPMdH uses 16-bit variable for (sum) here.
+ But (sum < ???). Do we need to truncate (sum) to 16-bit */
+ // sum = (UInt16)sum;
}
else
{
- CPpmd_State *s2 = (CPpmd_State*)AllocUnits(p, 0);
- if (!s2)
+
+ CPpmd_State *s = (CPpmd_State*)AllocUnits(p, 0);
+ if (!s)
{
- RESTORE_MODEL(c, CTX(fSuccessor));
+ RESTORE_MODEL(c, CTX(minSuccessor));
return;
}
- *s2 = *ONE_STATE(c);
- c->Stats = REF(s2);
- if (s2->Freq < MAX_FREQ / 4 - 1)
- s2->Freq <<= 1;
- else
- s2->Freq = MAX_FREQ - 4;
- c->SummFreq = (UInt16)(s2->Freq + p->InitEsc + (ns > 2));
- }
- cf = 2 * fFreq * (c->SummFreq + 6);
- sf = (UInt32)s0 + c->SummFreq;
- if (cf < 6 * sf)
- {
- cf = 1 + (cf > sf) + (cf >= 4 * sf);
- c->SummFreq += 4;
- }
- else
- {
- cf = 4 + (cf > 9 * sf) + (cf > 12 * sf) + (cf > 15 * sf);
- c->SummFreq = (UInt16)(c->SummFreq + cf);
+ {
+ unsigned freq = c->Union2.State2.Freq;
+ // s = *ONE_STATE(c);
+ s->Symbol = c->Union2.State2.Symbol;
+ s->Successor_0 = c->Union4.State4.Successor_0;
+ s->Successor_1 = c->Union4.State4.Successor_1;
+ // SetSuccessor(s, c->Union4.Stats); // call it only for debug purposes to check the order of
+ // (Successor_0 and Successor_1) in LE/BE.
+ c->Union4.Stats = REF(s);
+ if (freq < MAX_FREQ / 4 - 1)
+ freq <<= 1;
+ else
+ freq = MAX_FREQ - 4;
+
+ s->Freq = (Byte)freq;
+
+ sum = freq + p->InitEsc + (ns > 2); // Ppmd8 (> 2)
+ }
}
+
{
- CPpmd_State *s2 = STATS(c) + ns1 + 1;
- SetSuccessor(s2, successor);
- s2->Symbol = fSymbol;
- s2->Freq = (Byte)cf;
- c->Flags |= flag;
+ CPpmd_State *s = STATS(c) + ns1 + 1;
+ UInt32 cf = 2 * (sum + 6) * (UInt32)fFreq;
+ UInt32 sf = (UInt32)s0 + sum;
+ s->Symbol = fSymbol;
c->NumStats = (Byte)(ns1 + 1);
+ SetSuccessor(s, maxSuccessor);
+ c->Flags |= flag;
+ if (cf < 6 * sf)
+ {
+ cf = (unsigned)1 + (cf > sf) + (cf >= 4 * sf);
+ sum += 4;
+ /* It can add (1, 2, 3) to Escape_Freq */
+ }
+ else
+ {
+ cf = (unsigned)4 + (cf > 9 * sf) + (cf > 12 * sf) + (cf > 15 * sf);
+ sum += cf;
+ }
+
+ c->Union2.SummFreq = (UInt16)sum;
+ s->Freq = (Byte)cf;
}
+
}
- p->MaxContext = p->MinContext = CTX(fSuccessor);
+ p->MaxContext = p->MinContext = CTX(minSuccessor);
}
+
+
+MY_NO_INLINE
static void Rescale(CPpmd8 *p)
{
unsigned i, adder, sumFreq, escFreq;
CPpmd_State *stats = STATS(p->MinContext);
CPpmd_State *s = p->FoundState;
+
+ /* Sort the list by Freq */
+ if (s != stats)
{
CPpmd_State tmp = *s;
- for (; s != stats; s--)
+ do
s[0] = s[-1];
+ while (--s != stats);
*s = tmp;
}
- escFreq = p->MinContext->SummFreq - s->Freq;
- s->Freq += 4;
- adder = (p->OrderFall != 0
- #ifdef PPMD8_FREEZE_SUPPORT
- || p->RestoreMethod > PPMD8_RESTORE_METHOD_FREEZE
- #endif
- );
- s->Freq = (Byte)((s->Freq + adder) >> 1);
+
sumFreq = s->Freq;
+ escFreq = p->MinContext->Union2.SummFreq - sumFreq;
+
+
+
+
+
+ adder = (p->OrderFall != 0);
+
+ #ifdef PPMD8_FREEZE_SUPPORT
+ adder |= (p->RestoreMethod > PPMD8_RESTORE_METHOD_FREEZE);
+ #endif
+
+ sumFreq = (sumFreq + 4 + adder) >> 1;
i = p->MinContext->NumStats;
+ s->Freq = (Byte)sumFreq;
+
do
{
- escFreq -= (++s)->Freq;
- s->Freq = (Byte)((s->Freq + adder) >> 1);
- sumFreq += s->Freq;
- if (s[0].Freq > s[-1].Freq)
+ unsigned freq = (++s)->Freq;
+ escFreq -= freq;
+ freq = (freq + adder) >> 1;
+ sumFreq += freq;
+ s->Freq = (Byte)freq;
+ if (freq > s[-1].Freq)
{
+ CPpmd_State tmp = *s;
CPpmd_State *s1 = s;
- CPpmd_State tmp = *s1;
do
+ {
s1[0] = s1[-1];
- while (--s1 != stats && tmp.Freq > s1[-1].Freq);
+ }
+ while (--s1 != stats && freq > s1[-1].Freq);
*s1 = tmp;
}
}
@@ -1003,49 +1357,89 @@ static void Rescale(CPpmd8 *p)
if (s->Freq == 0)
{
- unsigned numStats = p->MinContext->NumStats;
- unsigned n0, n1;
- do { i++; } while ((--s)->Freq == 0);
+ /* Remove all items with Freq == 0 */
+ CPpmd8_Context *mc;
+ unsigned numStats, numStatsNew, n0, n1;
+
+ i = 0; do { i++; } while ((--s)->Freq == 0);
+
+
+
+
escFreq += i;
- p->MinContext->NumStats = (Byte)(p->MinContext->NumStats - i);
- if (p->MinContext->NumStats == 0)
+ mc = p->MinContext;
+ numStats = mc->NumStats;
+ numStatsNew = numStats - i;
+ mc->NumStats = (Byte)(numStatsNew);
+ n0 = (numStats + 2) >> 1;
+
+ if (numStatsNew == 0)
{
- CPpmd_State tmp = *stats;
- tmp.Freq = (Byte)((2 * tmp.Freq + escFreq - 1) / escFreq);
- if (tmp.Freq > MAX_FREQ / 3)
- tmp.Freq = MAX_FREQ / 3;
- InsertNode(p, stats, U2I((numStats + 2) >> 1));
- p->MinContext->Flags = (Byte)((p->MinContext->Flags & 0x10) + 0x08 * (tmp.Symbol >= 0x40));
- *(p->FoundState = ONE_STATE(p->MinContext)) = tmp;
+
+ unsigned freq = (2 * (unsigned)stats->Freq + escFreq - 1) / escFreq;
+ if (freq > MAX_FREQ / 3)
+ freq = MAX_FREQ / 3;
+ mc->Flags = (Byte)((mc->Flags & FLAG_PREV_HIGH) + PPMD8_HiBitsFlag_3(stats->Symbol));
+
+
+
+
+
+ s = ONE_STATE(mc);
+ *s = *stats;
+ s->Freq = (Byte)freq;
+ p->FoundState = s;
+ InsertNode(p, stats, U2I(n0));
return;
}
- n0 = (numStats + 2) >> 1;
- n1 = (p->MinContext->NumStats + 2) >> 1;
+
+ n1 = (numStatsNew + 2) >> 1;
if (n0 != n1)
- p->MinContext->Stats = STATS_REF(ShrinkUnits(p, stats, n0, n1));
- p->MinContext->Flags &= ~0x08;
- p->MinContext->Flags |= 0x08 * ((s = STATS(p->MinContext))->Symbol >= 0x40);
- i = p->MinContext->NumStats;
- do { p->MinContext->Flags |= 0x08*((++s)->Symbol >= 0x40); } while (--i);
+ mc->Union4.Stats = STATS_REF(ShrinkUnits(p, stats, n0, n1));
+ {
+ // here we are for max order only. So Ppmd8_MakeEscFreq() doesn't use mc->Flags
+ // but we still need current (Flags & FLAG_PREV_HIGH), if we will convert context to 1-symbol context later.
+ /*
+ unsigned flags = HiBits_Prepare((s = STATS(mc))->Symbol);
+ i = mc->NumStats;
+ do { flags |= HiBits_Prepare((++s)->Symbol); } while (--i);
+ mc->Flags = (Byte)((mc->Flags & ~FLAG_SYM_HIGH) + HiBits_Convert_3(flags));
+ */
+ }
+ }
+
+
+
+
+
+
+ {
+ CPpmd8_Context *mc = p->MinContext;
+ mc->Union2.SummFreq = (UInt16)(sumFreq + escFreq - (escFreq >> 1));
+ mc->Flags |= FLAG_RESCALED;
+ p->FoundState = STATS(mc);
}
- p->MinContext->SummFreq = (UInt16)(sumFreq + escFreq - (escFreq >> 1));
- p->MinContext->Flags |= 0x4;
- p->FoundState = STATS(p->MinContext);
}
+
CPpmd_See *Ppmd8_MakeEscFreq(CPpmd8 *p, unsigned numMasked1, UInt32 *escFreq)
{
CPpmd_See *see;
- if (p->MinContext->NumStats != 0xFF)
+ const CPpmd8_Context *mc = p->MinContext;
+ unsigned numStats = mc->NumStats;
+ if (numStats != 0xFF)
{
- see = p->See[(size_t)(unsigned)p->NS2Indx[(size_t)(unsigned)p->MinContext->NumStats + 2] - 3] +
- (p->MinContext->SummFreq > 11 * ((unsigned)p->MinContext->NumStats + 1)) +
- 2 * (unsigned)(2 * (unsigned)p->MinContext->NumStats <
- ((unsigned)SUFFIX(p->MinContext)->NumStats + numMasked1)) +
- p->MinContext->Flags;
+ // (3 <= numStats + 2 <= 256) (3 <= NS2Indx[3] and NS2Indx[256] === 26)
+ see = p->See[(size_t)(unsigned)p->NS2Indx[(size_t)numStats + 2] - 3]
+ + (mc->Union2.SummFreq > 11 * (numStats + 1))
+ + 2 * (unsigned)(2 * numStats < ((unsigned)SUFFIX(mc)->NumStats + numMasked1))
+ + mc->Flags;
+
{
- unsigned r = (see->Summ >> see->Shift);
- see->Summ = (UInt16)(see->Summ - r);
+ // if (see->Summ) field is larger than 16-bit, we need only low 16 bits of Summ
+ unsigned summ = (UInt16)see->Summ; // & 0xFFFF
+ unsigned r = (summ >> see->Shift);
+ see->Summ = (UInt16)(summ - r);
*escFreq = r + (r == 0);
}
}
@@ -1057,67 +1451,87 @@ CPpmd_See *Ppmd8_MakeEscFreq(CPpmd8 *p, unsigned numMasked1, UInt32 *escFreq)
return see;
}
+
static void NextContext(CPpmd8 *p)
{
CTX_PTR c = CTX(SUCCESSOR(p->FoundState));
- if (p->OrderFall == 0 && (Byte *)c >= p->UnitsStart)
- p->MinContext = p->MaxContext = c;
+ if (p->OrderFall == 0 && (const Byte *)c >= p->UnitsStart)
+ p->MaxContext = p->MinContext = c;
else
- {
- UpdateModel(p);
- p->MinContext = p->MaxContext;
- }
+ Ppmd8_UpdateModel(p);
}
+
void Ppmd8_Update1(CPpmd8 *p)
{
CPpmd_State *s = p->FoundState;
- s->Freq += 4;
- p->MinContext->SummFreq += 4;
- if (s[0].Freq > s[-1].Freq)
+ unsigned freq = s->Freq;
+ freq += 4;
+ p->MinContext->Union2.SummFreq = (UInt16)(p->MinContext->Union2.SummFreq + 4);
+ s->Freq = (Byte)freq;
+ if (freq > s[-1].Freq)
{
- SwapStates(&s[0], &s[-1]);
+ SwapStates(s, &s[-1]);
p->FoundState = --s;
- if (s->Freq > MAX_FREQ)
+ if (freq > MAX_FREQ)
Rescale(p);
}
NextContext(p);
}
+
void Ppmd8_Update1_0(CPpmd8 *p)
{
- p->PrevSuccess = (2 * p->FoundState->Freq >= p->MinContext->SummFreq);
- p->RunLength += p->PrevSuccess;
- p->MinContext->SummFreq += 4;
- if ((p->FoundState->Freq += 4) > MAX_FREQ)
+ CPpmd_State *s = p->FoundState;
+ CPpmd8_Context *mc = p->MinContext;
+ unsigned freq = s->Freq;
+ unsigned summFreq = mc->Union2.SummFreq;
+ p->PrevSuccess = (2 * freq >= summFreq); // Ppmd8 (>=)
+ p->RunLength += (int)p->PrevSuccess;
+ mc->Union2.SummFreq = (UInt16)(summFreq + 4);
+ freq += 4;
+ s->Freq = (Byte)freq;
+ if (freq > MAX_FREQ)
Rescale(p);
NextContext(p);
}
+
+/*
void Ppmd8_UpdateBin(CPpmd8 *p)
{
- p->FoundState->Freq = (Byte)(p->FoundState->Freq + (p->FoundState->Freq < 196));
+ unsigned freq = p->FoundState->Freq;
+ p->FoundState->Freq = (Byte)(freq + (freq < 196)); // Ppmd8 (196)
p->PrevSuccess = 1;
p->RunLength++;
NextContext(p);
}
+*/
void Ppmd8_Update2(CPpmd8 *p)
{
- p->MinContext->SummFreq += 4;
- if ((p->FoundState->Freq += 4) > MAX_FREQ)
- Rescale(p);
+ CPpmd_State *s = p->FoundState;
+ unsigned freq = s->Freq;
+ freq += 4;
p->RunLength = p->InitRL;
- UpdateModel(p);
- p->MinContext = p->MaxContext;
+ p->MinContext->Union2.SummFreq = (UInt16)(p->MinContext->Union2.SummFreq + 4);
+ s->Freq = (Byte)freq;
+ if (freq > MAX_FREQ)
+ Rescale(p);
+ Ppmd8_UpdateModel(p);
}
/* H->I changes:
NS2Indx
- GlewCount, and Glue method
+ GlueCount, and Glue method
BinSum
See / EscFreq
CreateSuccessors updates more suffix contexts
- UpdateModel consts.
+ Ppmd8_UpdateModel consts.
PrevSuccess Update
+
+Flags:
+ (1 << 2) - the Context was Rescaled
+ (1 << 3) - there is symbol in Stats with (sym >= 0x40) in
+ (1 << 4) - main symbol of context is (sym >= 0x40)
*/
diff --git a/multiarc/src/formats/7z/C/Ppmd8.h b/multiarc/src/formats/7z/C/Ppmd8.h
index 51c497dc..fe93fe7c 100644..100755
--- a/multiarc/src/formats/7z/C/Ppmd8.h
+++ b/multiarc/src/formats/7z/C/Ppmd8.h
@@ -1,5 +1,5 @@
-/* Ppmd8.h -- PPMdI codec
-2018-07-04 : Igor Pavlov : Public domain
+/* Ppmd8.h -- Ppmd8 (PPMdI) compression codec
+2021-04-13 : Igor Pavlov : Public domain
This code is based on:
PPMd var.I (2002): Dmitry Shkarin : Public domain
Carryless rangecoder (1999): Dmitry Subbotin : Public domain */
@@ -14,35 +14,45 @@ EXTERN_C_BEGIN
#define PPMD8_MIN_ORDER 2
#define PPMD8_MAX_ORDER 16
+
+
+
struct CPpmd8_Context_;
-typedef
- #ifdef PPMD_32BIT
- struct CPpmd8_Context_ *
- #else
- UInt32
- #endif
- CPpmd8_Context_Ref;
+typedef Ppmd_Ref_Type(struct CPpmd8_Context_) CPpmd8_Context_Ref;
-#pragma pack(push, 1)
+// MY_CPU_pragma_pack_push_1
typedef struct CPpmd8_Context_
{
Byte NumStats;
Byte Flags;
- UInt16 SummFreq;
- CPpmd_State_Ref Stats;
+
+ union
+ {
+ UInt16 SummFreq;
+ CPpmd_State2 State2;
+ } Union2;
+
+ union
+ {
+ CPpmd_State_Ref Stats;
+ CPpmd_State4 State4;
+ } Union4;
+
CPpmd8_Context_Ref Suffix;
} CPpmd8_Context;
-#pragma pack(pop)
+// MY_CPU_pragma_pop
-#define Ppmd8Context_OneState(p) ((CPpmd_State *)&(p)->SummFreq)
+#define Ppmd8Context_OneState(p) ((CPpmd_State *)&(p)->Union2)
-/* The BUG in Shkarin's code for FREEZE mode was fixed, but that fixed
- code is not compatible with original code for some files compressed
+/* PPMdI code rev.2 contains the fix over PPMdI code rev.1.
+ But the code PPMdI.2 is not compatible with PPMdI.1 for some files compressed
in FREEZE mode. So we disable FREEZE mode support. */
+// #define PPMD8_FREEZE_SUPPORT
+
enum
{
PPMD8_RESTORE_METHOD_RESTART,
@@ -50,22 +60,28 @@ enum
#ifdef PPMD8_FREEZE_SUPPORT
, PPMD8_RESTORE_METHOD_FREEZE
#endif
+ , PPMD8_RESTORE_METHOD_UNSUPPPORTED
};
+
+
+
+
+
+
+
typedef struct
{
CPpmd8_Context *MinContext, *MaxContext;
CPpmd_State *FoundState;
- unsigned OrderFall, InitEsc, PrevSuccess, MaxOrder;
+ unsigned OrderFall, InitEsc, PrevSuccess, MaxOrder, RestoreMethod;
Int32 RunLength, InitRL; /* must be 32-bit at least */
UInt32 Size;
UInt32 GlueCount;
- Byte *Base, *LoUnit, *HiUnit, *Text, *UnitsStart;
UInt32 AlignOffset;
- unsigned RestoreMethod;
+ Byte *Base, *LoUnit, *HiUnit, *Text, *UnitsStart;
- /* Range Coder */
UInt32 Range;
UInt32 Code;
UInt32 Low;
@@ -75,16 +91,18 @@ typedef struct
IByteOut *Out;
} Stream;
- Byte Indx2Units[PPMD_NUM_INDEXES];
+ Byte Indx2Units[PPMD_NUM_INDEXES + 2]; // +2 for alignment
Byte Units2Indx[128];
CPpmd_Void_Ref FreeList[PPMD_NUM_INDEXES];
UInt32 Stamps[PPMD_NUM_INDEXES];
-
Byte NS2BSIndx[256], NS2Indx[260];
+ Byte ExpEscape[16];
CPpmd_See DummySee, See[24][32];
UInt16 BinSumm[25][64];
+
} CPpmd8;
+
void Ppmd8_Construct(CPpmd8 *p);
BoolInt Ppmd8_Alloc(CPpmd8 *p, UInt32 size, ISzAllocPtr alloc);
void Ppmd8_Free(CPpmd8 *p, ISzAllocPtr alloc);
@@ -94,43 +112,69 @@ void Ppmd8_Init(CPpmd8 *p, unsigned maxOrder, unsigned restoreMethod);
/* ---------- Internal Functions ---------- */
-extern const Byte PPMD8_kExpEscape[16];
-
-#ifdef PPMD_32BIT
- #define Ppmd8_GetPtr(p, ptr) (ptr)
- #define Ppmd8_GetContext(p, ptr) (ptr)
- #define Ppmd8_GetStats(p, ctx) ((ctx)->Stats)
-#else
- #define Ppmd8_GetPtr(p, offs) ((void *)((p)->Base + (offs)))
- #define Ppmd8_GetContext(p, offs) ((CPpmd8_Context *)Ppmd8_GetPtr((p), (offs)))
- #define Ppmd8_GetStats(p, ctx) ((CPpmd_State *)Ppmd8_GetPtr((p), ((ctx)->Stats)))
-#endif
+#define Ppmd8_GetPtr(p, ptr) Ppmd_GetPtr(p, ptr)
+#define Ppmd8_GetContext(p, ptr) Ppmd_GetPtr_Type(p, ptr, CPpmd8_Context)
+#define Ppmd8_GetStats(p, ctx) Ppmd_GetPtr_Type(p, (ctx)->Union4.Stats, CPpmd_State)
void Ppmd8_Update1(CPpmd8 *p);
void Ppmd8_Update1_0(CPpmd8 *p);
void Ppmd8_Update2(CPpmd8 *p);
-void Ppmd8_UpdateBin(CPpmd8 *p);
+
+
+
+
+
#define Ppmd8_GetBinSumm(p) \
- &p->BinSumm[p->NS2Indx[(size_t)Ppmd8Context_OneState(p->MinContext)->Freq - 1]][ \
- p->NS2BSIndx[Ppmd8_GetContext(p, p->MinContext->Suffix)->NumStats] + \
- p->PrevSuccess + p->MinContext->Flags + ((p->RunLength >> 26) & 0x20)]
+ &p->BinSumm[p->NS2Indx[(size_t)Ppmd8Context_OneState(p->MinContext)->Freq - 1]] \
+ [ p->PrevSuccess + ((p->RunLength >> 26) & 0x20) \
+ + p->NS2BSIndx[Ppmd8_GetContext(p, p->MinContext->Suffix)->NumStats] + \
+ + p->MinContext->Flags ]
+
CPpmd_See *Ppmd8_MakeEscFreq(CPpmd8 *p, unsigned numMasked, UInt32 *scale);
+/* 20.01: the original PPMdI encoder and decoder probably could work incorrectly in some rare cases,
+ where the original PPMdI code can give "Divide by Zero" operation.
+ We use the following fix to allow correct working of encoder and decoder in any cases.
+ We correct (Escape_Freq) and (_sum_), if (_sum_) is larger than p->Range) */
+#define PPMD8_CORRECT_SUM_RANGE(p, _sum_) if (_sum_ > p->Range /* /1 */) _sum_ = p->Range;
+
+
/* ---------- Decode ---------- */
-BoolInt Ppmd8_RangeDec_Init(CPpmd8 *p);
+#define PPMD8_SYM_END (-1)
+#define PPMD8_SYM_ERROR (-2)
+
+/*
+You must set (CPpmd8::Stream.In) before Ppmd8_RangeDec_Init()
+
+Ppmd8_DecodeSymbol()
+out:
+ >= 0 : decoded byte
+ -1 : PPMD8_SYM_END : End of payload marker
+ -2 : PPMD8_SYM_ERROR : Data error
+*/
+
+
+BoolInt Ppmd8_Init_RangeDec(CPpmd8 *p);
#define Ppmd8_RangeDec_IsFinishedOK(p) ((p)->Code == 0)
-int Ppmd8_DecodeSymbol(CPpmd8 *p); /* returns: -1 as EndMarker, -2 as DataError */
+int Ppmd8_DecodeSymbol(CPpmd8 *p);
+
+
+
+
+
+
/* ---------- Encode ---------- */
-#define Ppmd8_RangeEnc_Init(p) { (p)->Low = 0; (p)->Range = 0xFFFFFFFF; }
-void Ppmd8_RangeEnc_FlushData(CPpmd8 *p);
-void Ppmd8_EncodeSymbol(CPpmd8 *p, int symbol); /* symbol = -1 means EndMarker */
+#define Ppmd8_Init_RangeEnc(p) { (p)->Low = 0; (p)->Range = 0xFFFFFFFF; }
+void Ppmd8_Flush_RangeEnc(CPpmd8 *p);
+void Ppmd8_EncodeSymbol(CPpmd8 *p, int symbol);
+
EXTERN_C_END
diff --git a/multiarc/src/formats/7z/C/Ppmd8Dec.c b/multiarc/src/formats/7z/C/Ppmd8Dec.c
index a18ec677..d205de28 100644..100755
--- a/multiarc/src/formats/7z/C/Ppmd8Dec.c
+++ b/multiarc/src/formats/7z/C/Ppmd8Dec.c
@@ -1,5 +1,5 @@
-/* Ppmd8Dec.c -- PPMdI Decoder
-2018-07-04 : Igor Pavlov : Public domain
+/* Ppmd8Dec.c -- Ppmd8 (PPMdI) Decoder
+2021-04-13 : Igor Pavlov : Public domain
This code is based on:
PPMd var.I (2002): Dmitry Shkarin : Public domain
Carryless rangecoder (1999): Dmitry Subbotin : Public domain */
@@ -11,147 +11,269 @@ This code is based on:
#define kTop (1 << 24)
#define kBot (1 << 15)
-BoolInt Ppmd8_RangeDec_Init(CPpmd8 *p)
+#define READ_BYTE(p) IByteIn_Read((p)->Stream.In)
+
+BoolInt Ppmd8_Init_RangeDec(CPpmd8 *p)
{
unsigned i;
- p->Low = 0;
- p->Range = 0xFFFFFFFF;
p->Code = 0;
+ p->Range = 0xFFFFFFFF;
+ p->Low = 0;
+
for (i = 0; i < 4; i++)
- p->Code = (p->Code << 8) | IByteIn_Read(p->Stream.In);
+ p->Code = (p->Code << 8) | READ_BYTE(p);
return (p->Code < 0xFFFFFFFF);
}
-static UInt32 RangeDec_GetThreshold(CPpmd8 *p, UInt32 total)
-{
- return p->Code / (p->Range /= total);
-}
+#define RC_NORM(p) \
+ while ((p->Low ^ (p->Low + p->Range)) < kTop \
+ || (p->Range < kBot && ((p->Range = (0 - p->Low) & (kBot - 1)), 1))) { \
+ p->Code = (p->Code << 8) | READ_BYTE(p); \
+ p->Range <<= 8; p->Low <<= 8; }
+
+// we must use only one type of Normalization from two: LOCAL or REMOTE
+#define RC_NORM_LOCAL(p) // RC_NORM(p)
+#define RC_NORM_REMOTE(p) RC_NORM(p)
+#define R p
+
+MY_FORCE_INLINE
+// MY_NO_INLINE
static void RangeDec_Decode(CPpmd8 *p, UInt32 start, UInt32 size)
{
- start *= p->Range;
- p->Low += start;
- p->Code -= start;
- p->Range *= size;
-
- while ((p->Low ^ (p->Low + p->Range)) < kTop ||
- (p->Range < kBot && ((p->Range = (0 - p->Low) & (kBot - 1)), 1)))
- {
- p->Code = (p->Code << 8) | IByteIn_Read(p->Stream.In);
- p->Range <<= 8;
- p->Low <<= 8;
- }
+ start *= R->Range;
+ R->Low += start;
+ R->Code -= start;
+ R->Range *= size;
+ RC_NORM_LOCAL(R)
}
-#define MASK(sym) ((signed char *)charMask)[sym]
+#define RC_Decode(start, size) RangeDec_Decode(p, start, size);
+#define RC_DecodeFinal(start, size) RC_Decode(start, size) RC_NORM_REMOTE(R)
+#define RC_GetThreshold(total) (R->Code / (R->Range /= (total)))
+
+
+#define CTX(ref) ((CPpmd8_Context *)Ppmd8_GetContext(p, ref))
+typedef CPpmd8_Context * CTX_PTR;
+#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p)
+void Ppmd8_UpdateModel(CPpmd8 *p);
+
+#define MASK(sym) ((unsigned char *)charMask)[sym]
+
int Ppmd8_DecodeSymbol(CPpmd8 *p)
{
size_t charMask[256 / sizeof(size_t)];
+
if (p->MinContext->NumStats != 0)
{
CPpmd_State *s = Ppmd8_GetStats(p, p->MinContext);
unsigned i;
UInt32 count, hiCnt;
- if ((count = RangeDec_GetThreshold(p, p->MinContext->SummFreq)) < (hiCnt = s->Freq))
+ UInt32 summFreq = p->MinContext->Union2.SummFreq;
+
+ PPMD8_CORRECT_SUM_RANGE(p, summFreq)
+
+
+ count = RC_GetThreshold(summFreq);
+ hiCnt = count;
+
+ if ((Int32)(count -= s->Freq) < 0)
{
- Byte symbol;
- RangeDec_Decode(p, 0, s->Freq);
+ Byte sym;
+ RC_DecodeFinal(0, s->Freq);
p->FoundState = s;
- symbol = s->Symbol;
+ sym = s->Symbol;
Ppmd8_Update1_0(p);
- return symbol;
+ return sym;
}
+
p->PrevSuccess = 0;
i = p->MinContext->NumStats;
+
do
{
- if ((hiCnt += (++s)->Freq) > count)
+ if ((Int32)(count -= (++s)->Freq) < 0)
{
- Byte symbol;
- RangeDec_Decode(p, hiCnt - s->Freq, s->Freq);
+ Byte sym;
+ RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq);
p->FoundState = s;
- symbol = s->Symbol;
+ sym = s->Symbol;
Ppmd8_Update1(p);
- return symbol;
+ return sym;
}
}
while (--i);
- if (count >= p->MinContext->SummFreq)
- return -2;
- RangeDec_Decode(p, hiCnt, p->MinContext->SummFreq - hiCnt);
+
+ if (hiCnt >= summFreq)
+ return PPMD8_SYM_ERROR;
+
+ hiCnt -= count;
+ RC_Decode(hiCnt, summFreq - hiCnt);
+
+
PPMD_SetAllBitsIn256Bytes(charMask);
- MASK(s->Symbol) = 0;
- i = p->MinContext->NumStats;
- do { MASK((--s)->Symbol) = 0; } while (--i);
+ // i = p->MinContext->NumStats - 1;
+ // do { MASK((--s)->Symbol) = 0; } while (--i);
+ {
+ CPpmd_State *s2 = Ppmd8_GetStats(p, p->MinContext);
+ MASK(s->Symbol) = 0;
+ do
+ {
+ unsigned sym0 = s2[0].Symbol;
+ unsigned sym1 = s2[1].Symbol;
+ s2 += 2;
+ MASK(sym0) = 0;
+ MASK(sym1) = 0;
+ }
+ while (s2 < s);
+ }
}
else
{
+ CPpmd_State *s = Ppmd8Context_OneState(p->MinContext);
UInt16 *prob = Ppmd8_GetBinSumm(p);
- if (((p->Code / (p->Range >>= 14)) < *prob))
+ UInt32 pr = *prob;
+ UInt32 size0 = (R->Range >> 14) * pr;
+ pr = PPMD_UPDATE_PROB_1(pr);
+
+ if (R->Code < size0)
{
- Byte symbol;
- RangeDec_Decode(p, 0, *prob);
- *prob = (UInt16)PPMD_UPDATE_PROB_0(*prob);
- symbol = (p->FoundState = Ppmd8Context_OneState(p->MinContext))->Symbol;
- Ppmd8_UpdateBin(p);
- return symbol;
+ Byte sym;
+ *prob = (UInt16)(pr + (1 << PPMD_INT_BITS));
+
+ // RangeDec_DecodeBit0(size0);
+ R->Range = size0;
+ RC_NORM(R)
+
+
+
+ // sym = (p->FoundState = Ppmd8Context_OneState(p->MinContext))->Symbol;
+ // Ppmd8_UpdateBin(p);
+ {
+ unsigned freq = s->Freq;
+ CTX_PTR c = CTX(SUCCESSOR(s));
+ sym = s->Symbol;
+ p->FoundState = s;
+ p->PrevSuccess = 1;
+ p->RunLength++;
+ s->Freq = (Byte)(freq + (freq < 196));
+ // NextContext(p);
+ if (p->OrderFall == 0 && (const Byte *)c >= p->UnitsStart)
+ p->MaxContext = p->MinContext = c;
+ else
+ Ppmd8_UpdateModel(p);
+ }
+ return sym;
}
- RangeDec_Decode(p, *prob, (1 << 14) - *prob);
- *prob = (UInt16)PPMD_UPDATE_PROB_1(*prob);
- p->InitEsc = PPMD8_kExpEscape[*prob >> 10];
+
+ *prob = (UInt16)pr;
+ p->InitEsc = p->ExpEscape[pr >> 10];
+
+ // RangeDec_DecodeBit1(rc2, size0);
+ R->Low += size0;
+ R->Code -= size0;
+ R->Range = (R->Range & ~((UInt32)PPMD_BIN_SCALE - 1)) - size0;
+ RC_NORM_LOCAL(R)
+
PPMD_SetAllBitsIn256Bytes(charMask);
MASK(Ppmd8Context_OneState(p->MinContext)->Symbol) = 0;
p->PrevSuccess = 0;
}
+
for (;;)
{
- CPpmd_State *ps[256], *s;
+ CPpmd_State *s, *s2;
UInt32 freqSum, count, hiCnt;
+ UInt32 freqSum2;
CPpmd_See *see;
- unsigned i, num, numMasked = p->MinContext->NumStats;
+ CPpmd8_Context *mc;
+ unsigned numMasked;
+ RC_NORM_REMOTE(R)
+ mc = p->MinContext;
+ numMasked = mc->NumStats;
+
do
{
p->OrderFall++;
- if (!p->MinContext->Suffix)
- return -1;
- p->MinContext = Ppmd8_GetContext(p, p->MinContext->Suffix);
+ if (!mc->Suffix)
+ return PPMD8_SYM_END;
+ mc = Ppmd8_GetContext(p, mc->Suffix);
}
- while (p->MinContext->NumStats == numMasked);
- hiCnt = 0;
- s = Ppmd8_GetStats(p, p->MinContext);
- i = 0;
- num = p->MinContext->NumStats - numMasked;
- do
+ while (mc->NumStats == numMasked);
+
+ s = Ppmd8_GetStats(p, mc);
+
{
- int k = (int)(MASK(s->Symbol));
- hiCnt += (s->Freq & k);
- ps[i] = s++;
- i -= k;
+ unsigned num = (unsigned)mc->NumStats + 1;
+ unsigned num2 = num / 2;
+
+ num &= 1;
+ hiCnt = (s->Freq & (unsigned)(MASK(s->Symbol))) & (0 - (UInt32)num);
+ s += num;
+ p->MinContext = mc;
+
+ do
+ {
+ unsigned sym0 = s[0].Symbol;
+ unsigned sym1 = s[1].Symbol;
+ s += 2;
+ hiCnt += (s[-2].Freq & (unsigned)(MASK(sym0)));
+ hiCnt += (s[-1].Freq & (unsigned)(MASK(sym1)));
+ }
+ while (--num2);
}
- while (i != num);
see = Ppmd8_MakeEscFreq(p, numMasked, &freqSum);
freqSum += hiCnt;
- count = RangeDec_GetThreshold(p, freqSum);
+ freqSum2 = freqSum;
+ PPMD8_CORRECT_SUM_RANGE(R, freqSum2);
+
+
+ count = RC_GetThreshold(freqSum2);
if (count < hiCnt)
{
- Byte symbol;
- CPpmd_State **pps = ps;
- for (hiCnt = 0; (hiCnt += (*pps)->Freq) <= count; pps++);
- s = *pps;
- RangeDec_Decode(p, hiCnt - s->Freq, s->Freq);
+ Byte sym;
+ // Ppmd_See_Update(see); // new (see->Summ) value can overflow over 16-bits in some rare cases
+ s = Ppmd8_GetStats(p, p->MinContext);
+ hiCnt = count;
+
+
+ {
+ for (;;)
+ {
+ count -= s->Freq & (unsigned)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break;
+ // count -= s->Freq & (unsigned)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break;
+ }
+ }
+ s--;
+ RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq);
+
+ // new (see->Summ) value can overflow over 16-bits in some rare cases
Ppmd_See_Update(see);
p->FoundState = s;
- symbol = s->Symbol;
+ sym = s->Symbol;
Ppmd8_Update2(p);
- return symbol;
+ return sym;
}
- if (count >= freqSum)
- return -2;
- RangeDec_Decode(p, hiCnt, freqSum - hiCnt);
+
+ if (count >= freqSum2)
+ return PPMD8_SYM_ERROR;
+
+ RC_Decode(hiCnt, freqSum2 - hiCnt);
+
+ // We increase (see->Summ) for sum of Freqs of all non_Masked symbols.
+ // new (see->Summ) value can overflow over 16-bits in some rare cases
see->Summ = (UInt16)(see->Summ + freqSum);
- do { MASK(ps[--i]->Symbol) = 0; } while (i != 0);
+
+ s = Ppmd8_GetStats(p, p->MinContext);
+ s2 = s + p->MinContext->NumStats + 1;
+ do
+ {
+ MASK(s->Symbol) = 0;
+ s++;
+ }
+ while (s != s2);
}
}
diff --git a/multiarc/src/formats/7z/C/Ppmd8Enc.c b/multiarc/src/formats/7z/C/Ppmd8Enc.c
index 1cbc17f1..32ff8052 100644..100755
--- a/multiarc/src/formats/7z/C/Ppmd8Enc.c
+++ b/multiarc/src/formats/7z/C/Ppmd8Enc.c
@@ -1,5 +1,5 @@
-/* Ppmd8Enc.c -- PPMdI Encoder
-2017-04-03 : Igor Pavlov : Public domain
+/* Ppmd8Enc.c -- Ppmd8 (PPMdI) Encoder
+2021-04-13 : Igor Pavlov : Public domain
This code is based on:
PPMd var.I (2002): Dmitry Shkarin : Public domain
Carryless rangecoder (1999): Dmitry Subbotin : Public domain */
@@ -11,59 +11,100 @@ This code is based on:
#define kTop (1 << 24)
#define kBot (1 << 15)
-void Ppmd8_RangeEnc_FlushData(CPpmd8 *p)
+#define WRITE_BYTE(p) IByteOut_Write(p->Stream.Out, (Byte)(p->Low >> 24))
+
+void Ppmd8_Flush_RangeEnc(CPpmd8 *p)
{
unsigned i;
for (i = 0; i < 4; i++, p->Low <<= 8 )
- IByteOut_Write(p->Stream.Out, (Byte)(p->Low >> 24));
+ WRITE_BYTE(p);
}
-static void RangeEnc_Normalize(CPpmd8 *p)
-{
- while ((p->Low ^ (p->Low + p->Range)) < kTop ||
- (p->Range < kBot && ((p->Range = (0 - p->Low) & (kBot - 1)), 1)))
- {
- IByteOut_Write(p->Stream.Out, (Byte)(p->Low >> 24));
- p->Range <<= 8;
- p->Low <<= 8;
- }
-}
+
+
+
+
+#define RC_NORM(p) \
+ while ((p->Low ^ (p->Low + p->Range)) < kTop \
+ || (p->Range < kBot && ((p->Range = (0 - p->Low) & (kBot - 1)), 1))) \
+ { WRITE_BYTE(p); p->Range <<= 8; p->Low <<= 8; }
+
+
+
+
+
+
+
+
+
+
+
+
+
+// we must use only one type of Normalization from two: LOCAL or REMOTE
+#define RC_NORM_LOCAL(p) // RC_NORM(p)
+#define RC_NORM_REMOTE(p) RC_NORM(p)
+
+// #define RC_PRE(total) p->Range /= total;
+// #define RC_PRE(total)
+
+#define R p
+
+
+
+
+MY_FORCE_INLINE
+// MY_NO_INLINE
static void RangeEnc_Encode(CPpmd8 *p, UInt32 start, UInt32 size, UInt32 total)
{
- p->Low += start * (p->Range /= total);
- p->Range *= size;
- RangeEnc_Normalize(p);
+ R->Low += start * (R->Range /= total);
+ R->Range *= size;
+ RC_NORM_LOCAL(R);
}
-static void RangeEnc_EncodeBit_0(CPpmd8 *p, UInt32 size0)
-{
- p->Range >>= 14;
- p->Range *= size0;
- RangeEnc_Normalize(p);
-}
-static void RangeEnc_EncodeBit_1(CPpmd8 *p, UInt32 size0)
-{
- p->Low += size0 * (p->Range >>= 14);
- p->Range *= ((1 << 14) - size0);
- RangeEnc_Normalize(p);
-}
-#define MASK(sym) ((signed char *)charMask)[sym]
+
+
+
+
+
+#define RC_Encode(start, size, total) RangeEnc_Encode(p, start, size, total);
+#define RC_EncodeFinal(start, size, total) RC_Encode(start, size, total); RC_NORM_REMOTE(p);
+
+#define CTX(ref) ((CPpmd8_Context *)Ppmd8_GetContext(p, ref))
+
+typedef CPpmd8_Context * CTX_PTR;
+#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p)
+
+void Ppmd8_UpdateModel(CPpmd8 *p);
+
+#define MASK(sym) ((unsigned char *)charMask)[sym]
+
+// MY_FORCE_INLINE
+// static
void Ppmd8_EncodeSymbol(CPpmd8 *p, int symbol)
{
size_t charMask[256 / sizeof(size_t)];
+
if (p->MinContext->NumStats != 0)
{
CPpmd_State *s = Ppmd8_GetStats(p, p->MinContext);
UInt32 sum;
unsigned i;
+ UInt32 summFreq = p->MinContext->Union2.SummFreq;
+
+ PPMD8_CORRECT_SUM_RANGE(p, summFreq)
+
+ // RC_PRE(summFreq);
+
if (s->Symbol == symbol)
{
- RangeEnc_Encode(p, 0, s->Freq, p->MinContext->SummFreq);
+
+ RC_EncodeFinal(0, s->Freq, summFreq);
p->FoundState = s;
Ppmd8_Update1_0(p);
return;
@@ -75,7 +116,8 @@ void Ppmd8_EncodeSymbol(CPpmd8 *p, int symbol)
{
if ((++s)->Symbol == symbol)
{
- RangeEnc_Encode(p, sum, s->Freq, p->MinContext->SummFreq);
+
+ RC_EncodeFinal(sum, s->Freq, summFreq);
p->FoundState = s;
Ppmd8_Update1(p);
return;
@@ -84,80 +126,189 @@ void Ppmd8_EncodeSymbol(CPpmd8 *p, int symbol)
}
while (--i);
+
+ RC_Encode(sum, summFreq - sum, summFreq);
+
+
PPMD_SetAllBitsIn256Bytes(charMask);
- MASK(s->Symbol) = 0;
- i = p->MinContext->NumStats;
- do { MASK((--s)->Symbol) = 0; } while (--i);
- RangeEnc_Encode(p, sum, p->MinContext->SummFreq - sum, p->MinContext->SummFreq);
+ // MASK(s->Symbol) = 0;
+ // i = p->MinContext->NumStats;
+ // do { MASK((--s)->Symbol) = 0; } while (--i);
+ {
+ CPpmd_State *s2 = Ppmd8_GetStats(p, p->MinContext);
+ MASK(s->Symbol) = 0;
+ do
+ {
+ unsigned sym0 = s2[0].Symbol;
+ unsigned sym1 = s2[1].Symbol;
+ s2 += 2;
+ MASK(sym0) = 0;
+ MASK(sym1) = 0;
+ }
+ while (s2 < s);
+ }
}
else
{
UInt16 *prob = Ppmd8_GetBinSumm(p);
CPpmd_State *s = Ppmd8Context_OneState(p->MinContext);
+ UInt32 pr = *prob;
+ UInt32 bound = (R->Range >> 14) * pr;
+ pr = PPMD_UPDATE_PROB_1(pr);
if (s->Symbol == symbol)
{
- RangeEnc_EncodeBit_0(p, *prob);
- *prob = (UInt16)PPMD_UPDATE_PROB_0(*prob);
- p->FoundState = s;
- Ppmd8_UpdateBin(p);
+ *prob = (UInt16)(pr + (1 << PPMD_INT_BITS));
+ // RangeEnc_EncodeBit_0(p, bound);
+ R->Range = bound;
+ RC_NORM(R);
+
+ // p->FoundState = s;
+ // Ppmd8_UpdateBin(p);
+ {
+ unsigned freq = s->Freq;
+ CTX_PTR c = CTX(SUCCESSOR(s));
+ p->FoundState = s;
+ p->PrevSuccess = 1;
+ p->RunLength++;
+ s->Freq = (Byte)(freq + (freq < 196)); // Ppmd8 (196)
+ // NextContext(p);
+ if (p->OrderFall == 0 && (const Byte *)c >= p->UnitsStart)
+ p->MaxContext = p->MinContext = c;
+ else
+ Ppmd8_UpdateModel(p);
+ }
return;
}
- else
- {
- RangeEnc_EncodeBit_1(p, *prob);
- *prob = (UInt16)PPMD_UPDATE_PROB_1(*prob);
- p->InitEsc = PPMD8_kExpEscape[*prob >> 10];
- PPMD_SetAllBitsIn256Bytes(charMask);
- MASK(s->Symbol) = 0;
- p->PrevSuccess = 0;
- }
+
+ *prob = (UInt16)pr;
+ p->InitEsc = p->ExpEscape[pr >> 10];
+ // RangeEnc_EncodeBit_1(p, bound);
+ R->Low += bound;
+ R->Range = (R->Range & ~((UInt32)PPMD_BIN_SCALE - 1)) - bound;
+ RC_NORM_LOCAL(R)
+
+ PPMD_SetAllBitsIn256Bytes(charMask);
+ MASK(s->Symbol) = 0;
+ p->PrevSuccess = 0;
}
+
for (;;)
{
- UInt32 escFreq;
CPpmd_See *see;
CPpmd_State *s;
- UInt32 sum;
- unsigned i, numMasked = p->MinContext->NumStats;
+ UInt32 sum, escFreq;
+ CPpmd8_Context *mc;
+ unsigned i, numMasked;
+
+ RC_NORM_REMOTE(p)
+
+ mc = p->MinContext;
+ numMasked = mc->NumStats;
+
do
{
p->OrderFall++;
- if (!p->MinContext->Suffix)
+ if (!mc->Suffix)
return; /* EndMarker (symbol = -1) */
- p->MinContext = Ppmd8_GetContext(p, p->MinContext->Suffix);
+ mc = Ppmd8_GetContext(p, mc->Suffix);
+
}
- while (p->MinContext->NumStats == numMasked);
+ while (mc->NumStats == numMasked);
+ p->MinContext = mc;
+
see = Ppmd8_MakeEscFreq(p, numMasked, &escFreq);
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
s = Ppmd8_GetStats(p, p->MinContext);
sum = 0;
- i = p->MinContext->NumStats + 1;
+ i = (unsigned)p->MinContext->NumStats + 1;
+
do
{
- int cur = s->Symbol;
- if (cur == symbol)
+ unsigned cur = s->Symbol;
+ if ((int)cur == symbol)
{
UInt32 low = sum;
- CPpmd_State *s1 = s;
- do
+ UInt32 freq = s->Freq;
+ unsigned num2;
+
+ Ppmd_See_Update(see);
+ p->FoundState = s;
+ sum += escFreq;
+
+ num2 = i / 2;
+ i &= 1;
+ sum += freq & (0 - (UInt32)i);
+ if (num2 != 0)
{
- sum += (s->Freq & (int)(MASK(s->Symbol)));
- s++;
+ s += i;
+ for (;;)
+ {
+ unsigned sym0 = s[0].Symbol;
+ unsigned sym1 = s[1].Symbol;
+ s += 2;
+ sum += (s[-2].Freq & (unsigned)(MASK(sym0)));
+ sum += (s[-1].Freq & (unsigned)(MASK(sym1)));
+ if (--num2 == 0)
+ break;
+ }
}
- while (--i);
- RangeEnc_Encode(p, low, s1->Freq, sum + escFreq);
- Ppmd_See_Update(see);
- p->FoundState = s1;
+
+ PPMD8_CORRECT_SUM_RANGE(p, sum);
+
+ RC_EncodeFinal(low, freq, sum);
Ppmd8_Update2(p);
return;
}
- sum += (s->Freq & (int)(MASK(cur)));
- MASK(cur) = 0;
+ sum += (s->Freq & (unsigned)(MASK(cur)));
s++;
}
while (--i);
- RangeEnc_Encode(p, sum, escFreq, sum + escFreq);
- see->Summ = (UInt16)(see->Summ + sum + escFreq);
+ {
+ UInt32 total = sum + escFreq;
+ see->Summ = (UInt16)(see->Summ + total);
+ PPMD8_CORRECT_SUM_RANGE(p, total);
+
+ RC_Encode(sum, total - sum, total);
+ }
+
+ {
+ CPpmd_State *s2 = Ppmd8_GetStats(p, p->MinContext);
+ s--;
+ MASK(s->Symbol) = 0;
+ do
+ {
+ unsigned sym0 = s2[0].Symbol;
+ unsigned sym1 = s2[1].Symbol;
+ s2 += 2;
+ MASK(sym0) = 0;
+ MASK(sym1) = 0;
+ }
+ while (s2 < s);
+ }
}
}
diff --git a/multiarc/src/formats/7z/C/Precomp.h b/multiarc/src/formats/7z/C/Precomp.h
index e8ff8b40..e8ff8b40 100644..100755
--- a/multiarc/src/formats/7z/C/Precomp.h
+++ b/multiarc/src/formats/7z/C/Precomp.h
diff --git a/multiarc/src/formats/7z/C/RotateDefs.h b/multiarc/src/formats/7z/C/RotateDefs.h
index 8f01d1a6..8f01d1a6 100644..100755
--- a/multiarc/src/formats/7z/C/RotateDefs.h
+++ b/multiarc/src/formats/7z/C/RotateDefs.h
diff --git a/multiarc/src/formats/7z/C/Sha1.c b/multiarc/src/formats/7z/C/Sha1.c
index 96b5e787..9665b5b5 100644..100755
--- a/multiarc/src/formats/7z/C/Sha1.c
+++ b/multiarc/src/formats/7z/C/Sha1.c
@@ -1,5 +1,5 @@
/* Sha1.c -- SHA-1 Hash
-2017-04-03 : Igor Pavlov : Public domain
+2021-07-13 : Igor Pavlov : Public domain
This code is based on public domain code of Steve Reid from Wei Dai's Crypto++ library. */
#include "Precomp.h"
@@ -10,331 +10,464 @@ This code is based on public domain code of Steve Reid from Wei Dai's Crypto++ l
#include "RotateDefs.h"
#include "Sha1.h"
-// define it for speed optimization
+#if defined(_MSC_VER) && (_MSC_VER < 1900)
+// #define USE_MY_MM
+#endif
+
+#ifdef MY_CPU_X86_OR_AMD64
+ #ifdef _MSC_VER
+ #if _MSC_VER >= 1200
+ #define _SHA_SUPPORTED
+ #endif
+ #elif defined(__clang__)
+ #if (__clang_major__ >= 8) // fix that check
+ #define _SHA_SUPPORTED
+ #endif
+ #elif defined(__GNUC__)
+ #if (__GNUC__ >= 8) // fix that check
+ #define _SHA_SUPPORTED
+ #endif
+ #elif defined(__INTEL_COMPILER)
+ #if (__INTEL_COMPILER >= 1800) // fix that check
+ #define _SHA_SUPPORTED
+ #endif
+ #endif
+#elif defined(MY_CPU_ARM_OR_ARM64)
+ #ifdef _MSC_VER
+ #if _MSC_VER >= 1910 && _MSC_VER >= 1929 && _MSC_FULL_VER >= 192930037
+ #define _SHA_SUPPORTED
+ #endif
+ #elif defined(__clang__)
+ #if (__clang_major__ >= 8) // fix that check
+ #define _SHA_SUPPORTED
+ #endif
+ #elif defined(__GNUC__)
+ #if (__GNUC__ >= 6) // fix that check
+ #define _SHA_SUPPORTED
+ #endif
+ #endif
+#endif
+
+void MY_FAST_CALL Sha1_UpdateBlocks(UInt32 state[5], const Byte *data, size_t numBlocks);
+
+#ifdef _SHA_SUPPORTED
+ void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks);
+
+ static SHA1_FUNC_UPDATE_BLOCKS g_FUNC_UPDATE_BLOCKS = Sha1_UpdateBlocks;
+ static SHA1_FUNC_UPDATE_BLOCKS g_FUNC_UPDATE_BLOCKS_HW;
+
+ #define UPDATE_BLOCKS(p) p->func_UpdateBlocks
+#else
+ #define UPDATE_BLOCKS(p) Sha1_UpdateBlocks
+#endif
+
+
+BoolInt Sha1_SetFunction(CSha1 *p, unsigned algo)
+{
+ SHA1_FUNC_UPDATE_BLOCKS func = Sha1_UpdateBlocks;
+
+ #ifdef _SHA_SUPPORTED
+ if (algo != SHA1_ALGO_SW)
+ {
+ if (algo == SHA1_ALGO_DEFAULT)
+ func = g_FUNC_UPDATE_BLOCKS;
+ else
+ {
+ if (algo != SHA1_ALGO_HW)
+ return False;
+ func = g_FUNC_UPDATE_BLOCKS_HW;
+ if (!func)
+ return False;
+ }
+ }
+ #else
+ if (algo > 1)
+ return False;
+ #endif
+
+ p->func_UpdateBlocks = func;
+ return True;
+}
+
+
+/* define it for speed optimization */
// #define _SHA1_UNROLL
+// allowed unroll steps: (1, 2, 4, 5, 20)
+
#ifdef _SHA1_UNROLL
- #define kNumW 16
- #define WW(i) W[(i)&15]
+ #define STEP_PRE 20
+ #define STEP_MAIN 20
#else
+ #define _SHA1_BIG_W
+ #define STEP_PRE 5
+ #define STEP_MAIN 5
+#endif
+
+
+#ifdef _SHA1_BIG_W
#define kNumW 80
- #define WW(i) W[i]
+ #define w(i) W[i]
+#else
+ #define kNumW 16
+ #define w(i) W[(i)&15]
#endif
-#define w0(i) (W[i] = data[i])
+#define w0(i) (W[i] = GetBe32(data + (size_t)(i) * 4))
+#define w1(i) (w(i) = rotlFixed(w((size_t)(i)-3) ^ w((size_t)(i)-8) ^ w((size_t)(i)-14) ^ w((size_t)(i)-16), 1))
-#define w1(i) (WW(i) = rotlFixed(WW((i)-3) ^ WW((i)-8) ^ WW((i)-14) ^ WW((i)-16), 1))
+#define f0(x,y,z) ( 0x5a827999 + (z^(x&(y^z))) )
+#define f1(x,y,z) ( 0x6ed9eba1 + (x^y^z) )
+#define f2(x,y,z) ( 0x8f1bbcdc + ((x&y)|(z&(x|y))) )
+#define f3(x,y,z) ( 0xca62c1d6 + (x^y^z) )
-#define f1(x,y,z) (z^(x&(y^z)))
-#define f2(x,y,z) (x^y^z)
-#define f3(x,y,z) ((x&y)|(z&(x|y)))
-#define f4(x,y,z) (x^y^z)
+/*
+#define T1(fx, ww) \
+ tmp = e + fx(b,c,d) + ww + rotlFixed(a, 5); \
+ e = d; \
+ d = c; \
+ c = rotlFixed(b, 30); \
+ b = a; \
+ a = tmp; \
+*/
-#define RK(a,b,c,d,e, fx, w, k) e += fx(b,c,d) + w + k + rotlFixed(a,5); b = rotlFixed(b,30);
+#define T5(a,b,c,d,e, fx, ww) \
+ e += fx(b,c,d) + ww + rotlFixed(a, 5); \
+ b = rotlFixed(b, 30); \
-#define R0(a,b,c,d,e, i) RK(a,b,c,d,e, f1, w0(i), 0x5A827999)
-#define R1(a,b,c,d,e, i) RK(a,b,c,d,e, f1, w1(i), 0x5A827999)
-#define R2(a,b,c,d,e, i) RK(a,b,c,d,e, f2, w1(i), 0x6ED9EBA1)
-#define R3(a,b,c,d,e, i) RK(a,b,c,d,e, f3, w1(i), 0x8F1BBCDC)
-#define R4(a,b,c,d,e, i) RK(a,b,c,d,e, f4, w1(i), 0xCA62C1D6)
-#define RX_1_4(rx1, rx4, i) \
- rx1(a,b,c,d,e, i); \
- rx4(e,a,b,c,d, i+1); \
- rx4(d,e,a,b,c, i+2); \
- rx4(c,d,e,a,b, i+3); \
- rx4(b,c,d,e,a, i+4); \
+/*
+#define R1(i, fx, wx) \
+ T1 ( fx, wx(i)); \
-#define RX_5(rx, i) RX_1_4(rx, rx, i);
+#define R2(i, fx, wx) \
+ R1 ( (i) , fx, wx); \
+ R1 ( (i) + 1, fx, wx); \
-#ifdef _SHA1_UNROLL
+#define R4(i, fx, wx) \
+ R2 ( (i) , fx, wx); \
+ R2 ( (i) + 2, fx, wx); \
+*/
+
+#define M5(i, fx, wx0, wx1) \
+ T5 ( a,b,c,d,e, fx, wx0((i) ) ); \
+ T5 ( e,a,b,c,d, fx, wx1((i)+1) ); \
+ T5 ( d,e,a,b,c, fx, wx1((i)+2) ); \
+ T5 ( c,d,e,a,b, fx, wx1((i)+3) ); \
+ T5 ( b,c,d,e,a, fx, wx1((i)+4) ); \
- #define RX_15 \
- RX_5(R0, 0); \
- RX_5(R0, 5); \
- RX_5(R0, 10);
+#define R5(i, fx, wx) \
+ M5 ( i, fx, wx, wx) \
+
+
+#if STEP_PRE > 5
+
+ #define R20_START \
+ R5 ( 0, f0, w0); \
+ R5 ( 5, f0, w0); \
+ R5 ( 10, f0, w0); \
+ M5 ( 15, f0, w0, w1); \
+
+ #elif STEP_PRE == 5
- #define RX_20(rx, i) \
- RX_5(rx, i); \
- RX_5(rx, i + 5); \
- RX_5(rx, i + 10); \
- RX_5(rx, i + 15);
+ #define R20_START \
+ { size_t i; for (i = 0; i < 15; i += STEP_PRE) \
+ { R5(i, f0, w0); } } \
+ M5 ( 15, f0, w0, w1); \
#else
-
-#define RX_15 { size_t i; for (i = 0; i < 15; i += 5) { RX_5(R0, i); } }
-#define RX_20(rx, ii) { size_t i; i = ii; for (; i < ii + 20; i += 5) { RX_5(rx, i); } }
+
+ #if STEP_PRE == 1
+ #define R_PRE R1
+ #elif STEP_PRE == 2
+ #define R_PRE R2
+ #elif STEP_PRE == 4
+ #define R_PRE R4
+ #endif
+
+ #define R20_START \
+ { size_t i; for (i = 0; i < 16; i += STEP_PRE) \
+ { R_PRE(i, f0, w0); } } \
+ R4 ( 16, f0, w1); \
#endif
-void Sha1_Init(CSha1 *p)
+
+#if STEP_MAIN > 5
+
+ #define R20(ii, fx) \
+ R5 ( (ii) , fx, w1); \
+ R5 ( (ii) + 5 , fx, w1); \
+ R5 ( (ii) + 10, fx, w1); \
+ R5 ( (ii) + 15, fx, w1); \
+
+#else
+
+ #if STEP_MAIN == 1
+ #define R_MAIN R1
+ #elif STEP_MAIN == 2
+ #define R_MAIN R2
+ #elif STEP_MAIN == 4
+ #define R_MAIN R4
+ #elif STEP_MAIN == 5
+ #define R_MAIN R5
+ #endif
+
+ #define R20(ii, fx) \
+ { size_t i; for (i = (ii); i < (ii) + 20; i += STEP_MAIN) \
+ { R_MAIN(i, fx, w1); } } \
+
+#endif
+
+
+
+void Sha1_InitState(CSha1 *p)
{
+ p->count = 0;
p->state[0] = 0x67452301;
p->state[1] = 0xEFCDAB89;
p->state[2] = 0x98BADCFE;
p->state[3] = 0x10325476;
p->state[4] = 0xC3D2E1F0;
- p->count = 0;
}
-void Sha1_GetBlockDigest(CSha1 *p, const UInt32 *data, UInt32 *destDigest)
+void Sha1_Init(CSha1 *p)
{
- UInt32 a, b, c, d, e;
- UInt32 W[kNumW];
-
- a = p->state[0];
- b = p->state[1];
- c = p->state[2];
- d = p->state[3];
- e = p->state[4];
-
- RX_15
-
- RX_1_4(R0, R1, 15);
-
- RX_20(R2, 20);
- RX_20(R3, 40);
- RX_20(R4, 60);
-
- destDigest[0] = p->state[0] + a;
- destDigest[1] = p->state[1] + b;
- destDigest[2] = p->state[2] + c;
- destDigest[3] = p->state[3] + d;
- destDigest[4] = p->state[4] + e;
+ p->func_UpdateBlocks =
+ #ifdef _SHA_SUPPORTED
+ g_FUNC_UPDATE_BLOCKS;
+ #else
+ NULL;
+ #endif
+ Sha1_InitState(p);
}
-void Sha1_UpdateBlock_Rar(CSha1 *p, UInt32 *data, int returnRes)
+
+MY_NO_INLINE
+void MY_FAST_CALL Sha1_UpdateBlocks(UInt32 state[5], const Byte *data, size_t numBlocks)
{
UInt32 a, b, c, d, e;
UInt32 W[kNumW];
+ // if (numBlocks != 0x1264378347) return;
+ if (numBlocks == 0)
+ return;
- a = p->state[0];
- b = p->state[1];
- c = p->state[2];
- d = p->state[3];
- e = p->state[4];
-
- RX_15
-
- RX_1_4(R0, R1, 15);
-
- RX_20(R2, 20);
- RX_20(R3, 40);
- RX_20(R4, 60);
-
- p->state[0] += a;
- p->state[1] += b;
- p->state[2] += c;
- p->state[3] += d;
- p->state[4] += e;
+ a = state[0];
+ b = state[1];
+ c = state[2];
+ d = state[3];
+ e = state[4];
- if (returnRes)
+ do
{
- size_t i;
- for (i = 0 ; i < SHA1_NUM_BLOCK_WORDS; i++)
- data[i] = W[kNumW - SHA1_NUM_BLOCK_WORDS + i];
+ #if STEP_PRE < 5 || STEP_MAIN < 5
+ UInt32 tmp;
+ #endif
+
+ R20_START
+ R20(20, f1);
+ R20(40, f2);
+ R20(60, f3);
+
+ a += state[0];
+ b += state[1];
+ c += state[2];
+ d += state[3];
+ e += state[4];
+
+ state[0] = a;
+ state[1] = b;
+ state[2] = c;
+ state[3] = d;
+ state[4] = e;
+
+ data += 64;
}
+ while (--numBlocks);
}
-#define Sha1_UpdateBlock(p) Sha1_GetBlockDigest(p, p->buffer, p->state)
+
+#define Sha1_UpdateBlock(p) UPDATE_BLOCKS(p)(p->state, p->buffer, 1)
void Sha1_Update(CSha1 *p, const Byte *data, size_t size)
{
- unsigned pos, pos2;
if (size == 0)
return;
- pos = (unsigned)p->count & 0x3F;
- p->count += size;
- pos2 = pos & 3;
- pos >>= 2;
-
- if (pos2 != 0)
- {
- UInt32 w;
- pos2 = (3 - pos2) * 8;
- w = ((UInt32)*data++) << pos2;
- if (--size && pos2)
- {
- pos2 -= 8;
- w |= ((UInt32)*data++) << pos2;
- if (--size && pos2)
- {
- pos2 -= 8;
- w |= ((UInt32)*data++) << pos2;
- size--;
- }
- }
- p->buffer[pos] |= w;
- if (pos2 == 0)
- pos++;
- }
- for (;;)
{
- if (pos == SHA1_NUM_BLOCK_WORDS)
+ unsigned pos = (unsigned)p->count & 0x3F;
+ unsigned num;
+
+ p->count += size;
+
+ num = 64 - pos;
+ if (num > size)
{
- for (;;)
- {
- size_t i;
- Sha1_UpdateBlock(p);
- if (size < SHA1_BLOCK_SIZE)
- break;
- size -= SHA1_BLOCK_SIZE;
- for (i = 0; i < SHA1_NUM_BLOCK_WORDS; i += 2)
- {
- p->buffer[i ] = GetBe32(data);
- p->buffer[i + 1] = GetBe32(data + 4);
- data += 8;
- }
- }
- pos = 0;
+ memcpy(p->buffer + pos, data, size);
+ return;
}
- if (size < 4)
- break;
-
- p->buffer[pos] = GetBe32(data);
- data += 4;
- size -= 4;
- pos++;
- }
-
- if (size != 0)
- {
- UInt32 w = ((UInt32)data[0]) << 24;
- if (size > 1)
+
+ if (pos != 0)
{
- w |= ((UInt32)data[1]) << 16;
- if (size > 2)
- w |= ((UInt32)data[2]) << 8;
+ size -= num;
+ memcpy(p->buffer + pos, data, num);
+ data += num;
+ Sha1_UpdateBlock(p);
}
- p->buffer[pos] = w;
}
-}
-
-void Sha1_Update_Rar(CSha1 *p, Byte *data, size_t size /* , int rar350Mode */)
-{
- int returnRes = False;
-
- unsigned pos = (unsigned)p->count & 0x3F;
- p->count += size;
-
- while (size--)
{
- unsigned pos2 = (pos & 3);
- UInt32 v = ((UInt32)*data++) << (8 * (3 - pos2));
- UInt32 *ref = &(p->buffer[pos >> 2]);
- pos++;
- if (pos2 == 0)
- {
- *ref = v;
- continue;
- }
- *ref |= v;
-
- if (pos == SHA1_BLOCK_SIZE)
- {
- pos = 0;
- Sha1_UpdateBlock_Rar(p, p->buffer, returnRes);
- if (returnRes)
- {
- size_t i;
- for (i = 0; i < SHA1_NUM_BLOCK_WORDS; i++)
- {
- UInt32 d = p->buffer[i];
- Byte *prev = data + i * 4 - SHA1_BLOCK_SIZE;
- SetUi32(prev, d);
- }
- }
- // returnRes = rar350Mode;
- returnRes = True;
- }
+ size_t numBlocks = size >> 6;
+ UPDATE_BLOCKS(p)(p->state, data, numBlocks);
+ size &= 0x3F;
+ if (size == 0)
+ return;
+ data += (numBlocks << 6);
+ memcpy(p->buffer, data, size);
}
}
+
void Sha1_Final(CSha1 *p, Byte *digest)
{
unsigned pos = (unsigned)p->count & 0x3F;
- unsigned pos2 = (pos & 3);
- UInt64 numBits;
- UInt32 w;
- unsigned i;
- pos >>= 2;
+
+ p->buffer[pos++] = 0x80;
- w = 0;
- if (pos2 != 0)
- w = p->buffer[pos];
- p->buffer[pos++] = w | (((UInt32)0x80000000) >> (8 * pos2));
+ if (pos > (64 - 8))
+ {
+ while (pos != 64) { p->buffer[pos++] = 0; }
+ // memset(&p->buf.buffer[pos], 0, 64 - pos);
+ Sha1_UpdateBlock(p);
+ pos = 0;
+ }
- while (pos != (SHA1_NUM_BLOCK_WORDS - 2))
+ /*
+ if (pos & 3)
{
- pos &= 0xF;
- if (pos == 0)
- Sha1_UpdateBlock(p);
- p->buffer[pos++] = 0;
+ p->buffer[pos] = 0;
+ p->buffer[pos + 1] = 0;
+ p->buffer[pos + 2] = 0;
+ pos += 3;
+ pos &= ~3;
}
-
- numBits = (p->count << 3);
- p->buffer[SHA1_NUM_BLOCK_WORDS - 2] = (UInt32)(numBits >> 32);
- p->buffer[SHA1_NUM_BLOCK_WORDS - 1] = (UInt32)(numBits);
- Sha1_UpdateBlock(p);
+ {
+ for (; pos < 64 - 8; pos += 4)
+ *(UInt32 *)(&p->buffer[pos]) = 0;
+ }
+ */
+
+ memset(&p->buffer[pos], 0, (64 - 8) - pos);
- for (i = 0; i < SHA1_NUM_DIGEST_WORDS; i++)
{
- UInt32 v = p->state[i];
- SetBe32(digest, v);
- digest += 4;
+ UInt64 numBits = (p->count << 3);
+ SetBe32(p->buffer + 64 - 8, (UInt32)(numBits >> 32));
+ SetBe32(p->buffer + 64 - 4, (UInt32)(numBits));
}
+
+ Sha1_UpdateBlock(p);
- Sha1_Init(p);
-}
+ SetBe32(digest, p->state[0]);
+ SetBe32(digest + 4, p->state[1]);
+ SetBe32(digest + 8, p->state[2]);
+ SetBe32(digest + 12, p->state[3]);
+ SetBe32(digest + 16, p->state[4]);
+
-void Sha1_32_PrepareBlock(const CSha1 *p, UInt32 *block, unsigned size)
-{
- const UInt64 numBits = (p->count + size) << 5;
- block[SHA1_NUM_BLOCK_WORDS - 2] = (UInt32)(numBits >> 32);
- block[SHA1_NUM_BLOCK_WORDS - 1] = (UInt32)(numBits);
- block[size++] = 0x80000000;
- while (size != (SHA1_NUM_BLOCK_WORDS - 2))
- block[size++] = 0;
+
+ Sha1_InitState(p);
}
-void Sha1_32_Update(CSha1 *p, const UInt32 *data, size_t size)
+
+void Sha1_PrepareBlock(const CSha1 *p, Byte *block, unsigned size)
{
- unsigned pos = (unsigned)p->count & 0xF;
- p->count += size;
- while (size--)
+ const UInt64 numBits = (p->count + size) << 3;
+ SetBe32(&((UInt32 *)(void *)block)[SHA1_NUM_BLOCK_WORDS - 2], (UInt32)(numBits >> 32));
+ SetBe32(&((UInt32 *)(void *)block)[SHA1_NUM_BLOCK_WORDS - 1], (UInt32)(numBits));
+ // SetBe32((UInt32 *)(block + size), 0x80000000);
+ SetUi32((UInt32 *)(void *)(block + size), 0x80);
+ size += 4;
+ while (size != (SHA1_NUM_BLOCK_WORDS - 2) * 4)
{
- p->buffer[pos++] = *data++;
- if (pos == SHA1_NUM_BLOCK_WORDS)
- {
- pos = 0;
- Sha1_UpdateBlock(p);
- }
+ *((UInt32 *)(void *)(block + size)) = 0;
+ size += 4;
}
}
-void Sha1_32_Final(CSha1 *p, UInt32 *digest)
+void Sha1_GetBlockDigest(const CSha1 *p, const Byte *data, Byte *destDigest)
{
- UInt64 numBits;
- unsigned pos = (unsigned)p->count & 0xF;
- p->buffer[pos++] = 0x80000000;
+ MY_ALIGN (16)
+ UInt32 st[SHA1_NUM_DIGEST_WORDS];
+
+ st[0] = p->state[0];
+ st[1] = p->state[1];
+ st[2] = p->state[2];
+ st[3] = p->state[3];
+ st[4] = p->state[4];
+
+ UPDATE_BLOCKS(p)(st, data, 1);
- while (pos != (SHA1_NUM_BLOCK_WORDS - 2))
+ SetBe32(destDigest + 0 , st[0]);
+ SetBe32(destDigest + 1 * 4, st[1]);
+ SetBe32(destDigest + 2 * 4, st[2]);
+ SetBe32(destDigest + 3 * 4, st[3]);
+ SetBe32(destDigest + 4 * 4, st[4]);
+}
+
+
+void Sha1Prepare()
+{
+ #ifdef _SHA_SUPPORTED
+ SHA1_FUNC_UPDATE_BLOCKS f, f_hw;
+ f = Sha1_UpdateBlocks;
+ f_hw = NULL;
+ #ifdef MY_CPU_X86_OR_AMD64
+ #ifndef USE_MY_MM
+ if (CPU_IsSupported_SHA()
+ && CPU_IsSupported_SSSE3()
+ // && CPU_IsSupported_SSE41()
+ )
+ #endif
+ #else
+ if (CPU_IsSupported_SHA1())
+ #endif
{
- pos &= 0xF;
- if (pos == 0)
- Sha1_UpdateBlock(p);
- p->buffer[pos++] = 0;
- }
-
- numBits = (p->count << 5);
- p->buffer[SHA1_NUM_BLOCK_WORDS - 2] = (UInt32)(numBits >> 32);
- p->buffer[SHA1_NUM_BLOCK_WORDS - 1] = (UInt32)(numBits);
+ // printf("\n========== HW SHA1 ======== \n");
+ #if defined(MY_CPU_ARM_OR_ARM64) && defined(_MSC_VER)
+ /* there was bug in MSVC compiler for ARM64 -O2 before version VS2019 16.10 (19.29.30037).
+ It generated incorrect SHA-1 code.
+ 21.03 : we test sha1-hardware code at runtime initialization */
+
+ #pragma message("== SHA1 code: MSC compiler : failure-check code was inserted")
+
+ UInt32 state[5] = { 0, 1, 2, 3, 4 } ;
+ Byte data[64];
+ unsigned i;
+ for (i = 0; i < sizeof(data); i += 2)
+ {
+ data[i ] = (Byte)(i);
+ data[i + 1] = (Byte)(i + 1);
+ }
- Sha1_GetBlockDigest(p, p->buffer, digest);
-
- Sha1_Init(p);
+ Sha1_UpdateBlocks_HW(state, data, sizeof(data) / 64);
+
+ if ( state[0] != 0x9acd7297
+ || state[1] != 0x4624d898
+ || state[2] != 0x0bf079f0
+ || state[3] != 0x031e61b3
+ || state[4] != 0x8323fe20)
+ {
+ // printf("\n========== SHA-1 hardware version failure ======== \n");
+ }
+ else
+ #endif
+ {
+ f = f_hw = Sha1_UpdateBlocks_HW;
+ }
+ }
+ g_FUNC_UPDATE_BLOCKS = f;
+ g_FUNC_UPDATE_BLOCKS_HW = f_hw;
+ #endif
}
diff --git a/multiarc/src/formats/7z/C/Sha1.h b/multiarc/src/formats/7z/C/Sha1.h
index aa22ec36..345a816a 100644..100755
--- a/multiarc/src/formats/7z/C/Sha1.h
+++ b/multiarc/src/formats/7z/C/Sha1.h
@@ -1,5 +1,5 @@
/* Sha1.h -- SHA-1 Hash
-2016-05-20 : Igor Pavlov : Public domain */
+2021-02-08 : Igor Pavlov : Public domain */
#ifndef __7Z_SHA1_H
#define __7Z_SHA1_H
@@ -14,24 +14,62 @@ EXTERN_C_BEGIN
#define SHA1_BLOCK_SIZE (SHA1_NUM_BLOCK_WORDS * 4)
#define SHA1_DIGEST_SIZE (SHA1_NUM_DIGEST_WORDS * 4)
+typedef void (MY_FAST_CALL *SHA1_FUNC_UPDATE_BLOCKS)(UInt32 state[5], const Byte *data, size_t numBlocks);
+
+/*
+ if (the system supports different SHA1 code implementations)
+ {
+ (CSha1::func_UpdateBlocks) will be used
+ (CSha1::func_UpdateBlocks) can be set by
+ Sha1_Init() - to default (fastest)
+ Sha1_SetFunction() - to any algo
+ }
+ else
+ {
+ (CSha1::func_UpdateBlocks) is ignored.
+ }
+*/
+
typedef struct
{
- UInt32 state[SHA1_NUM_DIGEST_WORDS];
+ SHA1_FUNC_UPDATE_BLOCKS func_UpdateBlocks;
UInt64 count;
- UInt32 buffer[SHA1_NUM_BLOCK_WORDS];
+ UInt64 __pad_2[2];
+ UInt32 state[SHA1_NUM_DIGEST_WORDS];
+ UInt32 __pad_3[3];
+ Byte buffer[SHA1_BLOCK_SIZE];
} CSha1;
-void Sha1_Init(CSha1 *p);
-void Sha1_GetBlockDigest(CSha1 *p, const UInt32 *data, UInt32 *destDigest);
+#define SHA1_ALGO_DEFAULT 0
+#define SHA1_ALGO_SW 1
+#define SHA1_ALGO_HW 2
+
+/*
+Sha1_SetFunction()
+return:
+ 0 - (algo) value is not supported, and func_UpdateBlocks was not changed
+ 1 - func_UpdateBlocks was set according (algo) value.
+*/
+
+BoolInt Sha1_SetFunction(CSha1 *p, unsigned algo);
+
+void Sha1_InitState(CSha1 *p);
+void Sha1_Init(CSha1 *p);
void Sha1_Update(CSha1 *p, const Byte *data, size_t size);
void Sha1_Final(CSha1 *p, Byte *digest);
-void Sha1_Update_Rar(CSha1 *p, Byte *data, size_t size /* , int rar350Mode */);
+void Sha1_PrepareBlock(const CSha1 *p, Byte *block, unsigned size);
+void Sha1_GetBlockDigest(const CSha1 *p, const Byte *data, Byte *destDigest);
+
+// void MY_FAST_CALL Sha1_UpdateBlocks(UInt32 state[5], const Byte *data, size_t numBlocks);
+
+/*
+call Sha1Prepare() once at program start.
+It prepares all supported implementations, and detects the fastest implementation.
+*/
-void Sha1_32_PrepareBlock(const CSha1 *p, UInt32 *block, unsigned size);
-void Sha1_32_Update(CSha1 *p, const UInt32 *data, size_t size);
-void Sha1_32_Final(CSha1 *p, UInt32 *digest);
+void Sha1Prepare(void);
EXTERN_C_END
diff --git a/multiarc/src/formats/7z/C/Sha1Opt.c b/multiarc/src/formats/7z/C/Sha1Opt.c
new file mode 100755
index 00000000..63132da3
--- /dev/null
+++ b/multiarc/src/formats/7z/C/Sha1Opt.c
@@ -0,0 +1,373 @@
+/* Sha1Opt.c -- SHA-1 optimized code for SHA-1 hardware instructions
+2021-04-01 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#if defined(_MSC_VER)
+#if (_MSC_VER < 1900) && (_MSC_VER >= 1200)
+// #define USE_MY_MM
+#endif
+#endif
+
+#include "CpuArch.h"
+
+#ifdef MY_CPU_X86_OR_AMD64
+ #if defined(__clang__)
+ #if (__clang_major__ >= 8) // fix that check
+ #define USE_HW_SHA
+ #ifndef __SHA__
+ #define ATTRIB_SHA __attribute__((__target__("sha,ssse3")))
+ #if defined(_MSC_VER)
+ // SSSE3: for clang-cl:
+ #include <tmmintrin.h>
+ #define __SHA__
+ #endif
+ #endif
+ #pragma clang diagnostic ignored "-Wvector-conversion"
+ #endif
+ #elif defined(__GNUC__)
+ #if (__GNUC__ >= 8) // fix that check
+ #define USE_HW_SHA
+ #ifndef __SHA__
+ #define ATTRIB_SHA __attribute__((__target__("sha,ssse3")))
+ // #pragma GCC target("sha,ssse3")
+ #endif
+ #endif
+ #elif defined(__INTEL_COMPILER)
+ #if (__INTEL_COMPILER >= 1800) // fix that check
+ #define USE_HW_SHA
+ #endif
+ #elif defined(_MSC_VER)
+ #ifdef USE_MY_MM
+ #define USE_VER_MIN 1300
+ #else
+ #define USE_VER_MIN 1910
+ #endif
+ #if _MSC_VER >= USE_VER_MIN
+ #define USE_HW_SHA
+ #endif
+ #endif
+// #endif // MY_CPU_X86_OR_AMD64
+
+#ifdef USE_HW_SHA
+
+// #pragma message("Sha1 HW")
+// #include <wmmintrin.h>
+
+#if !defined(_MSC_VER) || (_MSC_VER >= 1900)
+#include <immintrin.h>
+#else
+#include <emmintrin.h>
+
+#if defined(_MSC_VER) && (_MSC_VER >= 1600)
+// #include <intrin.h>
+#endif
+
+#ifdef USE_MY_MM
+#include "My_mm.h"
+#endif
+
+#endif
+
+/*
+SHA1 uses:
+SSE2:
+ _mm_loadu_si128
+ _mm_storeu_si128
+ _mm_set_epi32
+ _mm_add_epi32
+ _mm_shuffle_epi32 / pshufd
+ _mm_xor_si128
+ _mm_cvtsi128_si32
+ _mm_cvtsi32_si128
+SSSE3:
+ _mm_shuffle_epi8 / pshufb
+
+SHA:
+ _mm_sha1*
+*/
+
+#define ADD_EPI32(dest, src) dest = _mm_add_epi32(dest, src);
+#define XOR_SI128(dest, src) dest = _mm_xor_si128(dest, src);
+#define SHUFFLE_EPI8(dest, mask) dest = _mm_shuffle_epi8(dest, mask);
+#define SHUFFLE_EPI32(dest, mask) dest = _mm_shuffle_epi32(dest, mask);
+
+#define SHA1_RND4(abcd, e0, f) abcd = _mm_sha1rnds4_epu32(abcd, e0, f);
+#define SHA1_NEXTE(e, m) e = _mm_sha1nexte_epu32(e, m);
+
+
+
+
+
+#define SHA1_MSG1(dest, src) dest = _mm_sha1msg1_epu32(dest, src);
+#define SHA1_MSG2(dest, src) dest = _mm_sha1msg2_epu32(dest, src);
+
+
+#define LOAD_SHUFFLE(m, k) \
+ m = _mm_loadu_si128((const __m128i *)(const void *)(data + (k) * 16)); \
+ SHUFFLE_EPI8(m, mask); \
+
+#define SM1(m0, m1, m2, m3) \
+ SHA1_MSG1(m0, m1); \
+
+#define SM2(m0, m1, m2, m3) \
+ XOR_SI128(m3, m1); \
+ SHA1_MSG2(m3, m2); \
+
+#define SM3(m0, m1, m2, m3) \
+ XOR_SI128(m3, m1); \
+ SM1(m0, m1, m2, m3) \
+ SHA1_MSG2(m3, m2); \
+
+#define NNN(m0, m1, m2, m3)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#define R4(k, e0, e1, m0, m1, m2, m3, OP) \
+ e1 = abcd; \
+ SHA1_RND4(abcd, e0, (k) / 5); \
+ SHA1_NEXTE(e1, m1); \
+ OP(m0, m1, m2, m3); \
+
+#define R16(k, mx, OP0, OP1, OP2, OP3) \
+ R4 ( (k)*4+0, e0,e1, m0,m1,m2,m3, OP0 ) \
+ R4 ( (k)*4+1, e1,e0, m1,m2,m3,m0, OP1 ) \
+ R4 ( (k)*4+2, e0,e1, m2,m3,m0,m1, OP2 ) \
+ R4 ( (k)*4+3, e1,e0, m3,mx,m1,m2, OP3 ) \
+
+#define PREPARE_STATE \
+ SHUFFLE_EPI32 (abcd, 0x1B); \
+ SHUFFLE_EPI32 (e0, 0x1B); \
+
+
+
+
+
+void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks);
+#ifdef ATTRIB_SHA
+ATTRIB_SHA
+#endif
+void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks)
+{
+ const __m128i mask = _mm_set_epi32(0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f);
+
+ __m128i abcd, e0;
+
+ if (numBlocks == 0)
+ return;
+
+ abcd = _mm_loadu_si128((const __m128i *) (const void *) &state[0]); // dbca
+ e0 = _mm_cvtsi32_si128((int)state[4]); // 000e
+
+ PREPARE_STATE
+
+ do
+ {
+ __m128i abcd_save, e2;
+ __m128i m0, m1, m2, m3;
+ __m128i e1;
+
+
+ abcd_save = abcd;
+ e2 = e0;
+
+ LOAD_SHUFFLE (m0, 0)
+ LOAD_SHUFFLE (m1, 1)
+ LOAD_SHUFFLE (m2, 2)
+ LOAD_SHUFFLE (m3, 3)
+
+ ADD_EPI32(e0, m0);
+
+ R16 ( 0, m0, SM1, SM3, SM3, SM3 );
+ R16 ( 1, m0, SM3, SM3, SM3, SM3 );
+ R16 ( 2, m0, SM3, SM3, SM3, SM3 );
+ R16 ( 3, m0, SM3, SM3, SM3, SM3 );
+ R16 ( 4, e2, SM2, NNN, NNN, NNN );
+
+ ADD_EPI32(abcd, abcd_save);
+
+ data += 64;
+ }
+ while (--numBlocks);
+
+ PREPARE_STATE
+
+ _mm_storeu_si128((__m128i *) (void *) state, abcd);
+ *(state+4) = (UInt32)_mm_cvtsi128_si32(e0);
+}
+
+#endif // USE_HW_SHA
+
+#elif defined(MY_CPU_ARM_OR_ARM64)
+
+ #if defined(__clang__)
+ #if (__clang_major__ >= 8) // fix that check
+ #define USE_HW_SHA
+ #endif
+ #elif defined(__GNUC__)
+ #if (__GNUC__ >= 6) // fix that check
+ #define USE_HW_SHA
+ #endif
+ #elif defined(_MSC_VER)
+ #if _MSC_VER >= 1910
+ #define USE_HW_SHA
+ #endif
+ #endif
+
+#ifdef USE_HW_SHA
+
+// #pragma message("=== Sha1 HW === ")
+
+#if defined(__clang__) || defined(__GNUC__)
+ #ifdef MY_CPU_ARM64
+ #define ATTRIB_SHA __attribute__((__target__("+crypto")))
+ #else
+ #define ATTRIB_SHA __attribute__((__target__("fpu=crypto-neon-fp-armv8")))
+ #endif
+#else
+ // _MSC_VER
+ // for arm32
+ #define _ARM_USE_NEW_NEON_INTRINSICS
+#endif
+
+#if defined(_MSC_VER) && defined(MY_CPU_ARM64)
+#include <arm64_neon.h>
+#else
+#include <arm_neon.h>
+#endif
+
+typedef uint32x4_t v128;
+// typedef __n128 v128; // MSVC
+
+#ifdef MY_CPU_BE
+ #define MY_rev32_for_LE(x)
+#else
+ #define MY_rev32_for_LE(x) x = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(x)))
+#endif
+
+#define LOAD_128(_p) (*(const v128 *)(const void *)(_p))
+#define STORE_128(_p, _v) *(v128 *)(void *)(_p) = (_v)
+
+#define LOAD_SHUFFLE(m, k) \
+ m = LOAD_128((data + (k) * 16)); \
+ MY_rev32_for_LE(m); \
+
+#define SU0(dest, src2, src3) dest = vsha1su0q_u32(dest, src2, src3);
+#define SU1(dest, src) dest = vsha1su1q_u32(dest, src);
+#define C(e) abcd = vsha1cq_u32(abcd, e, t);
+#define P(e) abcd = vsha1pq_u32(abcd, e, t);
+#define M(e) abcd = vsha1mq_u32(abcd, e, t);
+#define H(e) e = vsha1h_u32(vgetq_lane_u32(abcd, 0))
+#define T(m, c) t = vaddq_u32(m, c)
+
+void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
+#ifdef ATTRIB_SHA
+ATTRIB_SHA
+#endif
+void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks)
+{
+ v128 abcd;
+ v128 c0, c1, c2, c3;
+ uint32_t e0;
+
+ if (numBlocks == 0)
+ return;
+
+ c0 = vdupq_n_u32(0x5a827999);
+ c1 = vdupq_n_u32(0x6ed9eba1);
+ c2 = vdupq_n_u32(0x8f1bbcdc);
+ c3 = vdupq_n_u32(0xca62c1d6);
+
+ abcd = LOAD_128(&state[0]);
+ e0 = state[4];
+
+ do
+ {
+ v128 abcd_save;
+ v128 m0, m1, m2, m3;
+ v128 t;
+ uint32_t e0_save, e1;
+
+ abcd_save = abcd;
+ e0_save = e0;
+
+ LOAD_SHUFFLE (m0, 0)
+ LOAD_SHUFFLE (m1, 1)
+ LOAD_SHUFFLE (m2, 2)
+ LOAD_SHUFFLE (m3, 3)
+
+ T(m0, c0); H(e1); C(e0);
+ T(m1, c0); SU0(m0, m1, m2); H(e0); C(e1);
+ T(m2, c0); SU0(m1, m2, m3); SU1(m0, m3); H(e1); C(e0);
+ T(m3, c0); SU0(m2, m3, m0); SU1(m1, m0); H(e0); C(e1);
+ T(m0, c0); SU0(m3, m0, m1); SU1(m2, m1); H(e1); C(e0);
+ T(m1, c1); SU0(m0, m1, m2); SU1(m3, m2); H(e0); P(e1);
+ T(m2, c1); SU0(m1, m2, m3); SU1(m0, m3); H(e1); P(e0);
+ T(m3, c1); SU0(m2, m3, m0); SU1(m1, m0); H(e0); P(e1);
+ T(m0, c1); SU0(m3, m0, m1); SU1(m2, m1); H(e1); P(e0);
+ T(m1, c1); SU0(m0, m1, m2); SU1(m3, m2); H(e0); P(e1);
+ T(m2, c2); SU0(m1, m2, m3); SU1(m0, m3); H(e1); M(e0);
+ T(m3, c2); SU0(m2, m3, m0); SU1(m1, m0); H(e0); M(e1);
+ T(m0, c2); SU0(m3, m0, m1); SU1(m2, m1); H(e1); M(e0);
+ T(m1, c2); SU0(m0, m1, m2); SU1(m3, m2); H(e0); M(e1);
+ T(m2, c2); SU0(m1, m2, m3); SU1(m0, m3); H(e1); M(e0);
+ T(m3, c3); SU0(m2, m3, m0); SU1(m1, m0); H(e0); P(e1);
+ T(m0, c3); SU0(m3, m0, m1); SU1(m2, m1); H(e1); P(e0);
+ T(m1, c3); SU1(m3, m2); H(e0); P(e1);
+ T(m2, c3); H(e1); P(e0);
+ T(m3, c3); H(e0); P(e1);
+
+ abcd = vaddq_u32(abcd, abcd_save);
+ e0 += e0_save;
+
+ data += 64;
+ }
+ while (--numBlocks);
+
+ STORE_128(&state[0], abcd);
+ state[4] = e0;
+}
+
+#endif // USE_HW_SHA
+
+#endif // MY_CPU_ARM_OR_ARM64
+
+
+#ifndef USE_HW_SHA
+
+// #error Stop_Compiling_UNSUPPORTED_SHA
+// #include <stdlib.h>
+
+// #include "Sha1.h"
+void MY_FAST_CALL Sha1_UpdateBlocks(UInt32 state[5], const Byte *data, size_t numBlocks);
+
+#pragma message("Sha1 HW-SW stub was used")
+
+void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks);
+void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks)
+{
+ Sha1_UpdateBlocks(state, data, numBlocks);
+ /*
+ UNUSED_VAR(state);
+ UNUSED_VAR(data);
+ UNUSED_VAR(numBlocks);
+ exit(1);
+ return;
+ */
+}
+
+#endif
diff --git a/multiarc/src/formats/7z/C/Sha256.c b/multiarc/src/formats/7z/C/Sha256.c
index 04b688c6..8b3983ea 100644..100755
--- a/multiarc/src/formats/7z/C/Sha256.c
+++ b/multiarc/src/formats/7z/C/Sha256.c
@@ -1,5 +1,5 @@
-/* Crypto/Sha256.c -- SHA-256 Hash
-2017-04-03 : Igor Pavlov : Public domain
+/* Sha256.c -- SHA-256 Hash
+2021-04-01 : Igor Pavlov : Public domain
This code is based on public domain code from Wei Dai's Crypto++ library. */
#include "Precomp.h"
@@ -10,16 +10,107 @@ This code is based on public domain code from Wei Dai's Crypto++ library. */
#include "RotateDefs.h"
#include "Sha256.h"
+#if defined(_MSC_VER) && (_MSC_VER < 1900)
+// #define USE_MY_MM
+#endif
+
+#ifdef MY_CPU_X86_OR_AMD64
+ #ifdef _MSC_VER
+ #if _MSC_VER >= 1200
+ #define _SHA_SUPPORTED
+ #endif
+ #elif defined(__clang__)
+ #if (__clang_major__ >= 8) // fix that check
+ #define _SHA_SUPPORTED
+ #endif
+ #elif defined(__GNUC__)
+ #if (__GNUC__ >= 8) // fix that check
+ #define _SHA_SUPPORTED
+ #endif
+ #elif defined(__INTEL_COMPILER)
+ #if (__INTEL_COMPILER >= 1800) // fix that check
+ #define _SHA_SUPPORTED
+ #endif
+ #endif
+#elif defined(MY_CPU_ARM_OR_ARM64)
+ #ifdef _MSC_VER
+ #if _MSC_VER >= 1910
+ #define _SHA_SUPPORTED
+ #endif
+ #elif defined(__clang__)
+ #if (__clang_major__ >= 8) // fix that check
+ #define _SHA_SUPPORTED
+ #endif
+ #elif defined(__GNUC__)
+ #if (__GNUC__ >= 6) // fix that check
+ #define _SHA_SUPPORTED
+ #endif
+ #endif
+#endif
+
+void MY_FAST_CALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks);
+
+#ifdef _SHA_SUPPORTED
+ void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
+
+ static SHA256_FUNC_UPDATE_BLOCKS g_FUNC_UPDATE_BLOCKS = Sha256_UpdateBlocks;
+ static SHA256_FUNC_UPDATE_BLOCKS g_FUNC_UPDATE_BLOCKS_HW;
+
+ #define UPDATE_BLOCKS(p) p->func_UpdateBlocks
+#else
+ #define UPDATE_BLOCKS(p) Sha256_UpdateBlocks
+#endif
+
+
+// Selects the block-update routine stored in p->func_UpdateBlocks.
+//   algo : SHA256_ALGO_DEFAULT, SHA256_ALGO_SW or SHA256_ALGO_HW
+// Returns True when the routine was set; returns False (leaving
+// p->func_UpdateBlocks untouched) for an unknown (algo) or when the
+// hardware routine was requested but is not available.
+BoolInt Sha256_SetFunction(CSha256 *p, unsigned algo)
+{
+  SHA256_FUNC_UPDATE_BLOCKS selected;
+
+  #ifdef _SHA_SUPPORTED
+  switch (algo)
+  {
+    case SHA256_ALGO_SW:
+      selected = Sha256_UpdateBlocks;
+      break;
+    case SHA256_ALGO_DEFAULT:
+      selected = g_FUNC_UPDATE_BLOCKS;
+      break;
+    case SHA256_ALGO_HW:
+      selected = g_FUNC_UPDATE_BLOCKS_HW;
+      if (!selected)
+        return False;
+      break;
+    default:
+      return False;
+  }
+  #else
+  // only DEFAULT (0) and SW (1) exist without hardware support
+  if (algo > 1)
+    return False;
+  selected = Sha256_UpdateBlocks;
+  #endif
+
+  p->func_UpdateBlocks = selected;
+  return True;
+}
+
+
/* define it for speed optimization */
-#ifndef _SFX
-#define _SHA256_UNROLL
-#define _SHA256_UNROLL2
+
+#ifdef _SFX
+ #define STEP_PRE 1
+ #define STEP_MAIN 1
+#else
+ #define STEP_PRE 2
+ #define STEP_MAIN 4
+ // #define _SHA256_UNROLL
#endif
-/* #define _SHA256_UNROLL2 */
+#if STEP_MAIN != 16
+ #define _SHA256_BIG_W
+#endif
-void Sha256_Init(CSha256 *p)
+
+
+
+void Sha256_InitState(CSha256 *p)
{
+ p->count = 0;
p->state[0] = 0x6a09e667;
p->state[1] = 0xbb67ae85;
p->state[2] = 0x3c6ef372;
@@ -28,7 +119,17 @@ void Sha256_Init(CSha256 *p)
p->state[5] = 0x9b05688c;
p->state[6] = 0x1f83d9ab;
p->state[7] = 0x5be0cd19;
- p->count = 0;
+}
+
+// Full initialization: picks the default block-update routine (NULL when
+// no alternative implementations are compiled in) and resets the hash state.
+void Sha256_Init(CSha256 *p)
+{
+  #ifdef _SHA_SUPPORTED
+  p->func_UpdateBlocks = g_FUNC_UPDATE_BLOCKS;
+  #else
+  p->func_UpdateBlocks = NULL;
+  #endif
+  Sha256_InitState(p);
}
#define S0(x) (rotrFixed(x, 2) ^ rotrFixed(x,13) ^ rotrFixed(x, 22))
@@ -36,61 +137,100 @@ void Sha256_Init(CSha256 *p)
#define s0(x) (rotrFixed(x, 7) ^ rotrFixed(x,18) ^ (x >> 3))
#define s1(x) (rotrFixed(x,17) ^ rotrFixed(x,19) ^ (x >> 10))
-#define blk0(i) (W[i])
-#define blk2(i) (W[i] += s1(W[((i)-2)&15]) + W[((i)-7)&15] + s0(W[((i)-15)&15]))
-
#define Ch(x,y,z) (z^(x&(y^z)))
#define Maj(x,y,z) ((x&y)|(z&(x|y)))
-#ifdef _SHA256_UNROLL2
-
-#define R(a,b,c,d,e,f,g,h, i) \
- h += S1(e) + Ch(e,f,g) + K[(i)+(size_t)(j)] + (j ? blk2(i) : blk0(i)); \
- d += h; \
- h += S0(a) + Maj(a, b, c)
-#define RX_8(i) \
- R(a,b,c,d,e,f,g,h, i); \
- R(h,a,b,c,d,e,f,g, i+1); \
- R(g,h,a,b,c,d,e,f, i+2); \
- R(f,g,h,a,b,c,d,e, i+3); \
- R(e,f,g,h,a,b,c,d, i+4); \
- R(d,e,f,g,h,a,b,c, i+5); \
- R(c,d,e,f,g,h,a,b, i+6); \
- R(b,c,d,e,f,g,h,a, i+7)
+#define W_PRE(i) (W[(i) + (size_t)(j)] = GetBe32(data + ((size_t)(j) + i) * 4))
-#define RX_16 RX_8(0); RX_8(8);
+#define blk2_main(j, i) s1(w(j, (i)-2)) + w(j, (i)-7) + s0(w(j, (i)-15))
+#ifdef _SHA256_BIG_W
+ // we use +i instead of +(i) to change the order to solve CLANG compiler warning for signed/unsigned.
+ #define w(j, i) W[(size_t)(j) + i]
+ #define blk2(j, i) (w(j, i) = w(j, (i)-16) + blk2_main(j, i))
#else
+ #if STEP_MAIN == 16
+ #define w(j, i) W[(i) & 15]
+ #else
+ #define w(j, i) W[((size_t)(j) + (i)) & 15]
+ #endif
+ #define blk2(j, i) (w(j, i) += blk2_main(j, i))
+#endif
-#define a(i) T[(0-(i))&7]
-#define b(i) T[(1-(i))&7]
-#define c(i) T[(2-(i))&7]
-#define d(i) T[(3-(i))&7]
-#define e(i) T[(4-(i))&7]
-#define f(i) T[(5-(i))&7]
-#define g(i) T[(6-(i))&7]
-#define h(i) T[(7-(i))&7]
-
-#define R(i) \
- h(i) += S1(e(i)) + Ch(e(i),f(i),g(i)) + K[(i)+(size_t)(j)] + (j ? blk2(i) : blk0(i)); \
- d(i) += h(i); \
- h(i) += S0(a(i)) + Maj(a(i), b(i), c(i)) \
+#define W_MAIN(i) blk2(j, i)
-#ifdef _SHA256_UNROLL
-#define RX_8(i) R(i+0); R(i+1); R(i+2); R(i+3); R(i+4); R(i+5); R(i+6); R(i+7);
-#define RX_16 RX_8(0); RX_8(8);
+#define T1(wx, i) \
+ tmp = h + S1(e) + Ch(e,f,g) + K[(i)+(size_t)(j)] + wx(i); \
+ h = g; \
+ g = f; \
+ f = e; \
+ e = d + tmp; \
+ tmp += S0(a) + Maj(a, b, c); \
+ d = c; \
+ c = b; \
+ b = a; \
+ a = tmp; \
-#else
+#define R1_PRE(i) T1( W_PRE, i)
+#define R1_MAIN(i) T1( W_MAIN, i)
-#define RX_16 unsigned i; for (i = 0; i < 16; i++) { R(i); }
+#if (!defined(_SHA256_UNROLL) || STEP_MAIN < 8) && (STEP_MAIN >= 4)
+#define R2_MAIN(i) \
+ R1_MAIN(i) \
+ R1_MAIN(i + 1) \
#endif
+
+
+#if defined(_SHA256_UNROLL) && STEP_MAIN >= 8
+
+#define T4( a,b,c,d,e,f,g,h, wx, i) \
+ h += S1(e) + Ch(e,f,g) + K[(i)+(size_t)(j)] + wx(i); \
+ tmp = h; \
+ h += d; \
+ d = tmp + S0(a) + Maj(a, b, c); \
+
+#define R4( wx, i) \
+ T4 ( a,b,c,d,e,f,g,h, wx, (i )); \
+ T4 ( d,a,b,c,h,e,f,g, wx, (i+1)); \
+ T4 ( c,d,a,b,g,h,e,f, wx, (i+2)); \
+ T4 ( b,c,d,a,f,g,h,e, wx, (i+3)); \
+
+#define R4_PRE(i) R4( W_PRE, i)
+#define R4_MAIN(i) R4( W_MAIN, i)
+
+
+#define T8( a,b,c,d,e,f,g,h, wx, i) \
+ h += S1(e) + Ch(e,f,g) + K[(i)+(size_t)(j)] + wx(i); \
+ d += h; \
+ h += S0(a) + Maj(a, b, c); \
+
+#define R8( wx, i) \
+ T8 ( a,b,c,d,e,f,g,h, wx, i ); \
+ T8 ( h,a,b,c,d,e,f,g, wx, i+1); \
+ T8 ( g,h,a,b,c,d,e,f, wx, i+2); \
+ T8 ( f,g,h,a,b,c,d,e, wx, i+3); \
+ T8 ( e,f,g,h,a,b,c,d, wx, i+4); \
+ T8 ( d,e,f,g,h,a,b,c, wx, i+5); \
+ T8 ( c,d,e,f,g,h,a,b, wx, i+6); \
+ T8 ( b,c,d,e,f,g,h,a, wx, i+7); \
+
+#define R8_PRE(i) R8( W_PRE, i)
+#define R8_MAIN(i) R8( W_MAIN, i)
+
#endif
-static const UInt32 K[64] = {
+void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
+
+// static
+extern MY_ALIGN(64)
+const UInt32 SHA256_K_ARRAY[64];
+
+MY_ALIGN(64)
+const UInt32 SHA256_K_ARRAY[64] = {
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
@@ -109,30 +249,27 @@ static const UInt32 K[64] = {
0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
};
-static void Sha256_WriteByteBlock(CSha256 *p)
-{
- UInt32 W[16];
- unsigned j;
- UInt32 *state;
+#define K SHA256_K_ARRAY
- #ifdef _SHA256_UNROLL2
- UInt32 a,b,c,d,e,f,g,h;
+
+MY_NO_INLINE
+void MY_FAST_CALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks)
+{
+ UInt32 W
+ #ifdef _SHA256_BIG_W
+ [64];
#else
- UInt32 T[8];
+ [16];
#endif
- for (j = 0; j < 16; j += 4)
- {
- const Byte *ccc = p->buffer + j * 4;
- W[j ] = GetBe32(ccc);
- W[j + 1] = GetBe32(ccc + 4);
- W[j + 2] = GetBe32(ccc + 8);
- W[j + 3] = GetBe32(ccc + 12);
- }
+ unsigned j;
- state = p->state;
+ UInt32 a,b,c,d,e,f,g,h;
- #ifdef _SHA256_UNROLL2
+ #if !defined(_SHA256_UNROLL) || (STEP_MAIN <= 4) || (STEP_PRE <= 4)
+ UInt32 tmp;
+ #endif
+
a = state[0];
b = state[1];
c = state[2];
@@ -141,39 +278,96 @@ static void Sha256_WriteByteBlock(CSha256 *p)
f = state[5];
g = state[6];
h = state[7];
- #else
- for (j = 0; j < 8; j++)
- T[j] = state[j];
- #endif
- for (j = 0; j < 64; j += 16)
+ while (numBlocks)
{
- RX_16
+
+ for (j = 0; j < 16; j += STEP_PRE)
+ {
+ #if STEP_PRE > 4
+
+ #if STEP_PRE < 8
+ R4_PRE(0);
+ #else
+ R8_PRE(0);
+ #if STEP_PRE == 16
+ R8_PRE(8);
+ #endif
+ #endif
+
+ #else
+
+ R1_PRE(0);
+ #if STEP_PRE >= 2
+ R1_PRE(1);
+ #if STEP_PRE >= 4
+ R1_PRE(2);
+ R1_PRE(3);
+ #endif
+ #endif
+
+ #endif
+ }
+
+ for (j = 16; j < 64; j += STEP_MAIN)
+ {
+ #if defined(_SHA256_UNROLL) && STEP_MAIN >= 8
+
+ #if STEP_MAIN < 8
+ R4_MAIN(0);
+ #else
+ R8_MAIN(0);
+ #if STEP_MAIN == 16
+ R8_MAIN(8);
+ #endif
+ #endif
+
+ #else
+
+ R1_MAIN(0);
+ #if STEP_MAIN >= 2
+ R1_MAIN(1);
+ #if STEP_MAIN >= 4
+ R2_MAIN(2);
+ #if STEP_MAIN >= 8
+ R2_MAIN(4);
+ R2_MAIN(6);
+ #if STEP_MAIN >= 16
+ R2_MAIN(8);
+ R2_MAIN(10);
+ R2_MAIN(12);
+ R2_MAIN(14);
+ #endif
+ #endif
+ #endif
+ #endif
+ #endif
+ }
+
+ a += state[0]; state[0] = a;
+ b += state[1]; state[1] = b;
+ c += state[2]; state[2] = c;
+ d += state[3]; state[3] = d;
+ e += state[4]; state[4] = e;
+ f += state[5]; state[5] = f;
+ g += state[6]; state[6] = g;
+ h += state[7]; state[7] = h;
+
+ data += 64;
+ numBlocks--;
}
- #ifdef _SHA256_UNROLL2
- state[0] += a;
- state[1] += b;
- state[2] += c;
- state[3] += d;
- state[4] += e;
- state[5] += f;
- state[6] += g;
- state[7] += h;
- #else
- for (j = 0; j < 8; j++)
- state[j] += T[j];
- #endif
-
/* Wipe variables */
/* memset(W, 0, sizeof(W)); */
- /* memset(T, 0, sizeof(T)); */
}
#undef S0
#undef S1
#undef s0
#undef s1
+#undef K
+
+#define Sha256_UpdateBlock(p) UPDATE_BLOCKS(p)(p->state, p->buffer, 1)
void Sha256_Update(CSha256 *p, const Byte *data, size_t size)
{
@@ -193,25 +387,26 @@ void Sha256_Update(CSha256 *p, const Byte *data, size_t size)
return;
}
- size -= num;
- memcpy(p->buffer + pos, data, num);
- data += num;
+ if (pos != 0)
+ {
+ size -= num;
+ memcpy(p->buffer + pos, data, num);
+ data += num;
+ Sha256_UpdateBlock(p);
+ }
}
-
- for (;;)
{
- Sha256_WriteByteBlock(p);
- if (size < 64)
- break;
- size -= 64;
- memcpy(p->buffer, data, 64);
- data += 64;
- }
-
- if (size != 0)
+ size_t numBlocks = size >> 6;
+ UPDATE_BLOCKS(p)(p->state, data, numBlocks);
+ size &= 0x3F;
+ if (size == 0)
+ return;
+ data += (numBlocks << 6);
memcpy(p->buffer, data, size);
+ }
}
+
void Sha256_Final(CSha256 *p, Byte *digest)
{
unsigned pos = (unsigned)p->count & 0x3F;
@@ -219,13 +414,30 @@ void Sha256_Final(CSha256 *p, Byte *digest)
p->buffer[pos++] = 0x80;
- while (pos != (64 - 8))
+ if (pos > (64 - 8))
+ {
+ while (pos != 64) { p->buffer[pos++] = 0; }
+ // memset(&p->buf.buffer[pos], 0, 64 - pos);
+ Sha256_UpdateBlock(p);
+ pos = 0;
+ }
+
+ /*
+ if (pos & 3)
{
- pos &= 0x3F;
- if (pos == 0)
- Sha256_WriteByteBlock(p);
- p->buffer[pos++] = 0;
+ p->buffer[pos] = 0;
+ p->buffer[pos + 1] = 0;
+ p->buffer[pos + 2] = 0;
+ pos += 3;
+ pos &= ~3;
}
+ {
+ for (; pos < 64 - 8; pos += 4)
+ *(UInt32 *)(&p->buffer[pos]) = 0;
+ }
+ */
+
+ memset(&p->buffer[pos], 0, (64 - 8) - pos);
{
UInt64 numBits = (p->count << 3);
@@ -233,16 +445,42 @@ void Sha256_Final(CSha256 *p, Byte *digest)
SetBe32(p->buffer + 64 - 4, (UInt32)(numBits));
}
- Sha256_WriteByteBlock(p);
+ Sha256_UpdateBlock(p);
for (i = 0; i < 8; i += 2)
{
UInt32 v0 = p->state[i];
- UInt32 v1 = p->state[i + 1];
+ UInt32 v1 = p->state[(size_t)i + 1];
SetBe32(digest , v0);
SetBe32(digest + 4, v1);
digest += 8;
}
- Sha256_Init(p);
+ Sha256_InitState(p);
+}
+
+
+// Probes the CPU for SHA-256 instruction support and records the routines
+// later handed out by Sha256_Init() / Sha256_SetFunction():
+//   g_FUNC_UPDATE_BLOCKS    - default routine (hardware one if usable)
+//   g_FUNC_UPDATE_BLOCKS_HW - hardware routine, or NULL if not usable
+// Does nothing when _SHA_SUPPORTED is not defined.
+// The parameter list is spelled (void) to match the prototype in Sha256.h.
+void Sha256Prepare(void)
+{
+  #ifdef _SHA_SUPPORTED
+  SHA256_FUNC_UPDATE_BLOCKS f, f_hw;
+  f = Sha256_UpdateBlocks;
+  f_hw = NULL;
+  #ifdef MY_CPU_X86_OR_AMD64
+  #ifndef USE_MY_MM
+  if (CPU_IsSupported_SHA()
+      && CPU_IsSupported_SSSE3()
+      // && CPU_IsSupported_SSE41()
+      )
+  #endif
+  #else
+  if (CPU_IsSupported_SHA2())
+  #endif
+  {
+    // printf("\n========== HW SHA256 ======== \n");
+    f = f_hw = Sha256_UpdateBlocks_HW;
+  }
+  g_FUNC_UPDATE_BLOCKS = f;
+  g_FUNC_UPDATE_BLOCKS_HW = f_hw;
+  #endif
}
diff --git a/multiarc/src/formats/7z/C/Sha256.h b/multiarc/src/formats/7z/C/Sha256.h
index 3f455dbc..aa38501e 100644..100755
--- a/multiarc/src/formats/7z/C/Sha256.h
+++ b/multiarc/src/formats/7z/C/Sha256.h
@@ -1,26 +1,76 @@
/* Sha256.h -- SHA-256 Hash
-2013-01-18 : Igor Pavlov : Public domain */
+2021-01-01 : Igor Pavlov : Public domain */
-#ifndef __CRYPTO_SHA256_H
-#define __CRYPTO_SHA256_H
+#ifndef __7Z_SHA256_H
+#define __7Z_SHA256_H
#include "7zTypes.h"
EXTERN_C_BEGIN
-#define SHA256_DIGEST_SIZE 32
+#define SHA256_NUM_BLOCK_WORDS 16
+#define SHA256_NUM_DIGEST_WORDS 8
+
+#define SHA256_BLOCK_SIZE (SHA256_NUM_BLOCK_WORDS * 4)
+#define SHA256_DIGEST_SIZE (SHA256_NUM_DIGEST_WORDS * 4)
+
+typedef void (MY_FAST_CALL *SHA256_FUNC_UPDATE_BLOCKS)(UInt32 state[8], const Byte *data, size_t numBlocks);
+
+/*
+ if (the system supports different SHA256 code implementations)
+ {
+ (CSha256::func_UpdateBlocks) will be used
+ (CSha256::func_UpdateBlocks) can be set by
+ Sha256_Init() - to default (fastest)
+ Sha256_SetFunction() - to any algo
+ }
+ else
+ {
+ (CSha256::func_UpdateBlocks) is ignored.
+ }
+*/
typedef struct
{
- UInt32 state[8];
+ SHA256_FUNC_UPDATE_BLOCKS func_UpdateBlocks;
UInt64 count;
- Byte buffer[64];
+ UInt64 __pad_2[2];
+ UInt32 state[SHA256_NUM_DIGEST_WORDS];
+
+ Byte buffer[SHA256_BLOCK_SIZE];
} CSha256;
+
+#define SHA256_ALGO_DEFAULT 0
+#define SHA256_ALGO_SW 1
+#define SHA256_ALGO_HW 2
+
+/*
+Sha256_SetFunction()
+return:
+ 0 - (algo) value is not supported, and func_UpdateBlocks was not changed
+ 1 - func_UpdateBlocks was set according (algo) value.
+*/
+
+BoolInt Sha256_SetFunction(CSha256 *p, unsigned algo);
+
+void Sha256_InitState(CSha256 *p);
void Sha256_Init(CSha256 *p);
void Sha256_Update(CSha256 *p, const Byte *data, size_t size);
void Sha256_Final(CSha256 *p, Byte *digest);
+
+
+
+// void MY_FAST_CALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks);
+
+/*
+call Sha256Prepare() once at program start.
+It prepares all supported implementations, and detects the fastest implementation.
+*/
+
+void Sha256Prepare(void);
+
EXTERN_C_END
#endif
diff --git a/multiarc/src/formats/7z/C/Sha256Opt.c b/multiarc/src/formats/7z/C/Sha256Opt.c
new file mode 100755
index 00000000..decc1382
--- /dev/null
+++ b/multiarc/src/formats/7z/C/Sha256Opt.c
@@ -0,0 +1,373 @@
+/* Sha256Opt.c -- SHA-256 optimized code for SHA-256 hardware instructions
+2021-04-01 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#if defined(_MSC_VER)
+#if (_MSC_VER < 1900) && (_MSC_VER >= 1200)
+// #define USE_MY_MM
+#endif
+#endif
+
+#include "CpuArch.h"
+
+#ifdef MY_CPU_X86_OR_AMD64
+ #if defined(__clang__)
+ #if (__clang_major__ >= 8) // fix that check
+ #define USE_HW_SHA
+ #ifndef __SHA__
+ #define ATTRIB_SHA __attribute__((__target__("sha,ssse3")))
+ #if defined(_MSC_VER)
+ // SSSE3: for clang-cl:
+ #include <tmmintrin.h>
+ #define __SHA__
+ #endif
+ #endif
+
+ #endif
+ #elif defined(__GNUC__)
+ #if (__GNUC__ >= 8) // fix that check
+ #define USE_HW_SHA
+ #ifndef __SHA__
+ #define ATTRIB_SHA __attribute__((__target__("sha,ssse3")))
+ // #pragma GCC target("sha,ssse3")
+ #endif
+ #endif
+ #elif defined(__INTEL_COMPILER)
+ #if (__INTEL_COMPILER >= 1800) // fix that check
+ #define USE_HW_SHA
+ #endif
+ #elif defined(_MSC_VER)
+ #ifdef USE_MY_MM
+ #define USE_VER_MIN 1300
+ #else
+ #define USE_VER_MIN 1910
+ #endif
+ #if _MSC_VER >= USE_VER_MIN
+ #define USE_HW_SHA
+ #endif
+ #endif
+// #endif // MY_CPU_X86_OR_AMD64
+
+#ifdef USE_HW_SHA
+
+// #pragma message("Sha256 HW")
+// #include <wmmintrin.h>
+
+#if !defined(_MSC_VER) || (_MSC_VER >= 1900)
+#include <immintrin.h>
+#else
+#include <emmintrin.h>
+
+#if defined(_MSC_VER) && (_MSC_VER >= 1600)
+// #include <intrin.h>
+#endif
+
+#ifdef USE_MY_MM
+#include "My_mm.h"
+#endif
+
+#endif
+
+/*
+SHA256 uses:
+SSE2:
+ _mm_loadu_si128
+ _mm_storeu_si128
+ _mm_set_epi32
+ _mm_add_epi32
+ _mm_shuffle_epi32 / pshufd
+
+
+
+SSSE3:
+ _mm_shuffle_epi8 / pshufb
+ _mm_alignr_epi8
+SHA:
+ _mm_sha256*
+*/
+
+// K array must be aligned for 16-bytes at least.
+// The compiler can look align attribute and selects
+// movdqu - for code without align attribute
+// movdqa - for code with align attribute
+extern
+MY_ALIGN(64)
+const UInt32 SHA256_K_ARRAY[64];
+
+#define K SHA256_K_ARRAY
+
+
+#define ADD_EPI32(dest, src) dest = _mm_add_epi32(dest, src);
+#define SHA256_MSG1(dest, src) dest = _mm_sha256msg1_epu32(dest, src);
+#define SHA25G_MSG2(dest, src) dest = _mm_sha256msg2_epu32(dest, src);
+
+
+#define LOAD_SHUFFLE(m, k) \
+ m = _mm_loadu_si128((const __m128i *)(const void *)(data + (k) * 16)); \
+ m = _mm_shuffle_epi8(m, mask); \
+
+#define SM1(g0, g1, g2, g3) \
+ SHA256_MSG1(g3, g0); \
+
+#define SM2(g0, g1, g2, g3) \
+ tmp = _mm_alignr_epi8(g1, g0, 4); \
+ ADD_EPI32(g2, tmp); \
+ SHA25G_MSG2(g2, g1); \
+
+// #define LS0(k, g0, g1, g2, g3) LOAD_SHUFFLE(g0, k)
+// #define LS1(k, g0, g1, g2, g3) LOAD_SHUFFLE(g1, k+1)
+
+
+#define NNN(g0, g1, g2, g3)
+
+
+#define RND2(t0, t1) \
+ t0 = _mm_sha256rnds2_epu32(t0, t1, msg);
+
+#define RND2_0(m, k) \
+ msg = _mm_add_epi32(m, *(const __m128i *) (const void *) &K[(k) * 4]); \
+ RND2(state0, state1); \
+ msg = _mm_shuffle_epi32(msg, 0x0E); \
+
+
+#define RND2_1 \
+ RND2(state1, state0); \
+
+
+// We use scheme with 3 rounds ahead for SHA256_MSG1 / 2 rounds ahead for SHA256_MSG2
+
+#define R4(k, g0, g1, g2, g3, OP0, OP1) \
+ RND2_0(g0, k); \
+ OP0(g0, g1, g2, g3); \
+ RND2_1; \
+ OP1(g0, g1, g2, g3); \
+
+#define R16(k, OP0, OP1, OP2, OP3, OP4, OP5, OP6, OP7) \
+ R4 ( (k)*4+0, m0, m1, m2, m3, OP0, OP1 ) \
+ R4 ( (k)*4+1, m1, m2, m3, m0, OP2, OP3 ) \
+ R4 ( (k)*4+2, m2, m3, m0, m1, OP4, OP5 ) \
+ R4 ( (k)*4+3, m3, m0, m1, m2, OP6, OP7 ) \
+
+#define PREPARE_STATE \
+ tmp = _mm_shuffle_epi32(state0, 0x1B); /* abcd */ \
+ state0 = _mm_shuffle_epi32(state1, 0x1B); /* efgh */ \
+ state1 = state0; \
+ state0 = _mm_unpacklo_epi64(state0, tmp); /* cdgh */ \
+ state1 = _mm_unpackhi_epi64(state1, tmp); /* abef */ \
+
+
+void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
+#ifdef ATTRIB_SHA // per-function target attribute, defined above only when the compiler needs it
+ATTRIB_SHA
+#endif
+void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks) // x86 SHA-extension path: folds numBlocks 64-byte blocks from data into state
+{
+ const __m128i mask = _mm_set_epi32(0x0c0d0e0f, 0x08090a0b, 0x04050607, 0x00010203); // pshufb control used by LOAD_SHUFFLE: reverses the bytes inside each 32-bit word
+ __m128i tmp;
+ __m128i state0, state1;
+
+ if (numBlocks == 0) // the do/while below always runs at least once, so bail out early
+ return;
+
+ state0 = _mm_loadu_si128((const __m128i *) (const void *) &state[0]); // unaligned loads: state[0..3] and state[4..7]
+ state1 = _mm_loadu_si128((const __m128i *) (const void *) &state[4]);
+
+ PREPARE_STATE // regroup the two vectors (see the macro's abcd/efgh -> cdgh/abef notes) before the rounds
+
+ do
+ {
+ __m128i state0_save, state1_save;
+ __m128i m0, m1, m2, m3;
+ __m128i msg;
+ // #define msg tmp
+
+ state0_save = state0; // keep the incoming state for the feed-forward addition after the rounds
+ state1_save = state1;
+
+ LOAD_SHUFFLE (m0, 0) // four 16-byte loads cover the 64-byte block
+ LOAD_SHUFFLE (m1, 1)
+ LOAD_SHUFFLE (m2, 2)
+ LOAD_SHUFFLE (m3, 3)
+
+
+
+ R16 ( 0, NNN, NNN, SM1, NNN, SM1, SM2, SM1, SM2 ); // 4 x R16 = 64 rounds; SM1/SM2 run the message schedule ahead, NNN is a no-op slot
+ R16 ( 1, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 );
+ R16 ( 2, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 );
+ R16 ( 3, SM1, SM2, NNN, SM2, NNN, NNN, NNN, NNN );
+
+ ADD_EPI32(state0, state0_save); // feed-forward: add the state saved at the top of the block
+ ADD_EPI32(state1, state1_save);
+
+ data += 64;
+ }
+ while (--numBlocks);
+
+ PREPARE_STATE // same regrouping macro is applied again before the state is written back
+
+ _mm_storeu_si128((__m128i *) (void *) &state[0], state0);
+ _mm_storeu_si128((__m128i *) (void *) &state[4], state1);
+}
+
+#endif // USE_HW_SHA
+
+#elif defined(MY_CPU_ARM_OR_ARM64)
+
+ #if defined(__clang__)
+ #if (__clang_major__ >= 8) // fix that check
+ #define USE_HW_SHA
+ #endif
+ #elif defined(__GNUC__)
+ #if (__GNUC__ >= 6) // fix that check
+ #define USE_HW_SHA
+ #endif
+ #elif defined(_MSC_VER)
+ #if _MSC_VER >= 1910
+ #define USE_HW_SHA
+ #endif
+ #endif
+
+#ifdef USE_HW_SHA
+
+// #pragma message("=== Sha256 HW === ")
+
+#if defined(__clang__) || defined(__GNUC__)
+ #ifdef MY_CPU_ARM64
+ #define ATTRIB_SHA __attribute__((__target__("+crypto")))
+ #else
+ #define ATTRIB_SHA __attribute__((__target__("fpu=crypto-neon-fp-armv8")))
+ #endif
+#else
+ // _MSC_VER
+ // for arm32
+ #define _ARM_USE_NEW_NEON_INTRINSICS
+#endif
+
+#if defined(_MSC_VER) && defined(MY_CPU_ARM64)
+#include <arm64_neon.h>
+#else
+#include <arm_neon.h>
+#endif
+
+typedef uint32x4_t v128;
+// typedef __n128 v128; // MSVC
+
+#ifdef MY_CPU_BE
+ #define MY_rev32_for_LE(x)
+#else
+ #define MY_rev32_for_LE(x) x = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(x)))
+#endif
+
+#define LOAD_128(_p) (*(const v128 *)(const void *)(_p))
+#define STORE_128(_p, _v) *(v128 *)(void *)(_p) = (_v)
+
+#define LOAD_SHUFFLE(m, k) \
+ m = LOAD_128((data + (k) * 16)); \
+ MY_rev32_for_LE(m); \
+
+// K array must be aligned for 16-bytes at least.
+extern
+MY_ALIGN(64)
+const UInt32 SHA256_K_ARRAY[64];
+
+#define K SHA256_K_ARRAY
+
+
+#define SHA256_SU0(dest, src) dest = vsha256su0q_u32(dest, src);
+#define SHA25G_SU1(dest, src2, src3) dest = vsha256su1q_u32(dest, src2, src3);
+
+#define SM1(g0, g1, g2, g3) SHA256_SU0(g3, g0)
+#define SM2(g0, g1, g2, g3) SHA25G_SU1(g2, g0, g1)
+#define NNN(g0, g1, g2, g3)
+
+
+#define R4(k, g0, g1, g2, g3, OP0, OP1) \
+ msg = vaddq_u32(g0, *(const v128 *) (const void *) &K[(k) * 4]); \
+ tmp = state0; \
+ state0 = vsha256hq_u32( state0, state1, msg ); \
+ state1 = vsha256h2q_u32( state1, tmp, msg ); \
+ OP0(g0, g1, g2, g3); \
+ OP1(g0, g1, g2, g3); \
+
+
+#define R16(k, OP0, OP1, OP2, OP3, OP4, OP5, OP6, OP7) \
+ R4 ( (k)*4+0, m0, m1, m2, m3, OP0, OP1 ) \
+ R4 ( (k)*4+1, m1, m2, m3, m0, OP2, OP3 ) \
+ R4 ( (k)*4+2, m2, m3, m0, m1, OP4, OP5 ) \
+ R4 ( (k)*4+3, m3, m0, m1, m2, OP6, OP7 ) \
+
+
+void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
+#ifdef ATTRIB_SHA // defined above for clang/gcc only
+ATTRIB_SHA
+#endif
+void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks) // ARM crypto-extension path: folds numBlocks 64-byte blocks from data into state
+{
+ v128 state0, state1;
+
+ if (numBlocks == 0) // the do/while below always runs at least once, so bail out early
+ return;
+
+ state0 = LOAD_128(&state[0]); // state[0..3]
+ state1 = LOAD_128(&state[4]); // state[4..7]
+
+ do
+ {
+ v128 state0_save, state1_save;
+ v128 m0, m1, m2, m3;
+ v128 msg, tmp; // scratch variables referenced by the R4 macro
+
+ state0_save = state0; // keep the incoming state for the feed-forward addition after the rounds
+ state1_save = state1;
+
+ LOAD_SHUFFLE (m0, 0) // four 16-byte loads cover the 64-byte block; words are byte-reversed unless MY_CPU_BE
+ LOAD_SHUFFLE (m1, 1)
+ LOAD_SHUFFLE (m2, 2)
+ LOAD_SHUFFLE (m3, 3)
+
+ R16 ( 0, NNN, NNN, SM1, NNN, SM1, SM2, SM1, SM2 ); // 4 x R16 = 64 rounds; SM1/SM2 update the message schedule, NNN is a no-op slot
+ R16 ( 1, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 );
+ R16 ( 2, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 );
+ R16 ( 3, SM1, SM2, NNN, SM2, NNN, NNN, NNN, NNN );
+
+ state0 = vaddq_u32(state0, state0_save); // feed-forward: add the state saved at the top of the block
+ state1 = vaddq_u32(state1, state1_save);
+
+ data += 64;
+ }
+ while (--numBlocks);
+
+ STORE_128(&state[0], state0);
+ STORE_128(&state[4], state1);
+}
+
+#endif // USE_HW_SHA
+
+#endif // MY_CPU_ARM_OR_ARM64
+
+
+#ifndef USE_HW_SHA
+
+// #error Stop_Compiling_UNSUPPORTED_SHA
+// #include <stdlib.h>
+
+// #include "Sha256.h"
+void MY_FAST_CALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks);
+
+#pragma message("Sha256 HW-SW stub was used")
+
+void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
+// Stand-in compiled when USE_HW_SHA is not defined: the "hardware" entry
+// point simply forwards to the portable Sha256_UpdateBlocks() implementation.
+void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks)
+{
+  Sha256_UpdateBlocks(state, data, numBlocks);
+}
+
+#endif
diff --git a/multiarc/src/formats/7z/C/Sort.c b/multiarc/src/formats/7z/C/Sort.c
index e1097e38..e1097e38 100644..100755
--- a/multiarc/src/formats/7z/C/Sort.c
+++ b/multiarc/src/formats/7z/C/Sort.c
diff --git a/multiarc/src/formats/7z/C/Sort.h b/multiarc/src/formats/7z/C/Sort.h
index 2e2963a2..2e2963a2 100644..100755
--- a/multiarc/src/formats/7z/C/Sort.h
+++ b/multiarc/src/formats/7z/C/Sort.h
diff --git a/multiarc/src/formats/7z/C/Threads.c b/multiarc/src/formats/7z/C/Threads.c
index 930ad271..58eb90ff 100644..100755
--- a/multiarc/src/formats/7z/C/Threads.c
+++ b/multiarc/src/formats/7z/C/Threads.c
@@ -1,9 +1,11 @@
/* Threads.c -- multithreading library
-2017-06-26 : Igor Pavlov : Public domain */
+2021-12-21 : Igor Pavlov : Public domain */
#include "Precomp.h"
-#ifndef UNDER_CE
+#ifdef _WIN32
+
+#ifndef USE_THREADS_CreateThread
#include <process.h>
#endif
@@ -29,28 +31,103 @@ WRes HandlePtr_Close(HANDLE *p)
return 0;
}
-WRes Handle_WaitObject(HANDLE h) { return (WRes)WaitForSingleObject(h, INFINITE); }
+// Waits (without timeout) for (h) and maps the outcome to WRes.
+//   WaitForSingleObject() results:
+//     WAIT_OBJECT_0   // 0
+//     WAIT_ABANDONED  // 0x00000080 : is not compatible with Win32 Error space
+//     WAIT_TIMEOUT    // 0x00000102 : is compatible with Win32 Error space
+//     WAIT_FAILED     // 0xFFFFFFFF
+// On WAIT_FAILED the GetLastError() code is returned instead, unless that
+// code is 0, in which case WAIT_FAILED itself is returned.
+WRes Handle_WaitObject(HANDLE h)
+{
+  const DWORD waitRes = WaitForSingleObject(h, INFINITE);
+  if (waitRes != WAIT_FAILED)
+    return (WRes)waitRes;
+  {
+    const DWORD lastError = GetLastError();
+    if (lastError == 0)
+      return WAIT_FAILED;
+    return (WRes)lastError;
+  }
+}
+
+#define Thread_Wait(p) Handle_WaitObject(*(p))
+
+// Waits for the thread, then closes its handle (the close is attempted even
+// if the wait failed). The wait error takes precedence in the result.
+WRes Thread_Wait_Close(CThread *p)
+{
+  const WRes waitRes = Thread_Wait(p);
+  const WRes closeRes = Thread_Close(p);
+  if (waitRes != 0)
+    return waitRes;
+  return closeRes;
+}
WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param)
{
/* Windows Me/98/95: threadId parameter may not be NULL in _beginthreadex/CreateThread functions */
-
- #ifdef UNDER_CE
-
- DWORD threadId;
- *p = CreateThread(0, 0, func, param, 0, &threadId);
- #else
+ #ifdef USE_THREADS_CreateThread
+ DWORD threadId;
+ *p = CreateThread(NULL, 0, func, param, 0, &threadId);
+
+ #else
+
unsigned threadId;
- *p = (HANDLE)_beginthreadex(NULL, 0, func, param, 0, &threadId);
-
+ *p = (HANDLE)(_beginthreadex(NULL, 0, func, param, 0, &threadId));
+
#endif
/* maybe we must use errno here, but probably GetLastError() is also OK. */
return HandleToWRes(*p);
}
+
+// Creates a thread and applies (affinity) to it before it starts running.
+// With USE_THREADS_CreateThread the mask is ignored and Thread_Create() is used.
+// Otherwise the thread is created suspended, the mask is applied (a failure
+// to set it is treated as non-critical and ignored), and the thread is resumed.
+WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, CAffinityMask affinity)
+{
+  #ifdef USE_THREADS_CreateThread
+
+  UNUSED_VAR(affinity)
+  return Thread_Create(p, func, param);
+
+  #else
+
+  /* Windows Me/98/95: threadId parameter may not be NULL in _beginthreadex/CreateThread functions */
+  unsigned threadId;
+  WRes wres;
+  HANDLE h = (HANDLE)(_beginthreadex(NULL, 0, func, param, CREATE_SUSPENDED, &threadId));
+
+  *p = h;
+  /* maybe we must use errno here, but probably GetLastError() is also OK. */
+  wres = HandleToWRes(h);
+  if (!h)
+    return wres;
+
+  SetThreadAffinityMask(h, (DWORD_PTR)affinity);
+
+  /* ResumeThread() returns:
+       0 : was_not_suspended
+       1 : was_resumed
+      -1 : error
+  */
+  if (ResumeThread(h) == (DWORD)-1)
+    wres = GetError();
+
+  return wres;
+
+  #endif
+}
+
+
static WRes Event_Create(CEvent *p, BOOL manualReset, int signaled)
{
*p = CreateEvent(NULL, manualReset, (signaled ? TRUE : FALSE), NULL);
@@ -68,10 +145,22 @@ WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p) { return AutoResetEven
WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount)
{
+ // negative ((LONG)maxCount) is not supported in WIN32::CreateSemaphore()
*p = CreateSemaphore(NULL, (LONG)initCount, (LONG)maxCount, NULL);
return HandleToWRes(*p);
}
+// (Re)creates the semaphore: closes whatever (p) currently holds, then
+// creates a fresh one with the given counts. A close error aborts the call.
+WRes Semaphore_OptCreateInit(CSemaphore *p, UInt32 initCount, UInt32 maxCount)
+{
+  // if (Semaphore_IsCreated(p))
+  const WRes closeRes = Semaphore_Close(p);
+  if (closeRes != 0)
+    return closeRes;
+  return Semaphore_Create(p, initCount, maxCount);
+}
+
static WRes Semaphore_Release(CSemaphore *p, LONG releaseCount, LONG *previousCount)
{ return BOOLToWRes(ReleaseSemaphore(*p, releaseCount, previousCount)); }
WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 num)
@@ -80,7 +169,9 @@ WRes Semaphore_Release1(CSemaphore *p) { return Semaphore_ReleaseN(p, 1); }
WRes CriticalSection_Init(CCriticalSection *p)
{
- /* InitializeCriticalSection can raise only STATUS_NO_MEMORY exception */
+ /* InitializeCriticalSection() can raise exception:
+ Windows XP, 2003 : can raise a STATUS_NO_MEMORY exception
+ Windows Vista+ : no exceptions */
#ifdef _MSC_VER
__try
#endif
@@ -89,7 +180,361 @@ WRes CriticalSection_Init(CCriticalSection *p)
/* InitializeCriticalSectionAndSpinCount(p, 0); */
}
#ifdef _MSC_VER
- __except (EXCEPTION_EXECUTE_HANDLER) { return 1; }
+ __except (EXCEPTION_EXECUTE_HANDLER) { return ERROR_NOT_ENOUGH_MEMORY; }
#endif
return 0;
}
+
+
+
+
+#else // _WIN32
+
+// ---------- POSIX ----------
+
+#ifndef __APPLE__
+#ifndef _7ZIP_AFFINITY_DISABLE
+// _GNU_SOURCE can be required for pthread_setaffinity_np() / CPU_ZERO / CPU_SET
+#define _GNU_SOURCE
+#endif
+#endif
+
+#include "Threads.h"
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#ifdef _7ZIP_AFFINITY_SUPPORTED
+// #include <sched.h>
+#endif
+
+
+// #include <stdio.h>
+// #define PRF(p) p
+#define PRF(p)
+
+#define Print(s) PRF(printf("\n%s\n", s))
+
+// #include <stdio.h>
+
+// Starts a joinable thread running func(param).
+//   cpuSet : optional affinity set; applied through the thread attributes
+//            only when _7ZIP_AFFINITY_SUPPORTED is defined. Failure to apply
+//            it is ignored.
+// Returns 0 on success (p->_created is then 1) or the pthread error code.
+WRes Thread_Create_With_CpuSet(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, const CCpuSet *cpuSet)
+{
+  // new thread in Posix probably inherits affinity from parent thread
+  Print("Thread_Create_With_CpuSet");
+
+  pthread_attr_t attr;
+  int res;
+
+  p->_created = 0;
+
+  RINOK(pthread_attr_init(&attr));
+
+  res = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
+
+  if (res == 0)
+  {
+    if (cpuSet)
+    {
+      #ifdef _7ZIP_AFFINITY_SUPPORTED
+      // result is not checked
+      pthread_attr_setaffinity_np(&attr, sizeof(*cpuSet), cpuSet);
+      #endif
+    }
+
+    res = pthread_create(&p->_tid, &attr, func, param);
+    if (res == 0)
+      p->_created = 1;
+  }
+
+  // the attribute object is released on every path; its result is not checked
+  pthread_attr_destroy(&attr);
+  return res;
+}
+
+
+WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param) // creates a thread without any affinity request
+{
+ return Thread_Create_With_CpuSet(p, func, param, NULL); // NULL cpuSet: the affinity step is skipped
+}
+
+
+// Converts the bit mask (affinity) into a CCpuSet (bit i -> cpu i) and
+// creates the thread with that set.
+WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, CAffinityMask affinity)
+{
+  Print("Thread_Create_WithAffinity");
+  CCpuSet cpus;
+  unsigned bit = 0;
+  CpuSet_Zero(&cpus);
+  // walk the mask from bit 0 upward; stop as soon as no set bits remain
+  while (bit < sizeof(affinity) * 8 && affinity != 0)
+  {
+    if (affinity & 1)
+    {
+      CpuSet_Set(&cpus, bit);
+    }
+    affinity >>= 1;
+    bit++;
+  }
+  return Thread_Create_With_CpuSet(p, func, param, &cpus);
+}
+
+
+// Detaches the thread (if one was created) and clears the handle.
+// Returns 0 when there is nothing to close, else the pthread_detach() result.
+WRes Thread_Close(CThread *p)
+{
+  // Print("Thread_Close");
+  int res = 0;
+  if (p->_created)
+  {
+    res = pthread_detach(p->_tid);
+    p->_tid = 0;
+    p->_created = 0;
+  }
+  return res;
+}
+
+
+// Joins the thread and clears the handle.
+// Returns EINVAL when no thread was created, else the pthread_join() result.
+WRes Thread_Wait_Close(CThread *p)
+{
+  // Print("Thread_Wait_Close");
+  int res = EINVAL;
+  if (p->_created)
+  {
+    void *exitValue;
+    res = pthread_join(p->_tid, &exitValue);
+    // probably we can't use that (_tid) after pthread_join(), so we close thread here
+    p->_created = 0;
+    p->_tid = 0;
+  }
+  return res;
+}
+
+
+
+// Initializes the mutex / condition variable pair that backs an event.
+//   manualReset : False - Event_Wait() clears the signaled state when it
+//                 returns; otherwise the state stays set until Event_Reset()
+//   signaled    : nonzero - the event starts in the signaled state
+// Returns 0 on success or the pthread error code. On failure nothing is
+// left initialized and p->_created is not set.
+static WRes Event_Create(CEvent *p, int manualReset, int signaled)
+{
+  int res;
+  RINOK(pthread_mutex_init(&p->_mutex, NULL));
+  res = pthread_cond_init(&p->_cond, NULL);
+  if (res != 0)
+  {
+    // do not leak the mutex that was already initialized above
+    pthread_mutex_destroy(&p->_mutex);
+    return res;
+  }
+  p->_manual_reset = manualReset;
+  p->_state = (signaled ? True : False);
+  p->_created = 1;
+  return 0;
+}
+
+// Thin constructors over Event_Create():
+//   ManualReset* - state stays signaled until Event_Reset()
+//   AutoReset*   - state is cleared by the Event_Wait() that consumes it
+//   *NotSignaled - start in the non-signaled state
+WRes ManualResetEvent_Create(CManualResetEvent *p, int signaled)
+{
+  return Event_Create(p, True, signaled);
+}
+
+WRes ManualResetEvent_CreateNotSignaled(CManualResetEvent *p)
+{
+  return ManualResetEvent_Create(p, 0);
+}
+
+WRes AutoResetEvent_Create(CAutoResetEvent *p, int signaled)
+{
+  return Event_Create(p, False, signaled);
+}
+
+WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p)
+{
+  return AutoResetEvent_Create(p, 0);
+}
+
+
+// Marks the event signaled and wakes every waiter.
+// An unlock error takes precedence over a broadcast error in the result.
+WRes Event_Set(CEvent *p)
+{
+  int broadcastRes;
+  int unlockRes;
+  RINOK(pthread_mutex_lock(&p->_mutex));
+  p->_state = True;
+  broadcastRes = pthread_cond_broadcast(&p->_cond);
+  unlockRes = pthread_mutex_unlock(&p->_mutex);
+  if (unlockRes != 0)
+    return unlockRes;
+  return broadcastRes;
+}
+
+// Clears the signaled state under the event's mutex.
+WRes Event_Reset(CEvent *p)
+{
+  int unlockRes;
+  RINOK(pthread_mutex_lock(&p->_mutex));
+  p->_state = False;
+  unlockRes = pthread_mutex_unlock(&p->_mutex);
+  return unlockRes;
+}
+
+// Blocks until the event is signaled. For a non-manual-reset event the
+// state is cleared before returning.
+WRes Event_Wait(CEvent *p)
+{
+  RINOK(pthread_mutex_lock(&p->_mutex));
+  for (;;)
+  {
+    if (p->_state != False)
+      break;
+    // the result of pthread_cond_wait() is not checked; the state is re-tested
+    pthread_cond_wait(&p->_cond, &p->_mutex);
+  }
+  if (p->_manual_reset == False)
+    p->_state = False;
+  return pthread_mutex_unlock(&p->_mutex);
+}
+
+// Destroys the event's mutex and condition variable (no-op if not created).
+// Both are always destroyed; a mutex error takes precedence in the result.
+WRes Event_Close(CEvent *p)
+{
+  int mutexRes;
+  int condRes;
+  if (!p->_created)
+    return 0;
+  p->_created = 0;
+  mutexRes = pthread_mutex_destroy(&p->_mutex);
+  condRes = pthread_cond_destroy(&p->_cond);
+  if (mutexRes != 0)
+    return mutexRes;
+  return condRes;
+}
+
+
+// Creates a counting semaphore built from a mutex and a condition variable.
+//   initCount : initial count, must not exceed (maxCount)
+//   maxCount  : upper bound for the count, must be at least 1
+// Returns EINVAL for invalid counts, a pthread error code if initialization
+// fails (nothing is left initialized in that case), or 0 on success.
+WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount)
+{
+  int res;
+  if (initCount > maxCount || maxCount < 1)
+    return EINVAL;
+  RINOK(pthread_mutex_init(&p->_mutex, NULL));
+  res = pthread_cond_init(&p->_cond, NULL);
+  if (res != 0)
+  {
+    // do not leak the mutex that was already initialized above
+    pthread_mutex_destroy(&p->_mutex);
+    return res;
+  }
+  p->_count = initCount;
+  p->_maxCount = maxCount;
+  p->_created = 1;
+  return 0;
+}
+
+
+// Creates the semaphore if needed; if it already exists, only its counters
+// are reset (the mutex / condition variable are reused, not recreated).
+// Returns EINVAL for invalid counts.
+WRes Semaphore_OptCreateInit(CSemaphore *p, UInt32 initCount, UInt32 maxCount)
+{
+  if (!Semaphore_IsCreated(p))
+    return Semaphore_Create(p, initCount, maxCount);
+
+  if (initCount > maxCount || maxCount < 1)
+    return EINVAL;
+  p->_count = initCount;
+  p->_maxCount = maxCount;
+  return 0;
+}
+
+
+// Adds (releaseCount) to the semaphore count and wakes all waiters.
+// Returns:
+//   EINVAL               - releaseCount is 0
+//   ERROR_TOO_MANY_POSTS - the new count would exceed the maximum
+//                          (the count is left unchanged)
+//   pthread error code   - lock / broadcast / unlock failure
+//   0                    - success
+WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 releaseCount)
+{
+  int ret;
+
+  if (releaseCount < 1)
+    return EINVAL;
+
+  RINOK(pthread_mutex_lock(&p->_mutex));
+
+  // Compare against the remaining room instead of computing
+  // (_count + releaseCount), which could wrap around in UInt32 and
+  // wrongly pass the (<= _maxCount) check.
+  if (p->_count > p->_maxCount
+      || releaseCount > p->_maxCount - p->_count)
+    ret = ERROR_TOO_MANY_POSTS; // EINVAL;
+  else
+  {
+    p->_count += releaseCount;
+    ret = pthread_cond_broadcast(&p->_cond);
+  }
+  RINOK(pthread_mutex_unlock(&p->_mutex));
+  return ret;
+}
+
+// Blocks until the count is at least 1, then takes one unit.
+WRes Semaphore_Wait(CSemaphore *p)
+{
+  RINOK(pthread_mutex_lock(&p->_mutex));
+  for (;;)
+  {
+    if (p->_count >= 1)
+      break;
+    // the result of pthread_cond_wait() is not checked; the count is re-tested
+    pthread_cond_wait(&p->_cond, &p->_mutex);
+  }
+  p->_count--;
+  return pthread_mutex_unlock(&p->_mutex);
+}
+
+// Destroys the semaphore's mutex and condition variable (no-op if not created).
+// Both are always destroyed; a mutex error takes precedence in the result.
+WRes Semaphore_Close(CSemaphore *p)
+{
+  int mutexRes;
+  int condRes;
+  if (!p->_created)
+    return 0;
+  p->_created = 0;
+  mutexRes = pthread_mutex_destroy(&p->_mutex);
+  condRes = pthread_cond_destroy(&p->_cond);
+  if (mutexRes != 0)
+    return mutexRes;
+  return condRes;
+}
+
+
+
+// Initializes the mutex with default attributes.
+// Returns EINTR for a NULL (p), else the pthread_mutex_init() result.
+WRes CriticalSection_Init(CCriticalSection *p)
+{
+  // Print("CriticalSection_Init");
+  if (p)
+    return pthread_mutex_init(&p->_mutex, NULL);
+  return EINTR;
+}
+
+// Locks the mutex; a NULL (p) is ignored. The lock result is not checked.
+void CriticalSection_Enter(CCriticalSection *p)
+{
+  // Print("CriticalSection_Enter");
+  if (!p)
+    return;
+  pthread_mutex_lock(&p->_mutex);
+}
+
+// Unlocks the mutex; a NULL (p) is ignored. The unlock result is not checked.
+void CriticalSection_Leave(CCriticalSection *p)
+{
+  // Print("CriticalSection_Leave");
+  if (!p)
+    return;
+  pthread_mutex_unlock(&p->_mutex);
+}
+
+// Destroys the mutex; a NULL (p) is ignored. The destroy result is not checked.
+void CriticalSection_Delete(CCriticalSection *p)
+{
+  // Print("CriticalSection_Delete");
+  if (!p)
+    return;
+  pthread_mutex_destroy(&p->_mutex);
+}
+
+// Increments *addend by 1 and returns the new value.
+// Uses the __sync_add_and_fetch() builtin unless USE_HACK_UNSAFE_ATOMIC is
+// defined, in which case a plain read-modify-write is performed.
+LONG InterlockedIncrement(LONG volatile *addend)
+{
+  // Print("InterlockedIncrement");
+  #ifdef USE_HACK_UNSAFE_ATOMIC
+  const LONG next = *addend + 1;
+  *addend = next;
+  return next;
+  #else
+  return __sync_add_and_fetch(addend, 1);
+  #endif
+}
+
+#endif // _WIN32
diff --git a/multiarc/src/formats/7z/C/Threads.h b/multiarc/src/formats/7z/C/Threads.h
index e53ace43..89ecb92b 100644..100755
--- a/multiarc/src/formats/7z/C/Threads.h
+++ b/multiarc/src/formats/7z/C/Threads.h
@@ -1,38 +1,139 @@
/* Threads.h -- multithreading library
-2017-06-18 : Igor Pavlov : Public domain */
+2021-12-21 : Igor Pavlov : Public domain */
#ifndef __7Z_THREADS_H
#define __7Z_THREADS_H
#ifdef _WIN32
-#include <windows.h>
+#include <Windows.h>
+#else
+
+#if defined(__linux__)
+#if !defined(__APPLE__) && !defined(_AIX) && !defined(__ANDROID__)
+#ifndef _7ZIP_AFFINITY_DISABLE
+#define _7ZIP_AFFINITY_SUPPORTED
+// #pragma message(" ==== _7ZIP_AFFINITY_SUPPORTED")
+// #define _GNU_SOURCE
+#endif
+#endif
+#endif
+
+#include <pthread.h>
+
#endif
#include "7zTypes.h"
EXTERN_C_BEGIN
+#ifdef _WIN32
+
WRes HandlePtr_Close(HANDLE *h);
WRes Handle_WaitObject(HANDLE h);
typedef HANDLE CThread;
-#define Thread_Construct(p) *(p) = NULL
+
+#define Thread_Construct(p) { *(p) = NULL; }
#define Thread_WasCreated(p) (*(p) != NULL)
#define Thread_Close(p) HandlePtr_Close(p)
-#define Thread_Wait(p) Handle_WaitObject(*(p))
+// #define Thread_Wait(p) Handle_WaitObject(*(p))
-typedef
#ifdef UNDER_CE
- DWORD
+ // if (USE_THREADS_CreateThread is defined), we use CreateThread()
+ // if (USE_THREADS_CreateThread is not defined), we use _beginthreadex()
+ #define USE_THREADS_CreateThread
+#endif
+
+typedef
+ #ifdef USE_THREADS_CreateThread
+ DWORD
+ #else
+ unsigned
+ #endif
+ THREAD_FUNC_RET_TYPE;
+
+typedef DWORD_PTR CAffinityMask;
+typedef DWORD_PTR CCpuSet;
+
+#define CpuSet_Zero(p) { *(p) = 0; }
+#define CpuSet_Set(p, cpu) { *(p) |= ((DWORD_PTR)1 << (cpu)); }
+
+#else // _WIN32
+
+typedef struct _CThread
+{
+ pthread_t _tid;
+ int _created;
+} CThread;
+
+#define Thread_Construct(p) { (p)->_tid = 0; (p)->_created = 0; }
+#define Thread_WasCreated(p) ((p)->_created != 0)
+WRes Thread_Close(CThread *p);
+// #define Thread_Wait Thread_Wait_Close
+
+typedef void * THREAD_FUNC_RET_TYPE;
+
+typedef UInt64 CAffinityMask;
+
+#ifdef _7ZIP_AFFINITY_SUPPORTED
+
+typedef cpu_set_t CCpuSet;
+#define CpuSet_Zero(p) CPU_ZERO(p)
+#define CpuSet_Set(p, cpu) CPU_SET(cpu, p)
+#define CpuSet_IsSet(p, cpu) CPU_ISSET(cpu, p)
+
#else
- unsigned
+
+typedef UInt64 CCpuSet;
+#define CpuSet_Zero(p) { *(p) = 0; }
+#define CpuSet_Set(p, cpu) { *(p) |= ((UInt64)1 << (cpu)); }
+#define CpuSet_IsSet(p, cpu) ((*(p) & ((UInt64)1 << (cpu))) != 0)
+
#endif
- THREAD_FUNC_RET_TYPE;
+
+
+#endif // _WIN32
+
#define THREAD_FUNC_CALL_TYPE MY_STD_CALL
-#define THREAD_FUNC_DECL THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE
+
+#if defined(_WIN32) && defined(__GNUC__)
+/* GCC compiler for x86 32-bit uses the rule:
+ the stack is 16-byte aligned before CALL instruction for function calling.
+ But only root function main() contains instructions that
+ set 16-byte alignment for stack pointer. And another functions
+ just keep alignment, if it was set in some parent function.
+
+ The problem:
+ if we create new thread in MinGW (GCC) 32-bit x86 via _beginthreadex() or CreateThread(),
+ the root function of thread doesn't set 16-byte alignment.
+ And stack frames in all child functions also will be unaligned in that case.
+
+ Here we set (force_align_arg_pointer) attribute for root function of new thread.
+ Do we need (force_align_arg_pointer) also for other systems? */
+
+ #define THREAD_FUNC_ATTRIB_ALIGN_ARG __attribute__((force_align_arg_pointer))
+ // #define THREAD_FUNC_ATTRIB_ALIGN_ARG // for debug : bad alignment in SSE functions
+#else
+ #define THREAD_FUNC_ATTRIB_ALIGN_ARG
+#endif
+
+#define THREAD_FUNC_DECL THREAD_FUNC_ATTRIB_ALIGN_ARG THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE
+
typedef THREAD_FUNC_RET_TYPE (THREAD_FUNC_CALL_TYPE * THREAD_FUNC_TYPE)(void *);
WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param);
+WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, CAffinityMask affinity);
+WRes Thread_Wait_Close(CThread *p);
+
+#ifdef _WIN32
+#define Thread_Create_With_CpuSet(p, func, param, cs) \
+ Thread_Create_With_Affinity(p, func, param, *cs)
+#else
+WRes Thread_Create_With_CpuSet(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, const CCpuSet *cpuSet);
+#endif
+
+
+#ifdef _WIN32
typedef HANDLE CEvent;
typedef CEvent CAutoResetEvent;
@@ -54,6 +155,7 @@ typedef HANDLE CSemaphore;
#define Semaphore_Close(p) HandlePtr_Close(p)
#define Semaphore_Wait(p) Handle_WaitObject(*(p))
WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount);
+WRes Semaphore_OptCreateInit(CSemaphore *p, UInt32 initCount, UInt32 maxCount);
WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 num);
WRes Semaphore_Release1(CSemaphore *p);
@@ -63,6 +165,68 @@ WRes CriticalSection_Init(CCriticalSection *p);
#define CriticalSection_Enter(p) EnterCriticalSection(p)
#define CriticalSection_Leave(p) LeaveCriticalSection(p)
+
+#else // _WIN32
+
+typedef struct _CEvent
+{
+ int _created;
+ int _manual_reset;
+ int _state;
+ pthread_mutex_t _mutex;
+ pthread_cond_t _cond;
+} CEvent;
+
+typedef CEvent CAutoResetEvent;
+typedef CEvent CManualResetEvent;
+
+#define Event_Construct(p) (p)->_created = 0
+#define Event_IsCreated(p) ((p)->_created)
+
+WRes ManualResetEvent_Create(CManualResetEvent *p, int signaled);
+WRes ManualResetEvent_CreateNotSignaled(CManualResetEvent *p);
+WRes AutoResetEvent_Create(CAutoResetEvent *p, int signaled);
+WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p);
+WRes Event_Set(CEvent *p);
+WRes Event_Reset(CEvent *p);
+WRes Event_Wait(CEvent *p);
+WRes Event_Close(CEvent *p);
+
+
+typedef struct _CSemaphore
+{
+ int _created;
+ UInt32 _count;
+ UInt32 _maxCount;
+ pthread_mutex_t _mutex;
+ pthread_cond_t _cond;
+} CSemaphore;
+
+#define Semaphore_Construct(p) (p)->_created = 0
+#define Semaphore_IsCreated(p) ((p)->_created)
+
+WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount);
+WRes Semaphore_OptCreateInit(CSemaphore *p, UInt32 initCount, UInt32 maxCount);
+WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 num);
+#define Semaphore_Release1(p) Semaphore_ReleaseN(p, 1)
+WRes Semaphore_Wait(CSemaphore *p);
+WRes Semaphore_Close(CSemaphore *p);
+
+
+typedef struct _CCriticalSection
+{
+ pthread_mutex_t _mutex;
+} CCriticalSection;
+
+WRes CriticalSection_Init(CCriticalSection *p);
+void CriticalSection_Delete(CCriticalSection *cs);
+void CriticalSection_Enter(CCriticalSection *cs);
+void CriticalSection_Leave(CCriticalSection *cs);
+
+LONG InterlockedIncrement(LONG volatile *addend);
+
+#endif // _WIN32
+
EXTERN_C_END
#endif
diff --git a/multiarc/src/formats/7z/C/Xz.c b/multiarc/src/formats/7z/C/Xz.c
index d9f83df1..7c53b600 100644..100755
--- a/multiarc/src/formats/7z/C/Xz.c
+++ b/multiarc/src/formats/7z/C/Xz.c
@@ -1,5 +1,5 @@
/* Xz.c - Xz
-2017-05-12 : Igor Pavlov : Public domain */
+2021-02-09 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -41,7 +41,7 @@ void Xz_Free(CXzStream *p, ISzAllocPtr alloc)
unsigned XzFlags_GetCheckSize(CXzStreamFlags f)
{
unsigned t = XzFlags_GetCheckType(f);
- return (t == 0) ? 0 : (4 << ((t - 1) / 3));
+ return (t == 0) ? 0 : ((unsigned)4 << ((t - 1) / 3));
}
void XzCheck_Init(CXzCheck *p, unsigned mode)
diff --git a/multiarc/src/formats/7z/C/Xz.h b/multiarc/src/formats/7z/C/Xz.h
index 544ee18f..849b944b 100644..100755
--- a/multiarc/src/formats/7z/C/Xz.h
+++ b/multiarc/src/formats/7z/C/Xz.h
@@ -1,5 +1,5 @@
/* Xz.h - Xz interface
-2018-07-04 : Igor Pavlov : Public domain */
+2021-04-01 : Igor Pavlov : Public domain */
#ifndef __XZ_H
#define __XZ_H
@@ -47,7 +47,7 @@ typedef struct
CXzFilter filters[XZ_NUM_FILTERS_MAX];
} CXzBlock;
-#define XzBlock_GetNumFilters(p) (((p)->flags & XZ_BF_NUM_FILTERS_MASK) + 1)
+#define XzBlock_GetNumFilters(p) (((unsigned)(p)->flags & XZ_BF_NUM_FILTERS_MASK) + 1)
#define XzBlock_HasPackSize(p) (((p)->flags & XZ_BF_PACK_SIZE) != 0)
#define XzBlock_HasUnpackSize(p) (((p)->flags & XZ_BF_UNPACK_SIZE) != 0)
#define XzBlock_HasUnsupportedFlags(p) (((p)->flags & ~(XZ_BF_NUM_FILTERS_MASK | XZ_BF_PACK_SIZE | XZ_BF_UNPACK_SIZE)) != 0)
@@ -277,7 +277,10 @@ void XzUnpacker_Free(CXzUnpacker *p);
{
XzUnpacker_Init()
for()
+ {
XzUnpacker_Code();
+ }
+ XzUnpacker_IsStreamWasFinished()
}
Interface-2 : Direct output buffer:
@@ -288,7 +291,10 @@ void XzUnpacker_Free(CXzUnpacker *p);
XzUnpacker_Init()
XzUnpacker_SetOutBufMode(); // to set output buffer and size
for()
+ {
XzUnpacker_Code(); // (dest = NULL) in XzUnpacker_Code()
+ }
+ XzUnpacker_IsStreamWasFinished()
}
Interface-3 : Direct output buffer : One call full decoding
@@ -296,6 +302,7 @@ void XzUnpacker_Free(CXzUnpacker *p);
It uses Interface-2 internally.
{
XzUnpacker_CodeFull()
+ XzUnpacker_IsStreamWasFinished()
}
*/
@@ -309,8 +316,12 @@ Returns:
SZ_OK
status:
CODER_STATUS_NOT_FINISHED,
- CODER_STATUS_NEEDS_MORE_INPUT - maybe there are more xz streams,
- call XzUnpacker_IsStreamWasFinished to check that current stream was finished
+ CODER_STATUS_NEEDS_MORE_INPUT - the decoder can return it in two cases:
+ 1) it needs more input data to finish current xz stream
+ 2) xz stream was finished successfully. But the decoder supports multiple
+ concatenated xz streams. So it expects more input data for new xz streams.
+ Call XzUnpacker_IsStreamWasFinished() to check that latest xz stream was finished successfully.
+
SZ_ERROR_MEM - Memory allocation error
SZ_ERROR_DATA - Data error
SZ_ERROR_UNSUPPORTED - Unsupported method or method properties
@@ -335,12 +346,17 @@ SRes XzUnpacker_CodeFull(CXzUnpacker *p, Byte *dest, SizeT *destLen,
const Byte *src, SizeT *srcLen,
ECoderFinishMode finishMode, ECoderStatus *status);
+/*
+If you decode full xz stream(s), then you can call XzUnpacker_IsStreamWasFinished()
+after successful XzUnpacker_CodeFull() or after last call of XzUnpacker_Code().
+*/
+
BoolInt XzUnpacker_IsStreamWasFinished(const CXzUnpacker *p);
/*
-XzUnpacker_GetExtraSize() returns then number of uncofirmed bytes,
+XzUnpacker_GetExtraSize() returns the number of unconfirmed bytes,
if it's in (XZ_STATE_STREAM_HEADER) state or in (XZ_STATE_STREAM_PADDING) state.
-These bytes can be some bytes after xz archive, or
+These bytes can be some data after xz archive, or
it can be start of new xz stream.
Call XzUnpacker_GetExtraSize() after XzUnpacker_Code() function to detect real size of
@@ -371,19 +387,46 @@ BoolInt XzUnpacker_IsBlockFinished(const CXzUnpacker *p);
-/* ---------- Multi Threading Decoding ---------- */
+
+
+
+/* ---- Single-Thread and Multi-Thread xz Decoding with Input/Output Streams ---- */
+
+/*
+ if (CXzDecMtProps::numThreads > 1), the decoder can try to use
+ Multi-Threading. The decoder analyses xz block header, and if
+ there are pack size and unpack size values stored in xz block header,
+ the decoder reads compressed data of block to internal buffers,
+ and then it can start parallel decoding, if there are other blocks.
+ The decoder can switch back to Single-Thread decoding after some conditions.
+
+ The sequence of calls for xz decoding with in/out Streams:
+ {
+ XzDecMt_Create()
+ XzDecMtProps_Init(XzDecMtProps) to set default values of properties
+ // then you can change some XzDecMtProps parameters with required values
+ // here you can set the number of threads and (memUseMax) - the maximum
+ Memory usage for multithreading decoding.
+ for()
+ {
+ XzDecMt_Decode() // one call per one file
+ }
+ XzDecMt_Destroy()
+ }
+*/
typedef struct
{
- size_t inBufSize_ST;
- size_t outStep_ST;
- BoolInt ignoreErrors;
+ size_t inBufSize_ST; // size of input buffer for Single-Thread decoding
+ size_t outStep_ST; // size of output buffer for Single-Thread decoding
+ BoolInt ignoreErrors; // if set to 1, the decoder can ignore some errors and it skips broken parts of data.
#ifndef _7ZIP_ST
- unsigned numThreads;
- size_t inBufSize_MT;
- size_t memUseMax;
+ unsigned numThreads; // the number of threads for Multi-Thread decoding. if (numThreads == 1) it will use Single-thread decoding
+ size_t inBufSize_MT; // size of small input data buffers for Multi-Thread decoding. Big number of such small buffers can be created
+ size_t memUseMax; // the limit of total memory usage for Multi-Thread decoding.
+ // it's recommended to set (memUseMax) manually to a value that is smaller than the total size of RAM in the computer.
#endif
} CXzDecMtProps;
@@ -393,7 +436,7 @@ void XzDecMtProps_Init(CXzDecMtProps *p);
typedef void * CXzDecMtHandle;
/*
- alloc : XzDecMt uses CAlignOffsetAlloc for addresses allocated by (alloc).
+ alloc : XzDecMt uses CAlignOffsetAlloc internally for addresses allocated by (alloc).
allocMid : for big allocations, aligned allocation is better
*/
@@ -407,33 +450,46 @@ typedef struct
Byte NumStreams_Defined;
Byte NumBlocks_Defined;
- Byte DataAfterEnd;
+ Byte DataAfterEnd; // there are some additional data after good xz streams, and that data is not new xz stream.
Byte DecodingTruncated; // Decoding was Truncated, we need only partial output data
- UInt64 InSize; // pack size processed
+ UInt64 InSize; // pack size processed. That value doesn't include the data after
+ // end of xz stream, if that data was not correct
UInt64 OutSize;
UInt64 NumStreams;
UInt64 NumBlocks;
- SRes DecodeRes;
- SRes ReadRes;
- SRes ProgressRes;
- SRes CombinedRes;
- SRes CombinedRes_Type;
+ SRes DecodeRes; // the error code of xz streams data decoding
+ SRes ReadRes; // error code from ISeqInStream:Read()
+ SRes ProgressRes; // error code from ICompressProgress:Progress()
+ SRes CombinedRes; // Combined result error code that shows main result
+ // = SZ_OK, if there is no error.
+ // but check also (DataAfterEnd) that can show additional minor errors.
+
+ SRes CombinedRes_Type; // = SZ_ERROR_READ, if error from ISeqInStream
+ // = SZ_ERROR_PROGRESS, if error from ICompressProgress
+ // = SZ_ERROR_WRITE, if error from ISeqOutStream
+ // = SZ_ERROR_* codes for decoding
} CXzStatInfo;
void XzStatInfo_Clear(CXzStatInfo *p);
/*
+
XzDecMt_Decode()
-SRes:
- SZ_OK - OK
+SRes: it's combined decoding result. It also is equal to stat->CombinedRes.
+
+ SZ_OK - no error
+ check also output value in (stat->DataAfterEnd)
+ that can show additional possible error
+
SZ_ERROR_MEM - Memory allocation error
SZ_ERROR_NO_ARCHIVE - is not xz archive
SZ_ERROR_ARCHIVE - Headers error
SZ_ERROR_DATA - Data Error
+ SZ_ERROR_UNSUPPORTED - Unsupported method or method properties
SZ_ERROR_CRC - CRC Error
SZ_ERROR_INPUT_EOF - it needs more input data
SZ_ERROR_WRITE - ISeqOutStream error
@@ -451,8 +507,9 @@ SRes XzDecMt_Decode(CXzDecMtHandle p,
// Byte *outBuf, size_t *outBufSize,
ISeqInStream *inStream,
// const Byte *inData, size_t inDataSize,
- CXzStatInfo *stat,
- int *isMT, // 0 means that ST (Single-Thread) version was used
+ CXzStatInfo *stat, // out: decoding results and statistics
+ int *isMT, // out: 0 means that ST (Single-Thread) version was used
+ // 1 means that MT (Multi-Thread) version was used
ICompressProgress *progress);
EXTERN_C_END
diff --git a/multiarc/src/formats/7z/C/XzCrc64.c b/multiarc/src/formats/7z/C/XzCrc64.c
index b6d02cbe..b6d02cbe 100644..100755
--- a/multiarc/src/formats/7z/C/XzCrc64.c
+++ b/multiarc/src/formats/7z/C/XzCrc64.c
diff --git a/multiarc/src/formats/7z/C/XzCrc64.h b/multiarc/src/formats/7z/C/XzCrc64.h
index 08dbc330..08dbc330 100644..100755
--- a/multiarc/src/formats/7z/C/XzCrc64.h
+++ b/multiarc/src/formats/7z/C/XzCrc64.h
diff --git a/multiarc/src/formats/7z/C/XzCrc64Opt.c b/multiarc/src/formats/7z/C/XzCrc64Opt.c
index b2852de4..93a9ffff 100644..100755
--- a/multiarc/src/formats/7z/C/XzCrc64Opt.c
+++ b/multiarc/src/formats/7z/C/XzCrc64Opt.c
@@ -1,5 +1,5 @@
/* XzCrc64Opt.c -- CRC64 calculation
-2017-06-30 : Igor Pavlov : Public domain */
+2021-02-09 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -9,6 +9,7 @@
#define CRC64_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8))
+UInt64 MY_FAST_CALL XzCrc64UpdateT4(UInt64 v, const void *data, size_t size, const UInt64 *table);
UInt64 MY_FAST_CALL XzCrc64UpdateT4(UInt64 v, const void *data, size_t size, const UInt64 *table)
{
const Byte *p = (const Byte *)data;
@@ -16,7 +17,7 @@ UInt64 MY_FAST_CALL XzCrc64UpdateT4(UInt64 v, const void *data, size_t size, con
v = CRC64_UPDATE_BYTE_2(v, *p);
for (; size >= 4; size -= 4, p += 4)
{
- UInt32 d = (UInt32)v ^ *(const UInt32 *)p;
+ UInt32 d = (UInt32)v ^ *(const UInt32 *)(const void *)p;
v = (v >> 32)
^ (table + 0x300)[((d ) & 0xFF)]
^ (table + 0x200)[((d >> 8) & 0xFF)]
@@ -45,6 +46,7 @@ UInt64 MY_FAST_CALL XzCrc64UpdateT4(UInt64 v, const void *data, size_t size, con
#define CRC64_UPDATE_BYTE_2_BE(crc, b) (table[(Byte)((crc) >> 56) ^ (b)] ^ ((crc) << 8))
+UInt64 MY_FAST_CALL XzCrc64UpdateT1_BeT4(UInt64 v, const void *data, size_t size, const UInt64 *table);
UInt64 MY_FAST_CALL XzCrc64UpdateT1_BeT4(UInt64 v, const void *data, size_t size, const UInt64 *table)
{
const Byte *p = (const Byte *)data;
@@ -54,7 +56,7 @@ UInt64 MY_FAST_CALL XzCrc64UpdateT1_BeT4(UInt64 v, const void *data, size_t size
v = CRC64_UPDATE_BYTE_2_BE(v, *p);
for (; size >= 4; size -= 4, p += 4)
{
- UInt32 d = (UInt32)(v >> 32) ^ *(const UInt32 *)p;
+ UInt32 d = (UInt32)(v >> 32) ^ *(const UInt32 *)(const void *)p;
v = (v << 32)
^ (table + 0x000)[((d ) & 0xFF)]
^ (table + 0x100)[((d >> 8) & 0xFF)]
diff --git a/multiarc/src/formats/7z/C/XzDec.c b/multiarc/src/formats/7z/C/XzDec.c
index 395e83f6..3f96a37f 100644..100755
--- a/multiarc/src/formats/7z/C/XzDec.c
+++ b/multiarc/src/formats/7z/C/XzDec.c
@@ -1,5 +1,5 @@
/* XzDec.c -- Xz Decode
-2019-02-02 : Igor Pavlov : Public domain */
+2021-09-04 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -240,6 +240,7 @@ static SRes BraState_Code2(void *pp,
}
+SRes BraState_SetFromMethod(IStateCoder *p, UInt64 id, int encodeMode, ISzAllocPtr alloc);
SRes BraState_SetFromMethod(IStateCoder *p, UInt64 id, int encodeMode, ISzAllocPtr alloc)
{
CBraState *decoder;
@@ -772,7 +773,8 @@ static BoolInt Xz_CheckFooter(CXzStreamFlags flags, UInt64 indexSize, const Byte
#define READ_VARINT_AND_CHECK(buf, pos, size, res) \
{ unsigned s = Xz_ReadVarInt(buf + pos, size - pos, res); \
- if (s == 0) return SZ_ERROR_ARCHIVE; pos += s; }
+ if (s == 0) return SZ_ERROR_ARCHIVE; \
+ pos += s; }
static BoolInt XzBlock_AreSupportedFilters(const CXzBlock *p)
@@ -1038,7 +1040,7 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen,
(p->outBuf ? NULL : dest), &destLen2, destFinish,
src, &srcLen2, srcFinished2,
finishMode2);
-
+
*status = p->decoder.status;
XzCheck_Update(&p->check, (p->outBuf ? p->outBuf + p->outDataWritten : dest), destLen2);
if (!p->outBuf)
@@ -1275,9 +1277,10 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen,
}
else
{
+ const Byte *ptr = p->buf;
p->state = XZ_STATE_STREAM_FOOTER;
p->pos = 0;
- if (CRC_GET_DIGEST(p->crc) != GetUi32(p->buf))
+ if (CRC_GET_DIGEST(p->crc) != GetUi32(ptr))
return SZ_ERROR_CRC;
}
break;
@@ -1456,7 +1459,6 @@ typedef struct
ISeqInStream *inStream;
ISeqOutStream *outStream;
ICompressProgress *progress;
- // CXzStatInfo *stat;
BoolInt finishMode;
BoolInt outSize_Defined;
@@ -1492,8 +1494,9 @@ typedef struct
UInt64 numBlocks;
// UInt64 numBadBlocks;
- SRes mainErrorCode;
-
+ SRes mainErrorCode; // it's set to error code, if the size of Code() output doesn't match the size from Parsing stage
+ // it can be = SZ_ERROR_INPUT_EOF
+ // it can be = SZ_ERROR_DATA, in some other cases
BoolInt isBlockHeaderState_Parse;
BoolInt isBlockHeaderState_Write;
UInt64 outProcessed_Parse;
@@ -1877,7 +1880,7 @@ static SRes XzDecMt_Callback_PreCode(void *pp, unsigned coderIndex)
{
// if (res == SZ_ERROR_MEM) return res;
if (me->props.ignoreErrors && res != SZ_ERROR_MEM)
- return S_OK;
+ return SZ_OK;
return res;
}
}
@@ -1898,15 +1901,18 @@ static SRes XzDecMt_Callback_Code(void *pp, unsigned coderIndex,
*outCodePos = coder->outCodeSize;
*stop = True;
+ if (srcSize > coder->inPreSize - coder->inCodeSize)
+ return SZ_ERROR_FAIL;
+
if (coder->inCodeSize < coder->inPreHeaderSize)
{
- UInt64 rem = coder->inPreHeaderSize - coder->inCodeSize;
- size_t step = srcSize;
- if (step > rem)
- step = (size_t)rem;
+ size_t step = coder->inPreHeaderSize - coder->inCodeSize;
+ if (step > srcSize)
+ step = srcSize;
src += step;
srcSize -= step;
coder->inCodeSize += step;
+ *inCodePos = coder->inCodeSize;
if (coder->inCodeSize < coder->inPreHeaderSize)
{
*stop = False;
@@ -1956,7 +1962,7 @@ static SRes XzDecMt_Callback_Code(void *pp, unsigned coderIndex,
{
*inCodePos = coder->inPreSize;
*outCodePos = coder->outPreSize;
- return S_OK;
+ return SZ_OK;
}
return coder->codeRes;
}
@@ -1966,7 +1972,7 @@ static SRes XzDecMt_Callback_Code(void *pp, unsigned coderIndex,
static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex,
BoolInt needWriteToStream,
- const Byte *src, size_t srcSize,
+ const Byte *src, size_t srcSize, BoolInt isCross,
// int srcFinished,
BoolInt *needContinue,
BoolInt *canRecode)
@@ -1985,7 +1991,7 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex,
if (!coder->dec.headerParsedOk || !coder->outBuf)
{
if (me->finishedDecoderIndex < 0)
- me->finishedDecoderIndex = coderIndex;
+ me->finishedDecoderIndex = (int)coderIndex;
return SZ_OK;
}
@@ -2077,7 +2083,7 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex,
if (coder->codeRes != SZ_OK)
if (!me->props.ignoreErrors)
{
- me->finishedDecoderIndex = coderIndex;
+ me->finishedDecoderIndex = (int)coderIndex;
return res;
}
@@ -2086,7 +2092,7 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex,
if (coder->inPreSize != coder->inCodeSize
|| coder->blockPackTotal != coder->inCodeSize)
{
- me->finishedDecoderIndex = coderIndex;
+ me->finishedDecoderIndex = (int)coderIndex;
return SZ_OK;
}
@@ -2125,22 +2131,41 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex,
return SZ_OK;
}
+ /*
+ We have processed all xz-blocks of stream,
+ And xz unpacker is at XZ_STATE_BLOCK_HEADER state, where
+ (src) is a pointer to xz-Index structure.
+ We finish reading of current xz-Stream, including Zero padding after xz-Stream.
+ We exit, if we reach extra byte (first byte of new-Stream or another data).
+ But we don't update input stream pointer for that new extra byte.
+ If extra byte is not correct first byte of xz-signature,
+ we have SZ_ERROR_NO_ARCHIVE error here.
+ */
+
res = XzUnpacker_Code(dec,
NULL, &outSizeCur,
src, &srcProcessed,
me->mtc.readWasFinished, // srcFinished
CODER_FINISH_END, // CODER_FINISH_ANY,
&status);
+
+ // res = SZ_ERROR_ARCHIVE; // for failure test
me->status = status;
me->codeRes = res;
+ if (isCross)
+ me->mtc.crossStart += srcProcessed;
+
me->mtc.inProcessed += srcProcessed;
me->mtc.mtProgress.totalInSize = me->mtc.inProcessed;
+ srcSize -= srcProcessed;
+ src += srcProcessed;
+
if (res != SZ_OK)
{
- return S_OK;
+ return SZ_OK;
// return res;
}
@@ -2149,20 +2174,26 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex,
*needContinue = True;
me->isBlockHeaderState_Parse = False;
me->isBlockHeaderState_Write = False;
+
+ if (!isCross)
{
Byte *crossBuf = MtDec_GetCrossBuff(&me->mtc);
if (!crossBuf)
return SZ_ERROR_MEM;
- memcpy(crossBuf, src + srcProcessed, srcSize - srcProcessed);
+ if (srcSize != 0)
+ memcpy(crossBuf, src, srcSize);
+ me->mtc.crossStart = 0;
+ me->mtc.crossEnd = srcSize;
}
- me->mtc.crossStart = 0;
- me->mtc.crossEnd = srcSize - srcProcessed;
+
+ PRF_STR_INT("XZ_STATE_STREAM_HEADER crossEnd = ", (unsigned)me->mtc.crossEnd);
+
return SZ_OK;
}
- if (status != CODER_STATUS_NEEDS_MORE_INPUT)
+ if (status != CODER_STATUS_NEEDS_MORE_INPUT || srcSize != 0)
{
- return E_FAIL;
+ return SZ_ERROR_FAIL;
}
if (me->mtc.readWasFinished)
@@ -2174,7 +2205,7 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex,
{
size_t inPos;
size_t inLim;
- const Byte *inData;
+ // const Byte *inData;
UInt64 inProgressPrev = me->mtc.inProcessed;
// XzDecMt_Prepare_InBuf_ST(p);
@@ -2184,9 +2215,8 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex,
inPos = 0;
inLim = 0;
- // outProcessed = 0;
- inData = crossBuf;
+ // inData = crossBuf;
for (;;)
{
@@ -2201,7 +2231,7 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex,
{
inPos = 0;
inLim = me->mtc.inBufSize;
- me->mtc.readRes = ISeqInStream_Read(me->inStream, (void *)inData, &inLim);
+ me->mtc.readRes = ISeqInStream_Read(me->inStream, (void *)crossBuf, &inLim);
me->mtc.readProcessed += inLim;
if (inLim == 0 || me->mtc.readRes != SZ_OK)
me->mtc.readWasFinished = True;
@@ -2213,7 +2243,7 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex,
res = XzUnpacker_Code(dec,
NULL, &outProcessed,
- inData + inPos, &inProcessed,
+ crossBuf + inPos, &inProcessed,
(inProcessed == 0), // srcFinished
CODER_FINISH_END, &status);
@@ -2225,7 +2255,7 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex,
if (res != SZ_OK)
{
- return S_OK;
+ return SZ_OK;
// return res;
}
@@ -2240,7 +2270,7 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex,
}
if (status != CODER_STATUS_NEEDS_MORE_INPUT)
- return E_FAIL;
+ return SZ_ERROR_FAIL;
if (me->mtc.progress)
{
@@ -2276,13 +2306,6 @@ void XzStatInfo_Clear(CXzStatInfo *p)
p->NumStreams_Defined = False;
p->NumBlocks_Defined = False;
- // p->IsArc = False;
- // p->UnexpectedEnd = False;
- // p->Unsupported = False;
- // p->HeadersError = False;
- // p->DataError = False;
- // p->CrcError = False;
-
p->DataAfterEnd = False;
p->DecodingTruncated = False;
@@ -2296,6 +2319,16 @@ void XzStatInfo_Clear(CXzStatInfo *p)
+/*
+ XzDecMt_Decode_ST() can return SZ_OK or the following errors
+ - SZ_ERROR_MEM for memory allocation error
+ - error from XzUnpacker_Code() function
+ - SZ_ERROR_WRITE for ISeqOutStream::Write(). stat->CombinedRes_Type = SZ_ERROR_WRITE in that case
+ - ICompressProgress::Progress() error, stat->CombinedRes_Type = SZ_ERROR_PROGRESS.
+ But XzDecMt_Decode_ST() doesn't return ISeqInStream::Read() errors.
+ ISeqInStream::Read() result is set to p->readRes.
+ also it can set stat->CombinedRes_Type to SZ_ERROR_WRITE or SZ_ERROR_PROGRESS.
+*/
static SRes XzDecMt_Decode_ST(CXzDecMt *p
#ifndef _7ZIP_ST
@@ -2384,7 +2417,7 @@ static SRes XzDecMt_Decode_ST(CXzDecMt *p
inPos = 0;
inLim = p->inBufSize;
inData = p->inBuf;
- p->readRes = ISeqInStream_Read(p->inStream, (void *)inData, &inLim);
+ p->readRes = ISeqInStream_Read(p->inStream, (void *)p->inBuf, &inLim);
p->readProcessed += inLim;
if (inLim == 0 || p->readRes != SZ_OK)
p->readWasFinished = True;
@@ -2426,8 +2459,8 @@ static SRes XzDecMt_Decode_ST(CXzDecMt *p
if (finished || outProcessed >= outSize)
if (outPos != 0)
{
- size_t written = ISeqOutStream_Write(p->outStream, p->outBuf, outPos);
- p->outProcessed += written;
+ const size_t written = ISeqOutStream_Write(p->outStream, p->outBuf, outPos);
+ // p->outProcessed += written; // 21.01: BUG fixed
if (written != outPos)
{
stat->CombinedRes_Type = SZ_ERROR_WRITE;
@@ -2438,9 +2471,8 @@ static SRes XzDecMt_Decode_ST(CXzDecMt *p
if (p->progress && res == SZ_OK)
{
- UInt64 inDelta = p->inProcessed - inPrev;
- UInt64 outDelta = p->outProcessed - outPrev;
- if (inDelta >= (1 << 22) || outDelta >= (1 << 22))
+ if (p->inProcessed - inPrev >= (1 << 22) ||
+ p->outProcessed - outPrev >= (1 << 22))
{
res = ICompressProgress_Progress(p->progress, p->inProcessed, p->outProcessed);
if (res != SZ_OK)
@@ -2455,14 +2487,31 @@ static SRes XzDecMt_Decode_ST(CXzDecMt *p
}
if (finished)
- return res;
+ {
+ // p->codeRes is preliminary error from XzUnpacker_Code.
+ // and it can be corrected later as final result
+ // so we return SZ_OK here instead of (res);
+ return SZ_OK;
+ // return res;
+ }
}
}
-static SRes XzStatInfo_SetStat(const CXzUnpacker *dec,
+
+
+/*
+XzStatInfo_SetStat() transforms
+ CXzUnpacker return code and status to combined CXzStatInfo results.
+ it can convert SZ_OK to SZ_ERROR_INPUT_EOF
+ it can convert SZ_ERROR_NO_ARCHIVE to SZ_OK and (DataAfterEnd = 1)
+*/
+
+static void XzStatInfo_SetStat(const CXzUnpacker *dec,
int finishMode,
- UInt64 readProcessed, UInt64 inProcessed,
- SRes res, ECoderStatus status,
+ // UInt64 readProcessed,
+ UInt64 inProcessed,
+ SRes res, // it's result from CXzUnpacker unpacker
+ ECoderStatus status,
BoolInt decodingTruncated,
CXzStatInfo *stat)
{
@@ -2484,12 +2533,20 @@ static SRes XzStatInfo_SetStat(const CXzUnpacker *dec,
if (status == CODER_STATUS_NEEDS_MORE_INPUT)
{
// CODER_STATUS_NEEDS_MORE_INPUT is expected status for correct xz streams
+ // any extra data is part of correct data
extraSize = 0;
+ // if xz stream was not finished, then we need more data
if (!XzUnpacker_IsStreamWasFinished(dec))
res = SZ_ERROR_INPUT_EOF;
}
- else if (!decodingTruncated || finishMode) // (status == CODER_STATUS_NOT_FINISHED)
- res = SZ_ERROR_DATA;
+ else
+ {
+ // CODER_STATUS_FINISHED_WITH_MARK is not possible for multi stream xz decoding
+ // so here we have (status == CODER_STATUS_NOT_FINISHED)
+ // if (status != CODER_STATUS_FINISHED_WITH_MARK)
+ if (!decodingTruncated || finishMode)
+ res = SZ_ERROR_DATA;
+ }
}
else if (res == SZ_ERROR_NO_ARCHIVE)
{
@@ -2497,24 +2554,29 @@ static SRes XzStatInfo_SetStat(const CXzUnpacker *dec,
SZ_ERROR_NO_ARCHIVE is possible for 2 states:
XZ_STATE_STREAM_HEADER - if bad signature or bad CRC
XZ_STATE_STREAM_PADDING - if non-zero padding data
- extraSize / inProcessed don't include "bad" byte
+ extraSize and inProcessed don't include "bad" byte
*/
- if (inProcessed != extraSize) // if good streams before error
- if (extraSize != 0 || readProcessed != inProcessed)
+ // if (inProcessed == extraSize), there was no good xz stream header, and we keep the error
+ if (inProcessed != extraSize) // if there were good xz streams before error
+ {
+ // if (extraSize != 0 || readProcessed != inProcessed)
{
+ // here we suppose that all xz streams were finished OK, and we have
+ // some extra data after all streams
stat->DataAfterEnd = True;
- // there is some good xz stream before. So we set SZ_OK
res = SZ_OK;
}
+ }
}
- stat->DecodeRes = res;
+ if (stat->DecodeRes == SZ_OK)
+ stat->DecodeRes = res;
stat->InSize -= extraSize;
- return res;
}
+
SRes XzDecMt_Decode(CXzDecMtHandle pp,
const CXzDecMtProps *props,
const UInt64 *outDataSize, int finishMode,
@@ -2557,8 +2619,9 @@ SRes XzDecMt_Decode(CXzDecMtHandle pp,
p->inProcessed = 0;
p->readProcessed = 0;
p->readWasFinished = False;
+ p->readRes = SZ_OK;
- p->codeRes = 0;
+ p->codeRes = SZ_OK;
p->status = CODER_STATUS_NOT_SPECIFIED;
XzUnpacker_Init(&p->dec);
@@ -2589,8 +2652,9 @@ SRes XzDecMt_Decode(CXzDecMtHandle pp,
if (p->props.numThreads > 1)
{
- IMtDecCallback vt;
-
+ IMtDecCallback2 vt;
+ BoolInt needContinue;
+ SRes res;
// we just free ST buffers here
// but we still keep state variables, that was set in XzUnpacker_Init()
XzDecMt_FreeSt(p);
@@ -2628,45 +2692,45 @@ SRes XzDecMt_Decode(CXzDecMtHandle pp,
vt.Code = XzDecMt_Callback_Code;
vt.Write = XzDecMt_Callback_Write;
- {
- BoolInt needContinue;
-
- SRes res = MtDec_Code(&p->mtc);
-
- stat->InSize = p->mtc.inProcessed;
- p->inProcessed = p->mtc.inProcessed;
- p->readRes = p->mtc.readRes;
- p->readWasFinished = p->mtc.readWasFinished;
- p->readProcessed = p->mtc.readProcessed;
+ res = MtDec_Code(&p->mtc);
- tMode = True;
- needContinue = False;
- if (res == SZ_OK)
+ stat->InSize = p->mtc.inProcessed;
+
+ p->inProcessed = p->mtc.inProcessed;
+ p->readRes = p->mtc.readRes;
+ p->readWasFinished = p->mtc.readWasFinished;
+ p->readProcessed = p->mtc.readProcessed;
+
+ tMode = True;
+ needContinue = False;
+
+ if (res == SZ_OK)
+ {
+ if (p->mtc.mtProgress.res != SZ_OK)
{
- if (p->mtc.mtProgress.res != SZ_OK)
- {
- res = p->mtc.mtProgress.res;
- stat->ProgressRes = res;
- stat->CombinedRes_Type = SZ_ERROR_PROGRESS;
- }
- else
- needContinue = p->mtc.needContinue;
+ res = p->mtc.mtProgress.res;
+ stat->ProgressRes = res;
+ stat->CombinedRes_Type = SZ_ERROR_PROGRESS;
}
-
- if (!needContinue)
+ else
+ needContinue = p->mtc.needContinue;
+ }
+
+ if (!needContinue)
+ {
{
SRes codeRes;
BoolInt truncated = False;
ECoderStatus status;
- CXzUnpacker *dec;
+ const CXzUnpacker *dec;
stat->OutSize = p->outProcessed;
if (p->finishedDecoderIndex >= 0)
{
- CXzDecMtThread *coder = &p->coders[(unsigned)p->finishedDecoderIndex];
+ const CXzDecMtThread *coder = &p->coders[(unsigned)p->finishedDecoderIndex];
codeRes = coder->codeRes;
dec = &coder->dec;
status = coder->status;
@@ -2679,41 +2743,46 @@ SRes XzDecMt_Decode(CXzDecMtHandle pp,
truncated = p->parsing_Truncated;
}
else
- return E_FAIL;
+ return SZ_ERROR_FAIL;
+
+ if (p->mainErrorCode != SZ_OK)
+ stat->DecodeRes = p->mainErrorCode;
XzStatInfo_SetStat(dec, p->finishMode,
- p->mtc.readProcessed, p->mtc.inProcessed,
+ // p->mtc.readProcessed,
+ p->mtc.inProcessed,
codeRes, status,
truncated,
stat);
+ }
- if (res == SZ_OK)
+ if (res == SZ_OK)
+ {
+ stat->ReadRes = p->mtc.readRes;
+
+ if (p->writeRes != SZ_OK)
{
- if (p->writeRes != SZ_OK)
- {
- res = p->writeRes;
- stat->CombinedRes_Type = SZ_ERROR_WRITE;
- }
- else if (p->mtc.readRes != SZ_OK && p->mtc.inProcessed == p->mtc.readProcessed)
- {
- res = p->mtc.readRes;
- stat->ReadRes = res;
- stat->CombinedRes_Type = SZ_ERROR_READ;
- }
- else if (p->mainErrorCode != SZ_OK)
- {
- res = p->mainErrorCode;
- }
+ res = p->writeRes;
+ stat->CombinedRes_Type = SZ_ERROR_WRITE;
}
-
- stat->CombinedRes = res;
- if (stat->CombinedRes_Type == SZ_OK)
- stat->CombinedRes_Type = res;
- return res;
+ else if (p->mtc.readRes != SZ_OK
+ // && p->mtc.inProcessed == p->mtc.readProcessed
+ && stat->DecodeRes == SZ_ERROR_INPUT_EOF)
+ {
+ res = p->mtc.readRes;
+ stat->CombinedRes_Type = SZ_ERROR_READ;
+ }
+ else if (stat->DecodeRes != SZ_OK)
+ res = stat->DecodeRes;
}
-
- PRF_STR("----- decoding ST -----");
+
+ stat->CombinedRes = res;
+ if (stat->CombinedRes_Type == SZ_OK)
+ stat->CombinedRes_Type = res;
+ return res;
}
+
+ PRF_STR("----- decoding ST -----");
}
#endif
@@ -2729,33 +2798,35 @@ SRes XzDecMt_Decode(CXzDecMtHandle pp,
, stat
);
+ #ifndef _7ZIP_ST
+ // we must set error code from MT decoding at first
+ if (p->mainErrorCode != SZ_OK)
+ stat->DecodeRes = p->mainErrorCode;
+ #endif
+
XzStatInfo_SetStat(&p->dec,
p->finishMode,
- p->readProcessed, p->inProcessed,
+ // p->readProcessed,
+ p->inProcessed,
p->codeRes, p->status,
False, // truncated
stat);
+ stat->ReadRes = p->readRes;
+
if (res == SZ_OK)
{
- /*
- if (p->writeRes != SZ_OK)
- {
- res = p->writeRes;
- stat->CombinedRes_Type = SZ_ERROR_WRITE;
- }
- else
- */
- if (p->readRes != SZ_OK && p->inProcessed == p->readProcessed)
+ if (p->readRes != SZ_OK
+ // && p->inProcessed == p->readProcessed
+ && stat->DecodeRes == SZ_ERROR_INPUT_EOF)
{
+ // we set read error as combined error, only if that error was the reason
+ // of decoding problem
res = p->readRes;
- stat->ReadRes = res;
stat->CombinedRes_Type = SZ_ERROR_READ;
}
- #ifndef _7ZIP_ST
- else if (p->mainErrorCode != SZ_OK)
- res = p->mainErrorCode;
- #endif
+ else if (stat->DecodeRes != SZ_OK)
+ res = stat->DecodeRes;
}
stat->CombinedRes = res;
diff --git a/multiarc/src/formats/7z/C/XzEnc.c b/multiarc/src/formats/7z/C/XzEnc.c
index d0a8b448..be174ccc 100644..100755
--- a/multiarc/src/formats/7z/C/XzEnc.c
+++ b/multiarc/src/formats/7z/C/XzEnc.c
@@ -1,5 +1,5 @@
/* XzEnc.c -- Xz Encode
-2019-02-02 : Igor Pavlov : Public domain */
+2021-04-01 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -36,7 +36,7 @@
#define XzBlock_ClearFlags(p) (p)->flags = 0;
-#define XzBlock_SetNumFilters(p, n) (p)->flags |= ((n) - 1);
+#define XzBlock_SetNumFilters(p, n) (p)->flags = (Byte)((p)->flags | ((n) - 1));
#define XzBlock_SetHasPackSize(p) (p)->flags |= XZ_BF_PACK_SIZE;
#define XzBlock_SetHasUnpackSize(p) (p)->flags |= XZ_BF_UNPACK_SIZE;
@@ -552,7 +552,7 @@ static void XzEncProps_Normalize_Fixed(CXzProps *p)
numBlocks++;
if (numBlocks < (unsigned)t2)
{
- t2r = (unsigned)numBlocks;
+ t2r = (int)numBlocks;
if (t2r == 0)
t2r = 1;
t3 = t1 * t2r;
@@ -751,7 +751,8 @@ static SRes Xz_CompressBlock(
}
else if (fp->ipDefined)
{
- SetUi32(filter->props, fp->ip);
+ Byte *ptr = filter->props;
+ SetUi32(ptr, fp->ip);
filter->propsSize = 4;
}
}
@@ -1196,7 +1197,7 @@ SRes XzEnc_Encode(CXzEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStr
p->outBufSize = destBlockSize;
}
- p->mtCoder.numThreadsMax = props->numBlockThreads_Max;
+ p->mtCoder.numThreadsMax = (unsigned)props->numBlockThreads_Max;
p->mtCoder.expectedDataSize = p->expectedDataSize;
RINOK(MtCoder_Code(&p->mtCoder));
diff --git a/multiarc/src/formats/7z/C/XzEnc.h b/multiarc/src/formats/7z/C/XzEnc.h
index 0c29e7e1..0c29e7e1 100644..100755
--- a/multiarc/src/formats/7z/C/XzEnc.h
+++ b/multiarc/src/formats/7z/C/XzEnc.h
diff --git a/multiarc/src/formats/7z/C/XzIn.c b/multiarc/src/formats/7z/C/XzIn.c
index ff48e2dd..84f868ec 100644..100755
--- a/multiarc/src/formats/7z/C/XzIn.c
+++ b/multiarc/src/formats/7z/C/XzIn.c
@@ -1,5 +1,5 @@
/* XzIn.c - Xz input
-2018-07-04 : Igor Pavlov : Public domain */
+2021-09-04 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -26,7 +26,8 @@ SRes Xz_ReadHeader(CXzStreamFlags *p, ISeqInStream *inStream)
#define READ_VARINT_AND_CHECK(buf, pos, size, res) \
{ unsigned s = Xz_ReadVarInt(buf + pos, size - pos, res); \
- if (s == 0) return SZ_ERROR_ARCHIVE; pos += s; }
+ if (s == 0) return SZ_ERROR_ARCHIVE; \
+ pos += s; }
SRes XzBlock_ReadHeader(CXzBlock *p, ISeqInStream *inStream, BoolInt *isIndex, UInt32 *headerSizeRes)
{
@@ -152,7 +153,7 @@ static SRes Xz_ReadBackward(CXzStream *p, ILookInStream *stream, Int64 *startOff
{
UInt64 indexSize;
Byte buf[XZ_STREAM_FOOTER_SIZE];
- UInt64 pos = *startOffset;
+ UInt64 pos = (UInt64)*startOffset;
if ((pos & 3) != 0 || pos < XZ_STREAM_FOOTER_SIZE)
return SZ_ERROR_NO_ARCHIVE;
@@ -202,8 +203,13 @@ static SRes Xz_ReadBackward(CXzStream *p, ILookInStream *stream, Int64 *startOff
if (!XzFlags_IsSupported(p->flags))
return SZ_ERROR_UNSUPPORTED;
- if (GetUi32(buf) != CrcCalc(buf + 4, 6))
- return SZ_ERROR_ARCHIVE;
+ {
+ /* to eliminate GCC 6.3 warning:
+ dereferencing type-punned pointer will break strict-aliasing rules */
+ const Byte *buf_ptr = buf;
+ if (GetUi32(buf_ptr) != CrcCalc(buf + 4, 6))
+ return SZ_ERROR_ARCHIVE;
+ }
indexSize = ((UInt64)GetUi32(buf + 4) + 1) << 2;
@@ -222,7 +228,7 @@ static SRes Xz_ReadBackward(CXzStream *p, ILookInStream *stream, Int64 *startOff
return SZ_ERROR_ARCHIVE;
pos -= (totalSize + XZ_STREAM_HEADER_SIZE);
RINOK(LookInStream_SeekTo(stream, pos));
- *startOffset = pos;
+ *startOffset = (Int64)pos;
}
{
CXzStreamFlags headerFlags;
@@ -294,12 +300,12 @@ SRes Xzs_ReadBackward(CXzs *p, ILookInStream *stream, Int64 *startOffset, ICompr
SRes res;
Xz_Construct(&st);
res = Xz_ReadBackward(&st, stream, startOffset, alloc);
- st.startOffset = *startOffset;
+ st.startOffset = (UInt64)*startOffset;
RINOK(res);
if (p->num == p->numAllocated)
{
- size_t newNum = p->num + p->num / 4 + 1;
- Byte *data = (Byte *)ISzAlloc_Alloc(alloc, newNum * sizeof(CXzStream));
+ const size_t newNum = p->num + p->num / 4 + 1;
+ void *data = ISzAlloc_Alloc(alloc, newNum * sizeof(CXzStream));
if (!data)
return SZ_ERROR_MEM;
p->numAllocated = newNum;
@@ -311,8 +317,8 @@ SRes Xzs_ReadBackward(CXzs *p, ILookInStream *stream, Int64 *startOffset, ICompr
p->streams[p->num++] = st;
if (*startOffset == 0)
break;
- RINOK(LookInStream_SeekTo(stream, *startOffset));
- if (progress && ICompressProgress_Progress(progress, endOffset - *startOffset, (UInt64)(Int64)-1) != SZ_OK)
+ RINOK(LookInStream_SeekTo(stream, (UInt64)*startOffset));
+ if (progress && ICompressProgress_Progress(progress, (UInt64)(endOffset - *startOffset), (UInt64)(Int64)-1) != SZ_OK)
return SZ_ERROR_PROGRESS;
}
return SZ_OK;