Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/kornelski/7z.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Igor Pavlov <ipavlov@users.sourceforge.net>, 2021-11-29 06:01:13 +0300
committer: fn ⌃ ⌥ <70830482+FnControlOption@users.noreply.github.com>, 2021-11-29 06:01:13 +0300
commit: d789d4137d8a7c16696c5bc1b13f24bb887eb7ea (patch)
tree: 6a7e0bf0e9b292d92dc91c6ef68a579fe2adcbe7
parent: 585698650f7257d2cefa6a3a2a49d5bbe84fd9b2 (diff)
21.0321.03
-rw-r--r-- Asm/x86/7zAsm.asm 27
-rw-r--r-- Asm/x86/LzFindOpt.asm 513
-rw-r--r-- C/7zTypes.h 7
-rw-r--r-- C/7zVersion.h 6
-rw-r--r-- C/Aes.c 10
-rw-r--r-- C/Alloc.c 8
-rw-r--r-- C/Alloc.h 9
-rw-r--r-- C/CpuArch.c 62
-rw-r--r-- C/CpuArch.h 29
-rw-r--r-- C/LzFind.c 994
-rw-r--r-- C/LzFind.h 41
-rw-r--r-- C/LzFindMt.c 860
-rw-r--r-- C/LzFindMt.h 36
-rw-r--r-- C/LzFindOpt.c 578
-rw-r--r-- C/LzmaEnc.c 318
-rw-r--r-- C/MtCoder.c 9
-rw-r--r-- C/Sha1.c 36
-rw-r--r-- C/Threads.c 40
-rw-r--r-- C/Threads.h 10
-rw-r--r-- C/Util/LzmaLib/LzmaLib.dsp 4
-rw-r--r-- C/Util/LzmaLib/makefile 1
-rw-r--r-- C/var_clang_x64.mak 1
-rw-r--r-- C/var_clang_x86.mak 1
-rw-r--r-- C/var_gcc_x86.mak 1
-rw-r--r-- C/warn_gcc.mak 2
-rw-r--r-- CPP/7zip/7zip_gcc.mak 14
-rw-r--r-- CPP/7zip/Archive/7z/7zHandlerOut.cpp 2
-rw-r--r-- CPP/7zip/Archive/Zip/ZipHeader.h 1
-rw-r--r-- CPP/7zip/Archive/Zip/ZipItem.cpp 1
-rw-r--r-- CPP/7zip/Bundles/Alone/Alone.dsp 118
-rw-r--r-- CPP/7zip/Bundles/Alone/makefile 3
-rw-r--r-- CPP/7zip/Bundles/Alone/makefile.gcc 2
-rw-r--r-- CPP/7zip/Bundles/Alone7z/Alone.dsp 30
-rw-r--r-- CPP/7zip/Bundles/Alone7z/makefile 2
-rw-r--r-- CPP/7zip/Bundles/Alone7z/makefile.gcc 2
-rw-r--r-- CPP/7zip/Bundles/Fm/FM.dsp 9
-rw-r--r-- CPP/7zip/Bundles/Format7z/makefile 2
-rw-r--r-- CPP/7zip/Bundles/Format7zF/Arc.mak 2
-rw-r--r-- CPP/7zip/Bundles/Format7zF/Arc_gcc.mak 2
-rw-r--r-- CPP/7zip/Bundles/Format7zF/Format7z.dsp 20
-rw-r--r-- CPP/7zip/Bundles/Format7zR/makefile 2
-rw-r--r-- CPP/7zip/Bundles/LzmaCon/LzmaCon.dsp 17
-rw-r--r-- CPP/7zip/Bundles/LzmaCon/makefile 3
-rw-r--r-- CPP/7zip/Bundles/LzmaCon/makefile.gcc 3
-rw-r--r-- CPP/7zip/Common/MemBlocks.cpp 4
-rw-r--r-- CPP/7zip/Common/MethodProps.cpp 66
-rw-r--r-- CPP/7zip/Common/MethodProps.h 85
-rw-r--r-- CPP/7zip/Common/OffsetStream.cpp 2
-rw-r--r-- CPP/7zip/Common/StreamBinder.cpp 4
-rw-r--r-- CPP/7zip/Compress/DeflateDecoder.cpp 15
-rw-r--r-- CPP/7zip/Compress/DeflateEncoder.cpp 24
-rw-r--r-- CPP/7zip/Compress/LzmaEncoder.cpp 24
-rw-r--r-- CPP/7zip/Compress/PpmdEncoder.cpp 46
-rw-r--r-- CPP/7zip/LzFindOpt.mak 7
-rw-r--r-- CPP/7zip/UI/Common/ArchiveCommandLine.cpp 2
-rw-r--r-- CPP/7zip/UI/Common/ArchiveCommandLine.h 1
-rw-r--r-- CPP/7zip/UI/Common/Bench.cpp 614
-rw-r--r-- CPP/7zip/UI/Common/Bench.h 67
-rw-r--r-- CPP/7zip/UI/Common/CompressCall2.cpp 6
-rw-r--r-- CPP/7zip/UI/Console/Main.cpp 68
-rw-r--r-- CPP/7zip/UI/FileManager/AboutDialog.cpp 2
-rw-r--r-- CPP/7zip/UI/FileManager/ProgressDialog.cpp 7
-rw-r--r-- CPP/7zip/UI/FileManager/ProgressDialog2.cpp 34
-rw-r--r-- CPP/7zip/UI/FileManager/ProgressDialog2.h 2
-rw-r--r-- CPP/7zip/UI/FileManager/ProgressDialog2Res.h 3
-rw-r--r-- CPP/7zip/UI/FileManager/ProgressDialog2a.rc 9
-rw-r--r-- CPP/7zip/UI/GUI/BenchmarkDialog.cpp 1687
-rw-r--r-- CPP/7zip/UI/GUI/BenchmarkDialog.h 185
-rw-r--r-- CPP/7zip/UI/GUI/BenchmarkDialog.rc 121
-rw-r--r-- CPP/7zip/UI/GUI/BenchmarkDialogRes.h 10
-rw-r--r-- CPP/7zip/UI/GUI/CompressDialog.cpp 367
-rw-r--r-- CPP/7zip/UI/GUI/CompressDialog.h 25
-rw-r--r-- CPP/7zip/UI/GUI/GUI.cpp 7
-rw-r--r-- CPP/7zip/UI/GUI/GUI.dsp 4
-rw-r--r-- CPP/7zip/UI/GUI/UpdateGUI.cpp 8
-rw-r--r-- CPP/7zip/warn_clang_mac.mak 2
-rw-r--r-- CPP/7zip/warn_gcc.mak 4
-rw-r--r-- CPP/Common/LzFindPrepare.cpp 7
-rw-r--r-- CPP/Common/MyBuffer2.h 30
-rw-r--r-- CPP/Windows/Control/Dialog.cpp 9
-rw-r--r-- CPP/Windows/Control/Dialog.h 20
-rw-r--r-- CPP/Windows/ErrorMsg.cpp 8
-rw-r--r-- CPP/Windows/Registry.cpp 36
-rw-r--r-- CPP/Windows/Synchronization.h 4
-rw-r--r-- CPP/Windows/SystemInfo.cpp 263
-rw-r--r-- CPP/Windows/SystemInfo.h 6
-rw-r--r-- DOC/7zip.wxs 2
-rw-r--r-- DOC/readme.txt 88
88 files changed, 5972 insertions, 1851 deletions
diff --git a/Asm/x86/7zAsm.asm b/Asm/x86/7zAsm.asm
index dde40da6..34c278cb 100644
--- a/Asm/x86/7zAsm.asm
+++ b/Asm/x86/7zAsm.asm
@@ -1,5 +1,5 @@
; 7zAsm.asm -- ASM macros
-; 2021-02-07 : Igor Pavlov : Public domain
+; 2021-07-13 : Igor Pavlov : Public domain
ifdef RAX
x64 equ 1
@@ -171,6 +171,7 @@ endm
; for fastcall and for WIN-x64
REG_PARAM_0_x equ x1
REG_PARAM_0 equ r1
+REG_PARAM_1_x equ x2
REG_PARAM_1 equ r2
ifndef x64
@@ -178,6 +179,7 @@ ifndef x64
REG_ABI_PARAM_0_x equ REG_PARAM_0_x
REG_ABI_PARAM_0 equ REG_PARAM_0
+REG_ABI_PARAM_1_x equ REG_PARAM_1_x
REG_ABI_PARAM_1 equ REG_PARAM_1
else
@@ -186,28 +188,39 @@ else
if (IS_LINUX eq 0)
; for WIN-x64:
-REG_PARAM_2 equ r8
-REG_PARAM_3 equ r9
+REG_PARAM_2_x equ x8
+REG_PARAM_2 equ r8
+REG_PARAM_3 equ r9
REG_ABI_PARAM_0_x equ REG_PARAM_0_x
REG_ABI_PARAM_0 equ REG_PARAM_0
+REG_ABI_PARAM_1_x equ REG_PARAM_1_x
REG_ABI_PARAM_1 equ REG_PARAM_1
+REG_ABI_PARAM_2_x equ REG_PARAM_2_x
REG_ABI_PARAM_2 equ REG_PARAM_2
REG_ABI_PARAM_3 equ REG_PARAM_3
else
; for LINUX-x64:
REG_LINUX_PARAM_0_x equ x7
-REG_LINUX_PARAM_0 equ r7
-REG_LINUX_PARAM_1 equ r6
-REG_LINUX_PARAM_2 equ r2
-REG_LINUX_PARAM_3 equ r1
+REG_LINUX_PARAM_0 equ r7
+REG_LINUX_PARAM_1_x equ x6
+REG_LINUX_PARAM_1 equ r6
+REG_LINUX_PARAM_2 equ r2
+REG_LINUX_PARAM_3 equ r1
+REG_LINUX_PARAM_4_x equ x8
+REG_LINUX_PARAM_4 equ r8
+REG_LINUX_PARAM_5 equ r9
REG_ABI_PARAM_0_x equ REG_LINUX_PARAM_0_x
REG_ABI_PARAM_0 equ REG_LINUX_PARAM_0
+REG_ABI_PARAM_1_x equ REG_LINUX_PARAM_1_x
REG_ABI_PARAM_1 equ REG_LINUX_PARAM_1
REG_ABI_PARAM_2 equ REG_LINUX_PARAM_2
REG_ABI_PARAM_3 equ REG_LINUX_PARAM_3
+REG_ABI_PARAM_4_x equ REG_LINUX_PARAM_4_x
+REG_ABI_PARAM_4 equ REG_LINUX_PARAM_4
+REG_ABI_PARAM_5 equ REG_LINUX_PARAM_5
MY_ABI_LINUX_TO_WIN_2 macro
mov r2, r6
diff --git a/Asm/x86/LzFindOpt.asm b/Asm/x86/LzFindOpt.asm
new file mode 100644
index 00000000..bc3a6fe7
--- /dev/null
+++ b/Asm/x86/LzFindOpt.asm
@@ -0,0 +1,513 @@
+; LzFindOpt.asm -- ASM version of GetMatchesSpecN_2() function
+; 2021-07-13: Igor Pavlov : Public domain
+;
+
+ifndef x64
+; x64=1
+; .err <x64_IS_REQUIRED>
+endif
+
+include 7zAsm.asm
+
+MY_ASM_START
+
+_TEXT$LZFINDOPT SEGMENT ALIGN(64) 'CODE'
+
+MY_ALIGN macro num:req
+ align num ; pad up to the boundary given by the required macro argument
+endm
+
+MY_ALIGN_32 macro
+ MY_ALIGN 32 ; 32-byte alignment; used before the hot loop labels in this file
+endm
+
+MY_ALIGN_64 macro
+ MY_ALIGN 64 ; 64-byte alignment; matches the ALIGN(64) of the enclosing segment
+endm
+
+
+t0_L equ x0_L
+t0_x equ x0
+t0 equ r0
+t1_x equ x3
+t1 equ r3
+
+cp_x equ t1_x
+cp_r equ t1
+m equ x5
+m_r equ r5
+len_x equ x6
+len equ r6
+diff_x equ x7
+diff equ r7
+len0 equ r10
+len1_x equ x11
+len1 equ r11
+maxLen_x equ x12
+maxLen equ r12
+d equ r13
+ptr0 equ r14
+ptr1 equ r15
+
+d_lim equ m_r
+cycSize equ len_x
+hash_lim equ len0
+delta1_x equ len1_x
+delta1_r equ len1
+delta_x equ maxLen_x
+delta_r equ maxLen
+hash equ ptr0
+src equ ptr1
+
+
+
+if (IS_LINUX gt 0)
+
+; r1 r2 r8 r9 : win64
+; r7 r6 r2 r1 r8 r9 : linux
+
+lenLimit equ r8
+lenLimit_x equ x8
+; pos_r equ r2
+pos equ x2
+cur equ r1
+son equ r9
+
+else
+
+lenLimit equ REG_ABI_PARAM_2
+lenLimit_x equ REG_ABI_PARAM_2_x
+pos equ REG_ABI_PARAM_1_x
+cur equ REG_ABI_PARAM_0
+son equ REG_ABI_PARAM_3
+
+endif
+
+
+if (IS_LINUX gt 0)
+ maxLen_OFFS equ (REG_SIZE * (6 + 1))
+else
+ cutValue_OFFS equ (REG_SIZE * (8 + 1 + 4))
+ d_OFFS equ (REG_SIZE + cutValue_OFFS)
+ maxLen_OFFS equ (REG_SIZE + d_OFFS)
+endif
+ hash_OFFS equ (REG_SIZE + maxLen_OFFS)
+ limit_OFFS equ (REG_SIZE + hash_OFFS)
+ size_OFFS equ (REG_SIZE + limit_OFFS)
+ cycPos_OFFS equ (REG_SIZE + size_OFFS)
+ cycSize_OFFS equ (REG_SIZE + cycPos_OFFS)
+ posRes_OFFS equ (REG_SIZE + cycSize_OFFS)
+
+if (IS_LINUX gt 0)
+else
+ cutValue_PAR equ [r0 + cutValue_OFFS]
+ d_PAR equ [r0 + d_OFFS]
+endif
+ maxLen_PAR equ [r0 + maxLen_OFFS]
+ hash_PAR equ [r0 + hash_OFFS]
+ limit_PAR equ [r0 + limit_OFFS]
+ size_PAR equ [r0 + size_OFFS]
+ cycPos_PAR equ [r0 + cycPos_OFFS]
+ cycSize_PAR equ [r0 + cycSize_OFFS]
+ posRes_PAR equ [r0 + posRes_OFFS]
+
+
+ cutValue_VAR equ DWORD PTR [r4 + 8 * 0]
+ cutValueCur_VAR equ DWORD PTR [r4 + 8 * 0 + 4]
+ cycPos_VAR equ DWORD PTR [r4 + 8 * 1 + 0]
+ cycSize_VAR equ DWORD PTR [r4 + 8 * 1 + 4]
+ hash_VAR equ QWORD PTR [r4 + 8 * 2]
+ limit_VAR equ QWORD PTR [r4 + 8 * 3]
+ size_VAR equ QWORD PTR [r4 + 8 * 4]
+ distances equ QWORD PTR [r4 + 8 * 5]
+ maxLen_VAR equ QWORD PTR [r4 + 8 * 6]
+
+ Old_RSP equ QWORD PTR [r4 + 8 * 7]
+ LOCAL_SIZE equ 8 * 8
+
+COPY_VAR_32 macro dest_var, src_var
+ mov x3, src_var ; memory-to-memory copy goes through scratch x3 (same register as t1_x / cp_x, so it is clobbered)
+ mov dest_var, x3 ; store the loaded value to the destination operand
+endm
+
+COPY_VAR_64 macro dest_var, src_var
+ mov r3, src_var ; memory-to-memory copy goes through scratch r3 (same register as t1 / cp_r, so it is clobbered)
+ mov dest_var, r3 ; store the loaded value to the destination operand
+endm
+
+
+; MY_ALIGN_64
+MY_PROC GetMatchesSpecN_2, 13
+MY_PUSH_PRESERVED_ABI_REGS
+ mov r0, RSP ; r0 = RSP after MY_PUSH_PRESERVED_ABI_REGS; every *_PAR equate addresses its argument as [r0 + offset]
+ lea r3, [r0 - LOCAL_SIZE] ; make room for the *_VAR locals and Old_RSP
+ and r3, -64 ; round the local frame down to a 64-byte boundary
+ mov RSP, r3
+ mov Old_RSP, r0 ; keep the unaligned RSP so that fin can restore it
+
+if (IS_LINUX gt 0)
+ mov d, REG_ABI_PARAM_5 ; r13 = r9
+ mov cutValue_VAR, REG_ABI_PARAM_4_x ; = r8
+ mov son, REG_ABI_PARAM_3 ; r9 = r1
+ mov r8, REG_ABI_PARAM_2 ; r8 = r2
+ mov pos, REG_ABI_PARAM_1_x ; r2 = x6
+ mov r1, REG_ABI_PARAM_0 ; r1 = r7
+else
+ COPY_VAR_32 cutValue_VAR, cutValue_PAR
+ mov d, d_PAR
+endif
+
+ COPY_VAR_64 limit_VAR, limit_PAR
+
+ mov hash_lim, size_PAR ; hash_lim shares r10 with len0, so it is reloaded from size_VAR after each tree walk
+ mov size_VAR, hash_lim
+
+ mov cp_x, cycPos_PAR
+ mov hash, hash_PAR
+
+ mov cycSize, cycSize_PAR ; cycSize shares x6 with len_x, so it is reloaded from cycSize_VAR at next_node
+ mov cycSize_VAR, cycSize
+
+ ; we want cur in (rcx). So we change the cur and lenLimit variables
+ sub lenLimit, cur
+ neg lenLimit_x
+ inc lenLimit_x
+
+ mov t0_x, maxLen_PAR
+ sub t0, lenLimit
+ mov maxLen_VAR, t0
+
+ jmp main_loop ; first iteration enters the loop directly, skipping fill_empty
+
+MY_ALIGN_64
+fill_empty:
+ ; ptr0 = *ptr1 = kEmptyHashValue;
+ mov QWORD PTR [ptr1], 0
+ inc pos
+ inc cp_x
+ mov DWORD PTR [d - 4], 0 ; the UInt32 reserved by "add d, 4" in main_loop gets 0 for this position
+ cmp d, limit_VAR
+ jae fin ; output limit reached
+ cmp hash, hash_lim
+ je fin ; no more hash entries to process
+
+; MY_ALIGN_64
+main_loop:
+ ; UInt32 delta = *hash++;
+ mov diff_x, [hash] ; delta
+ add hash, 4
+ ; mov cycPos_VAR, cp_x
+
+ inc cur
+ add d, 4 ; reserve one UInt32 in the output; it is filled later through [d - 4] or [distances - 4]
+ mov m, pos
+ sub m, diff_x; ; matchPos
+
+ ; CLzRef *ptr1 = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2;
+ lea ptr1, [son + 8 * cp_r]
+ ; mov cycSize, cycSize_VAR
+ cmp pos, cycSize
+ jb directMode ; if (pos < cycSize_VAR)
+
+ ; CYC MODE
+
+ cmp diff_x, cycSize
+ jae fill_empty ; if (delta >= cycSize_VAR)
+
+ xor t0_x, t0_x
+ mov cycPos_VAR, cp_x
+ sub cp_x, diff_x
+ ; jae prepare_for_tree_loop
+ ; add cp_x, cycSize
+ cmovb t0_x, cycSize
+ add cp_x, t0_x ; cp_x += (cycPos < delta ? cycSize : 0)
+ jmp prepare_for_tree_loop
+
+
+directMode:
+ cmp diff_x, pos
+ je fill_empty ; if (delta == pos)
+ jae fin_error ; if (delta >= pos)
+
+ mov cycPos_VAR, cp_x
+ mov cp_x, m
+
+prepare_for_tree_loop:
+ mov len0, lenLimit
+ mov hash_VAR, hash ; hash shares r14 with ptr0, so it is spilled before ptr0 is set
+ ; CLzRef *ptr0 = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2 + 1;
+ lea ptr0, [ptr1 + 4]
+ ; UInt32 *_distances = ++d;
+ mov distances, d
+
+ neg len0
+ mov len1, len0
+
+ mov t0_x, cutValue_VAR
+ mov maxLen, maxLen_VAR
+ mov cutValueCur_VAR, t0_x ; per-position countdown; decremented at next_node
+
+MY_ALIGN_32
+tree_loop:
+ neg diff
+ mov len, len0
+ cmp len1, len0
+ cmovb len, len1 ; len = (len1 < len0 ? len1 : len0);
+ add diff, cur
+
+ mov t0_x, [son + cp_r * 8] ; prefetch
+ movzx t0_x, BYTE PTR [diff + 1 * len]
+ lea cp_r, [son + cp_r * 8]
+ cmp [cur + 1 * len], t0_L
+ je matched_1
+
+ jb left_0 ; flags are still those of the byte compare above (je does not modify them)
+
+ mov [ptr1], m
+ mov m, [cp_r + 4]
+ lea ptr1, [cp_r + 4]
+ sub diff, cur ; FIX32
+ jmp next_node
+
+MY_ALIGN_32
+left_0:
+ mov [ptr0], m
+ mov m, [cp_r]
+ mov ptr0, cp_r
+ sub diff, cur ; FIX32
+ ; jmp next_node
+
+; ------------ NEXT NODE ------------
+; MY_ALIGN_32
+next_node:
+ mov cycSize, cycSize_VAR
+ dec cutValueCur_VAR
+ je finish_tree ; countdown reached zero: stop walking the tree for this position
+
+ add diff_x, pos ; prev_match = pos + diff
+ cmp m, diff_x
+ jae fin_error ; if (new_match >= prev_match)
+
+ mov diff_x, pos
+ sub diff_x, m ; delta = pos - new_match
+ cmp pos, cycSize
+ jae cyc_mode_2 ; if (pos >= cycSize)
+
+ mov cp_x, m
+ test m, m
+ jne tree_loop ; if (m != 0)
+
+finish_tree:
+ ; ptr0 = *ptr1 = kEmptyHashValue;
+ mov DWORD PTR [ptr0], 0
+ mov DWORD PTR [ptr1], 0
+
+ inc pos
+
+ ; _distances[-1] = (UInt32)(d - _distances);
+ mov t0, distances
+ mov t1, d
+ sub t1, t0
+ shr t1_x, 2
+ mov [t0 - 4], t1_x
+
+ cmp d, limit_VAR
+ jae fin ; if (d >= limit)
+
+ mov cp_x, cycPos_VAR
+ mov hash, hash_VAR
+ mov hash_lim, size_VAR
+ inc cp_x
+ cmp hash, hash_lim
+ jne main_loop ; if (hash != size)
+ jmp fin
+
+
+MY_ALIGN_32
+cyc_mode_2:
+ cmp diff_x, cycSize
+ jae finish_tree ; if (delta >= cycSize)
+
+ mov cp_x, cycPos_VAR
+ xor t0_x, t0_x
+ sub cp_x, diff_x ; cp_x = cycPos - delta
+ cmovb t0_x, cycSize
+ add cp_x, t0_x ; cp_x += (cycPos < delta ? cycSize : 0)
+ jmp tree_loop
+
+
+MY_ALIGN_32
+matched_1:
+
+ inc len
+ ; cmp len_x, lenLimit_x
+ je short lenLimit_reach
+ movzx t0_x, BYTE PTR [diff + 1 * len]
+ cmp [cur + 1 * len], t0_L
+ jne mismatch
+
+
+MY_ALIGN_32
+match_loop:
+ ; while (++len != lenLimit) (len[diff] != len[0]) ;
+
+ inc len
+ ; cmp len_x, lenLimit_x
+ je short lenLimit_reach
+ movzx t0_x, BYTE PTR [diff + 1 * len]
+ cmp BYTE PTR [cur + 1 * len], t0_L
+ je match_loop
+
+mismatch:
+ jb left_2 ; branch on the flags of the byte compare that ended the match
+
+ mov [ptr1], m
+ mov m, [cp_r + 4]
+ lea ptr1, [cp_r + 4]
+ mov len1, len
+
+ jmp max_update
+
+MY_ALIGN_32
+left_2:
+ mov [ptr0], m
+ mov m, [cp_r]
+ mov ptr0, cp_r
+ mov len0, len
+
+max_update:
+ sub diff, cur ; restore diff
+
+ cmp maxLen, len
+ jae next_node
+
+ mov maxLen, len
+ add len, lenLimit
+ mov [d], len_x
+ mov t0_x, diff_x
+ not t0_x
+ mov [d + 4], t0_x
+ add d, 8 ; two UInt32 values were appended to the output
+
+ jmp next_node
+
+
+
+MY_ALIGN_32
+lenLimit_reach:
+
+ mov delta_r, cur
+ sub delta_r, diff
+ lea delta1_r, [delta_r - 1]
+
+ mov t0_x, [cp_r]
+ mov [ptr1], t0_x
+ mov t0_x, [cp_r + 4]
+ mov [ptr0], t0_x
+
+ mov [d], lenLimit_x
+ mov [d + 4], delta1_x
+ add d, 8
+
+ ; _distances[-1] = (UInt32)(d - _distances);
+ mov t0, distances
+ mov t1, d
+ sub t1, t0
+ shr t1_x, 2
+ mov [t0 - 4], t1_x
+
+ mov hash, hash_VAR
+ mov hash_lim, size_VAR
+
+ inc pos
+ mov cp_x, cycPos_VAR
+ inc cp_x
+
+ mov d_lim, limit_VAR ; d_lim shares r5 with m_r; limit is kept in a register for long_loop
+ mov cycSize, cycSize_VAR
+ ; if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)
+ ; break;
+ cmp hash, hash_lim
+ je fin
+ cmp d, d_lim
+ jae fin
+ cmp delta_x, [hash]
+ jne main_loop
+ movzx t0_x, BYTE PTR [diff]
+ cmp [cur], t0_L
+ jne main_loop
+
+ ; jmp main_loop ; bypass for debug
+
+ mov cycPos_VAR, cp_x
+ shl len, 3 ; cycSize * 8
+ sub diff, cur ; restore diff
+ xor t0_x, t0_x
+ cmp cp_x, delta_x ; cmp (cycPos_VAR, delta)
+ lea cp_r, [son + 8 * cp_r] ; dest
+ lea src, [cp_r + 8 * diff]
+ cmovb t0, len ; t0 = (cycPos_VAR < delta ? cycSize * 8 : 0)
+ add src, t0
+ add len, son ; len = son + cycSize * 8
+
+
+MY_ALIGN_32
+long_loop:
+ add hash, 4
+
+ ; *(UInt64 *)(void *)ptr = ((const UInt64 *)(const void *)ptr)[diff];
+
+ mov t0, [src]
+ add src, 8
+ mov [cp_r], t0
+ add cp_r, 8
+ cmp src, len
+ cmove src, son ; if end of (son) buffer is reached, we wrap to begin
+
+ mov DWORD PTR [d], 2
+ mov [d + 4], lenLimit_x
+ mov [d + 8], delta1_x
+ add d, 12 ; three UInt32 values (2, lenLimit_x, delta1_x) were appended for this position
+
+ inc cur
+
+ cmp hash, hash_lim
+ je long_footer
+ cmp delta_x, [hash]
+ jne long_footer
+ movzx t0_x, BYTE PTR [diff + cur]
+ cmp [cur], t0_L
+ jne long_footer
+ cmp d, d_lim
+ jb long_loop
+
+long_footer:
+ sub cp_r, son
+ shr cp_r, 3 ; convert the dest pointer back to an index of 8-byte entries in son
+ add pos, cp_x
+ sub pos, cycPos_VAR ; pos advances by the number of entries copied in long_loop
+ mov cycSize, cycSize_VAR
+
+ cmp d, d_lim
+ jae fin
+ cmp hash, hash_lim
+ jne main_loop
+ jmp fin
+
+
+
+fin_error:
+ xor d, d ; d = 0; fin copies it into r0
+
+fin:
+ mov RSP, Old_RSP ; drop the aligned local frame
+ mov t0, [r4 + posRes_OFFS] ; NOTE(review): same slot as posRes_PAR if r4 names RSP (defined in 7zAsm.asm, not shown here) - confirm
+ mov [t0], pos ; store the 32-bit pos through the pointer loaded above
+ mov r0, d ; r0 = final d (0 after fin_error)
+
+MY_POP_PRESERVED_ABI_REGS
+MY_ENDP
+
+_TEXT$LZFINDOPT ENDS
+
+end
diff --git a/C/7zTypes.h b/C/7zTypes.h
index f817b7f5..3f66a7b5 100644
--- a/C/7zTypes.h
+++ b/C/7zTypes.h
@@ -1,5 +1,5 @@
/* 7zTypes.h -- Basic types
-2021-04-25 : Igor Pavlov : Public domain */
+2021-07-13 : Igor Pavlov : Public domain */
#ifndef __7Z_TYPES_H
#define __7Z_TYPES_H
@@ -62,6 +62,8 @@ typedef int SRes;
typedef unsigned WRes;
#define MY_SRes_HRESULT_FROM_WRes(x) HRESULT_FROM_WIN32(x)
+// #define MY_HRES_ERROR__INTERNAL_ERROR MY_SRes_HRESULT_FROM_WRes(ERROR_INTERNAL_ERROR)
+
#else // _WIN32
// #define ENV_HAVE_LSTAT
@@ -95,6 +97,7 @@ typedef int WRes;
#define ERROR_DIRECTORY 267L
#define ERROR_TOO_MANY_POSTS 298L
+#define ERROR_INTERNAL_ERROR 1359L
#define ERROR_INVALID_REPARSE_DATA 4392L
#define ERROR_REPARSE_TAG_INVALID 4393L
#define ERROR_REPARSE_TAG_MISMATCH 4394L
@@ -206,6 +209,8 @@ typedef size_t SIZE_T;
#endif // _WIN32
+#define MY_HRES_ERROR__INTERNAL_ERROR ((HRESULT)0x8007054FL)
+
#ifdef _SZ_NO_INT_64
diff --git a/C/7zVersion.h b/C/7zVersion.h
index 57c4c205..e7fff57d 100644
--- a/C/7zVersion.h
+++ b/C/7zVersion.h
@@ -1,7 +1,7 @@
#define MY_VER_MAJOR 21
-#define MY_VER_MINOR 02
+#define MY_VER_MINOR 03
#define MY_VER_BUILD 0
-#define MY_VERSION_NUMBERS "21.02 alpha"
+#define MY_VERSION_NUMBERS "21.03 beta"
#define MY_VERSION MY_VERSION_NUMBERS
#ifdef MY_CPU_NAME
@@ -10,7 +10,7 @@
#define MY_VERSION_CPU MY_VERSION
#endif
-#define MY_DATE "2021-05-06"
+#define MY_DATE "2021-07-20"
#undef MY_COPYRIGHT
#undef MY_VERSION_COPYRIGHT_DATE
#define MY_AUTHOR_NAME "Igor Pavlov"
diff --git a/C/Aes.c b/C/Aes.c
index 4436a3c6..27e32e62 100644
--- a/C/Aes.c
+++ b/C/Aes.c
@@ -1,5 +1,5 @@
/* Aes.c -- AES encryption / decryption
-2021-04-01 : Igor Pavlov : Public domain */
+2021-05-13 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -365,10 +365,10 @@ void MY_FAST_CALL AesCtr_Code(UInt32 *p, Byte *data, size_t numBlocks)
#ifdef MY_CPU_LE_UNALIGN
*((UInt32 *)(void *)data) ^= t;
#else
- data[0] ^= (t & 0xFF);
- data[1] ^= ((t >> 8) & 0xFF);
- data[2] ^= ((t >> 16) & 0xFF);
- data[3] ^= ((t >> 24));
+ data[0] = (Byte)(data[0] ^ (t & 0xFF));
+ data[1] = (Byte)(data[1] ^ ((t >> 8) & 0xFF));
+ data[2] = (Byte)(data[2] ^ ((t >> 16) & 0xFF));
+ data[3] = (Byte)(data[3] ^ ((t >> 24)));
#endif
}
}
diff --git a/C/Alloc.c b/C/Alloc.c
index b350deb7..d1af76c5 100644
--- a/C/Alloc.c
+++ b/C/Alloc.c
@@ -1,5 +1,5 @@
/* Alloc.c -- Memory allocation functions
-2020-10-29 : Igor Pavlov : Public domain */
+2021-07-13 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -247,14 +247,14 @@ static void *SzAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p); return MyAlloc
static void SzFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p); MyFree(address); }
const ISzAlloc g_Alloc = { SzAlloc, SzFree };
+#ifdef _WIN32
static void *SzMidAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p); return MidAlloc(size); }
static void SzMidFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p); MidFree(address); }
-const ISzAlloc g_MidAlloc = { SzMidAlloc, SzMidFree };
-
static void *SzBigAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p); return BigAlloc(size); }
static void SzBigFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p); BigFree(address); }
+const ISzAlloc g_MidAlloc = { SzMidAlloc, SzMidFree };
const ISzAlloc g_BigAlloc = { SzBigAlloc, SzBigFree };
-
+#endif
/*
uintptr_t : <stdint.h> C99 (optional)
diff --git a/C/Alloc.h b/C/Alloc.h
index b32b6767..3be2041e 100644
--- a/C/Alloc.h
+++ b/C/Alloc.h
@@ -1,5 +1,5 @@
/* Alloc.h -- Memory allocation functions
-2021-02-08 : Igor Pavlov : Public domain */
+2021-07-13 : Igor Pavlov : Public domain */
#ifndef __COMMON_ALLOC_H
#define __COMMON_ALLOC_H
@@ -30,8 +30,15 @@ void BigFree(void *address);
#endif
extern const ISzAlloc g_Alloc;
+
+#ifdef _WIN32
extern const ISzAlloc g_BigAlloc;
extern const ISzAlloc g_MidAlloc;
+#else
+#define g_BigAlloc g_AlignedAlloc
+#define g_MidAlloc g_AlignedAlloc
+#endif
+
extern const ISzAlloc g_AlignedAlloc;
diff --git a/C/CpuArch.c b/C/CpuArch.c
index 1d761416..fa9afe39 100644
--- a/C/CpuArch.c
+++ b/C/CpuArch.c
@@ -1,5 +1,5 @@
/* CpuArch.c -- CPU specific code
-2021-04-28 : Igor Pavlov : Public domain */
+2021-07-13 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -278,6 +278,30 @@ BoolInt CPU_IsSupported_SHA()
#include <Windows.h>
#endif
+BoolInt CPU_IsSupported_AVX2() // returns the AVX2 feature bit read from CPUID function 7, or False if an earlier check fails
+{
+ Cx86cpuid p;
+ CHECK_SYS_SSE_SUPPORT
+
+ #ifdef _WIN32
+ #define MY__PF_XSAVE_ENABLED 17
+ if (!IsProcessorFeaturePresent(MY__PF_XSAVE_ENABLED)) // Windows only: require the XSAVE-enabled processor feature
+ return False;
+ #endif
+
+ if (!x86cpuid_CheckAndRead(&p))
+ return False;
+ if (p.maxFunc < 7) // CPUID function 7 is queried below, so it must be supported
+ return False;
+ {
+ UInt32 d[4] = { 0 };
+ MyCPUID(7, &d[0], &d[1], &d[2], &d[3]);
+ // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]);
+ return 1
+ & (d[1] >> 5); // avx2: bit 5 of d[1] (EBX, per the debug printf above). NOTE(review): no XGETBV/YMM-state check is visible in this function - confirm CHECK_SYS_SSE_SUPPORT or callers cover it
+ }
+}
+
BoolInt CPU_IsSupported_VAES_AVX2()
{
Cx86cpuid p;
@@ -329,10 +353,9 @@ BoolInt CPU_IsSupported_PageGB()
#include <Windows.h>
-BoolInt CPU_IsSupported_CRC32()
- { return IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
-BoolInt CPU_IsSupported_CRYPTO()
- { return IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
+BoolInt CPU_IsSupported_CRC32() { return IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
+BoolInt CPU_IsSupported_CRYPTO() { return IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
+BoolInt CPU_IsSupported_NEON() { return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
#else
@@ -356,17 +379,27 @@ static void Print_sysctlbyname(const char *name)
}
*/
-BoolInt CPU_IsSupported_CRC32(void)
+static BoolInt My_sysctlbyname_Get_BoolInt(const char *name)
{
+ UInt32 val = 0;
+ if (My_sysctlbyname_Get_UInt32(name, &val) == 0 && val == 1)
+ return 1;
+ return 0;
+}
+
/*
Print_sysctlbyname("hw.pagesize");
Print_sysctlbyname("machdep.cpu.brand_string");
*/
- UInt32 val = 0;
- if (My_sysctlbyname_Get_UInt32("hw.optional.armv8_crc32", &val) == 0 && val == 1)
- return 1;
- return 0;
+BoolInt CPU_IsSupported_CRC32(void)
+{
+ return My_sysctlbyname_Get_BoolInt("hw.optional.armv8_crc32");
+}
+
+BoolInt CPU_IsSupported_NEON(void)
+{
+ return My_sysctlbyname_Get_BoolInt("hw.optional.neon");
}
#ifdef MY_CPU_ARM64
@@ -390,18 +423,25 @@ BoolInt CPU_IsSupported_AES (void) { return APPLE_CRYPTO_SUPPORT_VAL; }
#include <asm/hwcap.h>
+ #define MY_HWCAP_CHECK_FUNC_2(name1, name2) \
+ BoolInt CPU_IsSupported_ ## name1() { return (getauxval(AT_HWCAP) & (HWCAP_ ## name2)) ? 1 : 0; }
+
#ifdef MY_CPU_ARM64
#define MY_HWCAP_CHECK_FUNC(name) \
- BoolInt CPU_IsSupported_ ## name() { return (getauxval(AT_HWCAP) & (HWCAP_ ## name)) ? 1 : 0; }
+ MY_HWCAP_CHECK_FUNC_2(name, name)
+ MY_HWCAP_CHECK_FUNC_2(NEON, ASIMD)
+// MY_HWCAP_CHECK_FUNC (ASIMD)
#elif defined(MY_CPU_ARM)
#define MY_HWCAP_CHECK_FUNC(name) \
BoolInt CPU_IsSupported_ ## name() { return (getauxval(AT_HWCAP2) & (HWCAP2_ ## name)) ? 1 : 0; }
+ MY_HWCAP_CHECK_FUNC_2(NEON, NEON)
#endif
#else // USE_HWCAP
#define MY_HWCAP_CHECK_FUNC(name) \
BoolInt CPU_IsSupported_ ## name() { return 0; }
+ MY_HWCAP_CHECK_FUNC(NEON)
#endif // USE_HWCAP
diff --git a/C/CpuArch.h b/C/CpuArch.h
index 6c4ab404..529d3a50 100644
--- a/C/CpuArch.h
+++ b/C/CpuArch.h
@@ -1,5 +1,5 @@
/* CpuArch.h -- CPU specific code
-2021-04-25 : Igor Pavlov : Public domain */
+2021-07-13 : Igor Pavlov : Public domain */
#ifndef __CPU_ARCH_H
#define __CPU_ARCH_H
@@ -225,7 +225,6 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
#endif
#else
#ifdef __xlC__
- // for XLC compiler:
#define MY_CPU_pragma_pack_push_1 _Pragma("pack(1)")
#define MY_CPU_pragma_pop _Pragma("pack()")
#else
@@ -253,8 +252,12 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
#ifdef MY_CPU_LE
#if defined(MY_CPU_X86_OR_AMD64) \
- || defined(MY_CPU_ARM64) \
- || defined(__ARM_FEATURE_UNALIGNED)
+ || defined(MY_CPU_ARM64)
+ #define MY_CPU_LE_UNALIGN
+ #define MY_CPU_LE_UNALIGN_64
+ #elif defined(__ARM_FEATURE_UNALIGNED)
+ /* gcc9 for 32-bit arm can use LDRD instruction that requires 32-bit alignment.
+ So we can't use unaligned 64-bit operations. */
#define MY_CPU_LE_UNALIGN
#endif
#endif
@@ -264,11 +267,15 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
#define GetUi16(p) (*(const UInt16 *)(const void *)(p))
#define GetUi32(p) (*(const UInt32 *)(const void *)(p))
+#ifdef MY_CPU_LE_UNALIGN_64
#define GetUi64(p) (*(const UInt64 *)(const void *)(p))
+#endif
#define SetUi16(p, v) { *(UInt16 *)(void *)(p) = (v); }
#define SetUi32(p, v) { *(UInt32 *)(void *)(p) = (v); }
+#ifdef MY_CPU_LE_UNALIGN_64
#define SetUi64(p, v) { *(UInt64 *)(void *)(p) = (v); }
+#endif
#else
@@ -282,8 +289,6 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
((UInt32)((const Byte *)(p))[2] << 16) | \
((UInt32)((const Byte *)(p))[3] << 24))
-#define GetUi64(p) (GetUi32(p) | ((UInt64)GetUi32(((const Byte *)(p)) + 4) << 32))
-
#define SetUi16(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \
_ppp_[0] = (Byte)_vvv_; \
_ppp_[1] = (Byte)(_vvv_ >> 8); }
@@ -294,12 +299,22 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
_ppp_[2] = (Byte)(_vvv_ >> 16); \
_ppp_[3] = (Byte)(_vvv_ >> 24); }
+#endif
+
+
+#ifndef MY_CPU_LE_UNALIGN_64
+
+#define GetUi64(p) (GetUi32(p) | ((UInt64)GetUi32(((const Byte *)(p)) + 4) << 32))
+
#define SetUi64(p, v) { Byte *_ppp2_ = (Byte *)(p); UInt64 _vvv2_ = (v); \
SetUi32(_ppp2_ , (UInt32)_vvv2_); \
SetUi32(_ppp2_ + 4, (UInt32)(_vvv2_ >> 32)); }
#endif
+
+
+
#ifdef __has_builtin
#define MY__has_builtin(x) __has_builtin(x)
#else
@@ -392,6 +407,7 @@ int x86cpuid_GetFirm(const Cx86cpuid *p);
BoolInt CPU_Is_InOrder(void);
BoolInt CPU_IsSupported_AES(void);
+BoolInt CPU_IsSupported_AVX2(void);
BoolInt CPU_IsSupported_VAES_AVX2(void);
BoolInt CPU_IsSupported_SSSE3(void);
BoolInt CPU_IsSupported_SSE41(void);
@@ -401,6 +417,7 @@ BoolInt CPU_IsSupported_PageGB(void);
#elif defined(MY_CPU_ARM_OR_ARM64)
BoolInt CPU_IsSupported_CRC32(void);
+BoolInt CPU_IsSupported_NEON(void);
#if defined(_WIN32)
BoolInt CPU_IsSupported_CRYPTO(void);
diff --git a/C/LzFind.c b/C/LzFind.c
index 95966f06..3b32eae8 100644
--- a/C/LzFind.c
+++ b/C/LzFind.c
@@ -1,19 +1,29 @@
/* LzFind.c -- Match finder for LZ algorithms
-2021-04-01 : Igor Pavlov : Public domain */
+2021-07-12 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include <string.h>
+// #include <stdio.h>
#include "CpuArch.h"
#include "LzFind.h"
#include "LzHash.h"
+#define kBlockMoveAlign (1 << 7) // alignment for memmove()
+#define kBlockSizeAlign (1 << 16) // alignment for block allocation
+#define kBlockSizeReserveMin (1 << 24) // it's 1/256 of a 4 GB dictionary
+
#define kEmptyHashValue 0
-#define kMaxValForNormalize ((UInt32)0xFFFFFFFF)
-#define kNormalizeStepMin (1 << 10) /* it must be power of 2 */
-#define kNormalizeMask (~(UInt32)(kNormalizeStepMin - 1))
-#define kMaxHistorySize ((UInt32)7 << 29)
+
+#define kMaxValForNormalize ((UInt32)0)
+// #define kMaxValForNormalize ((UInt32)(1 << 20) + 0xFFF) // for debug
+
+// #define kNormalizeAlign (1 << 7) // alignment for speculated accesses
+
+#define GET_AVAIL_BYTES(p) \
+ Inline_MatchFinder_GetNumAvailableBytes(p)
+
// #define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size)
#define kFix5HashSize kFix4HashSize
@@ -64,46 +74,57 @@ static void LzInWindow_Free(CMatchFinder *p, ISzAllocPtr alloc)
}
}
-/* keepSizeBefore + keepSizeAfter + keepSizeReserv must be < 4G) */
-static int LzInWindow_Create(CMatchFinder *p, UInt32 keepSizeReserv, ISzAllocPtr alloc)
+static int LzInWindow_Create2(CMatchFinder *p, UInt32 blockSize, ISzAllocPtr alloc)
{
- UInt32 blockSize = p->keepSizeBefore + p->keepSizeAfter + keepSizeReserv;
- if (p->directInput)
- {
- p->blockSize = blockSize;
- return 1;
- }
+ if (blockSize == 0)
+ return 0;
if (!p->bufferBase || p->blockSize != blockSize)
{
+ // size_t blockSizeT;
LzInWindow_Free(p, alloc);
p->blockSize = blockSize;
- p->bufferBase = (Byte *)ISzAlloc_Alloc(alloc, (size_t)blockSize);
+ // blockSizeT = blockSize;
+
+ // printf("\nblockSize = 0x%x\n", blockSize);
+ /*
+ #if defined _WIN64
+ // we can allocate 4GiB, but still use UInt32 for (p->blockSize)
+ // we use UInt32 type for (p->blockSize), because
+ // we don't want to wrap over 4 GiB,
+ // when we use (p->streamPos - p->pos) that is UInt32.
+ if (blockSize >= (UInt32)0 - (UInt32)kBlockSizeAlign)
+ {
+ blockSizeT = ((size_t)1 << 32);
+ printf("\nchanged to blockSizeT = 4GiB\n");
+ }
+ #endif
+ */
+
+ p->bufferBase = (Byte *)ISzAlloc_Alloc(alloc, blockSize);
+ // printf("\nbufferBase = %p\n", p->bufferBase);
+ // return 0; // for debug
}
return (p->bufferBase != NULL);
}
-static Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return p->buffer; }
+static const Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return p->buffer; }
-static UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p) { return p->streamPos - p->pos; }
+static UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p) { return GET_AVAIL_BYTES(p); }
-void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue)
-{
- p->posLimit -= subValue;
- p->pos -= subValue;
- p->streamPos -= subValue;
-}
+MY_NO_INLINE
static void MatchFinder_ReadBlock(CMatchFinder *p)
{
if (p->streamEndWasReached || p->result != SZ_OK)
return;
- /* We use (p->streamPos - p->pos) value. (p->streamPos < p->pos) is allowed. */
+ /* We use (p->streamPos - p->pos) value.
+ (p->streamPos < p->pos) is allowed. */
if (p->directInput)
{
- UInt32 curSize = 0xFFFFFFFF - (p->streamPos - p->pos);
+ UInt32 curSize = 0xFFFFFFFF - GET_AVAIL_BYTES(p);
if (curSize > p->directInputRem)
curSize = (UInt32)p->directInputRem;
p->directInputRem -= curSize;
@@ -115,10 +136,22 @@ static void MatchFinder_ReadBlock(CMatchFinder *p)
for (;;)
{
- Byte *dest = p->buffer + (p->streamPos - p->pos);
+ Byte *dest = p->buffer + GET_AVAIL_BYTES(p);
size_t size = (size_t)(p->bufferBase + p->blockSize - dest);
if (size == 0)
+ {
+ /* we call ReadBlock() after NeedMove() and MoveBlock().
+ NeedMove() and MoveBlock() provide more than (keepSizeAfter)
+ to the end of (blockSize).
+ So we don't execute this branch in normal code flow.
+ We can get here if ReadBlock() is called before NeedMove(), MoveBlock().
+ */
+ // p->result = SZ_ERROR_FAIL; // we can show error here
return;
+ }
+
+ // #define kRead 3
+ // if (size > kRead) size = kRead; // for debug
p->result = ISeqInStream_Read(p->stream, dest, &size);
if (p->result != SZ_OK)
@@ -129,41 +162,52 @@ static void MatchFinder_ReadBlock(CMatchFinder *p)
return;
}
p->streamPos += (UInt32)size;
- if (p->streamPos - p->pos > p->keepSizeAfter)
+ if (GET_AVAIL_BYTES(p) > p->keepSizeAfter)
return;
+ /* here and in other (p->keepSizeAfter) checks we keep 1 byte more than was requested by the Create() function
+ (GET_AVAIL_BYTES(p) >= p->keepSizeAfter) - minimal required size */
}
+
+ // on exit: (p->result != SZ_OK || p->streamEndWasReached || GET_AVAIL_BYTES(p) > p->keepSizeAfter)
}
+
+
+MY_NO_INLINE
void MatchFinder_MoveBlock(CMatchFinder *p)
{
+ const size_t offset = (size_t)(p->buffer - p->bufferBase) - p->keepSizeBefore;
+ const size_t keepBefore = (offset & (kBlockMoveAlign - 1)) + p->keepSizeBefore;
+ p->buffer = p->bufferBase + keepBefore;
memmove(p->bufferBase,
- p->buffer - p->keepSizeBefore,
- (size_t)(p->streamPos - p->pos) + p->keepSizeBefore);
- p->buffer = p->bufferBase + p->keepSizeBefore;
+ p->bufferBase + (offset & ~((size_t)kBlockMoveAlign - 1)),
+ keepBefore + (size_t)GET_AVAIL_BYTES(p));
}
+/* We call MoveBlock() before ReadBlock().
+ So MoveBlock() can be a wasteful operation, if the whole input data
+ can fit in the current block even without calling MoveBlock().
+ In the important case where (dataSize <= historySize),
+ the condition (p->blockSize > dataSize + p->keepSizeAfter) is met,
+ so there is no MoveBlock() in that case.
+*/
+
int MatchFinder_NeedMove(CMatchFinder *p)
{
if (p->directInput)
return 0;
- /* if (p->streamEndWasReached) return 0; */
+ if (p->streamEndWasReached || p->result != SZ_OK)
+ return 0;
return ((size_t)(p->bufferBase + p->blockSize - p->buffer) <= p->keepSizeAfter);
}
void MatchFinder_ReadIfRequired(CMatchFinder *p)
{
- if (p->streamEndWasReached)
- return;
- if (p->keepSizeAfter >= p->streamPos - p->pos)
+ if (p->keepSizeAfter >= GET_AVAIL_BYTES(p))
MatchFinder_ReadBlock(p);
}
-static void MatchFinder_CheckAndMoveAndRead(CMatchFinder *p)
-{
- if (MatchFinder_NeedMove(p))
- MatchFinder_MoveBlock(p);
- MatchFinder_ReadBlock(p);
-}
+
static void MatchFinder_SetDefaultSettings(CMatchFinder *p)
{
@@ -214,32 +258,67 @@ static CLzRef* AllocRefs(size_t num, ISzAllocPtr alloc)
return (CLzRef *)ISzAlloc_Alloc(alloc, sizeInBytes);
}
-int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
- UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter,
- ISzAllocPtr alloc)
+#if (kBlockSizeReserveMin < kBlockSizeAlign * 2)
+ #error Stop_Compiling_Bad_Reserve
+#endif
+
+
+
+static UInt32 GetBlockSize(CMatchFinder *p, UInt32 historySize) // Derives the block size from keepSizeBefore + keepSizeAfter plus a reserve; returns 0 when the settings are rejected.
{
- UInt32 sizeReserv;
-
+ UInt32 blockSize = (p->keepSizeBefore + p->keepSizeAfter); // minimal size; the 32-bit sum may wrap, which is detected below
+ /*
if (historySize > kMaxHistorySize)
- {
- MatchFinder_Free(p, alloc);
return 0;
- }
+ */
+ // printf("\nhistorySize == 0x%x\n", historySize);
- sizeReserv = historySize >> 1;
- if (historySize >= ((UInt32)3 << 30)) sizeReserv = historySize >> 3;
- else if (historySize >= ((UInt32)2 << 30)) sizeReserv = historySize >> 2;
+ if (p->keepSizeBefore < historySize || blockSize < p->keepSizeBefore) // if 32-bit overflow (in keepSizeBefore itself or in the sum above)
+ return 0;
- sizeReserv += (keepAddBufferBefore + matchMaxLen + keepAddBufferAfter) / 2 + (1 << 19);
+ {
+ const UInt32 kBlockSizeMax = (UInt32)0 - (UInt32)kBlockSizeAlign; // 2^32 - kBlockSizeAlign: largest block size that can be returned
+ const UInt32 rem = kBlockSizeMax - blockSize; // headroom above the minimal size (only meaningful if blockSize < kBlockSizeMax, checked below)
+ const UInt32 reserve = (blockSize >> (blockSize < ((UInt32)1 << 30) ? 1 : 2)) // 1/2 of blockSize when it is below 2^30, otherwise 1/4
+ + (1 << 12) + kBlockMoveAlign + kBlockSizeAlign; // do not overflow 32-bit here
+ if (blockSize >= kBlockSizeMax
+ || rem < kBlockSizeReserveMin) // we reject settings that will be slow
+ return 0;
+ if (reserve >= rem) // the full reserve does not fit: clamp to the maximum
+ blockSize = kBlockSizeMax;
+ else
+ {
+ blockSize += reserve;
+ blockSize &= ~(UInt32)(kBlockSizeAlign - 1); // clear the low bits: rounds down to a multiple of kBlockSizeAlign (assumes a power of two - confirm)
+ }
+ }
+ // printf("\n LzFind_blockSize = %x\n", blockSize);
+ // printf("\n LzFind_blockSize = %d\n", blockSize >> 20);
+ return blockSize;
+}
+
+int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
+ UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter,
+ ISzAllocPtr alloc)
+{
+ /* we need one additional byte in (p->keepSizeBefore),
+ since we use MoveBlock() after (p->pos++) and before dictionary using */
+ // keepAddBufferBefore = (UInt32)0xFFFFFFFF - (1 << 22); // for debug
p->keepSizeBefore = historySize + keepAddBufferBefore + 1;
- p->keepSizeAfter = matchMaxLen + keepAddBufferAfter;
-
- /* we need one additional byte, since we use MoveBlock after pos++ and before dictionary using */
-
- if (LzInWindow_Create(p, sizeReserv, alloc))
+
+ keepAddBufferAfter += matchMaxLen;
+ /* we need (p->keepSizeAfter >= p->numHashBytes) */
+ if (keepAddBufferAfter < p->numHashBytes)
+ keepAddBufferAfter = p->numHashBytes;
+ // keepAddBufferAfter -= 2; // for debug
+ p->keepSizeAfter = keepAddBufferAfter;
+
+ if (p->directInput)
+ p->blockSize = 0;
+ if (p->directInput || LzInWindow_Create2(p, GetBlockSize(p, historySize), alloc))
{
- UInt32 newCyclicBufferSize = historySize + 1;
+ const UInt32 newCyclicBufferSize = historySize + 1; // do not change it
UInt32 hs;
p->matchMaxLen = matchMaxLen;
{
@@ -299,7 +378,7 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
size_t numSons;
p->historySize = historySize;
p->hashSizeSum = hs;
- p->cyclicBufferSize = newCyclicBufferSize;
+ p->cyclicBufferSize = newCyclicBufferSize; // it must be = (historySize + 1)
numSons = newCyclicBufferSize;
if (p->btMode)
@@ -329,33 +408,43 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
return 0;
}
+
static void MatchFinder_SetLimits(CMatchFinder *p) // Recomputes p->lenLimit and p->posLimit (the next position at which limits must be re-checked).
{
- UInt32 limit = kMaxValForNormalize - p->pos;
- UInt32 limit2 = p->cyclicBufferSize - p->cyclicBufferPos;
-
- if (limit2 < limit)
- limit = limit2;
- limit2 = p->streamPos - p->pos;
+ UInt32 k; // scratch: each candidate bound in turn
+ UInt32 n = kMaxValForNormalize - p->pos; // positions left before pos reaches kMaxValForNormalize
+ if (n == 0)
+ n = (UInt32)(Int32)-1; // we allow (pos == 0) at start even with (kMaxValForNormalize == 0)
- if (limit2 <= p->keepSizeAfter)
+ k = p->cyclicBufferSize - p->cyclicBufferPos; // positions left before cyclicBufferPos reaches cyclicBufferSize
+ if (k < n)
+ n = k;
+
+ k = GET_AVAIL_BYTES(p); // third bound is derived from the available bytes
{
- if (limit2 > 0)
- limit2 = 1;
+ const UInt32 ksa = p->keepSizeAfter;
+ UInt32 mm = p->matchMaxLen; // becomes lenLimit; reduced only when fewer than matchMaxLen bytes are available
+ if (k > ksa)
+ k -= ksa; // we must limit exactly to keepSizeAfter for ReadBlock
+ else if (k >= mm)
+ {
+ // the limitation for (p->lenLimit) update
+ k -= mm; // optimization : to reduce the number of checks
+ k++; // k = avail - matchMaxLen + 1
+ // k = 1; // non-optimized version : for debug
+ }
+ else
+ {
+ mm = k; // fewer bytes than matchMaxLen: lenLimit shrinks to the available count
+ if (k != 0)
+ k = 1; // advance one position at a time; k stays 0 when nothing is available
+ }
+ p->lenLimit = mm;
}
- else
- limit2 -= p->keepSizeAfter;
-
- if (limit2 < limit)
- limit = limit2;
+ if (k < n)
+ n = k; // n = minimum of the three bounds
- {
- UInt32 lenLimit = p->streamPos - p->pos;
- if (lenLimit > p->matchMaxLen)
- lenLimit = p->matchMaxLen;
- p->lenLimit = lenLimit;
- }
- p->posLimit = p->pos + limit;
+ p->posLimit = p->pos + n;
}
@@ -363,7 +452,7 @@ void MatchFinder_Init_LowHash(CMatchFinder *p)
{
size_t i;
CLzRef *items = p->hash;
- size_t numItems = p->fixedHashSize;
+ const size_t numItems = p->fixedHashSize;
for (i = 0; i < numItems; i++)
items[i] = kEmptyHashValue;
}
@@ -373,80 +462,315 @@ void MatchFinder_Init_HighHash(CMatchFinder *p)
{
size_t i;
CLzRef *items = p->hash + p->fixedHashSize;
- size_t numItems = (size_t)p->hashMask + 1;
+ const size_t numItems = (size_t)p->hashMask + 1;
for (i = 0; i < numItems; i++)
items[i] = kEmptyHashValue;
}
-void MatchFinder_Init_3(CMatchFinder *p, int readData)
+void MatchFinder_Init_4(CMatchFinder *p) // Resets buffer, pos, streamPos, result and streamEndWasReached; it does not read data, set cyclicBufferPos or compute limits.
{
- p->cyclicBufferPos = 0;
p->buffer = p->bufferBase;
- p->pos =
- p->streamPos = p->cyclicBufferSize;
+ {
+ /* kEmptyHashValue = 0 (Zero) is used in hash tables as the NO-VALUE marker.
+ The code in CMatchFinderMt expects (pos = 1) */
+ p->pos =
+ p->streamPos =
+ 1; // it is the smallest optimal value. do not change it
+ // 0; // for debug
+ }
p->result = SZ_OK;
p->streamEndWasReached = 0;
-
- if (readData)
- MatchFinder_ReadBlock(p);
-
- MatchFinder_SetLimits(p);
}
+// (CYC_TO_POS_OFFSET == 0) is expected by some optimized code
+#define CYC_TO_POS_OFFSET 0
+// #define CYC_TO_POS_OFFSET 1 // for debug
+
void MatchFinder_Init(CMatchFinder *p) // Full initialization: clears both hash areas, resets state, reads the first block and sets limits.
{
MatchFinder_Init_HighHash(p);
MatchFinder_Init_LowHash(p);
- MatchFinder_Init_3(p, True);
+ MatchFinder_Init_4(p);
+ // if (readData)
+ MatchFinder_ReadBlock(p); // now unconditional (the readData condition above is commented out)
+
+ /* if we init (cyclicBufferPos = pos), then we can use one variable
+ instead of both (cyclicBufferPos) and (pos) : only before (cyclicBufferPos) wrapping */
+ p->cyclicBufferPos = (p->pos - CYC_TO_POS_OFFSET); // init with relation to (pos)
+ // p->cyclicBufferPos = 0; // smallest value
+ // p->son[0] = p->son[1] = 0; // unused: we can init skipped record for speculated accesses.
+ MatchFinder_SetLimits(p); // must run after ReadBlock() and the cyclicBufferPos assignment, since it reads both
}
-
-static UInt32 MatchFinder_GetSubValue(CMatchFinder *p)
+
+
+#ifdef MY_CPU_X86_OR_AMD64
+ #if defined(__clang__) && (__clang_major__ >= 8) \
+ || defined(__GNUC__) && (__GNUC__ >= 8) \
+ || defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1900)
+ #define USE_SATUR_SUB_128
+ #define USE_AVX2
+ #define ATTRIB_SSE41 __attribute__((__target__("sse4.1")))
+ #define ATTRIB_AVX2 __attribute__((__target__("avx2")))
+ #elif defined(_MSC_VER)
+ #if (_MSC_VER >= 1600)
+ #define USE_SATUR_SUB_128
+ #if (_MSC_VER >= 1900)
+ #define USE_AVX2
+ #include <immintrin.h> // avx
+ #endif
+ #endif
+ #endif
+
+// #elif defined(MY_CPU_ARM_OR_ARM64)
+#elif defined(MY_CPU_ARM64)
+
+ #if defined(__clang__) && (__clang_major__ >= 8) \
+ || defined(__GNUC__) && (__GNUC__ >= 8)
+ #define USE_SATUR_SUB_128
+ #ifdef MY_CPU_ARM64
+ // #define ATTRIB_SSE41 __attribute__((__target__("")))
+ #else
+ // #define ATTRIB_SSE41 __attribute__((__target__("fpu=crypto-neon-fp-armv8")))
+ #endif
+
+ #elif defined(_MSC_VER)
+ #if (_MSC_VER >= 1910)
+ #define USE_SATUR_SUB_128
+ #endif
+ #endif
+
+ #if defined(_MSC_VER) && defined(MY_CPU_ARM64)
+ #include <arm64_neon.h>
+ #else
+ #include <arm_neon.h>
+ #endif
+
+#endif
+
+/*
+#ifndef ATTRIB_SSE41
+ #define ATTRIB_SSE41
+#endif
+#ifndef ATTRIB_AVX2
+ #define ATTRIB_AVX2
+#endif
+*/
+
+#ifdef USE_SATUR_SUB_128
+
+// #define _SHOW_HW_STATUS
+
+#ifdef _SHOW_HW_STATUS
+#include <stdio.h>
+#define _PRF(x) x
+_PRF(;)
+#else
+#define _PRF(x)
+#endif
+
+#ifdef MY_CPU_ARM_OR_ARM64
+
+#ifdef MY_CPU_ARM64
+// #define FORCE_SATUR_SUB_128
+#endif
+
+typedef uint32x4_t v128;
+#define SASUB_128(i) \
+ *(v128 *)(void *)(items + (i) * 4) = \
+ vsubq_u32(vmaxq_u32(*(const v128 *)(const void *)(items + (i) * 4), sub2), sub2);
+
+#else
+
+#include <smmintrin.h> // sse4.1
+
+typedef __m128i v128;
+#define SASUB_128(i) \
+ *(v128 *)(void *)(items + (i) * 4) = \
+ _mm_sub_epi32(_mm_max_epu32(*(const v128 *)(const void *)(items + (i) * 4), sub2), sub2); // SSE 4.1
+
+#endif
+
+
+
+MY_NO_INLINE // 128-bit vector version: items[i] = max(items[i], subValue) - subValue for every item in [items, lim).
+static
+#ifdef ATTRIB_SSE41
+ATTRIB_SSE41
+#endif
+void LzFind_SaturSub_128(UInt32 subValue, CLzRef *items, const CLzRef *lim)
{
- return (p->pos - p->historySize - 1) & kNormalizeMask;
+ v128 sub2 = // subValue replicated into all four 32-bit lanes
+ #ifdef MY_CPU_ARM_OR_ARM64
+ vdupq_n_u32(subValue);
+ #else
+ _mm_set_epi32((Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue);
+ #endif
+ do // do-while: the range must be non-empty and a multiple of 16 items, or (items != lim) is never hit
+ {
+ SASUB_128(0)
+ SASUB_128(1)
+ SASUB_128(2)
+ SASUB_128(3)
+ items += 4 * 4; // 4 vectors of 4 items per iteration
+ }
+ while (items != lim);
}
-void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems)
+
+
+#ifdef USE_AVX2
+
+#include <immintrin.h> // avx
+
+#define SASUB_256(i) *(__m256i *)(void *)(items + (i) * 8) = _mm256_sub_epi32(_mm256_max_epu32(*(const __m256i *)(const void *)(items + (i) * 8), sub2), sub2); // AVX2
+
+MY_NO_INLINE // 256-bit (AVX2) version: items[i] = max(items[i], subValue) - subValue for every item in [items, lim).
+static
+#ifdef ATTRIB_AVX2
+ATTRIB_AVX2
+#endif
+void LzFind_SaturSub_256(UInt32 subValue, CLzRef *items, const CLzRef *lim)
{
- if (numItems == 0)
- return;
- {
- const CLzRef *lim = items + numItems - 1;
- for (; items < lim; items += 2)
+ __m256i sub2 = _mm256_set_epi32( // subValue replicated into all eight 32-bit lanes
+ (Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue,
+ (Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue);
+ do // do-while: the range must be non-empty and a multiple of 16 items, or (items != lim) is never hit
{
- UInt32 v, m;
- v = items[0]; m = v - subValue; if (v < subValue) m = kEmptyHashValue;
- v = items[1]; items[0] = m; m = v - subValue; if (v < subValue) m = kEmptyHashValue;
- items[1] = m;
+ SASUB_256(0)
+ SASUB_256(1)
+ items += 2 * 8; // 2 vectors of 8 items per iteration
}
- if (items == lim)
+ while (items != lim);
+}
+#endif // USE_AVX2
+
+#ifndef FORCE_SATUR_SUB_128
+typedef void (MY_FAST_CALL *LZFIND_SATUR_SUB_CODE_FUNC)(
+ UInt32 subValue, CLzRef *items, const CLzRef *lim);
+static LZFIND_SATUR_SUB_CODE_FUNC g_LzFind_SaturSub;
+#endif // FORCE_SATUR_SUB_128
+
+#endif // USE_SATUR_SUB_128
+
+
+// kEmptyHashValue must be zero
+// #define SASUB_32(i) v = items[i]; m = v - subValue; if (v < subValue) m = kEmptyHashValue; items[i] = m;
+#define SASUB_32(i) v = items[i]; if (v < subValue) v = subValue; items[i] = v - subValue;
+
+#ifdef FORCE_SATUR_SUB_128
+
+#define DEFAULT_SaturSub LzFind_SaturSub_128
+
+#else
+
+#define DEFAULT_SaturSub LzFind_SaturSub_32
+
+MY_NO_INLINE // Scalar version: items[i] = (items[i] < subValue ? 0 : items[i] - subValue) for every item in [items, lim).
+static void LzFind_SaturSub_32(UInt32 subValue, CLzRef *items, const CLzRef *lim)
+{
+ do // do-while: the range must be non-empty and a multiple of 8 items, or (items != lim) is never hit
{
- UInt32 v, m;
- v = items[0]; m = v - subValue; if (v < subValue) m = kEmptyHashValue;
- items[0] = m;
- }
+ UInt32 v; // temporary used by the SASUB_32 macro
+ SASUB_32(0)
+ SASUB_32(1)
+ SASUB_32(2)
+ SASUB_32(3)
+ SASUB_32(4)
+ SASUB_32(5)
+ SASUB_32(6)
+ SASUB_32(7)
+ items += 8; // 8 items per iteration
}
+ while (items != lim);
}
-static void MatchFinder_Normalize(CMatchFinder *p)
+#endif
+
+
+MY_NO_INLINE // Subtracts subValue (saturating at 0) from numItems references: scalar head, block-wise middle, scalar tail.
void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems)
{
- UInt32 subValue = MatchFinder_GetSubValue(p);
- MatchFinder_Normalize3(subValue, p->hash, p->numRefs);
- MatchFinder_ReduceOffsets(p, subValue);
+ #define K_NORM_ALIGN_BLOCK_SIZE (1 << 6)
+
+ CLzRef *lim; // end of the block-processed middle part
+
+ for (; numItems != 0 && ((unsigned)(ptrdiff_t)items & (K_NORM_ALIGN_BLOCK_SIZE - 1)) != 0; numItems--) // head: one item at a time until the address is a multiple of 64 bytes
+ {
+ UInt32 v;
+ SASUB_32(0);
+ items++;
+ }
+
+ {
+ #define K_NORM_ALIGN_MASK (K_NORM_ALIGN_BLOCK_SIZE / 4 - 1)
+ lim = items + (numItems & ~(size_t)K_NORM_ALIGN_MASK); // middle: the largest multiple of 16 items
+ numItems &= K_NORM_ALIGN_MASK; // what is left (0..15 items) is handled by the tail loop
+ if (items != lim) // the block functions use do-while loops and require a non-empty range
+ {
+ #if defined(USE_SATUR_SUB_128) && !defined(FORCE_SATUR_SUB_128)
+ if (g_LzFind_SaturSub) // function pointer; the 32-bit default is used while it is NULL
+ g_LzFind_SaturSub(subValue, items, lim);
+ else
+ #endif
+ DEFAULT_SaturSub(subValue, items, lim);
+ }
+ items = lim;
+ }
+
+
+ for (; numItems != 0; numItems--) // tail: remaining items one at a time
+ {
+ UInt32 v;
+ SASUB_32(0);
+ items++;
+ }
}
+
+// call MatchFinder_CheckLimits() only after (p->pos++) update
+
MY_NO_INLINE
static void MatchFinder_CheckLimits(CMatchFinder *p) // Runs when pos reaches posLimit: refills the buffer, normalizes, wraps cyclicBufferPos and recomputes limits as needed.
{
+ if (// !p->streamEndWasReached && p->result == SZ_OK &&
+ p->keepSizeAfter == GET_AVAIL_BYTES(p))
+ {
+ // we try to read only in exact state (p->keepSizeAfter == GET_AVAIL_BYTES(p))
+ if (MatchFinder_NeedMove(p))
+ MatchFinder_MoveBlock(p);
+ MatchFinder_ReadBlock(p);
+ }
+
if (p->pos == kMaxValForNormalize)
- MatchFinder_Normalize(p);
- if (!p->streamEndWasReached && p->keepSizeAfter == p->streamPos - p->pos)
- MatchFinder_CheckAndMoveAndRead(p);
+ if (GET_AVAIL_BYTES(p) >= p->numHashBytes) // optional optimization for last bytes of data.
+ /*
+ if we disable normalization for the last bytes of data, and
+ if (data_size == 4 GiB), we don't call wasteful normalization,
+ but (pos) will wrap over Zero (0) in that case.
+ And we cannot resume later to normal operation
+ */
+ {
+ // MatchFinder_Normalize(p);
+ /* after normalization we need (p->pos >= p->historySize + 1); */
+ /* we can reduce subValue to aligned value, if want to keep alignment
+ of (p->pos) and (p->buffer) for speculated accesses. */
+ const UInt32 subValue = (p->pos - p->historySize - 1) /* & ~(UInt32)(kNormalizeAlign - 1) */;
+ // const UInt32 subValue = (1 << 15); // for debug
+ // printf("\nMatchFinder_Normalize() subValue == 0x%x\n", subValue);
+ size_t numSonRefs = p->cyclicBufferSize; // one son reference per cyclic position ...
+ if (p->btMode)
+ numSonRefs <<= 1; // ... or two per position in binary-tree mode
+ Inline_MatchFinder_ReduceOffsets(p, subValue);
+ MatchFinder_Normalize3(subValue, p->hash, (size_t)p->hashSizeSum + numSonRefs); // one pass over hashSizeSum + numSonRefs items starting at p->hash (presumably son directly follows hash - confirm)
+ }
+
if (p->cyclicBufferPos == p->cyclicBufferSize)
p->cyclicBufferPos = 0;
+
MatchFinder_SetLimits(p);
}
@@ -455,9 +779,9 @@ static void MatchFinder_CheckLimits(CMatchFinder *p)
(lenLimit > maxLen)
*/
MY_FORCE_INLINE
-static UInt32 * Hc_GetMatchesSpec(unsigned lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
- UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
- UInt32 *distances, unsigned maxLen)
+static UInt32 * Hc_GetMatchesSpec(size_t lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
+ size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
+ UInt32 *d, unsigned maxLen)
{
/*
son[_cyclicBufferPos] = curMatch;
@@ -465,7 +789,7 @@ static UInt32 * Hc_GetMatchesSpec(unsigned lenLimit, UInt32 curMatch, UInt32 pos
{
UInt32 delta = pos - curMatch;
if (cutValue-- == 0 || delta >= _cyclicBufferSize)
- return distances;
+ return d;
{
const Byte *pb = cur - delta;
curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)];
@@ -478,10 +802,10 @@ static UInt32 * Hc_GetMatchesSpec(unsigned lenLimit, UInt32 curMatch, UInt32 pos
if (maxLen < len)
{
maxLen = len;
- *distances++ = len;
- *distances++ = delta - 1;
+ *d++ = len;
+ *d++ = delta - 1;
if (len == lenLimit)
- return distances;
+ return d;
}
}
}
@@ -490,9 +814,15 @@ static UInt32 * Hc_GetMatchesSpec(unsigned lenLimit, UInt32 curMatch, UInt32 pos
const Byte *lim = cur + lenLimit;
son[_cyclicBufferPos] = curMatch;
+
do
{
- UInt32 delta = pos - curMatch;
+ UInt32 delta;
+
+ if (curMatch == 0)
+ break;
+ // if (curMatch2 >= curMatch) return NULL;
+ delta = pos - curMatch;
if (delta >= _cyclicBufferSize)
break;
{
@@ -506,19 +836,19 @@ static UInt32 * Hc_GetMatchesSpec(unsigned lenLimit, UInt32 curMatch, UInt32 pos
{
if (++c == lim)
{
- distances[0] = (UInt32)(lim - cur);
- distances[1] = delta - 1;
- return distances + 2;
+ d[0] = (UInt32)(lim - cur);
+ d[1] = delta - 1;
+ return d + 2;
}
}
{
- unsigned len = (unsigned)(c - cur);
+ const unsigned len = (unsigned)(c - cur);
if (maxLen < len)
{
maxLen = len;
- distances[0] = (UInt32)len;
- distances[1] = delta - 1;
- distances += 2;
+ d[0] = (UInt32)len;
+ d[1] = delta - 1;
+ d += 2;
}
}
}
@@ -526,31 +856,36 @@ static UInt32 * Hc_GetMatchesSpec(unsigned lenLimit, UInt32 curMatch, UInt32 pos
}
while (--cutValue);
- return distances;
+ return d;
}
MY_FORCE_INLINE
UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
- UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
- UInt32 *distances, UInt32 maxLen)
+ size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
+ UInt32 *d, UInt32 maxLen)
{
CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
unsigned len0 = 0, len1 = 0;
- for (;;)
+
+ UInt32 cmCheck;
+
+ // if (curMatch >= pos) { *ptr0 = *ptr1 = kEmptyHashValue; return NULL; }
+
+ cmCheck = (UInt32)(pos - _cyclicBufferSize);
+ if ((UInt32)pos <= _cyclicBufferSize)
+ cmCheck = 0;
+
+ if (cmCheck < curMatch)
+ do
{
- UInt32 delta = pos - curMatch;
- if (cutValue-- == 0 || delta >= _cyclicBufferSize)
- {
- *ptr0 = *ptr1 = kEmptyHashValue;
- return distances;
- }
+ const UInt32 delta = pos - curMatch;
{
CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1);
const Byte *pb = cur - delta;
unsigned len = (len0 < len1 ? len0 : len1);
- UInt32 pair0 = pair[0];
+ const UInt32 pair0 = pair[0];
if (pb[len] == cur[len])
{
if (++len != lenLimit && pb[len] == cur[len])
@@ -560,48 +895,60 @@ UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byt
if (maxLen < len)
{
maxLen = (UInt32)len;
- *distances++ = (UInt32)len;
- *distances++ = delta - 1;
+ *d++ = (UInt32)len;
+ *d++ = delta - 1;
if (len == lenLimit)
{
*ptr1 = pair0;
*ptr0 = pair[1];
- return distances;
+ return d;
}
}
}
if (pb[len] < cur[len])
{
*ptr1 = curMatch;
+ // const UInt32 curMatch2 = pair[1];
+ // if (curMatch2 >= curMatch) { *ptr0 = *ptr1 = kEmptyHashValue; return NULL; }
+ // curMatch = curMatch2;
+ curMatch = pair[1];
ptr1 = pair + 1;
- curMatch = *ptr1;
len1 = len;
}
else
{
*ptr0 = curMatch;
+ curMatch = pair[0];
ptr0 = pair;
- curMatch = *ptr0;
len0 = len;
}
}
}
+ while(--cutValue && cmCheck < curMatch);
+
+ *ptr0 = *ptr1 = kEmptyHashValue;
+ return d;
}
+
static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
- UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue)
+ size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue)
{
CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
unsigned len0 = 0, len1 = 0;
- for (;;)
+
+ UInt32 cmCheck;
+
+ cmCheck = (UInt32)(pos - _cyclicBufferSize);
+ if ((UInt32)pos <= _cyclicBufferSize)
+ cmCheck = 0;
+
+ if (// curMatch >= pos || // failure
+ cmCheck < curMatch)
+ do
{
- UInt32 delta = pos - curMatch;
- if (cutValue-- == 0 || delta >= _cyclicBufferSize)
- {
- *ptr0 = *ptr1 = kEmptyHashValue;
- return;
- }
+ const UInt32 delta = pos - curMatch;
{
CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1);
const Byte *pb = cur - delta;
@@ -623,43 +970,62 @@ static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const
if (pb[len] < cur[len])
{
*ptr1 = curMatch;
+ curMatch = pair[1];
ptr1 = pair + 1;
- curMatch = *ptr1;
len1 = len;
}
else
{
*ptr0 = curMatch;
+ curMatch = pair[0];
ptr0 = pair;
- curMatch = *ptr0;
len0 = len;
}
}
}
+ while(--cutValue && cmCheck < curMatch);
+
+ *ptr0 = *ptr1 = kEmptyHashValue;
+ return;
}
+
#define MOVE_POS \
++p->cyclicBufferPos; \
p->buffer++; \
- if (++p->pos == p->posLimit) MatchFinder_CheckLimits(p);
+ { const UInt32 pos1 = p->pos + 1; p->pos = pos1; if (pos1 == p->posLimit) MatchFinder_CheckLimits(p); }
-#define MOVE_POS_RET MOVE_POS return (UInt32)offset;
+#define MOVE_POS_RET MOVE_POS return distances;
-static void MatchFinder_MovePos(CMatchFinder *p) { MOVE_POS; }
+MY_NO_INLINE
+static void MatchFinder_MovePos(CMatchFinder *p)
+{
+ /* We get here at the end of stream data, when (avail < num_hash_bytes).
+ We don't update sons[cyclicBufferPos << btMode],
+ so that (sons) record will contain junk, and we cannot resume match searching
+ in normal operation, even if more input data is provided in the buffer later.
+ p->sons[p->cyclicBufferPos << p->btMode] = 0; // kEmptyHashValue
+ if (p->btMode)
+ p->sons[(p->cyclicBufferPos << p->btMode) + 1] = 0; // kEmptyHashValue
+ */
+ MOVE_POS; // advances cyclicBufferPos, buffer and pos; calls MatchFinder_CheckLimits() when pos reaches posLimit
+}
#define GET_MATCHES_HEADER2(minLen, ret_op) \
- unsigned lenLimit; UInt32 hv; const Byte *cur; UInt32 curMatch; \
+ unsigned lenLimit; UInt32 hv; Byte *cur; UInt32 curMatch; \
lenLimit = (unsigned)p->lenLimit; { if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; }} \
cur = p->buffer;
-#define GET_MATCHES_HEADER(minLen) GET_MATCHES_HEADER2(minLen, return 0)
-#define SKIP_HEADER(minLen) GET_MATCHES_HEADER2(minLen, continue)
+#define GET_MATCHES_HEADER(minLen) GET_MATCHES_HEADER2(minLen, return distances)
+#define SKIP_HEADER(minLen) do { GET_MATCHES_HEADER2(minLen, continue)
+
+#define MF_PARAMS(p) lenLimit, curMatch, p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue
-#define MF_PARAMS(p) p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue
+#define SKIP_FOOTER SkipMatchesSpec(MF_PARAMS(p)); MOVE_POS; } while (--num);
#define GET_MATCHES_FOOTER_BASE(_maxLen_, func) \
- offset = (unsigned)(func((UInt32)lenLimit, curMatch, MF_PARAMS(p), \
- distances + offset, (UInt32)(_maxLen_)) - distances); MOVE_POS_RET;
+ distances = func(MF_PARAMS(p), \
+ distances, (UInt32)_maxLen_); MOVE_POS_RET;
#define GET_MATCHES_FOOTER_BT(_maxLen_) \
GET_MATCHES_FOOTER_BASE(_maxLen_, GetMatchesSpec1)
@@ -667,42 +1033,45 @@ static void MatchFinder_MovePos(CMatchFinder *p) { MOVE_POS; }
#define GET_MATCHES_FOOTER_HC(_maxLen_) \
GET_MATCHES_FOOTER_BASE(_maxLen_, Hc_GetMatchesSpec)
-#define SKIP_FOOTER \
- SkipMatchesSpec((UInt32)lenLimit, curMatch, MF_PARAMS(p)); MOVE_POS;
+
#define UPDATE_maxLen { \
- ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)d2; \
+ const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)d2; \
const Byte *c = cur + maxLen; \
const Byte *lim = cur + lenLimit; \
for (; c != lim; c++) if (*(c + diff) != *c) break; \
maxLen = (unsigned)(c - cur); }
-static UInt32 Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+static UInt32* Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) // Now returns the advanced distances pointer instead of a count.
{
- unsigned offset;
GET_MATCHES_HEADER(2) // if lenLimit < 2: just advance the position and return distances unchanged
HASH2_CALC;
curMatch = p->hash[hv]; // previous position stored for this hash value
p->hash[hv] = p->pos; // record the current position in its place
- offset = 0;
GET_MATCHES_FOOTER_BT(1) // GetMatchesSpec1() with maxLen = 1, then MOVE_POS and return
}
-UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+UInt32* Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) // Now returns the advanced distances pointer instead of a count.
{
- unsigned offset;
GET_MATCHES_HEADER(3) // if lenLimit < 3: just advance the position and return distances unchanged
HASH_ZIP_CALC;
curMatch = p->hash[hv]; // previous position stored for this hash value
p->hash[hv] = p->pos; // record the current position in its place
- offset = 0;
GET_MATCHES_FOOTER_BT(2) // GetMatchesSpec1() with maxLen = 2, then MOVE_POS and return
}
-static UInt32 Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+
+#define SET_mmm \
+ mmm = p->cyclicBufferSize; \
+ if (pos < mmm) \
+ mmm = pos;
+
+
+static UInt32* Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
{
+ UInt32 mmm;
UInt32 h2, d2, pos;
- unsigned maxLen, offset;
+ unsigned maxLen;
UInt32 *hash;
GET_MATCHES_HEADER(3)
@@ -718,18 +1087,19 @@ static UInt32 Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
hash[h2] = pos;
(hash + kFix3HashSize)[hv] = pos;
+ SET_mmm
+
maxLen = 2;
- offset = 0;
- if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur)
+ if (d2 < mmm && *(cur - d2) == *cur)
{
UPDATE_maxLen
distances[0] = (UInt32)maxLen;
distances[1] = d2 - 1;
- offset = 2;
+ distances += 2;
if (maxLen == lenLimit)
{
- SkipMatchesSpec((UInt32)lenLimit, curMatch, MF_PARAMS(p));
+ SkipMatchesSpec(MF_PARAMS(p));
MOVE_POS_RET;
}
}
@@ -738,10 +1108,11 @@ static UInt32 Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
}
-static UInt32 Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+static UInt32* Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
{
+ UInt32 mmm;
UInt32 h2, h3, d2, d3, pos;
- unsigned maxLen, offset;
+ unsigned maxLen;
UInt32 *hash;
GET_MATCHES_HEADER(4)
@@ -758,43 +1129,44 @@ static UInt32 Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
(hash + kFix3HashSize)[h3] = pos;
(hash + kFix4HashSize)[hv] = pos;
+ SET_mmm
+
maxLen = 3;
- offset = 0;
for (;;)
{
- if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur)
+ if (d2 < mmm && *(cur - d2) == *cur)
{
distances[0] = 2;
distances[1] = d2 - 1;
- offset = 2;
+ distances += 2;
if (*(cur - d2 + 2) == cur[2])
{
- // distances[0] = 3;
+ // distances[-2] = 3;
}
- else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
+ else if (d3 < mmm && *(cur - d3) == *cur)
{
d2 = d3;
- distances[2 + 1] = d3 - 1;
- offset = 4;
+ distances[1] = d3 - 1;
+ distances += 2;
}
else
break;
}
- else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
+ else if (d3 < mmm && *(cur - d3) == *cur)
{
d2 = d3;
distances[1] = d3 - 1;
- offset = 2;
+ distances += 2;
}
else
break;
UPDATE_maxLen
- distances[(size_t)offset - 2] = (UInt32)maxLen;
+ distances[-2] = (UInt32)maxLen;
if (maxLen == lenLimit)
{
- SkipMatchesSpec((UInt32)lenLimit, curMatch, MF_PARAMS(p));
+ SkipMatchesSpec(MF_PARAMS(p));
MOVE_POS_RET
}
break;
@@ -804,9 +1176,10 @@ static UInt32 Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
}
-static UInt32 Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+static UInt32* Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
{
- UInt32 h2, h3, d2, d3, maxLen, offset, pos;
+ UInt32 mmm;
+ UInt32 h2, h3, d2, d3, maxLen, pos;
UInt32 *hash;
GET_MATCHES_HEADER(5)
@@ -826,45 +1199,46 @@ static UInt32 Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
// (hash + kFix4HashSize)[h4] = pos;
(hash + kFix5HashSize)[hv] = pos;
+ SET_mmm
+
maxLen = 4;
- offset = 0;
for (;;)
{
- if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur)
+ if (d2 < mmm && *(cur - d2) == *cur)
{
distances[0] = 2;
distances[1] = d2 - 1;
- offset = 2;
+ distances += 2;
if (*(cur - d2 + 2) == cur[2])
{
}
- else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
+ else if (d3 < mmm && *(cur - d3) == *cur)
{
- distances[3] = d3 - 1;
- offset = 4;
+ distances[1] = d3 - 1;
+ distances += 2;
d2 = d3;
}
else
break;
}
- else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
+ else if (d3 < mmm && *(cur - d3) == *cur)
{
distances[1] = d3 - 1;
- offset = 2;
+ distances += 2;
d2 = d3;
}
else
break;
- distances[(size_t)offset - 2] = 3;
+ distances[-2] = 3;
if (*(cur - d2 + 3) != cur[3])
break;
UPDATE_maxLen
- distances[(size_t)offset - 2] = maxLen;
+ distances[-2] = (UInt32)maxLen;
if (maxLen == lenLimit)
{
- SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p));
+ SkipMatchesSpec(MF_PARAMS(p));
MOVE_POS_RET;
}
break;
@@ -874,10 +1248,11 @@ static UInt32 Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
}
-static UInt32 Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+static UInt32* Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
{
+ UInt32 mmm;
UInt32 h2, h3, d2, d3, pos;
- unsigned maxLen, offset;
+ unsigned maxLen;
UInt32 *hash;
GET_MATCHES_HEADER(4)
@@ -894,40 +1269,41 @@ static UInt32 Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
(hash + kFix3HashSize)[h3] = pos;
(hash + kFix4HashSize)[hv] = pos;
+ SET_mmm
+
maxLen = 3;
- offset = 0;
for (;;)
{
- if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur)
+ if (d2 < mmm && *(cur - d2) == *cur)
{
distances[0] = 2;
distances[1] = d2 - 1;
- offset = 2;
+ distances += 2;
if (*(cur - d2 + 2) == cur[2])
{
- // distances[0] = 3;
+ // distances[-2] = 3;
}
- else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
+ else if (d3 < mmm && *(cur - d3) == *cur)
{
d2 = d3;
- distances[2 + 1] = d3 - 1;
- offset = 4;
+ distances[1] = d3 - 1;
+ distances += 2;
}
else
break;
}
- else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
+ else if (d3 < mmm && *(cur - d3) == *cur)
{
d2 = d3;
distances[1] = d3 - 1;
- offset = 2;
+ distances += 2;
}
else
break;
UPDATE_maxLen
- distances[(size_t)offset - 2] = (UInt32)maxLen;
+ distances[-2] = (UInt32)maxLen;
if (maxLen == lenLimit)
{
p->son[p->cyclicBufferPos] = curMatch;
@@ -940,9 +1316,10 @@ static UInt32 Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
}
-static UInt32 Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+static UInt32 * Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
{
- UInt32 h2, h3, d2, d3, maxLen, offset, pos;
+ UInt32 mmm;
+ UInt32 h2, h3, d2, d3, maxLen, pos;
UInt32 *hash;
GET_MATCHES_HEADER(5)
@@ -962,42 +1339,43 @@ static UInt32 Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
// (hash + kFix4HashSize)[h4] = pos;
(hash + kFix5HashSize)[hv] = pos;
+ SET_mmm
+
maxLen = 4;
- offset = 0;
for (;;)
{
- if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur)
+ if (d2 < mmm && *(cur - d2) == *cur)
{
distances[0] = 2;
distances[1] = d2 - 1;
- offset = 2;
+ distances += 2;
if (*(cur - d2 + 2) == cur[2])
{
}
- else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
+ else if (d3 < mmm && *(cur - d3) == *cur)
{
- distances[3] = d3 - 1;
- offset = 4;
+ distances[1] = d3 - 1;
+ distances += 2;
d2 = d3;
}
else
break;
}
- else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
+ else if (d3 < mmm && *(cur - d3) == *cur)
{
distances[1] = d3 - 1;
- offset = 2;
+ distances += 2;
d2 = d3;
}
else
break;
- distances[(size_t)offset - 2] = 3;
+ distances[-2] = 3;
if (*(cur - d2 + 3) != cur[3])
break;
UPDATE_maxLen
- distances[(size_t)offset - 2] = maxLen;
+ distances[-2] = maxLen;
if (maxLen == lenLimit)
{
p->son[p->cyclicBufferPos] = curMatch;
@@ -1010,86 +1388,75 @@ static UInt32 Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
}
-UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+UInt32* Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) // Now returns the advanced distances pointer instead of a count.
{
- unsigned offset;
GET_MATCHES_HEADER(3) // if lenLimit < 3: just advance the position and return distances unchanged
HASH_ZIP_CALC;
curMatch = p->hash[hv]; // previous position stored for this hash value
p->hash[hv] = p->pos; // record the current position in its place
- offset = 0;
GET_MATCHES_FOOTER_HC(2) // Hc_GetMatchesSpec() with maxLen = 2, then MOVE_POS and return
}
static void Bt2_MatchFinder_Skip(CMatchFinder *p, UInt32 num) // Advances num positions, updating the hash and tree without reporting matches.
{
- do
+ SKIP_HEADER(2) // the do-loop is now opened inside SKIP_HEADER and closed inside SKIP_FOOTER
{
- SKIP_HEADER(2)
HASH2_CALC;
curMatch = p->hash[hv];
p->hash[hv] = p->pos;
- SKIP_FOOTER
}
- while (--num != 0);
+ SKIP_FOOTER
}
void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num) // Advances num positions, updating the hash and tree without reporting matches.
{
- do
+ SKIP_HEADER(3) // the do-loop is now opened inside SKIP_HEADER and closed inside SKIP_FOOTER
{
- SKIP_HEADER(3)
HASH_ZIP_CALC;
curMatch = p->hash[hv];
p->hash[hv] = p->pos;
- SKIP_FOOTER
}
- while (--num != 0);
+ SKIP_FOOTER
}
static void Bt3_MatchFinder_Skip(CMatchFinder *p, UInt32 num) // Advances num positions, updating both hash tables and the tree without reporting matches.
{
- do
+ SKIP_HEADER(3) // the do-loop is now opened inside SKIP_HEADER and closed inside SKIP_FOOTER
{
UInt32 h2;
UInt32 *hash;
- SKIP_HEADER(3)
HASH3_CALC;
hash = p->hash;
curMatch = (hash + kFix3HashSize)[hv]; // the tree search starts from the main-hash entry
hash[h2] = // both tables are updated with the current position
(hash + kFix3HashSize)[hv] = p->pos;
- SKIP_FOOTER
}
- while (--num != 0);
+ SKIP_FOOTER
}
static void Bt4_MatchFinder_Skip(CMatchFinder *p, UInt32 num) // Advances num positions, updating all three hash tables and the tree without reporting matches.
{
- do
+ SKIP_HEADER(4) // the do-loop is now opened inside SKIP_HEADER and closed inside SKIP_FOOTER
{
UInt32 h2, h3;
UInt32 *hash;
- SKIP_HEADER(4)
HASH4_CALC;
hash = p->hash;
curMatch = (hash + kFix4HashSize)[hv]; // the tree search starts from the main-hash entry
hash [h2] = // all three tables are updated with the current position
(hash + kFix3HashSize)[h3] =
(hash + kFix4HashSize)[hv] = p->pos;
- SKIP_FOOTER
}
- while (--num != 0);
+ SKIP_FOOTER
}
static void Bt5_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
{
- do
+ SKIP_HEADER(5)
{
UInt32 h2, h3;
UInt32 *hash;
- SKIP_HEADER(5)
HASH5_CALC;
hash = p->hash;
curMatch = (hash + kFix5HashSize)[hv];
@@ -1097,66 +1464,84 @@ static void Bt5_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
(hash + kFix3HashSize)[h3] =
// (hash + kFix4HashSize)[h4] =
(hash + kFix5HashSize)[hv] = p->pos;
- SKIP_FOOTER
}
- while (--num != 0);
+ SKIP_FOOTER
}
+
+#define HC_SKIP_HEADER(minLen) \
+ do { if (p->lenLimit < minLen) { MatchFinder_MovePos(p); num--; continue; } { \
+ Byte *cur; \
+ UInt32 *hash; \
+ UInt32 *son; \
+ UInt32 pos = p->pos; \
+ UInt32 num2 = num; \
+ /* (p->pos == p->posLimit) is not allowed here !!! */ \
+ { const UInt32 rem = p->posLimit - pos; if (num2 > rem) num2 = rem; } \
+ num -= num2; \
+ { const UInt32 cycPos = p->cyclicBufferPos; \
+ son = p->son + cycPos; \
+ p->cyclicBufferPos = cycPos + num2; } \
+ cur = p->buffer; \
+ hash = p->hash; \
+ do { \
+ UInt32 curMatch; \
+ UInt32 hv;
+
+
+#define HC_SKIP_FOOTER \
+ cur++; pos++; *son++ = curMatch; \
+ } while (--num2); \
+ p->buffer = cur; \
+ p->pos = pos; \
+ if (pos == p->posLimit) MatchFinder_CheckLimits(p); \
+ }} while(num); \
+
+
static void Hc4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
{
- do
- {
+ HC_SKIP_HEADER(4)
+
UInt32 h2, h3;
- UInt32 *hash;
- SKIP_HEADER(4)
HASH4_CALC;
- hash = p->hash;
curMatch = (hash + kFix4HashSize)[hv];
hash [h2] =
(hash + kFix3HashSize)[h3] =
- (hash + kFix4HashSize)[hv] = p->pos;
- p->son[p->cyclicBufferPos] = curMatch;
- MOVE_POS
- }
- while (--num != 0);
+ (hash + kFix4HashSize)[hv] = pos;
+
+ HC_SKIP_FOOTER
}
+
static void Hc5_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
{
- do
- {
+ HC_SKIP_HEADER(5)
+
UInt32 h2, h3;
- UInt32 *hash;
- SKIP_HEADER(5)
- HASH5_CALC;
- hash = p->hash;
+ HASH5_CALC
curMatch = (hash + kFix5HashSize)[hv];
hash [h2] =
(hash + kFix3HashSize)[h3] =
// (hash + kFix4HashSize)[h4] =
- (hash + kFix5HashSize)[hv] = p->pos;
- p->son[p->cyclicBufferPos] = curMatch;
- MOVE_POS
- }
- while (--num != 0);
+ (hash + kFix5HashSize)[hv] = pos;
+
+ HC_SKIP_FOOTER
}
void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
{
- do
- {
- SKIP_HEADER(3)
+ HC_SKIP_HEADER(3)
+
HASH_ZIP_CALC;
- curMatch = p->hash[hv];
- p->hash[hv] = p->pos;
- p->son[p->cyclicBufferPos] = curMatch;
- MOVE_POS
- }
- while (--num != 0);
+ curMatch = hash[hv];
+ hash[hv] = pos;
+
+ HC_SKIP_FOOTER
}
-void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable)
+
+void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder2 *vTable)
{
vTable->Init = (Mf_Init_Func)MatchFinder_Init;
vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinder_GetNumAvailableBytes;
@@ -1195,3 +1580,42 @@ void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable)
vTable->Skip = (Mf_Skip_Func)Bt5_MatchFinder_Skip;
}
}
+
+
+
+/*
+ LzFindPrepare() selects, at run time, the saturating-subtraction routine
+ that is stored in (g_LzFind_SaturSub):
+ - ARM / ARM64 : LzFind_SaturSub_128, if NEON is supported.
+ - other CPUs : LzFind_SaturSub_128, if SSE4.1 is supported;
+ LzFind_SaturSub_256, if USE_AVX2 is defined and AVX2 is also supported.
+ - otherwise : NULL is stored.
+ The function does nothing, if FORCE_SATUR_SUB_128 is defined
+ or if USE_SATUR_SUB_128 is not defined.
+ The definition uses (void) to match the prototype declared in LzFind.h.
+*/
+void LzFindPrepare(void)
+{
+ #ifndef FORCE_SATUR_SUB_128
+ #ifdef USE_SATUR_SUB_128
+ LZFIND_SATUR_SUB_CODE_FUNC f = NULL;
+ #ifdef MY_CPU_ARM_OR_ARM64
+ {
+ if (CPU_IsSupported_NEON())
+ {
+ #pragma message ("=== LzFind NEON")
+ _PRF(printf("\n=== LzFind NEON\n"));
+ f = LzFind_SaturSub_128;
+ }
+ // f = 0; // for debug
+ }
+ #else // MY_CPU_ARM_OR_ARM64
+ if (CPU_IsSupported_SSE41())
+ {
+ #pragma message ("=== LzFind SSE41")
+ _PRF(printf("\n=== LzFind SSE41\n"));
+ f = LzFind_SaturSub_128;
+
+ #ifdef USE_AVX2
+ /* AVX2 is checked only when SSE4.1 is present; it overrides the 128-bit choice */
+ if (CPU_IsSupported_AVX2())
+ {
+ #pragma message ("=== LzFind AVX2")
+ _PRF(printf("\n=== LzFind AVX2\n"));
+ f = LzFind_SaturSub_256;
+ }
+ #endif
+ }
+ #endif // MY_CPU_ARM_OR_ARM64
+ g_LzFind_SaturSub = f;
+ #endif // USE_SATUR_SUB_128
+ #endif // FORCE_SATUR_SUB_128
+}
diff --git a/C/LzFind.h b/C/LzFind.h
index c613c739..eea873ff 100644
--- a/C/LzFind.h
+++ b/C/LzFind.h
@@ -1,5 +1,5 @@
/* LzFind.h -- Match finder for LZ algorithms
-2021-02-09 : Igor Pavlov : Public domain */
+2021-07-13 : Igor Pavlov : Public domain */
#ifndef __LZ_FIND_H
#define __LZ_FIND_H
@@ -15,7 +15,7 @@ typedef struct _CMatchFinder
Byte *buffer;
UInt32 pos;
UInt32 posLimit;
- UInt32 streamPos;
+ UInt32 streamPos; /* wrap over Zero is allowed (streamPos < pos). Use (UInt32)(streamPos - pos) */
UInt32 lenLimit;
UInt32 cyclicBufferPos;
@@ -51,17 +51,19 @@ typedef struct _CMatchFinder
UInt64 expectedDataSize;
} CMatchFinder;
-#define Inline_MatchFinder_GetPointerToCurrentPos(p) ((p)->buffer)
+#define Inline_MatchFinder_GetPointerToCurrentPos(p) ((const Byte *)(p)->buffer)
-#define Inline_MatchFinder_GetNumAvailableBytes(p) ((p)->streamPos - (p)->pos)
+#define Inline_MatchFinder_GetNumAvailableBytes(p) ((UInt32)((p)->streamPos - (p)->pos))
+/*
#define Inline_MatchFinder_IsFinishedOK(p) \
((p)->streamEndWasReached \
&& (p)->streamPos == (p)->pos \
&& (!(p)->directInput || (p)->directInputRem == 0))
+*/
int MatchFinder_NeedMove(CMatchFinder *p);
-// Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p);
+/* Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p); */
void MatchFinder_MoveBlock(CMatchFinder *p);
void MatchFinder_ReadIfRequired(CMatchFinder *p);
@@ -76,10 +78,21 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
ISzAllocPtr alloc);
void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc);
void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems);
-void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue);
+// void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue);
+
+/*
+#define Inline_MatchFinder_InitPos(p, val) \
+ (p)->pos = (val); \
+ (p)->streamPos = (val);
+*/
+
+#define Inline_MatchFinder_ReduceOffsets(p, subValue) \
+ (p)->pos -= (subValue); \
+ (p)->streamPos -= (subValue);
+
UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *buffer, CLzRef *son,
- UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue,
+ size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue,
UInt32 *distances, UInt32 maxLen);
/*
@@ -91,7 +104,7 @@ Conditions:
typedef void (*Mf_Init_Func)(void *object);
typedef UInt32 (*Mf_GetNumAvailableBytes_Func)(void *object);
typedef const Byte * (*Mf_GetPointerToCurrentPos_Func)(void *object);
-typedef UInt32 (*Mf_GetMatches_Func)(void *object, UInt32 *distances);
+typedef UInt32 * (*Mf_GetMatches_Func)(void *object, UInt32 *distances);
typedef void (*Mf_Skip_Func)(void *object, UInt32);
typedef struct _IMatchFinder
@@ -101,21 +114,23 @@ typedef struct _IMatchFinder
Mf_GetPointerToCurrentPos_Func GetPointerToCurrentPos;
Mf_GetMatches_Func GetMatches;
Mf_Skip_Func Skip;
-} IMatchFinder;
+} IMatchFinder2;
-void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable);
+void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder2 *vTable);
void MatchFinder_Init_LowHash(CMatchFinder *p);
void MatchFinder_Init_HighHash(CMatchFinder *p);
-void MatchFinder_Init_3(CMatchFinder *p, int readData);
+void MatchFinder_Init_4(CMatchFinder *p);
void MatchFinder_Init(CMatchFinder *p);
-UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);
-UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);
+UInt32* Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);
+UInt32* Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);
void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num);
void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num);
+void LzFindPrepare(void);
+
EXTERN_C_END
#endif
diff --git a/C/LzFindMt.c b/C/LzFindMt.c
index 49369ec0..da339ebf 100644
--- a/C/LzFindMt.c
+++ b/C/LzFindMt.c
@@ -1,8 +1,10 @@
/* LzFindMt.c -- multithreaded Match finder for LZ algorithms
-2021-04-01 : Igor Pavlov : Public domain */
+2021-07-12 : Igor Pavlov : Public domain */
#include "Precomp.h"
+// #include <stdio.h>
+
#include "CpuArch.h"
#include "LzHash.h"
@@ -10,22 +12,34 @@
// #define LOG_ITERS
+// #define LOG_THREAD
+
+#ifdef LOG_THREAD
+#include <stdio.h>
+#define PRF(x) x
+#else
+#define PRF(x)
+#endif
+
#ifdef LOG_ITERS
#include <stdio.h>
-static UInt64 g_NumIters_Tree;
-static UInt64 g_NumIters_Loop;
+extern UInt64 g_NumIters_Tree;
+extern UInt64 g_NumIters_Loop;
+extern UInt64 g_NumIters_Bytes;
#define LOG_ITER(x) x
#else
#define LOG_ITER(x)
#endif
-#define kMtHashBlockSize (1 << 17)
+#define kMtHashBlockSize ((UInt32)1 << 17)
#define kMtHashNumBlocks (1 << 1)
-#define kMtHashNumBlocksMask (kMtHashNumBlocks - 1)
-#define kMtBtBlockSize (1 << 16)
+#define GET_HASH_BLOCK_OFFSET(i) (((i) & (kMtHashNumBlocks - 1)) * kMtHashBlockSize)
+
+#define kMtBtBlockSize ((UInt32)1 << 16)
#define kMtBtNumBlocks (1 << 4)
-#define kMtBtNumBlocksMask (kMtBtNumBlocks - 1)
+
+#define GET_BT_BLOCK_OFFSET(i) (((i) & (kMtBtNumBlocks - 1)) * (size_t)kMtBtBlockSize)
/*
HASH functions:
@@ -36,11 +50,17 @@ static UInt64 g_NumIters_Loop;
(crc[0...0xFF] & 0xFF) <-> [0...0xFF]
*/
+#define MF(mt) ((mt)->MatchFinder)
+#define MF_CRC (p->crc)
+
+// #define MF(mt) (&(mt)->MatchFinder)
+// #define MF_CRC (p->MatchFinder.crc)
+
#define MT_HASH2_CALC \
- h2 = (p->crc[cur[0]] ^ cur[1]) & (kHash2Size - 1);
+ h2 = (MF_CRC[cur[0]] ^ cur[1]) & (kHash2Size - 1);
#define MT_HASH3_CALC { \
- UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
+ UInt32 temp = MF_CRC[cur[0]] ^ cur[1]; \
h2 = temp & (kHash2Size - 1); \
h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); }
@@ -59,93 +79,137 @@ static UInt64 g_NumIters_Loop;
*/
+MY_NO_INLINE
static void MtSync_Construct(CMtSync *p)
{
+ p->affinity = 0;
p->wasCreated = False;
p->csWasInitialized = False;
p->csWasEntered = False;
Thread_Construct(&p->thread);
Event_Construct(&p->canStart);
- Event_Construct(&p->wasStarted);
Event_Construct(&p->wasStopped);
Semaphore_Construct(&p->freeSemaphore);
Semaphore_Construct(&p->filledSemaphore);
- p->affinity = 0;
}
+#define DEBUG_BUFFER_LOCK // define it to debug lock state
+
+#ifdef DEBUG_BUFFER_LOCK
+#include <stdlib.h>
+#define BUFFER_MUST_BE_LOCKED(p) if (!(p)->csWasEntered) exit(1);
+#define BUFFER_MUST_BE_UNLOCKED(p) if ( (p)->csWasEntered) exit(1);
+#else
+#define BUFFER_MUST_BE_LOCKED(p)
+#define BUFFER_MUST_BE_UNLOCKED(p)
+#endif
+
+#define LOCK_BUFFER(p) { \
+ BUFFER_MUST_BE_UNLOCKED(p); \
+ CriticalSection_Enter(&(p)->cs); \
+ (p)->csWasEntered = True; }
+
+#define UNLOCK_BUFFER(p) { \
+ BUFFER_MUST_BE_LOCKED(p); \
+ CriticalSection_Leave(&(p)->cs); \
+ (p)->csWasEntered = False; }
+
+
MY_NO_INLINE
-static void MtSync_GetNextBlock(CMtSync *p)
+static UInt32 MtSync_GetNextBlock(CMtSync *p)
{
+ UInt32 numBlocks = 0;
if (p->needStart)
{
+ BUFFER_MUST_BE_UNLOCKED(p)
p->numProcessedBlocks = 1;
p->needStart = False;
p->stopWriting = False;
p->exit = False;
- Event_Reset(&p->wasStarted);
Event_Reset(&p->wasStopped);
-
Event_Set(&p->canStart);
- Event_Wait(&p->wasStarted);
-
- // if (mt) MatchFinder_Init_LowHash(mt->MatchFinder);
}
else
{
- CriticalSection_Leave(&p->cs);
- p->csWasEntered = False;
- p->numProcessedBlocks++;
+ UNLOCK_BUFFER(p)
+ // we free current block
+ numBlocks = p->numProcessedBlocks++;
Semaphore_Release1(&p->freeSemaphore);
}
+
+ // buffer is UNLOCKED here
Semaphore_Wait(&p->filledSemaphore);
- CriticalSection_Enter(&p->cs);
- p->csWasEntered = True;
+ LOCK_BUFFER(p);
+ return numBlocks;
}
-/* MtSync_StopWriting must be called if Writing was started */
+/* if Writing (Processing) thread was started, we must call MtSync_StopWriting() */
+
+MY_NO_INLINE
static void MtSync_StopWriting(CMtSync *p)
{
- UInt32 myNumBlocks = p->numProcessedBlocks;
if (!Thread_WasCreated(&p->thread) || p->needStart)
return;
- p->stopWriting = True;
+
+ PRF(printf("\nMtSync_StopWriting %p\n", p));
+
if (p->csWasEntered)
{
- CriticalSection_Leave(&p->cs);
- p->csWasEntered = False;
+ /* we don't use buffer in this thread after StopWriting().
+ So we UNLOCK buffer.
+ And we restore default UNLOCKED state for stopped thread */
+ UNLOCK_BUFFER(p)
}
- Semaphore_Release1(&p->freeSemaphore);
-
+
+ /* We send (p->stopWriting) message and release freeSemaphore
+ to free current block.
+ So the thread will see (p->stopWriting) at some
+ iteration after Wait(freeSemaphore).
+ The thread doesn't need to fill all avail free blocks,
+ so we can get fast thread stop.
+ */
+
+ p->stopWriting = True;
+ Semaphore_Release1(&p->freeSemaphore); // check semaphore count !!!
+
+ PRF(printf("\nMtSync_StopWriting %p : Event_Wait(&p->wasStopped)\n", p));
Event_Wait(&p->wasStopped);
+ PRF(printf("\nMtSync_StopWriting %p : Event_Wait() finsihed\n", p));
+
+ /* 21.03 : we don't restore semaphore counters here.
+ We will recreate and reinit semaphores in next start */
- while (myNumBlocks++ != p->numProcessedBlocks)
- {
- Semaphore_Wait(&p->filledSemaphore);
- Semaphore_Release1(&p->freeSemaphore);
- }
p->needStart = True;
}
+
+MY_NO_INLINE
static void MtSync_Destruct(CMtSync *p)
{
+ PRF(printf("\nMtSync_Destruct %p\n", p));
+
if (Thread_WasCreated(&p->thread))
{
+ /* we want thread to be in Stopped state before sending EXIT command.
+ note: stop(btSync) will stop (htSync) also */
MtSync_StopWriting(p);
+ /* thread in Stopped state here : (p->needStart == true) */
p->exit = True;
- if (p->needStart)
- Event_Set(&p->canStart);
- Thread_Wait_Close(&p->thread);
+ // if (p->needStart) // it's (true)
+ Event_Set(&p->canStart); // we send EXIT command to thread
+ Thread_Wait_Close(&p->thread); // we wait thread finishing
}
+
if (p->csWasInitialized)
{
CriticalSection_Delete(&p->cs);
p->csWasInitialized = False;
}
+ p->csWasEntered = False;
Event_Close(&p->canStart);
- Event_Close(&p->wasStarted);
Event_Close(&p->wasStopped);
Semaphore_Close(&p->freeSemaphore);
Semaphore_Close(&p->filledSemaphore);
@@ -153,48 +217,75 @@ static void MtSync_Destruct(CMtSync *p)
p->wasCreated = False;
}
-#define RINOK_THREAD(x) { if ((x) != 0) return SZ_ERROR_THREAD; }
-static SRes MtSync_Create2(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj, UInt32 numBlocks)
+// #define RINOK_THREAD(x) { if ((x) != 0) return SZ_ERROR_THREAD; }
+// we want to get real system error codes here instead of SZ_ERROR_THREAD
+#define RINOK_THREAD(x) RINOK(x)
+
+
+// call it before each new file (when new starting is required):
+MY_NO_INLINE
+static SRes MtSync_Init(CMtSync *p, UInt32 numBlocks)
+{
+ WRes wres;
+ // BUFFER_MUST_BE_UNLOCKED(p)
+ if (!p->needStart || p->csWasEntered)
+ return SZ_ERROR_FAIL;
+ wres = Semaphore_OptCreateInit(&p->freeSemaphore, numBlocks, numBlocks);
+ if (wres == 0)
+ wres = Semaphore_OptCreateInit(&p->filledSemaphore, 0, numBlocks);
+ return MY_SRes_HRESULT_FROM_WRes(wres);
+}
+
+
+static WRes MtSync_Create_WRes(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj)
{
WRes wres;
+
if (p->wasCreated)
return SZ_OK;
RINOK_THREAD(CriticalSection_Init(&p->cs));
p->csWasInitialized = True;
+ p->csWasEntered = False;
RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->canStart));
- RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->wasStarted));
RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->wasStopped));
-
- RINOK_THREAD(Semaphore_Create(&p->freeSemaphore, numBlocks, numBlocks));
- RINOK_THREAD(Semaphore_Create(&p->filledSemaphore, 0, numBlocks));
p->needStart = True;
+ p->exit = True; /* p->exit is unused before (canStart) Event.
+ But in case of some unexpected code failure we will get fast exit from thread */
+
+ // return ERROR_TOO_MANY_POSTS; // for debug
+ // return EINVAL; // for debug
if (p->affinity != 0)
wres = Thread_Create_With_Affinity(&p->thread, startAddress, obj, (CAffinityMask)p->affinity);
else
wres = Thread_Create(&p->thread, startAddress, obj);
+
RINOK_THREAD(wres);
p->wasCreated = True;
return SZ_OK;
}
-static SRes MtSync_Create(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj, UInt32 numBlocks)
+
+MY_NO_INLINE
+static SRes MtSync_Create(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj)
{
- SRes res = MtSync_Create2(p, startAddress, obj, numBlocks);
- if (res != SZ_OK)
- MtSync_Destruct(p);
- return res;
+ const WRes wres = MtSync_Create_WRes(p, startAddress, obj);
+ if (wres == 0)
+ return 0;
+ MtSync_Destruct(p);
+ return MY_SRes_HRESULT_FROM_WRes(wres);
}
-// static void MtSync_Init(CMtSync *p) { p->needStart = True; }
-#define kMtMaxValForNormalize 0xFFFFFFFF
-// #define kMtMaxValForNormalize ((1 << 25) + (1 << 20))
+// ---------- HASH THREAD ----------
+#define kMtMaxValForNormalize 0xFFFFFFFF
+// #define kMtMaxValForNormalize ((1 << 21)) // for debug
+// #define kNormalizeAlign (1 << 7) // alignment for speculated accesses
#ifdef MY_CPU_LE_UNALIGN
#define GetUi24hi_from32(p) ((UInt32)GetUi32(p) >> 8)
@@ -349,27 +440,28 @@ DEF_GetHeads(5b, (crc[p[0]] ^ (crc[p[4]] << kLzHash_CrcShift_1) ^ GetUi24hi_from
static void HashThreadFunc(CMatchFinderMt *mt)
{
CMtSync *p = &mt->hashSync;
+ PRF(printf("\nHashThreadFunc\n"));
+
for (;;)
{
- UInt32 numProcessedBlocks = 0;
+ UInt32 blockIndex = 0;
+ PRF(printf("\nHashThreadFunc : Event_Wait(&p->canStart)\n"));
Event_Wait(&p->canStart);
- Event_Set(&p->wasStarted);
+ PRF(printf("\nHashThreadFunc : Event_Wait(&p->canStart) : after \n"));
+ if (p->exit)
+ {
+ PRF(printf("\nHashThreadFunc : exit \n"));
+ return;
+ }
- MatchFinder_Init_HighHash(mt->MatchFinder);
+ MatchFinder_Init_HighHash(MF(mt));
for (;;)
{
- if (p->exit)
- return;
- if (p->stopWriting)
- {
- p->numProcessedBlocks = numProcessedBlocks;
- Event_Set(&p->wasStopped);
- break;
- }
+ PRF(printf("Hash thread block = %d pos = %d\n", (unsigned)blockIndex, mt->MatchFinder->pos));
{
- CMatchFinder *mf = mt->MatchFinder;
+ CMatchFinder *mf = MF(mt);
if (MatchFinder_NeedMove(mf))
{
CriticalSection_Enter(&mt->btSync.cs);
@@ -382,196 +474,178 @@ static void HashThreadFunc(CMatchFinderMt *mt)
mt->pointerToCurPos -= offset;
mt->buffer -= offset;
}
- CriticalSection_Leave(&mt->btSync.cs);
CriticalSection_Leave(&mt->hashSync.cs);
+ CriticalSection_Leave(&mt->btSync.cs);
continue;
}
Semaphore_Wait(&p->freeSemaphore);
+ if (p->exit) // exit is unexpected here. But we check it here for some failure case
+ return;
+
+ // for faster stop : we check (p->stopWriting) after Wait(freeSemaphore)
+ if (p->stopWriting)
+ break;
+
MatchFinder_ReadIfRequired(mf);
- if (mf->pos > (kMtMaxValForNormalize - kMtHashBlockSize))
{
- UInt32 subValue = (mf->pos - mf->historySize - 1);
- MatchFinder_ReduceOffsets(mf, subValue);
- MatchFinder_Normalize3(subValue, mf->hash + mf->fixedHashSize, (size_t)mf->hashMask + 1);
- }
- {
- UInt32 *heads = mt->hashBuf + ((numProcessedBlocks++) & kMtHashNumBlocksMask) * kMtHashBlockSize;
- UInt32 num = mf->streamPos - mf->pos;
+ UInt32 *heads = mt->hashBuf + GET_HASH_BLOCK_OFFSET(blockIndex++);
+ UInt32 num = Inline_MatchFinder_GetNumAvailableBytes(mf);
heads[0] = 2;
heads[1] = num;
+
+ /* heads[1] contains the number of avail bytes:
+ if (avail < mf->numHashBytes) :
+ {
+ it means that stream was finished
+ HASH_THREAD and BT_THREAD must move position for heads[1] (avail) bytes.
+ HASH_THREAD doesn't stop,
+ HASH_THREAD fills only the header (2 numbers) for all next blocks:
+ {2, NumHashBytes - 1}, {2,0}, {2,0}, ... , {2,0}
+ }
+ else
+ {
+ HASH_THREAD and BT_THREAD must move position for (heads[0] - 2) bytes;
+ }
+ */
+
if (num >= mf->numHashBytes)
{
num = num - mf->numHashBytes + 1;
if (num > kMtHashBlockSize - 2)
num = kMtHashBlockSize - 2;
- mt->GetHeadsFunc(mf->buffer, mf->pos, mf->hash + mf->fixedHashSize, mf->hashMask, heads + 2, num, mf->crc);
+
+ if (mf->pos > (UInt32)kMtMaxValForNormalize - num)
+ {
+ const UInt32 subValue = (mf->pos - mf->historySize - 1); // & ~(UInt32)(kNormalizeAlign - 1);
+ Inline_MatchFinder_ReduceOffsets(mf, subValue);
+ MatchFinder_Normalize3(subValue, mf->hash + mf->fixedHashSize, (size_t)mf->hashMask + 1);
+ }
+
heads[0] = 2 + num;
+ mt->GetHeadsFunc(mf->buffer, mf->pos, mf->hash + mf->fixedHashSize, mf->hashMask, heads + 2, num, mf->crc);
}
- mf->pos += num;
+
+ mf->pos += num; // wrap over zero is allowed at the end of stream
mf->buffer += num;
}
}
Semaphore_Release1(&p->filledSemaphore);
- }
- }
-}
+ } // for() processing end
-static void MatchFinderMt_GetNextBlock_Hash(CMatchFinderMt *p)
-{
- MtSync_GetNextBlock(&p->hashSync);
- p->hashBufPosLimit = p->hashBufPos = ((p->hashSync.numProcessedBlocks - 1) & kMtHashNumBlocksMask) * kMtHashBlockSize;
- p->hashBufPosLimit += p->hashBuf[p->hashBufPos++];
- p->hashNumAvail = p->hashBuf[p->hashBufPos++];
+ // p->numBlocks_Sent = blockIndex;
+ Event_Set(&p->wasStopped);
+ } // for() thread end
}
-#define kEmptyHashValue 0
+
+
+
+// ---------- BT THREAD ----------
+
+/* we use one variable instead of two (cyclicBufferPos == pos) before CyclicBuf wrap.
+ here we define fixed offset of (p->pos) from (p->cyclicBufferPos) */
+#define CYC_TO_POS_OFFSET 0
+// #define CYC_TO_POS_OFFSET 1 // for debug
#define MFMT_GM_INLINE
#ifdef MFMT_GM_INLINE
/*
- we use size_t for _cyclicBufferPos instead of UInt32
+ we use size_t for (pos) instead of UInt32
to eliminate "movsx" BUG in old MSVC x64 compiler.
*/
-MY_NO_INLINE
-static UInt32 *GetMatchesSpecN(UInt32 lenLimit, UInt32 pos, const Byte *cur, CLzRef *son,
- size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue,
- UInt32 *d, UInt32 _maxLen, const UInt32 *hash, const UInt32 *limit, UInt32 size, UInt32 *posRes)
-{
- do
- {
- UInt32 *_distances = ++d;
- UInt32 delta = *hash++;
- CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
- CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
- unsigned len0 = 0, len1 = 0;
- UInt32 cutValue = _cutValue;
- unsigned maxLen = (unsigned)_maxLen;
-
- /*
- #define PREF_STEP 1
- if (size > PREF_STEP)
- {
- UInt32 delta = hash[PREF_STEP - 1];
- if (delta < _cyclicBufferSize)
- {
- size_t cyc1 = _cyclicBufferPos + PREF_STEP;
- CLzRef *pair = son + ((size_t)(cyc1 - delta + ((delta > cyc1) ? _cyclicBufferSize : 0)) << 1);
- Byte b = *(cur + PREF_STEP - delta);
- _distances[0] = pair[0];
- _distances[1] = b;
- }
- }
- */
- if (cutValue == 0 || delta >= _cyclicBufferSize)
- {
- *ptr0 = *ptr1 = kEmptyHashValue;
- }
- else
- for (LOG_ITER(g_NumIters_Tree++);;)
- {
- LOG_ITER(g_NumIters_Loop++);
- {
- CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((_cyclicBufferPos < delta) ? _cyclicBufferSize : 0)) << 1);
- const Byte *pb = cur - delta;
- unsigned len = (len0 < len1 ? len0 : len1);
- UInt32 pair0 = *pair;
- if (pb[len] == cur[len])
- {
- if (++len != lenLimit && pb[len] == cur[len])
- while (++len != lenLimit)
- if (pb[len] != cur[len])
- break;
- if (maxLen < len)
- {
- maxLen = len;
- *d++ = (UInt32)len;
- *d++ = delta - 1;
- if (len == lenLimit)
- {
- UInt32 pair1 = pair[1];
- *ptr1 = pair0;
- *ptr0 = pair1;
- break;
- }
- }
- }
- {
- UInt32 curMatch = pos - delta;
- // delta = pos - *pair;
- // delta = pos - pair[((UInt32)pb[len] - (UInt32)cur[len]) >> 31];
- if (pb[len] < cur[len])
- {
- delta = pos - pair[1];
- *ptr1 = curMatch;
- ptr1 = pair + 1;
- len1 = len;
- }
- else
- {
- delta = pos - *pair;
- *ptr0 = curMatch;
- ptr0 = pair;
- len0 = len;
- }
- }
- }
- if (--cutValue == 0 || delta >= _cyclicBufferSize)
- {
- *ptr0 = *ptr1 = kEmptyHashValue;
- break;
- }
- }
- pos++;
- _cyclicBufferPos++;
- cur++;
- {
- UInt32 num = (UInt32)(d - _distances);
- _distances[-1] = num;
- }
- }
- while (d < limit && --size != 0);
- *posRes = pos;
- return d;
-}
+UInt32 * MY_FAST_CALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
+ UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
+ size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
+ UInt32 *posRes);
#endif
-
static void BtGetMatches(CMatchFinderMt *p, UInt32 *d)
{
UInt32 numProcessed = 0;
UInt32 curPos = 2;
- UInt32 limit = kMtBtBlockSize - (p->matchMaxLen * 2); // * 2
+ /* GetMatchesSpec() functions don't create (len = 1)
+ in [len, dist] match pairs, if (p->numHashBytes >= 2)
+ Also we suppose here that (matchMaxLen >= 2).
+ So the following code for (reserve) is not required
+ UInt32 reserve = (p->matchMaxLen * 2);
+ const UInt32 kNumHashBytes_Max = 5; // BT_HASH_BYTES_MAX
+ if (reserve < kNumHashBytes_Max - 1)
+ reserve = kNumHashBytes_Max - 1;
+ const UInt32 limit = kMtBtBlockSize - (reserve);
+ */
+
+ const UInt32 limit = kMtBtBlockSize - (p->matchMaxLen * 2);
+
d[1] = p->hashNumAvail;
+
+ if (p->failure_BT)
+ {
+ // printf("\n == 1 BtGetMatches() p->failure_BT\n");
+ d[0] = 0;
+ // d[1] = 0;
+ return;
+ }
while (curPos < limit)
{
if (p->hashBufPos == p->hashBufPosLimit)
{
- MatchFinderMt_GetNextBlock_Hash(p);
- d[1] = numProcessed + p->hashNumAvail;
- if (p->hashNumAvail >= p->numHashBytes)
+ // MatchFinderMt_GetNextBlock_Hash(p);
+ UInt32 avail;
+ {
+ const UInt32 bi = MtSync_GetNextBlock(&p->hashSync);
+ const UInt32 k = GET_HASH_BLOCK_OFFSET(bi);
+ const UInt32 *h = p->hashBuf + k;
+ avail = h[1];
+ p->hashBufPosLimit = k + h[0];
+ p->hashNumAvail = avail;
+ p->hashBufPos = k + 2;
+ }
+
+ {
+ /* we must prevent UInt32 overflow for avail total value,
+ if avail was increased with new hash block */
+ UInt32 availSum = numProcessed + avail;
+ if (availSum < numProcessed)
+ availSum = (UInt32)(Int32)-1;
+ d[1] = availSum;
+ }
+
+ if (avail >= p->numHashBytes)
continue;
- d[0] = curPos + p->hashNumAvail;
+
+ // if (p->hashBufPos != p->hashBufPosLimit) exit(1);
+
+ /* (avail < p->numHashBytes)
+ It means that stream was finished.
+ And (avail) - is a number of remaining bytes,
+ we fill (d) for (avail) bytes for LZ_THREAD (receiver).
+ but we don't update (p->pos) and (p->cyclicBufferPos) here in BT_THREAD */
+
+ /* here we suppose that we have space enough:
+ (kMtBtBlockSize - curPos >= p->hashNumAvail) */
+ p->hashNumAvail = 0;
+ d[0] = curPos + avail;
d += curPos;
- for (; p->hashNumAvail != 0; p->hashNumAvail--)
+ for (; avail != 0; avail--)
*d++ = 0;
return;
}
{
UInt32 size = p->hashBufPosLimit - p->hashBufPos;
- UInt32 lenLimit = p->matchMaxLen;
UInt32 pos = p->pos;
UInt32 cyclicBufferPos = p->cyclicBufferPos;
+ UInt32 lenLimit = p->matchMaxLen;
if (lenLimit >= p->hashNumAvail)
lenLimit = p->hashNumAvail;
{
@@ -583,6 +657,14 @@ static void BtGetMatches(CMatchFinderMt *p, UInt32 *d)
size = size2;
}
+ if (pos > (UInt32)kMtMaxValForNormalize - size)
+ {
+ const UInt32 subValue = (pos - p->cyclicBufferSize); // & ~(UInt32)(kNormalizeAlign - 1);
+ pos -= subValue;
+ p->pos = pos;
+ MatchFinder_Normalize3(subValue, p->son, (size_t)p->cyclicBufferSize * 2);
+ }
+
#ifndef MFMT_GM_INLINE
while (curPos < limit && size-- != 0)
{
@@ -598,21 +680,45 @@ static void BtGetMatches(CMatchFinderMt *p, UInt32 *d)
}
#else
{
- UInt32 posRes;
- curPos = (UInt32)(GetMatchesSpecN(lenLimit, pos, p->buffer, p->son, cyclicBufferPos, p->cyclicBufferSize, p->cutValue,
- d + curPos, p->numHashBytes - 1, p->hashBuf + p->hashBufPos,
- d + limit,
- size, &posRes) - d);
- p->hashBufPos += posRes - pos;
- cyclicBufferPos += posRes - pos;
- p->buffer += posRes - pos;
- pos = posRes;
+ UInt32 posRes = pos;
+ const UInt32 *d_end;
+ {
+ d_end = GetMatchesSpecN_2(
+ p->buffer + lenLimit - 1,
+ pos, p->buffer, p->son, p->cutValue, d + curPos,
+ p->numHashBytes - 1, p->hashBuf + p->hashBufPos,
+ d + limit, p->hashBuf + p->hashBufPos + size,
+ cyclicBufferPos, p->cyclicBufferSize,
+ &posRes);
+ }
+ {
+ if (!d_end)
+ {
+ // printf("\n == 2 BtGetMatches() p->failure_BT\n");
+ // internal data failure
+ p->failure_BT = True;
+ d[0] = 0;
+ // d[1] = 0;
+ return;
+ }
+ }
+ curPos = (UInt32)(d_end - d);
+ {
+ const UInt32 processed = posRes - pos;
+ pos = posRes;
+ p->hashBufPos += processed;
+ cyclicBufferPos += processed;
+ p->buffer += processed;
+ }
}
#endif
- numProcessed += pos - p->pos;
- p->hashNumAvail -= pos - p->pos;
- p->pos = pos;
+ {
+ const UInt32 processed = pos - p->pos;
+ numProcessed += processed;
+ p->hashNumAvail -= processed;
+ p->pos = pos;
+ }
if (cyclicBufferPos == p->cyclicBufferSize)
cyclicBufferPos = 0;
p->cyclicBufferPos = cyclicBufferPos;
@@ -622,31 +728,28 @@ static void BtGetMatches(CMatchFinderMt *p, UInt32 *d)
d[0] = curPos;
}
+
static void BtFillBlock(CMatchFinderMt *p, UInt32 globalBlockIndex)
{
CMtSync *sync = &p->hashSync;
+
+ BUFFER_MUST_BE_UNLOCKED(sync)
+
if (!sync->needStart)
{
- CriticalSection_Enter(&sync->cs);
- sync->csWasEntered = True;
+ LOCK_BUFFER(sync)
}
- BtGetMatches(p, p->btBuf + (globalBlockIndex & kMtBtNumBlocksMask) * kMtBtBlockSize);
-
- if (p->pos > kMtMaxValForNormalize - kMtBtBlockSize)
- {
- UInt32 subValue = p->pos - p->cyclicBufferSize;
- MatchFinder_Normalize3(subValue, p->son, (size_t)p->cyclicBufferSize * 2);
- p->pos -= subValue;
- }
+ BtGetMatches(p, p->btBuf + GET_BT_BLOCK_OFFSET(globalBlockIndex));
+
+ /* We suppose that we have called GetNextBlock() from start.
+ So buffer is LOCKED */
- if (!sync->needStart)
- {
- CriticalSection_Leave(&sync->cs);
- sync->csWasEntered = False;
- }
+ UNLOCK_BUFFER(sync)
}
+
+MY_NO_INLINE
static void BtThreadFunc(CMatchFinderMt *mt)
{
CMtSync *p = &mt->btSync;
@@ -654,25 +757,35 @@ static void BtThreadFunc(CMatchFinderMt *mt)
{
UInt32 blockIndex = 0;
Event_Wait(&p->canStart);
- Event_Set(&p->wasStarted);
+
for (;;)
{
+ PRF(printf(" BT thread block = %d pos = %d\n", (unsigned)blockIndex, mt->pos));
+ /* (p->exit == true) is possible after (p->canStart) at first loop iteration
+ and is unexpected after more Wait(freeSemaphore) iterations */
if (p->exit)
return;
+
+ Semaphore_Wait(&p->freeSemaphore);
+
+ // for faster stop : we check (p->stopWriting) after Wait(freeSemaphore)
if (p->stopWriting)
- {
- p->numProcessedBlocks = blockIndex;
- MtSync_StopWriting(&mt->hashSync);
- Event_Set(&p->wasStopped);
break;
- }
- Semaphore_Wait(&p->freeSemaphore);
+
BtFillBlock(mt, blockIndex++);
+
Semaphore_Release1(&p->filledSemaphore);
}
+
+ // we stop HASH_THREAD here
+ MtSync_StopWriting(&mt->hashSync);
+
+ // p->numBlocks_Sent = blockIndex;
+ Event_Set(&p->wasStopped);
}
}
+
void MatchFinderMt_Construct(CMatchFinderMt *p)
{
p->hashBuf = NULL;
@@ -688,22 +801,37 @@ static void MatchFinderMt_FreeMem(CMatchFinderMt *p, ISzAllocPtr alloc)
void MatchFinderMt_Destruct(CMatchFinderMt *p, ISzAllocPtr alloc)
{
- MtSync_Destruct(&p->hashSync);
+ /*
+ HASH_THREAD can use CriticalSection(s) btSync.cs and hashSync.cs.
+ So we must be sure that HASH_THREAD will not use CriticalSection(s)
+ after deleting CriticalSection here.
+
+ we call ReleaseStream(p)
+ that calls StopWriting(btSync)
+ that calls StopWriting(hashSync), if it's required to stop HASH_THREAD.
+ after StopWriting() it's safe to destruct MtSync(s) in any order */
+
+ MatchFinderMt_ReleaseStream(p);
+
MtSync_Destruct(&p->btSync);
+ MtSync_Destruct(&p->hashSync);
LOG_ITER(
- printf("\nTree %9d * %7d iter = %9d sum \n",
+ printf("\nTree %9d * %7d iter = %9d = sum : bytes = %9d\n",
(UInt32)(g_NumIters_Tree / 1000),
(UInt32)(((UInt64)g_NumIters_Loop * 1000) / (g_NumIters_Tree + 1)),
- (UInt32)(g_NumIters_Loop / 1000)
+ (UInt32)(g_NumIters_Loop / 1000),
+ (UInt32)(g_NumIters_Bytes / 1000)
));
MatchFinderMt_FreeMem(p, alloc);
}
+
#define kHashBufferSize (kMtHashBlockSize * kMtHashNumBlocks)
#define kBtBufferSize (kMtBtBlockSize * kMtBtNumBlocks)
+
static THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE HashThreadFunc2(void *p) { HashThreadFunc((CMatchFinderMt *)p); return 0; }
static THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE BtThreadFunc2(void *p)
{
@@ -716,16 +844,17 @@ static THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE BtThreadFunc2(void *p)
return 0;
}
+
SRes MatchFinderMt_Create(CMatchFinderMt *p, UInt32 historySize, UInt32 keepAddBufferBefore,
UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAllocPtr alloc)
{
- CMatchFinder *mf = p->MatchFinder;
+ CMatchFinder *mf = MF(p);
p->historySize = historySize;
if (kMtBtBlockSize <= matchMaxLen * 4)
return SZ_ERROR_PARAM;
if (!p->hashBuf)
{
- p->hashBuf = (UInt32 *)ISzAlloc_Alloc(alloc, (kHashBufferSize + kBtBufferSize) * sizeof(UInt32));
+ p->hashBuf = (UInt32 *)ISzAlloc_Alloc(alloc, ((size_t)kHashBufferSize + (size_t)kBtBufferSize) * sizeof(UInt32));
if (!p->hashBuf)
return SZ_ERROR_MEM;
p->btBuf = p->hashBuf + kHashBufferSize;
@@ -735,101 +864,163 @@ SRes MatchFinderMt_Create(CMatchFinderMt *p, UInt32 historySize, UInt32 keepAddB
if (!MatchFinder_Create(mf, historySize, keepAddBufferBefore, matchMaxLen, keepAddBufferAfter, alloc))
return SZ_ERROR_MEM;
- RINOK(MtSync_Create(&p->hashSync, HashThreadFunc2, p, kMtHashNumBlocks));
- RINOK(MtSync_Create(&p->btSync, BtThreadFunc2, p, kMtBtNumBlocks));
+ RINOK(MtSync_Create(&p->hashSync, HashThreadFunc2, p));
+ RINOK(MtSync_Create(&p->btSync, BtThreadFunc2, p));
return SZ_OK;
}
-/* Call it after ReleaseStream / SetStream */
+
+SRes MatchFinderMt_InitMt(CMatchFinderMt *p)
+{
+ RINOK(MtSync_Init(&p->hashSync, kMtHashNumBlocks));
+ return MtSync_Init(&p->btSync, kMtBtNumBlocks);
+}
+
+
static void MatchFinderMt_Init(CMatchFinderMt *p)
{
- CMatchFinder *mf = p->MatchFinder;
+ CMatchFinder *mf = MF(p);
p->btBufPos =
- p->btBufPosLimit = 0;
+ p->btBufPosLimit = NULL; // pos == limit, so MatchFinderMt_GetNumAvailableBytes() fetches a block on first use
p->hashBufPos =
p->hashBufPosLimit = 0;
+ p->hashNumAvail = 0; // 21.03
+
+ p->failure_BT = False;
/* Init without data reading. We don't want to read data in this thread */
- MatchFinder_Init_3(mf, False);
+ MatchFinder_Init_4(mf);
+
MatchFinder_Init_LowHash(mf);
p->pointerToCurPos = Inline_MatchFinder_GetPointerToCurrentPos(mf);
p->btNumAvailBytes = 0;
- p->lzPos = p->historySize + 1;
+ p->failure_LZ_BT = False; // leave the failure mode that MatchFinderMt_GetNextBlock_Bt() may have entered
+ // p->failure_LZ_LZ = False;
+
+ p->lzPos =
+ 1; // optimal smallest value
+ // 0; // for debug: ignores match to start
+ // kNormalizeAlign; // for debug
p->hash = mf->hash;
p->fixedHashSize = mf->fixedHashSize;
// p->hash4Mask = mf->hash4Mask;
p->crc = mf->crc;
+ // memcpy(p->crc, mf->crc, sizeof(mf->crc));
p->son = mf->son;
p->matchMaxLen = mf->matchMaxLen;
p->numHashBytes = mf->numHashBytes;
- p->pos = mf->pos;
- p->buffer = mf->buffer;
- p->cyclicBufferPos = mf->cyclicBufferPos;
+
+ /* (mf->pos) and (mf->streamPos) were already initialized to 1 in MatchFinder_Init_4() */
+ // mf->streamPos = mf->pos = 1; // optimal smallest value
+ // 0; // for debug: ignores match to start
+ // kNormalizeAlign; // for debug
+
+ /* we must init (p->pos = mf->pos) for BT, because
+ BT code needs (p->pos == delta_value_for_empty_hash_record == mf->pos) */
+ p->pos = mf->pos; // do not change it
+
+ p->cyclicBufferPos = (p->pos - CYC_TO_POS_OFFSET); // derived from p->pos now, no longer copied from mf->cyclicBufferPos
p->cyclicBufferSize = mf->cyclicBufferSize;
+ p->buffer = mf->buffer;
p->cutValue = mf->cutValue;
+ // p->son[0] = p->son[1] = 0; // unused: to init skipped record for speculated accesses.
}
+
/* ReleaseStream is required to finish multithreading */
void MatchFinderMt_ReleaseStream(CMatchFinderMt *p)
{
+ // Sleep(1); // for debug
MtSync_StopWriting(&p->btSync); // only btSync is stopped in this function; hashSync is not touched here
+ // Sleep(200); // for debug
/* p->MatchFinder->ReleaseStream(); */
}
MY_NO_INLINE
-static void MatchFinderMt_GetNextBlock_Bt(CMatchFinderMt *p)
+static UInt32 MatchFinderMt_GetNextBlock_Bt(CMatchFinderMt *p)
{
- UInt32 blockIndex, k;
-
- MtSync_GetNextBlock(&p->btSync);
-
- blockIndex = ((p->btSync.numProcessedBlocks - 1) & kMtBtNumBlocksMask);
- k = blockIndex * kMtBtBlockSize;
- p->btBufPosLimit = k + p->btBuf[k];
- p->btNumAvailBytes = p->btBuf[k + 1];
- p->btBufPos = k + 2;
- if (p->lzPos >= kMtMaxValForNormalize - kMtBtBlockSize)
+ if (p->failure_LZ_BT) // failure mode stays on until MatchFinderMt_Init() clears the flag
+ p->btBufPos = p->failureBuf; // rewind to the one-item failure record; no new block is requested
+ else
{
- MatchFinder_Normalize3(p->lzPos - p->historySize - 1, p->hash, p->fixedHashSize);
- p->lzPos = p->historySize + 1;
+ const UInt32 bi = MtSync_GetNextBlock(&p->btSync);
+ const UInt32 *bt = p->btBuf + GET_BT_BLOCK_OFFSET(bi);
+ {
+ const UInt32 numItems = bt[0]; // bt[0]: number of UInt32 items in the block, counted from bt (the 2 header items included)
+ p->btBufPosLimit = bt + numItems;
+ p->btNumAvailBytes = bt[1]; // bt[1]: available-bytes value for this block (stored before the range check below)
+ p->btBufPos = bt + 2; // records start after the 2 header items
+ if (numItems < 2 || numItems > kMtBtBlockSize) // header out of range: enter failure mode
+ {
+ p->failureBuf[0] = 0; // a record whose first item (length) is 0
+ p->btBufPos = p->failureBuf;
+ p->btBufPosLimit = p->failureBuf + 1;
+ p->failure_LZ_BT = True;
+ // p->btNumAvailBytes = 0;
+ /* we don't want to decrease AvailBytes that was loaded before.
+ that can be unexpected for the code that has loaded another value before */
+ }
+ }
+
+ if (p->lzPos >= (UInt32)kMtMaxValForNormalize - (UInt32)kMtBtBlockSize)
+ {
+ /* we don't check (lzPos) over exact avail bytes in (btBuf).
+ (fixedHashSize) is small, so normalization is fast */
+ const UInt32 subValue = (p->lzPos - p->historySize - 1); // & ~(UInt32)(kNormalizeAlign - 1);
+ p->lzPos -= subValue; // lzPos becomes (historySize + 1), same value the old code assigned directly
+ MatchFinder_Normalize3(subValue, p->hash, p->fixedHashSize);
+ }
}
+ return p->btNumAvailBytes; // returned so that callers need not re-read the field
}
+
+
static const Byte * MatchFinderMt_GetPointerToCurrentPos(CMatchFinderMt *p)
{
return p->pointerToCurPos;
}
+
#define GET_NEXT_BLOCK_IF_REQUIRED if (p->btBufPos == p->btBufPosLimit) MatchFinderMt_GetNextBlock_Bt(p);
+
static UInt32 MatchFinderMt_GetNumAvailableBytes(CMatchFinderMt *p)
{
- GET_NEXT_BLOCK_IF_REQUIRED;
- return p->btNumAvailBytes;
+ if (p->btBufPos != p->btBufPosLimit) // current block still has unread items
+ return p->btNumAvailBytes;
+ return MatchFinderMt_GetNextBlock_Bt(p); // block exhausted: fetch the next one; it returns the refreshed btNumAvailBytes
}
+
+// #define CHECK_FAILURE_LZ(_match_, _pos_) if (_match_ >= _pos_) { p->failure_LZ_LZ = True; return d; }
+#define CHECK_FAILURE_LZ(_match_, _pos_)
+
static UInt32 * MixMatches2(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
{
UInt32 h2, c2;
UInt32 *hash = p->hash;
const Byte *cur = p->pointerToCurPos;
- UInt32 m = p->lzPos;
+ const UInt32 m = p->lzPos;
MT_HASH2_CALC // presumably sets h2 from the bytes at cur (macro not visible here) - confirm
c2 = hash[h2]; // previous position stored in slot h2
hash[h2] = m; // slot h2 now holds the current lzPos
if (c2 >= matchMinPos) // candidates below matchMinPos are ignored
+ {
+ CHECK_FAILURE_LZ(c2, m) // currently expands to nothing; the checking variant is commented out
if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0]) // compare the first byte at the candidate with the current byte
{
*d++ = 2; // emit pair: len = 2
*d++ = m - c2 - 1; // emit pair: dist = (m - c2 - 1)
}
+ }
return d; // pointer past the last item written (equals the input d if nothing was added)
}
@@ -839,7 +1030,7 @@ static UInt32 * MixMatches3(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
UInt32 h2, h3, c2, c3;
UInt32 *hash = p->hash;
const Byte *cur = p->pointerToCurPos;
- UInt32 m = p->lzPos;
+ const UInt32 m = p->lzPos;
MT_HASH3_CALC
c2 = hash[h2];
@@ -848,22 +1039,30 @@ static UInt32 * MixMatches3(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
hash[h2] = m;
(hash + kFix3HashSize)[h3] = m;
- if (c2 >= matchMinPos && cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0])
+ if (c2 >= matchMinPos)
{
- d[1] = m - c2 - 1;
- if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 2] == cur[2])
+ CHECK_FAILURE_LZ(c2, m)
+ if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0])
{
- d[0] = 3;
- return d + 2;
+ d[1] = m - c2 - 1;
+ if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 2] == cur[2])
+ {
+ d[0] = 3;
+ return d + 2;
+ }
+ d[0] = 2;
+ d += 2;
}
- d[0] = 2;
- d += 2;
}
- if (c3 >= matchMinPos && cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0])
+ if (c3 >= matchMinPos)
{
- *d++ = 3;
- *d++ = m - c3 - 1;
+ CHECK_FAILURE_LZ(c3, m)
+ if (cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0])
+ {
+ *d++ = 3;
+ *d++ = m - c3 - 1;
+ }
}
return d;
@@ -874,30 +1073,37 @@ static UInt32 * MixMatches3(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
/*
static
-UInt32 MatchFinderMt_GetMatches_Bt4(CMatchFinderMt *p, UInt32 *d)
+UInt32* MatchFinderMt_GetMatches_Bt4(CMatchFinderMt *p, UInt32 *d)
{
- UInt32 pos = p->btBufPos;
- const UInt32 *bt = p->btBuf + pos;
- UInt32 len = *bt++;
+ const UInt32 *bt = p->btBufPos;
+ const UInt32 len = *bt++;
+ const UInt32 *btLim = bt + len;
UInt32 matchMinPos;
- const UInt32 *d_base = d;
UInt32 avail = p->btNumAvailBytes - 1;
- p->btBufPos = pos + 1 + len;
+ p->btBufPos = btLim;
{
- UInt32 temp1 = p->historySize;
p->btNumAvailBytes = avail;
#define BT_HASH_BYTES_MAX 5
+
+ matchMinPos = p->lzPos;
if (len != 0)
- temp1 = bt[1];
- else if (avail < (BT_HASH_BYTES_MAX - 2))
+ matchMinPos -= bt[1];
+ else if (avail < (BT_HASH_BYTES_MAX - 1) - 1)
{
INCREASE_LZ_POS
- return 0;
+ return d;
+ }
+ else
+ {
+ const UInt32 hs = p->historySize;
+ if (matchMinPos > hs)
+ matchMinPos -= hs;
+ else
+ matchMinPos = 1;
}
- matchMinPos = p->lzPos - temp1;
}
for (;;)
@@ -942,17 +1148,17 @@ UInt32 MatchFinderMt_GetMatches_Bt4(CMatchFinderMt *p, UInt32 *d)
{
do
{
- UInt32 v0 = bt[0];
- UInt32 v1 = bt[1];
+ const UInt32 v0 = bt[0];
+ const UInt32 v1 = bt[1];
bt += 2;
d[0] = v0;
d[1] = v1;
d += 2;
}
- while ((len -= 2) != 0);
+ while (bt != btLim);
}
INCREASE_LZ_POS
- return (UInt32)(d - d_base);
+ return d;
}
*/
@@ -962,7 +1168,7 @@ static UInt32 *MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
UInt32 h2, h3, /* h4, */ c2, c3 /* , c4 */;
UInt32 *hash = p->hash;
const Byte *cur = p->pointerToCurPos;
- UInt32 m = p->lzPos;
+ const UInt32 m = p->lzPos;
MT_HASH3_CALC
// MT_HASH4_CALC
c2 = hash[h2];
@@ -1038,43 +1244,49 @@ static UInt32 *MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
}
-static UInt32 MatchFinderMt2_GetMatches(CMatchFinderMt *p, UInt32 *d)
+static UInt32* MatchFinderMt2_GetMatches(CMatchFinderMt *p, UInt32 *d)
{
- const UInt32 *bt = p->btBuf + p->btBufPos;
- UInt32 len = *bt++;
- p->btBufPos += 1 + len;
+ const UInt32 *bt = p->btBufPos; // btBufPos is a pointer now (it was an index into btBuf)
+ const UInt32 len = *bt++; // first item of the record: number of UInt32 values that follow
+ const UInt32 *btLim = bt + len; // end of this record == start of the next one
+ p->btBufPos = btLim;
p->btNumAvailBytes--;
+ INCREASE_LZ_POS
{
- UInt32 i;
- for (i = 0; i < len; i += 2)
+ while (bt != btLim) // NOTE(review): terminates only if len is even - presumably guaranteed by the producer; confirm
{
- UInt32 v0 = bt[0];
- UInt32 v1 = bt[1];
+ const UInt32 v0 = bt[0];
+ const UInt32 v1 = bt[1];
bt += 2;
d[0] = v0;
d[1] = v1;
d += 2;
}
}
- INCREASE_LZ_POS
- return len;
+ return d; // returns the end of the written output instead of the item count
}
-static UInt32 MatchFinderMt_GetMatches(CMatchFinderMt *p, UInt32 *d)
+static UInt32* MatchFinderMt_GetMatches(CMatchFinderMt *p, UInt32 *d)
{
- UInt32 pos = p->btBufPos;
- const UInt32 *bt = p->btBuf + pos;
+ const UInt32 *bt = p->btBufPos;
UInt32 len = *bt++;
- UInt32 avail = p->btNumAvailBytes - 1;
+ const UInt32 avail = p->btNumAvailBytes - 1;
p->btNumAvailBytes = avail;
- p->btBufPos = pos + 1 + len;
+ p->btBufPos = bt + len;
if (len == 0)
{
#define BT_HASH_BYTES_MAX 5
if (avail >= (BT_HASH_BYTES_MAX - 1) - 1)
- len = (UInt32)(p->MixMatchesFunc(p, p->lzPos - p->historySize, d) - d);
+ {
+ UInt32 m = p->lzPos;
+ if (m > p->historySize)
+ m -= p->historySize;
+ else
+ m = 1;
+ d = p->MixMatchesFunc(p, m, d);
+ }
}
else
{
@@ -1083,27 +1295,26 @@ static UInt32 MatchFinderMt_GetMatches(CMatchFinderMt *p, UInt32 *d)
(match_len >= numHashBytes).
MixMatchesFunc() inserts only hash matches that are nearer than (match_dist)
*/
- UInt32 *d2;
- d2 = p->MixMatchesFunc(p, p->lzPos - bt[1], d);
+ d = p->MixMatchesFunc(p, p->lzPos - bt[1], d);
+ // if (d) // check for failure
do
{
- UInt32 v0 = bt[0];
- UInt32 v1 = bt[1];
+ const UInt32 v0 = bt[0];
+ const UInt32 v1 = bt[1];
bt += 2;
- d2[0] = v0;
- d2[1] = v1;
- d2 += 2;
+ d[0] = v0;
+ d[1] = v1;
+ d += 2;
}
- while ((len -= 2) != 0);
- len = (UInt32)(d2 - d);
+ while (len -= 2);
}
INCREASE_LZ_POS
- return len;
+ return d;
}
#define SKIP_HEADER2_MT do { GET_NEXT_BLOCK_IF_REQUIRED
#define SKIP_HEADER_MT(n) SKIP_HEADER2_MT if (p->btNumAvailBytes-- >= (n)) { const Byte *cur = p->pointerToCurPos; UInt32 *hash = p->hash;
-#define SKIP_FOOTER_MT } INCREASE_LZ_POS p->btBufPos += p->btBuf[p->btBufPos] + 1; } while (--num != 0);
+#define SKIP_FOOTER_MT } INCREASE_LZ_POS p->btBufPos += (size_t)*p->btBufPos + 1; } while (--num != 0);
static void MatchFinderMt0_Skip(CMatchFinderMt *p, UInt32 num)
{
@@ -1131,10 +1342,14 @@ static void MatchFinderMt3_Skip(CMatchFinderMt *p, UInt32 num)
SKIP_FOOTER_MT
}
+/*
+// MatchFinderMt4_Skip() is similar to MatchFinderMt3_Skip().
+// The difference is that MatchFinderMt3_Skip() updates hash for last 3 bytes of stream.
+
static void MatchFinderMt4_Skip(CMatchFinderMt *p, UInt32 num)
{
SKIP_HEADER_MT(4)
- UInt32 h2, h3 /*, h4 */;
+ UInt32 h2, h3; // h4
MT_HASH3_CALC
// MT_HASH4_CALC
// (hash + kFix4HashSize)[h4] =
@@ -1143,15 +1358,16 @@ static void MatchFinderMt4_Skip(CMatchFinderMt *p, UInt32 num)
p->lzPos;
SKIP_FOOTER_MT
}
+*/
-void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder *vTable)
+void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder2 *vTable)
{
vTable->Init = (Mf_Init_Func)MatchFinderMt_Init;
vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinderMt_GetNumAvailableBytes;
vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinderMt_GetPointerToCurrentPos;
vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt_GetMatches;
- switch (p->MatchFinder->numHashBytes)
+ switch (MF(p)->numHashBytes)
{
case 2:
p->GetHeadsFunc = GetHeads2;
@@ -1160,12 +1376,12 @@ void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder *vTable)
vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt2_GetMatches;
break;
case 3:
- p->GetHeadsFunc = p->MatchFinder->bigHash ? GetHeads3b : GetHeads3;
+ p->GetHeadsFunc = MF(p)->bigHash ? GetHeads3b : GetHeads3;
p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches2;
vTable->Skip = (Mf_Skip_Func)MatchFinderMt2_Skip;
break;
case 4:
- p->GetHeadsFunc = p->MatchFinder->bigHash ? GetHeads4b : GetHeads4;
+ p->GetHeadsFunc = MF(p)->bigHash ? GetHeads4b : GetHeads4;
// it's fast inline version of GetMatches()
// vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt_GetMatches_Bt4;
@@ -1174,9 +1390,11 @@ void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder *vTable)
vTable->Skip = (Mf_Skip_Func)MatchFinderMt3_Skip;
break;
default:
- p->GetHeadsFunc = p->MatchFinder->bigHash ? GetHeads5b : GetHeads5;
+ p->GetHeadsFunc = MF(p)->bigHash ? GetHeads5b : GetHeads5;
p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches4;
- vTable->Skip = (Mf_Skip_Func)MatchFinderMt4_Skip;
+ vTable->Skip =
+ (Mf_Skip_Func)MatchFinderMt3_Skip;
+ // (Mf_Skip_Func)MatchFinderMt4_Skip;
break;
}
}
diff --git a/C/LzFindMt.h b/C/LzFindMt.h
index 05297ca3..660b7244 100644
--- a/C/LzFindMt.h
+++ b/C/LzFindMt.h
@@ -1,5 +1,5 @@
/* LzFindMt.h -- multithreaded Match finder for LZ algorithms
-2019-11-05 : Igor Pavlov : Public domain */
+2021-07-12 : Igor Pavlov : Public domain */
#ifndef __LZ_FIND_MT_H
#define __LZ_FIND_MT_H
@@ -11,22 +11,24 @@ EXTERN_C_BEGIN
typedef struct _CMtSync
{
+ UInt32 numProcessedBlocks;
+ CThread thread;
+ UInt64 affinity;
+
BoolInt wasCreated;
BoolInt needStart;
+ BoolInt csWasInitialized;
+ BoolInt csWasEntered;
+
BoolInt exit;
BoolInt stopWriting;
- CThread thread;
CAutoResetEvent canStart;
- CAutoResetEvent wasStarted;
CAutoResetEvent wasStopped;
CSemaphore freeSemaphore;
CSemaphore filledSemaphore;
- BoolInt csWasInitialized;
- BoolInt csWasEntered;
CCriticalSection cs;
- UInt32 numProcessedBlocks;
- UInt64 affinity;
+ // UInt32 numBlocks_Sent;
} CMtSync;
typedef UInt32 * (*Mf_Mix_Matches)(void *p, UInt32 matchMinPos, UInt32 *distances);
@@ -42,8 +44,8 @@ typedef struct _CMatchFinderMt
/* LZ */
const Byte *pointerToCurPos;
UInt32 *btBuf;
- UInt32 btBufPos;
- UInt32 btBufPosLimit;
+ const UInt32 *btBufPos;
+ const UInt32 *btBufPosLimit;
UInt32 lzPos;
UInt32 btNumAvailBytes;
@@ -54,6 +56,10 @@ typedef struct _CMatchFinderMt
const UInt32 *crc;
Mf_Mix_Matches MixMatchesFunc;
+ UInt32 failure_LZ_BT; // failure in BT transfered to LZ
+ // UInt32 failure_LZ_LZ; // failure in LZ tables
+ UInt32 failureBuf[1];
+ // UInt32 crc[256];
/* LZ + BT */
CMtSync btSync;
@@ -64,6 +70,8 @@ typedef struct _CMatchFinderMt
UInt32 hashBufPos;
UInt32 hashBufPosLimit;
UInt32 hashNumAvail;
+ UInt32 failure_BT;
+
CLzRef *son;
UInt32 matchMaxLen;
@@ -71,7 +79,7 @@ typedef struct _CMatchFinderMt
UInt32 pos;
const Byte *buffer;
UInt32 cyclicBufferPos;
- UInt32 cyclicBufferSize; /* it must be historySize + 1 */
+ UInt32 cyclicBufferSize; /* it must be = (historySize + 1) */
UInt32 cutValue;
/* BT + Hash */
@@ -81,13 +89,19 @@ typedef struct _CMatchFinderMt
/* Hash */
Mf_GetHeads GetHeadsFunc;
CMatchFinder *MatchFinder;
+ // CMatchFinder MatchFinder;
} CMatchFinderMt;
+// only for Mt part
void MatchFinderMt_Construct(CMatchFinderMt *p);
void MatchFinderMt_Destruct(CMatchFinderMt *p, ISzAllocPtr alloc);
+
SRes MatchFinderMt_Create(CMatchFinderMt *p, UInt32 historySize, UInt32 keepAddBufferBefore,
UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAllocPtr alloc);
-void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder *vTable);
+void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder2 *vTable);
+
+/* call MatchFinderMt_InitMt() before IMatchFinder::Init() */
+SRes MatchFinderMt_InitMt(CMatchFinderMt *p);
void MatchFinderMt_ReleaseStream(CMatchFinderMt *p);
EXTERN_C_END
diff --git a/C/LzFindOpt.c b/C/LzFindOpt.c
new file mode 100644
index 00000000..8ff006e0
--- /dev/null
+++ b/C/LzFindOpt.c
@@ -0,0 +1,578 @@
+/* LzFindOpt.c -- multithreaded Match finder for LZ algorithms
+2021-07-13 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#include "CpuArch.h"
+#include "LzFind.h"
+
+// #include "LzFindMt.h"
+
+// #define LOG_ITERS
+
+// #define LOG_THREAD
+
+#ifdef LOG_THREAD
+#include <stdio.h>
+#define PRF(x) x
+#else
+// #define PRF(x)
+#endif
+
+#ifdef LOG_ITERS
+#include <stdio.h>
+UInt64 g_NumIters_Tree;
+UInt64 g_NumIters_Loop;
+UInt64 g_NumIters_Bytes;
+#define LOG_ITER(x) x
+#else
+#define LOG_ITER(x)
+#endif
+
+// ---------- BT THREAD ----------
+
+#define USE_SON_PREFETCH
+#define USE_LONG_MATCH_OPT
+
+#define kEmptyHashValue 0
+
+// #define CYC_TO_POS_OFFSET 0
+
+// #define CYC_TO_POS_OFFSET 1 // for debug
+
+/*
+MY_NO_INLINE
+UInt32 * MY_FAST_CALL GetMatchesSpecN_1(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
+ UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size, UInt32 *posRes)
+{
+ do
+ {
+ UInt32 delta;
+ if (hash == size)
+ break;
+ delta = *hash++;
+
+ if (delta == 0 || delta > (UInt32)pos)
+ return NULL;
+
+ lenLimit++;
+
+ if (delta == (UInt32)pos)
+ {
+ CLzRef *ptr1 = son + ((size_t)pos << 1) - CYC_TO_POS_OFFSET * 2;
+ *d++ = 0;
+ ptr1[0] = kEmptyHashValue;
+ ptr1[1] = kEmptyHashValue;
+ }
+else
+{
+ UInt32 *_distances = ++d;
+
+ CLzRef *ptr0 = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2 + 1;
+ CLzRef *ptr1 = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2;
+
+ const Byte *len0 = cur, *len1 = cur;
+ UInt32 cutValue = _cutValue;
+ const Byte *maxLen = cur + _maxLen;
+
+ for (LOG_ITER(g_NumIters_Tree++);;)
+ {
+ LOG_ITER(g_NumIters_Loop++);
+ {
+ const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
+ CLzRef *pair = son + ((size_t)(((ptrdiff_t)pos - CYC_TO_POS_OFFSET) + diff) << 1);
+ const Byte *len = (len0 < len1 ? len0 : len1);
+
+ #ifdef USE_SON_PREFETCH
+ const UInt32 pair0 = *pair;
+ #endif
+
+ if (len[diff] == len[0])
+ {
+ if (++len != lenLimit && len[diff] == len[0])
+ while (++len != lenLimit)
+ {
+ LOG_ITER(g_NumIters_Bytes++);
+ if (len[diff] != len[0])
+ break;
+ }
+ if (maxLen < len)
+ {
+ maxLen = len;
+ *d++ = (UInt32)(len - cur);
+ *d++ = delta - 1;
+
+ if (len == lenLimit)
+ {
+ const UInt32 pair1 = pair[1];
+ *ptr1 =
+ #ifdef USE_SON_PREFETCH
+ pair0;
+ #else
+ pair[0];
+ #endif
+ *ptr0 = pair1;
+
+ _distances[-1] = (UInt32)(d - _distances);
+
+ #ifdef USE_LONG_MATCH_OPT
+
+ if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)
+ break;
+
+ {
+ for (;;)
+ {
+ hash++;
+ pos++;
+ cur++;
+ lenLimit++;
+ {
+ CLzRef *ptr = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2;
+ #if 0
+ *(UInt64 *)(void *)ptr = ((const UInt64 *)(const void *)ptr)[diff];
+ #else
+ const UInt32 p0 = ptr[0 + (diff * 2)];
+ const UInt32 p1 = ptr[1 + (diff * 2)];
+ ptr[0] = p0;
+ ptr[1] = p1;
+ // ptr[0] = ptr[0 + (diff * 2)];
+ // ptr[1] = ptr[1 + (diff * 2)];
+ #endif
+ }
+ // PrintSon(son + 2, pos - 1);
+ // printf("\npos = %x delta = %x\n", pos, delta);
+ len++;
+ *d++ = 2;
+ *d++ = (UInt32)(len - cur);
+ *d++ = delta - 1;
+ if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)
+ break;
+ }
+ }
+ #endif
+
+ break;
+ }
+ }
+ }
+
+ {
+ const UInt32 curMatch = (UInt32)pos - delta; // (UInt32)(pos + diff);
+ if (len[diff] < len[0])
+ {
+ delta = pair[1];
+ if (delta >= curMatch)
+ return NULL;
+ *ptr1 = curMatch;
+ ptr1 = pair + 1;
+ len1 = len;
+ }
+ else
+ {
+ delta = *pair;
+ if (delta >= curMatch)
+ return NULL;
+ *ptr0 = curMatch;
+ ptr0 = pair;
+ len0 = len;
+ }
+
+ delta = (UInt32)pos - delta;
+
+ if (--cutValue == 0 || delta >= pos)
+ {
+ *ptr0 = *ptr1 = kEmptyHashValue;
+ _distances[-1] = (UInt32)(d - _distances);
+ break;
+ }
+ }
+ }
+ } // for (tree iterations)
+}
+ pos++;
+ cur++;
+ }
+ while (d < limit);
+ *posRes = (UInt32)pos;
+ return d;
+}
+*/
+
+/* define cbs if you use 2 functions.
+ GetMatchesSpecN_1() : (pos < _cyclicBufferSize)
+ GetMatchesSpecN_2() : (pos >= _cyclicBufferSize)
+
+ do not define cbs if you use 1 function:
+ GetMatchesSpecN_2()
+*/
+
+// #define cbs _cyclicBufferSize
+
+/*
+ we use size_t for (pos) and (_cyclicBufferPos) instead of UInt32
+ to eliminate "movsx" BUG in old MSVC x64 compiler.
+*/
+
+UInt32 * MY_FAST_CALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
+ UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
+ size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
+ UInt32 *posRes);
+
+MY_NO_INLINE
+UInt32 * MY_FAST_CALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
+ UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
+ size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
+ UInt32 *posRes)
+{
+ do // while (hash != size)
+ {
+ UInt32 delta;
+
+ #ifndef cbs
+ UInt32 cbs;
+ #endif
+
+ if (hash == size) // all input items in [hash, size) were consumed
+ break;
+
+ delta = *hash++; // one input item per position
+
+ if (delta == 0) // zero delta is treated as a failure: NULL is returned
+ return NULL;
+
+ lenLimit++;
+
+ #ifndef cbs
+ cbs = _cyclicBufferSize;
+ if ((UInt32)pos < cbs)
+ {
+ if (delta > (UInt32)pos) // delta larger than pos is also a failure
+ return NULL;
+ cbs = (UInt32)pos; // pos is still below _cyclicBufferSize: use pos as the limit instead
+ }
+ #endif
+
+ if (delta >= cbs) // out of range: emit an empty record (single 0) and clear both sons of this node
+ {
+ CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
+ *d++ = 0;
+ ptr1[0] = kEmptyHashValue;
+ ptr1[1] = kEmptyHashValue;
+ }
+else
+{
+ UInt32 *_distances = ++d; // reserves one slot (_distances[-1]) for the record size, filled in later
+
+ CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
+ CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
+
+ UInt32 cutValue = _cutValue;
+ const Byte *len0 = cur, *len1 = cur; // lengths are tracked as pointers into the data, not as counters
+ const Byte *maxLen = cur + _maxLen;
+
+ // if (cutValue == 0) { *ptr0 = *ptr1 = kEmptyHashValue; } else
+ for (LOG_ITER(g_NumIters_Tree++);;)
+ {
+ LOG_ITER(g_NumIters_Loop++);
+ {
+ // SPEC code
+ CLzRef *pair = son + ((size_t)((ptrdiff_t)_cyclicBufferPos - (ptrdiff_t)delta
+ + (ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0)
+ ) << 1); // cbs is added when (_cyclicBufferPos - delta) would be negative; two CLzRef per position
+
+ const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
+ const Byte *len = (len0 < len1 ? len0 : len1);
+
+ #ifdef USE_SON_PREFETCH
+ const UInt32 pair0 = *pair;
+ #endif
+
+ if (len[diff] == len[0])
+ {
+ if (++len != lenLimit && len[diff] == len[0])
+ while (++len != lenLimit)
+ {
+ LOG_ITER(g_NumIters_Bytes++);
+ if (len[diff] != len[0])
+ break;
+ }
+ if (maxLen < len) // only matches longer than the best so far are reported
+ {
+ maxLen = len;
+ *d++ = (UInt32)(len - cur);
+ *d++ = delta - 1;
+
+ if (len == lenLimit) // match reached the length limit: take over the candidate's sons and stop the tree walk
+ {
+ const UInt32 pair1 = pair[1];
+ *ptr1 =
+ #ifdef USE_SON_PREFETCH
+ pair0;
+ #else
+ pair[0];
+ #endif
+ *ptr0 = pair1;
+
+ _distances[-1] = (UInt32)(d - _distances);
+
+ #ifdef USE_LONG_MATCH_OPT
+
+ if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)
+ break;
+
+ {
+ for (;;)
+ {
+ *d++ = 2; // complete record for the next position: size 2 ...
+ *d++ = (UInt32)(lenLimit - cur); // ... length ...
+ *d++ = delta - 1; // ... and the same distance as before
+ cur++;
+ lenLimit++;
+ // SPEC
+ _cyclicBufferPos++;
+ {
+ // SPEC code
+ CLzRef *dest = son + ((size_t)(_cyclicBufferPos) << 1);
+ const CLzRef *src = dest + ((diff
+ + (ptrdiff_t)(UInt32)((_cyclicBufferPos < delta) ? cbs : 0)) << 1);
+ // CLzRef *ptr = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2;
+ #if 0
+ *(UInt64 *)(void *)dest = *((const UInt64 *)(const void *)src);
+ #else
+ const UInt32 p0 = src[0];
+ const UInt32 p1 = src[1];
+ dest[0] = p0;
+ dest[1] = p1;
+ #endif
+ }
+ pos++;
+ hash++;
+ if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)
+ break;
+ } // for() end for long matches
+ }
+ #endif
+
+ break; // break from TREE iterations
+ }
+ }
+ }
+ {
+ const UInt32 curMatch = (UInt32)pos - delta; // (UInt32)(pos + diff);
+ if (len[diff] < len[0])
+ {
+ delta = pair[1]; // here delta temporarily holds the stored son value
+ *ptr1 = curMatch;
+ ptr1 = pair + 1;
+ len1 = len;
+ if (delta >= curMatch) // a son value must be smaller than curMatch; otherwise fail
+ return NULL;
+ }
+ else
+ {
+ delta = *pair; // here delta temporarily holds the stored son value
+ *ptr0 = curMatch;
+ ptr0 = pair;
+ len0 = len;
+ if (delta >= curMatch) // a son value must be smaller than curMatch; otherwise fail
+ return NULL;
+ }
+ delta = (UInt32)pos - delta; // convert the son value back to a delta relative to pos
+
+ if (--cutValue == 0 || delta >= cbs) // iteration budget used up or next candidate out of range: close the record
+ {
+ *ptr0 = *ptr1 = kEmptyHashValue;
+ _distances[-1] = (UInt32)(d - _distances);
+ break;
+ }
+ }
+ }
+ } // for (tree iterations)
+}
+ pos++;
+ _cyclicBufferPos++;
+ cur++;
+ }
+ while (d < limit);
+ *posRes = (UInt32)pos; // position reached, reported to the caller
+ return d; // pointer past the last output item written; NULL is returned only on the failure paths above
+}
+
+
+
+/*
+typedef UInt32 uint32plus; // size_t
+
+UInt32 * MY_FAST_CALL GetMatchesSpecN_3(uint32plus lenLimit, size_t pos, const Byte *cur, CLzRef *son,
+ UInt32 _cutValue, UInt32 *d, uint32plus _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
+ size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
+ UInt32 *posRes)
+{
+ do // while (hash != size)
+ {
+ UInt32 delta;
+
+ #ifndef cbs
+ UInt32 cbs;
+ #endif
+
+ if (hash == size)
+ break;
+
+ delta = *hash++;
+
+ if (delta == 0)
+ return NULL;
+
+ #ifndef cbs
+ cbs = _cyclicBufferSize;
+ if ((UInt32)pos < cbs)
+ {
+ if (delta > (UInt32)pos)
+ return NULL;
+ cbs = (UInt32)pos;
+ }
+ #endif
+
+ if (delta >= cbs)
+ {
+ CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
+ *d++ = 0;
+ ptr1[0] = kEmptyHashValue;
+ ptr1[1] = kEmptyHashValue;
+ }
+else
+{
+ CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
+ CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
+ UInt32 *_distances = ++d;
+ uint32plus len0 = 0, len1 = 0;
+ UInt32 cutValue = _cutValue;
+ uint32plus maxLen = _maxLen;
+ // lenLimit++; // const Byte *lenLimit = cur + _lenLimit;
+
+ for (LOG_ITER(g_NumIters_Tree++);;)
+ {
+ LOG_ITER(g_NumIters_Loop++);
+ {
+ // const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
+ CLzRef *pair = son + ((size_t)((ptrdiff_t)_cyclicBufferPos - delta
+ + (ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0)
+ ) << 1);
+ const Byte *pb = cur - delta;
+ uint32plus len = (len0 < len1 ? len0 : len1);
+
+ #ifdef USE_SON_PREFETCH
+ const UInt32 pair0 = *pair;
+ #endif
+
+ if (pb[len] == cur[len])
+ {
+ if (++len != lenLimit && pb[len] == cur[len])
+ while (++len != lenLimit)
+ if (pb[len] != cur[len])
+ break;
+ if (maxLen < len)
+ {
+ maxLen = len;
+ *d++ = (UInt32)len;
+ *d++ = delta - 1;
+ if (len == lenLimit)
+ {
+ {
+ const UInt32 pair1 = pair[1];
+ *ptr0 = pair1;
+ *ptr1 =
+ #ifdef USE_SON_PREFETCH
+ pair0;
+ #else
+ pair[0];
+ #endif
+ }
+
+ _distances[-1] = (UInt32)(d - _distances);
+
+ #ifdef USE_LONG_MATCH_OPT
+
+ if (hash == size || *hash != delta || pb[lenLimit] != cur[lenLimit] || d >= limit)
+ break;
+
+ {
+ const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
+ for (;;)
+ {
+ *d++ = 2;
+ *d++ = (UInt32)lenLimit;
+ *d++ = delta - 1;
+ _cyclicBufferPos++;
+ {
+ CLzRef *dest = son + ((size_t)_cyclicBufferPos << 1);
+ const CLzRef *src = dest + ((diff +
+ (ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0)) << 1);
+ #if 0
+ *(UInt64 *)(void *)dest = *((const UInt64 *)(const void *)src);
+ #else
+ const UInt32 p0 = src[0];
+ const UInt32 p1 = src[1];
+ dest[0] = p0;
+ dest[1] = p1;
+ #endif
+ }
+ hash++;
+ pos++;
+ cur++;
+ pb++;
+ if (hash == size || *hash != delta || pb[lenLimit] != cur[lenLimit] || d >= limit)
+ break;
+ }
+ }
+ #endif
+
+ break;
+ }
+ }
+ }
+ {
+ const UInt32 curMatch = (UInt32)pos - delta;
+ if (pb[len] < cur[len])
+ {
+ delta = pair[1];
+ *ptr1 = curMatch;
+ ptr1 = pair + 1;
+ len1 = len;
+ }
+ else
+ {
+ delta = *pair;
+ *ptr0 = curMatch;
+ ptr0 = pair;
+ len0 = len;
+ }
+
+ {
+ if (delta >= curMatch)
+ return NULL;
+ delta = (UInt32)pos - delta;
+ if (delta >= cbs
+ // delta >= _cyclicBufferSize || delta >= pos
+ || --cutValue == 0)
+ {
+ *ptr0 = *ptr1 = kEmptyHashValue;
+ _distances[-1] = (UInt32)(d - _distances);
+ break;
+ }
+ }
+ }
+ }
+ } // for (tree iterations)
+}
+ pos++;
+ _cyclicBufferPos++;
+ cur++;
+ }
+ while (d < limit);
+ *posRes = (UInt32)pos;
+ return d;
+}
+*/
diff --git a/C/LzmaEnc.c b/C/LzmaEnc.c
index 377e20c3..832e4149 100644
--- a/C/LzmaEnc.c
+++ b/C/LzmaEnc.c
@@ -1,5 +1,5 @@
/* LzmaEnc.c -- LZMA Encoder
-2021-04-01: Igor Pavlov : Public domain */
+2021-07-10: Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -12,6 +12,7 @@
#include <stdio.h>
#endif
+#include "CpuArch.h"
#include "LzmaEnc.h"
#include "LzFind.h"
@@ -36,8 +37,8 @@ void LzmaEnc_RestoreState(CLzmaEncHandle pp);
static unsigned g_STAT_OFFSET = 0;
#endif
-#define kLzmaMaxHistorySize ((UInt32)3 << 29)
-/* #define kLzmaMaxHistorySize ((UInt32)7 << 29) */
+/* for good normalization speed we still reserve 256 MB before 4 GB range */
+#define kLzmaMaxHistorySize ((UInt32)15 << 28)
#define kNumTopBits 24
#define kTopValue ((UInt32)1 << kNumTopBits)
@@ -78,13 +79,12 @@ void LzmaEncProps_Normalize(CLzmaEncProps *p)
if (p->dictSize > p->reduceSize)
{
- unsigned i;
- UInt32 reduceSize = (UInt32)p->reduceSize;
- for (i = 11; i <= 30; i++)
- {
- if (reduceSize <= ((UInt32)2 << i)) { p->dictSize = ((UInt32)2 << i); break; }
- if (reduceSize <= ((UInt32)3 << i)) { p->dictSize = ((UInt32)3 << i); break; }
- }
+ UInt32 v = (UInt32)p->reduceSize;
+ const UInt32 kReduceMin = ((UInt32)1 << 12);
+ if (v < kReduceMin)
+ v = kReduceMin;
+ if (p->dictSize > v)
+ p->dictSize = v;
}
if (p->lc < 0) p->lc = 3;
@@ -113,18 +113,85 @@ UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps *props2)
return props.dictSize;
}
-#if defined(_MSC_VER) && (_MSC_VER >= 1400)
-/* BSR code is fast for some new CPUs */
-/* #define LZMA_LOG_BSR */
+
+/*
+x86/x64:
+
+BSR:
+ IF (SRC == 0) ZF = 1, DEST is undefined;
+ AMD : DEST is unchanged;
+ IF (SRC != 0) ZF = 0; DEST is index of top non-zero bit
+ BSR is slow in some processors
+
+LZCNT:
+ IF (SRC == 0) CF = 1, DEST is size_in_bits_of_register(src) (32 or 64)
+ IF (SRC != 0) CF = 0, DEST = num_lead_zero_bits
+ IF (DEST == 0) ZF = 1;
+
+LZCNT works only in new processors starting from Haswell.
+if LZCNT is not supported by processor, then it's executed as BSR.
+LZCNT can be faster than BSR, if supported.
+*/
+
+// #define LZMA_LOG_BSR
+
+#if defined(MY_CPU_ARM_OR_ARM64) /* || defined(MY_CPU_X86_OR_AMD64) */
+
+ #if (defined(__clang__) && (__clang_major__ >= 6)) \
+ || (defined(__GNUC__) && (__GNUC__ >= 6))
+ #define LZMA_LOG_BSR
+ #elif defined(_MSC_VER) && (_MSC_VER >= 1300)
+ // #if defined(MY_CPU_ARM_OR_ARM64)
+ #define LZMA_LOG_BSR
+ // #endif
+ #endif
#endif
+// #include <intrin.h>
+
#ifdef LZMA_LOG_BSR
-#define kDicLogSizeMaxCompress 32
+#if defined(__clang__) \
+ || defined(__GNUC__)
+
+/*
+ C code: : (30 - __builtin_clz(x))
+ gcc9/gcc10 for x64 /x86 : 30 - (bsr(x) xor 31)
+ clang10 for x64 : 31 + (bsr(x) xor -32)
+*/
+
+ #define MY_clz(x) ((unsigned)__builtin_clz(x))
+ // __lzcnt32
+ // __builtin_ia32_lzcnt_u32
+
+#else // #if defined(_MSC_VER)
+
+ #ifdef MY_CPU_ARM_OR_ARM64
+
+ #define MY_clz _CountLeadingZeros
+
+ #else // if defined(MY_CPU_X86_OR_AMD64)
+
+ // #define MY_clz __lzcnt // we can use lzcnt (unsupported by old CPU)
+ // _BitScanReverse code is not optimal for some MSVC compilers
+ #define BSR2_RET(pos, res) { unsigned long zz; _BitScanReverse(&zz, (pos)); zz--; \
+ res = (zz + zz) + (pos >> zz); }
+
+ #endif // MY_CPU_X86_OR_AMD64
-#define BSR2_RET(pos, res) { unsigned long zz; _BitScanReverse(&zz, (pos)); res = (zz + zz) + ((pos >> (zz - 1)) & 1); }
+#endif // _MSC_VER
-static unsigned GetPosSlot1(UInt32 pos)
+
+#ifndef BSR2_RET
+
+ #define BSR2_RET(pos, res) { unsigned zz = 30 - MY_clz(pos); \
+ res = (zz + zz) + (pos >> zz); }
+
+#endif
+
+
+unsigned GetPosSlot1(UInt32 pos);
+unsigned GetPosSlot1(UInt32 pos)
{
unsigned res;
BSR2_RET(pos, res);
@@ -133,10 +200,10 @@ static unsigned GetPosSlot1(UInt32 pos)
#define GetPosSlot2(pos, res) { BSR2_RET(pos, res); }
#define GetPosSlot(pos, res) { if (pos < 2) res = pos; else BSR2_RET(pos, res); }
-#else
-#define kNumLogBits (9 + sizeof(size_t) / 2)
-/* #define kNumLogBits (11 + sizeof(size_t) / 8 * 3) */
+#else // ! LZMA_LOG_BSR
+
+#define kNumLogBits (11 + sizeof(size_t) / 8 * 3)
#define kDicLogSizeMaxCompress ((kNumLogBits - 1) * 2 + 7)
@@ -183,7 +250,7 @@ static void LzmaEnc_FastPosInit(Byte *g_FastPos)
#define GetPosSlot2(pos, res) { BSR2_RET(pos, res); }
#define GetPosSlot(pos, res) { if (pos < kNumFullDistances) res = p->g_FastPos[pos & (kNumFullDistances - 1)]; else BSR2_RET(pos, res); }
-#endif
+#endif // LZMA_LOG_BSR
#define LZMA_NUM_REPS 4
@@ -319,7 +386,7 @@ typedef UInt32 CProbPrice;
typedef struct
{
void *matchFinderObj;
- IMatchFinder matchFinder;
+ IMatchFinder2 matchFinder;
unsigned optCur;
unsigned optEnd;
@@ -364,10 +431,14 @@ typedef struct
// begin of CMatchFinderMt is used in LZ thread
CMatchFinderMt matchFinderMt;
// end of CMatchFinderMt is used in BT and HASH threads
+ // #else
+ // CMatchFinder matchFinderBase;
#endif
-
CMatchFinder matchFinderBase;
+
+ // we suppose that we have 8-bytes alignment after CMatchFinder
+
#ifndef _7ZIP_ST
Byte pad[128];
#endif
@@ -375,8 +446,10 @@ typedef struct
// LZ thread
CProbPrice ProbPrices[kBitModelTotal >> kNumMoveReducingBits];
- UInt32 matches[LZMA_MATCH_LEN_MAX * 2 + 2 + 1];
+ // we want {len , dist} pairs to be 8-bytes aligned in matches array
+ UInt32 matches[LZMA_MATCH_LEN_MAX * 2 + 2];
+ // we want 8-bytes alignment here
UInt32 alignPrices[kAlignTableSize];
UInt32 posSlotPrices[kNumLenToPosStates][kDistTableSizeMax];
UInt32 distancesPrices[kNumLenToPosStates][kNumFullDistances];
@@ -405,12 +478,19 @@ typedef struct
CSaveState saveState;
+ // BoolInt mf_Failure;
#ifndef _7ZIP_ST
Byte pad2[128];
#endif
} CLzmaEnc;
+#define MFB (p->matchFinderBase)
+/*
+#ifndef _7ZIP_ST
+#define MFB (p->matchFinderMt.MatchFinder)
+#endif
+*/
#define COPY_ARR(dest, src, arr) memcpy(dest->arr, src->arr, sizeof(src->arr));
@@ -475,11 +555,21 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2)
if (props.lc > LZMA_LC_MAX
|| props.lp > LZMA_LP_MAX
- || props.pb > LZMA_PB_MAX
- || props.dictSize > ((UInt64)1 << kDicLogSizeMaxCompress)
- || props.dictSize > kLzmaMaxHistorySize)
+ || props.pb > LZMA_PB_MAX)
return SZ_ERROR_PARAM;
+
+ if (props.dictSize > kLzmaMaxHistorySize)
+ props.dictSize = kLzmaMaxHistorySize;
+
+ #ifndef LZMA_LOG_BSR
+ {
+ const UInt64 dict64 = props.dictSize;
+ if (dict64 > ((UInt64)1 << kDicLogSizeMaxCompress))
+ return SZ_ERROR_PARAM;
+ }
+ #endif
+
p->dictSize = props.dictSize;
{
unsigned fb = (unsigned)props.fb;
@@ -494,7 +584,7 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2)
p->pb = (unsigned)props.pb;
p->fastMode = (props.algo == 0);
// p->_maxMode = True;
- p->matchFinderBase.btMode = (Byte)(props.btMode ? 1 : 0);
+ MFB.btMode = (Byte)(props.btMode ? 1 : 0);
{
unsigned numHashBytes = 4;
if (props.btMode)
@@ -504,10 +594,10 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2)
}
if (props.numHashBytes >= 5) numHashBytes = 5;
- p->matchFinderBase.numHashBytes = numHashBytes;
+ MFB.numHashBytes = numHashBytes;
}
- p->matchFinderBase.cutValue = props.mc;
+ MFB.cutValue = props.mc;
p->writeEndMark = (BoolInt)props.writeEndMark;
@@ -531,7 +621,7 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2)
void LzmaEnc_SetDataSize(CLzmaEncHandle pp, UInt64 expectedDataSiize)
{
CLzmaEnc *p = (CLzmaEnc *)pp;
- p->matchFinderBase.expectedDataSize = expectedDataSiize;
+ MFB.expectedDataSize = expectedDataSiize;
}
@@ -1007,7 +1097,11 @@ static unsigned ReadMatchDistances(CLzmaEnc *p, unsigned *numPairsRes)
p->additionalOffset++;
p->numAvail = p->matchFinder.GetNumAvailableBytes(p->matchFinderObj);
- numPairs = p->matchFinder.GetMatches(p->matchFinderObj, p->matches);
+ {
+ const UInt32 *d = p->matchFinder.GetMatches(p->matchFinderObj, p->matches);
+ // if (!d) { p->mf_Failure = True; *numPairsRes = 0; return 0; }
+ numPairs = (unsigned)(d - p->matches);
+ }
*numPairsRes = numPairs;
#ifdef SHOW_STAT
@@ -1023,7 +1117,7 @@ static unsigned ReadMatchDistances(CLzmaEnc *p, unsigned *numPairsRes)
if (numPairs == 0)
return 0;
{
- unsigned len = p->matches[(size_t)numPairs - 2];
+ const unsigned len = p->matches[(size_t)numPairs - 2];
if (len != p->numFastBytes)
return len;
{
@@ -1033,7 +1127,7 @@ static unsigned ReadMatchDistances(CLzmaEnc *p, unsigned *numPairsRes)
{
const Byte *p1 = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;
const Byte *p2 = p1 + len;
- ptrdiff_t dif = (ptrdiff_t)-1 - (ptrdiff_t)p->matches[(size_t)numPairs - 1];
+ const ptrdiff_t dif = (ptrdiff_t)-1 - (ptrdiff_t)p->matches[(size_t)numPairs - 1];
const Byte *lim = p1 + numAvail;
for (; p2 != lim && *p2 == p2[dif]; p2++)
{}
@@ -1189,6 +1283,8 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
repLens[i] = len;
if (len > repLens[repMaxIndex])
repMaxIndex = i;
+ if (len == LZMA_MATCH_LEN_MAX) // 21.03 : optimization
+ break;
}
if (repLens[repMaxIndex] >= p->numFastBytes)
@@ -1201,10 +1297,12 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
}
matches = p->matches;
+ #define MATCHES matches
+ // #define MATCHES p->matches
if (mainLen >= p->numFastBytes)
{
- p->backRes = matches[(size_t)numPairs - 1] + LZMA_NUM_REPS;
+ p->backRes = MATCHES[(size_t)numPairs - 1] + LZMA_NUM_REPS;
MOVE_POS(p, mainLen - 1)
return mainLen;
}
@@ -1298,13 +1396,13 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
if (len < 2)
len = 2;
else
- while (len > matches[offs])
+ while (len > MATCHES[offs])
offs += 2;
for (; ; len++)
{
COptimal *opt;
- UInt32 dist = matches[(size_t)offs + 1];
+ UInt32 dist = MATCHES[(size_t)offs + 1];
UInt32 price = normalMatchPrice + GET_PRICE_LEN(&p->lenEnc, posState, len);
unsigned lenToPosState = GetLenToPosState(len);
@@ -1328,7 +1426,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
opt->extra = 0;
}
- if (len == matches[offs])
+ if (len == MATCHES[offs])
{
offs += 2;
if (offs == numPairs)
@@ -1749,8 +1847,8 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
if (newLen > numAvail)
{
newLen = numAvail;
- for (numPairs = 0; newLen > matches[numPairs]; numPairs += 2);
- matches[numPairs] = (UInt32)newLen;
+ for (numPairs = 0; newLen > MATCHES[numPairs]; numPairs += 2);
+ MATCHES[numPairs] = (UInt32)newLen;
numPairs += 2;
}
@@ -1769,9 +1867,9 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
}
offs = 0;
- while (startLen > matches[offs])
+ while (startLen > MATCHES[offs])
offs += 2;
- dist = matches[(size_t)offs + 1];
+ dist = MATCHES[(size_t)offs + 1];
// if (dist >= kNumFullDistances)
GetPosSlot2(dist, posSlot);
@@ -1798,7 +1896,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
}
}
- if (len == matches[offs])
+ if (len == MATCHES[offs])
{
// if (p->_maxMode) {
// MATCH : LIT : REP_0
@@ -1863,7 +1961,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
offs += 2;
if (offs == numPairs)
break;
- dist = matches[(size_t)offs + 1];
+ dist = MATCHES[(size_t)offs + 1];
// if (dist >= kNumFullDistances)
GetPosSlot2(dist, posSlot);
}
@@ -2081,8 +2179,23 @@ static SRes CheckErrors(CLzmaEnc *p)
return p->result;
if (p->rc.res != SZ_OK)
p->result = SZ_ERROR_WRITE;
- if (p->matchFinderBase.result != SZ_OK)
+
+ #ifndef _7ZIP_ST
+ if (
+ // p->mf_Failure ||
+ (p->mtMode &&
+ ( // p->matchFinderMt.failure_LZ_LZ ||
+ p->matchFinderMt.failure_LZ_BT))
+ )
+ {
+ p->result = MY_HRES_ERROR__INTERNAL_ERROR;
+ // printf("\nCheckErrors p->matchFinderMt.failureLZ\n");
+ }
+ #endif
+
+ if (MFB.result != SZ_OK)
p->result = SZ_ERROR_READ;
+
if (p->result != SZ_OK)
p->finished = True;
return p->result;
@@ -2223,11 +2336,11 @@ MY_NO_INLINE static void FillDistancesPrices(CLzmaEnc *p)
static void LzmaEnc_Construct(CLzmaEnc *p)
{
RangeEnc_Construct(&p->rc);
- MatchFinder_Construct(&p->matchFinderBase);
+ MatchFinder_Construct(&MFB);
#ifndef _7ZIP_ST
+ p->matchFinderMt.MatchFinder = &MFB;
MatchFinderMt_Construct(&p->matchFinderMt);
- p->matchFinderMt.MatchFinder = &p->matchFinderBase;
#endif
{
@@ -2243,7 +2356,6 @@ static void LzmaEnc_Construct(CLzmaEnc *p)
LzmaEnc_InitPriceTables(p->ProbPrices);
p->litProbs = NULL;
p->saveState.litProbs = NULL;
-
}
CLzmaEncHandle LzmaEnc_Create(ISzAllocPtr alloc)
@@ -2269,7 +2381,7 @@ static void LzmaEnc_Destruct(CLzmaEnc *p, ISzAllocPtr alloc, ISzAllocPtr allocBi
MatchFinderMt_Destruct(&p->matchFinderMt, allocBig);
#endif
- MatchFinder_Free(&p->matchFinderBase, allocBig);
+ MatchFinder_Free(&MFB, allocBig);
LzmaEnc_FreeLits(p, alloc);
RangeEnc_Free(&p->rc, alloc);
}
@@ -2287,6 +2399,12 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpa
UInt32 nowPos32, startPos32;
if (p->needInit)
{
+ #ifndef _7ZIP_ST
+ if (p->mtMode)
+ {
+ RINOK(MatchFinderMt_InitMt(&p->matchFinderMt));
+ }
+ #endif
p->matchFinder.Init(p->matchFinderObj);
p->needInit = 0;
}
@@ -2582,11 +2700,13 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpa
static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig)
{
UInt32 beforeSize = kNumOpts;
+ UInt32 dictSize;
+
if (!RangeEnc_Alloc(&p->rc, alloc))
return SZ_ERROR_MEM;
#ifndef _7ZIP_ST
- p->mtMode = (p->multiThread && !p->fastMode && (p->matchFinderBase.btMode != 0));
+ p->mtMode = (p->multiThread && !p->fastMode && (MFB.btMode != 0));
#endif
{
@@ -2605,30 +2725,50 @@ static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc,
}
}
- p->matchFinderBase.bigHash = (Byte)(p->dictSize > kBigHashDicLimit ? 1 : 0);
+ MFB.bigHash = (Byte)(p->dictSize > kBigHashDicLimit ? 1 : 0);
+
+
+ dictSize = p->dictSize;
+ if (dictSize == ((UInt32)2 << 30) ||
+ dictSize == ((UInt32)3 << 30))
+ {
+ /* 21.03 : here we reduce the dictionary for 2 reasons:
+ 1) we don't want 32-bit back_distance matches in decoder for 2 GB dictionary.
+ 2) we want to eliminate useless last MatchFinder_Normalize3() for corner cases,
+ where data size is aligned for 1 GB: 5/6/8 GB.
+ That reduction must be >= 1 for such corner cases. */
+ dictSize -= 1;
+ }
+
+ if (beforeSize + dictSize < keepWindowSize)
+ beforeSize = keepWindowSize - dictSize;
- if (beforeSize + p->dictSize < keepWindowSize)
- beforeSize = keepWindowSize - p->dictSize;
+ /* in worst case we can look ahead for
+ max(LZMA_MATCH_LEN_MAX, numFastBytes + 1 + numFastBytes) bytes.
+ we send larger value for (keepAfter) to MatchFinder_Create():
+ (numFastBytes + LZMA_MATCH_LEN_MAX + 1)
+ */
#ifndef _7ZIP_ST
if (p->mtMode)
{
- RINOK(MatchFinderMt_Create(&p->matchFinderMt, p->dictSize, beforeSize, p->numFastBytes,
- LZMA_MATCH_LEN_MAX
- + 1 /* 18.04 */
+ RINOK(MatchFinderMt_Create(&p->matchFinderMt, dictSize, beforeSize,
+ p->numFastBytes, LZMA_MATCH_LEN_MAX + 1 /* 18.04 */
, allocBig));
p->matchFinderObj = &p->matchFinderMt;
- p->matchFinderBase.bigHash = (Byte)(
- (p->dictSize > kBigHashDicLimit && p->matchFinderBase.hashMask >= 0xFFFFFF) ? 1 : 0);
+ MFB.bigHash = (Byte)(
+ (p->dictSize > kBigHashDicLimit && MFB.hashMask >= 0xFFFFFF) ? 1 : 0);
MatchFinderMt_CreateVTable(&p->matchFinderMt, &p->matchFinder);
}
else
#endif
{
- if (!MatchFinder_Create(&p->matchFinderBase, p->dictSize, beforeSize, p->numFastBytes, LZMA_MATCH_LEN_MAX, allocBig))
+ if (!MatchFinder_Create(&MFB, dictSize, beforeSize,
+ p->numFastBytes, LZMA_MATCH_LEN_MAX + 1 /* 21.03 */
+ , allocBig))
return SZ_ERROR_MEM;
- p->matchFinderObj = &p->matchFinderBase;
- MatchFinder_CreateVTable(&p->matchFinderBase, &p->matchFinder);
+ p->matchFinderObj = &MFB;
+ MatchFinder_CreateVTable(&MFB, &p->matchFinder);
}
return SZ_OK;
@@ -2700,6 +2840,8 @@ static void LzmaEnc_Init(CLzmaEnc *p)
p->pbMask = ((unsigned)1 << p->pb) - 1;
p->lpMask = ((UInt32)0x100 << p->lp) - ((unsigned)0x100 >> p->lc);
+
+ // p->mf_Failure = False;
}
@@ -2742,7 +2884,7 @@ static SRes LzmaEnc_Prepare(CLzmaEncHandle pp, ISeqOutStream *outStream, ISeqInS
ISzAllocPtr alloc, ISzAllocPtr allocBig)
{
CLzmaEnc *p = (CLzmaEnc *)pp;
- p->matchFinderBase.stream = inStream;
+ MFB.stream = inStream;
p->needInit = 1;
p->rc.outStream = outStream;
return LzmaEnc_AllocAndInit(p, 0, alloc, allocBig);
@@ -2753,16 +2895,16 @@ SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle pp,
ISzAllocPtr alloc, ISzAllocPtr allocBig)
{
CLzmaEnc *p = (CLzmaEnc *)pp;
- p->matchFinderBase.stream = inStream;
+ MFB.stream = inStream;
p->needInit = 1;
return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig);
}
static void LzmaEnc_SetInputBuf(CLzmaEnc *p, const Byte *src, SizeT srcLen)
{
- p->matchFinderBase.directInput = 1;
- p->matchFinderBase.bufferBase = (Byte *)src;
- p->matchFinderBase.directInputRem = srcLen;
+ MFB.directInput = 1;
+ MFB.bufferBase = (Byte *)src;
+ MFB.directInputRem = srcLen;
}
SRes LzmaEnc_MemPrepare(CLzmaEncHandle pp, const Byte *src, SizeT srcLen,
@@ -2895,7 +3037,7 @@ static SRes LzmaEnc_Encode2(CLzmaEnc *p, ICompressProgress *progress)
LzmaEnc_Finish(p);
/*
- if (res == SZ_OK && !Inline_MatchFinder_IsFinishedOK(&p->matchFinderBase))
+ if (res == SZ_OK && !Inline_MatchFinder_IsFinishedOK(&MFB))
res = SZ_ERROR_FAIL;
}
*/
@@ -2914,29 +3056,37 @@ SRes LzmaEnc_Encode(CLzmaEncHandle pp, ISeqOutStream *outStream, ISeqInStream *i
SRes LzmaEnc_WriteProperties(CLzmaEncHandle pp, Byte *props, SizeT *size)
{
- CLzmaEnc *p = (CLzmaEnc *)pp;
- unsigned i;
- UInt32 dictSize = p->dictSize;
if (*size < LZMA_PROPS_SIZE)
return SZ_ERROR_PARAM;
*size = LZMA_PROPS_SIZE;
- props[0] = (Byte)((p->pb * 5 + p->lp) * 9 + p->lc);
-
- if (dictSize >= ((UInt32)1 << 22))
- {
- const UInt32 kDictMask = ((UInt32)1 << 20) - 1;
- if (dictSize < (UInt32)0xFFFFFFFF - kDictMask)
- dictSize = (dictSize + kDictMask) & ~kDictMask;
- }
- else for (i = 11; i <= 30; i++)
{
- if (dictSize <= ((UInt32)2 << i)) { dictSize = ((UInt32)2 << i); break; }
- if (dictSize <= ((UInt32)3 << i)) { dictSize = ((UInt32)3 << i); break; }
- }
+ const CLzmaEnc *p = (const CLzmaEnc *)pp;
+ const UInt32 dictSize = p->dictSize;
+ UInt32 v;
+ props[0] = (Byte)((p->pb * 5 + p->lp) * 9 + p->lc);
+
+ // we write aligned dictionary value to properties for lzma decoder
+ if (dictSize >= ((UInt32)1 << 21))
+ {
+ const UInt32 kDictMask = ((UInt32)1 << 20) - 1;
+ v = (dictSize + kDictMask) & ~kDictMask;
+ if (v < dictSize)
+ v = dictSize;
+ }
+ else
+ {
+ unsigned i = 11 * 2;
+ do
+ {
+ v = (UInt32)(2 + (i & 1)) << (i >> 1);
+ i++;
+ }
+ while (v < dictSize);
+ }
- for (i = 0; i < 4; i++)
- props[1 + i] = (Byte)(dictSize >> (8 * i));
- return SZ_OK;
+ SetUi32(props + 1, v);
+ return SZ_OK;
+ }
}
diff --git a/C/MtCoder.c b/C/MtCoder.c
index 17e33182..7936c415 100644
--- a/C/MtCoder.c
+++ b/C/MtCoder.c
@@ -1,5 +1,5 @@
/* MtCoder.c -- Multi-thread Coder
-2021-02-09 : Igor Pavlov : Public domain */
+2021-07-12 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -495,12 +495,7 @@ SRes MtCoder_Code(CMtCoder *p)
{
RINOK_THREAD(ArEvent_OptCreate_And_Reset(&p->readEvent));
-
- if (Semaphore_IsCreated(&p->blocksSemaphore))
- {
- RINOK_THREAD(Semaphore_Close(&p->blocksSemaphore));
- }
- RINOK_THREAD(Semaphore_Create(&p->blocksSemaphore, numBlocksMax, numBlocksMax));
+ RINOK_THREAD(Semaphore_OptCreateInit(&p->blocksSemaphore, numBlocksMax, numBlocksMax));
}
for (i = 0; i < MTCODER__BLOCKS_MAX - 1; i++)
diff --git a/C/Sha1.c b/C/Sha1.c
index a34c13ed..9665b5b5 100644
--- a/C/Sha1.c
+++ b/C/Sha1.c
@@ -1,5 +1,5 @@
/* Sha1.c -- SHA-1 Hash
-2021-04-01 : Igor Pavlov : Public domain
+2021-07-13 : Igor Pavlov : Public domain
This code is based on public domain code of Steve Reid from Wei Dai's Crypto++ library. */
#include "Precomp.h"
@@ -34,7 +34,7 @@ This code is based on public domain code of Steve Reid from Wei Dai's Crypto++ l
#endif
#elif defined(MY_CPU_ARM_OR_ARM64)
#ifdef _MSC_VER
- #if _MSC_VER >= 1910
+ #if _MSC_VER >= 1910 && _MSC_VER >= 1929 && _MSC_FULL_VER >= 192930037
#define _SHA_SUPPORTED
#endif
#elif defined(__clang__)
@@ -435,7 +435,37 @@ void Sha1Prepare()
#endif
{
// printf("\n========== HW SHA1 ======== \n");
- f = f_hw = Sha1_UpdateBlocks_HW;
+ #if defined(MY_CPU_ARM_OR_ARM64) && defined(_MSC_VER)
+ /* there was a bug in the MSVC compiler for ARM64 -O2 before version VS2019 16.10 (19.29.30037).
+ It generated incorrect SHA-1 code.
+ 21.03 : we test sha1-hardware code at runtime initialization */
+
+ #pragma message("== SHA1 code: MSC compiler : failure-check code was inserted")
+
+ UInt32 state[5] = { 0, 1, 2, 3, 4 } ;
+ Byte data[64];
+ unsigned i;
+ for (i = 0; i < sizeof(data); i += 2)
+ {
+ data[i ] = (Byte)(i);
+ data[i + 1] = (Byte)(i + 1);
+ }
+
+ Sha1_UpdateBlocks_HW(state, data, sizeof(data) / 64);
+
+ if ( state[0] != 0x9acd7297
+ || state[1] != 0x4624d898
+ || state[2] != 0x0bf079f0
+ || state[3] != 0x031e61b3
+ || state[4] != 0x8323fe20)
+ {
+ // printf("\n========== SHA-1 hardware version failure ======== \n");
+ }
+ else
+ #endif
+ {
+ f = f_hw = Sha1_UpdateBlocks_HW;
+ }
}
g_FUNC_UPDATE_BLOCKS = f;
g_FUNC_UPDATE_BLOCKS_HW = f_hw;
diff --git a/C/Threads.c b/C/Threads.c
index bd9553dc..7b4f5b5d 100644
--- a/C/Threads.c
+++ b/C/Threads.c
@@ -1,5 +1,5 @@
/* Threads.c -- multithreading library
-2021-04-25 : Igor Pavlov : Public domain */
+2021-07-12 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -150,6 +150,17 @@ WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount)
return HandleToWRes(*p);
}
+WRes Semaphore_OptCreateInit(CSemaphore *p, UInt32 initCount, UInt32 maxCount)
+{
+ // if (Semaphore_IsCreated(p))
+ {
+ WRes wres = Semaphore_Close(p);
+ if (wres != 0)
+ return wres;
+ }
+ return Semaphore_Create(p, initCount, maxCount);
+}
+
static WRes Semaphore_Release(CSemaphore *p, LONG releaseCount, LONG *previousCount)
{ return BOOLToWRes(ReleaseSemaphore(*p, releaseCount, previousCount)); }
WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 num)
@@ -158,7 +169,9 @@ WRes Semaphore_Release1(CSemaphore *p) { return Semaphore_ReleaseN(p, 1); }
WRes CriticalSection_Init(CCriticalSection *p)
{
- /* InitializeCriticalSection can raise only STATUS_NO_MEMORY exception */
+ /* InitializeCriticalSection() can raise exception:
+ Windows XP, 2003 : can raise a STATUS_NO_MEMORY exception
+ Windows Vista+ : no exceptions */
#ifdef _MSC_VER
__try
#endif
@@ -167,7 +180,7 @@ WRes CriticalSection_Init(CCriticalSection *p)
/* InitializeCriticalSectionAndSpinCount(p, 0); */
}
#ifdef _MSC_VER
- __except (EXCEPTION_EXECUTE_HANDLER) { return 1; }
+ __except (EXCEPTION_EXECUTE_HANDLER) { return ERROR_NOT_ENOUGH_MEMORY; }
#endif
return 0;
}
@@ -406,6 +419,27 @@ WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount)
return 0;
}
+
+WRes Semaphore_OptCreateInit(CSemaphore *p, UInt32 initCount, UInt32 maxCount)
+{
+ if (Semaphore_IsCreated(p))
+ {
+ /*
+ WRes wres = Semaphore_Close(p);
+ if (wres != 0)
+ return wres;
+ */
+ if (initCount > maxCount || maxCount < 1)
+ return EINVAL;
+ // return EINVAL; // for debug
+ p->_count = initCount;
+ p->_maxCount = maxCount;
+ return 0;
+ }
+ return Semaphore_Create(p, initCount, maxCount);
+}
+
+
WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 releaseCount)
{
UInt32 newCount;
diff --git a/C/Threads.h b/C/Threads.h
index 6cb4aa4b..9e70ecab 100644
--- a/C/Threads.h
+++ b/C/Threads.h
@@ -1,5 +1,5 @@
/* Threads.h -- multithreading library
-2021-04-25 : Igor Pavlov : Public domain */
+2021-07-12 : Igor Pavlov : Public domain */
#ifndef __7Z_THREADS_H
#define __7Z_THREADS_H
@@ -8,14 +8,18 @@
#include <Windows.h>
#else
-#if !defined(__APPLE__) && !defined(_AIX)
+#if defined(__linux__)
+#if !defined(__APPLE__) && !defined(_AIX) && !defined(__ANDROID__)
#ifndef _7ZIP_AFFINITY_DISABLE
#define _7ZIP_AFFINITY_SUPPORTED
+// #pragma message(" ==== _7ZIP_AFFINITY_SUPPORTED")
// #define _GNU_SOURCE
#endif
#endif
+#endif
#include <pthread.h>
+
#endif
#include "7zTypes.h"
@@ -122,6 +126,7 @@ typedef HANDLE CSemaphore;
#define Semaphore_Close(p) HandlePtr_Close(p)
#define Semaphore_Wait(p) Handle_WaitObject(*(p))
WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount);
+WRes Semaphore_OptCreateInit(CSemaphore *p, UInt32 initCount, UInt32 maxCount);
WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 num);
WRes Semaphore_Release1(CSemaphore *p);
@@ -172,6 +177,7 @@ typedef struct _CSemaphore
#define Semaphore_IsCreated(p) ((p)->_created)
WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount);
+WRes Semaphore_OptCreateInit(CSemaphore *p, UInt32 initCount, UInt32 maxCount);
WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 num);
#define Semaphore_Release1(p) Semaphore_ReleaseN(p, 1)
WRes Semaphore_Wait(CSemaphore *p);
diff --git a/C/Util/LzmaLib/LzmaLib.dsp b/C/Util/LzmaLib/LzmaLib.dsp
index 3421de83..6ce91dcd 100644
--- a/C/Util/LzmaLib/LzmaLib.dsp
+++ b/C/Util/LzmaLib/LzmaLib.dsp
@@ -136,6 +136,10 @@ SOURCE=..\..\LzFindMt.h
# End Source File
# Begin Source File
+SOURCE=..\..\LzFindOpt.c
+# End Source File
+# Begin Source File
+
SOURCE=..\..\LzHash.h
# End Source File
# Begin Source File
diff --git a/C/Util/LzmaLib/makefile b/C/Util/LzmaLib/makefile
index 74103bb0..eb6ade33 100644
--- a/C/Util/LzmaLib/makefile
+++ b/C/Util/LzmaLib/makefile
@@ -13,6 +13,7 @@ C_OBJS = \
$O\Alloc.obj \
$O\LzFind.obj \
$O\LzFindMt.obj \
+ $O\LzFindOpt.obj \
$O\LzmaDec.obj \
$O\LzmaEnc.obj \
$O\LzmaLib.obj \
diff --git a/C/var_clang_x64.mak b/C/var_clang_x64.mak
index fefed51c..34e1b49c 100644
--- a/C/var_clang_x64.mak
+++ b/C/var_clang_x64.mak
@@ -9,4 +9,3 @@ USE_ASM=1
CC=$(CROSS_COMPILE)clang
CXX=$(CROSS_COMPILE)clang++
USE_CLANG=1
-
diff --git a/C/var_clang_x86.mak b/C/var_clang_x86.mak
index 5f3c2d9c..bd2317c2 100644
--- a/C/var_clang_x86.mak
+++ b/C/var_clang_x86.mak
@@ -9,4 +9,3 @@ USE_ASM=1
CC=$(CROSS_COMPILE)clang
CXX=$(CROSS_COMPILE)clang++
USE_CLANG=1
-
diff --git a/C/var_gcc_x86.mak b/C/var_gcc_x86.mak
index 288bf94b..f0718ec7 100644
--- a/C/var_gcc_x86.mak
+++ b/C/var_gcc_x86.mak
@@ -8,4 +8,3 @@ MY_ARCH=-m32
USE_ASM=1
CC=$(CROSS_COMPILE)gcc
CXX=$(CROSS_COMPILE)g++
-
diff --git a/C/warn_gcc.mak b/C/warn_gcc.mak
index 5fb747dc..7aab7a44 100644
--- a/C/warn_gcc.mak
+++ b/C/warn_gcc.mak
@@ -49,5 +49,3 @@ CFLAGS_WARN_GCC_PPMD_UNALIGNED = \
CFLAGS_WARN = $(CFLAGS_WARN_GCC_9) \
# $(CFLAGS_WARN_GCC_PPMD_UNALIGNED)
-
- \ No newline at end of file
diff --git a/CPP/7zip/7zip_gcc.mak b/CPP/7zip/7zip_gcc.mak
index 122686ff..59074214 100644
--- a/CPP/7zip/7zip_gcc.mak
+++ b/CPP/7zip/7zip_gcc.mak
@@ -2,7 +2,7 @@
# USE_ASM = 1
# IS_X64 = 1
# MY_ARCH =
-
+# USE_ASM=
MY_ARCH_2 = $(MY_ARCH)
@@ -23,6 +23,8 @@ CFLAGS_BASE = -O2 $(CFLAGS_BASE_LIST) $(CFLAGS_WARN_WALL) $(CFLAGS_WARN) \
-DNDEBUG -D_REENTRANT -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE \
-fPIC
+# -D_7ZIP_AFFINITY_DISABLE
+
ifdef SystemDrive
IS_MINGW = 1
@@ -186,6 +188,8 @@ $O/Lang.o: ../../../Common/Lang.cpp
$(CXX) $(CXXFLAGS) $<
$O/ListFileUtils.o: ../../../Common/ListFileUtils.cpp
$(CXX) $(CXXFLAGS) $<
+$O/LzFindPrepare.o: ../../../Common/LzFindPrepare.cpp
+ $(CXX) $(CXXFLAGS) $<
$O/MyMap.o: ../../../Common/MyMap.cpp
$(CXX) $(CXXFLAGS) $<
$O/MyString.o: ../../../Common/MyString.cpp
@@ -1095,6 +1099,7 @@ $O/XzCrc64.o: ../../../../C/XzCrc64.c
ifdef USE_ASM
ifdef IS_X64
USE_X86_ASM=1
+USE_X64_ASM=1
else
ifdef IS_X86
USE_X86_ASM=1
@@ -1126,6 +1131,13 @@ $O/AesOpt.o: ../../../../C/AesOpt.c
$(CC) $(CFLAGS) $<
endif
+ifdef USE_X64_ASM
+$O/LzFindOpt.o: ../../../../Asm/x86/LzFindOpt.asm
+ $(MY_ASM) $(AFLAGS) $<
+else
+$O/LzFindOpt.o: ../../../../C/LzFindOpt.c
+ $(CC) $(CFLAGS) $<
+endif
ifdef USE_LZMA_DEC_ASM
diff --git a/CPP/7zip/Archive/7z/7zHandlerOut.cpp b/CPP/7zip/Archive/7z/7zHandlerOut.cpp
index b549d94a..8bb87341 100644
--- a/CPP/7zip/Archive/7z/7zHandlerOut.cpp
+++ b/CPP/7zip/Archive/7z/7zHandlerOut.cpp
@@ -133,7 +133,7 @@ HRESULT CHandler::SetMainMethod(
if (_numSolidBytesDefined)
continue;
- UInt32 dicSize;
+ UInt64 dicSize;
switch (methodFull.Id)
{
case k_LZMA:
diff --git a/CPP/7zip/Archive/Zip/ZipHeader.h b/CPP/7zip/Archive/Zip/ZipHeader.h
index c5c7166e..c47659ac 100644
--- a/CPP/7zip/Archive/Zip/ZipHeader.h
+++ b/CPP/7zip/Archive/Zip/ZipHeader.h
@@ -89,6 +89,7 @@ namespace NFileHeader
kZip64 = 0x01,
kNTFS = 0x0A,
kStrongEncrypt = 0x17,
+ kIzNtSecurityDescriptor = 0x4453,
kUnixTime = 0x5455,
kUnixExtra = 0x5855,
kIzUnicodeComment = 0x6375,
diff --git a/CPP/7zip/Archive/Zip/ZipItem.cpp b/CPP/7zip/Archive/Zip/ZipItem.cpp
index 38921dce..be336485 100644
--- a/CPP/7zip/Archive/Zip/ZipItem.cpp
+++ b/CPP/7zip/Archive/Zip/ZipItem.cpp
@@ -37,6 +37,7 @@ static const CUInt32PCharPair g_ExtraTypes[] =
{ NExtraID::kUnix3Extra, "ux" },
{ NExtraID::kIzUnicodeComment, "uc" },
{ NExtraID::kIzUnicodeName, "up" },
+ { NExtraID::kIzNtSecurityDescriptor, "SD" },
{ NExtraID::kWzAES, "WzAES" },
{ NExtraID::kApkAlign, "ApkAlign" }
};
diff --git a/CPP/7zip/Bundles/Alone/Alone.dsp b/CPP/7zip/Bundles/Alone/Alone.dsp
index 145cf2ca..7a1f79df 100644
--- a/CPP/7zip/Bundles/Alone/Alone.dsp
+++ b/CPP/7zip/Bundles/Alone/Alone.dsp
@@ -44,7 +44,7 @@ RSC=rc.exe
# PROP Ignore_Export_Lib 0
# PROP Target_Dir ""
# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
-# ADD CPP /nologo /Gz /MT /W4 /WX /GX /O1 /I "..\..\..\\" /D "NDEBUG" /D "_MBCS" /D "WIN32" /D "_CONSOLE" /D "WIN_LONG_PATH" /D "_7ZIP_LARGE_PAGES" /D "SUPPORT_DEVICE_FILE" /Yu"StdAfx.h" /FD /c
+# ADD CPP /nologo /Gr /MT /W4 /WX /GX /O1 /I "..\..\..\\" /D "NDEBUG" /D "_MBCS" /D "WIN32" /D "_CONSOLE" /D "WIN_LONG_PATH" /D "_7ZIP_LARGE_PAGES" /D "SUPPORT_DEVICE_FILE" /FAcs /Yu"StdAfx.h" /FD /c
# ADD BASE RSC /l 0x419 /d "NDEBUG"
# ADD RSC /l 0x419 /d "NDEBUG"
BSC32=bscmake.exe
@@ -270,6 +270,10 @@ SOURCE=..\..\..\Common\CommandLineParser.h
# End Source File
# Begin Source File
+SOURCE=..\..\..\Common\Common.h
+# End Source File
+# Begin Source File
+
SOURCE=..\..\..\Common\ComTry.h
# End Source File
# Begin Source File
@@ -306,6 +310,18 @@ SOURCE=..\..\..\Common\ListFileUtils.h
# End Source File
# Begin Source File
+SOURCE=..\..\..\Common\LzFindPrepare.cpp
+# End Source File
+# Begin Source File
+
+SOURCE=..\..\..\Common\MyBuffer.h
+# End Source File
+# Begin Source File
+
+SOURCE=..\..\..\Common\MyBuffer2.h
+# End Source File
+# Begin Source File
+
SOURCE=..\..\..\Common\MyCom.h
# End Source File
# Begin Source File
@@ -322,6 +338,10 @@ SOURCE=..\..\..\Common\MyInitGuid.h
# End Source File
# Begin Source File
+SOURCE=..\..\..\Common\MyLinux.h
+# End Source File
+# Begin Source File
+
SOURCE=..\..\..\Common\MyString.cpp
# End Source File
# Begin Source File
@@ -330,6 +350,10 @@ SOURCE=..\..\..\Common\MyString.h
# End Source File
# Begin Source File
+SOURCE=..\..\..\Common\MyTypes.h
+# End Source File
+# Begin Source File
+
SOURCE=..\..\..\Common\MyUnknown.h
# End Source File
# Begin Source File
@@ -342,6 +366,10 @@ SOURCE=..\..\..\Common\MyVector.h
# End Source File
# Begin Source File
+SOURCE=..\..\..\Common\MyWindows.h
+# End Source File
+# Begin Source File
+
SOURCE=..\..\..\Common\NewHandler.cpp
# End Source File
# Begin Source File
@@ -478,6 +506,10 @@ SOURCE=..\..\..\Windows\FileLink.cpp
# End Source File
# Begin Source File
+SOURCE=..\..\..\Windows\FileMapping.h
+# End Source File
+# Begin Source File
+
SOURCE=..\..\..\Windows\FileName.cpp
# End Source File
# Begin Source File
@@ -506,6 +538,10 @@ SOURCE=..\..\..\Windows\MemoryLock.h
# End Source File
# Begin Source File
+SOURCE=..\..\..\Windows\NtCheck.h
+# End Source File
+# Begin Source File
+
SOURCE=..\..\..\Windows\PropVariant.cpp
# End Source File
# Begin Source File
@@ -538,6 +574,10 @@ SOURCE=..\..\..\Windows\Registry.h
# End Source File
# Begin Source File
+SOURCE=..\..\..\Windows\SecurityUtils.h
+# End Source File
+# Begin Source File
+
SOURCE=..\..\..\Windows\Synchronization.cpp
# End Source File
# Begin Source File
@@ -654,6 +694,10 @@ SOURCE=..\..\Common\MemBlocks.cpp
# End Source File
# Begin Source File
+SOURCE=..\..\Common\MemBlocks.h
+# End Source File
+# Begin Source File
+
SOURCE=..\..\Common\MethodId.cpp
# End Source File
# Begin Source File
@@ -769,6 +813,10 @@ SOURCE=..\..\Common\VirtThread.h
# PROP Default_Filter ""
# Begin Source File
+SOURCE=..\..\Compress\BZip2Const.h
+# End Source File
+# Begin Source File
+
SOURCE=..\..\Compress\BZip2Crc.cpp
# End Source File
# Begin Source File
@@ -909,10 +957,6 @@ SOURCE=..\..\Compress\DeflateEncoder.h
# End Source File
# Begin Source File
-SOURCE=..\..\Compress\DeflateExtConst.h
-# End Source File
-# Begin Source File
-
SOURCE=..\..\Compress\DeflateRegister.cpp
# End Source File
# End Group
@@ -1678,6 +1722,10 @@ SOURCE=..\..\UI\Common\DefaultName.h
# End Source File
# Begin Source File
+SOURCE=..\..\UI\Common\DirItem.h
+# End Source File
+# Begin Source File
+
SOURCE=..\..\UI\Common\EnumDirItems.cpp
# End Source File
# Begin Source File
@@ -1686,6 +1734,10 @@ SOURCE=..\..\UI\Common\EnumDirItems.h
# End Source File
# Begin Source File
+SOURCE=..\..\UI\Common\ExitCode.h
+# End Source File
+# Begin Source File
+
SOURCE=..\..\UI\Common\Extract.cpp
# End Source File
# Begin Source File
@@ -1702,6 +1754,10 @@ SOURCE=..\..\UI\Common\ExtractingFilePath.h
# End Source File
# Begin Source File
+SOURCE=..\..\UI\Common\ExtractMode.h
+# End Source File
+# Begin Source File
+
SOURCE=..\..\UI\Common\HashCalc.cpp
# End Source File
# Begin Source File
@@ -1710,6 +1766,10 @@ SOURCE=..\..\UI\Common\HashCalc.h
# End Source File
# Begin Source File
+SOURCE=..\..\UI\Common\IFileExtractCallback.h
+# End Source File
+# Begin Source File
+
SOURCE=..\..\UI\Common\LoadCodecs.cpp
# End Source File
# Begin Source File
@@ -1726,6 +1786,10 @@ SOURCE=..\..\UI\Common\OpenArchive.h
# End Source File
# Begin Source File
+SOURCE=..\..\UI\Common\Property.h
+# End Source File
+# Begin Source File
+
SOURCE=..\..\UI\Common\PropIDUtils.cpp
# End Source File
# Begin Source File
@@ -1912,6 +1976,10 @@ SOURCE=..\..\Crypto\RandGen.h
# End Source File
# Begin Source File
+SOURCE=..\..\Crypto\Sha1Cls.h
+# End Source File
+# Begin Source File
+
SOURCE=..\..\Crypto\WzAes.cpp
# End Source File
# Begin Source File
@@ -1959,6 +2027,10 @@ SOURCE=..\..\ICoder.h
# End Source File
# Begin Source File
+SOURCE=..\..\IDecl.h
+# End Source File
+# Begin Source File
+
SOURCE=..\..\IMyUnknown.h
# End Source File
# Begin Source File
@@ -1975,6 +2047,10 @@ SOURCE=..\..\IStream.h
# End Source File
# Begin Source File
+SOURCE=..\..\MyVersion.h
+# End Source File
+# Begin Source File
+
SOURCE=..\..\PropID.h
# End Source File
# End Group
@@ -2223,6 +2299,10 @@ SOURCE=..\..\..\..\C\7zTypes.h
# End Source File
# Begin Source File
+SOURCE=..\..\..\..\C\7zVersion.h
+# End Source File
+# Begin Source File
+
SOURCE=..\..\..\..\C\Aes.c
!IF "$(CFG)" == "Alone - Win32 Release"
@@ -2440,6 +2520,10 @@ SOURCE=..\..\..\..\C\BwtSort.h
# End Source File
# Begin Source File
+SOURCE=..\..\..\..\C\Compiler.h
+# End Source File
+# Begin Source File
+
SOURCE=..\..\..\..\C\CpuArch.c
!IF "$(CFG)" == "Alone - Win32 Release"
@@ -2593,6 +2677,30 @@ SOURCE=..\..\..\..\C\LzFindMt.h
# End Source File
# Begin Source File
+SOURCE=..\..\..\..\C\LzFindOpt.c
+
+!IF "$(CFG)" == "Alone - Win32 Release"
+
+# ADD CPP /O2
+# SUBTRACT CPP /YX /Yc /Yu
+
+!ELSEIF "$(CFG)" == "Alone - Win32 Debug"
+
+# SUBTRACT CPP /YX /Yc /Yu
+
+!ELSEIF "$(CFG)" == "Alone - Win32 ReleaseU"
+
+# SUBTRACT CPP /YX /Yc /Yu
+
+!ELSEIF "$(CFG)" == "Alone - Win32 DebugU"
+
+# SUBTRACT CPP /YX /Yc /Yu
+
+!ENDIF
+
+# End Source File
+# Begin Source File
+
SOURCE=..\..\..\..\C\LzHash.h
# End Source File
# Begin Source File
diff --git a/CPP/7zip/Bundles/Alone/makefile b/CPP/7zip/Bundles/Alone/makefile
index 0fce341b..ca3392db 100644
--- a/CPP/7zip/Bundles/Alone/makefile
+++ b/CPP/7zip/Bundles/Alone/makefile
@@ -1,6 +1,7 @@
PROG = 7za.exe
# USE_C_AES = 1
# USE_C_SHA = 1
+# USE_C_LZFINDOPT = 1
COMMON_OBJS = \
$O\CommandLineParser.obj \
@@ -8,6 +9,7 @@ COMMON_OBJS = \
$O\CrcReg.obj \
$O\IntToString.obj \
$O\ListFileUtils.obj \
+ $O\LzFindPrepare.obj \
$O\NewHandler.obj \
$O\StdInStream.obj \
$O\StdOutStream.obj \
@@ -217,6 +219,7 @@ C_OBJS = \
!include "../../Aes.mak"
!include "../../Crc.mak"
!include "../../Crc64.mak"
+!include "../../LzFindOpt.mak"
!include "../../LzmaDec.mak"
!include "../../Sha1.mak"
!include "../../Sha256.mak"
diff --git a/CPP/7zip/Bundles/Alone/makefile.gcc b/CPP/7zip/Bundles/Alone/makefile.gcc
index e63fc332..182e9a7c 100644
--- a/CPP/7zip/Bundles/Alone/makefile.gcc
+++ b/CPP/7zip/Bundles/Alone/makefile.gcc
@@ -110,6 +110,7 @@ COMMON_OBJS = \
$O/CrcReg.o \
$O/IntToString.o \
$O/ListFileUtils.o \
+ $O/LzFindPrepare.o \
$O/MyString.o \
$O/NewHandler.o \
$O/StdInStream.o \
@@ -283,6 +284,7 @@ C_OBJS = \
$O/Delta.o \
$O/HuffEnc.o \
$O/LzFind.o \
+ $O/LzFindOpt.o \
$O/Lzma2Dec.o \
$O/Lzma2DecMt.o \
$O/Lzma2Enc.o \
diff --git a/CPP/7zip/Bundles/Alone7z/Alone.dsp b/CPP/7zip/Bundles/Alone7z/Alone.dsp
index a46526bf..a15a5bfa 100644
--- a/CPP/7zip/Bundles/Alone7z/Alone.dsp
+++ b/CPP/7zip/Bundles/Alone7z/Alone.dsp
@@ -44,7 +44,7 @@ RSC=rc.exe
# PROP Ignore_Export_Lib 0
# PROP Target_Dir ""
# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
-# ADD CPP /nologo /Gr /MT /W4 /WX /GX /O1 /I "..\..\..\\" /D "NDEBUG" /D "_MBCS" /D "WIN32" /D "_CONSOLE" /D "_7ZIP_LARGE_PAGES" /D "SUPPORT_DEVICE_FILE" /FAc /Yu"StdAfx.h" /FD /c
+# ADD CPP /nologo /Gr /MT /W4 /WX /GX /O1 /I "..\..\..\\" /D "NDEBUG" /D "_MBCS" /D "WIN32" /D "_CONSOLE" /D "_7ZIP_LARGE_PAGES" /D "SUPPORT_DEVICE_FILE" /FAcs /Yu"StdAfx.h" /FD /c
# ADD BASE RSC /l 0x419 /d "NDEBUG"
# ADD RSC /l 0x419 /d "NDEBUG"
BSC32=bscmake.exe
@@ -306,6 +306,10 @@ SOURCE=..\..\..\Common\ListFileUtils.h
# End Source File
# Begin Source File
+SOURCE=..\..\..\Common\LzFindPrepare.cpp
+# End Source File
+# Begin Source File
+
SOURCE=..\..\..\Common\MyCom.h
# End Source File
# Begin Source File
@@ -1681,6 +1685,30 @@ SOURCE=..\..\..\..\C\LzFindMt.h
# End Source File
# Begin Source File
+SOURCE=..\..\..\..\C\LzFindOpt.c
+
+!IF "$(CFG)" == "Alone - Win32 Release"
+
+# ADD CPP /O2
+# SUBTRACT CPP /YX /Yc /Yu
+
+!ELSEIF "$(CFG)" == "Alone - Win32 Debug"
+
+# SUBTRACT CPP /YX /Yc /Yu
+
+!ELSEIF "$(CFG)" == "Alone - Win32 ReleaseU"
+
+# SUBTRACT CPP /YX /Yc /Yu
+
+!ELSEIF "$(CFG)" == "Alone - Win32 DebugU"
+
+# SUBTRACT CPP /YX /Yc /Yu
+
+!ENDIF
+
+# End Source File
+# Begin Source File
+
SOURCE=..\..\..\..\C\Compress\Lz\LzHash.h
# End Source File
# Begin Source File
diff --git a/CPP/7zip/Bundles/Alone7z/makefile b/CPP/7zip/Bundles/Alone7z/makefile
index 803277ab..0a68e141 100644
--- a/CPP/7zip/Bundles/Alone7z/makefile
+++ b/CPP/7zip/Bundles/Alone7z/makefile
@@ -10,6 +10,7 @@ COMMON_OBJS = \
$O\CrcReg.obj \
$O\IntToString.obj \
$O\ListFileUtils.obj \
+ $O\LzFindPrepare.obj \
$O\NewHandler.obj \
$O\StdInStream.obj \
$O\StdOutStream.obj \
@@ -152,6 +153,7 @@ C_OBJS = \
!include "../../Aes.mak"
!include "../../Crc.mak"
!include "../../Crc64.mak"
+!include "../../LzFindOpt.mak"
!include "../../LzmaDec.mak"
!include "../../Sha256.mak"
diff --git a/CPP/7zip/Bundles/Alone7z/makefile.gcc b/CPP/7zip/Bundles/Alone7z/makefile.gcc
index b1f69734..c1d6ac50 100644
--- a/CPP/7zip/Bundles/Alone7z/makefile.gcc
+++ b/CPP/7zip/Bundles/Alone7z/makefile.gcc
@@ -27,6 +27,7 @@ else
MT_OBJS = \
$O/LzFindMt.o \
+ $O/LzFindOpt.o \
$O/StreamBinder.o \
$O/Synchronization.o \
$O/VirtThread.o \
@@ -110,6 +111,7 @@ COMMON_OBJS = \
$O/CrcReg.o \
$O/IntToString.o \
$O/ListFileUtils.o \
+ $O/LzFindPrepare.o \
$O/MyString.o \
$O/MyVector.o \
$O/NewHandler.o \
diff --git a/CPP/7zip/Bundles/Fm/FM.dsp b/CPP/7zip/Bundles/Fm/FM.dsp
index cccf1be5..86d788f0 100644
--- a/CPP/7zip/Bundles/Fm/FM.dsp
+++ b/CPP/7zip/Bundles/Fm/FM.dsp
@@ -1016,6 +1016,11 @@ SOURCE=..\..\..\..\C\LzFindMt.h
# End Source File
# Begin Source File
+SOURCE=..\..\..\..\C\LzFindOpt.c
+# SUBTRACT CPP /YX /Yc /Yu
+# End Source File
+# Begin Source File
+
SOURCE=..\..\..\..\C\Lzma2Dec.c
# SUBTRACT CPP /YX /Yc /Yu
# End Source File
@@ -1514,6 +1519,10 @@ SOURCE=..\..\..\Common\ListFileUtils.h
# End Source File
# Begin Source File
+SOURCE=..\..\..\Common\LzFindPrepare.cpp
+# End Source File
+# Begin Source File
+
SOURCE=..\..\..\Common\MyBuffer.h
# End Source File
# Begin Source File
diff --git a/CPP/7zip/Bundles/Format7z/makefile b/CPP/7zip/Bundles/Format7z/makefile
index ca66599a..7276f175 100644
--- a/CPP/7zip/Bundles/Format7z/makefile
+++ b/CPP/7zip/Bundles/Format7z/makefile
@@ -8,6 +8,7 @@ COMMON_OBJS = \
$O\CRC.obj \
$O\CrcReg.obj \
$O\IntToString.obj \
+ $O\LzFindPrepare.obj \
$O\NewHandler.obj \
$O\MyString.obj \
$O\Sha256Reg.obj \
@@ -137,6 +138,7 @@ C_OBJS = \
!include "../../Aes.mak"
!include "../../Crc.mak"
+!include "../../LzFindOpt.mak"
!include "../../LzmaDec.mak"
!include "../../Sha256.mak"
diff --git a/CPP/7zip/Bundles/Format7zF/Arc.mak b/CPP/7zip/Bundles/Format7zF/Arc.mak
index fedce5e0..53a8895a 100644
--- a/CPP/7zip/Bundles/Format7zF/Arc.mak
+++ b/CPP/7zip/Bundles/Format7zF/Arc.mak
@@ -3,6 +3,7 @@ COMMON_OBJS = \
$O\CrcReg.obj \
$O\DynLimBuf.obj \
$O\IntToString.obj \
+ $O\LzFindPrepare.obj \
$O\MyMap.obj \
$O\MyString.obj \
$O\MyVector.obj \
@@ -287,6 +288,7 @@ C_OBJS = \
!include "../../Aes.mak"
!include "../../Crc.mak"
!include "../../Crc64.mak"
+!include "../../LzFindOpt.mak"
!include "../../LzmaDec.mak"
!include "../../Sha1.mak"
!include "../../Sha256.mak"
diff --git a/CPP/7zip/Bundles/Format7zF/Arc_gcc.mak b/CPP/7zip/Bundles/Format7zF/Arc_gcc.mak
index c3dbf349..73711d3a 100644
--- a/CPP/7zip/Bundles/Format7zF/Arc_gcc.mak
+++ b/CPP/7zip/Bundles/Format7zF/Arc_gcc.mak
@@ -18,6 +18,7 @@ else
MT_OBJS = \
$O/LzFindMt.o \
+ $O/LzFindOpt.o \
$O/StreamBinder.o \
$O/Synchronization.o \
$O/VirtThread.o \
@@ -35,6 +36,7 @@ COMMON_OBJS = \
$O/CrcReg.o \
$O/DynLimBuf.o \
$O/IntToString.o \
+ $O/LzFindPrepare.o \
$O/MyMap.o \
$O/MyString.o \
$O/MyVector.o \
diff --git a/CPP/7zip/Bundles/Format7zF/Format7z.dsp b/CPP/7zip/Bundles/Format7zF/Format7z.dsp
index 67883ebc..36ac6042 100644
--- a/CPP/7zip/Bundles/Format7zF/Format7z.dsp
+++ b/CPP/7zip/Bundles/Format7zF/Format7z.dsp
@@ -267,6 +267,10 @@ SOURCE=..\..\..\Common\IntToString.h
# End Source File
# Begin Source File
+SOURCE=..\..\..\Common\LzFindPrepare.cpp
+# End Source File
+# Begin Source File
+
SOURCE=..\..\..\Common\MyBuffer.h
# End Source File
# Begin Source File
@@ -1869,6 +1873,22 @@ SOURCE=..\..\..\..\C\LzFindMt.h
# End Source File
# Begin Source File
+SOURCE=..\..\..\..\C\LzFindOpt.c
+
+!IF "$(CFG)" == "7z - Win32 Release"
+
+# ADD CPP /O2
+# SUBTRACT CPP /YX /Yc /Yu
+
+!ELSEIF "$(CFG)" == "7z - Win32 Debug"
+
+# SUBTRACT CPP /YX /Yc /Yu
+
+!ENDIF
+
+# End Source File
+# Begin Source File
+
SOURCE=..\..\..\..\C\LzHash.h
# End Source File
# Begin Source File
diff --git a/CPP/7zip/Bundles/Format7zR/makefile b/CPP/7zip/Bundles/Format7zR/makefile
index 5c05abd1..c2237152 100644
--- a/CPP/7zip/Bundles/Format7zR/makefile
+++ b/CPP/7zip/Bundles/Format7zR/makefile
@@ -7,6 +7,7 @@ COMMON_OBJS = \
$O\CRC.obj \
$O\CrcReg.obj \
$O\IntToString.obj \
+ $O\LzFindPrepare.obj \
$O\NewHandler.obj \
$O\MyString.obj \
$O\StringConvert.obj \
@@ -111,6 +112,7 @@ C_OBJS = \
$O\Threads.obj \
!include "../../Crc.mak"
+!include "../../LzFindOpt.mak"
!include "../../LzmaDec.mak"
!include "../../7zip.mak"
diff --git a/CPP/7zip/Bundles/LzmaCon/LzmaCon.dsp b/CPP/7zip/Bundles/LzmaCon/LzmaCon.dsp
index 907b23e0..d7326efc 100644
--- a/CPP/7zip/Bundles/LzmaCon/LzmaCon.dsp
+++ b/CPP/7zip/Bundles/LzmaCon/LzmaCon.dsp
@@ -212,6 +212,10 @@ SOURCE=..\..\..\Common\IntToString.h
# End Source File
# Begin Source File
+SOURCE=..\..\..\Common\LzFindPrepare.cpp
+# End Source File
+# Begin Source File
+
SOURCE=..\..\..\Common\MyCom.h
# End Source File
# Begin Source File
@@ -316,6 +320,14 @@ SOURCE=..\..\Common\MethodProps.h
# End Source File
# Begin Source File
+SOURCE=..\..\Common\StreamObjects.cpp
+# End Source File
+# Begin Source File
+
+SOURCE=..\..\Common\StreamObjects.h
+# End Source File
+# Begin Source File
+
SOURCE=..\..\Common\StreamUtils.cpp
# End Source File
# Begin Source File
@@ -441,6 +453,11 @@ SOURCE=..\..\..\..\C\LzFindMt.h
# End Source File
# Begin Source File
+SOURCE=..\..\..\..\C\LzFindOpt.c
+# SUBTRACT CPP /YX /Yc /Yu
+# End Source File
+# Begin Source File
+
SOURCE=..\..\..\..\C\LzHash.h
# End Source File
# Begin Source File
diff --git a/CPP/7zip/Bundles/LzmaCon/makefile b/CPP/7zip/Bundles/LzmaCon/makefile
index e87becc4..5e53327a 100644
--- a/CPP/7zip/Bundles/LzmaCon/makefile
+++ b/CPP/7zip/Bundles/LzmaCon/makefile
@@ -14,6 +14,7 @@ COMMON_OBJS = \
$O\CRC.obj \
$O\CrcReg.obj \
$O\IntToString.obj \
+ $O\LzFindPrepare.obj \
$O\MyString.obj \
$O\MyVector.obj \
$O\NewHandler.obj \
@@ -33,6 +34,7 @@ WIN_OBJS = \
$O\FileStreams.obj \
$O\FilterCoder.obj \
$O\MethodProps.obj \
+ $O\StreamObjects.obj \
$O\StreamUtils.obj \
UI_COMMON_OBJS = \
@@ -55,6 +57,7 @@ C_OBJS = \
$O\Threads.obj \
!include "../../Crc.mak"
+!include "../../LzFindOpt.mak"
!include "../../LzmaDec.mak"
!include "../../7zip.mak"
diff --git a/CPP/7zip/Bundles/LzmaCon/makefile.gcc b/CPP/7zip/Bundles/LzmaCon/makefile.gcc
index f9ccfce4..58c204af 100644
--- a/CPP/7zip/Bundles/LzmaCon/makefile.gcc
+++ b/CPP/7zip/Bundles/LzmaCon/makefile.gcc
@@ -18,6 +18,7 @@ else
MT_OBJS = \
$O/LzFindMt.o \
+ $O/LzFindOpt.o \
$O/Synchronization.o \
$O/Threads.o \
@@ -55,6 +56,7 @@ COMMON_OBJS = \
$O/CRC.o \
$O/CrcReg.o \
$O/IntToString.o \
+ $O/LzFindPrepare.o \
$O/MyString.o \
$O/MyVector.o \
$O/NewHandler.o \
@@ -83,6 +85,7 @@ CONSOLE_OBJS = \
$O/FileStreams.o \
$O/FilterCoder.o \
$O/MethodProps.o \
+ $O/StreamObjects.o \
$O/StreamUtils.o \
C_OBJS = \
diff --git a/CPP/7zip/Common/MemBlocks.cpp b/CPP/7zip/Common/MemBlocks.cpp
index f351abbd..9b0652c6 100644
--- a/CPP/7zip/Common/MemBlocks.cpp
+++ b/CPP/7zip/Common/MemBlocks.cpp
@@ -67,7 +67,6 @@ HRes CMemBlockManagerMt::AllocateSpace(size_t numBlocks, size_t numNoLockBlocks)
return E_OUTOFMEMORY;
if (!CMemBlockManager::AllocateSpace_bool(numBlocks))
return E_OUTOFMEMORY;
- Semaphore.Close();
// we need (maxCount = 1), if we want to create non-use empty Semaphore
if (maxCount == 0)
maxCount = 1;
@@ -75,12 +74,13 @@ HRes CMemBlockManagerMt::AllocateSpace(size_t numBlocks, size_t numNoLockBlocks)
// printf("\n Synchro.Create() \n");
WRes wres;
#ifndef _WIN32
+ Semaphore.Close();
wres = Synchro.Create();
if (wres != 0)
return HRESULT_FROM_WIN32(wres);
wres = Semaphore.Create(&Synchro, (UInt32)numLockBlocks, maxCount);
#else
- wres = Semaphore.Create((UInt32)numLockBlocks, maxCount);
+ wres = Semaphore.OptCreateInit((UInt32)numLockBlocks, maxCount);
#endif
return HRESULT_FROM_WIN32(wres);
diff --git a/CPP/7zip/Common/MethodProps.cpp b/CPP/7zip/Common/MethodProps.cpp
index bea51db3..3ab89ddb 100644
--- a/CPP/7zip/Common/MethodProps.cpp
+++ b/CPP/7zip/Common/MethodProps.cpp
@@ -99,41 +99,65 @@ HRESULT ParseMtProp(const UString &name, const PROPVARIANT &prop, UInt32 default
}
+static HRESULT SetLogSizeProp(UInt64 number, NCOM::CPropVariant &destProp)
+{
+ if (number >= 64)
+ return E_INVALIDARG;
+ UInt32 val32;
+ if (number < 32)
+ val32 = (UInt32)1 << (unsigned)number;
+ /*
+ else if (number == 32 && reduce_4GB_to_32bits)
+ val32 = (UInt32)(Int32)-1;
+ */
+ else
+ {
+ destProp = (UInt64)((UInt64)1 << (unsigned)number);
+ return S_OK;
+ }
+ destProp = (UInt32)val32;
+ return S_OK;
+}
+
+
static HRESULT StringToDictSize(const UString &s, NCOM::CPropVariant &destProp)
{
+ /* if (reduce_4GB_to_32bits) we can reduce (4 GiB) property to (4 GiB - 1).
+ to fit the value to UInt32 for clients that do not support 64-bit values */
+
const wchar_t *end;
- UInt32 number = ConvertStringToUInt32(s, &end);
- unsigned numDigits = (unsigned)(end - s.Ptr());
+ const UInt64 number = ConvertStringToUInt64(s, &end);
+ const unsigned numDigits = (unsigned)(end - s.Ptr());
if (numDigits == 0 || s.Len() > numDigits + 1)
return E_INVALIDARG;
if (s.Len() == numDigits)
- {
- if (number >= 64)
- return E_INVALIDARG;
- if (number < 32)
- destProp = (UInt32)((UInt32)1 << (unsigned)number);
- else
- destProp = (UInt64)((UInt64)1 << (unsigned)number);
- return S_OK;
- }
+ return SetLogSizeProp(number, destProp);
unsigned numBits;
switch (MyCharLower_Ascii(s[numDigits]))
{
- case 'b': destProp = number; return S_OK;
+ case 'b': numBits = 0; break;
case 'k': numBits = 10; break;
case 'm': numBits = 20; break;
case 'g': numBits = 30; break;
default: return E_INVALIDARG;
}
- if (number < ((UInt32)1 << (32 - numBits)))
- destProp = (UInt32)(number << numBits);
+ const UInt64 range4g = ((UInt64)1 << (32 - numBits));
+ if (number < range4g)
+ destProp = (UInt32)((UInt32)number << numBits);
+ /*
+ else if (number == range4g && reduce_4GB_to_32bits)
+ destProp = (UInt32)(Int32)-1;
+ */
+ else if (numBits == 0)
+ destProp = (UInt64)number;
+ else if (number >= ((UInt64)1 << (64 - numBits)))
+ return E_INVALIDARG;
else
destProp = (UInt64)((UInt64)number << numBits);
-
return S_OK;
}
@@ -141,16 +165,8 @@ static HRESULT StringToDictSize(const UString &s, NCOM::CPropVariant &destProp)
static HRESULT PROPVARIANT_to_DictSize(const PROPVARIANT &prop, NCOM::CPropVariant &destProp)
{
if (prop.vt == VT_UI4)
- {
- UInt32 v = prop.ulVal;
- if (v >= 64)
- return E_INVALIDARG;
- if (v < 32)
- destProp = (UInt32)((UInt32)1 << (unsigned)v);
- else
- destProp = (UInt64)((UInt64)1 << (unsigned)v);
- return S_OK;
- }
+ return SetLogSizeProp(prop.ulVal, destProp);
+
if (prop.vt == VT_BSTR)
{
UString s;
diff --git a/CPP/7zip/Common/MethodProps.h b/CPP/7zip/Common/MethodProps.h
index e0519b16..bd9283f0 100644
--- a/CPP/7zip/Common/MethodProps.h
+++ b/CPP/7zip/Common/MethodProps.h
@@ -64,23 +64,34 @@ public:
unsigned GetLevel() const;
int Get_NumThreads() const
{
- int i = FindProp(NCoderPropID::kNumThreads);
+ const int i = FindProp(NCoderPropID::kNumThreads);
if (i >= 0)
- if (Props[(unsigned)i].Value.vt == VT_UI4)
- return (int)Props[(unsigned)i].Value.ulVal;
+ {
+ const NWindows::NCOM::CPropVariant &val = Props[(unsigned)i].Value;
+ if (val.vt == VT_UI4)
+ return (int)val.ulVal;
+ }
return -1;
}
- bool Get_DicSize(UInt32 &res) const
+ bool Get_DicSize(UInt64 &res) const
{
res = 0;
- int i = FindProp(NCoderPropID::kDictionarySize);
+ const int i = FindProp(NCoderPropID::kDictionarySize);
if (i >= 0)
- if (Props[(unsigned)i].Value.vt == VT_UI4)
+ {
+ const NWindows::NCOM::CPropVariant &val = Props[(unsigned)i].Value;
+ if (val.vt == VT_UI4)
{
- res = Props[(unsigned)i].Value.ulVal;
+ res = val.ulVal;
return true;
}
+ if (val.vt == VT_UI8)
+ {
+ res = val.uhVal.QuadPart;
+ return true;
+ }
+ }
return false;
}
@@ -90,23 +101,26 @@ public:
{
int i = FindProp(NCoderPropID::kAlgorithm);
if (i >= 0)
- if (Props[(unsigned)i].Value.vt == VT_UI4)
- return Props[(unsigned)i].Value.ulVal;
+ {
+ const NWindows::NCOM::CPropVariant &val = Props[(unsigned)i].Value;
+ if (val.vt == VT_UI4)
+ return val.ulVal;
+ }
return GetLevel() >= 5 ? 1 : 0;
}
- UInt32 Get_Lzma_DicSize() const
+ UInt64 Get_Lzma_DicSize() const
{
- int i = FindProp(NCoderPropID::kDictionarySize);
- if (i >= 0)
- if (Props[(unsigned)i].Value.vt == VT_UI4)
- return Props[(unsigned)i].Value.ulVal;
- unsigned level = GetLevel();
- return
- ( level <= 3 ? (1 << (level * 2 + 16)) :
- ( level <= 6 ? (1 << (level + 19)) :
- ( level <= 7 ? (1 << 25) : (1 << 26)
+ UInt64 v;
+ if (Get_DicSize(v))
+ return v;
+ const unsigned level = GetLevel();
+ const UInt32 dictSize =
+ ( level <= 3 ? ((UInt32)1 << (level * 2 + 16)) :
+ ( level <= 6 ? ((UInt32)1 << (level + 19)) :
+ ( level <= 7 ? ((UInt32)1 << 25) : ((UInt32)1 << 26)
)));
+ return dictSize;
}
bool Get_Lzma_Eos() const
@@ -152,7 +166,7 @@ public:
UInt64 GetProp_BlockSize(PROPID id) const
{
- int i = FindProp(id);
+ const int i = FindProp(id);
if (i >= 0)
{
const NWindows::NCOM::CPropVariant &val = Props[(unsigned)i].Value;
@@ -176,7 +190,7 @@ public:
}
const UInt32 kMinSize = (UInt32)1 << 20;
const UInt32 kMaxSize = (UInt32)1 << 28;
- UInt32 dictSize = Get_Lzma_DicSize();
+ const UInt64 dictSize = Get_Lzma_DicSize();
UInt64 blockSize = (UInt64)dictSize << 2;
if (blockSize < kMinSize) blockSize = kMinSize;
if (blockSize > kMaxSize) blockSize = kMaxSize;
@@ -204,29 +218,38 @@ public:
UInt32 Get_BZip2_BlockSize() const
{
- int i = FindProp(NCoderPropID::kDictionarySize);
+ const int i = FindProp(NCoderPropID::kDictionarySize);
if (i >= 0)
- if (Props[(unsigned)i].Value.vt == VT_UI4)
+ {
+ const NWindows::NCOM::CPropVariant &val = Props[(unsigned)i].Value;
+ if (val.vt == VT_UI4)
{
- UInt32 blockSize = Props[(unsigned)i].Value.ulVal;
+ UInt32 blockSize = val.ulVal;
const UInt32 kDicSizeMin = 100000;
const UInt32 kDicSizeMax = 900000;
if (blockSize < kDicSizeMin) blockSize = kDicSizeMin;
if (blockSize > kDicSizeMax) blockSize = kDicSizeMax;
return blockSize;
}
- unsigned level = GetLevel();
+ }
+ const unsigned level = GetLevel();
return 100000 * (level >= 5 ? 9 : (level >= 1 ? level * 2 - 1: 1));
}
- UInt32 Get_Ppmd_MemSize() const
+ UInt64 Get_Ppmd_MemSize() const
{
- int i = FindProp(NCoderPropID::kUsedMemorySize);
+ const int i = FindProp(NCoderPropID::kUsedMemorySize);
if (i >= 0)
- if (Props[(unsigned)i].Value.vt == VT_UI4)
- return Props[(unsigned)i].Value.ulVal;
- unsigned level = GetLevel();
- return ((UInt32)1 << (level + 19));
+ {
+ const NWindows::NCOM::CPropVariant &val = Props[(unsigned)i].Value;
+ if (val.vt == VT_UI4)
+ return val.ulVal;
+ if (val.vt == VT_UI8)
+ return val.uhVal.QuadPart;
+ }
+ const unsigned level = GetLevel();
+ const UInt32 mem = (UInt32)1 << (level + 19);
+ return mem;
}
void AddProp_Level(UInt32 level)
diff --git a/CPP/7zip/Common/OffsetStream.cpp b/CPP/7zip/Common/OffsetStream.cpp
index b3e710f9..b16124c2 100644
--- a/CPP/7zip/Common/OffsetStream.cpp
+++ b/CPP/7zip/Common/OffsetStream.cpp
@@ -20,13 +20,13 @@ STDMETHODIMP COffsetOutStream::Write(const void *data, UInt32 size, UInt32 *proc
STDMETHODIMP COffsetOutStream::Seek(Int64 offset, UInt32 seekOrigin, UInt64 *newPosition)
{
- UInt64 absoluteNewPosition;
if (seekOrigin == STREAM_SEEK_SET)
{
if (offset < 0)
return HRESULT_WIN32_ERROR_NEGATIVE_SEEK;
offset += _offset;
}
+ UInt64 absoluteNewPosition = 0; // =0 for gcc-10
HRESULT result = _stream->Seek(offset, seekOrigin, &absoluteNewPosition);
if (newPosition)
*newPosition = absoluteNewPosition - _offset;
diff --git a/CPP/7zip/Common/StreamBinder.cpp b/CPP/7zip/Common/StreamBinder.cpp
index fbf2a0de..6b6e0e58 100644
--- a/CPP/7zip/Common/StreamBinder.cpp
+++ b/CPP/7zip/Common/StreamBinder.cpp
@@ -52,9 +52,9 @@ HRESULT CStreamBinder::Create_ReInit()
RINOK(Event__Create_or_Reset(_canRead_Event));
// RINOK(Event__Create_or_Reset(_canWrite_Event));
- _canWrite_Semaphore.Close();
+ // _canWrite_Semaphore.Close();
// we need at least 3 items of maxCount: 1 for normal unlock in Read(), 2 items for unlock in CloseRead_CallOnce()
- _canWrite_Semaphore.Create(0, 3);
+ _canWrite_Semaphore.OptCreateInit(0, 3);
// _readingWasClosed = false;
_readingWasClosed2 = false;
diff --git a/CPP/7zip/Compress/DeflateDecoder.cpp b/CPP/7zip/Compress/DeflateDecoder.cpp
index e34c2c0c..0206ce8d 100644
--- a/CPP/7zip/Compress/DeflateDecoder.cpp
+++ b/CPP/7zip/Compress/DeflateDecoder.cpp
@@ -274,15 +274,24 @@ HRESULT CCoder::CodeSpec(UInt32 curSize, bool finishInputStream, UInt32 inputPro
sym = m_DistDecoder.Decode(&m_InBitStream);
if (sym >= _numDistLevels)
return S_FALSE;
- UInt32 distance = kDistStart[sym] + m_InBitStream.ReadBits(kDistDirectBits[sym]);
- if (!m_OutWindowStream.CopyBlock(distance, locLen))
+ sym = kDistStart[sym] + m_InBitStream.ReadBits(kDistDirectBits[sym]);
+ /*
+ if (sym >= 4)
+ {
+ // sym &= 31;
+ const unsigned numDirectBits = (unsigned)(((sym >> 1) - 1));
+ sym = (2 | (sym & 1)) << numDirectBits;
+ sym += m_InBitStream.ReadBits(numDirectBits);
+ }
+ */
+ if (!m_OutWindowStream.CopyBlock(sym, locLen))
return S_FALSE;
curSize -= locLen;
len -= locLen;
if (len != 0)
{
_remainLen = (Int32)len;
- _rep0 = distance;
+ _rep0 = sym;
break;
}
}
diff --git a/CPP/7zip/Compress/DeflateEncoder.cpp b/CPP/7zip/Compress/DeflateEncoder.cpp
index fb24c6b0..8168ec78 100644
--- a/CPP/7zip/Compress/DeflateEncoder.cpp
+++ b/CPP/7zip/Compress/DeflateEncoder.cpp
@@ -44,7 +44,9 @@ static const Byte kNoLenStatPrice = 11;
static const Byte kNoPosStatPrice = 6;
static Byte g_LenSlots[kNumLenSymbolsMax];
-static Byte g_FastPos[1 << 9];
+
+#define kNumLogBits 9 // do not change it
+static Byte g_FastPos[1 << kNumLogBits];
class CFastPosInit
{
@@ -60,7 +62,7 @@ public:
g_LenSlots[c] = (Byte)i;
}
- const unsigned kFastSlots = 18;
+ const unsigned kFastSlots = kNumLogBits * 2;
unsigned c = 0;
for (Byte slotFast = 0; slotFast < kFastSlots; slotFast++)
{
@@ -73,14 +75,24 @@ public:
static CFastPosInit g_FastPosInit;
-
inline UInt32 GetPosSlot(UInt32 pos)
{
+ /*
if (pos < 0x200)
return g_FastPos[pos];
return g_FastPos[pos >> 8] + 16;
+ */
+ // const unsigned zz = (pos < ((UInt32)1 << (kNumLogBits))) ? 0 : 8;
+ /*
+ const unsigned zz = (kNumLogBits - 1) &
+ ((UInt32)0 - (((((UInt32)1 << kNumLogBits) - 1) - pos) >> 31));
+ */
+ const unsigned zz = (kNumLogBits - 1) &
+ (((((UInt32)1 << kNumLogBits) - 1) - pos) >> (31 - 3));
+ return g_FastPos[pos >> zz] + (zz * 2);
}
+
void CEncProps::Normalize()
{
int level = Level;
@@ -253,13 +265,13 @@ NO_INLINE void CCoder::GetMatches()
UInt32 distanceTmp[kMatchMaxLen * 2 + 3];
- UInt32 numPairs = (_btMode) ?
+ const UInt32 numPairs = (UInt32)((_btMode ?
Bt3Zip_MatchFinder_GetMatches(&_lzInWindow, distanceTmp):
- Hc3Zip_MatchFinder_GetMatches(&_lzInWindow, distanceTmp);
+ Hc3Zip_MatchFinder_GetMatches(&_lzInWindow, distanceTmp)) - distanceTmp);
*m_MatchDistances = (UInt16)numPairs;
- if (numPairs > 0)
+ if (numPairs != 0)
{
UInt32 i;
for (i = 0; i < numPairs; i += 2)
diff --git a/CPP/7zip/Compress/LzmaEncoder.cpp b/CPP/7zip/Compress/LzmaEncoder.cpp
index 5a6c6831..4b3acd30 100644
--- a/CPP/7zip/Compress/LzmaEncoder.cpp
+++ b/CPP/7zip/Compress/LzmaEncoder.cpp
@@ -112,12 +112,34 @@ HRESULT SetLzmaProp(PROPID propID, const PROPVARIANT &prop, CLzmaEncProps &ep)
return S_OK;
}
+ if (propID == NCoderPropID::kDictionarySize)
+ {
+ if (prop.vt == VT_UI8)
+ {
+ // 21.03 : we support 64-bit VT_UI8 for dictionary and (dict == 4 GiB)
+ const UInt64 v = prop.uhVal.QuadPart;
+ if (v > ((UInt64)1 << 32))
+ return E_INVALIDARG;
+ UInt32 dict;
+ if (v == ((UInt64)1 << 32))
+ dict = (UInt32)(Int32)-1;
+ else
+ dict = (UInt32)v;
+ ep.dictSize = dict;
+ return S_OK;
+ }
+ }
+
if (prop.vt != VT_UI4)
return E_INVALIDARG;
UInt32 v = prop.ulVal;
switch (propID)
{
- case NCoderPropID::kDefaultProp: if (v > 31) return E_INVALIDARG; ep.dictSize = (UInt32)1 << (unsigned)v; break;
+ case NCoderPropID::kDefaultProp:
+ if (v > 32)
+ return E_INVALIDARG;
+ ep.dictSize = (v == 32) ? (UInt32)(Int32)-1 : (UInt32)1 << (unsigned)v;
+ break;
SET_PROP_32(kLevel, level)
SET_PROP_32(kNumFastBytes, fb)
SET_PROP_32U(kMatchFinderCycles, mc)
diff --git a/CPP/7zip/Compress/PpmdEncoder.cpp b/CPP/7zip/Compress/PpmdEncoder.cpp
index e2754772..d41d2aca 100644
--- a/CPP/7zip/Compress/PpmdEncoder.cpp
+++ b/CPP/7zip/Compress/PpmdEncoder.cpp
@@ -59,7 +59,7 @@ STDMETHODIMP CEncoder::SetCoderProperties(const PROPID *propIDs, const PROPVARIA
for (UInt32 i = 0; i < numProps; i++)
{
const PROPVARIANT &prop = coderProps[i];
- PROPID propID = propIDs[i];
+ const PROPID propID = propIDs[i];
if (propID > NCoderPropID::kReduceSize)
continue;
if (propID == NCoderPropID::kReduceSize)
@@ -68,16 +68,50 @@ STDMETHODIMP CEncoder::SetCoderProperties(const PROPID *propIDs, const PROPVARIA
props.ReduceSize = (UInt32)prop.uhVal.QuadPart;
continue;
}
+
+ if (propID == NCoderPropID::kUsedMemorySize)
+ {
+ // here we have selected (4 GiB - 1 KiB) as replacement for (4 GiB) MEM_SIZE.
+ const UInt32 kPpmd_Default_4g = (UInt32)0 - ((UInt32)1 << 10);
+ UInt32 v;
+ if (prop.vt == VT_UI8)
+ {
+ // 21.03 : we support 64-bit values (for 4 GiB value)
+ const UInt64 v64 = prop.uhVal.QuadPart;
+ if (v64 > ((UInt64)1 << 32))
+ return E_INVALIDARG;
+ if (v64 == ((UInt64)1 << 32))
+ v = kPpmd_Default_4g;
+ else
+ v = (UInt32)v64;
+ }
+ else if (prop.vt == VT_UI4)
+ v = (UInt32)prop.ulVal;
+ else
+ return E_INVALIDARG;
+ if (v > PPMD7_MAX_MEM_SIZE)
+ v = kPpmd_Default_4g;
+
+ /* here we restrict MEM_SIZE for Encoder.
+ It's for better performance of encoding and decoding.
+ The Decoder still supports more MEM_SIZE values. */
+ if (v < ((UInt32)1 << 16) || (v & 3) != 0)
+ return E_INVALIDARG;
+ // if (v < PPMD7_MIN_MEM_SIZE) return E_INVALIDARG; // (1 << 11)
+ /*
+ Supported MEM_SIZE range :
+ [ (1 << 11) , 0xFFFFFFFF - 12 * 3 ] - current 7-Zip's Ppmd7 constants
+ [ 1824 , 0xFFFFFFFF ] - real limits of Ppmd7 code
+ */
+ props.MemSize = v;
+ continue;
+ }
+
if (prop.vt != VT_UI4)
return E_INVALIDARG;
UInt32 v = (UInt32)prop.ulVal;
switch (propID)
{
- case NCoderPropID::kUsedMemorySize:
- if (v < (1 << 16) || v > PPMD7_MAX_MEM_SIZE || (v & 3) != 0)
- return E_INVALIDARG;
- props.MemSize = v;
- break;
case NCoderPropID::kOrder:
if (v < 2 || v > 32)
return E_INVALIDARG;
diff --git a/CPP/7zip/LzFindOpt.mak b/CPP/7zip/LzFindOpt.mak
new file mode 100644
index 00000000..169e10f0
--- /dev/null
+++ b/CPP/7zip/LzFindOpt.mak
@@ -0,0 +1,7 @@
+!IF defined(USE_C_LZFINDOPT) || "$(PLATFORM)" != "x64"
+C_OBJS = $(C_OBJS) \
+ $O\LzFindOpt.obj
+!ELSE
+ASM_OBJS = $(ASM_OBJS) \
+ $O\LzFindOpt.obj
+!ENDIF
diff --git a/CPP/7zip/UI/Common/ArchiveCommandLine.cpp b/CPP/7zip/UI/Common/ArchiveCommandLine.cpp
index ef7eb1a7..00df80e9 100644
--- a/CPP/7zip/UI/Common/ArchiveCommandLine.cpp
+++ b/CPP/7zip/UI/Common/ArchiveCommandLine.cpp
@@ -1405,11 +1405,13 @@ void CArcCmdLineParser::Parse2(CArcCmdLineOptions &options)
else if (options.Command.CommandType == NCommandType::kBenchmark)
{
options.NumIterations = 1;
+ options.NumIterations_Defined = false;
if (curCommandIndex < numNonSwitchStrings)
{
if (!StringToUInt32(nonSwitchStrings[curCommandIndex], options.NumIterations))
throw CArcCmdLineException("Incorrect number of benchmark iterations", nonSwitchStrings[curCommandIndex]);
curCommandIndex++;
+ options.NumIterations_Defined = true;
}
}
else if (options.Command.CommandType == NCommandType::kHash)
diff --git a/CPP/7zip/UI/Common/ArchiveCommandLine.h b/CPP/7zip/UI/Common/ArchiveCommandLine.h
index 150541e6..1e488d8c 100644
--- a/CPP/7zip/UI/Common/ArchiveCommandLine.h
+++ b/CPP/7zip/UI/Common/ArchiveCommandLine.h
@@ -109,6 +109,7 @@ struct CArcCmdLineOptions
// Benchmark
UInt32 NumIterations;
+ bool NumIterations_Defined;
CArcCmdLineOptions():
HelpMode(false),
diff --git a/CPP/7zip/UI/Common/Bench.cpp b/CPP/7zip/UI/Common/Bench.cpp
index 4e957331..fb4c1726 100644
--- a/CPP/7zip/UI/Common/Bench.cpp
+++ b/CPP/7zip/UI/Common/Bench.cpp
@@ -51,6 +51,7 @@
#include "../../../Common/StringToInt.h"
#include "../../Common/MethodProps.h"
+#include "../../Common/StreamObjects.h"
#include "../../Common/StreamUtils.h"
#include "Bench.h"
@@ -87,20 +88,30 @@ static void SetComplexCommandsMs(UInt32 complexInMs,
}
}
+// const UInt64 kBenchmarkUsageMult = 1000000; // for debug
+static const unsigned kBenchmarkUsageMultBits = 16;
+static const UInt64 kBenchmarkUsageMult = 1 << kBenchmarkUsageMultBits;
+
+UInt64 Benchmark_GetUsage_Percents(UInt64 usage)
+{
+ return (100 * usage + kBenchmarkUsageMult / 2) / kBenchmarkUsageMult;
+}
+
static const unsigned kNumHashDictBits = 17;
static const UInt32 kFilterUnpackSize = (47 << 10); // + 5; // for test
-static const unsigned kOldLzmaDictBits = 30;
+static const unsigned kOldLzmaDictBits = 32;
-static const UInt32 kAdditionalSize = (1 << 16);
+// static const size_t kAdditionalSize = (size_t)1 << 32; // for debug
+static const size_t kAdditionalSize = (size_t)1 << 16;
static const UInt32 kCompressedAdditionalSize = (1 << 10);
-static const UInt32 kMaxLzmaPropSize = 5;
+static const UInt32 kMaxMethodPropSize = (1 << 6);
#define ALLOC_WITH_HRESULT(_buffer_, _size_) \
{ (_buffer_)->Alloc(_size_); \
- if (!(_buffer_)->IsAllocated()) return E_OUTOFMEMORY; }
+ if (_size_ && !(_buffer_)->IsAllocated()) return E_OUTOFMEMORY; }
class CBaseRandomGenerator
@@ -143,7 +154,7 @@ static void RandGen(Byte *buf, size_t size)
}
-class CBenchRandomGenerator: public CAlignedBuffer
+class CBenchRandomGenerator: public CMidAlignedBuffer
{
static UInt32 GetVal(UInt32 &res, unsigned numBits)
{
@@ -172,14 +183,21 @@ public:
void GenerateLz(unsigned dictBits, UInt32 salt)
{
CBaseRandomGenerator rg(salt);
- UInt32 pos = 0;
- UInt32 rep0 = 1;
+ size_t pos = 0;
+ size_t rep0 = 1;
const size_t bufSize = Size();
Byte *buf = (Byte *)*this;
unsigned posBits = 1;
+
+ // printf("\n dictBits = %d\n", (UInt32)dictBits);
+ // printf("\n bufSize = 0x%p\n", (const void *)bufSize);
while (pos < bufSize)
{
+ /*
+ if (pos >= ((UInt32)1 << 31))
+ printf(" %x\n", pos);
+ */
UInt32 r = rg.GetRnd();
if (GetVal(r, 1) == 0 || pos < 1024)
buf[pos++] = (Byte)(r & 0xFF);
@@ -192,7 +210,7 @@ public:
{
len += GetLen(r);
- while (((UInt32)1 << posBits) < pos)
+ while (((size_t)1 << posBits) < pos)
posBits++;
unsigned numBitsMax = dictBits;
@@ -206,11 +224,12 @@ public:
for (;;)
{
- UInt32 ppp = GetVal(r, numLogBits) + kAddBits;
+ const UInt32 ppp = GetVal(r, numLogBits) + kAddBits;
r = rg.GetRnd();
if (ppp > numBitsMax)
continue;
- rep0 = GetVal(r, ppp);
+ // rep0 = GetVal(r, ppp);
+ rep0 = r & (((size_t)1 << ppp) - 1);
if (rep0 < pos)
break;
r = rg.GetRnd();
@@ -218,10 +237,11 @@ public:
rep0++;
}
+ // len *= 300; // for debug
{
- UInt32 rem = (UInt32)bufSize - pos;
+ const size_t rem = bufSize - pos;
if (len > rem)
- len = rem;
+ len = (UInt32)rem;
}
Byte *dest = buf + pos;
const Byte *src = dest - rep0;
@@ -230,6 +250,7 @@ public:
*dest++ = *src++;
}
}
+ // printf("\n CRC = %x\n", CrcCalc(buf, bufSize));
}
};
@@ -274,7 +295,7 @@ STDMETHODIMP CBenchmarkInStream::Read(void *data, UInt32 size, UInt32 *processed
class CBenchmarkOutStream:
public ISequentialOutStream,
- public CAlignedBuffer,
+ public CMidAlignedBuffer,
public CMyUnknownImp
{
// bool _overflow;
@@ -304,6 +325,8 @@ public:
Crc = CrcUpdate(Crc, data, size);
}
+ size_t GetPos() const { return Pos; }
+
// void Print() { printf("\n%8d %8d\n", (unsigned)BufferSize, (unsigned)Pos); }
MY_UNKNOWN_IMP
@@ -571,6 +594,7 @@ public:
STDMETHOD(SetRatioInfo)(const UInt64 *inSize, const UInt64 *outSize);
};
+
STDMETHODIMP CBenchProgressInfo::SetRatioInfo(const UInt64 *inSize, const UInt64 *outSize)
{
HRESULT res = Status->GetResult();
@@ -578,6 +602,22 @@ STDMETHODIMP CBenchProgressInfo::SetRatioInfo(const UInt64 *inSize, const UInt64
return res;
if (!Callback)
return res;
+
+ /*
+ static UInt64 inSizePrev = 0;
+ static UInt64 outSizePrev = 0;
+ UInt64 delta1 = 0, delta2 = 0, val1 = 0, val2 = 0;
+ if (inSize) { val1 = *inSize; delta1 = val1 - inSizePrev; inSizePrev = val1; }
+ if (outSize) { val2 = *outSize; delta2 = val2 - outSizePrev; outSizePrev = val2; }
+ UInt64 percents = delta2 * 1000;
+ if (delta1 != 0)
+ percents /= delta1;
+ printf("=== %7d %7d %7d %7d ratio = %4d\n",
+ (unsigned)(val1 >> 10), (unsigned)(delta1 >> 10),
+ (unsigned)(val2 >> 10), (unsigned)(delta2 >> 10),
+ (unsigned)percents);
+ */
+
CBenchInfo info;
SetFinishTime(info);
if (Status->EncodeMode)
@@ -599,18 +639,26 @@ STDMETHODIMP CBenchProgressInfo::SetRatioInfo(const UInt64 *inSize, const UInt64
static const unsigned kSubBits = 8;
-static UInt32 GetLogSize(UInt32 size)
+static UInt32 GetLogSize(UInt64 size)
{
- for (unsigned i = kSubBits; i < 32; i++)
- for (UInt32 j = 0; j < (1 << kSubBits); j++)
- if (size <= (((UInt32)1) << i) + (j << (i - kSubBits)))
- return (i << kSubBits) + j;
- return (32 << kSubBits);
+ if (size <= 1)
+ return 0;
+ unsigned i;
+ for (i = 2; i < 64; i++)
+ if (size < ((UInt64)1 << i))
+ break;
+ i--;
+ UInt32 v;
+ if (i <= kSubBits)
+ v = (UInt32)(size) << (kSubBits - i);
+ else
+ v = (UInt32)(size >> (i - kSubBits));
+ return ((UInt32)i << kSubBits) + (v & (((UInt32)1 << kSubBits) - 1));
}
static void NormalizeVals(UInt64 &v1, UInt64 &v2)
{
- while (v1 > 1000000)
+ while (v1 >= ((UInt32)1 << ((64 - kBenchmarkUsageMultBits) / 2)))
{
v1 >>= 1;
v2 >>= 1;
@@ -629,7 +677,7 @@ UInt64 CBenchInfo::GetUsage() const
userFreq = 1;
if (globalTime == 0)
globalTime = 1;
- return userTime * globalFreq * 1000000 / userFreq / globalTime;
+ return userTime * globalFreq * kBenchmarkUsageMult / userFreq / globalTime;
}
UInt64 CBenchInfo::GetRatingPerUsage(UInt64 rating) const
@@ -659,9 +707,9 @@ static UInt64 MyMultDiv64(UInt64 value, UInt64 elapsedTime, UInt64 freq)
return value * freq / elTime;
}
-UInt64 CBenchInfo::GetSpeed(UInt64 numCommands) const
+UInt64 CBenchInfo::GetSpeed(UInt64 numUnits) const
{
- return MyMultDiv64(numCommands, GlobalTime, GlobalFreq);
+ return MyMultDiv64(numUnits, GlobalTime, GlobalFreq);
}
struct CBenchProps
@@ -694,7 +742,7 @@ struct CBenchProps
return (packSize * DecComplexCompr + unpackSize * DecComplexUnc);
}
- UInt64 GetCompressRating(UInt32 dictSize, UInt64 elapsedTime, UInt64 freq, UInt64 size);
+ UInt64 GetCompressRating(UInt64 dictSize, UInt64 elapsedTime, UInt64 freq, UInt64 size);
UInt64 GetDecompressRating(UInt64 elapsedTime, UInt64 freq, UInt64 outSize, UInt64 inSize, UInt64 numIterations);
};
@@ -706,38 +754,50 @@ void CBenchProps::SetLzmaCompexity()
LzmaRatingMode = true;
}
-UInt64 CBenchProps::GetCompressRating(UInt32 dictSize, UInt64 elapsedTime, UInt64 freq, UInt64 size)
+UInt64 CBenchProps::GetCompressRating(UInt64 dictSize, UInt64 elapsedTime, UInt64 freq, UInt64 size)
{
if (dictSize < (1 << kBenchMinDicLogSize))
dictSize = (1 << kBenchMinDicLogSize);
UInt64 encComplex = EncComplex;
if (LzmaRatingMode)
{
- UInt64 t = GetLogSize(dictSize) - (kBenchMinDicLogSize << kSubBits);
+ /*
+ for (UInt64 uu = 0; uu < (UInt64)0xf << 60;)
+ {
+ unsigned rr = GetLogSize(uu);
+ printf("\n%16I64x , log = %4x", uu, rr);
+ uu += 1;
+ uu += uu / 50;
+ }
+ */
+ // throw 1;
+ const UInt32 t = GetLogSize(dictSize) - (kBenchMinDicLogSize << kSubBits);
encComplex = 870 + ((t * t * 5) >> (2 * kSubBits));
}
- UInt64 numCommands = (UInt64)size * encComplex;
+ const UInt64 numCommands = (UInt64)size * encComplex;
return MyMultDiv64(numCommands, elapsedTime, freq);
}
UInt64 CBenchProps::GetDecompressRating(UInt64 elapsedTime, UInt64 freq, UInt64 outSize, UInt64 inSize, UInt64 numIterations)
{
- UInt64 numCommands = (inSize * DecComplexCompr + outSize * DecComplexUnc) * numIterations;
+ const UInt64 numCommands = (inSize * DecComplexCompr + outSize * DecComplexUnc) * numIterations;
return MyMultDiv64(numCommands, elapsedTime, freq);
}
-UInt64 GetCompressRating(UInt32 dictSize, UInt64 elapsedTime, UInt64 freq, UInt64 size)
+
+
+UInt64 CBenchInfo::GetRating_LzmaEnc(UInt64 dictSize) const
{
CBenchProps props;
props.SetLzmaCompexity();
- return props.GetCompressRating(dictSize, elapsedTime, freq, size);
+ return props.GetCompressRating(dictSize, GlobalTime, GlobalFreq, UnpackSize * NumIterations);
}
-UInt64 GetDecompressRating(UInt64 elapsedTime, UInt64 freq, UInt64 outSize, UInt64 inSize, UInt64 numIterations)
+UInt64 CBenchInfo::GetRating_LzmaDec() const
{
CBenchProps props;
props.SetLzmaCompexity();
- return props.GetDecompressRating(elapsedTime, freq, outSize, inSize, numIterations);
+ return props.GetDecompressRating(GlobalTime, GlobalFreq, UnpackSize, PackSize, NumIterations);
}
@@ -880,10 +940,14 @@ struct CBenchSyncCommon
-struct CEncoderInfo;
+class CEncoderInfo;
-struct CEncoderInfo
+class CEncoderInfo
{
+ CLASS_NO_COPY(CEncoderInfo)
+
+public:
+
#ifndef _7ZIP_ST
NWindows::CThread thread[2];
NSynchronization::CManualResetEvent ReadyEvent;
@@ -949,8 +1013,11 @@ struct CEncoderInfo
const Byte *fileData;
CBenchRandomGenerator rg;
- CAlignedBuffer rgCopy; // it must be 16-byte aligned !!!
- CBenchmarkOutStream *propStreamSpec;
+ CMidAlignedBuffer rgCopy; // it must be 16-byte aligned !!!
+
+ // CBenchmarkOutStream *propStreamSpec;
+ Byte propsData[kMaxMethodPropSize];
+ CBufPtrSeqOutStream *propStreamSpec;
CMyComPtr<ISequentialOutStream> propStream;
unsigned generateDictBits;
@@ -1055,15 +1122,21 @@ struct CEncoderInfo
};
+
+
+static size_t GetBenchCompressedSize(size_t bufferSize) // size of the buffer that receives compressed output for (bufferSize) input bytes
+{
+ return kCompressedAdditionalSize + bufferSize + bufferSize / 16; // input size + 1/16 extra + fixed reserve; Generate() rejects a result smaller than the input size
+ // kBufferSize / 2;
+}
+
+
HRESULT CEncoderInfo::Generate()
{
const COneMethodInfo &method = _method;
// we need extra space, if input data is already compressed
- const size_t kCompressedBufferSize =
- kCompressedAdditionalSize +
- kBufferSize + kBufferSize / 16;
- // kBufferSize / 2;
+ const size_t kCompressedBufferSize = GetBenchCompressedSize(kBufferSize);
if (kCompressedBufferSize < kBufferSize)
return E_FAIL;
@@ -1078,7 +1151,13 @@ HRESULT CEncoderInfo::Generate()
if (generateDictBits == 0)
rg.GenerateSimpleRandom(Salt);
else
+ {
+ if (generateDictBits >= sizeof(size_t) * 8
+ && kBufferSize > ((size_t)1 << (sizeof(size_t) * 8 - 1)))
+ return E_INVALIDARG;
rg.GenerateLz(generateDictBits, Salt);
+ // return E_ABORT; // for debug
+ }
// printf("\n%d\n ", GetTickCount() - ttt);
crc = CrcCalc((const Byte *)rg, rg.Size());
@@ -1101,11 +1180,12 @@ HRESULT CEncoderInfo::Generate()
if (!propStream)
{
- propStreamSpec = new CBenchmarkOutStream;
+ propStreamSpec = new CBufPtrSeqOutStream; // CBenchmarkOutStream;
propStream = propStreamSpec;
}
- ALLOC_WITH_HRESULT(propStreamSpec, kMaxLzmaPropSize);
- propStreamSpec->Init(true, false);
+ // ALLOC_WITH_HRESULT_2(propStreamSpec, kMaxMethodPropSize);
+ // propStreamSpec->Init(true, false);
+ propStreamSpec->Init(propsData, sizeof(propsData));
CMyComPtr<IUnknown> coder;
@@ -1206,8 +1286,12 @@ static void My_FilterBench(ICompressFilter *filter, Byte *data, size_t size)
HRESULT CEncoderInfo::Encode()
{
+ // printf("\nCEncoderInfo::Generate\n");
+
RINOK(Generate());
+ // printf("\n2222\n");
+
#ifndef _7ZIP_ST
if (Common)
{
@@ -1359,7 +1443,7 @@ HRESULT CEncoderInfo::Decode(UInt32 decoderIndex)
CMyComPtr<ICompressSetDecoderProperties2> setDecProps;
coder.QueryInterface(IID_ICompressSetDecoderProperties2, &setDecProps);
- if (!setDecProps && propStreamSpec->Pos != 0)
+ if (!setDecProps && propStreamSpec->GetPos() != 0)
return E_FAIL;
CCrcOutStream *crcOutStreamSpec = new CCrcOutStream;
@@ -1393,7 +1477,10 @@ HRESULT CEncoderInfo::Decode(UInt32 decoderIndex)
if (setDecProps)
{
- RINOK(setDecProps->SetDecoderProperties2((const Byte *)*propStreamSpec, (UInt32)propStreamSpec->Pos));
+ RINOK(setDecProps->SetDecoderProperties2(
+ /* (const Byte *)*propStreamSpec, */
+ propsData,
+ (UInt32)propStreamSpec->GetPos()));
}
{
@@ -1918,16 +2005,36 @@ static HRESULT MethodBench(
info.PackSize += encoder.compressedSize;
}
- RINOK(callback->SetDecodeResult(info, false));
+ // RINOK(callback->SetDecodeResult(info, false)); // why we called before 21.03 ??
RINOK(callback->SetDecodeResult(info, true));
return S_OK;
}
-static inline UInt64 GetLZMAUsage(bool multiThread, UInt32 dictionary)
+
+static inline UInt64 GetDictSizeFromLog(unsigned dictSizeLog)
{
- UInt32 hs = dictionary - 1;
+ /*
+ if (dictSizeLog < 32)
+ return (UInt32)1 << dictSizeLog;
+ else
+ return (UInt32)(Int32)-1;
+ */
+ return (UInt64)1 << dictSizeLog;
+}
+
+
+// it's limit of current LZMA implementation that can be changed later
+#define kLzmaMaxDictSize ((UInt32)15 << 28)
+
+static inline UInt64 GetLZMAUsage(bool multiThread, int btMode, UInt64 dict)
+{
+ if (dict == 0)
+ dict = 1;
+ if (dict > kLzmaMaxDictSize)
+ dict = kLzmaMaxDictSize;
+ UInt32 hs = (UInt32)dict - 1;
hs |= (hs >> 1);
hs |= (hs >> 2);
hs |= (hs >> 4);
@@ -1937,29 +2044,59 @@ static inline UInt64 GetLZMAUsage(bool multiThread, UInt32 dictionary)
if (hs > (1 << 24))
hs >>= 1;
hs++;
- return ((hs + (1 << 16)) + (UInt64)dictionary * 2) * 4 + (UInt64)dictionary * 3 / 2 +
+ hs += (1 << 16);
+
+ const UInt32 kBlockSizeMax = (UInt32)0 - (UInt32)(1 << 16);
+ UInt64 blockSize = (UInt64)dict + (1 << 16)
+ + (multiThread ? (1 << 20) : 0);
+ blockSize += (blockSize >> (blockSize < ((UInt32)1 << 30) ? 1 : 2));
+ if (blockSize >= kBlockSizeMax)
+ blockSize = kBlockSizeMax;
+
+ UInt64 son = (UInt64)dict;
+ if (btMode)
+ son *= 2;
+ const UInt64 v = (hs + son) * 4 + blockSize +
(1 << 20) + (multiThread ? (6 << 20) : 0);
+
+ // printf("\nGetLZMAUsage = %d\n", (UInt32)(v >> 20));
+ // printf("\nblockSize = %d\n", (UInt32)(blockSize >> 20));
+ return v;
}
-UInt64 GetBenchMemoryUsage(UInt32 numThreads, UInt32 dictionary, bool totalBench)
+
+UInt64 GetBenchMemoryUsage(UInt32 numThreads, int level, UInt64 dictionary, bool totalBench)
{
- const UInt32 kBufferSize = dictionary;
- const UInt32 kCompressedBufferSize = kBufferSize; // / 2;
- bool lzmaMt = (totalBench || numThreads > 1);
+ const size_t kBufferSize = (size_t)dictionary + kAdditionalSize;
+ const UInt64 kCompressedBufferSize = GetBenchCompressedSize(kBufferSize); // / 2;
+ if (level < 0)
+ level = 5;
+ const int algo = (level < 5 ? 0 : 1);
+ const int btMode = (algo == 0 ? 0 : 1);
+
UInt32 numBigThreads = numThreads;
- if (!totalBench && lzmaMt)
- numBigThreads /= 2;
+ bool lzmaMt = (totalBench || (numThreads > 1 && btMode));
+ if (btMode)
+ {
+ if (!totalBench && lzmaMt)
+ numBigThreads /= 2;
+ }
return ((UInt64)kBufferSize + kCompressedBufferSize +
- GetLZMAUsage(lzmaMt, dictionary) + (2 << 20)) * numBigThreads;
+ GetLZMAUsage(lzmaMt, btMode, dictionary) + (2 << 20)) * numBigThreads;
}
+static UInt64 GetBenchMemoryUsage_Hash(UInt32 numThreads, UInt64 dictionary) // memory estimate printed by Bench() when only hash methods are benchmarked
+{
+ // dictionary += (dictionary >> 9); // for page tables (virtual memory)
+ return (UInt64)(dictionary + (1 << 15)) * numThreads + (2 << 20); // (dictionary + 32 KiB) for each thread, plus 2 MiB counted once
+}
// ---------- CRC and HASH ----------
struct CCrcInfo_Base
{
- CAlignedBuffer Buffer;
+ CMidAlignedBuffer Buffer;
const Byte *Data;
size_t Size;
bool CreateLocalBuf;
@@ -2111,9 +2248,12 @@ static THREAD_FUNC_DECL FreqThreadFunction(void *param)
UInt32 sum = g_BenchCpuFreqTemp;
for (UInt64 k = p->NumIterations; k > 0; k--)
{
- p->CallbackRes = p->Callback->CheckBreak();
- if (p->CallbackRes != S_OK)
- return 0;
+ if (p->Callback)
+ {
+ p->CallbackRes = p->Callback->CheckBreak();
+ if (p->CallbackRes != S_OK)
+ return 0;
+ }
sum = CountCpuFreq(sum, p->Size, g_BenchCpuFreqTemp);
}
p->ValRes = sum;
@@ -2437,27 +2577,6 @@ static const CBenchHash g_Hash[] =
{ 2, 5500, 0x85189d02, "BLAKE2sp" }
};
-struct CTotalBenchRes
-{
- // UInt64 NumIterations1; // for Usage
- UInt64 NumIterations2; // for Rating / RPU
-
- UInt64 Rating;
- UInt64 Usage;
- UInt64 RPU;
-
- void Init() { /* NumIterations1 = 0; */ NumIterations2 = 0; Rating = 0; Usage = 0; RPU = 0; }
-
- void SetSum(const CTotalBenchRes &r1, const CTotalBenchRes &r2)
- {
- Rating = (r1.Rating + r2.Rating);
- Usage = (r1.Usage + r2.Usage);
- RPU = (r1.RPU + r2.RPU);
- // NumIterations1 = (r1.NumIterations1 + r2.NumIterations1);
- NumIterations2 = (r1.NumIterations2 + r2.NumIterations2);
- }
-};
-
static void PrintNumber(IBenchPrintCallback &f, UInt64 value, unsigned size)
{
char s[128];
@@ -2523,7 +2642,7 @@ static void PrintSpaces(IBenchPrintCallback &f, unsigned size)
static void PrintUsage(IBenchPrintCallback &f, UInt64 usage, unsigned size)
{
- PrintNumber(f, (usage + 5000) / 10000, size);
+ PrintNumber(f, Benchmark_GetUsage_Percents(usage), size);
}
static void PrintResults(IBenchPrintCallback &f, UInt64 usage, UInt64 rpu, UInt64 rating, bool showFreq, UInt64 cpuFreq)
@@ -2537,17 +2656,39 @@ static void PrintResults(IBenchPrintCallback &f, UInt64 usage, UInt64 rpu, UInt6
PrintSpaces(f, kFieldSize_EUAndEffec);
else
{
- UInt64 ddd = cpuFreq * usage / 100;
- /*
- if (ddd == 0)
- ddd = 1;
- */
- PrintPercents(f, (rating * 10000), ddd, kFieldSize_EU);
+ PrintPercents(f, rating, cpuFreq * usage / kBenchmarkUsageMult, kFieldSize_EU);
PrintPercents(f, rating, cpuFreq, kFieldSize_Effec);
}
}
}
+
+void CTotalBenchRes::Generate_From_BenchInfo(const CBenchInfo &info) // fills Speed, Usage and RPU from (info); Rating and NumIterations2 are not written here
+{
+ Speed = info.GetUnpackSizeSpeed();
+ Usage = info.GetUsage();
+ RPU = info.GetRatingPerUsage(Rating); // reads this->Rating, so the caller must assign Rating before calling
+}
+
+void CTotalBenchRes::Mult_For_Weight(unsigned weight) // scales every field by (weight) so that the result can be added to a weighted sum
+{
+ NumIterations2 *= weight; // PrintTotals() divides the summed fields by this value to get averages
+ RPU *= weight;
+ Rating *= weight;
+ Usage *= weight; // must be scaled (not incremented): the sum is later divided by the weighted NumIterations2
+ Speed *= weight; // same rule as Usage
+}
+
+void CTotalBenchRes::Update_With_Res(const CTotalBenchRes &r) // adds each field of (r) to the matching field of this object
+{
+ Rating += r.Rating;
+ Usage += r.Usage;
+ RPU += r.RPU;
+ Speed += r.Speed;
+ // NumIterations1 = (r1.NumIterations1 + r2.NumIterations1);
+ NumIterations2 += r.NumIterations2; // running divisor that PrintTotals() uses for averaging
+}
+
static void PrintResults(IBenchPrintCallback *f,
const CBenchInfo &info,
unsigned weight,
@@ -2555,36 +2696,43 @@ static void PrintResults(IBenchPrintCallback *f,
bool showFreq, UInt64 cpuFreq,
CTotalBenchRes *res)
{
- UInt64 speed = info.GetSpeed(info.UnpackSize * info.NumIterations);
+ CTotalBenchRes t;
+ t.Rating = rating;
+ t.NumIterations2 = 1;
+ t.Generate_From_BenchInfo(info);
+
if (f)
{
- if (speed != 0)
- PrintNumber(*f, speed / 1024, kFieldSize_Speed);
+ if (t.Speed != 0)
+ PrintNumber(*f, t.Speed / 1024, kFieldSize_Speed);
else
PrintSpaces(*f, 1 + kFieldSize_Speed);
}
- UInt64 usage = info.GetUsage();
- UInt64 rpu = info.GetRatingPerUsage(rating);
if (f)
{
- PrintResults(*f, usage, rpu, rating, showFreq, cpuFreq);
+ PrintResults(*f, t.Usage, t.RPU, rating, showFreq, cpuFreq);
}
if (res)
{
// res->NumIterations1++;
- res->NumIterations2 += weight;
- res->RPU += (rpu * weight);
- res->Rating += (rating * weight);
- res->Usage += (usage * weight);
+ t.Mult_For_Weight(weight);
+ res->Update_With_Res(t);
}
}
-static void PrintTotals(IBenchPrintCallback &f, bool showFreq, UInt64 cpuFreq, const CTotalBenchRes &res)
+static void PrintTotals(IBenchPrintCallback &f,
+ bool showFreq, UInt64 cpuFreq, bool showSpeed, const CTotalBenchRes &res)
{
- PrintSpaces(f, 1 + kFieldSize_Speed);
+ const UInt64 numIterations2 = res.NumIterations2 ? res.NumIterations2 : 1;
+ const UInt64 speed = res.Speed / numIterations2;
+ if (showSpeed && speed != 0)
+ PrintNumber(f, speed / 1024, kFieldSize_Speed);
+ else
+ PrintSpaces(f, 1 + kFieldSize_Speed);
+
+ // PrintSpaces(f, 1 + kFieldSize_Speed);
// UInt64 numIterations1 = res.NumIterations1; if (numIterations1 == 0) numIterations1 = 1;
- UInt64 numIterations2 = res.NumIterations2; if (numIterations2 == 0) numIterations2 = 1;
PrintResults(f, res.Usage / numIterations2, res.RPU / numIterations2, res.Rating / numIterations2, showFreq, cpuFreq);
}
@@ -2649,7 +2797,7 @@ void Add_LargePages_String(AString &s)
#ifdef _WIN32
if (g_LargePagesMode || g_LargePageSize != 0)
{
- s += " (LP-";
+ s.Add_OptSpaced("(LP-");
PrintSize_KMGT_Or_Hex(s, g_LargePageSize);
#ifdef MY_CPU_X86_OR_AMD64
if (CPU_IsSupported_PageGB())
@@ -2700,7 +2848,7 @@ struct CBenchCallbackToPrint: public IBenchCallback
CTotalBenchRes EncodeRes;
CTotalBenchRes DecodeRes;
IBenchPrintCallback *_file;
- UInt32 DictSize;
+ UInt64 DictSize;
bool Use2Columns;
unsigned NameFieldSize;
@@ -2876,7 +3024,8 @@ struct CFreqBench
UInt64 specifiedFreq;
// out:
- UInt64 cpuFreq;
+ UInt64 CpuFreqRes;
+ UInt64 UsageRes;
UInt32 res;
CFreqBench()
@@ -2897,7 +3046,8 @@ HRESULT CFreqBench::FreqBench(IBenchPrintCallback *_file
)
{
res = 0;
- cpuFreq = 0;
+ CpuFreqRes = 0;
+ UsageRes = 0;
if (numThreads == 0)
numThreads = 1;
@@ -2984,17 +3134,17 @@ HRESULT CFreqBench::FreqBench(IBenchPrintCallback *_file
info.PackSize = 0;
info.NumIterations = 1;
+ const UInt64 numCommands = (UInt64)numIterations * numIterations2 * numThreads * complexity;
+ const UInt64 rating = info.GetSpeed(numCommands);
+ CpuFreqRes = rating / numThreads;
+ UsageRes = info.GetUsage();
+
if (_file)
{
- {
- UInt64 numCommands = (UInt64)numIterations * numIterations2 * numThreads * complexity;
- UInt64 rating = info.GetSpeed(numCommands);
- cpuFreq = rating / numThreads;
- PrintResults(_file, info,
+ PrintResults(_file, info,
0, // weight
rating,
- showFreq, showFreq ? (specifiedFreq != 0 ? specifiedFreq : cpuFreq) : 0, NULL);
- }
+ showFreq, showFreq ? (specifiedFreq != 0 ? specifiedFreq : CpuFreqRes) : 0, NULL);
RINOK(_file->CheckBreak());
}
@@ -3041,7 +3191,7 @@ static HRESULT CrcBench(
/*
// if will generate random data in each thread, instead of global data
- CAlignedBuffer buffer;
+ CMidAlignedBuffer buffer;
if (!fileData)
{
ALLOC_WITH_HRESULT(&buffer, bufferSize)
@@ -3288,10 +3438,10 @@ HRESULT Bench(
DECL_EXTERNAL_CODECS_LOC_VARS
IBenchPrintCallback *printCallback,
IBenchCallback *benchCallback,
- // IBenchFreqCallback *freqCallback,
const CObjectVector<CProperty> &props,
UInt32 numIterations,
- bool multiDict)
+ bool multiDict,
+ IBenchFreqCallback *freqCallback)
{
if (!CrcInternalTest())
return E_FAIL;
@@ -3342,7 +3492,9 @@ HRESULT Bench(
COneMethodInfo method;
- CAlignedBuffer fileDataBuffer;
+ CMidAlignedBuffer fileDataBuffer;
+ bool use_fileData = false;
+ bool isFixedDict = false;
{
unsigned i;
@@ -3395,7 +3547,10 @@ HRESULT Bench(
return E_INVALIDARG;
}
+ // (len == 0) is allowed. Also it's allowed if Alloc(0) returns NULL here
+
ALLOC_WITH_HRESULT(&fileDataBuffer, len);
+ use_fileData = true;
{
size_t processed;
@@ -3436,9 +3591,14 @@ HRESULT Bench(
continue;
}
- if (name.IsEqualTo("ds"))
+ const bool isCurrent_fixedDict = name.IsEqualTo("df");
+ if (isCurrent_fixedDict)
+ isFixedDict = true;
+ if (isCurrent_fixedDict || name.IsEqualTo("ds"))
{
RINOK(ParsePropToUInt32(UString(), propVariant, startDicLog));
+ if (startDicLog > 32)
+ return E_INVALIDARG;
startDicLog_Defined = true;
continue;
}
@@ -3505,6 +3665,15 @@ HRESULT Bench(
if (printCallback)
{
AString s;
+
+ #ifndef _WIN32
+ s += "Compiler: ";
+ GetCompiler(s);
+ printCallback->Print(s);
+ printCallback->NewLine();
+ s.Empty();
+ #endif
+
GetSystemInfoText(s);
printCallback->Print(s);
printCallback->NewLine();
@@ -3512,10 +3681,10 @@ HRESULT Bench(
if (printCallback)
{
- printCallback->Print("CPU Freq:");
+ printCallback->Print("1T CPU Freq (MHz):");
}
- if (printCallback /* || freqCallback */)
+ if (printCallback || freqCallback)
{
UInt64 numMilCommands = 1 << 6;
if (specifiedFreq != 0)
@@ -3543,6 +3712,7 @@ HRESULT Bench(
start = 1;
const UInt64 freq = GetFreq();
// mips is constant in some compilers
+ const UInt64 hz = MyMultDiv64(numMilCommands * 1000000, start, freq);
const UInt64 mipsVal = numMilCommands * freq / start;
if (printCallback)
{
@@ -3556,10 +3726,10 @@ HRESULT Bench(
PrintNumber(*printCallback, mipsVal, 5);
}
}
- /*
if (freqCallback)
- freqCallback->AddCpuFreq(mipsVal);
- */
+ {
+ RINOK(freqCallback->AddCpuFreq(1, hz, kBenchmarkUsageMult));
+ }
if (jj >= 1)
{
@@ -3573,7 +3743,10 @@ HRESULT Bench(
if (start >= freq * 16)
{
printCallback->Print(" (Cmplx)");
- needSetComplexity = true;
+ if (!freqCallback) // we don't want complexity change for old gui lzma benchmark
+ {
+ needSetComplexity = true;
+ }
needStop = true;
}
if (needSetComplexity)
@@ -3583,8 +3756,110 @@ HRESULT Bench(
numMilCommands <<= 1;
}
}
+ if (freqCallback)
+ {
+ RINOK(freqCallback->FreqsFinished(1));
+ }
+ }
+
+ if (numThreadsSpecified >= 2)
+ if (printCallback || freqCallback)
+ {
+ if (printCallback)
+ printCallback->NewLine();
+
+ /* it can show incorrect frequency for HT threads.
+ so we reduce freq test to (numCPUs / 2) */
+
+ UInt32 numThreads = numThreadsSpecified >= numCPUs / 2 ? numCPUs / 2: numThreadsSpecified;
+ if (numThreads < 1)
+ numThreads = 1;
+
+ if (printCallback)
+ {
+ char s[128];
+ ConvertUInt64ToString(numThreads, s);
+ printCallback->Print(s);
+ printCallback->Print("T CPU Freq (MHz):");
+ }
+ UInt64 numMilCommands = 1 << 10;
+ if (specifiedFreq != 0)
+ {
+ while (numMilCommands > 1 && specifiedFreq < (numMilCommands * 1000000))
+ numMilCommands >>= 1;
+ }
+
+ for (int jj = 0;; jj++)
+ {
+ if (printCallback)
+ RINOK(printCallback->CheckBreak());
+
+ {
+ // PrintLeft(f, "CPU", kFieldSize_Name);
+
+ // UInt32 resVal;
+
+ CFreqBench fb;
+ fb.complexInCommands = numMilCommands * 1000000;
+ fb.numThreads = numThreads;
+ // showFreq;
+ // fb.showFreq = (freqTest == kNumCpuTests - 1 || specifiedFreq != 0);
+ fb.showFreq = true;
+ fb.specifiedFreq = 1;
+
+ HRESULT res = fb.FreqBench(NULL /* printCallback */
+ #ifndef _7ZIP_ST
+ , &affinityMode
+ #endif
+ );
+ RINOK(res);
+
+ if (freqCallback)
+ {
+ RINOK(freqCallback->AddCpuFreq(numThreads, fb.CpuFreqRes, fb.UsageRes));
+ }
+
+ if (printCallback)
+ {
+ /*
+ if (realDelta == 0)
+ {
+ printCallback->Print(" -");
+ }
+ else
+ */
+ {
+ // PrintNumber(*printCallback, start, 0);
+ PrintUsage(*printCallback, fb.UsageRes, 3);
+ printCallback->Print("%");
+ PrintNumber(*printCallback, fb.CpuFreqRes / 1000000, 0);
+ printCallback->Print(" ");
+
+ // PrintNumber(*printCallback, fb.UsageRes, 5);
+ }
+ }
+ }
+ // if (jj >= 1)
+ {
+ bool needStop = (numMilCommands >= (1 <<
+ #ifdef _DEBUG
+ 7
+ #else
+ 11
+ #endif
+ ));
+ if (needStop)
+ break;
+ numMilCommands <<= 1;
+ }
+ }
+ if (freqCallback)
+ {
+ RINOK(freqCallback->FreqsFinished(numThreads));
+ }
}
+
if (printCallback)
{
printCallback->NewLine();
@@ -3597,8 +3872,10 @@ HRESULT Bench(
if (numThreadsSpecified < 1 || numThreadsSpecified > kNumThreadsMax)
return E_INVALIDARG;
- UInt32 dict;
- bool dictIsDefined = method.Get_DicSize(dict);
+ UInt64 dict = (UInt64)1 << startDicLog;
+ const bool dictIsDefined = (isFixedDict || method.Get_DicSize(dict));
+
+ const int level = method.GetLevel();
if (method.MethodName.IsEmpty())
method.MethodName = "LZMA";
@@ -3607,8 +3884,20 @@ HRESULT Bench(
{
CBenchProps benchProps;
benchProps.SetLzmaCompexity();
- UInt32 dictSize = method.Get_Lzma_DicSize();
- UInt32 uncompressedDataSize = kAdditionalSize + dictSize;
+ const UInt64 dictSize = method.Get_Lzma_DicSize();
+
+ size_t uncompressedDataSize;
+ if (use_fileData)
+ {
+ uncompressedDataSize = fileDataBuffer.Size();
+ }
+ else
+ {
+ uncompressedDataSize = kAdditionalSize + (size_t)dictSize;
+ if (uncompressedDataSize < dictSize)
+ return E_INVALIDARG;
+ }
+
return MethodBench(
EXTERNAL_CODECS_LOC_VARS
complexInCommands,
@@ -3636,7 +3925,7 @@ HRESULT Bench(
UInt64 dict64 = dict;
if (!dictIsDefined)
dict64 = (1 << 27);
- if (fileDataBuffer.IsAllocated())
+ if (use_fileData)
{
if (!dictIsDefined)
dict64 = fileDataBuffer.Size();
@@ -3684,13 +3973,15 @@ HRESULT Bench(
{
UInt64 usage = 1 << 20;
UInt64 bufSize = dict64;
- if (fileDataBuffer.IsAllocated())
+ if (use_fileData)
{
usage += fileDataBuffer.Size();
if (bufSize > fileDataBuffer.Size())
bufSize = fileDataBuffer.Size();
+ #ifndef _7ZIP_ST
if (numThreadsSpecified != 1)
usage += bufSize * numThreadsSpecified * (k_Crc_CreateLocalBuf_For_File ? 1 : 0);
+ #endif
}
else
usage += numThreadsSpecified * bufSize;
@@ -3776,7 +4067,7 @@ HRESULT Bench(
PrintRight(f, s, 4);
size_t dataSize = fileDataBuffer.Size();
- if (dataSize > bufSize || !fileDataBuffer.IsAllocated())
+ if (dataSize > bufSize || !use_fileData)
dataSize = (size_t)bufSize;
FOR_VECTOR (ti, numThreadsVector)
@@ -3792,7 +4083,7 @@ HRESULT Bench(
speed, usage,
complexity,
1, // benchWeight,
- (pow == kNumHashDictBits && !fileDataBuffer.IsAllocated()) ? checkSum : NULL,
+ (pow == kNumHashDictBits && !use_fileData) ? checkSum : NULL,
method,
&f,
#ifndef _7ZIP_ST
@@ -3880,7 +4171,7 @@ HRESULT Bench(
f.NewLine();
}
- if (!dictIsDefined)
+ if (!dictIsDefined && !onlyHashBench)
{
const unsigned dicSizeLog_Main = (totalBenchMode ? 24 : 25);
unsigned dicSizeLog = dicSizeLog_Main;
@@ -3891,10 +4182,10 @@ HRESULT Bench(
if (ramSize_Defined)
for (; dicSizeLog > kBenchMinDicLogSize; dicSizeLog--)
- if (GetBenchMemoryUsage(numThreads, ((UInt32)1 << dicSizeLog), totalBenchMode) + (8 << 20) <= ramSize)
+ if (GetBenchMemoryUsage(numThreads, level, ((UInt64)1 << dicSizeLog), totalBenchMode) + (8 << 20) <= ramSize)
break;
- dict = (UInt32)1 << dicSizeLog;
+ dict = (UInt64)1 << dicSizeLog;
if (totalBenchMode && dicSizeLog != dicSizeLog_Main)
{
@@ -3904,7 +4195,12 @@ HRESULT Bench(
}
}
- Print_Usage_and_Threads(f, GetBenchMemoryUsage(numThreads, dict, totalBenchMode), numThreads);
+ Print_Usage_and_Threads(f,
+ onlyHashBench ?
+ GetBenchMemoryUsage_Hash(numThreads, dict) :
+ GetBenchMemoryUsage(numThreads, level, dict, totalBenchMode),
+ numThreads);
+
f.NewLine();
f.NewLine();
@@ -3988,6 +4284,7 @@ HRESULT Bench(
if (specifiedFreq != 0)
cpuFreq = specifiedFreq;
+ // bool showTotalSpeed = false;
if (totalBenchMode)
{
@@ -4017,7 +4314,7 @@ HRESULT Bench(
);
RINOK(res);
- cpuFreq = fb.cpuFreq;
+ cpuFreq = fb.CpuFreqRes;
callback.NewLine();
if (specifiedFreq != 0)
@@ -4037,12 +4334,12 @@ HRESULT Bench(
if (!onlyHashBench)
{
- size_t dataSize = dict;
- if (fileDataBuffer.IsAllocated())
+ size_t dataSize = (size_t)dict;
+ if (use_fileData)
{
dataSize = fileDataBuffer.Size();
if (dictIsDefined && dataSize > dict)
- dataSize = dict;
+ dataSize = (size_t)dict;
}
HRESULT res = TotalBench(EXTERNAL_CODECS_LOC_VARS
@@ -4051,7 +4348,7 @@ HRESULT Bench(
numThreads,
&affinityMode,
#endif
- dictIsDefined || fileDataBuffer.IsAllocated(), // forceUnpackSize
+ dictIsDefined || use_fileData, // forceUnpackSize
dataSize,
(const Byte *)fileDataBuffer,
printCallback, &callback);
@@ -4061,12 +4358,16 @@ HRESULT Bench(
{
size_t dataSize = (size_t)1 << kNumHashDictBits;
if (dictIsDefined)
- dataSize = dict;
- if (fileDataBuffer.IsAllocated())
+ {
+ dataSize = (size_t)dict;
+ if (dataSize != dict)
+ return E_OUTOFMEMORY;
+ }
+ if (use_fileData)
{
dataSize = fileDataBuffer.Size();
if (dictIsDefined && dataSize > dict)
- dataSize = dict;
+ dataSize = (size_t)dict;
}
HRESULT res = TotalBench_Hash(EXTERNAL_CODECS_LOC_VARS complexInCommands, numThreads,
@@ -4143,12 +4444,12 @@ HRESULT Bench(
for (unsigned i = 0; i < numIterations; i++)
{
- unsigned pow = (dict < ((UInt32)1 << startDicLog)) ? kBenchMinDicLogSize : (unsigned)startDicLog;
+ unsigned pow = (dict < GetDictSizeFromLog(startDicLog)) ? kBenchMinDicLogSize : (unsigned)startDicLog;
if (!multiDict)
- pow = 31;
- while (((UInt32)1 << pow) > dict && pow > 0)
+ pow = 32;
+ while (GetDictSizeFromLog(pow) > dict && pow > 0)
pow--;
- for (; ((UInt32)1 << pow) <= dict; pow++)
+ for (; GetDictSizeFromLog(pow) <= dict; pow++)
{
char s[16];
ConvertUInt32ToString(pow, s);
@@ -4156,7 +4457,7 @@ HRESULT Bench(
s[pos++] = ':';
s[pos] = 0;
PrintLeft(f, s, kFieldSize_SmallName);
- callback.DictSize = (UInt32)1 << pow;
+ callback.DictSize = (UInt64)1 << pow;
COneMethodInfo method2 = method;
@@ -4170,13 +4471,15 @@ HRESULT Bench(
}
size_t uncompressedDataSize;
- if (fileDataBuffer.IsAllocated())
+ if (use_fileData)
{
uncompressedDataSize = fileDataBuffer.Size();
}
else
{
- uncompressedDataSize = callback.DictSize;
+ uncompressedDataSize = (size_t)callback.DictSize;
+ if (uncompressedDataSize != callback.DictSize)
+ return E_OUTOFMEMORY;
if (uncompressedDataSize >= (1 << 18))
uncompressedDataSize += kAdditionalSize;
}
@@ -4212,16 +4515,19 @@ HRESULT Bench(
if (use2Columns)
{
PrintLeft(f, "Avr:", callback.NameFieldSize);
- PrintTotals(f, showFreq, cpuFreq, callback.EncodeRes);
+ PrintTotals(f, showFreq, cpuFreq, !totalBenchMode, callback.EncodeRes);
f.Print(kSep);
- PrintTotals(f, showFreq, cpuFreq, callback.DecodeRes);
+ PrintTotals(f, showFreq, cpuFreq, !totalBenchMode, callback.DecodeRes);
f.NewLine();
}
PrintLeft(f, "Tot:", callback.NameFieldSize);
CTotalBenchRes midRes;
- midRes.SetSum(callback.EncodeRes, callback.DecodeRes);
- PrintTotals(f, showFreq, cpuFreq, midRes);
+ midRes = callback.EncodeRes;
+ midRes.Update_With_Res(callback.DecodeRes);
+
+ // midRes.SetSum(callback.EncodeRes, callback.DecodeRes);
+ PrintTotals(f, showFreq, cpuFreq, false, midRes);
f.NewLine();
}
diff --git a/CPP/7zip/UI/Common/Bench.h b/CPP/7zip/UI/Common/Bench.h
index 02f443e3..ab0c3048 100644
--- a/CPP/7zip/UI/Common/Bench.h
+++ b/CPP/7zip/UI/Common/Bench.h
@@ -8,6 +8,8 @@
#include "../../Common/CreateCoder.h"
#include "../../UI/Common/Property.h"
+UInt64 Benchmark_GetUsage_Percents(UInt64 usage);
+
struct CBenchInfo
{
UInt64 GlobalTime;
@@ -17,26 +19,71 @@ struct CBenchInfo
UInt64 UnpackSize;
UInt64 PackSize;
UInt64 NumIterations;
+
+ /*
+ during Code(): we track benchInfo only from one thread (the thread with index[0])
+ NumIterations means number of threads
+ UnpackSize and PackSize are total sizes of all iterations of current thread
+ after Code():
+ NumIterations means the number of Iterations
+ UnpackSize and PackSize are total sizes of all threads
+ */
CBenchInfo(): NumIterations(0) {}
+
UInt64 GetUsage() const;
UInt64 GetRatingPerUsage(UInt64 rating) const;
- UInt64 GetSpeed(UInt64 numCommands) const;
+ UInt64 GetSpeed(UInt64 numUnits) const;
+ UInt64 GetUnpackSizeSpeed() const { return GetSpeed(UnpackSize * NumIterations); }
+
+ UInt64 Get_UnpackSize_Full() const { return UnpackSize * NumIterations; }
+
+ UInt64 GetRating_LzmaEnc(UInt64 dictSize) const;
+ UInt64 GetRating_LzmaDec() const;
};
+
+struct CTotalBenchRes // accumulator of benchmark results; there is no constructor, so call Init() or assign every field before use
+{
+ // UInt64 NumIterations1; // for Usage
+ UInt64 NumIterations2; // for Rating / RPU
+
+ UInt64 Rating;
+ UInt64 Usage;
+ UInt64 RPU;
+ UInt64 Speed; // filled from CBenchInfo::GetUnpackSizeSpeed() by Generate_From_BenchInfo()
+
+ void Init() { /* NumIterations1 = 0; */ NumIterations2 = 0; Rating = 0; Usage = 0; RPU = 0; Speed = 0; }
+
+ void SetSum(const CTotalBenchRes &r1, const CTotalBenchRes &r2) // overwrites this object with the field-by-field sum (r1 + r2)
+ {
+ Rating = (r1.Rating + r2.Rating);
+ Usage = (r1.Usage + r2.Usage);
+ RPU = (r1.RPU + r2.RPU);
+ Speed = (r1.Speed + r2.Speed);
+ // NumIterations1 = (r1.NumIterations1 + r2.NumIterations1);
+ NumIterations2 = (r1.NumIterations2 + r2.NumIterations2);
+ }
+
+ void Generate_From_BenchInfo(const CBenchInfo &info); // sets Speed, Usage, RPU; reads Rating
+ void Mult_For_Weight(unsigned weight); // prepares a single result for weighted accumulation
+ void Update_With_Res(const CTotalBenchRes &r); // adds (r) to this object field by field
+};
+
+
+
struct IBenchCallback
{
- virtual HRESULT SetFreq(bool showFreq, UInt64 cpuFreq) = 0;
+ // virtual HRESULT SetFreq(bool showFreq, UInt64 cpuFreq) = 0;
virtual HRESULT SetEncodeResult(const CBenchInfo &info, bool final) = 0;
virtual HRESULT SetDecodeResult(const CBenchInfo &info, bool final) = 0;
};
-UInt64 GetCompressRating(UInt32 dictSize, UInt64 elapsedTime, UInt64 freq, UInt64 size);
-UInt64 GetDecompressRating(UInt64 elapsedTime, UInt64 freq, UInt64 outSize, UInt64 inSize, UInt64 numIterations);
+
const unsigned kBenchMinDicLogSize = 18;
-UInt64 GetBenchMemoryUsage(UInt32 numThreads, UInt32 dictionary, bool totalBench = false);
+UInt64 GetBenchMemoryUsage(UInt32 numThreads, int level, UInt64 dictionary, bool totalBench);
struct IBenchPrintCallback
{
@@ -45,22 +92,20 @@ struct IBenchPrintCallback
virtual HRESULT CheckBreak() = 0;
};
-/*
struct IBenchFreqCallback
{
- virtual void AddCpuFreq(UInt64 freq) = 0;
+ virtual HRESULT AddCpuFreq(unsigned numThreads, UInt64 freq, UInt64 usage) = 0;
+ virtual HRESULT FreqsFinished(unsigned numThreads) = 0;
};
-*/
HRESULT Bench(
DECL_EXTERNAL_CODECS_LOC_VARS
IBenchPrintCallback *printCallback,
IBenchCallback *benchCallback,
- // IBenchFreqCallback *freqCallback,
const CObjectVector<CProperty> &props,
UInt32 numIterations,
- bool multiDict
- );
+ bool multiDict,
+ IBenchFreqCallback *freqCallback = NULL);
AString GetProcessThreadsInfo(const NWindows::NSystem::CProcessAffinity &ti);
diff --git a/CPP/7zip/UI/Common/CompressCall2.cpp b/CPP/7zip/UI/Common/CompressCall2.cpp
index cbb5dcc9..8943e7d9 100644
--- a/CPP/7zip/UI/Common/CompressCall2.cpp
+++ b/CPP/7zip/UI/Common/CompressCall2.cpp
@@ -272,7 +272,11 @@ void Benchmark(bool totalMode)
prop.Value = "*";
props.Add(prop);
}
- result = Benchmark(EXTERNAL_CODECS_VARS_L props, g_HWND);
+ result = Benchmark(
+ EXTERNAL_CODECS_VARS_L
+ props,
+ k_NumBenchIterations_Default,
+ g_HWND);
MY_TRY_FINISH
}
diff --git a/CPP/7zip/UI/Console/Main.cpp b/CPP/7zip/UI/Console/Main.cpp
index 8c24aaff..e7d9fd1b 100644
--- a/CPP/7zip/UI/Console/Main.cpp
+++ b/CPP/7zip/UI/Console/Main.cpp
@@ -128,7 +128,7 @@ static const char * const kHelpString =
#ifndef _NO_CRYPTO
" -p{Password} : set Password\n"
#endif
- " -r[-|0] : Recurse subdirectories\n"
+ " -r[-|0] : Recurse subdirectories for name search\n"
" -sa{a|e|s} : set Archive name mode\n"
" -scc{UTF-8|WIN|DOS} : set charset for for console input/output\n"
" -scs{UTF-8|UTF-16LE|UTF-16BE|WIN|DOS|{id}} : set charset for list files\n"
@@ -200,63 +200,55 @@ static void ShowProgInfo(CStdOutStream *so)
#endif
*/
- #ifdef __VERSION__
- << " compiler: " << __VERSION__
- #endif
-
- #ifdef __GNUC__
- << " GCC " << __GNUC__ << "." << __GNUC_MINOR__ << "." << __GNUC_PATCHLEVEL__
- #endif
-
- #ifdef __clang__
- << " CLANG " << __clang_major__ << "." << __clang_minor__
- #endif
-
- #ifdef __xlC__
- << " XLC " << (__xlC__ >> 8) << "." << (__xlC__ & 0xFF)
- #ifdef __xlC_ver__
- << "." << (__xlC_ver__ >> 8) << "." << (__xlC_ver__ & 0xFF)
- #endif
- #endif
-
- #ifdef _MSC_VER
- << " MSC " << _MSC_VER
- #endif
-
- #ifdef __ARM_FEATURE_CRC32
- << " CRC32"
- #endif
-
<< " " << (unsigned)(sizeof(void *)) * 8 << "-bit"
#ifdef __ILP32__
<< " ILP32"
#endif
-
+
#ifdef __ARM_ARCH
<< " arm_v:" << __ARM_ARCH
#ifdef __ARM_ARCH_ISA_THUMB
<< " thumb:" << __ARM_ARCH_ISA_THUMB
#endif
#endif
+ ;
+
#ifdef ENV_HAVE_LOCALE
- << " locale=" << GetLocale()
+ *so << " locale=" << GetLocale();
#endif
#ifndef _WIN32
- << " UTF8=" << (IsNativeUTF8() ? "+" : "-")
- << " use-UTF8=" << (g_ForceToUTF8 ? "+" : "-")
- << " wchar_t=" << (unsigned)(sizeof(wchar_t)) * 8 << "-bit"
- << " Files=" << (unsigned)(sizeof(off_t)) * 8 << "-bit"
+ {
+ const bool is_IsNativeUTF8 = IsNativeUTF8();
+ if (!is_IsNativeUTF8)
+ *so << " UTF8=" << (is_IsNativeUTF8 ? "+" : "-");
+ }
+ if (!g_ForceToUTF8)
+ *so << " use-UTF8=" << (g_ForceToUTF8 ? "+" : "-");
+ {
+ const unsigned wchar_t_size = (unsigned)sizeof(wchar_t);
+ if (wchar_t_size != 4)
+ *so << " wchar_t=" << wchar_t_size * 8 << "-bit";
+ }
+ {
+ const unsigned off_t_size = (unsigned)sizeof(off_t);
+ if (off_t_size != 8)
+ *so << " Files=" << off_t_size * 8 << "-bit";
+ }
#endif
- ;
{
const UInt32 numCpus = NWindows::NSystem::GetNumberOfProcessors();
*so << " Threads:" << numCpus;
}
+ #ifdef _7ZIP_ASM
+ *so << ", ASM";
+ #endif
+
+ /*
{
AString s;
GetCpuName(s);
@@ -264,9 +256,10 @@ static void ShowProgInfo(CStdOutStream *so)
*so << ", " << s;
}
- #ifdef _7ZIP_ASM
- *so << ",ASM";
+ #ifdef __ARM_FEATURE_CRC32
+ << " CRC32"
#endif
+
#if (defined MY_CPU_X86_OR_AMD64 || defined(MY_CPU_ARM_OR_ARM64))
if (CPU_IsSupported_AES()) *so << ",AES";
@@ -281,6 +274,7 @@ static void ShowProgInfo(CStdOutStream *so)
if (CPU_IsSupported_SHA2()) *so << ",SHA2";
#endif
#endif
+ */
*so << endl;
}
diff --git a/CPP/7zip/UI/FileManager/AboutDialog.cpp b/CPP/7zip/UI/FileManager/AboutDialog.cpp
index b3455cf5..082902e2 100644
--- a/CPP/7zip/UI/FileManager/AboutDialog.cpp
+++ b/CPP/7zip/UI/FileManager/AboutDialog.cpp
@@ -19,7 +19,7 @@ static const UInt32 kLangIDs[] =
IDT_ABOUT_INFO
};
-#define kHomePageURL TEXT("http://www.7-zip.org/")
+#define kHomePageURL TEXT("https://www.7-zip.org/")
#define kHelpTopic "start.htm"
#define LLL_(quote) L##quote
diff --git a/CPP/7zip/UI/FileManager/ProgressDialog.cpp b/CPP/7zip/UI/FileManager/ProgressDialog.cpp
index 1bf115ad..b688a901 100644
--- a/CPP/7zip/UI/FileManager/ProgressDialog.cpp
+++ b/CPP/7zip/UI/FileManager/ProgressDialog.cpp
@@ -136,8 +136,11 @@ bool CProgressDialog::OnMessage(UINT message, WPARAM wParam, LPARAM lParam)
{
case kCloseMessage:
{
- KillTimer(_timer);
- _timer = 0;
+ if (_timer)
+ {
+ KillTimer(kTimerID);
+ _timer = 0;
+ }
if (_inCancelMessageBox)
{
_externalCloseMessageWasReceived = true;
diff --git a/CPP/7zip/UI/FileManager/ProgressDialog2.cpp b/CPP/7zip/UI/FileManager/ProgressDialog2.cpp
index 8e2d7c75..7b132468 100644
--- a/CPP/7zip/UI/FileManager/ProgressDialog2.cpp
+++ b/CPP/7zip/UI/FileManager/ProgressDialog2.cpp
@@ -348,7 +348,9 @@ bool CProgressDialog::OnInit()
INIT_AS_UNDEFINED(_processed_Prev);
INIT_AS_UNDEFINED(_packed_Prev);
INIT_AS_UNDEFINED(_ratio_Prev);
+
_filesStr_Prev.Empty();
+ _filesTotStr_Prev.Empty();
_foreground = true;
@@ -423,13 +425,14 @@ static const UINT kIDs[] =
IDT_PROGRESS_ELAPSED, IDT_PROGRESS_ELAPSED_VAL,
IDT_PROGRESS_REMAINING, IDT_PROGRESS_REMAINING_VAL,
IDT_PROGRESS_FILES, IDT_PROGRESS_FILES_VAL,
- IDT_PROGRESS_RATIO, IDT_PROGRESS_RATIO_VAL,
+ 0, IDT_PROGRESS_FILES_TOTAL,
IDT_PROGRESS_ERRORS, IDT_PROGRESS_ERRORS_VAL,
IDT_PROGRESS_TOTAL, IDT_PROGRESS_TOTAL_VAL,
IDT_PROGRESS_SPEED, IDT_PROGRESS_SPEED_VAL,
IDT_PROGRESS_PROCESSED, IDT_PROGRESS_PROCESSED_VAL,
- IDT_PROGRESS_PACKED, IDT_PROGRESS_PACKED_VAL
+ IDT_PROGRESS_PACKED, IDT_PROGRESS_PACKED_VAL,
+ IDT_PROGRESS_RATIO, IDT_PROGRESS_RATIO_VAL
};
bool CProgressDialog::OnSize(WPARAM /* wParam */, int xSize, int ySize)
@@ -546,6 +549,7 @@ bool CProgressDialog::OnSize(WPARAM /* wParam */, int xSize, int ySize)
yPos = my;
x = mx + gSize + padSize;
}
+ if (kIDs[i] != 0)
MoveItem(kIDs[i], x, yPos, labelSize, sY);
MoveItem(kIDs[i + 1], x + labelSize, yPos, valueSize, sY);
yPos += sStep;
@@ -617,6 +621,7 @@ static void ConvertSizeToString(UInt64 v, wchar_t *s)
s += MyStringLen(s);
*s++ = ' ';
*s++ = c;
+ *s++ = 'B';
*s++ = 0;
}
}
@@ -829,16 +834,24 @@ void CProgressDialog::UpdateStatInfo(bool showAll)
{
wchar_t s[64];
+
ConvertUInt64ToString(completedFiles, s);
+ if (_filesStr_Prev != s)
+ {
+ _filesStr_Prev = s;
+ SetItemText(IDT_PROGRESS_FILES_VAL, s);
+ }
+
+ s[0] = 0;
if (IS_DEFINED_VAL(totalFiles))
{
- MyStringCat(s, L" / ");
+ MyStringCopy(s, L" / ");
ConvertUInt64ToString(totalFiles, s + MyStringLen(s));
}
- if (_filesStr_Prev != s)
+ if (_filesTotStr_Prev != s)
{
- _filesStr_Prev = s;
- SetItemText(IDT_PROGRESS_FILES_VAL, s);
+ _filesTotStr_Prev = s;
+ SetItemText(IDT_PROGRESS_FILES_TOTAL, s);
}
}
@@ -1024,8 +1037,13 @@ bool CProgressDialog::OnMessage(UINT message, WPARAM wParam, LPARAM lParam)
{
case kCloseMessage:
{
- KillTimer(_timer);
- _timer = 0;
+ if (_timer)
+ {
+ /* 21.03 : KillTimer(kTimerID) instead of KillTimer(_timer).
+ But (_timer == kTimerID) in Win10. So it worked too */
+ KillTimer(kTimerID);
+ _timer = 0;
+ }
if (_inCancelMessageBox)
{
_externalCloseMessageWasReceived = true;
diff --git a/CPP/7zip/UI/FileManager/ProgressDialog2.h b/CPP/7zip/UI/FileManager/ProgressDialog2.h
index fc032cd9..c17dd395 100644
--- a/CPP/7zip/UI/FileManager/ProgressDialog2.h
+++ b/CPP/7zip/UI/FileManager/ProgressDialog2.h
@@ -169,7 +169,9 @@ class CProgressDialog: public NWindows::NControl::CModalDialog
UInt64 _processed_Prev;
UInt64 _packed_Prev;
UInt64 _ratio_Prev;
+
UString _filesStr_Prev;
+ UString _filesTotStr_Prev;
unsigned _prevSpeed_MoveBits;
UInt64 _prevSpeed;
diff --git a/CPP/7zip/UI/FileManager/ProgressDialog2Res.h b/CPP/7zip/UI/FileManager/ProgressDialog2Res.h
index b45d7b49..736c7179 100644
--- a/CPP/7zip/UI/FileManager/ProgressDialog2Res.h
+++ b/CPP/7zip/UI/FileManager/ProgressDialog2Res.h
@@ -28,6 +28,7 @@
#define IDT_PROGRESS_PACKED_VAL 110
#define IDT_PROGRESS_FILES_VAL 111
+#define IDT_PROGRESS_FILES_TOTAL 112
#define IDT_PROGRESS_ELAPSED_VAL 120
#define IDT_PROGRESS_REMAINING_VAL 121
@@ -41,7 +42,7 @@
#ifdef UNDER_CE
#define MY_PROGRESS_VAL_UNITS 44
#else
-#define MY_PROGRESS_VAL_UNITS 76
+#define MY_PROGRESS_VAL_UNITS 72
#endif
#define MY_PROGRESS_LABEL_UNITS_MIN 60
#define MY_PROGRESS_LABEL_UNITS_START 90
diff --git a/CPP/7zip/UI/FileManager/ProgressDialog2a.rc b/CPP/7zip/UI/FileManager/ProgressDialog2a.rc
index c183af82..d2fee8cf 100644
--- a/CPP/7zip/UI/FileManager/ProgressDialog2a.rc
+++ b/CPP/7zip/UI/FileManager/ProgressDialog2a.rc
@@ -47,27 +47,32 @@ CAPTION "Progress"
PUSHBUTTON "&Pause", IDB_PAUSE, bx2, by, bxs, bys
PUSHBUTTON "Cancel", IDCANCEL, bx1, by, bxs, bys
+
LTEXT "Elapsed time:", IDT_PROGRESS_ELAPSED, m, y0, x0s, 8
LTEXT "Remaining time:", IDT_PROGRESS_REMAINING, m, y1, x0s, 8
LTEXT "Files:", IDT_PROGRESS_FILES, m, y2, x0s, 8
- LTEXT "Compression ratio:", IDT_PROGRESS_RATIO, m, y3, x0s, 8
+
LTEXT "Errors:", IDT_PROGRESS_ERRORS, m, y4, x0s, 8
+
LTEXT "Total size:", IDT_PROGRESS_TOTAL, x2, y0, x2s, 8
LTEXT "Speed:", IDT_PROGRESS_SPEED, x2, y1, x2s, 8
LTEXT "Processed:", IDT_PROGRESS_PROCESSED,x2, y2, x2s, 8
LTEXT "Compressed size:" , IDT_PROGRESS_PACKED, x2, y3, x2s, 8
+ LTEXT "Compression ratio:", IDT_PROGRESS_RATIO, x2, y4, x2s, 8
+
RTEXT "", IDT_PROGRESS_ELAPSED_VAL, x1, y0, x1s, MY_TEXT_NOPREFIX
RTEXT "", IDT_PROGRESS_REMAINING_VAL, x1, y1, x1s, MY_TEXT_NOPREFIX
RTEXT "", IDT_PROGRESS_FILES_VAL, x1, y2, x1s, MY_TEXT_NOPREFIX
- RTEXT "", IDT_PROGRESS_RATIO_VAL, x1, y3, x1s, MY_TEXT_NOPREFIX
+ RTEXT "", IDT_PROGRESS_FILES_TOTAL x1, y3, x1s, MY_TEXT_NOPREFIX
RTEXT "", IDT_PROGRESS_ERRORS_VAL, x1, y4, x1s, MY_TEXT_NOPREFIX
RTEXT "", IDT_PROGRESS_TOTAL_VAL, x3, y0, x3s, MY_TEXT_NOPREFIX
RTEXT "", IDT_PROGRESS_SPEED_VAL, x3, y1, x3s, MY_TEXT_NOPREFIX
RTEXT "", IDT_PROGRESS_PROCESSED_VAL, x3, y2, x3s, MY_TEXT_NOPREFIX
RTEXT "", IDT_PROGRESS_PACKED_VAL, x3, y3, x3s, MY_TEXT_NOPREFIX
+ RTEXT "", IDT_PROGRESS_RATIO_VAL, x3, y4, x3s, MY_TEXT_NOPREFIX
LTEXT "", IDT_PROGRESS_STATUS, m, z3, xc, MY_TEXT_NOPREFIX
CONTROL "", IDT_PROGRESS_FILE_NAME, "Static", SS_NOPREFIX | SS_LEFTNOWORDWRAP, m, z2, xc, z2s
diff --git a/CPP/7zip/UI/GUI/BenchmarkDialog.cpp b/CPP/7zip/UI/GUI/BenchmarkDialog.cpp
index 94dfab4c..41e0927d 100644
--- a/CPP/7zip/UI/GUI/BenchmarkDialog.cpp
+++ b/CPP/7zip/UI/GUI/BenchmarkDialog.cpp
@@ -10,15 +10,27 @@
#include "../../../Common/StringConvert.h"
#include "../../../Common/StringToInt.h"
+#include "../../../Windows/Synchronization.h"
#include "../../../Windows/System.h"
#include "../../../Windows/Thread.h"
+#include "../../../Windows/SystemInfo.h"
+
+#include "../../../Windows/Control/ComboBox.h"
+#include "../../../Windows/Control/Edit.h"
#include "../../Common/MethodProps.h"
+#include "../FileManager/DialogSize.h"
#include "../FileManager/HelpUtils.h"
+#ifdef LANG
+#include "../FileManager/LangUtils.h"
+#endif
#include "../../MyVersion.h"
+#include "../Common/Bench.h"
+
+#include "BenchmarkDialogRes.h"
#include "BenchmarkDialog.h"
using namespace NWindows;
@@ -26,13 +38,350 @@ using namespace NWindows;
#define kHelpTopic "fm/benchmark.htm"
static const UINT_PTR kTimerID = 4;
-static const UINT kTimerElapse = 1000;
+static const UINT kTimerElapse = 1000; // 1000
+
+// use PRINT_ITER_TIME to show time of each iteration in log box
+// #define PRINT_ITER_TIME
+
+static const unsigned kRatingVector_NumBundlesMax = 20;
+
+// Private window message of the benchmark dialog.
+// k_Message_Finished is the message that PostMsg_Finish() posts to the dialog.
+enum MyBenchMessages
+{
+ k_Message_Finished = WM_APP + 1
+};
+
+// Notification codes for the dialog's private messages.
+// NOTE(review): presumably passed as the parameter of k_Message_Finished;
+// the message handler is outside this view -- confirm there.
+enum My_Message_WPARAM
+{
+ k_Msg_WPARM_Thread_Finished = 0,
+ k_Msg_WPARM_Iter_Finished,
+ k_Msg_WPARM_Enc1_Finished
+};
+
+
+// One element of CBenchProgressSync::RatingVector:
+// the encoder (Enc) and decoder (Dec) totals that are stored together.
+struct CBenchPassResult
+{
+ CTotalBenchRes Enc;
+ CTotalBenchRes Dec;
+ // Ticks exists only in PRINT_ITER_TIME builds
+ // (presumably the duration of the pass -- TODO confirm against the writer).
+ #ifdef PRINT_ITER_TIME
+ DWORD Ticks;
+ #endif
+ // CBenchInfo EncInfo; // for debug
+ // CBenchPassResult() {};
+};
+
+
+// CTotalBenchRes extended with an accumulated unpacked-size counter.
+struct CTotalBenchRes2: public CTotalBenchRes
+{
+ UInt64 UnpackSize;
+
+ // Resets the base-class totals and clears UnpackSize.
+ void Init()
+ {
+ CTotalBenchRes::Init();
+ UnpackSize = 0;
+ }
+
+ // Fills this object from a single CBenchInfo: the iteration count is set
+ // to 1 and UnpackSize is taken from info.Get_UnpackSize_Full().
+ void SetFrom_BenchInfo(const CBenchInfo &info)
+ {
+ NumIterations2 = 1;
+ Generate_From_BenchInfo(info);
+ UnpackSize = info.Get_UnpackSize_Full();
+ }
+
+ // Merges (r) into this object: base-class update plus sum of UnpackSize.
+ void Update_With_Res2(const CTotalBenchRes2 &r)
+ {
+ Update_With_Res(r);
+ UnpackSize += r.UnpackSize;
+ }
+};
+
+
+// Benchmark results and "need to print" flags that are kept inside
+// CBenchProgressSync (member sd). All fields are reset by Init().
+struct CSyncData
+{
+ UInt32 NumPasses_Finished;
+
+ // UInt64 NumEncProgress; // for debug
+ // UInt64 NumDecProgress; // for debug
+ // CBenchInfo EncInfo; // for debug
+
+ // Encoder results; the "_1" variant is presumably the current/last pass
+ // and the other one the accumulated value -- TODO confirm against the writer.
+ CTotalBenchRes2 Enc_BenchRes_1;
+ CTotalBenchRes2 Enc_BenchRes;
+
+ // Decoder results, same layout as the encoder pair above.
+ CTotalBenchRes2 Dec_BenchRes_1;
+ CTotalBenchRes2 Dec_BenchRes;
+
+ #ifdef PRINT_ITER_TIME
+ DWORD TotalTicks;
+ #endif
+
+ // Init() sets this to -1.
+ int RatingVector_DeletedIndex;
+ // UInt64 RatingVector_NumDeleted;
+
+ bool BenchWasFinished; // all passes were finished
+ bool NeedPrint_Freq;
+ bool NeedPrint_RatingVector;
+ bool NeedPrint_Enc_1;
+ bool NeedPrint_Enc;
+ bool NeedPrint_Dec_1;
+ bool NeedPrint_Dec;
+ bool NeedPrint_Tot; // intermediate Total was updated after current pass
+
+ void Init();
+};
+
+
+// Resets the pass counter, all accumulated encoder/decoder results and
+// every "need print" flag to their initial (nothing finished) state.
+void CSyncData::Init()
+{
+  NumPasses_Finished = 0;
+
+  // NumEncProgress = 0;
+  // NumDecProgress = 0;
+
+  Enc_BenchRes.Init();
+  Enc_BenchRes_1.Init();
+  Dec_BenchRes.Init();
+  Dec_BenchRes_1.Init();
+
+  #ifdef PRINT_ITER_TIME
+  TotalTicks = 0;
+  #endif
+
+  // -1 is the value used for "no index"
+  RatingVector_DeletedIndex = -1;
+  // RatingVector_NumDeleted = 0;
+
+  BenchWasFinished =
+  NeedPrint_Freq =
+  NeedPrint_RatingVector =
+  NeedPrint_Enc_1 =
+  NeedPrint_Enc =
+  NeedPrint_Dec_1 =
+  NeedPrint_Dec =
+  NeedPrint_Tot = false;
+}
+
+
+// State shared between the GUI thread and the benchmark thread.
+// CS is the critical section used by SendExit() and by the dialog
+// when it writes the benchmark parameters.
+struct CBenchProgressSync
+{
+ bool Exit; // GUI asks BenchThread to Exit, and BenchThread reads that variable
+ UInt32 NumThreads;
+ UInt64 DictSize;
+ UInt32 NumPasses_Limit;
+ int Level;
+
+ // must be written by benchmark thread, read by GUI thread
+ CSyncData sd;
+ CRecordVector<CBenchPassResult> RatingVector;
+
+ NWindows::NSynchronization::CCriticalSection CS;
+
+ AString Text;
+ bool TextWasChanged;
+
+ /* BenchFinish_Task_HRESULT - for result from benchmark code
+ BenchFinish_Thread_HRESULT - for Exceptions and service errors
+ these errors must be shown even if user escapes benchmark */
+
+ HRESULT BenchFinish_Task_HRESULT;
+ HRESULT BenchFinish_Thread_HRESULT;
+
+ UInt32 NumFreqThreadsPrev;
+ UString FreqString_Sync;
+ UString FreqString_GUI;
+
+ // The constructor sets only NumPasses_Limit; the remaining fields are
+ // set by Init() or by the code that configures the benchmark.
+ CBenchProgressSync()
+ {
+ NumPasses_Limit = 1;
+ }
+
+ void Init();
+
+ // Sets the Exit flag under the critical section (CS).
+ void SendExit()
+ {
+ NWindows::NSynchronization::CCriticalSectionLock lock(CS);
+ Exit = true;
+ }
+};
+
+
+// Prepares the shared state for a new benchmark run.
+// NumThreads, DictSize, NumPasses_Limit and Level are not touched here.
+void CBenchProgressSync::Init()
+{
+  // no exit request and no error results yet
+  Exit = false;
+  BenchFinish_Task_HRESULT = S_OK;
+  BenchFinish_Thread_HRESULT = S_OK;
+
+  // drop results of the previous run
+  sd.Init();
+  RatingVector.Clear();
+
+  // frequency information
+  NumFreqThreadsPrev = 0;
+  FreqString_Sync.Empty();
+  FreqString_GUI.Empty();
+
+  // the log text is empty, and it is marked as changed
+  Text.Empty();
+  TextWasChanged = true;
+}
+
+
+
+// Small owner of a GDI font handle.
+// The font created by Create() is deleted in the destructor.
+struct CMyFont
+{
+  HFONT _font;
+
+  CMyFont(): _font(NULL) {}
+
+  ~CMyFont()
+  {
+    if (_font)
+      DeleteObject(_font);
+  }
+
+  // Creates the font described by (lplf).
+  // A font created by a previous call is deleted first, so that repeated
+  // calls do not leak the old GDI handle.
+  // _font is NULL after the call, if CreateFontIndirect() has failed.
+  void Create(const LOGFONT *lplf)
+  {
+    if (_font)
+      DeleteObject(_font);
+    _font = CreateFontIndirect(lplf);
+  }
+};
+
+
+class CBenchmarkDialog;
+
+// Context object of the benchmark thread.
+// A pointer to this object is passed as (param) of MyThreadFunction().
+struct CThreadBenchmark
+{
+ CBenchmarkDialog *BenchmarkDialog;
+ DECL_EXTERNAL_CODECS_LOC_VARS2;
+ // HRESULT Result;
+
+ HRESULT Process();
+ // Thread entry point: calls Process() for the object passed in (param).
+ // The HRESULT of Process() is ignored here; the function always returns 0.
+ static THREAD_FUNC_DECL MyThreadFunction(void *param)
+ {
+ /* ((CThreadBenchmark *)param)->Result = */
+ ((CThreadBenchmark *)param)->Process();
+ return 0;
+ }
+};
+
+
+// Modal benchmark dialog.
+// The dialog owns the benchmark thread object (_thread / _threadBenchmark)
+// and the state shared with that thread (Sync).
+class CBenchmarkDialog:
+  public NWindows::NControl::CModalDialog
+{
+  NWindows::NControl::CComboBox m_Dictionary;
+  NWindows::NControl::CComboBox m_NumThreads;
+  NWindows::NControl::CComboBox m_NumPasses;
+  NWindows::NControl::CEdit _consoleEdit;
+  UINT_PTR _timer;  // 0 means: the timer is not active
+
+  UInt32 _startTime;
+  UInt32 _finishTime;
+  bool _finishTime_WasSet;
+
+  bool WasStopped_in_GUI;
+  bool ExitWasAsked_in_GUI;
+  bool NeedRestart;
+
+  CMyFont _font;
+
+  UInt64 RamSize;
+  UInt64 RamSize_Limit;
+  bool RamSize_Defined;
+
+  UInt32 NumPasses_Finished_Prev;
+
+  UString ElapsedSec_Prev;
+
+  // Resets the "previous value" caches of GUI and the shared state (Sync).
+  void InitSyncNew()
+  {
+    NumPasses_Finished_Prev = (UInt32)(Int32)-1;
+    ElapsedSec_Prev.Empty();
+    Sync.Init();
+  }
+
+  virtual bool OnInit();
+  virtual bool OnDestroy();
+  virtual bool OnSize(WPARAM /* wParam */, int xSize, int ySize);
+  virtual bool OnMessage(UINT message, WPARAM wParam, LPARAM lParam);
+  virtual bool OnCommand(int code, int itemID, LPARAM lParam);
+  virtual void OnHelp();
+  virtual void OnCancel();
+  virtual bool OnTimer(WPARAM timerID, LPARAM callback);
+  virtual bool OnButtonClicked(int buttonID, HWND buttonHWND);
+
+  void Disable_Stop_Button();
+  void OnStopButton();
+  void RestartBenchmark();
+  void StartBenchmark();
+
+  void UpdateGui();
+
+  void PrintTime();
+  void PrintRating(UInt64 rating, UINT controlID);
+  void PrintUsage(UInt64 usage, UINT controlID);
+  void PrintBenchRes(const CTotalBenchRes2 &info, const UINT ids[]);
+
+  UInt32 GetNumberOfThreads();
+  size_t OnChangeDictionary();
+
+  void SetItemText_Number(int itemID, UInt64 val, LPCTSTR post = NULL);
+  void Print_MemUsage(UString &s, UInt64 memUsage) const;
+  // (memUsage) plus 1 MB of reserve must fit into RamSize_Limit
+  bool IsMemoryUsageOK(UInt64 memUsage) const
+    { return memUsage + (1 << 20) <= RamSize_Limit; }
+
+  void MyKillTimer();
+
+  // Shows (message) in the status item and asks the benchmark thread to exit.
+  void SendExit_Status(const wchar_t *message)
+  {
+    SetItemText(IDT_BENCH_ERROR_MESSAGE, message);
+    Sync.SendExit();
+  }
+
+public:
+  CBenchProgressSync Sync;
+
+  bool TotalMode;
+  CObjectVector<CProperty> Props;
+
+  CSysString Bench2Text;
+
+  NWindows::CThread _thread;
+  CThreadBenchmark _threadBenchmark;
+
+  /* The initializers are listed in the declaration order of members
+     (that is the order that C++ uses for initialization anyway).
+     The time fields are initialized too: StartBenchmark() can return
+     before it sets them. */
+  CBenchmarkDialog():
+      _timer(0),
+      _startTime(0),
+      _finishTime(0),
+      _finishTime_WasSet(false),
+      WasStopped_in_GUI(false),
+      ExitWasAsked_in_GUI(false),
+      NeedRestart(false),
+      TotalMode(false)
+    {}
+
+  ~CBenchmarkDialog();
+
+  // Posts k_Message_Finished with (param) to the dialog window.
+  bool PostMsg_Finish(LPARAM param)
+  {
+    if ((HWND)*this)
+      return PostMsg(k_Message_Finished, param);
+    // the (HWND)*this is NULL only for some internal code failure
+    return true;
+  }
+
+  INT_PTR Create(HWND wndParent = 0)
+  {
+    BIG_DIALOG_SIZE(332, 228);
+    return CModalDialog::Create(TotalMode ? IDD_BENCH_TOTAL : SIZED_DIALOG(IDD_BENCH), wndParent);
+  }
+  void MessageBoxError(LPCWSTR message)
+  {
+    MessageBoxW(*this, message, L"7-Zip", MB_ICONERROR);
+  }
+  // Shows "ERROR: " + (message) in a message box and in the status item.
+  void MessageBoxError_Status(LPCWSTR message)
+  {
+    UString s ("ERROR: ");
+    s += message;
+    MessageBoxError(s);
+    SetItemText(IDT_BENCH_ERROR_MESSAGE, s);
+  }
+};
+
+
+
+
+
+
-#ifdef LANG
-#include "../FileManager/LangUtils.h"
-#endif
-using namespace NWindows;
UString HResultToMessage(HRESULT errorCode);
@@ -65,36 +414,48 @@ static const UInt32 kLangIDs_Colon[] =
#endif
static LPCTSTR const kProcessingString = TEXT("...");
+static LPCTSTR const kGB = TEXT(" GB");
static LPCTSTR const kMB = TEXT(" MB");
-static LPCTSTR const kMIPS = TEXT(" MIPS");
+static LPCTSTR const kKB = TEXT(" KB");
+// static LPCTSTR const kMIPS = TEXT(" MIPS");
static LPCTSTR const kKBs = TEXT(" KB/s");
-static const unsigned kMinDicLogSize =
- #ifdef UNDER_CE
- 20;
- #else
- 21;
- #endif
+static const unsigned kMinDicLogSize = 18;
-static const UInt32 kMinDicSize = (1 << kMinDicLogSize);
-static const UInt32 kMaxDicSize =
+static const UInt32 kMinDicSize = (UInt32)1 << kMinDicLogSize;
+static const size_t kMaxDicSize = (size_t)1 << (22 + sizeof(size_t) / 4 * 5);
+// static const size_t kMaxDicSize = (size_t)1 << 16;
+ /*
#ifdef MY_CPU_64BIT
- (1 << 30);
+ (UInt32)(Int32)-1; // we can use it, if we want 4 GB buffer
+ // (UInt32)15 << 28;
#else
- (1 << 27);
+ (UInt32)1 << 27;
#endif
+ */
+
+
+// Adds the decimal text of (v) as a new item of combo box (cb),
+// stores (v) as the item data of that item, and returns the item index.
+static int ComboBox_Add_UInt32(NWindows::NControl::CComboBox &cb, UInt32 v)
+{
+  TCHAR text[16];
+  ConvertUInt32ToString(v, text);
+  const int itemIndex = (int)cb.AddString(text);
+  cb.SetItemData(itemIndex, v);
+  return itemIndex;
+}
+
bool CBenchmarkDialog::OnInit()
{
#ifdef LANG
LangSetWindowText(*this, IDD_BENCH);
LangSetDlgItems(*this, kLangIDs, ARRAY_SIZE(kLangIDs));
- LangSetDlgItems_Colon(*this, kLangIDs_Colon, ARRAY_SIZE(kLangIDs_Colon));
+ // LangSetDlgItems_Colon(*this, kLangIDs_Colon, ARRAY_SIZE(kLangIDs_Colon));
LangSetDlgItemText(*this, IDT_BENCH_CURRENT2, IDT_BENCH_CURRENT);
LangSetDlgItemText(*this, IDT_BENCH_RESULTING2, IDT_BENCH_RESULTING);
#endif
- Sync.Init();
+ InitSyncNew();
if (TotalMode)
{
@@ -120,154 +481,194 @@ bool CBenchmarkDialog::OnInit()
UInt32 numCPUs = 1;
{
- UString s ("/ ");
+ AString s ("/ ");
NSystem::CProcessAffinity threadsInfo;
threadsInfo.InitST();
#ifndef _7ZIP_ST
-
if (threadsInfo.Get() && threadsInfo.processAffinityMask != 0)
numCPUs = threadsInfo.GetNumProcessThreads();
else
numCPUs = NSystem::GetNumberOfProcessors();
-
#endif
s.Add_UInt32(numCPUs);
s += GetProcessThreadsInfo(threadsInfo);
- SetItemText(IDT_BENCH_HARDWARE_THREADS, s);
- }
-
- {
- UString s;
- {
- AString s1, s2;
- GetSysInfo(s1, s2);
- s = s1;
- SetItemText(IDT_BENCH_SYS1, s);
- if (s1 != s2 && !s2.IsEmpty())
- {
- s = s2;
- SetItemText(IDT_BENCH_SYS2, s);
- }
- }
- /*
+ SetItemTextA(IDT_BENCH_HARDWARE_THREADS, s);
+
{
- GetVersionString(s);
- SetItemText(IDT_BENCH_SYSTEM, s);
+ AString s2;
+ GetSysInfo(s, s2);
+ SetItemTextA(IDT_BENCH_SYS1, s);
+ if (s != s2 && !s2.IsEmpty())
+ SetItemTextA(IDT_BENCH_SYS2, s2);
}
- */
{
- AString s2;
- GetCpuName(s2);
- s = s2;
- SetItemText(IDT_BENCH_CPU, s);
+ GetCpuName_MultiLine(s);
+ SetItemTextA(IDT_BENCH_CPU, s);
}
{
- AString s2;
- AddCpuFeatures(s2);
- s = s2;
- SetItemText(IDT_BENCH_CPU_FEATURE, s);
+ GetOsInfoText(s);
+ s += " : ";
+ AddCpuFeatures(s);
+ SetItemTextA(IDT_BENCH_CPU_FEATURE, s);
}
s = "7-Zip " MY_VERSION_CPU;
- SetItemText(IDT_BENCH_VER, s);
+ SetItemTextA(IDT_BENCH_VER, s);
}
+ // ----- Num Threads ----------
+
if (numCPUs < 1)
numCPUs = 1;
- numCPUs = MyMin(numCPUs, (UInt32)(1 << 8));
+ numCPUs = MyMin(numCPUs, (UInt32)(1 << 6)); // it's WIN32 limit
+
+ UInt32 numThreads = Sync.NumThreads;
+
+ if (numThreads == (UInt32)(Int32)-1)
+ numThreads = numCPUs;
+ if (numThreads > 1)
+ numThreads &= ~1;
+ const UInt32 kNumThreadsMax = (1 << 12);
+ if (numThreads > kNumThreadsMax)
+ numThreads = kNumThreadsMax;
- if (Sync.NumThreads == (UInt32)(Int32)-1)
- {
- Sync.NumThreads = numCPUs;
- if (Sync.NumThreads > 1)
- Sync.NumThreads &= ~1;
- }
m_NumThreads.Attach(GetItem(IDC_BENCH_NUM_THREADS));
+ const UInt32 numTheads_Combo = numCPUs * 2;
+ UInt32 v = 1;
int cur = 0;
- for (UInt32 num = 1; num <= numCPUs * 2;)
+ for (; v <= numTheads_Combo;)
{
- TCHAR s[16];
- ConvertUInt32ToString(num, s);
- int index = (int)m_NumThreads.AddString(s);
- m_NumThreads.SetItemData(index, num);
- if (num <= Sync.NumThreads)
+ int index = ComboBox_Add_UInt32(m_NumThreads, v);
+ const UInt32 vNext = v + (v < 2 ? 1 : 2);
+ if (v <= numThreads)
+ if (numThreads < vNext || vNext > numTheads_Combo)
+ {
+ if (v != numThreads)
+ index = ComboBox_Add_UInt32(m_NumThreads, numThreads);
cur = index;
- if (num > 1)
- num++;
- num++;
+ }
+ v = vNext;
}
m_NumThreads.SetCurSel(cur);
Sync.NumThreads = GetNumberOfThreads();
+
+ // ----- Dictionary ----------
+
m_Dictionary.Attach(GetItem(IDC_BENCH_DICTIONARY));
- cur = 0;
- ramSize = (UInt64)(sizeof(size_t)) << 29;
- ramSize_Defined = NSystem::GetRamSize(ramSize);
+ RamSize = (UInt64)(sizeof(size_t)) << 29;
+ RamSize_Defined = NSystem::GetRamSize(RamSize);
+
#ifdef UNDER_CE
const UInt32 kNormalizedCeSize = (16 << 20);
- if (ramSize > kNormalizedCeSize && ramSize < (33 << 20))
- ramSize = kNormalizedCeSize;
+ if (RamSize > kNormalizedCeSize && RamSize < (33 << 20))
+ RamSize = kNormalizedCeSize;
#endif
+ RamSize_Limit = RamSize / 16 * 15;
- if (Sync.DictionarySize == (UInt32)(Int32)-1)
+ if (Sync.DictSize == (UInt64)(Int64)-1)
{
unsigned dicSizeLog = 25;
-
#ifdef UNDER_CE
dicSizeLog = 20;
#endif
-
- if (ramSize_Defined)
+ if (RamSize_Defined)
for (; dicSizeLog > kBenchMinDicLogSize; dicSizeLog--)
- if (GetBenchMemoryUsage(Sync.NumThreads, ((UInt32)1 << dicSizeLog)) + (8 << 20) <= ramSize)
+ if (IsMemoryUsageOK(GetBenchMemoryUsage(
+ Sync.NumThreads, Sync.Level, (UInt64)1 << dicSizeLog, TotalMode)))
break;
- Sync.DictionarySize = (1 << dicSizeLog);
+ Sync.DictSize = (UInt64)1 << dicSizeLog;
}
- if (Sync.DictionarySize < kMinDicSize) Sync.DictionarySize = kMinDicSize;
- if (Sync.DictionarySize > kMaxDicSize) Sync.DictionarySize = kMaxDicSize;
+ if (Sync.DictSize < kMinDicSize) Sync.DictSize = kMinDicSize;
+ if (Sync.DictSize > kMaxDicSize) Sync.DictSize = kMaxDicSize;
- for (unsigned i = kMinDicLogSize; i <= 30; i++)
- for (unsigned j = 0; j < 2; j++)
- {
- UInt32 dict = ((UInt32)1 << i) + ((UInt32)j << (i - 1));
- if (dict > kMaxDicSize)
- continue;
+ cur = 0;
+ for (unsigned i = (kMinDicLogSize - 1) * 2; i <= (32 - 1) * 2; i++)
+ {
+ const size_t dict = (size_t)(2 + (i & 1)) << (i / 2);
+ // if (i == (32 - 1) * 2) dict = kMaxDicSize;
TCHAR s[32];
- ConvertUInt32ToString((dict >> 20), s);
- lstrcat(s, kMB);
- int index = (int)m_Dictionary.AddString(s);
+ const TCHAR *post;
+ UInt32 d;
+ if (dict >= ((UInt32)1 << 31)) { d = (UInt32)(dict >> 30); post = kGB; }
+ else if (dict >= ((UInt32)1 << 21)) { d = (UInt32)(dict >> 20); post = kMB; }
+ else { d = (UInt32)(dict >> 10); post = kKB; }
+ ConvertUInt32ToString(d, s);
+ lstrcat(s, post);
+ const int index = (int)m_Dictionary.AddString(s);
m_Dictionary.SetItemData(index, dict);
- if (dict <= Sync.DictionarySize)
+ if (dict <= Sync.DictSize)
cur = index;
+ if (dict >= kMaxDicSize)
+ break;
}
m_Dictionary.SetCurSel(cur);
- OnChangeSettings();
- Sync._startEvent.Set();
- _timer = SetTimer(kTimerID, kTimerElapse);
+ // ----- Num Passes ----------
+
+ m_NumPasses.Attach(GetItem(IDC_BENCH_NUM_PASSES));
+ cur = 0;
+ v = 1;
+ for (;;)
+ {
+ int index = ComboBox_Add_UInt32(m_NumPasses, v);
+ const bool isLast = (v >= 10000000);
+ UInt32 vNext = v * 10;
+ if (v < 2) vNext = 2;
+ else if (v < 5) vNext = 5;
+ else if (v < 10) vNext = 10;
+
+ if (v <= Sync.NumPasses_Limit)
+ if (isLast || Sync.NumPasses_Limit < vNext)
+ {
+ if (v != Sync.NumPasses_Limit)
+ index = ComboBox_Add_UInt32(m_NumPasses, Sync.NumPasses_Limit);
+ cur = index;
+ }
+ v = vNext;
+ if (isLast)
+ break;
+ }
+ m_NumPasses.SetCurSel(cur);
if (TotalMode)
NormalizeSize(true);
else
NormalizePosition();
+
+ RestartBenchmark();
+
return CModalDialog::OnInit();
}
+
bool CBenchmarkDialog::OnSize(WPARAM /* wParam */, int xSize, int ySize)
{
- if (!TotalMode)
- return false;
int mx, my;
GetMargins(8, mx, my);
+
+ if (!TotalMode)
+ {
+ RECT rect;
+ GetClientRectOfItem(IDT_BENCH_LOG, rect);
+ int x = xSize - rect.left - mx;
+ int y = ySize - rect.top - my;
+ if (x < 0) x = 0;
+ if (y < 0) y = 0;
+ MoveItem(IDT_BENCH_LOG, rect.left, rect.top, x, y, true);
+ return false;
+ }
+
int bx1, bx2, by;
+
GetItemSizes(IDCANCEL, bx1, by);
GetItemSizes(IDHELP, bx2, by);
@@ -299,12 +700,28 @@ bool CBenchmarkDialog::OnSize(WPARAM /* wParam */, int xSize, int ySize)
return false;
}
+
UInt32 CBenchmarkDialog::GetNumberOfThreads()
{
return (UInt32)m_NumThreads.GetItemData_of_CurSel();
}
+// Writes (val) as three decimal digits and a terminating zero to s[0..3].
+// (val) is evaluated three times, so it must have no side effects.
+// The digits are correct only for (val) < 1000.
+#define UINT_TO_STR_3(s, val) { \
+ s[0] = (wchar_t)('0' + (val) / 100); \
+ s[1] = (wchar_t)('0' + (val) % 100 / 10); \
+ s[2] = (wchar_t)('0' + (val) % 10); \
+ s[3] = 0; }
+
+// Converts (val) to the string "<val / 1000>.<3 digits of val % 1000>".
+// (s) must be large enough for the integer part, '.', 3 digits and the zero.
+static void NumberToDot3(UInt64 val, WCHAR *s)
+{
+  const UInt64 whole = val / 1000;
+  const UInt32 frac = (UInt32)(val % 1000);
+  ConvertUInt64ToString(whole, s);
+  WCHAR *p = s + MyStringLen(s);
+  *p++ = '.';
+  UINT_TO_STR_3(p, frac);
+}
+
void CBenchmarkDialog::SetItemText_Number(int itemID, UInt64 val, LPCTSTR post)
{
TCHAR s[64];
@@ -314,7 +731,7 @@ void CBenchmarkDialog::SetItemText_Number(int itemID, UInt64 val, LPCTSTR post)
SetItemText(itemID, s);
}
-static void PrintSize_MB(UString &s, UInt64 size)
+static void AddSize_MB(UString &s, UInt64 size)
{
char temp[32];
ConvertUInt64ToString((size + (1 << 20) - 1) >> 20, temp);
@@ -322,25 +739,36 @@ static void PrintSize_MB(UString &s, UInt64 size)
s += kMB;
}
-
-UInt32 CBenchmarkDialog::OnChangeDictionary()
+void CBenchmarkDialog::Print_MemUsage(UString &s, UInt64 memUsage) const
{
- const UInt32 dict = (UInt32)m_Dictionary.GetItemData_of_CurSel();
- const UInt64 memUsage = GetBenchMemoryUsage(GetNumberOfThreads(), dict);
-
- UString s;
- PrintSize_MB(s, memUsage);
- if (ramSize_Defined)
+ AddSize_MB(s, memUsage);
+ if (RamSize_Defined)
{
s += " / ";
- PrintSize_MB(s, ramSize);
+ AddSize_MB(s, RamSize);
}
+}
+
+size_t CBenchmarkDialog::OnChangeDictionary()
+{
+ const size_t dict = (size_t)m_Dictionary.GetItemData_of_CurSel();
+ const UInt64 memUsage = GetBenchMemoryUsage(GetNumberOfThreads(),
+ Sync.Level,
+ dict,
+ false); // totalBench mode
+
+ UString s;
+ Print_MemUsage(s, memUsage);
#ifdef _7ZIP_LARGE_PAGES
{
AString s2;
Add_LargePages_String(s2);
- s += s2;
+ if (!s2.IsEmpty())
+ {
+ s.Add_Space();
+ s += s2;
+ }
}
#endif
@@ -349,8 +777,11 @@ UInt32 CBenchmarkDialog::OnChangeDictionary()
return dict;
}
+
static const UInt32 g_IDs[] =
{
+ IDT_BENCH_COMPRESS_SIZE1,
+ IDT_BENCH_COMPRESS_SIZE2,
IDT_BENCH_COMPRESS_USAGE1,
IDT_BENCH_COMPRESS_USAGE2,
IDT_BENCH_COMPRESS_SPEED1,
@@ -360,6 +791,8 @@ static const UInt32 g_IDs[] =
IDT_BENCH_COMPRESS_RPU1,
IDT_BENCH_COMPRESS_RPU2,
+ IDT_BENCH_DECOMPR_SIZE1,
+ IDT_BENCH_DECOMPR_SIZE2,
IDT_BENCH_DECOMPR_SPEED1,
IDT_BENCH_DECOMPR_SPEED2,
IDT_BENCH_DECOMPR_RATING1,
@@ -372,108 +805,456 @@ static const UInt32 g_IDs[] =
IDT_BENCH_TOTAL_USAGE_VAL,
IDT_BENCH_TOTAL_RATING_VAL,
IDT_BENCH_TOTAL_RPU_VAL
-
- // IDT_BENCH_FREQ_CUR,
- // IDT_BENCH_FREQ_RES
};
-void CBenchmarkDialog::OnChangeSettings()
+
+static const unsigned k_Ids_Enc_1[] = {
+ IDT_BENCH_COMPRESS_USAGE1,
+ IDT_BENCH_COMPRESS_SPEED1,
+ IDT_BENCH_COMPRESS_RPU1,
+ IDT_BENCH_COMPRESS_RATING1,
+ IDT_BENCH_COMPRESS_SIZE1 };
+
+static const unsigned k_Ids_Enc[] = {
+ IDT_BENCH_COMPRESS_USAGE2,
+ IDT_BENCH_COMPRESS_SPEED2,
+ IDT_BENCH_COMPRESS_RPU2,
+ IDT_BENCH_COMPRESS_RATING2,
+ IDT_BENCH_COMPRESS_SIZE2 };
+
+static const unsigned k_Ids_Dec_1[] = {
+ IDT_BENCH_DECOMPR_USAGE1,
+ IDT_BENCH_DECOMPR_SPEED1,
+ IDT_BENCH_DECOMPR_RPU1,
+ IDT_BENCH_DECOMPR_RATING1,
+ IDT_BENCH_DECOMPR_SIZE1 };
+
+static const unsigned k_Ids_Dec[] = {
+ IDT_BENCH_DECOMPR_USAGE2,
+ IDT_BENCH_DECOMPR_SPEED2,
+ IDT_BENCH_DECOMPR_RPU2,
+ IDT_BENCH_DECOMPR_RATING2,
+ IDT_BENCH_DECOMPR_SIZE2 };
+
+static const unsigned k_Ids_Tot[] = {
+ IDT_BENCH_TOTAL_USAGE_VAL,
+ 0,
+ IDT_BENCH_TOTAL_RPU_VAL,
+ IDT_BENCH_TOTAL_RATING_VAL,
+ 0 };
+
+
+void CBenchmarkDialog::MyKillTimer()
{
- EnableItem(IDB_STOP, true);
- UInt32 dict = OnChangeDictionary();
+ if (_timer != 0)
+ {
+ KillTimer(kTimerID);
+ _timer = 0;
+ }
+}
+
+
+// Destroy handler of the dialog: kills the timer, if it is still active,
+// and returns false.
+bool CBenchmarkDialog::OnDestroy()
+{
+ /* actually timer was removed before.
+ also the timer must be removed by Windows, when window will be removed. */
+ MyKillTimer(); // it's optional code
+ return false; // we return (false) to perform default dialog operation
+}
+
+void SetErrorMessage_MemUsage(UString &s, UInt64 reqSize, UInt64 ramSize, UInt64 ramLimit, const UString &usageString);
+
+void CBenchmarkDialog::StartBenchmark()
+{
+ NeedRestart = false;
+ WasStopped_in_GUI = false;
+
+ SetItemText_Empty(IDT_BENCH_ERROR_MESSAGE);
+ MyKillTimer(); // optional code. timer was killed before
+
+ const size_t dict = OnChangeDictionary();
+ const UInt32 numThreads = GetNumberOfThreads();
+ const UInt32 numPasses = (UInt32)m_NumPasses.GetItemData_of_CurSel();
+
for (unsigned i = 0; i < ARRAY_SIZE(g_IDs); i++)
SetItemText(g_IDs[i], kProcessingString);
+
+ SetItemText_Empty(IDT_BENCH_LOG);
+ SetItemText_Empty(IDT_BENCH_ELAPSED_VAL);
+ SetItemText_Empty(IDT_BENCH_ERROR_MESSAGE);
+
+ const UInt64 memUsage = GetBenchMemoryUsage(numThreads, Sync.Level, dict,
+ false); // totalBench
+ if (!IsMemoryUsageOK(memUsage))
+ {
+ UString s2 = LangString(IDT_BENCH_MEMORY);
+ if (s2.IsEmpty())
+ GetItemText(IDT_BENCH_MEMORY, s2);
+ UString s;
+ SetErrorMessage_MemUsage(s, memUsage, RamSize, RamSize_Limit, s2);
+ MessageBoxError_Status(s);
+ return;
+ }
+
+ EnableItem(IDB_STOP, true);
+
_startTime = GetTickCount();
+ _finishTime = _startTime;
+ _finishTime_WasSet = false;
+
+ {
+ NWindows::NSynchronization::CCriticalSectionLock lock(Sync.CS);
+ InitSyncNew();
+ Sync.DictSize = dict;
+ Sync.NumThreads = numThreads;
+ Sync.NumPasses_Limit = numPasses;
+ }
+
PrintTime();
- NWindows::NSynchronization::CCriticalSectionLock lock(Sync.CS);
- Sync.Init();
- Sync.DictionarySize = dict;
- Sync.Changed = true;
- Sync.NumThreads = GetNumberOfThreads();
+
+ _timer = SetTimer(kTimerID, kTimerElapse);
+ if (_thread.Create(CThreadBenchmark::MyThreadFunction, &_threadBenchmark) != 0)
+ {
+ MyKillTimer();
+ MessageBoxError_Status(L"Can't create thread");
+ };
+ return;
}
-void CBenchmarkDialog::OnRestartButton()
+
+void CBenchmarkDialog::RestartBenchmark()
{
- OnChangeSettings();
+ if (ExitWasAsked_in_GUI)
+ return;
+
+ if (_thread.IsCreated())
+ {
+ NeedRestart = true;
+ SendExit_Status(L"Stop for restart ...");
+ }
+ else
+ StartBenchmark();
}
-void CBenchmarkDialog::OnStopButton()
+
+void CBenchmarkDialog::Disable_Stop_Button()
{
+ // if we disable focused button, then focus will be lost
+ if (GetFocus() == GetItem(IDB_STOP))
+ {
+ // SendMsg_NextDlgCtl_Prev();
+ SendMsg_NextDlgCtl_CtlId(IDB_RESTART);
+ }
EnableItem(IDB_STOP, false);
- Sync.Pause();
}
-void CBenchmarkDialog::OnHelp()
+
+void CBenchmarkDialog::OnStopButton()
{
- ShowHelpWindow(kHelpTopic);
+ if (ExitWasAsked_in_GUI)
+ return;
+
+ Disable_Stop_Button();
+
+ WasStopped_in_GUI = true;
+ if (_thread.IsCreated())
+ {
+ SendExit_Status(L"Stop ...");
+ }
}
+
+
void CBenchmarkDialog::OnCancel()
{
- Sync.Stop();
- KillTimer(_timer);
- CModalDialog::OnCancel();
+ ExitWasAsked_in_GUI = true;
+
+ /*
+ SendMsg_NextDlgCtl_Prev();
+ EnableItem(IDCANCEL, false);
+ */
+
+ if (_thread.IsCreated())
+ SendExit_Status(L"Cancel ...");
+ else
+ CModalDialog::OnCancel();
}
-void GetTimeString(UInt64 timeValue, wchar_t *s);
+
+void CBenchmarkDialog::OnHelp()
+{
+ ShowHelpWindow(kHelpTopic);
+}
+
+
+
+// void GetTimeString(UInt64 timeValue, wchar_t *s);
void CBenchmarkDialog::PrintTime()
{
- UInt32 curTime = ::GetTickCount();
- UInt32 elapsedTime = (curTime - _startTime);
- UInt32 elapsedSec = elapsedTime / 1000;
- if (elapsedSec != 0 && Sync.WasPaused())
+ const UInt32 curTime =
+ _finishTime_WasSet ?
+ _finishTime :
+ ::GetTickCount();
+
+ const UInt32 elapsedTime = (curTime - _startTime);
+
+ WCHAR s[64];
+
+ // GetTimeString(elapsedTime / 1000, s);
+ ConvertUInt32ToString(elapsedTime / 1000, s);
+
+ if (_finishTime_WasSet)
+ {
+ WCHAR *p = s + MyStringLen(s);
+ *p++ = '.';
+ UINT_TO_STR_3(p, elapsedTime % 1000);
+ }
+
+ // NumberToDot3((UInt64)elapsedTime, s);
+
+ wcscat(s, L" s");
+
+ // if (WasStopped_in_GUI) wcscat(s, L" X"); // for debug
+
+ if (s == ElapsedSec_Prev)
return;
- WCHAR s[40];
- GetTimeString(elapsedSec, s);
+
+ ElapsedSec_Prev = s;
+
+ // static cnt = 0; cnt++; wcscat(s, L" ");
+ // UString s2; s2.Add_UInt32(cnt); wcscat(s, s2.Ptr());
+
SetItemText(IDT_BENCH_ELAPSED_VAL, s);
}
+
+// Converts instructions per second (ips) to millions of instructions
+// per second, rounded to nearest.
+static UInt64 GetMips(UInt64 ips)
+{
+  const UInt64 kIpsPerMips = 1000000;
+  return (ips + kIpsPerMips / 2) / kIpsPerMips;
+}
+
+
+// Thin wrapper: returns Benchmark_GetUsage_Percents(usage).
+static UInt64 GetUsagePercents(UInt64 usage)
+{
+ return Benchmark_GetUsage_Percents(usage);
+}
+
+
+// Returns the average rating of (info) in MIPS: info.Rating is divided
+// by the number of iterations and converted with GetMips().
+// The result is saturated to 0xFFFFFFFF, if it does not fit in UInt32.
+static UInt32 GetRating(const CTotalBenchRes &info)
+{
+  UInt64 numIter = info.NumIterations2;
+  // avoid division by zero, if there are no iterations
+  if (numIter == 0)
+    numIter = 1000000;
+  const UInt64 rating64 = GetMips(info.Rating / numIter);
+  // return rating64;
+  UInt32 rating32 = (UInt32)rating64;
+  if (rating32 != rating64)
+    rating32 = (UInt32)(Int32)-1;
+  return rating32;
+}
+
+
+// Appends the average usage of (info) to (s) as "<number>%".
+// The number is right-aligned: spaces are added before it, until
+// the number field is 5 characters wide.
+static void AddUsageString(UString &s, const CTotalBenchRes &info)
+{
+  UInt64 divider = info.NumIterations2;
+  if (divider == 0)
+    divider = 1000000;
+  const UInt64 percents = GetUsagePercents(info.Usage / divider);
+
+  wchar_t text[64];
+  ConvertUInt64ToString(percents, text);
+  for (unsigned width = MyStringLen(text); width < 5; width++)
+    s.Add_Space();
+  s += text;
+  s += "%";
+}
+
+
+// Appends (val) to (s) in the NumberToDot3() format: "<val / 1000>.<3 digits>".
+static void Add_Dot3String(UString &s, UInt64 val)
+{
+  WCHAR text[32];
+  NumberToDot3(val, text);
+  s += text;
+}
+
+
+// Appends the rating of (info) to (s): GetRating() returns MIPS, and
+// the value is printed in the "<MIPS / 1000>.<3 digits>" format.
+static void AddRatingString(UString &s, const CTotalBenchRes &info)
+{
+  // AddUsageString(s, info);
+  // s += " ";
+  // s.Add_UInt32(GetRating(info));
+  Add_Dot3String(s, GetRating(info));
+}
+
+
+// Appends one line of ratings to (s):
+//   encoder rating, decoder rating, rating of the (enc + dec) sum,
+//   and usage of the (enc + dec) sum.
+// In PRINT_ITER_TIME builds, (ticks) is appended too in the
+// "<ticks / 1000>.<3 digits> s" format.
+static void AddRatingsLine(UString &s, const CTotalBenchRes &enc, const CTotalBenchRes &dec
+    #ifdef PRINT_ITER_TIME
+    , DWORD ticks
+    #endif
+    )
+{
+  // AddUsageString(s, enc); s += " ";
+
+  AddRatingString(s, enc);
+  s += " ";
+  AddRatingString(s, dec);
+
+  CTotalBenchRes tot_BenchRes;
+  tot_BenchRes.SetSum(enc, dec);
+
+  s += " ";
+  AddRatingString(s, tot_BenchRes);
+
+  s += " "; AddUsageString(s, tot_BenchRes);
+
+
+  #ifdef PRINT_ITER_TIME
+  s += " ";
+  {
+    // the closing parenthesis was missing here, so this branch
+    // could not be compiled with PRINT_ITER_TIME defined
+    Add_Dot3String(s, ticks);
+    s += " s";
+    // s.Add_UInt32(ticks); s += " ms";
+  }
+  #endif
+}
+
+
void CBenchmarkDialog::PrintRating(UInt64 rating, UINT controlID)
{
- SetItemText_Number(controlID, rating / 1000000, kMIPS);
+ // SetItemText_Number(controlID, GetMips(rating), kMIPS);
+ WCHAR s[64];
+ NumberToDot3(GetMips(rating), s);
+ MyStringCat(s, L" GIPS");
+ SetItemText(controlID, s);
}
void CBenchmarkDialog::PrintUsage(UInt64 usage, UINT controlID)
{
- SetItemText_Number(controlID, (usage + 5000) / 10000, TEXT("%"));
+ SetItemText_Number(controlID, GetUsagePercents(usage), TEXT("%"));
}
-void CBenchmarkDialog::PrintResults(
- UInt32 dictionarySize,
- const CBenchInfo2 &info,
- UINT usageID, UINT speedID, UINT rpuID, UINT ratingID,
- bool decompressMode)
+
+// void SetItemText_Number
+
+void CBenchmarkDialog::PrintBenchRes(
+ const CTotalBenchRes2 &info,
+ const UINT ids[])
{
- if (info.GlobalTime == 0)
+ if (info.NumIterations2 == 0)
return;
-
+ if (ids[1] != 0)
+ SetItemText_Number(ids[1], (info.Speed >> 10) / info.NumIterations2, kKBs);
+ PrintRating(info.Rating / info.NumIterations2, ids[3]);
+ PrintRating(info.RPU / info.NumIterations2, ids[2]);
+ PrintUsage(info.Usage / info.NumIterations2, ids[0]);
+ if (ids[4] != 0)
{
- const UInt64 speed = info.UnpackSize * info.NumIterations * info.GlobalFreq / info.GlobalTime;
- SetItemText_Number(speedID, speed >> 10, kKBs);
+ UInt64 val = info.UnpackSize;
+ LPCTSTR kPostfix;
+ if (val >= ((UInt64)1 << 40))
+ {
+ kPostfix = kGB;
+ val >>= 30;
+ }
+ else
+ {
+ kPostfix = kMB;
+ val >>= 20;
+ }
+ SetItemText_Number(ids[4], val, kPostfix);
}
- UInt64 rating;
- if (decompressMode)
- rating = info.GetDecompressRating();
- else
- rating = info.GetCompressRating(dictionarySize);
-
- PrintRating(rating, ratingID);
- PrintRating(info.GetRatingPerUsage(rating), rpuID);
- PrintUsage(info.GetUsage(), usageID);
}
-bool CBenchmarkDialog::OnTimer(WPARAM /* timerID */, LPARAM /* callback */)
+
+// static UInt32 k_Message_Finished_cnt = 0;
+// static UInt32 k_OnTimer_cnt = 0;
+
+bool CBenchmarkDialog::OnMessage(UINT message, WPARAM wParam, LPARAM lParam)
{
- bool printTime = true;
- if (TotalMode)
+ if (message != k_Message_Finished)
+ return CModalDialog::OnMessage(message, wParam, lParam);
+
{
- if (Sync.WasStopped())
- printTime = false;
+ if (wParam == k_Msg_WPARM_Thread_Finished)
+ {
+ _finishTime = GetTickCount();
+ _finishTime_WasSet = true;
+ MyKillTimer();
+
+ if (_thread.Wait_Close() != 0)
+ {
+ MessageBoxError_Status(L"Thread Wait Error");
+ }
+
+ if (!WasStopped_in_GUI)
+ {
+ WasStopped_in_GUI = true;
+ Disable_Stop_Button();
+ }
+
+ HRESULT res = Sync.BenchFinish_Thread_HRESULT;
+ if (res != S_OK)
+ // if (!ExitWasAsked_in_GUI || res != E_ABORT)
+ MessageBoxError_Status(HResultToMessage(res));
+
+ if (ExitWasAsked_in_GUI)
+ {
+ // SetItemText(IDT_BENCH_ERROR_MESSAGE, "before CModalDialog::OnCancel()");
+ // Sleep (2000);
+ // MessageBoxError(L"test");
+ CModalDialog::OnCancel();
+ return true;
+ }
+
+ SetItemText_Empty(IDT_BENCH_ERROR_MESSAGE);
+
+ res = Sync.BenchFinish_Task_HRESULT;
+ if (res != S_OK)
+ {
+ if (!WasStopped_in_GUI || res != E_ABORT)
+ {
+ UString m;
+ if (res == S_FALSE)
+ m = "Decoding error";
+ else if (res == CLASS_E_CLASSNOTAVAILABLE)
+ m = "Can't find 7z.dll";
+ else
+ m = HResultToMessage(res);
+ MessageBoxError_Status(m);
+ }
+ }
+
+ if (NeedRestart)
+ {
+ StartBenchmark();
+ return true;
+ }
+ }
+ // k_Message_Finished_cnt++;
+ UpdateGui();
+ return true;
}
- if (printTime)
- PrintTime();
+}
+
+
+// Timer handler: refreshes the GUI when the timer id equals kTimerID.
+// Always returns true.
+bool CBenchmarkDialog::OnTimer(WPARAM timerID, LPARAM /* callback */)
+{
+  // k_OnTimer_cnt++;
+  const bool isOurTimer = (timerID == kTimerID);
+  if (isOurTimer)
+    UpdateGui();
+  return true;
+}
+
+
+void CBenchmarkDialog::UpdateGui()
+{
+ PrintTime();
if (TotalMode)
{
@@ -491,103 +1272,147 @@ bool CBenchmarkDialog::OnTimer(WPARAM /* timerID */, LPARAM /* callback */)
}
if (wasChanged)
_consoleEdit.SetText(Bench2Text);
- return true;
+ return;
}
- SetItemText_Number(IDT_BENCH_SIZE_VAL, (Sync.ProcessedSize >> 20), kMB);
+ CSyncData sd;
+ CRecordVector<CBenchPassResult> RatingVector;
- SetItemText_Number(IDT_BENCH_PASSES_VAL, Sync.NumPasses);
+ {
+ NWindows::NSynchronization::CCriticalSectionLock lock(Sync.CS);
+ sd = Sync.sd;
- /*
- if (Sync.FirstPath)
- SetItemText_Number(IDT_BENCH_FREQ_CUR, Sync.Freq, TEXT(" MHz"));
- else
- SetItemText_Number(IDT_BENCH_FREQ_RES, Sync.Freq, TEXT(" MHz"));
- */
+ if (sd.NeedPrint_RatingVector)
+ RatingVector = Sync.RatingVector;
+
+ if (sd.NeedPrint_Freq)
+ {
+ Sync.FreqString_GUI = Sync.FreqString_Sync;
+ sd.NeedPrint_RatingVector = true;
+ }
- /*
- if (Sync.FreqWasChanged)
- {
- SetItemText(IDT_BENCH_FREQ, Sync.Freq);
- Sync.FreqWasChanged = false;
+ Sync.sd.NeedPrint_RatingVector = false;
+ Sync.sd.NeedPrint_Enc_1 = false;
+ Sync.sd.NeedPrint_Enc = false;
+ Sync.sd.NeedPrint_Dec_1 = false;
+ Sync.sd.NeedPrint_Dec = false;
+ Sync.sd.NeedPrint_Tot = false;
+ Sync.sd.NeedPrint_Freq = false;
}
- */
+ if (sd.NumPasses_Finished != NumPasses_Finished_Prev)
{
- UInt32 dicSizeTemp = (UInt32)MyMax(Sync.ProcessedSize, UInt64(1) << 20);
- dicSizeTemp = MyMin(dicSizeTemp, Sync.DictionarySize);
- PrintResults(dicSizeTemp,
- Sync.CompressingInfoTemp,
- IDT_BENCH_COMPRESS_USAGE1,
- IDT_BENCH_COMPRESS_SPEED1,
- IDT_BENCH_COMPRESS_RPU1,
- IDT_BENCH_COMPRESS_RATING1);
+ SetItemText_Number(IDT_BENCH_PASSES_VAL, sd.NumPasses_Finished, TEXT(" /"));
+ NumPasses_Finished_Prev = sd.NumPasses_Finished;
}
+ if (sd.NeedPrint_Enc_1) PrintBenchRes(sd.Enc_BenchRes_1, k_Ids_Enc_1);
+ if (sd.NeedPrint_Enc) PrintBenchRes(sd.Enc_BenchRes, k_Ids_Enc);
+ if (sd.NeedPrint_Dec_1) PrintBenchRes(sd.Dec_BenchRes_1, k_Ids_Dec_1);
+ if (sd.NeedPrint_Dec) PrintBenchRes(sd.Dec_BenchRes, k_Ids_Dec);
+
+ if (sd.BenchWasFinished && sd.NeedPrint_Tot)
{
- PrintResults(
- Sync.DictionarySize,
- Sync.CompressingInfo,
- IDT_BENCH_COMPRESS_USAGE2,
- IDT_BENCH_COMPRESS_SPEED2,
- IDT_BENCH_COMPRESS_RPU2,
- IDT_BENCH_COMPRESS_RATING2);
+ CTotalBenchRes2 tot_BenchRes = sd.Enc_BenchRes;
+ tot_BenchRes.Update_With_Res2(sd.Dec_BenchRes);
+ PrintBenchRes(tot_BenchRes, k_Ids_Tot);
}
+
+ if (sd.NeedPrint_RatingVector)
+ // for (unsigned k = 0; k < 1; k++)
{
- PrintResults(
- Sync.DictionarySize,
- Sync.DecompressingInfoTemp,
- IDT_BENCH_DECOMPR_USAGE1,
- IDT_BENCH_DECOMPR_SPEED1,
- IDT_BENCH_DECOMPR_RPU1,
- IDT_BENCH_DECOMPR_RATING1,
- true);
- }
- {
- PrintResults(
- Sync.DictionarySize,
- Sync.DecompressingInfo,
- IDT_BENCH_DECOMPR_USAGE2,
- IDT_BENCH_DECOMPR_SPEED2,
- IDT_BENCH_DECOMPR_RPU2,
- IDT_BENCH_DECOMPR_RATING2,
- true);
- if (Sync.DecompressingInfo.GlobalTime > 0 &&
- Sync.CompressingInfo.GlobalTime > 0)
+ UString s;
+ s += Sync.FreqString_GUI;
+ if (!RatingVector.IsEmpty())
+ {
+ if (!s.IsEmpty())
+ s.Add_LF();
+ s += "Compr Decompr Total CPU"
+ #ifdef PRINT_ITER_TIME
+ " Time"
+ #endif
+ ;
+ s.Add_LF();
+ }
+ // s += "GIPS GIPS GIPS % s"; s.Add_LF();
+ for (unsigned i = 0; i < RatingVector.Size(); i++)
+ {
+ if (i != 0)
+ s.Add_LF();
+ if ((int)i == sd.RatingVector_DeletedIndex)
+ {
+ s += "...";
+ s.Add_LF();
+ }
+ const CBenchPassResult &pair = RatingVector[i];
+ /*
+ s += "g:"; s.Add_UInt32((UInt32)pair.EncInfo.GlobalTime);
+ s += " u:"; s.Add_UInt32((UInt32)pair.EncInfo.UserTime);
+ s += " ";
+ */
+ AddRatingsLine(s, pair.Enc, pair.Dec
+ #ifdef PRINT_ITER_TIME
+ , pair.Ticks
+ #endif
+ );
+ /*
+ {
+ UInt64 v = i + 1;
+ if (sd.RatingVector_DeletedIndex >= 0 && i >= (unsigned)sd.RatingVector_DeletedIndex)
+ v += sd.RatingVector_NumDeleted;
+ char temp[64];
+ ConvertUInt64ToString(v, temp);
+ s += " : ";
+ s += temp;
+ }
+ */
+ }
+
+ if (sd.BenchWasFinished)
{
- UInt64 comprRating = Sync.CompressingInfo.GetCompressRating(Sync.DictionarySize);
- UInt64 decomprRating = Sync.DecompressingInfo.GetDecompressRating();
- PrintRating((comprRating + decomprRating) / 2, IDT_BENCH_TOTAL_RATING_VAL);
- PrintRating((
- Sync.CompressingInfo.GetRatingPerUsage(comprRating) +
- Sync.DecompressingInfo.GetRatingPerUsage(decomprRating)) / 2, IDT_BENCH_TOTAL_RPU_VAL);
- PrintUsage(
- (Sync.CompressingInfo.GetUsage() +
- Sync.DecompressingInfo.GetUsage()) / 2, IDT_BENCH_TOTAL_USAGE_VAL);
+ s.Add_LF();
+ s += "-------------";
+ s.Add_LF();
+ {
+ // average time is not correct because of freq detection in first iteration
+ AddRatingsLine(s, sd.Enc_BenchRes, sd.Dec_BenchRes
+ #ifdef PRINT_ITER_TIME
+ , (DWORD)(sd.TotalTicks / (sd.NumPasses_Finished ? sd.NumPasses_Finished : 1))
+ #endif
+ );
+ }
}
+ // s.Add_LF(); s += "OnTimer: "; s.Add_UInt32(k_OnTimer_cnt);
+ // s.Add_LF(); s += "finished Message: "; s.Add_UInt32(k_Message_Finished_cnt);
+ // static cnt = 0; cnt++; s.Add_LF(); s += "Print: "; s.Add_UInt32(cnt);
+ // s.Add_LF(); s += "NumEncProgress: "; s.Add_UInt32((UInt32)sd.NumEncProgress);
+ // s.Add_LF(); s += "NumDecProgress: "; s.Add_UInt32((UInt32)sd.NumDecProgress);
+ SetItemText(IDT_BENCH_LOG, s);
}
- return true;
}
+
bool CBenchmarkDialog::OnCommand(int code, int itemID, LPARAM lParam)
{
if (code == CBN_SELCHANGE &&
(itemID == IDC_BENCH_DICTIONARY ||
+ itemID == IDC_BENCH_NUM_PASSES ||
itemID == IDC_BENCH_NUM_THREADS))
{
- OnChangeSettings();
+ RestartBenchmark();
return true;
}
return CModalDialog::OnCommand(code, itemID, lParam);
}
+
bool CBenchmarkDialog::OnButtonClicked(int buttonID, HWND buttonHWND)
{
switch (buttonID)
{
case IDB_RESTART:
- OnRestartButton();
+ RestartBenchmark();
return true;
case IDB_STOP:
OnStopButton();
@@ -596,87 +1421,81 @@ bool CBenchmarkDialog::OnButtonClicked(int buttonID, HWND buttonHWND)
return CModalDialog::OnButtonClicked(buttonID, buttonHWND);
}
-struct CThreadBenchmark
-{
- CBenchmarkDialog *BenchmarkDialog;
- DECL_EXTERNAL_CODECS_LOC_VARS2;
- // UInt32 dictionarySize;
- // UInt32 numThreads;
- HRESULT Process();
- HRESULT Result;
- static THREAD_FUNC_DECL MyThreadFunction(void *param)
- {
- ((CThreadBenchmark *)param)->Result = ((CThreadBenchmark *)param)->Process();
- return 0;
- }
-};
+
+
+
+// ---------- Benchmark Thread ----------
struct CBenchCallback: public IBenchCallback
{
- UInt32 dictionarySize;
+ UInt64 dictionarySize;
CBenchProgressSync *Sync;
+ CBenchmarkDialog *BenchmarkDialog;
- // void AddCpuFreq(UInt64 cpuFreq);
- HRESULT SetFreq(bool showFreq, UInt64 cpuFreq);
HRESULT SetEncodeResult(const CBenchInfo &info, bool final);
HRESULT SetDecodeResult(const CBenchInfo &info, bool final);
};
-/*
-void CBenchCallback::AddCpuFreq(UInt64 cpuFreq)
-{
- NSynchronization::CCriticalSectionLock lock(Sync->CS);
- {
- wchar_t s[32];
- ConvertUInt64ToString(cpuFreq, s);
- Sync->Freq.Add_Space_if_NotEmpty();
- Sync->Freq += s;
- Sync->FreqWasChanged = true;
- }
-}
-*/
-
-HRESULT CBenchCallback::SetFreq(bool /* showFreq */, UInt64 /* cpuFreq */)
-{
- return S_OK;
-}
-
HRESULT CBenchCallback::SetEncodeResult(const CBenchInfo &info, bool final)
{
- NSynchronization::CCriticalSectionLock lock(Sync->CS);
- if (Sync->Changed || Sync->Paused || Sync->Stopped)
- return E_ABORT;
- Sync->ProcessedSize = info.UnpackSize * info.NumIterations;
- if (final && Sync->CompressingInfo.GlobalTime == 0)
+ bool needPost = false;
{
- (CBenchInfo&)Sync->CompressingInfo = info;
- if (Sync->CompressingInfo.GlobalTime == 0)
- Sync->CompressingInfo.GlobalTime = 1;
+ NSynchronization::CCriticalSectionLock lock(Sync->CS);
+ if (Sync->Exit)
+ return E_ABORT;
+ CSyncData &sd = Sync->sd;
+ // sd.NumEncProgress++;
+ CTotalBenchRes2 &br = sd.Enc_BenchRes_1;
+ {
+ UInt64 dictSize = Sync->DictSize;
+ if (final)
+ {
+ // sd.EncInfo = info;
+ }
+ else
+ {
+ /* if (!final), then CBenchInfo::NumIterations means totalNumber of threads.
+ so we can reduce the dictionary */
+ if (dictSize > info.UnpackSize)
+ dictSize = info.UnpackSize;
+ }
+ br.Rating = info.GetRating_LzmaEnc(dictSize);
+ }
+ br.SetFrom_BenchInfo(info);
+ sd.NeedPrint_Enc_1 = true;
+ if (final)
+ {
+ sd.Enc_BenchRes.Update_With_Res2(br);
+ sd.NeedPrint_Enc = true;
+ needPost = true;
+ }
}
- else
- (CBenchInfo&)Sync->CompressingInfoTemp = info;
+
+ if (needPost)
+ BenchmarkDialog->PostMsg(k_Message_Finished, k_Msg_WPARM_Enc1_Finished);
return S_OK;
}
+
HRESULT CBenchCallback::SetDecodeResult(const CBenchInfo &info, bool final)
{
NSynchronization::CCriticalSectionLock lock(Sync->CS);
- if (Sync->Changed || Sync->Paused || Sync->Stopped)
+ if (Sync->Exit)
return E_ABORT;
- CBenchInfo info2 = info;
- if (final && Sync->DecompressingInfo.GlobalTime == 0)
- {
- (CBenchInfo&)Sync->DecompressingInfo = info2;
- if (Sync->DecompressingInfo.GlobalTime == 0)
- Sync->DecompressingInfo.GlobalTime = 1;
- }
- else
- (CBenchInfo&)Sync->DecompressingInfoTemp = info2;
+ CSyncData &sd = Sync->sd;
+ // sd.NumDecProgress++;
+ CTotalBenchRes2 &br = sd.Dec_BenchRes_1;
+ br.Rating = info.GetRating_LzmaDec();
+ br.SetFrom_BenchInfo(info);
+ sd.NeedPrint_Dec_1 = true;
+ if (final)
+ sd.Dec_BenchRes.Update_With_Res2(br);
return S_OK;
}
+
struct CBenchCallback2: public IBenchPrintCallback
{
CBenchProgressSync *Sync;
@@ -704,74 +1523,145 @@ void CBenchCallback2::NewLine()
HRESULT CBenchCallback2::CheckBreak()
{
- if (Sync->Changed || Sync->Paused || Sync->Stopped)
+ if (Sync->Exit)
return E_ABORT;
return S_OK;
}
-/*
struct CFreqCallback: public IBenchFreqCallback
{
- CBenchProgressSync *Sync;
+ CBenchmarkDialog *BenchmarkDialog;
- virtual void AddCpuFreq(UInt64 freq);
+ virtual HRESULT AddCpuFreq(unsigned numThreads, UInt64 freq, UInt64 usage);
+ virtual HRESULT FreqsFinished(unsigned numThreads);
};
-void CFreqCallback::AddCpuFreq(UInt64 freq)
+HRESULT CFreqCallback::AddCpuFreq(unsigned numThreads, UInt64 freq, UInt64 usage)
{
- NSynchronization::CCriticalSectionLock lock(Sync->CS);
- Sync->Freq = freq;
+ HRESULT res;
+ {
+ CBenchProgressSync &sync = BenchmarkDialog->Sync;
+ NSynchronization::CCriticalSectionLock lock(sync.CS);
+ UString &s = sync.FreqString_Sync;
+ if (sync.NumFreqThreadsPrev != numThreads)
+ {
+ sync.NumFreqThreadsPrev = numThreads;
+ if (!s.IsEmpty())
+ s.Add_LF();
+ s.Add_UInt32(numThreads);
+ s += "T Frequency (MHz):";
+ s.Add_LF();
+ }
+ s += " ";
+ char temp[64];
+ if (numThreads != 1)
+ {
+ ConvertUInt64ToString(GetUsagePercents(usage), temp);
+ s += temp;
+ s += '%';
+ s.Add_Space();
+ }
+ ConvertUInt64ToString(GetMips(freq), temp);
+ s += temp;
+ // BenchmarkDialog->Sync.sd.NeedPrint_Freq = true;
+ res = sync.Exit ? E_ABORT : S_OK;
+ }
+ // BenchmarkDialog->PostMsg(k_Message_Finished, k_Msg_WPARM_Enc1_Finished);
+ return res;
+}
+
+// Sets sd.NeedPrint_Freq while holding sync.CS and posts k_Message_Finished
+// to the dialog twice: once while the lock is held and once after it is
+// released. Returns E_ABORT if sync.Exit was set (as read under the lock),
+// S_OK otherwise.
+HRESULT CFreqCallback::FreqsFinished(unsigned /* numThreads */)
+{
+  CBenchProgressSync &sync = BenchmarkDialog->Sync;
+  bool exitRequested;
+  {
+    NSynchronization::CCriticalSectionLock lock(sync.CS);
+    sync.sd.NeedPrint_Freq = true;
+    BenchmarkDialog->PostMsg(k_Message_Finished, k_Msg_WPARM_Enc1_Finished);
+    exitRequested = sync.Exit;
+  }
+  BenchmarkDialog->PostMsg(k_Message_Finished, k_Msg_WPARM_Enc1_Finished);
+  return exitRequested ? E_ABORT : S_OK;
+}
-*/
+// define USE_DUMMY only for debug
+// #define USE_DUMMY
+#ifdef USE_DUMMY
+static unsigned dummy = 1;
+// Debug-only busy loop: adds the file-level (dummy) value to a running sum
+// up to (limit) times, stopping early if the sum becomes 0.
+// Returns the final sum.
+static unsigned Dummy(unsigned limit)
+{
+  unsigned sum = 0;
+  unsigned k = 0;
+  while (k < limit)
+  {
+    sum += dummy;
+    if (sum == 0)
+      break;
+    k++;
+  }
+  return sum;
+}
+#endif
+
+
HRESULT CThreadBenchmark::Process()
{
+ /* the first benchmark pass can be slow,
+ if we run benchmark while the window is being created,
+ and (no freq detection loop) && (dictionary is small) (-mtic is small) */
+
+ // Sleep(300); // for debug
+ #ifdef USE_DUMMY
+ Dummy(1000 * 1000 * 1000); // for debug
+ #endif
+
CBenchProgressSync &sync = BenchmarkDialog->Sync;
- sync.WaitCreating();
+ HRESULT finishHRESULT = S_OK;
+
try
{
- for (;;)
+ for (UInt32 passIndex = 0;; passIndex++)
{
- if (sync.WasStopped())
- return 0;
- if (sync.WasPaused())
- {
- Sleep(200);
- continue;
- }
- UInt32 dictionarySize;
+ // throw 1; // to debug
+ // throw CSystemException(E_INVALIDARG); // to debug
+
+ UInt64 dictionarySize;
UInt32 numThreads;
{
NSynchronization::CCriticalSectionLock lock(sync.CS);
- if (sync.Stopped || sync.Paused)
- continue;
- if (sync.Changed)
- sync.Init();
- dictionarySize = sync.DictionarySize;
+ if (sync.Exit)
+ break;
+ dictionarySize = sync.DictSize;
numThreads = sync.NumThreads;
- /*
- if (sync.CompressingInfo.GlobalTime != 0)
- sync.FirstPath = false;
- */
}
+
+ #ifdef PRINT_ITER_TIME
+ const DWORD startTick = GetTickCount();
+ #endif
CBenchCallback callback;
+
callback.dictionarySize = dictionarySize;
callback.Sync = &sync;
+ callback.BenchmarkDialog = BenchmarkDialog;
+
CBenchCallback2 callback2;
callback2.TotalMode = BenchmarkDialog->TotalMode;
callback2.Sync = &sync;
- // CFreqCallback freqCallback;
- // freqCallback.Sync = &sync;
+
+ CFreqCallback freqCallback;
+ freqCallback.BenchmarkDialog = BenchmarkDialog;
+
HRESULT result;
try
{
CObjectVector<CProperty> props;
+
+ props = BenchmarkDialog->Props;
+
if (BenchmarkDialog->TotalMode)
{
props = BenchmarkDialog->Props;
@@ -787,8 +1677,8 @@ HRESULT CThreadBenchmark::Process()
{
CProperty prop;
prop.Name = 'd';
- prop.Name.Add_UInt32(dictionarySize);
- prop.Name += 'b';
+ prop.Name.Add_UInt32((UInt32)(dictionarySize >> 10));
+ prop.Name += 'k';
props.Add(prop);
}
}
@@ -796,57 +1686,133 @@ HRESULT CThreadBenchmark::Process()
result = Bench(EXTERNAL_CODECS_LOC_VARS
BenchmarkDialog->TotalMode ? &callback2 : NULL,
BenchmarkDialog->TotalMode ? NULL : &callback,
- // &freqCallback,
- props, 1, false);
+ props, 1, false,
+ (!BenchmarkDialog->TotalMode) && passIndex == 0 ? &freqCallback: NULL);
- if (BenchmarkDialog->TotalMode)
- {
- sync.Stop();
- }
+ // result = S_FALSE; // for debug;
+ // throw 1;
}
catch(...)
{
result = E_FAIL;
}
+ #ifdef PRINT_ITER_TIME
+ const DWORD numTicks = GetTickCount() - startTick;
+ #endif
+
+ bool finished = true;
+
+ NSynchronization::CCriticalSectionLock lock(sync.CS);
+
if (result != S_OK)
{
- if (result != E_ABORT)
+ sync.BenchFinish_Task_HRESULT = result;
+ break;
+ }
+
+ {
+ CSyncData &sd = sync.sd;
+
+ sd.NumPasses_Finished++;
+ #ifdef PRINT_ITER_TIME
+ sd.TotalTicks += numTicks;
+ #endif
+
+ if (BenchmarkDialog->TotalMode)
+ break;
+
{
+ CTotalBenchRes tot_BenchRes = sd.Enc_BenchRes_1;
+ tot_BenchRes.Update_With_Res(sd.Dec_BenchRes_1);
+
+ sd.NeedPrint_RatingVector = true;
{
- NSynchronization::CCriticalSectionLock lock(sync.CS);
- sync.Pause();
+ CBenchPassResult pair;
+ // pair.EncInfo = sd.EncInfo; // for debug
+ pair.Enc = sd.Enc_BenchRes_1;
+ pair.Dec = sd.Dec_BenchRes_1;
+ #ifdef PRINT_ITER_TIME
+ pair.Ticks = numTicks;
+ #endif
+ sync.RatingVector.Add(pair);
+ // pair.Dec_Defined = true;
}
- UString message;
- if (result == S_FALSE)
- message = "Decoding error";
- else if (result == CLASS_E_CLASSNOTAVAILABLE)
- message = "Can't find 7z.dll";
- else
- message = HResultToMessage(result);
- BenchmarkDialog->MessageBoxError(message);
+ }
+
+ sd.NeedPrint_Dec = true;
+ sd.NeedPrint_Tot = true;
+
+ if (sync.RatingVector.Size() > kRatingVector_NumBundlesMax)
+ {
+ // sd.RatingVector_NumDeleted++;
+ sd.RatingVector_DeletedIndex = (int)(kRatingVector_NumBundlesMax / 4);
+ sync.RatingVector.Delete((unsigned)(sd.RatingVector_DeletedIndex));
+ }
+
+ if (sync.sd.NumPasses_Finished < sync.NumPasses_Limit)
+ finished = false;
+ else
+ {
+ sync.sd.BenchWasFinished = true;
+ // BenchmarkDialog->_finishTime = GetTickCount();
+ // return 0;
}
}
- else
+
+ if (BenchmarkDialog->TotalMode)
+ break;
+
+ /*
+ if (newTick - prevTick < 1000)
+ numSameTick++;
+ if (numSameTick > 5 || finished)
{
- NSynchronization::CCriticalSectionLock lock(sync.CS);
- sync.NumPasses++;
+ prevTick = newTick;
+ numSameTick = 0;
+ */
+ // for (unsigned i = 0; i < 1; i++)
+ {
+ // we suppose that PostMsg messages will be processed in order.
+ if (!BenchmarkDialog->PostMsg_Finish(k_Msg_WPARM_Iter_Finished))
+ {
+ finished = true;
+ finishHRESULT = E_FAIL;
+ // throw 1234567;
+ }
}
+ if (finished)
+ break;
}
// return S_OK;
}
catch(CSystemException &e)
{
- BenchmarkDialog->MessageBoxError(HResultToMessage(e.ErrorCode));
- return E_FAIL;
+ finishHRESULT = e.ErrorCode;
+ // BenchmarkDialog->MessageBoxError(HResultToMessage(e.ErrorCode));
+ // return E_FAIL;
}
catch(...)
{
- BenchmarkDialog->MessageBoxError(HResultToMessage(E_FAIL));
- return E_FAIL;
+ finishHRESULT = E_FAIL;
+ // BenchmarkDialog->MessageBoxError(HResultToMessage(E_FAIL));
+ // return E_FAIL;
+ }
+
+ if (finishHRESULT != S_OK)
+ {
+ NSynchronization::CCriticalSectionLock lock(sync.CS);
+ sync.BenchFinish_Thread_HRESULT = finishHRESULT;
}
+ if (!BenchmarkDialog->PostMsg_Finish(k_Msg_WPARM_Thread_Finished))
+ {
+ // sync.BenchFinish_Thread_HRESULT = E_FAIL;
+ }
+ return 0;
}
+
+
static void ParseNumberString(const UString &s, NCOM::CPropVariant &prop)
{
const wchar_t *end;
@@ -859,20 +1825,21 @@ static void ParseNumberString(const UString &s, NCOM::CPropVariant &prop)
prop = result;
}
+
HRESULT Benchmark(
DECL_EXTERNAL_CODECS_LOC_VARS
- const CObjectVector<CProperty> &props, HWND hwndParent)
+ const CObjectVector<CProperty> &props, UInt32 numIterations, HWND hwndParent)
{
- CThreadBenchmark benchmarker;
- #ifdef EXTERNAL_CODECS
- benchmarker.__externalCodecs = __externalCodecs;
- #endif
-
CBenchmarkDialog bd;
- bd.Props = props;
+
bd.TotalMode = false;
- bd.Sync.DictionarySize = (UInt32)(Int32)-1;
+ bd.Props = props;
+ if (numIterations == 0)
+ numIterations = 1;
+ bd.Sync.NumPasses_Limit = numIterations;
+ bd.Sync.DictSize = (UInt64)(Int64)-1;
bd.Sync.NumThreads = (UInt32)(Int32)-1;
+ bd.Sync.Level = -1;
COneMethodInfo method;
@@ -905,13 +1872,17 @@ HRESULT Benchmark(
#endif
continue;
}
- if (name.IsEqualTo("testtime"))
+ /*
+ if (name.IsEqualTo("time"))
{
// UInt32 testTime = 4;
// RINOK(ParsePropToUInt32(L"", propVariant, testTime));
continue;
}
RINOK(method.ParseMethodFromPROPVARIANT(name, propVariant));
+ */
+ // here we need to parse DictSize property, and ignore unknown properties
+ method.ParseMethodFromPROPVARIANT(name, propVariant);
}
if (bd.TotalMode)
@@ -923,17 +1894,37 @@ HRESULT Benchmark(
}
{
- UInt32 dict;
+ UInt64 dict;
if (method.Get_DicSize(dict))
- bd.Sync.DictionarySize = dict;
+ bd.Sync.DictSize = dict;
+ }
+ bd.Sync.Level = method.GetLevel();
+
+ // Dummy(1000 * 1000 * 1);
+
+ {
+ CThreadBenchmark &benchmarker = bd._threadBenchmark;
+ #ifdef EXTERNAL_CODECS
+ benchmarker.__externalCodecs = __externalCodecs;
+ #endif
+ benchmarker.BenchmarkDialog = &bd;
}
- benchmarker.BenchmarkDialog = &bd;
+ bd.Create(hwndParent);
+ return S_OK;
+}
+
+
+CBenchmarkDialog::~CBenchmarkDialog()
+{
+ if (_thread.IsCreated())
{
- NWindows::CThread thread;
- RINOK(thread.Create(CThreadBenchmark::MyThreadFunction, &benchmarker));
- bd.Create(hwndParent);
- return thread.Wait_Close();
+ /* the following code will not be executed in the normal code flow.
+ it can be reached if there is some internal failure in the dialog code. */
+ Attach(NULL);
+ MessageBoxError(L"The flaw in benchmark thread code");
+ Sync.SendExit();
+ _thread.Wait_Close();
}
}
diff --git a/CPP/7zip/UI/GUI/BenchmarkDialog.h b/CPP/7zip/UI/GUI/BenchmarkDialog.h
index 1bad8ea9..a280592e 100644
--- a/CPP/7zip/UI/GUI/BenchmarkDialog.h
+++ b/CPP/7zip/UI/GUI/BenchmarkDialog.h
@@ -3,190 +3,13 @@
#ifndef __BENCHMARK_DIALOG_H
#define __BENCHMARK_DIALOG_H
-#include "../../../Windows/Synchronization.h"
+#include "../../Common/CreateCoder.h"
+#include "../../UI/Common/Property.h"
-#include "../../../Windows/Control/ComboBox.h"
-#include "../../../Windows/Control/Edit.h"
-
-#include "../Common/Bench.h"
-
-#include "../FileManager/DialogSize.h"
-
-#include "BenchmarkDialogRes.h"
-
-struct CBenchInfo2 : public CBenchInfo
-{
- void Init() { GlobalTime = UserTime = 0; }
-
- UInt64 GetCompressRating(UInt32 dictSize) const
- {
- return ::GetCompressRating(dictSize, GlobalTime, GlobalFreq, UnpackSize * NumIterations);
- }
-
- UInt64 GetDecompressRating() const
- {
- return ::GetDecompressRating(GlobalTime, GlobalFreq, UnpackSize, PackSize, NumIterations);
- }
-};
-
-class CBenchProgressSync
-{
-public:
- bool Stopped;
- bool Paused;
- bool Changed;
- UInt32 DictionarySize;
- UInt32 NumThreads;
- UInt64 NumPasses;
- NWindows::NSynchronization::CManualResetEvent _startEvent;
- NWindows::NSynchronization::CCriticalSection CS;
-
- CBenchInfo2 CompressingInfoTemp;
- CBenchInfo2 CompressingInfo;
- UInt64 ProcessedSize;
-
- CBenchInfo2 DecompressingInfoTemp;
- CBenchInfo2 DecompressingInfo;
-
- AString Text;
- bool TextWasChanged;
-
- // bool FirstPath;
- // UInt64 Freq;
- // UString Freq;
- // bool FreqWasChanged;
-
- CBenchProgressSync()
- {
- if (_startEvent.Create() != S_OK)
- throw 3986437;
- }
-
- void Init()
- {
- Changed = false;
- Stopped = false;
- Paused = false;
- CompressingInfoTemp.Init();
- CompressingInfo.Init();
- ProcessedSize = 0;
-
- DecompressingInfoTemp.Init();
- DecompressingInfo.Init();
-
- NumPasses = 0;
-
- // FirstPath = true;
- // Freq = 0;
- // Freq.SetFromAscii("MHz: ");
- // FreqWasChanged = true;
-
- Text.Empty();
- TextWasChanged = true;
- }
-
- void Stop()
- {
- NWindows::NSynchronization::CCriticalSectionLock lock(CS);
- Stopped = true;
- }
- bool WasStopped()
- {
- NWindows::NSynchronization::CCriticalSectionLock lock(CS);
- return Stopped;
- }
- void Pause()
- {
- NWindows::NSynchronization::CCriticalSectionLock lock(CS);
- Paused = true;
- }
- void Start()
- {
- NWindows::NSynchronization::CCriticalSectionLock lock(CS);
- Paused = false;
- }
- bool WasPaused()
- {
- NWindows::NSynchronization::CCriticalSectionLock lock(CS);
- return Paused;
- }
- void WaitCreating() { _startEvent.Lock(); }
-};
-
-struct CMyFont
-{
- HFONT _font;
- CMyFont(): _font(NULL) {}
- ~CMyFont()
- {
- if (_font)
- DeleteObject(_font);
- }
- void Create(const LOGFONT *lplf)
- {
- _font = CreateFontIndirect(lplf);
- }
-};
-
-
-class CBenchmarkDialog:
- public NWindows::NControl::CModalDialog
-{
- NWindows::NControl::CComboBox m_Dictionary;
- NWindows::NControl::CComboBox m_NumThreads;
- NWindows::NControl::CEdit _consoleEdit;
- UINT_PTR _timer;
- UInt32 _startTime;
- CMyFont _font;
-
- UInt64 ramSize;
- bool ramSize_Defined;
-
- bool OnSize(WPARAM /* wParam */, int xSize, int ySize);
- bool OnTimer(WPARAM timerID, LPARAM callback);
- virtual bool OnInit();
- void OnRestartButton();
- void OnStopButton();
- void OnHelp();
- virtual void OnCancel();
- bool OnButtonClicked(int buttonID, HWND buttonHWND);
- bool OnCommand(int code, int itemID, LPARAM lParam);
-
- void PrintTime();
- void PrintRating(UInt64 rating, UINT controlID);
- void PrintUsage(UInt64 usage, UINT controlID);
- void PrintResults(
- UInt32 dictionarySize,
- const CBenchInfo2 &info, UINT usageID, UINT speedID, UINT rpuID, UINT ratingID,
- bool decompressMode = false);
-
- UInt32 GetNumberOfThreads();
- UInt32 OnChangeDictionary();
- void OnChangeSettings();
-
- void SetItemText_Number(int itemID, UInt64 val, LPCTSTR post = NULL);
-
-public:
- CBenchProgressSync Sync;
- bool TotalMode;
- CObjectVector<CProperty> Props;
-
- CSysString Bench2Text;
-
- CBenchmarkDialog(): _timer(0), TotalMode(false) {}
- INT_PTR Create(HWND wndParent = 0)
- {
- BIG_DIALOG_SIZE(332, 228);
- return CModalDialog::Create(TotalMode ? IDD_BENCH_TOTAL : SIZED_DIALOG(IDD_BENCH), wndParent);
- }
- void MessageBoxError(LPCWSTR message)
- {
- MessageBoxW(*this, message, L"7-Zip", MB_ICONERROR);
- }
-};
+const UInt32 k_NumBenchIterations_Default = 10;
HRESULT Benchmark(
DECL_EXTERNAL_CODECS_LOC_VARS
- const CObjectVector<CProperty> &props, HWND hwndParent = NULL);
+ const CObjectVector<CProperty> &props, UInt32 numIterations, HWND hwndParent = NULL);
#endif
diff --git a/CPP/7zip/UI/GUI/BenchmarkDialog.rc b/CPP/7zip/UI/GUI/BenchmarkDialog.rc
index a8455a0f..3e73e46d 100644
--- a/CPP/7zip/UI/GUI/BenchmarkDialog.rc
+++ b/CPP/7zip/UI/GUI/BenchmarkDialog.rc
@@ -23,25 +23,29 @@
#define g4x (m + m)
-#define sRating 60
-#define sSpeed 60
-#define sUsage 60
-#define sRpu 60
-#define sFreq 34
+#define sRating 58
+#define sSpeed 60
+#define sUsage 46
+#define sRpu 58
+#define sSize 52
+// #define sFreq 34
#define xRating (xs - m - m - sRating)
#define xRpu (xRating - sRpu)
#define xUsage (xRpu - sUsage)
#define xSpeed (xUsage - sSpeed)
+#define xSize (xSpeed - sSize)
-#define xFreq (xUsage - sFreq)
+// #define xFreq (xUsage - sFreq)
-#define sLabel (xUsage - g4x)
+#define sLabel (xSize - g4x)
#define sTotalRating (sUsage + sRpu + sRating + m + m)
#define xTotalRating (xs - m - sTotalRating)
-#define g2xs 58
-#define g3xs 36
+#define sPasses 60
+
+#define g2xs 60
+#define g3xs 64
#define g3x (m + g2xs)
#undef GROUP_Y_SIZE
@@ -56,7 +60,10 @@
#define g7xs bx1 - m - g0xs - g1xs - m
-IDD_BENCH DIALOG 0, 0, xs, ys MY_MODAL_DIALOG_STYLE | WS_MINIMIZEBOX
+#define sLog 140 + 0
+
+// MY_MODAL_DIALOG_STYLE
+IDD_BENCH DIALOG 0, 0, xs + sLog, ys MY_MODAL_RESIZE_DIALOG_STYLE | WS_MINIMIZEBOX
CAPTION "Benchmark"
MY_FONT
BEGIN
@@ -70,71 +77,79 @@ BEGIN
COMBOBOX IDC_BENCH_DICTIONARY, g1x, m, g1xs, 140, MY_COMBO
LTEXT "Memory usage:", IDT_BENCH_MEMORY, gc2x, m - 2, g7xs, 8
- LTEXT "", IDT_BENCH_MEMORY_VAL, gc2x, m + 8, g7xs, 8
+ LTEXT "", IDT_BENCH_MEMORY_VAL, gc2x, m + 8, g7xs, MY_TEXT_NOPREFIX
LTEXT "&Number of CPU threads:", IDT_BENCH_NUM_THREADS, m, 30, g0xs, 8
COMBOBOX IDC_BENCH_NUM_THREADS, g1x, 29, g1xs, 140, MY_COMBO
- LTEXT "", IDT_BENCH_HARDWARE_THREADS, gc2x, 32, g7xs, 8
+ LTEXT "", IDT_BENCH_HARDWARE_THREADS, gc2x, 30, g7xs, MY_TEXT_NOPREFIX
- RTEXT "CPU Usage", IDT_BENCH_USAGE_LABEL, xUsage, 54, sUsage, 8
- RTEXT "Speed", IDT_BENCH_SPEED, xSpeed, 54, sSpeed, 8
- RTEXT "Rating / Usage", IDT_BENCH_RPU_LABEL, xRpu, 54, sRpu, 8
- RTEXT "Rating", IDT_BENCH_RATING_LABEL, xRating, 54, sRating, 8
+ RTEXT "Size", IDT_BENCH_SIZE, xSize, 54, sSize, MY_TEXT_NOPREFIX
+ RTEXT "CPU Usage", IDT_BENCH_USAGE_LABEL, xUsage, 54, sUsage, MY_TEXT_NOPREFIX
+ RTEXT "Speed", IDT_BENCH_SPEED, xSpeed, 54, sSpeed, MY_TEXT_NOPREFIX
+ RTEXT "Rating / Usage", IDT_BENCH_RPU_LABEL, xRpu, 54, sRpu, MY_TEXT_NOPREFIX
+ RTEXT "Rating", IDT_BENCH_RATING_LABEL, xRating, 54, sRating, MY_TEXT_NOPREFIX
GROUPBOX "Compressing", IDG_BENCH_COMPRESSING, m, 64, xc, GROUP_Y_SIZE
- LTEXT "Current", IDT_BENCH_CURRENT, g4x, 76, sLabel, 8
- RTEXT "", IDT_BENCH_COMPRESS_USAGE1, xUsage, 76, sUsage, 8
- RTEXT "", IDT_BENCH_COMPRESS_SPEED1, xSpeed, 76, sSpeed, 8
- RTEXT "", IDT_BENCH_COMPRESS_RPU1, xRpu, 76, sRpu, 8
- RTEXT "", IDT_BENCH_COMPRESS_RATING1, xRating, 76, sRating, 8
+ LTEXT "Current", IDT_BENCH_CURRENT, g4x, 76, sLabel, MY_TEXT_NOPREFIX
+ RTEXT "", IDT_BENCH_COMPRESS_SIZE1, xSize, 76, sSize, MY_TEXT_NOPREFIX
+ RTEXT "", IDT_BENCH_COMPRESS_USAGE1, xUsage, 76, sUsage, MY_TEXT_NOPREFIX
+ RTEXT "", IDT_BENCH_COMPRESS_SPEED1, xSpeed, 76, sSpeed, MY_TEXT_NOPREFIX
+ RTEXT "", IDT_BENCH_COMPRESS_RPU1, xRpu, 76, sRpu, MY_TEXT_NOPREFIX
+ RTEXT "", IDT_BENCH_COMPRESS_RATING1, xRating, 76, sRating, MY_TEXT_NOPREFIX
- LTEXT "Resulting", IDT_BENCH_RESULTING, g4x, 89, sLabel, 8
- RTEXT "", IDT_BENCH_COMPRESS_USAGE2, xUsage, 89, sUsage, 8
- RTEXT "", IDT_BENCH_COMPRESS_SPEED2, xSpeed, 89, sSpeed, 8
- RTEXT "", IDT_BENCH_COMPRESS_RPU2, xRpu, 89, sRpu, 8
- RTEXT "", IDT_BENCH_COMPRESS_RATING2, xRating, 89, sRating, 8
+ LTEXT "Resulting", IDT_BENCH_RESULTING, g4x, 89, sLabel, MY_TEXT_NOPREFIX
+ RTEXT "", IDT_BENCH_COMPRESS_SIZE2, xSize, 89, sSize, MY_TEXT_NOPREFIX
+ RTEXT "", IDT_BENCH_COMPRESS_USAGE2, xUsage, 89, sUsage, MY_TEXT_NOPREFIX
+ RTEXT "", IDT_BENCH_COMPRESS_SPEED2, xSpeed, 89, sSpeed, MY_TEXT_NOPREFIX
+ RTEXT "", IDT_BENCH_COMPRESS_RPU2, xRpu, 89, sRpu, MY_TEXT_NOPREFIX
+ RTEXT "", IDT_BENCH_COMPRESS_RATING2, xRating, 89, sRating, MY_TEXT_NOPREFIX
GROUPBOX "Decompressing", IDG_BENCH_DECOMPRESSING, m, 111, xc, GROUP_Y_SIZE
- LTEXT "Current", IDT_BENCH_CURRENT2, g4x, 123, sLabel, 8
- RTEXT "", IDT_BENCH_DECOMPR_USAGE1, xUsage, 123, sUsage, 8
- RTEXT "", IDT_BENCH_DECOMPR_SPEED1, xSpeed, 123, sSpeed, 8
- RTEXT "", IDT_BENCH_DECOMPR_RPU1, xRpu, 123, sRpu, 8
- RTEXT "", IDT_BENCH_DECOMPR_RATING1, xRating, 123, sRating, 8
+ LTEXT "Current", IDT_BENCH_CURRENT2, g4x, 123, sLabel, MY_TEXT_NOPREFIX
+ RTEXT "", IDT_BENCH_DECOMPR_SIZE1, xSize, 123, sSize, MY_TEXT_NOPREFIX
+ RTEXT "", IDT_BENCH_DECOMPR_USAGE1, xUsage, 123, sUsage, MY_TEXT_NOPREFIX
+ RTEXT "", IDT_BENCH_DECOMPR_SPEED1, xSpeed, 123, sSpeed, MY_TEXT_NOPREFIX
+ RTEXT "", IDT_BENCH_DECOMPR_RPU1, xRpu, 123, sRpu, MY_TEXT_NOPREFIX
+ RTEXT "", IDT_BENCH_DECOMPR_RATING1, xRating, 123, sRating, MY_TEXT_NOPREFIX
- LTEXT "Resulting", IDT_BENCH_RESULTING2, g4x, 136, sLabel, 8
- RTEXT "", IDT_BENCH_DECOMPR_USAGE2, xUsage, 136, sUsage, 8
- RTEXT "", IDT_BENCH_DECOMPR_SPEED2, xSpeed, 136, sSpeed, 8
- RTEXT "", IDT_BENCH_DECOMPR_RPU2, xRpu, 136, sRpu, 8
- RTEXT "", IDT_BENCH_DECOMPR_RATING2, xRating, 136, sRating, 8
+ LTEXT "Resulting", IDT_BENCH_RESULTING2, g4x, 136, sLabel, MY_TEXT_NOPREFIX
+ RTEXT "", IDT_BENCH_DECOMPR_SIZE2, xSize, 136, sSize, MY_TEXT_NOPREFIX
+ RTEXT "", IDT_BENCH_DECOMPR_USAGE2, xUsage, 136, sUsage, MY_TEXT_NOPREFIX
+ RTEXT "", IDT_BENCH_DECOMPR_SPEED2, xSpeed, 136, sSpeed, MY_TEXT_NOPREFIX
+ RTEXT "", IDT_BENCH_DECOMPR_RPU2, xRpu, 136, sRpu, MY_TEXT_NOPREFIX
+ RTEXT "", IDT_BENCH_DECOMPR_RATING2, xRating, 136, sRating, MY_TEXT_NOPREFIX
+ RTEXT "", IDT_BENCH_ERROR_MESSAGE, m, 155, xc, MY_TEXT_NOPREFIX
+
GROUPBOX "Total Rating", IDG_BENCH_TOTAL_RATING, xTotalRating, 163, sTotalRating, GROUP_Y2_SIZE
- RTEXT "", IDT_BENCH_TOTAL_USAGE_VAL, xUsage, 176, sUsage, 8
- RTEXT "", IDT_BENCH_TOTAL_RPU_VAL, xRpu, 176, sRpu, 8
- RTEXT "", IDT_BENCH_TOTAL_RATING_VAL, xRating, 176, sRating, 8
+ RTEXT "", IDT_BENCH_TOTAL_USAGE_VAL, xUsage, 176, sUsage, MY_TEXT_NOPREFIX
+ RTEXT "", IDT_BENCH_TOTAL_RPU_VAL, xRpu, 176, sRpu, MY_TEXT_NOPREFIX
+ RTEXT "", IDT_BENCH_TOTAL_RATING_VAL, xRating, 176, sRating, MY_TEXT_NOPREFIX
- RTEXT "", IDT_BENCH_CPU, m, 202, xc, 8
- RTEXT "", IDT_BENCH_VER, m + xc - 100, 216, 100, 8
+ // RTEXT "", IDT_BENCH_CPU, m + sPasses, 202, xc - sPasses, 16, SS_NOPREFIX
+ RTEXT "", IDT_BENCH_CPU, m + 0, 202, xc - 0, 16, SS_NOPREFIX
+ RTEXT "", IDT_BENCH_VER, m + xc - 100, 222, 100, MY_TEXT_NOPREFIX
- LTEXT "", IDT_BENCH_CPU_FEATURE, m, 228, xc - 100, 8
- LTEXT "", IDT_BENCH_SYS1, m, 238, xc - 140, 8
- LTEXT "", IDT_BENCH_SYS2, m, 248, xc - 140, 8
-
- // LTEXT "", IDT_BENCH_SYSTEM, m, 232, xc - 80, 8
- // LTEXT "", IDT_BENCH_FREQ_RES, m, 242, 80, 8
-
+ LTEXT "", IDT_BENCH_CPU_FEATURE, m, 222, xc - 100, 16, SS_NOPREFIX // - 100
+ LTEXT "", IDT_BENCH_SYS1, m, 238, xc - 140, MY_TEXT_NOPREFIX
+ LTEXT "", IDT_BENCH_SYS2, m, 248, xc - 140, MY_TEXT_NOPREFIX
+
+ LTEXT "", IDT_BENCH_LOG, m + xc + m, m, sLog - m, yc, SS_LEFTNOWORDWRAP | SS_NOPREFIX
LTEXT "Elapsed time:", IDT_BENCH_ELAPSED, m, 163, g2xs, 8
- LTEXT "Size:", IDT_BENCH_SIZE, m, 176, g2xs, 8
- LTEXT "Passes:", IDT_BENCH_PASSES, m, 189, g2xs, 8
+// LTEXT "Size:", IDT_BENCH_SIZE, m, 176, g2xs, 8
+ LTEXT "Passes:", IDT_BENCH_PASSES, m, 176, g2xs, 8
+ COMBOBOX IDC_BENCH_NUM_PASSES, m, 187, sPasses, 140, MY_COMBO
+
+ RTEXT "", IDT_BENCH_ELAPSED_VAL, g3x, 163, g3xs, MY_TEXT_NOPREFIX
+ // RTEXT "", IDT_BENCH_SIZE_VAL, g3x, 176, g3xs, MY_TEXT_NOPREFIX
+ RTEXT "", IDT_BENCH_PASSES_VAL, g3x, 176, g3xs, MY_TEXT_NOPREFIX
- RTEXT "", IDT_BENCH_ELAPSED_VAL, g3x, 163, g3xs, 8
- RTEXT "", IDT_BENCH_SIZE_VAL, g3x, 176, g3xs, 8
- RTEXT "", IDT_BENCH_PASSES_VAL, g3x, 189, g3xs, 8
END
#ifdef UNDER_CE
diff --git a/CPP/7zip/UI/GUI/BenchmarkDialogRes.h b/CPP/7zip/UI/GUI/BenchmarkDialogRes.h
index 8ee4f681..b7d54b77 100644
--- a/CPP/7zip/UI/GUI/BenchmarkDialogRes.h
+++ b/CPP/7zip/UI/GUI/BenchmarkDialogRes.h
@@ -38,9 +38,17 @@
#define IDT_BENCH_TOTAL_USAGE_VAL 133
#define IDT_BENCH_ELAPSED_VAL 140
-#define IDT_BENCH_SIZE_VAL 141
+// #define IDT_BENCH_SIZE_VAL 141
#define IDT_BENCH_PASSES_VAL 142
+#define IDC_BENCH_NUM_PASSES 143
+#define IDT_BENCH_LOG 160
+#define IDT_BENCH_ERROR_MESSAGE 161
+
+#define IDT_BENCH_COMPRESS_SIZE1 170
+#define IDT_BENCH_COMPRESS_SIZE2 171
+#define IDT_BENCH_DECOMPR_SIZE1 172
+#define IDT_BENCH_DECOMPR_SIZE2 173
// #define IDT_BENCH_FREQ_CUR 150
// #define IDT_BENCH_FREQ_RES 151
diff --git a/CPP/7zip/UI/GUI/CompressDialog.cpp b/CPP/7zip/UI/GUI/CompressDialog.cpp
index 9f2cb146..3298526e 100644
--- a/CPP/7zip/UI/GUI/CompressDialog.cpp
+++ b/CPP/7zip/UI/GUI/CompressDialog.cpp
@@ -83,6 +83,8 @@ static const unsigned kHistorySize = 20;
static const UInt32 kNoSolidBlockSize = 0;
static const UInt32 kSolidBlockSize = 64;
+static const UInt32 kLzmaMaxDictSize = (UInt32)15 << 28;
+
static LPCSTR const kExeExt = ".exe";
#define k7zFormat "7z"
@@ -254,18 +256,20 @@ static bool IsMethodSupportedBySfx(int methodID)
return false;
}
-static bool GetMaxRamSizeForProgram(UInt64 &physSize)
+static bool GetMaxRamSizeForProgram(UInt64 &ramSize, UInt64 &size)
{
- physSize = (UInt64)(sizeof(size_t)) << 29;
- bool ramSize_Defined = NSystem::GetRamSize(physSize);
+ size = (UInt64)(sizeof(size_t)) << 29;
+ bool ramSize_Defined = NSystem::GetRamSize(size);
+ ramSize = size;
+ size = size / 16 * 15;
const UInt64 kMinSysSize = (1 << 24);
- if (physSize <= kMinSysSize)
- physSize = 0;
+ if (size <= kMinSysSize)
+ size = 0;
else
- physSize -= kMinSysSize;
+ size -= kMinSysSize;
const UInt64 kMinUseSize = (1 << 24);
- if (physSize < kMinUseSize)
- physSize = kMinUseSize;
+ if (size < kMinUseSize)
+ size = kMinUseSize;
return ramSize_Defined;
}
@@ -411,7 +415,7 @@ bool CCompressDialog::OnInit()
SetSolidBlockSize();
SetNumThreads();
- TCHAR s[40] = { TEXT('/'), TEXT(' '), 0 };
+ TCHAR s[32] = { TEXT('/'), TEXT(' '), 0 };
ConvertUInt32ToString(NSystem::GetNumberOfProcessors(), s + 2);
SetItemText(IDT_COMPRESS_HARDWARE_THREADS, s);
@@ -687,6 +691,48 @@ static bool IsAsciiString(const UString &s)
return true;
}
+
+static void AddSize_MB(UString &s, UInt64 size)
+{
+ char temp[32];
+ ConvertUInt64ToString((size + (1 << 20) - 1) >> 20, temp);
+ s += temp;
+ s += " MB";
+}
+
+
+void SetErrorMessage_MemUsage(UString &s, UInt64 reqSize, UInt64 ramSize, UInt64 ramLimit, const UString &usageString)
+{
+ s += "The operation was blocked by 7-Zip";
+ s.Add_LF();
+ s += "The operation can require big amount of RAM (memory):";
+ s.Add_LF();
+ s.Add_LF();
+ AddSize_MB(s, reqSize);
+
+ if (!usageString.IsEmpty())
+ {
+ s += " : ";
+ s += usageString;
+ }
+
+ s.Add_LF();
+ AddSize_MB(s, ramSize);
+ s += " : RAM";
+
+ if (ramLimit != 0)
+ {
+ s.Add_LF();
+ AddSize_MB(s, ramLimit);
+ s += " : 7-Zip limit";
+ }
+
+ s.Add_LF();
+ s.Add_LF();
+ s += LangString(IDS_MEM_ERROR);
+}
+
+
void CCompressDialog::OnOK()
{
_password1Control.GetText(Info.Password);
@@ -718,6 +764,24 @@ void CCompressDialog::OnOK()
}
}
+ {
+ UInt64 ramSize;
+ UInt64 maxRamSize;
+ const bool maxRamSize_Defined = GetMaxRamSizeForProgram(ramSize, maxRamSize);
+ UInt64 decompressMem;
+ const UInt64 memUsage = GetMemoryUsage_DecompMem(decompressMem);
+ if (maxRamSize_Defined && memUsage > maxRamSize)
+ {
+ UString s;
+ UString s2 = LangString(IDT_COMPRESS_MEMORY);
+ if (s2.IsEmpty())
+ GetItemText(IDT_COMPRESS_MEMORY, s2);
+ SetErrorMessage_MemUsage(s, memUsage, ramSize, maxRamSize, s2);
+ MessageBoxError(s);
+ return;
+ }
+ }
+
SaveOptionsInMem();
{
UString s;
@@ -736,7 +800,7 @@ void CCompressDialog::OnOK()
Info.PathMode = (NWildcard::ECensorPathMode)k_PathMode_Vals[m_PathMode.GetCurSel()];
Info.Level = GetLevelSpec();
- Info.Dictionary = GetDictionarySpec();
+ Info.Dict64 = GetDictSpec();
Info.Order = GetOrderSpec();
Info.OrderMode = GetOrderMode();
Info.NumThreads = GetNumThreadsSpec();
@@ -1207,29 +1271,37 @@ UString CCompressDialog::GetEncryptionMethodSpec()
return s;
}
-void CCompressDialog::AddDictionarySize(UInt32 size)
+
+void CCompressDialog::AddDict2(size_t sizeReal, size_t sizeShow)
{
Byte c = 0;
unsigned moveBits = 0;
- if ((size & 0xFFFFF) == 0) { moveBits = 20; c = 'M'; }
- else if ((size & 0x3FF) == 0) { moveBits = 10; c = 'K'; }
- TCHAR s[40];
- ConvertUInt32ToString(size >> moveBits, s);
+ if ((sizeShow & 0xFFFFF) == 0) { moveBits = 20; c = 'M'; }
+ else if ((sizeShow & 0x3FF) == 0) { moveBits = 10; c = 'K'; }
+ TCHAR s[32];
+ ConvertUInt64ToString(sizeShow >> moveBits, s);
unsigned pos = MyStringLen(s);
s[pos++] = ' ';
if (moveBits != 0)
s[pos++] = c;
s[pos++] = 'B';
s[pos++] = 0;
- int index = (int)m_Dictionary.AddString(s);
- m_Dictionary.SetItemData(index, size);
+ const int index = (int)m_Dictionary.AddString(s);
+ m_Dictionary.SetItemData(index, sizeReal);
}
+
+void CCompressDialog::AddDict(size_t size)
+{
+ AddDict2(size, size);
+}
+
+
void CCompressDialog::SetDictionary()
{
m_Dictionary.ResetContent();
const CArcInfoEx &ai = (*ArcFormats)[GetFormatIndex()];
- int index = FindRegistryFormat(ai.Name);
+ const int index = FindRegistryFormat(ai.Name);
UInt32 defaultDict = (UInt32)(Int32)-1;
if (index >= 0)
@@ -1239,12 +1311,13 @@ void CCompressDialog::SetDictionary()
defaultDict = fo.Dictionary;
}
- int methodID = GetMethodID();
- UInt32 level = GetLevel2();
+ const int methodID = GetMethodID();
+ const UInt32 level = GetLevel2();
if (methodID < 0)
return;
+ UInt64 ramSize;
UInt64 maxRamSize;
- bool maxRamSize_Defined = GetMaxRamSizeForProgram(maxRamSize);
+ const bool maxRamSize_Defined = GetMaxRamSizeForProgram(ramSize, maxRamSize);
switch (methodID)
{
@@ -1254,38 +1327,44 @@ void CCompressDialog::SetDictionary()
if (defaultDict == (UInt32)(Int32)-1)
{
defaultDict =
- ( level <= 3 ? (1 << (level * 2 + 16)) :
- ( level <= 6 ? (1 << (level + 19)) :
- ( level <= 7 ? (1 << 25) : (1 << 26)
- )));
+ ( level <= 3 ? ((UInt32)1 << (level * 2 + 16)) :
+ ( level <= 6 ? ((UInt32)1 << (level + 19)) :
+ ( level <= 7 ? ((UInt32)1 << 25) : ((UInt32)1 << 26)
+ )));
}
- AddDictionarySize(1 << 16);
- AddDictionarySize(1 << 18);
- m_Dictionary.SetCurSel(m_Dictionary.GetCount() - 1);
+
+ // we use threshold 3.75 GiB to switch to kLzmaMaxDictSize.
+ if (defaultDict >= ((UInt32)15 << 28))
+ defaultDict = kLzmaMaxDictSize;
- for (unsigned i = 20; i <= 31; i++)
- for (unsigned j = 0; j < 2; j++)
- {
- if (i == 20 && j > 0)
- continue;
- UInt32 dict = ((UInt32)(2 + j) << (i - 1));
-
- if (dict >
- #ifdef MY_CPU_64BIT
- (3 << 29)
- #else
- (1 << 26)
- #endif
- )
- continue;
-
- AddDictionarySize(dict);
- UInt64 decomprSize;
- UInt64 requiredComprSize = GetMemoryUsage(dict, decomprSize);
- if (dict <= defaultDict && (!maxRamSize_Defined || requiredComprSize <= maxRamSize))
- m_Dictionary.SetCurSel(m_Dictionary.GetCount() - 1);
- }
+ const size_t kLzmaMaxDictSize_Up = (size_t)1 << (20 + sizeof(size_t) / 4 * 6);
+
+ int curSel = 0;
+ for (unsigned i = (16 - 1) * 2; i <= (32 - 1) * 2; i++)
+ {
+ if (i < (20 - 1) * 2
+ && i != (16 - 1) * 2
+ && i != (18 - 1) * 2)
+ continue;
+ if (i == (20 - 1) * 2 + 1)
+ continue;
+ const size_t dict_up = (size_t)(2 + (i & 1)) << (i / 2);
+ size_t dict = dict_up;
+ if (dict_up >= kLzmaMaxDictSize)
+ dict = kLzmaMaxDictSize; // we reduce dictionary
+
+ AddDict(dict);
+ // AddDict2(dict, dict_up); // for debug : we show 4 GB
+
+ const UInt64 memUsage = GetMemoryUsageComp_Dict(dict);
+ if (dict <= defaultDict && (!maxRamSize_Defined || memUsage <= maxRamSize))
+ curSel = m_Dictionary.GetCount() - 1;
+ if (dict_up >= kLzmaMaxDictSize_Up)
+ break;
+ }
+
+ m_Dictionary.SetCurSel(curSel);
// SetNearestSelectComboBox(m_Dictionary, defaultDict);
break;
}
@@ -1293,46 +1372,63 @@ void CCompressDialog::SetDictionary()
case kPPMd:
{
if (defaultDict == (UInt32)(Int32)-1)
- {
defaultDict = (UInt32)1 << (level + 19);
- }
- for (unsigned i = 20; i < 31; i++)
- for (unsigned j = 0; j < 2; j++)
- {
- if (i == 20 && j > 0)
- continue;
- UInt32 dict = ((UInt32)(2 + j) << (i - 1));
- if (dict >
- #ifdef MY_CPU_64BIT
- (1 << 30)
- #else
- (1 << 29)
- #endif
- )
- continue;
- AddDictionarySize(dict);
- UInt64 decomprSize;
- UInt64 requiredComprSize = GetMemoryUsage(dict, decomprSize);
- if ((dict <= defaultDict && (!maxRamSize_Defined || requiredComprSize <= maxRamSize))
- || m_Dictionary.GetCount() == 1)
- m_Dictionary.SetCurSel(m_Dictionary.GetCount() - 1);
- }
-
+ const UInt32 kPpmd_Default_4g = (UInt32)0 - ((UInt32)1 << 10);
+ const size_t kPpmd_MaxDictSize_Up = (size_t)1 << (29 + sizeof(size_t) / 8);
+
+ if (defaultDict >= ((UInt32)15 << 28)) // threshold
+ defaultDict = kPpmd_Default_4g;
+
+ int curSel = 0;
+ for (unsigned i = (20 - 1) * 2; i <= (32 - 1) * 2; i++)
+ {
+ if (i == (20 - 1) * 2 + 1)
+ continue;
+
+ const size_t dict_up = (size_t)(2 + (i & 1)) << (i / 2);
+ size_t dict = dict_up;
+ if (dict_up >= kPpmd_Default_4g)
+ dict = kPpmd_Default_4g;
+
+ AddDict2(dict, dict_up);
+ // AddDict2((UInt32)((UInt32)0 - 2), dict_up); // for debug
+ // AddDict(dict_up); // for debug
+ const UInt64 memUsage = GetMemoryUsageComp_Dict(dict);
+ if (dict <= defaultDict && (!maxRamSize_Defined || memUsage <= maxRamSize))
+ curSel = m_Dictionary.GetCount() - 1;
+ if (dict_up >= kPpmd_MaxDictSize_Up)
+ break;
+ }
+ m_Dictionary.SetCurSel(curSel);
// SetNearestSelectComboBox(m_Dictionary, defaultDict);
break;
}
- case kDeflate:
+ case kPPMdZip:
{
- AddDictionarySize(32 << 10);
- m_Dictionary.SetCurSel(0);
+ if (defaultDict == (UInt32)(Int32)-1)
+ defaultDict = (UInt32)1 << (level + 19);
+
+ int curSel = 0;
+ for (unsigned i = 20; i <= 28; i++)
+ {
+ const UInt32 dict = (UInt32)1 << i;
+ AddDict(dict);
+ const UInt64 memUsage = GetMemoryUsageComp_Dict(dict);
+ if ((dict <= defaultDict && (!maxRamSize_Defined || memUsage <= maxRamSize)))
+ curSel = m_Dictionary.GetCount() - 1;
+ }
+ m_Dictionary.SetCurSel(curSel);
+ // SetNearestSelectComboBox(m_Dictionary, defaultDict);
break;
}
-
+
+ case kDeflate:
case kDeflate64:
{
- AddDictionarySize(64 << 10);
+ const UInt32 dict = (methodID == kDeflate ? (UInt32)(1 << 15) : (UInt32)(1 << 16));
+ AddDict(dict);
m_Dictionary.SetCurSel(0);
break;
}
@@ -1346,39 +1442,22 @@ void CCompressDialog::SetDictionary()
else defaultDict = (100 << 10);
}
+ int curSel = 0;
for (unsigned i = 1; i <= 9; i++)
{
- UInt32 dict = ((UInt32)i * 100) << 10;
- AddDictionarySize(dict);
- if (dict <= defaultDict || m_Dictionary.GetCount() == 0)
- m_Dictionary.SetCurSel(m_Dictionary.GetCount() - 1);
+ const UInt32 dict = ((UInt32)i * 100) << 10;
+ AddDict(dict);
+ // AddDict2(i * 100000, dict);
+ if (i <= defaultDict / 100000)
+ curSel = m_Dictionary.GetCount() - 1;
}
-
- break;
- }
-
- case kPPMdZip:
- {
- if (defaultDict == (UInt32)(Int32)-1)
- defaultDict = (UInt32)1 << (level + 19);
-
- for (unsigned i = 20; i <= 28; i++)
- {
- UInt32 dict = (1 << i);
- AddDictionarySize(dict);
- UInt64 decomprSize;
- UInt64 requiredComprSize = GetMemoryUsage(dict, decomprSize);
- if ((dict <= defaultDict && (!maxRamSize_Defined || requiredComprSize <= maxRamSize))
- || m_Dictionary.GetCount() == 1)
- m_Dictionary.SetCurSel(m_Dictionary.GetCount() - 1);
- }
-
- // SetNearestSelectComboBox(m_Dictionary, defaultDict);
+ m_Dictionary.SetCurSel(curSel);
break;
}
}
}
+
UInt32 CCompressDialog::GetComboValue(NWindows::NControl::CComboBox &c, int defMax)
{
if (c.GetCount() <= defMax)
@@ -1386,6 +1465,15 @@ UInt32 CCompressDialog::GetComboValue(NWindows::NControl::CComboBox &c, int defM
return (UInt32)c.GetItemData_of_CurSel();
}
+
+UInt64 CCompressDialog::GetComboValue_64(NWindows::NControl::CComboBox &c, int defMax)
+{
+ if (c.GetCount() <= defMax)
+ return (UInt64)(Int64)-1;
+ // LRESULT is signed, so we cast it to unsigned size_t first:
+ return (UInt64)(size_t)c.GetItemData_of_CurSel();
+}
+
UInt32 CCompressDialog::GetLevel2()
{
UInt32 level = GetLevel();
@@ -1396,7 +1484,7 @@ UInt32 CCompressDialog::GetLevel2()
int CCompressDialog::AddOrder(UInt32 size)
{
- TCHAR s[40];
+ TCHAR s[32];
ConvertUInt32ToString(size, s);
int index = (int)m_Order.AddString(s);
m_Order.SetItemData(index, size);
@@ -1517,7 +1605,7 @@ bool CCompressDialog::GetOrderMode()
}
-static UInt64 Get_Lzma2_ChunkSize(UInt32 dict)
+static UInt64 Get_Lzma2_ChunkSize(UInt64 dict)
{
// we use same default chunk sizes as defined in 7z encoder and lzma2 encoder
UInt64 cs = (UInt64)dict << 2;
@@ -1543,8 +1631,8 @@ void CCompressDialog::SetSolidBlockSize(bool useDictionary)
if (level == 0)
return;
- UInt32 dict = GetDictionarySpec();
- if (dict == (UInt32)(Int32)-1)
+ UInt64 dict = GetDictSpec();
+ if (dict == (UInt64)(Int64)-1)
dict = 1;
UInt32 defaultBlockSize = (UInt32)(Int32)-1;
@@ -1603,7 +1691,7 @@ void CCompressDialog::SetSolidBlockSize(bool useDictionary)
if (defaultBlockSize == (UInt32)(Int32)-1 && ((UInt64)1 << i) >= blockSize)
defaultBlockSize = i;
- TCHAR s[40];
+ TCHAR s[32];
char post;
ConvertUInt32ToString(1 << (i % 10), s);
if (i < 20) post = 'K';
@@ -1665,7 +1753,7 @@ void CCompressDialog::SetNumThreads()
numAlgoThreadsMax = 128;
for (UInt32 i = 1; i <= numHardwareThreads * 2 && i <= numAlgoThreadsMax; i++)
{
- TCHAR s[40];
+ TCHAR s[32];
ConvertUInt32ToString(i, s);
int index = (int)m_NumThreads.AddString(s);
m_NumThreads.SetItemData(index, (UInt32)i);
@@ -1673,7 +1761,8 @@ void CCompressDialog::SetNumThreads()
SetNearestSelectComboBox(m_NumThreads, defaultValue);
}
-UInt64 CCompressDialog::GetMemoryUsage(UInt32 dict, UInt64 &decompressMemory)
+
+UInt64 CCompressDialog::GetMemoryUsage_Dict_DecompMem(UInt64 dict64, UInt64 &decompressMemory)
{
decompressMemory = UInt64(Int64(-1));
UInt32 level = GetLevel2();
@@ -1706,6 +1795,7 @@ UInt64 CCompressDialog::GetMemoryUsage(UInt32 dict, UInt64 &decompressMemory)
case kLZMA:
case kLZMA2:
{
+ const UInt32 dict = (dict64 >= kLzmaMaxDictSize ? kLzmaMaxDictSize : (UInt32)dict64);
UInt32 hs = dict - 1;
hs |= (hs >> 1);
hs |= (hs >> 2);
@@ -1757,7 +1847,15 @@ UInt64 CCompressDialog::GetMemoryUsage(UInt32 dict, UInt64 &decompressMemory)
}
if (chunkSize == 0)
- size += numBlockThreads * (size1 + (UInt64)dict * 3 / 2);
+ {
+ const UInt32 kBlockSizeMax = (UInt32)0 - (UInt32)(1 << 16);
+ UInt64 blockSize = (UInt64)dict + (1 << 16)
+ + (numThreads1 > 1 ? (1 << 20) : 0);
+ blockSize += (blockSize >> (blockSize < ((UInt32)1 << 30) ? 1 : 2));
+ if (blockSize >= kBlockSizeMax)
+ blockSize = kBlockSizeMax;
+ size += numBlockThreads * (size1 + blockSize);
+ }
else
{
size += numBlockThreads * (size1 + chunkSize);
@@ -1771,7 +1869,7 @@ UInt64 CCompressDialog::GetMemoryUsage(UInt32 dict, UInt64 &decompressMemory)
case kPPMd:
{
- decompressMemory = dict + (2 << 20);
+ decompressMemory = dict64 + (2 << 20);
return size + decompressMemory;
}
@@ -1799,7 +1897,7 @@ UInt64 CCompressDialog::GetMemoryUsage(UInt32 dict, UInt64 &decompressMemory)
case kPPMdZip:
{
- decompressMemory = dict + (2 << 20);
+ decompressMemory = dict64 + (2 << 20);
return size + (UInt64)decompressMemory * numThreads;
}
}
@@ -1807,9 +1905,15 @@ UInt64 CCompressDialog::GetMemoryUsage(UInt32 dict, UInt64 &decompressMemory)
return (UInt64)(Int64)-1;
}
-UInt64 CCompressDialog::GetMemoryUsage(UInt64 &decompressMemory)
+UInt64 CCompressDialog::GetMemoryUsage_DecompMem(UInt64 &decompressMemory)
{
- return GetMemoryUsage(GetDictionary(), decompressMemory);
+ return GetMemoryUsage_Dict_DecompMem(GetDict(), decompressMemory);
+}
+
+UInt64 CCompressDialog::GetMemoryUsageComp_Dict(UInt64 dict64)
+{
+ UInt64 decompressMemory;
+ return GetMemoryUsage_Dict_DecompMem(dict64, decompressMemory);
}
void CCompressDialog::PrintMemUsage(UINT res, UInt64 value)
@@ -1819,7 +1923,7 @@ void CCompressDialog::PrintMemUsage(UINT res, UInt64 value)
SetItemText(res, TEXT("?"));
return;
}
- TCHAR s[40];
+ TCHAR s[32];
if (value <= ((UInt64)16 << 30))
{
value = (value + (1 << 20) - 1) >> 20;
@@ -1838,7 +1942,7 @@ void CCompressDialog::PrintMemUsage(UINT res, UInt64 value)
void CCompressDialog::SetMemoryUsage()
{
UInt64 decompressMem;
- UInt64 memUsage = GetMemoryUsage(decompressMem);
+ const UInt64 memUsage = GetMemoryUsage_DecompMem(decompressMem);
PrintMemUsage(IDT_COMPRESS_MEMORY_VALUE, memUsage);
PrintMemUsage(IDT_COMPRESS_MEMORY_DE_VALUE, decompressMem);
}
@@ -1847,7 +1951,7 @@ void CCompressDialog::SetParams()
{
const CArcInfoEx &ai = (*ArcFormats)[GetFormatIndex()];
m_Params.SetText(TEXT(""));
- int index = FindRegistryFormat(ai.Name);
+ const int index = FindRegistryFormat(ai.Name);
if (index >= 0)
{
const NCompression::CFormatOptions &fo = m_RegistryInfo.Formats[index];
@@ -1858,13 +1962,34 @@ void CCompressDialog::SetParams()
void CCompressDialog::SaveOptionsInMem()
{
const CArcInfoEx &ai = (*ArcFormats)[Info.FormatIndex];
- int index = FindRegistryFormatAlways(ai.Name);
+ const int index = FindRegistryFormatAlways(ai.Name);
m_Params.GetText(Info.Options);
Info.Options.Trim();
NCompression::CFormatOptions &fo = m_RegistryInfo.Formats[index];
fo.Options = Info.Options;
fo.Level = GetLevelSpec();
- fo.Dictionary = GetDictionarySpec();
+ {
+ const UInt64 dict64 = GetDictSpec();
+ UInt32 dict32;
+ if (dict64 == (UInt64)(Int64)-1)
+ dict32 = (UInt32)(Int32)-1;
+ else
+ {
+ dict32 = (UInt32)dict64;
+ if (dict64 != dict32)
+ {
+ /* The registry field is 32-bit, so a size that does not fit is stored as a marker:
+ (UInt32)(Int32)-1 : marker for the default size
+ (UInt32)(Int32)-2 : marker for a big value (4 GiB)
+ the marker must be larger than the threshold used in SetDictionary()
+ */
+ dict32 = (UInt32)(Int32)-2;
+ // dict32 = kLzmaMaxDictSize; // it must be larger than threshold
+ }
+ }
+ fo.Dictionary = dict32;
+ }
+
fo.Order = GetOrderSpec();
fo.Method = GetMethodSpec();
fo.EncryptionMethod = GetEncryptionMethodSpec();
diff --git a/CPP/7zip/UI/GUI/CompressDialog.h b/CPP/7zip/UI/GUI/CompressDialog.h
index 6658de5c..234e0239 100644
--- a/CPP/7zip/UI/GUI/CompressDialog.h
+++ b/CPP/7zip/UI/GUI/CompressDialog.h
@@ -42,7 +42,7 @@ namespace NCompressDialog
UInt32 Level;
UString Method;
- UInt32 Dictionary;
+ UInt64 Dict64;
bool OrderMode;
UInt32 Order;
UString Options;
@@ -79,7 +79,8 @@ namespace NCompressDialog
DeleteAfterCompressing(false),
FormatIndex(-1)
{
- Level = Dictionary = Order = UInt32(-1);
+ Level = Order = (UInt32)(Int32)-1;
+ Dict64 = (UInt64)(Int64)(-1);
OrderMode = false;
Method.Empty();
Options.Empty();
@@ -88,6 +89,7 @@ namespace NCompressDialog
};
}
+
class CCompressDialog: public NWindows::NControl::CModalDialog
{
NWindows::NControl::CComboBox m_ArchivePath;
@@ -142,17 +144,19 @@ class CCompressDialog: public NWindows::NControl::CModalDialog
void SetEncryptionMethod();
- void AddDictionarySize(UInt32 size);
+ void AddDict2(size_t sizeReal, size_t sizeShow);
+ void AddDict(size_t size);
void SetDictionary();
UInt32 GetComboValue(NWindows::NControl::CComboBox &c, int defMax = 0);
+ UInt64 GetComboValue_64(NWindows::NControl::CComboBox &c, int defMax = 0);
UInt32 GetLevel() { return GetComboValue(m_Level); }
UInt32 GetLevelSpec() { return GetComboValue(m_Level, 1); }
UInt32 GetLevel2();
- UInt32 GetDictionary() { return GetComboValue(m_Dictionary); }
- UInt32 GetDictionarySpec() { return GetComboValue(m_Dictionary, 1); }
+ UInt64 GetDict() { return GetComboValue_64(m_Dictionary); }
+ UInt64 GetDictSpec() { return GetComboValue_64(m_Dictionary, 1); }
UInt32 GetOrder() { return GetComboValue(m_Order); }
UInt32 GetOrderSpec() { return GetComboValue(m_Order, 1); }
UInt32 GetNumThreadsSpec() { return GetComboValue(m_NumThreads, 1); }
@@ -166,8 +170,10 @@ class CCompressDialog: public NWindows::NControl::CModalDialog
void SetSolidBlockSize(bool useDictionary = false);
void SetNumThreads();
- UInt64 GetMemoryUsage(UInt32 dict, UInt64 &decompressMemory);
- UInt64 GetMemoryUsage(UInt64 &decompressMemory);
+ UInt64 GetMemoryUsage_Dict_DecompMem(UInt64 dict, UInt64 &decompressMemory);
+ UInt64 GetMemoryUsage_DecompMem(UInt64 &decompressMemory);
+ UInt64 GetMemoryUsageComp_Dict(UInt64 dict64);
+
void PrintMemUsage(UINT res, UInt64 value);
void SetMemoryUsage();
void SetParams();
@@ -196,6 +202,11 @@ public:
CCompressDialog(): CurrentDirWasChanged(false) {};
+ void MessageBoxError(LPCWSTR message)
+ {
+ MessageBoxW(*this, message, L"7-Zip", MB_ICONERROR);
+ }
+
protected:
void CheckSFXControlsEnable();
diff --git a/CPP/7zip/UI/GUI/GUI.cpp b/CPP/7zip/UI/GUI/GUI.cpp
index f5676d85..37567019 100644
--- a/CPP/7zip/UI/GUI/GUI.cpp
+++ b/CPP/7zip/UI/GUI/GUI.cpp
@@ -193,7 +193,12 @@ static int Main2()
if (options.Command.CommandType == NCommandType::kBenchmark)
{
- HRESULT res = Benchmark(EXTERNAL_CODECS_VARS_L options.Properties);
+ HRESULT res = Benchmark(
+ EXTERNAL_CODECS_VARS_L
+ options.Properties,
+ options.NumIterations_Defined ?
+ options.NumIterations :
+ k_NumBenchIterations_Default);
/*
if (res == S_FALSE)
{
diff --git a/CPP/7zip/UI/GUI/GUI.dsp b/CPP/7zip/UI/GUI/GUI.dsp
index 41c1e04d..7e65f481 100644
--- a/CPP/7zip/UI/GUI/GUI.dsp
+++ b/CPP/7zip/UI/GUI/GUI.dsp
@@ -1164,6 +1164,10 @@ SOURCE=..\..\..\Windows\SystemInfo.h
# End Source File
# Begin Source File
+SOURCE=..\..\..\Windows\Thread.h
+# End Source File
+# Begin Source File
+
SOURCE=..\..\..\Windows\TimeUtils.cpp
# End Source File
# Begin Source File
diff --git a/CPP/7zip/UI/GUI/UpdateGUI.cpp b/CPP/7zip/UI/GUI/UpdateGUI.cpp
index af07e550..28f19d25 100644
--- a/CPP/7zip/UI/GUI/UpdateGUI.cpp
+++ b/CPP/7zip/UI/GUI/UpdateGUI.cpp
@@ -142,7 +142,7 @@ static void SetOutProperties(
UInt32 level,
bool setMethod,
const UString &method,
- UInt32 dictionary,
+ UInt64 dict64,
bool orderMode,
UInt32 order,
bool solidIsSpecified, UInt64 solidBlockSize,
@@ -157,13 +157,13 @@ static void SetOutProperties(
{
if (!method.IsEmpty())
AddProp(properties, is7z ? "0": "m", method);
- if (dictionary != (UInt32)(Int32)-1)
+ if (dict64 != (UInt64)(Int64)-1)
{
AString name;
if (is7z)
name = "0";
name += (orderMode ? "mem" : "d");
- AddProp(properties, name, GetNumInBytesString(dictionary));
+ AddProp(properties, name, GetNumInBytesString(dict64));
}
if (order != (UInt32)(Int32)-1)
{
@@ -389,7 +389,7 @@ static HRESULT ShowDialog(
di.Level,
!methodOverride,
di.Method,
- di.Dictionary,
+ di.Dict64,
di.OrderMode, di.Order,
di.SolidIsSpecified, di.SolidBlockSize,
di.MultiThreadIsAllowed, di.NumThreads,
diff --git a/CPP/7zip/warn_clang_mac.mak b/CPP/7zip/warn_clang_mac.mak
index 41044a2c..aadf14f7 100644
--- a/CPP/7zip/warn_clang_mac.mak
+++ b/CPP/7zip/warn_clang_mac.mak
@@ -27,6 +27,8 @@ CFLAGS_WARN_CLANG_12= $(CFLAGS_WARN_CLANG_3_8) \
-Wno-atomic-implicit-seq-cst \
-Wconversion \
-Wno-sign-conversion \
+ -Wno-suggest-override \
+ -Wno-suggest-destructor-override \
CFLAGS_WARN_MAC = \
-Wno-poison-system-directories \
diff --git a/CPP/7zip/warn_gcc.mak b/CPP/7zip/warn_gcc.mak
index 5fb747dc..3185326a 100644
--- a/CPP/7zip/warn_gcc.mak
+++ b/CPP/7zip/warn_gcc.mak
@@ -42,6 +42,10 @@ CFLAGS_WARN_GCC_9 = \
# -Wno-sign-conversion \
+CFLAGS_WARN_GCC_10 = $(CFLAGS_WARN_GCC_9) \
+ -Wmaybe-uninitialized \
+ -Wmisleading-indentation \
+
CFLAGS_WARN_GCC_PPMD_UNALIGNED = \
-Wno-strict-aliasing \
diff --git a/CPP/Common/LzFindPrepare.cpp b/CPP/Common/LzFindPrepare.cpp
new file mode 100644
index 00000000..8845e4a5
--- /dev/null
+++ b/CPP/Common/LzFindPrepare.cpp
@@ -0,0 +1,7 @@
+// LzFindPrepare.cpp
+
+#include "StdAfx.h"
+
+#include "../../C/LzFind.h"
+
+static struct CLzFindPrepare { CLzFindPrepare() { LzFindPrepare(); } } g_CLzFindPrepare;
diff --git a/CPP/Common/MyBuffer2.h b/CPP/Common/MyBuffer2.h
index de5ebbdd..372d478c 100644
--- a/CPP/Common/MyBuffer2.h
+++ b/CPP/Common/MyBuffer2.h
@@ -25,6 +25,19 @@ public:
operator const Byte *() const { return _data; }
size_t Size() const { return _size; }
+ void Alloc(size_t size)
+ {
+ if (!_data || size != _size)
+ {
+ ::MidFree(_data);
+ _size = 0;
+ _data = NULL;
+ _data = (Byte *)::MidAlloc(size);
+ if (_data)
+ _size = size;
+ }
+ }
+
void AllocAtLeast(size_t size)
{
if (!_data || size > _size)
@@ -105,5 +118,22 @@ public:
}
};
+/*
+ CMidAlignedBuffer must return an aligned pointer.
+ - On Windows it uses CMidBuffer: MidAlloc() -> VirtualAlloc().
+ VirtualAlloc(): allocated memory is automatically initialized to zero.
+ MidAlloc(0) returns NULL.
+ - On non-Windows systems it uses CAlignedBuffer (g_AlignedAlloc).
+ g_AlignedAlloc::Alloc(size = 0) can return non-NULL.
+*/
+
+typedef
+#ifdef _WIN32
+ CMidBuffer
+#else
+ CAlignedBuffer
+#endif
+ CMidAlignedBuffer;
+
#endif
diff --git a/CPP/Windows/Control/Dialog.cpp b/CPP/Windows/Control/Dialog.cpp
index 020694a6..6a9d7d9b 100644
--- a/CPP/Windows/Control/Dialog.cpp
+++ b/CPP/Windows/Control/Dialog.cpp
@@ -26,6 +26,14 @@ static INT_PTR APIENTRY DialogProcedure(HWND dialogHWND, UINT message, WPARAM wP
return FALSE;
if (message == WM_INITDIALOG)
dialog->Attach(dialogHWND);
+
+ /* MSDN: The dialog box procedure should return
+ TRUE - if it processed the message
+ FALSE - if it did not process the message
+ If the dialog box procedure returns FALSE,
+ the dialog manager performs the default dialog operation in response to the message.
+ */
+
try { return BoolToBOOL(dialog->OnMessage(message, wParam, lParam)); }
catch(...) { return TRUE; }
}
@@ -39,6 +47,7 @@ bool CDialog::OnMessage(UINT message, WPARAM wParam, LPARAM lParam)
case WM_NOTIFY: return OnNotify((UINT)wParam, (LPNMHDR) lParam);
case WM_TIMER: return OnTimer(wParam, lParam);
case WM_SIZE: return OnSize(wParam, LOWORD(lParam), HIWORD(lParam));
+ case WM_DESTROY: return OnDestroy();
case WM_HELP: OnHelp(); return true;
/*
OnHelp(
diff --git a/CPP/Windows/Control/Dialog.h b/CPP/Windows/Control/Dialog.h
index 33caa5b2..f804a9e7 100644
--- a/CPP/Windows/Control/Dialog.h
+++ b/CPP/Windows/Control/Dialog.h
@@ -31,6 +31,12 @@ public:
bool SetItemText(int itemID, LPCTSTR s)
{ return BOOLToBool(SetDlgItemText(_window, itemID, s)); }
+ bool SetItemTextA(int itemID, LPCSTR s)
+ { return BOOLToBool(SetDlgItemTextA(_window, itemID, s)); }
+
+ bool SetItemText_Empty(int itemID)
+ { return SetItemText(itemID, TEXT("")); }
+
#ifndef _UNICODE
bool SetItemText(int itemID, LPCWSTR s)
{
@@ -51,6 +57,12 @@ public:
*/
#endif
+ bool GetItemText(int itemID, UString &s)
+ {
+ CWindow window(GetItem(itemID));
+ return window.GetText(s);
+ }
+
bool SetItemInt(int itemID, UINT value, bool isSigned)
{ return BOOLToBool(SetDlgItemInt(_window, itemID, value, BoolToBOOL(isSigned))); }
bool GetItemInt(int itemID, bool isSigned, UINT &value)
@@ -65,6 +77,13 @@ public:
HWND GetNextTabItem(HWND control, bool previous)
{ return GetNextDlgTabItem(_window, control, BoolToBOOL(previous)); }
+ LRESULT SendMsg_NextDlgCtl(WPARAM wParam, LPARAM lParam)
+ { return SendMsg(WM_NEXTDLGCTL, wParam, lParam); }
+ LRESULT SendMsg_NextDlgCtl_HWND(HWND hwnd) { return SendMsg_NextDlgCtl((WPARAM)hwnd, TRUE); }
+ LRESULT SendMsg_NextDlgCtl_CtlId(int id) { return SendMsg_NextDlgCtl_HWND(GetItem(id)); }
+ LRESULT SendMsg_NextDlgCtl_Next() { return SendMsg_NextDlgCtl(0, FALSE); }
+ LRESULT SendMsg_NextDlgCtl_Prev() { return SendMsg_NextDlgCtl(1, FALSE); }
+
bool MapRect(LPRECT rect)
{ return BOOLToBool(MapDialogRect(_window, rect)); }
@@ -92,6 +111,7 @@ public:
virtual bool OnCommand(WPARAM wParam, LPARAM lParam);
virtual bool OnCommand(int code, int itemID, LPARAM lParam);
virtual bool OnSize(WPARAM /* wParam */, int /* xSize */, int /* ySize */) { return false; }
+ virtual bool OnDestroy() { return false; }
/*
#ifdef UNDER_CE
diff --git a/CPP/Windows/ErrorMsg.cpp b/CPP/Windows/ErrorMsg.cpp
index bfa21e50..63fd7922 100644
--- a/CPP/Windows/ErrorMsg.cpp
+++ b/CPP/Windows/ErrorMsg.cpp
@@ -19,6 +19,14 @@ namespace NError {
static bool MyFormatMessage(DWORD errorCode, UString &message)
{
+ #ifndef _SFX
+ if ((HRESULT)errorCode == MY_HRES_ERROR__INTERNAL_ERROR)
+ {
+ message = "Internal Error: The failure in hardware (RAM or CPU), OS or program";
+ return true;
+ }
+ #endif
+
#ifdef _WIN32
LPVOID msgBuf;
diff --git a/CPP/Windows/Registry.cpp b/CPP/Windows/Registry.cpp
index 2c4643bc..b20157d5 100644
--- a/CPP/Windows/Registry.cpp
+++ b/CPP/Windows/Registry.cpp
@@ -3,6 +3,7 @@
#include "StdAfx.h"
#include <wchar.h>
+// #include <stdio.h>
#ifndef _UNICODE
#include "../Common/StringConvert.h"
@@ -17,12 +18,27 @@ namespace NWindows {
namespace NRegistry {
#define MYASSERT(expr) // _ASSERTE(expr)
+#define MY_ASSUME(expr)
+
+/*
+static void Error()
+{
+ #ifdef _CONSOLE
+ printf("\nregistry error\n");
+ #else
+ MessageBoxW(0, L"registry error", L"", 0);
+ // exit(1);
+ #endif
+}
+
+#define MY_ASSUME(expr) { if (!(expr)) Error(); }
+*/
LONG CKey::Create(HKEY parentKey, LPCTSTR keyName,
LPTSTR keyClass, DWORD options, REGSAM accessMask,
LPSECURITY_ATTRIBUTES securityAttributes, LPDWORD disposition) throw()
{
- MYASSERT(parentKey != NULL);
+ MY_ASSUME(parentKey != NULL);
DWORD dispositionReal;
HKEY key = NULL;
LONG res = RegCreateKeyEx(parentKey, keyName, 0, keyClass,
@@ -39,7 +55,7 @@ LONG CKey::Create(HKEY parentKey, LPCTSTR keyName,
LONG CKey::Open(HKEY parentKey, LPCTSTR keyName, REGSAM accessMask) throw()
{
- MYASSERT(parentKey != NULL);
+ MY_ASSUME(parentKey != NULL);
HKEY key = NULL;
LONG res = RegOpenKeyEx(parentKey, keyName, 0, accessMask, &key);
if (res == ERROR_SUCCESS)
@@ -66,7 +82,7 @@ LONG CKey::Close() throw()
// winNT to be deleted must not have subkeys
LONG CKey::DeleteSubKey(LPCTSTR subKeyName) throw()
{
- MYASSERT(_object != NULL);
+ MY_ASSUME(_object != NULL);
return RegDeleteKey(_object, subKeyName);
}
@@ -101,14 +117,14 @@ static inline bool UINT32ToBool(UInt32 value) { return (value != 0); }
LONG CKey::DeleteValue(LPCTSTR name) throw()
{
- MYASSERT(_object != NULL);
+ MY_ASSUME(_object != NULL);
return ::RegDeleteValue(_object, name);
}
#ifndef _UNICODE
LONG CKey::DeleteValue(LPCWSTR name)
{
- MYASSERT(_object != NULL);
+ MY_ASSUME(_object != NULL);
if (g_IsNT)
return ::RegDeleteValueW(_object, name);
return DeleteValue(name == 0 ? 0 : (LPCSTR)GetSystemString(name));
@@ -117,7 +133,7 @@ LONG CKey::DeleteValue(LPCWSTR name)
LONG CKey::SetValue(LPCTSTR name, UInt32 value) throw()
{
- MYASSERT(_object != NULL);
+ MY_ASSUME(_object != NULL);
return RegSetValueEx(_object, name, 0, REG_DWORD,
(const BYTE *)&value, sizeof(UInt32));
}
@@ -130,7 +146,7 @@ LONG CKey::SetValue(LPCTSTR name, bool value) throw()
LONG CKey::SetValue(LPCTSTR name, LPCTSTR value) throw()
{
MYASSERT(value != NULL);
- MYASSERT(_object != NULL);
+ MY_ASSUME(_object != NULL);
return RegSetValueEx(_object, name, 0, REG_SZ,
(const BYTE *)value, ((DWORD)lstrlen(value) + 1) * sizeof(TCHAR));
}
@@ -139,7 +155,7 @@ LONG CKey::SetValue(LPCTSTR name, LPCTSTR value) throw()
LONG CKey::SetValue(LPCTSTR name, const CSysString &value)
{
MYASSERT(value != NULL);
- MYASSERT(_object != NULL);
+ MY_ASSUME(_object != NULL);
return RegSetValueEx(_object, name, NULL, REG_SZ,
(const BYTE *)(const TCHAR *)value, (value.Len() + 1) * sizeof(TCHAR));
}
@@ -150,7 +166,7 @@ LONG CKey::SetValue(LPCTSTR name, const CSysString &value)
LONG CKey::SetValue(LPCWSTR name, LPCWSTR value)
{
MYASSERT(value != NULL);
- MYASSERT(_object != NULL);
+ MY_ASSUME(_object != NULL);
if (g_IsNT)
return RegSetValueExW(_object, name, 0, REG_SZ,
(const BYTE * )value, (DWORD)((wcslen(value) + 1) * sizeof(wchar_t)));
@@ -164,7 +180,7 @@ LONG CKey::SetValue(LPCWSTR name, LPCWSTR value)
LONG CKey::SetValue(LPCTSTR name, const void *value, UInt32 size) throw()
{
MYASSERT(value != NULL);
- MYASSERT(_object != NULL);
+ MY_ASSUME(_object != NULL);
return RegSetValueEx(_object, name, 0, REG_BINARY,
(const BYTE *)value, size);
}
diff --git a/CPP/Windows/Synchronization.h b/CPP/Windows/Synchronization.h
index 98ea0b69..7d2e8d2a 100644
--- a/CPP/Windows/Synchronization.h
+++ b/CPP/Windows/Synchronization.h
@@ -153,6 +153,10 @@ public:
{
return Semaphore_Create(&_object, initCount, maxCount);
}
+ WRes OptCreateInit(UInt32 initCount, UInt32 maxCount)
+ {
+ return Semaphore_OptCreateInit(&_object, initCount, maxCount);
+ }
WRes Release() { return Semaphore_Release1(&_object); }
WRes Release(UInt32 releaseCount) { return Semaphore_ReleaseN(&_object, releaseCount); }
WRes Lock() { return Semaphore_Wait(&_object); }
diff --git a/CPP/Windows/SystemInfo.cpp b/CPP/Windows/SystemInfo.cpp
index 55403efc..9346afd5 100644
--- a/CPP/Windows/SystemInfo.cpp
+++ b/CPP/Windows/SystemInfo.cpp
@@ -12,12 +12,12 @@
#else
+#include <unistd.h>
#include <sys/utsname.h>
#ifdef __APPLE__
#include <sys/sysctl.h>
#elif !defined(_AIX)
-
#include <sys/auxv.h>
#ifdef MY_CPU_ARM_OR_ARM64
@@ -25,13 +25,56 @@
#endif
#endif
+#ifdef __linux__
+#include "../Windows/FileIO.h"
#endif
+#endif // WIN32
+
#include "SystemInfo.h"
#include "System.h"
using namespace NWindows;
+#ifdef __linux__
+
+static bool ReadFile_to_Buffer(CFSTR fileName, CByteBuffer &buf) // Reads the whole file into buf in growing chunks; returns false if opening or reading fails.
+{
+ NWindows::NFile::NIO::CInFile file;
+ if (!file.Open(fileName))
+ return false;
+ /*
+ UInt64 size;
+ if (!file.GetLength(size))
+ {
+ // GetLength() doesn't work "/proc/cpuinfo"
+ return false;
+ }
+ if (size >= ((UInt32)1 << 29))
+ return false;
+ */
+ size_t size = 0; // number of bytes read so far
+ size_t addSize = ((size_t)1 << 12); // size of the next read attempt; starts at 4096 bytes
+ for (;;)
+ {
+ // printf("\nsize = %d\n", (unsigned)size);
+ buf.ChangeSize_KeepData(size + addSize, size); // make room for the next chunk (presumably preserves the first `size` bytes - confirm in CByteBuffer)
+ size_t processed;
+ if (!file.ReadFull(buf + size, addSize, processed))
+ return false;
+ if (processed == 0) // nothing more was read: treated as end of file
+ {
+ buf.ChangeSize_KeepData(size, size); // shrink buf to the number of bytes actually read
+ return true;
+ }
+ size += processed;
+ addSize *= 2; // the chunk size doubles on every iteration
+ }
+}
+
+#endif
+
+
#ifndef __APPLE__
static void PrintHex(AString &s, UInt64 v)
{
@@ -56,7 +99,7 @@ static void PrintCpuChars(AString &s, UInt32 v)
}
-static void x86cpuid_to_String(const Cx86cpuid &c, AString &s)
+static void x86cpuid_to_String(const Cx86cpuid &c, AString &s, AString &ver)
{
s.Empty();
@@ -87,13 +130,10 @@ static void x86cpuid_to_String(const Cx86cpuid &c, AString &s)
s.Trim();
}
- s.Add_Space_if_NotEmpty();
{
char temp[32];
ConvertUInt32ToHex(c.ver, temp);
- s += '(';
- s += temp;
- s += ')';
+ ver += temp;
}
}
@@ -402,18 +442,52 @@ void GetSysInfo(AString &s1, AString &s2)
void GetCpuName(AString &s);
-void GetCpuName(AString &s)
+
+static void AddBracedString(AString &dest, AString &src)
{
- s.Empty();
+ if (!src.IsEmpty())
+ {
+ AString s;
+ s += '(';
+ s += src;
+ s += ')';
+ dest.Add_OptSpaced(s);
+ }
+}
+
+struct CCpuName // CPU description split into separate text fields; populated by Fill().
+{
+ AString CpuName;
+ AString Revision;
+ AString Microcode;
+ AString LargePages;
+
+ void Fill(); // clears all four fields, then populates them
+
+ void Get_Revision_Microcode_LargePages(AString &s) // Rebuilds s from Revision, Microcode and LargePages (CpuName is not included).
+ {
+ s.Empty();
+ AddBracedString(s, Revision); // AddBracedString wraps the text in "(...)" and skips it when empty
+ AddBracedString(s, Microcode);
+ s.Add_OptSpaced(LargePages); // LargePages is appended without parentheses
+ }
+};
+
+void CCpuName::Fill()
+{
+ CpuName.Empty();
+ Revision.Empty();
+ Microcode.Empty();
+ LargePages.Empty();
+
+ AString &s = CpuName;
#ifdef MY_CPU_X86_OR_AMD64
{
Cx86cpuid cpuid;
if (x86cpuid_CheckAndRead(&cpuid))
{
- AString s2;
- x86cpuid_to_String(cpuid, s2);
- s += s2;
+ x86cpuid_to_String(cpuid, s, Revision);
}
else
{
@@ -484,11 +558,10 @@ void GetCpuName(AString &s)
}
if (res[0] == ERROR_SUCCESS || res[1] == ERROR_SUCCESS)
{
- s.Add_OptSpaced("(");
for (int i = 0; i < 2; i++)
{
if (i == 1)
- s += "->";
+ Microcode += "->";
if (res[i] != ERROR_SUCCESS)
continue;
const CByteBuffer &buf = bufs[i];
@@ -497,13 +570,12 @@ void GetCpuName(AString &s)
UInt32 high = GetUi32(buf);
if (high != 0)
{
- PrintHex(s, high);
- s += ".";
+ PrintHex(Microcode, high);
+ Microcode += ".";
}
- PrintHex(s, GetUi32(buf + 4));
+ PrintHex(Microcode, GetUi32(buf + 4));
}
}
- s += ")";
}
}
}
@@ -511,7 +583,7 @@ void GetCpuName(AString &s)
#ifdef _7ZIP_LARGE_PAGES
- Add_LargePages_String(s);
+ Add_LargePages_String(LargePages);
#endif
}
@@ -532,13 +604,10 @@ void AddCpuFeatures(AString &s)
// s += TypeToString2(k_PF, ARRAY_SIZE(k_PF), i);
}
}
- s.Add_Space_if_NotEmpty();
- s += "f:";
+ s.Add_OptSpaced("f:");
PrintHex(s, flags);
- #else // _WIN32
-
- #ifdef __APPLE__
+ #elif defined(__APPLE__)
{
UInt32 v = 0;
if (My_sysctlbyname_Get_UInt32("hw.pagesize", &v) == 0)
@@ -549,10 +618,46 @@ void AddCpuFeatures(AString &s)
}
}
- #elif !defined(_AIX)
+ #else
+
+ const long v = sysconf(_SC_PAGESIZE);
+ if (v != -1)
+ {
+ s.Add_Space_if_NotEmpty();
+ s += "PageSize:";
+ s.Add_UInt32((UInt32)(v >> 10));
+ s += "KB";
+ }
+
+ #if !defined(_AIX)
+
+ #ifdef __linux__
+
+ CByteBuffer buf;
+ if (ReadFile_to_Buffer("/sys/kernel/mm/transparent_hugepage/enabled", buf))
+ // if (ReadFile_to_Buffer("/proc/cpuinfo", buf))
+ {
+ s.Add_OptSpaced("THP:");
+ AString s2;
+ s2.SetFrom_CalcLen((const char *)(const void *)(const Byte *)buf, (unsigned)buf.Size());
+ const int pos = s2.Find('[');
+ if (pos >= 0)
+ {
+ const int pos2 = s2.Find(']', pos + 1);
+ if (pos2 >= 0)
+ {
+ s2.DeleteFrom(pos2);
+ s2.DeleteFrontal(pos + 1);
+ }
+ }
+ s += s2;
+ }
+ // else throw CSystemException(MY_SRes_HRESULT_FROM_WRes(errno));
+
+ #endif
+
- s.Add_Space_if_NotEmpty();
- s += "hwcap:";
+ s.Add_OptSpaced("hwcap:");
{
unsigned long h = getauxval(AT_HWCAP);
PrintHex(s, h);
@@ -561,6 +666,9 @@ void AddCpuFeatures(AString &s)
if (h & HWCAP_SHA1) s += ":SHA1";
if (h & HWCAP_SHA2) s += ":SHA2";
if (h & HWCAP_AES) s += ":AES";
+ if (h & HWCAP_ASIMD) s += ":ASIMD";
+ #elif defined(MY_CPU_ARM)
+ if (h & HWCAP_NEON) s += ":NEON";
#endif
}
@@ -580,9 +688,8 @@ void AddCpuFeatures(AString &s)
#endif
}
}
-
- #endif
- #endif // _WIN32
+ #endif // _AIX
+ #endif // _WIN32
}
@@ -609,11 +716,11 @@ static BOOL My_RtlGetVersion(OSVERSIONINFOEXW *vi)
#endif
-void GetSystemInfoText(AString &sRes)
+void GetOsInfoText(AString &sRes)
{
- {
- {
- AString s;
+ sRes.Empty();
+ AString s;
+
#ifdef _WIN32
#ifndef UNDER_CE
// OSVERSIONINFO vi;
@@ -634,16 +741,16 @@ void GetSystemInfoText(AString &sRes)
s += " SP:"; s.Add_UInt32(vi.wServicePackMajor);
s += "."; s.Add_UInt32(vi.wServicePackMinor);
}
- s += " Suite:"; PrintHex(s, vi.wSuiteMask);
- s += " Type:"; s.Add_UInt32(vi.wProductType);
+ // s += " Suite:"; PrintHex(s, vi.wSuiteMask);
+ // s += " Type:"; s.Add_UInt32(vi.wProductType);
// s += " "; s += GetOemString(vi.szCSDVersion);
}
+ /*
{
- s += " OEMCP:";
- s.Add_UInt32(GetOEMCP());
- s += " ACP:";
- s.Add_UInt32(GetACP());
+ s += " OEMCP:"; s.Add_UInt32(GetOEMCP());
+ s += " ACP:"; s.Add_UInt32(GetACP());
}
+ */
#endif
#else // _WIN32
@@ -666,8 +773,14 @@ void GetSystemInfoText(AString &sRes)
#endif // _WIN32
sRes += s;
- sRes.Add_LF();
- }
+}
+
+
+
+void GetSystemInfoText(AString &sRes)
+{
+ GetOsInfoText(sRes);
+ sRes.Add_LF();
{
AString s, s1, s2;
@@ -712,5 +825,73 @@ void GetSystemInfoText(AString &sRes)
}
#endif
*/
+}
+
+
+void GetCpuName(AString &s);
+void GetCpuName(AString &s) // Sets s to the CPU name with the revision/microcode/large-pages text appended on the same line.
+{
+ CCpuName cpuName;
+ cpuName.Fill();
+ s = cpuName.CpuName; // overwrites any previous contents of s
+ AString s2;
+ cpuName.Get_Revision_Microcode_LargePages(s2);
+ s.Add_OptSpaced(s2);
+}
+
+
+void GetCpuName_MultiLine(AString &s);
+void GetCpuName_MultiLine(AString &s) // Sets s to the CPU name, with the revision/microcode/large-pages text on a second line when it is non-empty.
+{
+ CCpuName cpuName;
+ cpuName.Fill();
+ s = cpuName.CpuName; // overwrites any previous contents of s
+ AString s2;
+ cpuName.Get_Revision_Microcode_LargePages(s2);
+ if (!s2.IsEmpty()) // no line feed is added when there is no extra info
+ {
+ s.Add_LF();
+ s += s2;
}
}
+
+void GetCompiler(AString &s) // Appends compiler identification text to s based on predefined compiler macros; s is not cleared first.
+{
+ #ifdef __VERSION__
+ s += __VERSION__;
+ #endif
+
+ #ifdef __GNUC__
+ s += " GCC "; // the sections below are independent #ifdef blocks, so more than one may be appended
+ s.Add_UInt32(__GNUC__);
+ s += '.';
+ s.Add_UInt32(__GNUC_MINOR__);
+ s += '.';
+ s.Add_UInt32(__GNUC_PATCHLEVEL__);
+ #endif
+
+ #ifdef __clang__
+ s += " CLANG ";
+ s.Add_UInt32(__clang_major__);
+ s += '.';
+ s.Add_UInt32(__clang_minor__);
+ #endif
+
+ #ifdef __xlC__
+ s += " XLC ";
+ s.Add_UInt32(__xlC__ >> 8); // bits above the low byte, printed as the first number
+ s += '.';
+ s.Add_UInt32(__xlC__ & 0xFF); // low byte, printed as the second number
+ #ifdef __xlC_ver__
+ s += '.';
+ s.Add_UInt32(__xlC_ver__ >> 8);
+ s += '.';
+ s.Add_UInt32(__xlC_ver__ & 0xFF);
+ #endif
+ #endif
+
+ #ifdef _MSC_VER
+ s += " MSC ";
+ s.Add_UInt32(_MSC_VER); // raw _MSC_VER number, not split into major/minor
+ #endif
+}
diff --git a/CPP/Windows/SystemInfo.h b/CPP/Windows/SystemInfo.h
index 856bb2ba..e941d0aa 100644
--- a/CPP/Windows/SystemInfo.h
+++ b/CPP/Windows/SystemInfo.h
@@ -5,8 +5,14 @@
#include "../Common/MyString.h"
+
+void GetCpuName_MultiLine(AString &s);
+
+void GetOsInfoText(AString &sRes);
void GetSystemInfoText(AString &s);
void PrintSize_KMGT_Or_Hex(AString &s, UInt64 v);
void Add_LargePages_String(AString &s);
+void GetCompiler(AString &s);
+
#endif
diff --git a/DOC/7zip.wxs b/DOC/7zip.wxs
index 14793a67..c18e7068 100644
--- a/DOC/7zip.wxs
+++ b/DOC/7zip.wxs
@@ -1,7 +1,7 @@
<?xml version="1.0"?>
<?define VerMajor = "21" ?>
-<?define VerMinor = "02" ?>
+<?define VerMinor = "03" ?>
<?define VerBuild = "00" ?>
<?define MmVer = "$(var.VerMajor).$(var.VerMinor)" ?>
<?define MmHex = "$(var.VerMajor)$(var.VerMinor)" ?>
diff --git a/DOC/readme.txt b/DOC/readme.txt
index 18e5e387..d5e16452 100644
--- a/DOC/readme.txt
+++ b/DOC/readme.txt
@@ -1,4 +1,4 @@
-7-Zip 21.02 Sources
+7-Zip 21.03 Sources
-------------------
7-Zip is a file archiver for Windows.
@@ -42,11 +42,11 @@ You can download LZMA SDK from:
LZMA SDK is written and placed in the public domain by Igor Pavlov.
-How to compile
---------------
+How to compile in Windows
+-------------------------
To compile the sources to Windows binaries you need Visual Studio compiler and/or Windows SDK.
-You can use latest Windows Studio 2017 to compile binaries for x86, x64 and arm64 platforms.
+You can use the latest Visual Studio 2017/2019 to compile binaries for x86, x64 and arm64 platforms.
Also you can use old compilers for some platforms:
x86 : Visual C++ 6.0 with Platform SDK
x64 : Windows Server 2003 R2 Platform SDK
@@ -73,6 +73,7 @@ The dsp file compiling can be used for development and debug purposes.
The final 7-Zip binaries are compiled via makefiles, that provide best
optimization options.
+
How to compile with makefile
----------------------------
@@ -90,10 +91,78 @@ MY_DYNAMIC_LINK
-Compiling under Unix/Linux
---------------------------
-Check this site for Posix/Linux version:
-http://sourceforge.net/projects/p7zip/
+Compiling 7-Zip for Unix/Linux
+------------------------------
+
+There are several options to compile 7-Zip with different compilers: gcc and clang.
+Also 7-Zip code contains two versions for some critical parts of code: in C and in Assembler.
+So if you compile the version with Assembler code, you will get a faster 7-Zip binary.
+
+7-Zip's assembler code uses the following syntax for different platforms:
+
+1) x86 and x86-64 (AMD64): MASM syntax.
+ There are 2 programs that support MASM syntax in Linux:
+ Asmc Macro Assembler and JWasm. But JWasm now doesn't support some
+ cpu instructions used in 7-Zip.
+ So you must install Asmc Macro Assembler in Linux, if you want to compile fastest version
+ of 7-Zip x86 and x86-64:
+ https://github.com/nidud/asmc
+
+2) arm64: GNU assembler for ARM64 with preprocessor.
+ The syntax of that arm64 assembler code in 7-Zip is supported by GCC and CLANG for ARM64.
+
+There are different binaries that can be compiled from 7-Zip source.
+There are 2 main files in folder for compiling:
+ makefile - that can be used for compiling Windows version of 7-Zip with nmake command
+ makefile.gcc - that can be used for compiling Linux/macOS versions of 7-Zip with make command
+
+At first you must change the current folder to folder that contains `makefile.gcc`:
+
+ cd CPP/7zip/Bundles/Alone2
+
+Then you can compile `makefile.gcc` with the command:
+
+ make -j -f makefile.gcc
+
+Also there are additional "*.mak" files in folder "CPP/7zip/" that can be used to compile
+7-Zip binaries with optimized code and optimizing options.
+
+To compile with GCC without assembler:
+ cd CPP/7zip/Bundles/Alone2
+ make -j -f ../../cmpl_gcc.mak
+
+To compile with CLANG without assembler:
+ make -j -f ../../cmpl_clang.mak
+
+To compile 7-Zip for x86-64 with asmc assembler:
+ make -j -f ../../cmpl_gcc_x64.mak
+
+To compile 7-Zip for arm64 with assembler:
+ make -j -f ../../cmpl_gcc_arm64.mak
+
+To compile 7-Zip for arm64 for macOS:
+ make -j -f ../../cmpl_mac_arm64.mak
+
+Also you can change some compiler options in the mak files:
+ cmpl_gcc.mak
+ var_gcc.mak
+ warn_gcc.mak
+
+
+7-Zip and p7zip
+===============
+Now there are two different ports of 7-Zip for Linux/macOS:
+
+1) p7zip - another port of 7-Zip for Linux, made by an independent developer.
+ The latest version of p7zip now is 16.02, and that p7zip 16.02 is outdated now.
+ http://sourceforge.net/projects/p7zip/
+
+2) 7-Zip for Linux/macOS - this package - it's new code with all changes from latest 7-Zip for Windows.
+
+These two ports are not identical.
+Note also that some Linux specific things can be implemented better in p7zip than in new 7-Zip for Linux.
+
+
Notes:
@@ -127,7 +196,7 @@ DOC Documentation
7zip.wix - installer script for WIX
-Asm - Source code in Assembler (optimized code for CRC calculation and Intel-AES encryption)
+Asm - Source code in Assembler : optimized code for CRC, SHA, AES, LZMA decoding.
C - Source code in C
@@ -146,6 +215,7 @@ Windows common files for Windows related code
Bundle Modules that are bundles of other modules (files)
Alone 7za.exe: Standalone version of 7-Zip console that supports only 7z/xz/cab/zip/gzip/bzip2/tar.
+ Alone2 7zz.exe: Standalone version of 7-Zip console that supports all formats.
Alone7z 7zr.exe: Standalone version of 7-Zip console that supports only 7z (reduced version)
Fm Standalone version of 7-Zip File Manager
Format7z 7za.dll: .7z support