Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/kornelski/7z.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'Asm/x86/LzFindOpt.asm')
-rw-r--r--Asm/x86/LzFindOpt.asm513
1 files changed, 513 insertions, 0 deletions
diff --git a/Asm/x86/LzFindOpt.asm b/Asm/x86/LzFindOpt.asm
new file mode 100644
index 00000000..42e10bda
--- /dev/null
+++ b/Asm/x86/LzFindOpt.asm
@@ -0,0 +1,513 @@
+; LzFindOpt.asm -- ASM version of GetMatchesSpecN_2() function
+; 2021-07-21: Igor Pavlov : Public domain
+;
+
+ifndef x64
+; x64=1
+; .err <x64_IS_REQUIRED>
+endif
+
+include 7zAsm.asm
+
+MY_ASM_START
+
+_TEXT$LZFINDOPT SEGMENT ALIGN(64) 'CODE'
+
+MY_ALIGN macro num:req
+ align num
+endm
+
+MY_ALIGN_32 macro
+ MY_ALIGN 32
+endm
+
+MY_ALIGN_64 macro
+ MY_ALIGN 64
+endm
+
+
+t0_L equ x0_L
+t0_x equ x0
+t0 equ r0
+t1_x equ x3
+t1 equ r3
+
+cp_x equ t1_x
+cp_r equ t1
+m equ x5
+m_r equ r5
+len_x equ x6
+len equ r6
+diff_x equ x7
+diff equ r7
+len0 equ r10
+len1_x equ x11
+len1 equ r11
+maxLen_x equ x12
+maxLen equ r12
+d equ r13
+ptr0 equ r14
+ptr1 equ r15
+
+d_lim equ m_r
+cycSize equ len_x
+hash_lim equ len0
+delta1_x equ len1_x
+delta1_r equ len1
+delta_x equ maxLen_x
+delta_r equ maxLen
+hash equ ptr0
+src equ ptr1
+
+
+
+if (IS_LINUX gt 0)
+
+; r1 r2 r8 r9 : win32
+; r7 r6 r2 r1 r8 r9 : linux
+
+lenLimit equ r8
+lenLimit_x equ x8
+; pos_r equ r2
+pos equ x2
+cur equ r1
+son equ r9
+
+else
+
+lenLimit equ REG_ABI_PARAM_2
+lenLimit_x equ REG_ABI_PARAM_2_x
+pos equ REG_ABI_PARAM_1_x
+cur equ REG_ABI_PARAM_0
+son equ REG_ABI_PARAM_3
+
+endif
+
+
+if (IS_LINUX gt 0)
+ maxLen_OFFS equ (REG_SIZE * (6 + 1))
+else
+ cutValue_OFFS equ (REG_SIZE * (8 + 1 + 4))
+ d_OFFS equ (REG_SIZE + cutValue_OFFS)
+ maxLen_OFFS equ (REG_SIZE + d_OFFS)
+endif
+ hash_OFFS equ (REG_SIZE + maxLen_OFFS)
+ limit_OFFS equ (REG_SIZE + hash_OFFS)
+ size_OFFS equ (REG_SIZE + limit_OFFS)
+ cycPos_OFFS equ (REG_SIZE + size_OFFS)
+ cycSize_OFFS equ (REG_SIZE + cycPos_OFFS)
+ posRes_OFFS equ (REG_SIZE + cycSize_OFFS)
+
+if (IS_LINUX gt 0)
+else
+ cutValue_PAR equ [r0 + cutValue_OFFS]
+ d_PAR equ [r0 + d_OFFS]
+endif
+ maxLen_PAR equ [r0 + maxLen_OFFS]
+ hash_PAR equ [r0 + hash_OFFS]
+ limit_PAR equ [r0 + limit_OFFS]
+ size_PAR equ [r0 + size_OFFS]
+ cycPos_PAR equ [r0 + cycPos_OFFS]
+ cycSize_PAR equ [r0 + cycSize_OFFS]
+ posRes_PAR equ [r0 + posRes_OFFS]
+
+
+ cutValue_VAR equ DWORD PTR [r4 + 8 * 0]
+ cutValueCur_VAR equ DWORD PTR [r4 + 8 * 0 + 4]
+ cycPos_VAR equ DWORD PTR [r4 + 8 * 1 + 0]
+ cycSize_VAR equ DWORD PTR [r4 + 8 * 1 + 4]
+ hash_VAR equ QWORD PTR [r4 + 8 * 2]
+ limit_VAR equ QWORD PTR [r4 + 8 * 3]
+ size_VAR equ QWORD PTR [r4 + 8 * 4]
+ distances equ QWORD PTR [r4 + 8 * 5]
+ maxLen_VAR equ QWORD PTR [r4 + 8 * 6]
+
+ Old_RSP equ QWORD PTR [r4 + 8 * 7]
+ LOCAL_SIZE equ 8 * 8
+
+COPY_VAR_32 macro dest_var, src_var
+ mov x3, src_var
+ mov dest_var, x3
+endm
+
+COPY_VAR_64 macro dest_var, src_var
+ mov r3, src_var
+ mov dest_var, r3
+endm
+
+
+; MY_ALIGN_64
+MY_PROC GetMatchesSpecN_2, 13
+MY_PUSH_PRESERVED_ABI_REGS
+ mov r0, RSP
+ lea r3, [r0 - LOCAL_SIZE]
+ and r3, -64
+ mov RSP, r3
+ mov Old_RSP, r0
+
+if (IS_LINUX gt 0)
+ mov d, REG_ABI_PARAM_5 ; r13 = r9
+ mov cutValue_VAR, REG_ABI_PARAM_4_x ; = r8
+ mov son, REG_ABI_PARAM_3 ; r9 = r1
+ mov r8, REG_ABI_PARAM_2 ; r8 = r2
+ mov pos, REG_ABI_PARAM_1_x ; r2 = x6
+ mov r1, REG_ABI_PARAM_0 ; r1 = r7
+else
+ COPY_VAR_32 cutValue_VAR, cutValue_PAR
+ mov d, d_PAR
+endif
+
+ COPY_VAR_64 limit_VAR, limit_PAR
+
+ mov hash_lim, size_PAR
+ mov size_VAR, hash_lim
+
+ mov cp_x, cycPos_PAR
+ mov hash, hash_PAR
+
+ mov cycSize, cycSize_PAR
+ mov cycSize_VAR, cycSize
+
+ ; we want cur in (rcx). So we change the cur and lenLimit variables
+ sub lenLimit, cur
+ neg lenLimit_x
+ inc lenLimit_x
+
+ mov t0_x, maxLen_PAR
+ sub t0, lenLimit
+ mov maxLen_VAR, t0
+
+ jmp main_loop
+
+MY_ALIGN_64
+fill_empty:
+ ; ptr0 = *ptr1 = kEmptyHashValue;
+ mov QWORD PTR [ptr1], 0
+ inc pos
+ inc cp_x
+ mov DWORD PTR [d - 4], 0
+ cmp d, limit_VAR
+ jae fin
+ cmp hash, hash_lim
+ je fin
+
+; MY_ALIGN_64
+main_loop:
+ ; UInt32 delta = *hash++;
+ mov diff_x, [hash] ; delta
+ add hash, 4
+ ; mov cycPos_VAR, cp_x
+
+ inc cur
+ add d, 4
+ mov m, pos
+ sub m, diff_x; ; matchPos
+
+ ; CLzRef *ptr1 = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2;
+ lea ptr1, [son + 8 * cp_r]
+ ; mov cycSize, cycSize_VAR
+ cmp pos, cycSize
+ jb directMode ; if (pos < cycSize_VAR)
+
+ ; CYC MODE
+
+ cmp diff_x, cycSize
+ jae fill_empty ; if (delta >= cycSize_VAR)
+
+ xor t0_x, t0_x
+ mov cycPos_VAR, cp_x
+ sub cp_x, diff_x
+ ; jae prepare_for_tree_loop
+ ; add cp_x, cycSize
+ cmovb t0_x, cycSize
+ add cp_x, t0_x ; cp_x += (cycPos < delta ? cycSize : 0)
+ jmp prepare_for_tree_loop
+
+
+directMode:
+ cmp diff_x, pos
+ je fill_empty ; if (delta == pos)
+ jae fin_error ; if (delta >= pos)
+
+ mov cycPos_VAR, cp_x
+ mov cp_x, m
+
+prepare_for_tree_loop:
+ mov len0, lenLimit
+ mov hash_VAR, hash
+ ; CLzRef *ptr0 = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2 + 1;
+ lea ptr0, [ptr1 + 4]
+ ; UInt32 *_distances = ++d;
+ mov distances, d
+
+ neg len0
+ mov len1, len0
+
+ mov t0_x, cutValue_VAR
+ mov maxLen, maxLen_VAR
+ mov cutValueCur_VAR, t0_x
+
+MY_ALIGN_32
+tree_loop:
+ neg diff
+ mov len, len0
+ cmp len1, len0
+ cmovb len, len1 ; len = (len1 < len0 ? len1 : len0);
+ add diff, cur
+
+ mov t0_x, [son + cp_r * 8] ; prefetch
+ movzx t0_x, BYTE PTR [diff + 1 * len]
+ lea cp_r, [son + cp_r * 8]
+ cmp [cur + 1 * len], t0_L
+ je matched_1
+
+ jb left_0
+
+ mov [ptr1], m
+ mov m, [cp_r + 4]
+ lea ptr1, [cp_r + 4]
+ sub diff, cur ; FIX32
+ jmp next_node
+
+MY_ALIGN_32
+left_0:
+ mov [ptr0], m
+ mov m, [cp_r]
+ mov ptr0, cp_r
+ sub diff, cur ; FIX32
+ ; jmp next_node
+
+; ------------ NEXT NODE ------------
+; MY_ALIGN_32
+next_node:
+ mov cycSize, cycSize_VAR
+ dec cutValueCur_VAR
+ je finish_tree
+
+ add diff_x, pos ; prev_match = pos + diff
+ cmp m, diff_x
+ jae fin_error ; if (new_match >= prev_match)
+
+ mov diff_x, pos
+ sub diff_x, m ; delta = pos - new_match
+ cmp pos, cycSize
+ jae cyc_mode_2 ; if (pos >= cycSize)
+
+ mov cp_x, m
+ test m, m
+ jne tree_loop ; if (m != 0)
+
+finish_tree:
+ ; ptr0 = *ptr1 = kEmptyHashValue;
+ mov DWORD PTR [ptr0], 0
+ mov DWORD PTR [ptr1], 0
+
+ inc pos
+
+ ; _distances[-1] = (UInt32)(d - _distances);
+ mov t0, distances
+ mov t1, d
+ sub t1, t0
+ shr t1_x, 2
+ mov [t0 - 4], t1_x
+
+ cmp d, limit_VAR
+ jae fin ; if (d >= limit)
+
+ mov cp_x, cycPos_VAR
+ mov hash, hash_VAR
+ mov hash_lim, size_VAR
+ inc cp_x
+ cmp hash, hash_lim
+ jne main_loop ; if (hash != size)
+ jmp fin
+
+
+MY_ALIGN_32
+cyc_mode_2:
+ cmp diff_x, cycSize
+ jae finish_tree ; if (delta >= cycSize)
+
+ mov cp_x, cycPos_VAR
+ xor t0_x, t0_x
+ sub cp_x, diff_x ; cp_x = cycPos - delta
+ cmovb t0_x, cycSize
+ add cp_x, t0_x ; cp_x += (cycPos < delta ? cycSize : 0)
+ jmp tree_loop
+
+
+MY_ALIGN_32
+matched_1:
+
+ inc len
+ ; cmp len_x, lenLimit_x
+ je short lenLimit_reach
+ movzx t0_x, BYTE PTR [diff + 1 * len]
+ cmp [cur + 1 * len], t0_L
+ jne mismatch
+
+
+MY_ALIGN_32
+match_loop:
+ ; while (++len != lenLimit) (len[diff] != len[0]) ;
+
+ inc len
+ ; cmp len_x, lenLimit_x
+ je short lenLimit_reach
+ movzx t0_x, BYTE PTR [diff + 1 * len]
+ cmp BYTE PTR [cur + 1 * len], t0_L
+ je match_loop
+
+mismatch:
+ jb left_2
+
+ mov [ptr1], m
+ mov m, [cp_r + 4]
+ lea ptr1, [cp_r + 4]
+ mov len1, len
+
+ jmp max_update
+
+MY_ALIGN_32
+left_2:
+ mov [ptr0], m
+ mov m, [cp_r]
+ mov ptr0, cp_r
+ mov len0, len
+
+max_update:
+ sub diff, cur ; restore diff
+
+ cmp maxLen, len
+ jae next_node
+
+ mov maxLen, len
+ add len, lenLimit
+ mov [d], len_x
+ mov t0_x, diff_x
+ not t0_x
+ mov [d + 4], t0_x
+ add d, 8
+
+ jmp next_node
+
+
+
+MY_ALIGN_32
+lenLimit_reach:
+
+ mov delta_r, cur
+ sub delta_r, diff
+ lea delta1_r, [delta_r - 1]
+
+ mov t0_x, [cp_r]
+ mov [ptr1], t0_x
+ mov t0_x, [cp_r + 4]
+ mov [ptr0], t0_x
+
+ mov [d], lenLimit_x
+ mov [d + 4], delta1_x
+ add d, 8
+
+ ; _distances[-1] = (UInt32)(d - _distances);
+ mov t0, distances
+ mov t1, d
+ sub t1, t0
+ shr t1_x, 2
+ mov [t0 - 4], t1_x
+
+ mov hash, hash_VAR
+ mov hash_lim, size_VAR
+
+ inc pos
+ mov cp_x, cycPos_VAR
+ inc cp_x
+
+ mov d_lim, limit_VAR
+ mov cycSize, cycSize_VAR
+ ; if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)
+ ; break;
+ cmp hash, hash_lim
+ je fin
+ cmp d, d_lim
+ jae fin
+ cmp delta_x, [hash]
+ jne main_loop
+ movzx t0_x, BYTE PTR [diff]
+ cmp [cur], t0_L
+ jne main_loop
+
+ ; jmp main_loop ; bypass for debug
+
+ mov cycPos_VAR, cp_x
+ shl len, 3 ; cycSize * 8
+ sub diff, cur ; restore diff
+ xor t0_x, t0_x
+ cmp cp_x, delta_x ; cmp (cycPos_VAR, delta)
+ lea cp_r, [son + 8 * cp_r] ; dest
+ lea src, [cp_r + 8 * diff]
+ cmovb t0, len ; t0 = (cycPos_VAR < delta ? cycSize * 8 : 0)
+ add src, t0
+ add len, son ; len = son + cycSize * 8
+
+
+MY_ALIGN_32
+long_loop:
+ add hash, 4
+
+ ; *(UInt64 *)(void *)ptr = ((const UInt64 *)(const void *)ptr)[diff];
+
+ mov t0, [src]
+ add src, 8
+ mov [cp_r], t0
+ add cp_r, 8
+ cmp src, len
+ cmove src, son ; if end of (son) buffer is reached, we wrap to begin
+
+ mov DWORD PTR [d], 2
+ mov [d + 4], lenLimit_x
+ mov [d + 8], delta1_x
+ add d, 12
+
+ inc cur
+
+ cmp hash, hash_lim
+ je long_footer
+ cmp delta_x, [hash]
+ jne long_footer
+ movzx t0_x, BYTE PTR [diff + 1 * cur]
+ cmp [cur], t0_L
+ jne long_footer
+ cmp d, d_lim
+ jb long_loop
+
+long_footer:
+ sub cp_r, son
+ shr cp_r, 3
+ add pos, cp_x
+ sub pos, cycPos_VAR
+ mov cycSize, cycSize_VAR
+
+ cmp d, d_lim
+ jae fin
+ cmp hash, hash_lim
+ jne main_loop
+ jmp fin
+
+
+
+fin_error:
+ xor d, d
+
+fin:
+ mov RSP, Old_RSP
+ mov t0, [r4 + posRes_OFFS]
+ mov [t0], pos
+ mov r0, d
+
+MY_POP_PRESERVED_ABI_REGS
+MY_ENDP
+
+_TEXT$LZFINDOPT ENDS
+
+end