github.com/nodejs/node.git
Diffstat (limited to 'deps/openssl/asm/x64-macosx-gas/sha/sha256-x86_64.s')
-rw-r--r--  deps/openssl/asm/x64-macosx-gas/sha/sha256-x86_64.s  2353
1 file changed, 22 insertions(+), 2331 deletions(-)
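
Editor's note: the diff below removes the AVX and AVX2 code paths from this generated SHA-256 assembly (the L$avx_shortcut/L$avx2_shortcut dispatch in the first hunk, plus the sha256_block_data_order_avx and sha256_block_data_order_avx2 bodies in the last hunk), leaving only the SHA-NI (shaext) and SSSE3 paths, and respells pshufd immediates in hex. The decimal immediates tested by the removed dispatch are CPU-capability masks. Below is a minimal sketch decoding them; the bit names assume the OPENSSL_ia32cap_P layout used by OpenSSL's x86_64 perlasm, which is an assumption on my part, not something the diff states.

    /*
     * Hedged sketch: the immediates from the first hunk, decoded as bit
     * masks.  Bit assignments assume OpenSSL's OPENSSL_ia32cap_P layout.
     */
    #include <stdio.h>

    int main(void)
    {
        unsigned sha   = 1u << 29;                        /* 536870912: SHA extensions (kept test)   */
        unsigned avx2  = (1u << 3) | (1u << 5) | (1u << 8);
                                                          /* 296 = 0x128: BMI1 | AVX2 | BMI2 (removed) */
        unsigned intel = 1u << 30;                        /* 1073741824: synthetic "Intel CPU" bit   */
        unsigned avx_ssse3 = (1u << 28) | (1u << 9);      /* 268435968: AVX | SSSE3                  */
        unsigned ssse3 = 1u << 9;                         /* 512: SSSE3 (kept test)                  */

        /* 1342177792 = 0x50000200: Intel AND AVX AND SSSE3, the removed cmpl value */
        printf("%u\n", intel | avx_ssse3);
        printf("%u %u %u %u\n", sha, avx2, avx_ssse3, ssse3);
        return 0;
    }
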
diff --git a/deps/openssl/asm/x64-macosx-gas/sha/sha256-x86_64.s b/deps/openssl/asm/x64-macosx-gas/sha/sha256-x86_64.s
index b66bd34406c..5566d587610 100644
--- a/deps/openssl/asm/x64-macosx-gas/sha/sha256-x86_64.s
+++ b/deps/openssl/asm/x64-macosx-gas/sha/sha256-x86_64.s
@@ -11,14 +11,6 @@ _sha256_block_data_order:
movl 8(%r11),%r11d
testl $536870912,%r11d
jnz _shaext_shortcut
- andl $296,%r11d
- cmpl $296,%r11d
- je L$avx2_shortcut
- andl $1073741824,%r9d
- andl $268435968,%r10d
- orl %r9d,%r10d
- cmpl $1342177792,%r10d
- je L$avx_shortcut
testl $512,%r10d
jnz L$ssse3_shortcut
pushq %rbx
@@ -1762,9 +1754,9 @@ _shaext_shortcut:
movdqu (%rdi),%xmm1
movdqu 16(%rdi),%xmm2
movdqa 512-128(%rcx),%xmm7
- pshufd $27,%xmm1,%xmm0
- pshufd $177,%xmm1,%xmm1
- pshufd $27,%xmm2,%xmm2
+ pshufd $0x1b,%xmm1,%xmm0
+ pshufd $0xb1,%xmm1,%xmm1
+ pshufd $0x1b,%xmm2,%xmm2
movdqa %xmm7,%xmm8
.byte 102,15,58,15,202,8
punpcklqdq %xmm0,%xmm2
@@ -1783,7 +1775,7 @@ L$oop_shaext:
.byte 102,15,56,0,231
movdqa %xmm2,%xmm10
.byte 15,56,203,209
- pshufd $14,%xmm0,%xmm0
+ pshufd $0x0e,%xmm0,%xmm0
nop
movdqa %xmm1,%xmm9
.byte 15,56,203,202
@@ -1792,7 +1784,7 @@ L$oop_shaext:
paddd %xmm4,%xmm0
.byte 102,15,56,0,239
.byte 15,56,203,209
- pshufd $14,%xmm0,%xmm0
+ pshufd $0x0e,%xmm0,%xmm0
leaq 64(%rsi),%rsi
.byte 15,56,204,220
.byte 15,56,203,202
@@ -1801,7 +1793,7 @@ L$oop_shaext:
paddd %xmm5,%xmm0
.byte 102,15,56,0,247
.byte 15,56,203,209
- pshufd $14,%xmm0,%xmm0
+ pshufd $0x0e,%xmm0,%xmm0
movdqa %xmm6,%xmm7
.byte 102,15,58,15,253,4
nop
@@ -1813,7 +1805,7 @@ L$oop_shaext:
paddd %xmm6,%xmm0
.byte 15,56,205,222
.byte 15,56,203,209
- pshufd $14,%xmm0,%xmm0
+ pshufd $0x0e,%xmm0,%xmm0
movdqa %xmm3,%xmm7
.byte 102,15,58,15,254,4
nop
@@ -1824,7 +1816,7 @@ L$oop_shaext:
paddd %xmm3,%xmm0
.byte 15,56,205,227
.byte 15,56,203,209
- pshufd $14,%xmm0,%xmm0
+ pshufd $0x0e,%xmm0,%xmm0
movdqa %xmm4,%xmm7
.byte 102,15,58,15,251,4
nop
@@ -1835,7 +1827,7 @@ L$oop_shaext:
paddd %xmm4,%xmm0
.byte 15,56,205,236
.byte 15,56,203,209
- pshufd $14,%xmm0,%xmm0
+ pshufd $0x0e,%xmm0,%xmm0
movdqa %xmm5,%xmm7
.byte 102,15,58,15,252,4
nop
@@ -1846,7 +1838,7 @@ L$oop_shaext:
paddd %xmm5,%xmm0
.byte 15,56,205,245
.byte 15,56,203,209
- pshufd $14,%xmm0,%xmm0
+ pshufd $0x0e,%xmm0,%xmm0
movdqa %xmm6,%xmm7
.byte 102,15,58,15,253,4
nop
@@ -1857,7 +1849,7 @@ L$oop_shaext:
paddd %xmm6,%xmm0
.byte 15,56,205,222
.byte 15,56,203,209
- pshufd $14,%xmm0,%xmm0
+ pshufd $0x0e,%xmm0,%xmm0
movdqa %xmm3,%xmm7
.byte 102,15,58,15,254,4
nop
@@ -1868,7 +1860,7 @@ L$oop_shaext:
paddd %xmm3,%xmm0
.byte 15,56,205,227
.byte 15,56,203,209
- pshufd $14,%xmm0,%xmm0
+ pshufd $0x0e,%xmm0,%xmm0
movdqa %xmm4,%xmm7
.byte 102,15,58,15,251,4
nop
@@ -1879,7 +1871,7 @@ L$oop_shaext:
paddd %xmm4,%xmm0
.byte 15,56,205,236
.byte 15,56,203,209
- pshufd $14,%xmm0,%xmm0
+ pshufd $0x0e,%xmm0,%xmm0
movdqa %xmm5,%xmm7
.byte 102,15,58,15,252,4
nop
@@ -1890,7 +1882,7 @@ L$oop_shaext:
paddd %xmm5,%xmm0
.byte 15,56,205,245
.byte 15,56,203,209
- pshufd $14,%xmm0,%xmm0
+ pshufd $0x0e,%xmm0,%xmm0
movdqa %xmm6,%xmm7
.byte 102,15,58,15,253,4
nop
@@ -1901,7 +1893,7 @@ L$oop_shaext:
paddd %xmm6,%xmm0
.byte 15,56,205,222
.byte 15,56,203,209
- pshufd $14,%xmm0,%xmm0
+ pshufd $0x0e,%xmm0,%xmm0
movdqa %xmm3,%xmm7
.byte 102,15,58,15,254,4
nop
@@ -1912,7 +1904,7 @@ L$oop_shaext:
paddd %xmm3,%xmm0
.byte 15,56,205,227
.byte 15,56,203,209
- pshufd $14,%xmm0,%xmm0
+ pshufd $0x0e,%xmm0,%xmm0
movdqa %xmm4,%xmm7
.byte 102,15,58,15,251,4
nop
@@ -1923,7 +1915,7 @@ L$oop_shaext:
paddd %xmm4,%xmm0
.byte 15,56,205,236
.byte 15,56,203,209
- pshufd $14,%xmm0,%xmm0
+ pshufd $0x0e,%xmm0,%xmm0
movdqa %xmm5,%xmm7
.byte 102,15,58,15,252,4
.byte 15,56,203,202
@@ -1932,7 +1924,7 @@ L$oop_shaext:
movdqa 448-128(%rcx),%xmm0
paddd %xmm5,%xmm0
.byte 15,56,203,209
- pshufd $14,%xmm0,%xmm0
+ pshufd $0x0e,%xmm0,%xmm0
.byte 15,56,205,245
movdqa %xmm8,%xmm7
.byte 15,56,203,202
@@ -1941,7 +1933,7 @@ L$oop_shaext:
paddd %xmm6,%xmm0
nop
.byte 15,56,203,209
- pshufd $14,%xmm0,%xmm0
+ pshufd $0x0e,%xmm0,%xmm0
decq %rdx
nop
.byte 15,56,203,202
@@ -1950,9 +1942,9 @@ L$oop_shaext:
paddd %xmm9,%xmm1
jnz L$oop_shaext

- pshufd $177,%xmm2,%xmm2
- pshufd $27,%xmm1,%xmm7
- pshufd $177,%xmm1,%xmm1
+ pshufd $0xb1,%xmm2,%xmm2
+ pshufd $0x1b,%xmm1,%xmm7
+ pshufd $0xb1,%xmm1,%xmm1
punpckhqdq %xmm2,%xmm1
.byte 102,15,58,15,215,8

@@ -3054,2304 +3046,3 @@ L$ssse3_00_47:
leaq 48(%rsi),%rsp
L$epilogue_ssse3:
.byte 0xf3,0xc3
-
-
-.p2align 6
-sha256_block_data_order_avx:
-L$avx_shortcut:
- pushq %rbx
- pushq %rbp
- pushq %r12
- pushq %r13
- pushq %r14
- pushq %r15
- movq %rsp,%r11
- shlq $4,%rdx
- subq $96,%rsp
- leaq (%rsi,%rdx,4),%rdx
- andq $-64,%rsp
- movq %rdi,64+0(%rsp)
- movq %rsi,64+8(%rsp)
- movq %rdx,64+16(%rsp)
- movq %r11,64+24(%rsp)
-L$prologue_avx:
-
- vzeroupper
- movl 0(%rdi),%eax
- movl 4(%rdi),%ebx
- movl 8(%rdi),%ecx
- movl 12(%rdi),%edx
- movl 16(%rdi),%r8d
- movl 20(%rdi),%r9d
- movl 24(%rdi),%r10d
- movl 28(%rdi),%r11d
- vmovdqa K256+512+32(%rip),%xmm8
- vmovdqa K256+512+64(%rip),%xmm9
- jmp L$loop_avx
-.p2align 4
-L$loop_avx:
- vmovdqa K256+512(%rip),%xmm7
- vmovdqu 0(%rsi),%xmm0
- vmovdqu 16(%rsi),%xmm1
- vmovdqu 32(%rsi),%xmm2
- vmovdqu 48(%rsi),%xmm3
- vpshufb %xmm7,%xmm0,%xmm0
- leaq K256(%rip),%rbp
- vpshufb %xmm7,%xmm1,%xmm1
- vpshufb %xmm7,%xmm2,%xmm2
- vpaddd 0(%rbp),%xmm0,%xmm4
- vpshufb %xmm7,%xmm3,%xmm3
- vpaddd 32(%rbp),%xmm1,%xmm5
- vpaddd 64(%rbp),%xmm2,%xmm6
- vpaddd 96(%rbp),%xmm3,%xmm7
- vmovdqa %xmm4,0(%rsp)
- movl %eax,%r14d
- vmovdqa %xmm5,16(%rsp)
- movl %ebx,%edi
- vmovdqa %xmm6,32(%rsp)
- xorl %ecx,%edi
- vmovdqa %xmm7,48(%rsp)
- movl %r8d,%r13d
- jmp L$avx_00_47
-
-.p2align 4
-L$avx_00_47:
- subq $-128,%rbp
- vpalignr $4,%xmm0,%xmm1,%xmm4
- shrdl $14,%r13d,%r13d
- movl %r14d,%eax
- movl %r9d,%r12d
- vpalignr $4,%xmm2,%xmm3,%xmm7
- shrdl $9,%r14d,%r14d
- xorl %r8d,%r13d
- xorl %r10d,%r12d
- vpsrld $7,%xmm4,%xmm6
- shrdl $5,%r13d,%r13d
- xorl %eax,%r14d
- andl %r8d,%r12d
- vpaddd %xmm7,%xmm0,%xmm0
- xorl %r8d,%r13d
- addl 0(%rsp),%r11d
- movl %eax,%r15d
- vpsrld $3,%xmm4,%xmm7
- xorl %r10d,%r12d
- shrdl $11,%r14d,%r14d
- xorl %ebx,%r15d
- vpslld $14,%xmm4,%xmm5
- addl %r12d,%r11d
- shrdl $6,%r13d,%r13d
- andl %r15d,%edi
- vpxor %xmm6,%xmm7,%xmm4
- xorl %eax,%r14d
- addl %r13d,%r11d
- xorl %ebx,%edi
- vpshufd $250,%xmm3,%xmm7
- shrdl $2,%r14d,%r14d
- addl %r11d,%edx
- addl %edi,%r11d
- vpsrld $11,%xmm6,%xmm6
- movl %edx,%r13d
- addl %r11d,%r14d
- shrdl $14,%r13d,%r13d
- vpxor %xmm5,%xmm4,%xmm4
- movl %r14d,%r11d
- movl %r8d,%r12d
- shrdl $9,%r14d,%r14d
- vpslld $11,%xmm5,%xmm5
- xorl %edx,%r13d
- xorl %r9d,%r12d
- shrdl $5,%r13d,%r13d
- vpxor %xmm6,%xmm4,%xmm4
- xorl %r11d,%r14d
- andl %edx,%r12d
- xorl %edx,%r13d
- vpsrld $10,%xmm7,%xmm6
- addl 4(%rsp),%r10d
- movl %r11d,%edi
- xorl %r9d,%r12d
- vpxor %xmm5,%xmm4,%xmm4
- shrdl $11,%r14d,%r14d
- xorl %eax,%edi
- addl %r12d,%r10d
- vpsrlq $17,%xmm7,%xmm7
- shrdl $6,%r13d,%r13d
- andl %edi,%r15d
- xorl %r11d,%r14d
- vpaddd %xmm4,%xmm0,%xmm0
- addl %r13d,%r10d
- xorl %eax,%r15d
- shrdl $2,%r14d,%r14d
- vpxor %xmm7,%xmm6,%xmm6
- addl %r10d,%ecx
- addl %r15d,%r10d
- movl %ecx,%r13d
- vpsrlq $2,%xmm7,%xmm7
- addl %r10d,%r14d
- shrdl $14,%r13d,%r13d
- movl %r14d,%r10d
- vpxor %xmm7,%xmm6,%xmm6
- movl %edx,%r12d
- shrdl $9,%r14d,%r14d
- xorl %ecx,%r13d
- vpshufb %xmm8,%xmm6,%xmm6
- xorl %r8d,%r12d
- shrdl $5,%r13d,%r13d
- xorl %r10d,%r14d
- vpaddd %xmm6,%xmm0,%xmm0
- andl %ecx,%r12d
- xorl %ecx,%r13d
- addl 8(%rsp),%r9d
- vpshufd $80,%xmm0,%xmm7
- movl %r10d,%r15d
- xorl %r8d,%r12d
- shrdl $11,%r14d,%r14d
- vpsrld $10,%xmm7,%xmm6
- xorl %r11d,%r15d
- addl %r12d,%r9d
- shrdl $6,%r13d,%r13d
- vpsrlq $17,%xmm7,%xmm7
- andl %r15d,%edi
- xorl %r10d,%r14d
- addl %r13d,%r9d
- vpxor %xmm7,%xmm6,%xmm6
- xorl %r11d,%edi
- shrdl $2,%r14d,%r14d
- addl %r9d,%ebx
- vpsrlq $2,%xmm7,%xmm7
- addl %edi,%r9d
- movl %ebx,%r13d
- addl %r9d,%r14d
- vpxor %xmm7,%xmm6,%xmm6
- shrdl $14,%r13d,%r13d
- movl %r14d,%r9d
- movl %ecx,%r12d
- vpshufb %xmm9,%xmm6,%xmm6
- shrdl $9,%r14d,%r14d
- xorl %ebx,%r13d
- xorl %edx,%r12d
- vpaddd %xmm6,%xmm0,%xmm0
- shrdl $5,%r13d,%r13d
- xorl %r9d,%r14d
- andl %ebx,%r12d
- vpaddd 0(%rbp),%xmm0,%xmm6
- xorl %ebx,%r13d
- addl 12(%rsp),%r8d
- movl %r9d,%edi
- xorl %edx,%r12d
- shrdl $11,%r14d,%r14d
- xorl %r10d,%edi
- addl %r12d,%r8d
- shrdl $6,%r13d,%r13d
- andl %edi,%r15d
- xorl %r9d,%r14d
- addl %r13d,%r8d
- xorl %r10d,%r15d
- shrdl $2,%r14d,%r14d
- addl %r8d,%eax
- addl %r15d,%r8d
- movl %eax,%r13d
- addl %r8d,%r14d
- vmovdqa %xmm6,0(%rsp)
- vpalignr $4,%xmm1,%xmm2,%xmm4
- shrdl $14,%r13d,%r13d
- movl %r14d,%r8d
- movl %ebx,%r12d
- vpalignr $4,%xmm3,%xmm0,%xmm7
- shrdl $9,%r14d,%r14d
- xorl %eax,%r13d
- xorl %ecx,%r12d
- vpsrld $7,%xmm4,%xmm6
- shrdl $5,%r13d,%r13d
- xorl %r8d,%r14d
- andl %eax,%r12d
- vpaddd %xmm7,%xmm1,%xmm1
- xorl %eax,%r13d
- addl 16(%rsp),%edx
- movl %r8d,%r15d
- vpsrld $3,%xmm4,%xmm7
- xorl %ecx,%r12d
- shrdl $11,%r14d,%r14d
- xorl %r9d,%r15d
- vpslld $14,%xmm4,%xmm5
- addl %r12d,%edx
- shrdl $6,%r13d,%r13d
- andl %r15d,%edi
- vpxor %xmm6,%xmm7,%xmm4
- xorl %r8d,%r14d
- addl %r13d,%edx
- xorl %r9d,%edi
- vpshufd $250,%xmm0,%xmm7
- shrdl $2,%r14d,%r14d
- addl %edx,%r11d
- addl %edi,%edx
- vpsrld $11,%xmm6,%xmm6
- movl %r11d,%r13d
- addl %edx,%r14d
- shrdl $14,%r13d,%r13d
- vpxor %xmm5,%xmm4,%xmm4
- movl %r14d,%edx
- movl %eax,%r12d
- shrdl $9,%r14d,%r14d
- vpslld $11,%xmm5,%xmm5
- xorl %r11d,%r13d
- xorl %ebx,%r12d
- shrdl $5,%r13d,%r13d
- vpxor %xmm6,%xmm4,%xmm4
- xorl %edx,%r14d
- andl %r11d,%r12d
- xorl %r11d,%r13d
- vpsrld $10,%xmm7,%xmm6
- addl 20(%rsp),%ecx
- movl %edx,%edi
- xorl %ebx,%r12d
- vpxor %xmm5,%xmm4,%xmm4
- shrdl $11,%r14d,%r14d
- xorl %r8d,%edi
- addl %r12d,%ecx
- vpsrlq $17,%xmm7,%xmm7
- shrdl $6,%r13d,%r13d
- andl %edi,%r15d
- xorl %edx,%r14d
- vpaddd %xmm4,%xmm1,%xmm1
- addl %r13d,%ecx
- xorl %r8d,%r15d
- shrdl $2,%r14d,%r14d
- vpxor %xmm7,%xmm6,%xmm6
- addl %ecx,%r10d
- addl %r15d,%ecx
- movl %r10d,%r13d
- vpsrlq $2,%xmm7,%xmm7
- addl %ecx,%r14d
- shrdl $14,%r13d,%r13d
- movl %r14d,%ecx
- vpxor %xmm7,%xmm6,%xmm6
- movl %r11d,%r12d
- shrdl $9,%r14d,%r14d
- xorl %r10d,%r13d
- vpshufb %xmm8,%xmm6,%xmm6
- xorl %eax,%r12d
- shrdl $5,%r13d,%r13d
- xorl %ecx,%r14d
- vpaddd %xmm6,%xmm1,%xmm1
- andl %r10d,%r12d
- xorl %r10d,%r13d
- addl 24(%rsp),%ebx
- vpshufd $80,%xmm1,%xmm7
- movl %ecx,%r15d
- xorl %eax,%r12d
- shrdl $11,%r14d,%r14d
- vpsrld $10,%xmm7,%xmm6
- xorl %edx,%r15d
- addl %r12d,%ebx
- shrdl $6,%r13d,%r13d
- vpsrlq $17,%xmm7,%xmm7
- andl %r15d,%edi
- xorl %ecx,%r14d
- addl %r13d,%ebx
- vpxor %xmm7,%xmm6,%xmm6
- xorl %edx,%edi
- shrdl $2,%r14d,%r14d
- addl %ebx,%r9d
- vpsrlq $2,%xmm7,%xmm7
- addl %edi,%ebx
- movl %r9d,%r13d
- addl %ebx,%r14d
- vpxor %xmm7,%xmm6,%xmm6
- shrdl $14,%r13d,%r13d
- movl %r14d,%ebx
- movl %r10d,%r12d
- vpshufb %xmm9,%xmm6,%xmm6
- shrdl $9,%r14d,%r14d
- xorl %r9d,%r13d
- xorl %r11d,%r12d
- vpaddd %xmm6,%xmm1,%xmm1
- shrdl $5,%r13d,%r13d
- xorl %ebx,%r14d
- andl %r9d,%r12d
- vpaddd 32(%rbp),%xmm1,%xmm6
- xorl %r9d,%r13d
- addl 28(%rsp),%eax
- movl %ebx,%edi
- xorl %r11d,%r12d
- shrdl $11,%r14d,%r14d
- xorl %ecx,%edi
- addl %r12d,%eax
- shrdl $6,%r13d,%r13d
- andl %edi,%r15d
- xorl %ebx,%r14d
- addl %r13d,%eax
- xorl %ecx,%r15d
- shrdl $2,%r14d,%r14d
- addl %eax,%r8d
- addl %r15d,%eax
- movl %r8d,%r13d
- addl %eax,%r14d
- vmovdqa %xmm6,16(%rsp)
- vpalignr $4,%xmm2,%xmm3,%xmm4
- shrdl $14,%r13d,%r13d
- movl %r14d,%eax
- movl %r9d,%r12d
- vpalignr $4,%xmm0,%xmm1,%xmm7
- shrdl $9,%r14d,%r14d
- xorl %r8d,%r13d
- xorl %r10d,%r12d
- vpsrld $7,%xmm4,%xmm6
- shrdl $5,%r13d,%r13d
- xorl %eax,%r14d
- andl %r8d,%r12d
- vpaddd %xmm7,%xmm2,%xmm2
- xorl %r8d,%r13d
- addl 32(%rsp),%r11d
- movl %eax,%r15d
- vpsrld $3,%xmm4,%xmm7
- xorl %r10d,%r12d
- shrdl $11,%r14d,%r14d
- xorl %ebx,%r15d
- vpslld $14,%xmm4,%xmm5
- addl %r12d,%r11d
- shrdl $6,%r13d,%r13d
- andl %r15d,%edi
- vpxor %xmm6,%xmm7,%xmm4
- xorl %eax,%r14d
- addl %r13d,%r11d
- xorl %ebx,%edi
- vpshufd $250,%xmm1,%xmm7
- shrdl $2,%r14d,%r14d
- addl %r11d,%edx
- addl %edi,%r11d
- vpsrld $11,%xmm6,%xmm6
- movl %edx,%r13d
- addl %r11d,%r14d
- shrdl $14,%r13d,%r13d
- vpxor %xmm5,%xmm4,%xmm4
- movl %r14d,%r11d
- movl %r8d,%r12d
- shrdl $9,%r14d,%r14d
- vpslld $11,%xmm5,%xmm5
- xorl %edx,%r13d
- xorl %r9d,%r12d
- shrdl $5,%r13d,%r13d
- vpxor %xmm6,%xmm4,%xmm4
- xorl %r11d,%r14d
- andl %edx,%r12d
- xorl %edx,%r13d
- vpsrld $10,%xmm7,%xmm6
- addl 36(%rsp),%r10d
- movl %r11d,%edi
- xorl %r9d,%r12d
- vpxor %xmm5,%xmm4,%xmm4
- shrdl $11,%r14d,%r14d
- xorl %eax,%edi
- addl %r12d,%r10d
- vpsrlq $17,%xmm7,%xmm7
- shrdl $6,%r13d,%r13d
- andl %edi,%r15d
- xorl %r11d,%r14d
- vpaddd %xmm4,%xmm2,%xmm2
- addl %r13d,%r10d
- xorl %eax,%r15d
- shrdl $2,%r14d,%r14d
- vpxor %xmm7,%xmm6,%xmm6
- addl %r10d,%ecx
- addl %r15d,%r10d
- movl %ecx,%r13d
- vpsrlq $2,%xmm7,%xmm7
- addl %r10d,%r14d
- shrdl $14,%r13d,%r13d
- movl %r14d,%r10d
- vpxor %xmm7,%xmm6,%xmm6
- movl %edx,%r12d
- shrdl $9,%r14d,%r14d
- xorl %ecx,%r13d
- vpshufb %xmm8,%xmm6,%xmm6
- xorl %r8d,%r12d
- shrdl $5,%r13d,%r13d
- xorl %r10d,%r14d
- vpaddd %xmm6,%xmm2,%xmm2
- andl %ecx,%r12d
- xorl %ecx,%r13d
- addl 40(%rsp),%r9d
- vpshufd $80,%xmm2,%xmm7
- movl %r10d,%r15d
- xorl %r8d,%r12d
- shrdl $11,%r14d,%r14d
- vpsrld $10,%xmm7,%xmm6
- xorl %r11d,%r15d
- addl %r12d,%r9d
- shrdl $6,%r13d,%r13d
- vpsrlq $17,%xmm7,%xmm7
- andl %r15d,%edi
- xorl %r10d,%r14d
- addl %r13d,%r9d
- vpxor %xmm7,%xmm6,%xmm6
- xorl %r11d,%edi
- shrdl $2,%r14d,%r14d
- addl %r9d,%ebx
- vpsrlq $2,%xmm7,%xmm7
- addl %edi,%r9d
- movl %ebx,%r13d
- addl %r9d,%r14d
- vpxor %xmm7,%xmm6,%xmm6
- shrdl $14,%r13d,%r13d
- movl %r14d,%r9d
- movl %ecx,%r12d
- vpshufb %xmm9,%xmm6,%xmm6
- shrdl $9,%r14d,%r14d
- xorl %ebx,%r13d
- xorl %edx,%r12d
- vpaddd %xmm6,%xmm2,%xmm2
- shrdl $5,%r13d,%r13d
- xorl %r9d,%r14d
- andl %ebx,%r12d
- vpaddd 64(%rbp),%xmm2,%xmm6
- xorl %ebx,%r13d
- addl 44(%rsp),%r8d
- movl %r9d,%edi
- xorl %edx,%r12d
- shrdl $11,%r14d,%r14d
- xorl %r10d,%edi
- addl %r12d,%r8d
- shrdl $6,%r13d,%r13d
- andl %edi,%r15d
- xorl %r9d,%r14d
- addl %r13d,%r8d
- xorl %r10d,%r15d
- shrdl $2,%r14d,%r14d
- addl %r8d,%eax
- addl %r15d,%r8d
- movl %eax,%r13d
- addl %r8d,%r14d
- vmovdqa %xmm6,32(%rsp)
- vpalignr $4,%xmm3,%xmm0,%xmm4
- shrdl $14,%r13d,%r13d
- movl %r14d,%r8d
- movl %ebx,%r12d
- vpalignr $4,%xmm1,%xmm2,%xmm7
- shrdl $9,%r14d,%r14d
- xorl %eax,%r13d
- xorl %ecx,%r12d
- vpsrld $7,%xmm4,%xmm6
- shrdl $5,%r13d,%r13d
- xorl %r8d,%r14d
- andl %eax,%r12d
- vpaddd %xmm7,%xmm3,%xmm3
- xorl %eax,%r13d
- addl 48(%rsp),%edx
- movl %r8d,%r15d
- vpsrld $3,%xmm4,%xmm7
- xorl %ecx,%r12d
- shrdl $11,%r14d,%r14d
- xorl %r9d,%r15d
- vpslld $14,%xmm4,%xmm5
- addl %r12d,%edx
- shrdl $6,%r13d,%r13d
- andl %r15d,%edi
- vpxor %xmm6,%xmm7,%xmm4
- xorl %r8d,%r14d
- addl %r13d,%edx
- xorl %r9d,%edi
- vpshufd $250,%xmm2,%xmm7
- shrdl $2,%r14d,%r14d
- addl %edx,%r11d
- addl %edi,%edx
- vpsrld $11,%xmm6,%xmm6
- movl %r11d,%r13d
- addl %edx,%r14d
- shrdl $14,%r13d,%r13d
- vpxor %xmm5,%xmm4,%xmm4
- movl %r14d,%edx
- movl %eax,%r12d
- shrdl $9,%r14d,%r14d
- vpslld $11,%xmm5,%xmm5
- xorl %r11d,%r13d
- xorl %ebx,%r12d
- shrdl $5,%r13d,%r13d
- vpxor %xmm6,%xmm4,%xmm4
- xorl %edx,%r14d
- andl %r11d,%r12d
- xorl %r11d,%r13d
- vpsrld $10,%xmm7,%xmm6
- addl 52(%rsp),%ecx
- movl %edx,%edi
- xorl %ebx,%r12d
- vpxor %xmm5,%xmm4,%xmm4
- shrdl $11,%r14d,%r14d
- xorl %r8d,%edi
- addl %r12d,%ecx
- vpsrlq $17,%xmm7,%xmm7
- shrdl $6,%r13d,%r13d
- andl %edi,%r15d
- xorl %edx,%r14d
- vpaddd %xmm4,%xmm3,%xmm3
- addl %r13d,%ecx
- xorl %r8d,%r15d
- shrdl $2,%r14d,%r14d
- vpxor %xmm7,%xmm6,%xmm6
- addl %ecx,%r10d
- addl %r15d,%ecx
- movl %r10d,%r13d
- vpsrlq $2,%xmm7,%xmm7
- addl %ecx,%r14d
- shrdl $14,%r13d,%r13d
- movl %r14d,%ecx
- vpxor %xmm7,%xmm6,%xmm6
- movl %r11d,%r12d
- shrdl $9,%r14d,%r14d
- xorl %r10d,%r13d
- vpshufb %xmm8,%xmm6,%xmm6
- xorl %eax,%r12d
- shrdl $5,%r13d,%r13d
- xorl %ecx,%r14d
- vpaddd %xmm6,%xmm3,%xmm3
- andl %r10d,%r12d
- xorl %r10d,%r13d
- addl 56(%rsp),%ebx
- vpshufd $80,%xmm3,%xmm7
- movl %ecx,%r15d
- xorl %eax,%r12d
- shrdl $11,%r14d,%r14d
- vpsrld $10,%xmm7,%xmm6
- xorl %edx,%r15d
- addl %r12d,%ebx
- shrdl $6,%r13d,%r13d
- vpsrlq $17,%xmm7,%xmm7
- andl %r15d,%edi
- xorl %ecx,%r14d
- addl %r13d,%ebx
- vpxor %xmm7,%xmm6,%xmm6
- xorl %edx,%edi
- shrdl $2,%r14d,%r14d
- addl %ebx,%r9d
- vpsrlq $2,%xmm7,%xmm7
- addl %edi,%ebx
- movl %r9d,%r13d
- addl %ebx,%r14d
- vpxor %xmm7,%xmm6,%xmm6
- shrdl $14,%r13d,%r13d
- movl %r14d,%ebx
- movl %r10d,%r12d
- vpshufb %xmm9,%xmm6,%xmm6
- shrdl $9,%r14d,%r14d
- xorl %r9d,%r13d
- xorl %r11d,%r12d
- vpaddd %xmm6,%xmm3,%xmm3
- shrdl $5,%r13d,%r13d
- xorl %ebx,%r14d
- andl %r9d,%r12d
- vpaddd 96(%rbp),%xmm3,%xmm6
- xorl %r9d,%r13d
- addl 60(%rsp),%eax
- movl %ebx,%edi
- xorl %r11d,%r12d
- shrdl $11,%r14d,%r14d
- xorl %ecx,%edi
- addl %r12d,%eax
- shrdl $6,%r13d,%r13d
- andl %edi,%r15d
- xorl %ebx,%r14d
- addl %r13d,%eax
- xorl %ecx,%r15d
- shrdl $2,%r14d,%r14d
- addl %eax,%r8d
- addl %r15d,%eax
- movl %r8d,%r13d
- addl %eax,%r14d
- vmovdqa %xmm6,48(%rsp)
- cmpb $0,131(%rbp)
- jne L$avx_00_47
- shrdl $14,%r13d,%r13d
- movl %r14d,%eax
- movl %r9d,%r12d
- shrdl $9,%r14d,%r14d
- xorl %r8d,%r13d
- xorl %r10d,%r12d
- shrdl $5,%r13d,%r13d
- xorl %eax,%r14d
- andl %r8d,%r12d
- xorl %r8d,%r13d
- addl 0(%rsp),%r11d
- movl %eax,%r15d
- xorl %r10d,%r12d
- shrdl $11,%r14d,%r14d
- xorl %ebx,%r15d
- addl %r12d,%r11d
- shrdl $6,%r13d,%r13d
- andl %r15d,%edi
- xorl %eax,%r14d
- addl %r13d,%r11d
- xorl %ebx,%edi
- shrdl $2,%r14d,%r14d
- addl %r11d,%edx
- addl %edi,%r11d
- movl %edx,%r13d
- addl %r11d,%r14d
- shrdl $14,%r13d,%r13d
- movl %r14d,%r11d
- movl %r8d,%r12d
- shrdl $9,%r14d,%r14d
- xorl %edx,%r13d
- xorl %r9d,%r12d
- shrdl $5,%r13d,%r13d
- xorl %r11d,%r14d
- andl %edx,%r12d
- xorl %edx,%r13d
- addl 4(%rsp),%r10d
- movl %r11d,%edi
- xorl %r9d,%r12d
- shrdl $11,%r14d,%r14d
- xorl %eax,%edi
- addl %r12d,%r10d
- shrdl $6,%r13d,%r13d
- andl %edi,%r15d
- xorl %r11d,%r14d
- addl %r13d,%r10d
- xorl %eax,%r15d
- shrdl $2,%r14d,%r14d
- addl %r10d,%ecx
- addl %r15d,%r10d
- movl %ecx,%r13d
- addl %r10d,%r14d
- shrdl $14,%r13d,%r13d
- movl %r14d,%r10d
- movl %edx,%r12d
- shrdl $9,%r14d,%r14d
- xorl %ecx,%r13d
- xorl %r8d,%r12d
- shrdl $5,%r13d,%r13d
- xorl %r10d,%r14d
- andl %ecx,%r12d
- xorl %ecx,%r13d
- addl 8(%rsp),%r9d
- movl %r10d,%r15d
- xorl %r8d,%r12d
- shrdl $11,%r14d,%r14d
- xorl %r11d,%r15d
- addl %r12d,%r9d
- shrdl $6,%r13d,%r13d
- andl %r15d,%edi
- xorl %r10d,%r14d
- addl %r13d,%r9d
- xorl %r11d,%edi
- shrdl $2,%r14d,%r14d
- addl %r9d,%ebx
- addl %edi,%r9d
- movl %ebx,%r13d
- addl %r9d,%r14d
- shrdl $14,%r13d,%r13d
- movl %r14d,%r9d
- movl %ecx,%r12d
- shrdl $9,%r14d,%r14d
- xorl %ebx,%r13d
- xorl %edx,%r12d
- shrdl $5,%r13d,%r13d
- xorl %r9d,%r14d
- andl %ebx,%r12d
- xorl %ebx,%r13d
- addl 12(%rsp),%r8d
- movl %r9d,%edi
- xorl %edx,%r12d
- shrdl $11,%r14d,%r14d
- xorl %r10d,%edi
- addl %r12d,%r8d
- shrdl $6,%r13d,%r13d
- andl %edi,%r15d
- xorl %r9d,%r14d
- addl %r13d,%r8d
- xorl %r10d,%r15d
- shrdl $2,%r14d,%r14d
- addl %r8d,%eax
- addl %r15d,%r8d
- movl %eax,%r13d
- addl %r8d,%r14d
- shrdl $14,%r13d,%r13d
- movl %r14d,%r8d
- movl %ebx,%r12d
- shrdl $9,%r14d,%r14d
- xorl %eax,%r13d
- xorl %ecx,%r12d
- shrdl $5,%r13d,%r13d
- xorl %r8d,%r14d
- andl %eax,%r12d
- xorl %eax,%r13d
- addl 16(%rsp),%edx
- movl %r8d,%r15d
- xorl %ecx,%r12d
- shrdl $11,%r14d,%r14d
- xorl %r9d,%r15d
- addl %r12d,%edx
- shrdl $6,%r13d,%r13d
- andl %r15d,%edi
- xorl %r8d,%r14d
- addl %r13d,%edx
- xorl %r9d,%edi
- shrdl $2,%r14d,%r14d
- addl %edx,%r11d
- addl %edi,%edx
- movl %r11d,%r13d
- addl %edx,%r14d
- shrdl $14,%r13d,%r13d
- movl %r14d,%edx
- movl %eax,%r12d
- shrdl $9,%r14d,%r14d
- xorl %r11d,%r13d
- xorl %ebx,%r12d
- shrdl $5,%r13d,%r13d
- xorl %edx,%r14d
- andl %r11d,%r12d
- xorl %r11d,%r13d
- addl 20(%rsp),%ecx
- movl %edx,%edi
- xorl %ebx,%r12d
- shrdl $11,%r14d,%r14d
- xorl %r8d,%edi
- addl %r12d,%ecx
- shrdl $6,%r13d,%r13d
- andl %edi,%r15d
- xorl %edx,%r14d
- addl %r13d,%ecx
- xorl %r8d,%r15d
- shrdl $2,%r14d,%r14d
- addl %ecx,%r10d
- addl %r15d,%ecx
- movl %r10d,%r13d
- addl %ecx,%r14d
- shrdl $14,%r13d,%r13d
- movl %r14d,%ecx
- movl %r11d,%r12d
- shrdl $9,%r14d,%r14d
- xorl %r10d,%r13d
- xorl %eax,%r12d
- shrdl $5,%r13d,%r13d
- xorl %ecx,%r14d
- andl %r10d,%r12d
- xorl %r10d,%r13d
- addl 24(%rsp),%ebx
- movl %ecx,%r15d
- xorl %eax,%r12d
- shrdl $11,%r14d,%r14d
- xorl %edx,%r15d
- addl %r12d,%ebx
- shrdl $6,%r13d,%r13d
- andl %r15d,%edi
- xorl %ecx,%r14d
- addl %r13d,%ebx
- xorl %edx,%edi
- shrdl $2,%r14d,%r14d
- addl %ebx,%r9d
- addl %edi,%ebx
- movl %r9d,%r13d
- addl %ebx,%r14d
- shrdl $14,%r13d,%r13d
- movl %r14d,%ebx
- movl %r10d,%r12d
- shrdl $9,%r14d,%r14d
- xorl %r9d,%r13d
- xorl %r11d,%r12d
- shrdl $5,%r13d,%r13d
- xorl %ebx,%r14d
- andl %r9d,%r12d
- xorl %r9d,%r13d
- addl 28(%rsp),%eax
- movl %ebx,%edi
- xorl %r11d,%r12d
- shrdl $11,%r14d,%r14d
- xorl %ecx,%edi
- addl %r12d,%eax
- shrdl $6,%r13d,%r13d
- andl %edi,%r15d
- xorl %ebx,%r14d
- addl %r13d,%eax
- xorl %ecx,%r15d
- shrdl $2,%r14d,%r14d
- addl %eax,%r8d
- addl %r15d,%eax
- movl %r8d,%r13d
- addl %eax,%r14d
- shrdl $14,%r13d,%r13d
- movl %r14d,%eax
- movl %r9d,%r12d
- shrdl $9,%r14d,%r14d
- xorl %r8d,%r13d
- xorl %r10d,%r12d
- shrdl $5,%r13d,%r13d
- xorl %eax,%r14d
- andl %r8d,%r12d
- xorl %r8d,%r13d
- addl 32(%rsp),%r11d
- movl %eax,%r15d
- xorl %r10d,%r12d
- shrdl $11,%r14d,%r14d
- xorl %ebx,%r15d
- addl %r12d,%r11d
- shrdl $6,%r13d,%r13d
- andl %r15d,%edi
- xorl %eax,%r14d
- addl %r13d,%r11d
- xorl %ebx,%edi
- shrdl $2,%r14d,%r14d
- addl %r11d,%edx
- addl %edi,%r11d
- movl %edx,%r13d
- addl %r11d,%r14d
- shrdl $14,%r13d,%r13d
- movl %r14d,%r11d
- movl %r8d,%r12d
- shrdl $9,%r14d,%r14d
- xorl %edx,%r13d
- xorl %r9d,%r12d
- shrdl $5,%r13d,%r13d
- xorl %r11d,%r14d
- andl %edx,%r12d
- xorl %edx,%r13d
- addl 36(%rsp),%r10d
- movl %r11d,%edi
- xorl %r9d,%r12d
- shrdl $11,%r14d,%r14d
- xorl %eax,%edi
- addl %r12d,%r10d
- shrdl $6,%r13d,%r13d
- andl %edi,%r15d
- xorl %r11d,%r14d
- addl %r13d,%r10d
- xorl %eax,%r15d
- shrdl $2,%r14d,%r14d
- addl %r10d,%ecx
- addl %r15d,%r10d
- movl %ecx,%r13d
- addl %r10d,%r14d
- shrdl $14,%r13d,%r13d
- movl %r14d,%r10d
- movl %edx,%r12d
- shrdl $9,%r14d,%r14d
- xorl %ecx,%r13d
- xorl %r8d,%r12d
- shrdl $5,%r13d,%r13d
- xorl %r10d,%r14d
- andl %ecx,%r12d
- xorl %ecx,%r13d
- addl 40(%rsp),%r9d
- movl %r10d,%r15d
- xorl %r8d,%r12d
- shrdl $11,%r14d,%r14d
- xorl %r11d,%r15d
- addl %r12d,%r9d
- shrdl $6,%r13d,%r13d
- andl %r15d,%edi
- xorl %r10d,%r14d
- addl %r13d,%r9d
- xorl %r11d,%edi
- shrdl $2,%r14d,%r14d
- addl %r9d,%ebx
- addl %edi,%r9d
- movl %ebx,%r13d
- addl %r9d,%r14d
- shrdl $14,%r13d,%r13d
- movl %r14d,%r9d
- movl %ecx,%r12d
- shrdl $9,%r14d,%r14d
- xorl %ebx,%r13d
- xorl %edx,%r12d
- shrdl $5,%r13d,%r13d
- xorl %r9d,%r14d
- andl %ebx,%r12d
- xorl %ebx,%r13d
- addl 44(%rsp),%r8d
- movl %r9d,%edi
- xorl %edx,%r12d
- shrdl $11,%r14d,%r14d
- xorl %r10d,%edi
- addl %r12d,%r8d
- shrdl $6,%r13d,%r13d
- andl %edi,%r15d
- xorl %r9d,%r14d
- addl %r13d,%r8d
- xorl %r10d,%r15d
- shrdl $2,%r14d,%r14d
- addl %r8d,%eax
- addl %r15d,%r8d
- movl %eax,%r13d
- addl %r8d,%r14d
- shrdl $14,%r13d,%r13d
- movl %r14d,%r8d
- movl %ebx,%r12d
- shrdl $9,%r14d,%r14d
- xorl %eax,%r13d
- xorl %ecx,%r12d
- shrdl $5,%r13d,%r13d
- xorl %r8d,%r14d
- andl %eax,%r12d
- xorl %eax,%r13d
- addl 48(%rsp),%edx
- movl %r8d,%r15d
- xorl %ecx,%r12d
- shrdl $11,%r14d,%r14d
- xorl %r9d,%r15d
- addl %r12d,%edx
- shrdl $6,%r13d,%r13d
- andl %r15d,%edi
- xorl %r8d,%r14d
- addl %r13d,%edx
- xorl %r9d,%edi
- shrdl $2,%r14d,%r14d
- addl %edx,%r11d
- addl %edi,%edx
- movl %r11d,%r13d
- addl %edx,%r14d
- shrdl $14,%r13d,%r13d
- movl %r14d,%edx
- movl %eax,%r12d
- shrdl $9,%r14d,%r14d
- xorl %r11d,%r13d
- xorl %ebx,%r12d
- shrdl $5,%r13d,%r13d
- xorl %edx,%r14d
- andl %r11d,%r12d
- xorl %r11d,%r13d
- addl 52(%rsp),%ecx
- movl %edx,%edi
- xorl %ebx,%r12d
- shrdl $11,%r14d,%r14d
- xorl %r8d,%edi
- addl %r12d,%ecx
- shrdl $6,%r13d,%r13d
- andl %edi,%r15d
- xorl %edx,%r14d
- addl %r13d,%ecx
- xorl %r8d,%r15d
- shrdl $2,%r14d,%r14d
- addl %ecx,%r10d
- addl %r15d,%ecx
- movl %r10d,%r13d
- addl %ecx,%r14d
- shrdl $14,%r13d,%r13d
- movl %r14d,%ecx
- movl %r11d,%r12d
- shrdl $9,%r14d,%r14d
- xorl %r10d,%r13d
- xorl %eax,%r12d
- shrdl $5,%r13d,%r13d
- xorl %ecx,%r14d
- andl %r10d,%r12d
- xorl %r10d,%r13d
- addl 56(%rsp),%ebx
- movl %ecx,%r15d
- xorl %eax,%r12d
- shrdl $11,%r14d,%r14d
- xorl %edx,%r15d
- addl %r12d,%ebx
- shrdl $6,%r13d,%r13d
- andl %r15d,%edi
- xorl %ecx,%r14d
- addl %r13d,%ebx
- xorl %edx,%edi
- shrdl $2,%r14d,%r14d
- addl %ebx,%r9d
- addl %edi,%ebx
- movl %r9d,%r13d
- addl %ebx,%r14d
- shrdl $14,%r13d,%r13d
- movl %r14d,%ebx
- movl %r10d,%r12d
- shrdl $9,%r14d,%r14d
- xorl %r9d,%r13d
- xorl %r11d,%r12d
- shrdl $5,%r13d,%r13d
- xorl %ebx,%r14d
- andl %r9d,%r12d
- xorl %r9d,%r13d
- addl 60(%rsp),%eax
- movl %ebx,%edi
- xorl %r11d,%r12d
- shrdl $11,%r14d,%r14d
- xorl %ecx,%edi
- addl %r12d,%eax
- shrdl $6,%r13d,%r13d
- andl %edi,%r15d
- xorl %ebx,%r14d
- addl %r13d,%eax
- xorl %ecx,%r15d
- shrdl $2,%r14d,%r14d
- addl %eax,%r8d
- addl %r15d,%eax
- movl %r8d,%r13d
- addl %eax,%r14d
- movq 64+0(%rsp),%rdi
- movl %r14d,%eax
-
- addl 0(%rdi),%eax
- leaq 64(%rsi),%rsi
- addl 4(%rdi),%ebx
- addl 8(%rdi),%ecx
- addl 12(%rdi),%edx
- addl 16(%rdi),%r8d
- addl 20(%rdi),%r9d
- addl 24(%rdi),%r10d
- addl 28(%rdi),%r11d
-
- cmpq 64+16(%rsp),%rsi
-
- movl %eax,0(%rdi)
- movl %ebx,4(%rdi)
- movl %ecx,8(%rdi)
- movl %edx,12(%rdi)
- movl %r8d,16(%rdi)
- movl %r9d,20(%rdi)
- movl %r10d,24(%rdi)
- movl %r11d,28(%rdi)
- jb L$loop_avx
-
- movq 64+24(%rsp),%rsi
- vzeroupper
- movq (%rsi),%r15
- movq 8(%rsi),%r14
- movq 16(%rsi),%r13
- movq 24(%rsi),%r12
- movq 32(%rsi),%rbp
- movq 40(%rsi),%rbx
- leaq 48(%rsi),%rsp
-L$epilogue_avx:
- .byte 0xf3,0xc3
-
-
-.p2align 6
-sha256_block_data_order_avx2:
-L$avx2_shortcut:
- pushq %rbx
- pushq %rbp
- pushq %r12
- pushq %r13
- pushq %r14
- pushq %r15
- movq %rsp,%r11
- subq $544,%rsp
- shlq $4,%rdx
- andq $-1024,%rsp
- leaq (%rsi,%rdx,4),%rdx
- addq $448,%rsp
- movq %rdi,64+0(%rsp)
- movq %rsi,64+8(%rsp)
- movq %rdx,64+16(%rsp)
- movq %r11,64+24(%rsp)
-L$prologue_avx2:
-
- vzeroupper
- subq $-64,%rsi
- movl 0(%rdi),%eax
- movq %rsi,%r12
- movl 4(%rdi),%ebx
- cmpq %rdx,%rsi
- movl 8(%rdi),%ecx
- cmoveq %rsp,%r12
- movl 12(%rdi),%edx
- movl 16(%rdi),%r8d
- movl 20(%rdi),%r9d
- movl 24(%rdi),%r10d
- movl 28(%rdi),%r11d
- vmovdqa K256+512+32(%rip),%ymm8
- vmovdqa K256+512+64(%rip),%ymm9
- jmp L$oop_avx2
-.p2align 4
-L$oop_avx2:
- vmovdqa K256+512(%rip),%ymm7
- vmovdqu -64+0(%rsi),%xmm0
- vmovdqu -64+16(%rsi),%xmm1
- vmovdqu -64+32(%rsi),%xmm2
- vmovdqu -64+48(%rsi),%xmm3
-
- vinserti128 $1,(%r12),%ymm0,%ymm0
- vinserti128 $1,16(%r12),%ymm1,%ymm1
- vpshufb %ymm7,%ymm0,%ymm0
- vinserti128 $1,32(%r12),%ymm2,%ymm2
- vpshufb %ymm7,%ymm1,%ymm1
- vinserti128 $1,48(%r12),%ymm3,%ymm3
-
- leaq K256(%rip),%rbp
- vpshufb %ymm7,%ymm2,%ymm2
- vpaddd 0(%rbp),%ymm0,%ymm4
- vpshufb %ymm7,%ymm3,%ymm3
- vpaddd 32(%rbp),%ymm1,%ymm5
- vpaddd 64(%rbp),%ymm2,%ymm6
- vpaddd 96(%rbp),%ymm3,%ymm7
- vmovdqa %ymm4,0(%rsp)
- xorl %r14d,%r14d
- vmovdqa %ymm5,32(%rsp)
- leaq -64(%rsp),%rsp
- movl %ebx,%edi
- vmovdqa %ymm6,0(%rsp)
- xorl %ecx,%edi
- vmovdqa %ymm7,32(%rsp)
- movl %r9d,%r12d
- subq $-32*4,%rbp
- jmp L$avx2_00_47
-
-.p2align 4
-L$avx2_00_47:
- leaq -64(%rsp),%rsp
- vpalignr $4,%ymm0,%ymm1,%ymm4
- addl 0+128(%rsp),%r11d
- andl %r8d,%r12d
- rorxl $25,%r8d,%r13d
- vpalignr $4,%ymm2,%ymm3,%ymm7
- rorxl $11,%r8d,%r15d
- leal (%rax,%r14,1),%eax
- leal (%r11,%r12,1),%r11d
- vpsrld $7,%ymm4,%ymm6
- andnl %r10d,%r8d,%r12d
- xorl %r15d,%r13d
- rorxl $6,%r8d,%r14d
- vpaddd %ymm7,%ymm0,%ymm0
- leal (%r11,%r12,1),%r11d
- xorl %r14d,%r13d
- movl %eax,%r15d
- vpsrld $3,%ymm4,%ymm7
- rorxl $22,%eax,%r12d
- leal (%r11,%r13,1),%r11d
- xorl %ebx,%r15d
- vpslld $14,%ymm4,%ymm5
- rorxl $13,%eax,%r14d
- rorxl $2,%eax,%r13d
- leal (%rdx,%r11,1),%edx
- vpxor %ymm6,%ymm7,%ymm4
- andl %r15d,%edi
- xorl %r12d,%r14d
- xorl %ebx,%edi
- vpshufd $250,%ymm3,%ymm7
- xorl %r13d,%r14d
- leal (%r11,%rdi,1),%r11d
- movl %r8d,%r12d
- vpsrld $11,%ymm6,%ymm6
- addl 4+128(%rsp),%r10d
- andl %edx,%r12d
- rorxl $25,%edx,%r13d
- vpxor %ymm5,%ymm4,%ymm4
- rorxl $11,%edx,%edi
- leal (%r11,%r14,1),%r11d
- leal (%r10,%r12,1),%r10d
- vpslld $11,%ymm5,%ymm5
- andnl %r9d,%edx,%r12d
- xorl %edi,%r13d
- rorxl $6,%edx,%r14d
- vpxor %ymm6,%ymm4,%ymm4
- leal (%r10,%r12,1),%r10d
- xorl %r14d,%r13d
- movl %r11d,%edi
- vpsrld $10,%ymm7,%ymm6
- rorxl $22,%r11d,%r12d
- leal (%r10,%r13,1),%r10d
- xorl %eax,%edi
- vpxor %ymm5,%ymm4,%ymm4
- rorxl $13,%r11d,%r14d
- rorxl $2,%r11d,%r13d
- leal (%rcx,%r10,1),%ecx
- vpsrlq $17,%ymm7,%ymm7
- andl %edi,%r15d
- xorl %r12d,%r14d
- xorl %eax,%r15d
- vpaddd %ymm4,%ymm0,%ymm0
- xorl %r13d,%r14d
- leal (%r10,%r15,1),%r10d
- movl %edx,%r12d
- vpxor %ymm7,%ymm6,%ymm6
- addl 8+128(%rsp),%r9d
- andl %ecx,%r12d
- rorxl $25,%ecx,%r13d
- vpsrlq $2,%ymm7,%ymm7
- rorxl $11,%ecx,%r15d
- leal (%r10,%r14,1),%r10d
- leal (%r9,%r12,1),%r9d
- vpxor %ymm7,%ymm6,%ymm6
- andnl %r8d,%ecx,%r12d
- xorl %r15d,%r13d
- rorxl $6,%ecx,%r14d
- vpshufb %ymm8,%ymm6,%ymm6
- leal (%r9,%r12,1),%r9d
- xorl %r14d,%r13d
- movl %r10d,%r15d
- vpaddd %ymm6,%ymm0,%ymm0
- rorxl $22,%r10d,%r12d
- leal (%r9,%r13,1),%r9d
- xorl %r11d,%r15d
- vpshufd $80,%ymm0,%ymm7
- rorxl $13,%r10d,%r14d
- rorxl $2,%r10d,%r13d
- leal (%rbx,%r9,1),%ebx
- vpsrld $10,%ymm7,%ymm6
- andl %r15d,%edi
- xorl %r12d,%r14d
- xorl %r11d,%edi
- vpsrlq $17,%ymm7,%ymm7
- xorl %r13d,%r14d
- leal (%r9,%rdi,1),%r9d
- movl %ecx,%r12d
- vpxor %ymm7,%ymm6,%ymm6
- addl 12+128(%rsp),%r8d
- andl %ebx,%r12d
- rorxl $25,%ebx,%r13d
- vpsrlq $2,%ymm7,%ymm7
- rorxl $11,%ebx,%edi
- leal (%r9,%r14,1),%r9d
- leal (%r8,%r12,1),%r8d
- vpxor %ymm7,%ymm6,%ymm6
- andnl %edx,%ebx,%r12d
- xorl %edi,%r13d
- rorxl $6,%ebx,%r14d
- vpshufb %ymm9,%ymm6,%ymm6
- leal (%r8,%r12,1),%r8d
- xorl %r14d,%r13d
- movl %r9d,%edi
- vpaddd %ymm6,%ymm0,%ymm0
- rorxl $22,%r9d,%r12d
- leal (%r8,%r13,1),%r8d
- xorl %r10d,%edi
- vpaddd 0(%rbp),%ymm0,%ymm6
- rorxl $13,%r9d,%r14d
- rorxl $2,%r9d,%r13d
- leal (%rax,%r8,1),%eax
- andl %edi,%r15d
- xorl %r12d,%r14d
- xorl %r10d,%r15d
- xorl %r13d,%r14d
- leal (%r8,%r15,1),%r8d
- movl %ebx,%r12d
- vmovdqa %ymm6,0(%rsp)
- vpalignr $4,%ymm1,%ymm2,%ymm4
- addl 32+128(%rsp),%edx
- andl %eax,%r12d
- rorxl $25,%eax,%r13d
- vpalignr $4,%ymm3,%ymm0,%ymm7
- rorxl $11,%eax,%r15d
- leal (%r8,%r14,1),%r8d
- leal (%rdx,%r12,1),%edx
- vpsrld $7,%ymm4,%ymm6
- andnl %ecx,%eax,%r12d
- xorl %r15d,%r13d
- rorxl $6,%eax,%r14d
- vpaddd %ymm7,%ymm1,%ymm1
- leal (%rdx,%r12,1),%edx
- xorl %r14d,%r13d
- movl %r8d,%r15d
- vpsrld $3,%ymm4,%ymm7
- rorxl $22,%r8d,%r12d
- leal (%rdx,%r13,1),%edx
- xorl %r9d,%r15d
- vpslld $14,%ymm4,%ymm5
- rorxl $13,%r8d,%r14d
- rorxl $2,%r8d,%r13d
- leal (%r11,%rdx,1),%r11d
- vpxor %ymm6,%ymm7,%ymm4
- andl %r15d,%edi
- xorl %r12d,%r14d
- xorl %r9d,%edi
- vpshufd $250,%ymm0,%ymm7
- xorl %r13d,%r14d
- leal (%rdx,%rdi,1),%edx
- movl %eax,%r12d
- vpsrld $11,%ymm6,%ymm6
- addl 36+128(%rsp),%ecx
- andl %r11d,%r12d
- rorxl $25,%r11d,%r13d
- vpxor %ymm5,%ymm4,%ymm4
- rorxl $11,%r11d,%edi
- leal (%rdx,%r14,1),%edx
- leal (%rcx,%r12,1),%ecx
- vpslld $11,%ymm5,%ymm5
- andnl %ebx,%r11d,%r12d
- xorl %edi,%r13d
- rorxl $6,%r11d,%r14d
- vpxor %ymm6,%ymm4,%ymm4
- leal (%rcx,%r12,1),%ecx
- xorl %r14d,%r13d
- movl %edx,%edi
- vpsrld $10,%ymm7,%ymm6
- rorxl $22,%edx,%r12d
- leal (%rcx,%r13,1),%ecx
- xorl %r8d,%edi
- vpxor %ymm5,%ymm4,%ymm4
- rorxl $13,%edx,%r14d
- rorxl $2,%edx,%r13d
- leal (%r10,%rcx,1),%r10d
- vpsrlq $17,%ymm7,%ymm7
- andl %edi,%r15d
- xorl %r12d,%r14d
- xorl %r8d,%r15d
- vpaddd %ymm4,%ymm1,%ymm1
- xorl %r13d,%r14d
- leal (%rcx,%r15,1),%ecx
- movl %r11d,%r12d
- vpxor %ymm7,%ymm6,%ymm6
- addl 40+128(%rsp),%ebx
- andl %r10d,%r12d
- rorxl $25,%r10d,%r13d
- vpsrlq $2,%ymm7,%ymm7
- rorxl $11,%r10d,%r15d
- leal (%rcx,%r14,1),%ecx
- leal (%rbx,%r12,1),%ebx
- vpxor %ymm7,%ymm6,%ymm6
- andnl %eax,%r10d,%r12d
- xorl %r15d,%r13d
- rorxl $6,%r10d,%r14d
- vpshufb %ymm8,%ymm6,%ymm6
- leal (%rbx,%r12,1),%ebx
- xorl %r14d,%r13d
- movl %ecx,%r15d
- vpaddd %ymm6,%ymm1,%ymm1
- rorxl $22,%ecx,%r12d
- leal (%rbx,%r13,1),%ebx
- xorl %edx,%r15d
- vpshufd $80,%ymm1,%ymm7
- rorxl $13,%ecx,%r14d
- rorxl $2,%ecx,%r13d
- leal (%r9,%rbx,1),%r9d
- vpsrld $10,%ymm7,%ymm6
- andl %r15d,%edi
- xorl %r12d,%r14d
- xorl %edx,%edi
- vpsrlq $17,%ymm7,%ymm7
- xorl %r13d,%r14d
- leal (%rbx,%rdi,1),%ebx
- movl %r10d,%r12d
- vpxor %ymm7,%ymm6,%ymm6
- addl 44+128(%rsp),%eax
- andl %r9d,%r12d
- rorxl $25,%r9d,%r13d
- vpsrlq $2,%ymm7,%ymm7
- rorxl $11,%r9d,%edi
- leal (%rbx,%r14,1),%ebx
- leal (%rax,%r12,1),%eax
- vpxor %ymm7,%ymm6,%ymm6
- andnl %r11d,%r9d,%r12d
- xorl %edi,%r13d
- rorxl $6,%r9d,%r14d
- vpshufb %ymm9,%ymm6,%ymm6
- leal (%rax,%r12,1),%eax
- xorl %r14d,%r13d
- movl %ebx,%edi
- vpaddd %ymm6,%ymm1,%ymm1
- rorxl $22,%ebx,%r12d
- leal (%rax,%r13,1),%eax
- xorl %ecx,%edi
- vpaddd 32(%rbp),%ymm1,%ymm6
- rorxl $13,%ebx,%r14d
- rorxl $2,%ebx,%r13d
- leal (%r8,%rax,1),%r8d
- andl %edi,%r15d
- xorl %r12d,%r14d
- xorl %ecx,%r15d
- xorl %r13d,%r14d
- leal (%rax,%r15,1),%eax
- movl %r9d,%r12d
- vmovdqa %ymm6,32(%rsp)
- leaq -64(%rsp),%rsp
- vpalignr $4,%ymm2,%ymm3,%ymm4
- addl 0+128(%rsp),%r11d
- andl %r8d,%r12d
- rorxl $25,%r8d,%r13d
- vpalignr $4,%ymm0,%ymm1,%ymm7
- rorxl $11,%r8d,%r15d
- leal (%rax,%r14,1),%eax
- leal (%r11,%r12,1),%r11d
- vpsrld $7,%ymm4,%ymm6
- andnl %r10d,%r8d,%r12d
- xorl %r15d,%r13d
- rorxl $6,%r8d,%r14d
- vpaddd %ymm7,%ymm2,%ymm2
- leal (%r11,%r12,1),%r11d
- xorl %r14d,%r13d
- movl %eax,%r15d
- vpsrld $3,%ymm4,%ymm7
- rorxl $22,%eax,%r12d
- leal (%r11,%r13,1),%r11d
- xorl %ebx,%r15d
- vpslld $14,%ymm4,%ymm5
- rorxl $13,%eax,%r14d
- rorxl $2,%eax,%r13d
- leal (%rdx,%r11,1),%edx
- vpxor %ymm6,%ymm7,%ymm4
- andl %r15d,%edi
- xorl %r12d,%r14d
- xorl %ebx,%edi
- vpshufd $250,%ymm1,%ymm7
- xorl %r13d,%r14d
- leal (%r11,%rdi,1),%r11d
- movl %r8d,%r12d
- vpsrld $11,%ymm6,%ymm6
- addl 4+128(%rsp),%r10d
- andl %edx,%r12d
- rorxl $25,%edx,%r13d
- vpxor %ymm5,%ymm4,%ymm4
- rorxl $11,%edx,%edi
- leal (%r11,%r14,1),%r11d
- leal (%r10,%r12,1),%r10d
- vpslld $11,%ymm5,%ymm5
- andnl %r9d,%edx,%r12d
- xorl %edi,%r13d
- rorxl $6,%edx,%r14d
- vpxor %ymm6,%ymm4,%ymm4
- leal (%r10,%r12,1),%r10d
- xorl %r14d,%r13d
- movl %r11d,%edi
- vpsrld $10,%ymm7,%ymm6
- rorxl $22,%r11d,%r12d
- leal (%r10,%r13,1),%r10d
- xorl %eax,%edi
- vpxor %ymm5,%ymm4,%ymm4
- rorxl $13,%r11d,%r14d
- rorxl $2,%r11d,%r13d
- leal (%rcx,%r10,1),%ecx
- vpsrlq $17,%ymm7,%ymm7
- andl %edi,%r15d
- xorl %r12d,%r14d
- xorl %eax,%r15d
- vpaddd %ymm4,%ymm2,%ymm2
- xorl %r13d,%r14d
- leal (%r10,%r15,1),%r10d
- movl %edx,%r12d
- vpxor %ymm7,%ymm6,%ymm6
- addl 8+128(%rsp),%r9d
- andl %ecx,%r12d
- rorxl $25,%ecx,%r13d
- vpsrlq $2,%ymm7,%ymm7
- rorxl $11,%ecx,%r15d
- leal (%r10,%r14,1),%r10d
- leal (%r9,%r12,1),%r9d
- vpxor %ymm7,%ymm6,%ymm6
- andnl %r8d,%ecx,%r12d
- xorl %r15d,%r13d
- rorxl $6,%ecx,%r14d
- vpshufb %ymm8,%ymm6,%ymm6
- leal (%r9,%r12,1),%r9d
- xorl %r14d,%r13d
- movl %r10d,%r15d
- vpaddd %ymm6,%ymm2,%ymm2
- rorxl $22,%r10d,%r12d
- leal (%r9,%r13,1),%r9d
- xorl %r11d,%r15d
- vpshufd $80,%ymm2,%ymm7
- rorxl $13,%r10d,%r14d
- rorxl $2,%r10d,%r13d
- leal (%rbx,%r9,1),%ebx
- vpsrld $10,%ymm7,%ymm6
- andl %r15d,%edi
- xorl %r12d,%r14d
- xorl %r11d,%edi
- vpsrlq $17,%ymm7,%ymm7
- xorl %r13d,%r14d
- leal (%r9,%rdi,1),%r9d
- movl %ecx,%r12d
- vpxor %ymm7,%ymm6,%ymm6
- addl 12+128(%rsp),%r8d
- andl %ebx,%r12d
- rorxl $25,%ebx,%r13d
- vpsrlq $2,%ymm7,%ymm7
- rorxl $11,%ebx,%edi
- leal (%r9,%r14,1),%r9d
- leal (%r8,%r12,1),%r8d
- vpxor %ymm7,%ymm6,%ymm6
- andnl %edx,%ebx,%r12d
- xorl %edi,%r13d
- rorxl $6,%ebx,%r14d
- vpshufb %ymm9,%ymm6,%ymm6
- leal (%r8,%r12,1),%r8d
- xorl %r14d,%r13d
- movl %r9d,%edi
- vpaddd %ymm6,%ymm2,%ymm2
- rorxl $22,%r9d,%r12d
- leal (%r8,%r13,1),%r8d
- xorl %r10d,%edi
- vpaddd 64(%rbp),%ymm2,%ymm6
- rorxl $13,%r9d,%r14d
- rorxl $2,%r9d,%r13d
- leal (%rax,%r8,1),%eax
- andl %edi,%r15d
- xorl %r12d,%r14d
- xorl %r10d,%r15d
- xorl %r13d,%r14d
- leal (%r8,%r15,1),%r8d
- movl %ebx,%r12d
- vmovdqa %ymm6,0(%rsp)
- vpalignr $4,%ymm3,%ymm0,%ymm4
- addl 32+128(%rsp),%edx
- andl %eax,%r12d
- rorxl $25,%eax,%r13d
- vpalignr $4,%ymm1,%ymm2,%ymm7
- rorxl $11,%eax,%r15d
- leal (%r8,%r14,1),%r8d
- leal (%rdx,%r12,1),%edx
- vpsrld $7,%ymm4,%ymm6
- andnl %ecx,%eax,%r12d
- xorl %r15d,%r13d
- rorxl $6,%eax,%r14d
- vpaddd %ymm7,%ymm3,%ymm3
- leal (%rdx,%r12,1),%edx
- xorl %r14d,%r13d
- movl %r8d,%r15d
- vpsrld $3,%ymm4,%ymm7
- rorxl $22,%r8d,%r12d
- leal (%rdx,%r13,1),%edx
- xorl %r9d,%r15d
- vpslld $14,%ymm4,%ymm5
- rorxl $13,%r8d,%r14d
- rorxl $2,%r8d,%r13d
- leal (%r11,%rdx,1),%r11d
- vpxor %ymm6,%ymm7,%ymm4
- andl %r15d,%edi
- xorl %r12d,%r14d
- xorl %r9d,%edi
- vpshufd $250,%ymm2,%ymm7
- xorl %r13d,%r14d
- leal (%rdx,%rdi,1),%edx
- movl %eax,%r12d
- vpsrld $11,%ymm6,%ymm6
- addl 36+128(%rsp),%ecx
- andl %r11d,%r12d
- rorxl $25,%r11d,%r13d
- vpxor %ymm5,%ymm4,%ymm4
- rorxl $11,%r11d,%edi
- leal (%rdx,%r14,1),%edx
- leal (%rcx,%r12,1),%ecx
- vpslld $11,%ymm5,%ymm5
- andnl %ebx,%r11d,%r12d
- xorl %edi,%r13d
- rorxl $6,%r11d,%r14d
- vpxor %ymm6,%ymm4,%ymm4
- leal (%rcx,%r12,1),%ecx
- xorl %r14d,%r13d
- movl %edx,%edi
- vpsrld $10,%ymm7,%ymm6
- rorxl $22,%edx,%r12d
- leal (%rcx,%r13,1),%ecx
- xorl %r8d,%edi
- vpxor %ymm5,%ymm4,%ymm4
- rorxl $13,%edx,%r14d
- rorxl $2,%edx,%r13d
- leal (%r10,%rcx,1),%r10d
- vpsrlq $17,%ymm7,%ymm7
- andl %edi,%r15d
- xorl %r12d,%r14d
- xorl %r8d,%r15d
- vpaddd %ymm4,%ymm3,%ymm3
- xorl %r13d,%r14d
- leal (%rcx,%r15,1),%ecx
- movl %r11d,%r12d
- vpxor %ymm7,%ymm6,%ymm6
- addl 40+128(%rsp),%ebx
- andl %r10d,%r12d
- rorxl $25,%r10d,%r13d
- vpsrlq $2,%ymm7,%ymm7
- rorxl $11,%r10d,%r15d
- leal (%rcx,%r14,1),%ecx
- leal (%rbx,%r12,1),%ebx
- vpxor %ymm7,%ymm6,%ymm6
- andnl %eax,%r10d,%r12d
- xorl %r15d,%r13d
- rorxl $6,%r10d,%r14d
- vpshufb %ymm8,%ymm6,%ymm6
- leal (%rbx,%r12,1),%ebx
- xorl %r14d,%r13d
- movl %ecx,%r15d
- vpaddd %ymm6,%ymm3,%ymm3
- rorxl $22,%ecx,%r12d
- leal (%rbx,%r13,1),%ebx
- xorl %edx,%r15d
- vpshufd $80,%ymm3,%ymm7
- rorxl $13,%ecx,%r14d
- rorxl $2,%ecx,%r13d
- leal (%r9,%rbx,1),%r9d
- vpsrld $10,%ymm7,%ymm6
- andl %r15d,%edi
- xorl %r12d,%r14d
- xorl %edx,%edi
- vpsrlq $17,%ymm7,%ymm7
- xorl %r13d,%r14d
- leal (%rbx,%rdi,1),%ebx
- movl %r10d,%r12d
- vpxor %ymm7,%ymm6,%ymm6
- addl 44+128(%rsp),%eax
- andl %r9d,%r12d
- rorxl $25,%r9d,%r13d
- vpsrlq $2,%ymm7,%ymm7
- rorxl $11,%r9d,%edi
- leal (%rbx,%r14,1),%ebx
- leal (%rax,%r12,1),%eax
- vpxor %ymm7,%ymm6,%ymm6
- andnl %r11d,%r9d,%r12d
- xorl %edi,%r13d
- rorxl $6,%r9d,%r14d
- vpshufb %ymm9,%ymm6,%ymm6
- leal (%rax,%r12,1),%eax
- xorl %r14d,%r13d
- movl %ebx,%edi
- vpaddd %ymm6,%ymm3,%ymm3
- rorxl $22,%ebx,%r12d
- leal (%rax,%r13,1),%eax
- xorl %ecx,%edi
- vpaddd 96(%rbp),%ymm3,%ymm6
- rorxl $13,%ebx,%r14d
- rorxl $2,%ebx,%r13d
- leal (%r8,%rax,1),%r8d
- andl %edi,%r15d
- xorl %r12d,%r14d
- xorl %ecx,%r15d
- xorl %r13d,%r14d
- leal (%rax,%r15,1),%eax
- movl %r9d,%r12d
- vmovdqa %ymm6,32(%rsp)
- leaq 128(%rbp),%rbp
- cmpb $0,3(%rbp)
- jne L$avx2_00_47
- addl 0+64(%rsp),%r11d
- andl %r8d,%r12d
- rorxl $25,%r8d,%r13d
- rorxl $11,%r8d,%r15d
- leal (%rax,%r14,1),%eax
- leal (%r11,%r12,1),%r11d
- andnl %r10d,%r8d,%r12d
- xorl %r15d,%r13d
- rorxl $6,%r8d,%r14d
- leal (%r11,%r12,1),%r11d
- xorl %r14d,%r13d
- movl %eax,%r15d
- rorxl $22,%eax,%r12d
- leal (%r11,%r13,1),%r11d
- xorl %ebx,%r15d
- rorxl $13,%eax,%r14d
- rorxl $2,%eax,%r13d
- leal (%rdx,%r11,1),%edx
- andl %r15d,%edi
- xorl %r12d,%r14d
- xorl %ebx,%edi
- xorl %r13d,%r14d
- leal (%r11,%rdi,1),%r11d
- movl %r8d,%r12d
- addl 4+64(%rsp),%r10d
- andl %edx,%r12d
- rorxl $25,%edx,%r13d
- rorxl $11,%edx,%edi
- leal (%r11,%r14,1),%r11d
- leal (%r10,%r12,1),%r10d
- andnl %r9d,%edx,%r12d
- xorl %edi,%r13d
- rorxl $6,%edx,%r14d
- leal (%r10,%r12,1),%r10d
- xorl %r14d,%r13d
- movl %r11d,%edi
- rorxl $22,%r11d,%r12d
- leal (%r10,%r13,1),%r10d
- xorl %eax,%edi
- rorxl $13,%r11d,%r14d
- rorxl $2,%r11d,%r13d
- leal (%rcx,%r10,1),%ecx
- andl %edi,%r15d
- xorl %r12d,%r14d
- xorl %eax,%r15d
- xorl %r13d,%r14d
- leal (%r10,%r15,1),%r10d
- movl %edx,%r12d
- addl 8+64(%rsp),%r9d
- andl %ecx,%r12d
- rorxl $25,%ecx,%r13d
- rorxl $11,%ecx,%r15d
- leal (%r10,%r14,1),%r10d
- leal (%r9,%r12,1),%r9d
- andnl %r8d,%ecx,%r12d
- xorl %r15d,%r13d
- rorxl $6,%ecx,%r14d
- leal (%r9,%r12,1),%r9d
- xorl %r14d,%r13d
- movl %r10d,%r15d
- rorxl $22,%r10d,%r12d
- leal (%r9,%r13,1),%r9d
- xorl %r11d,%r15d
- rorxl $13,%r10d,%r14d
- rorxl $2,%r10d,%r13d
- leal (%rbx,%r9,1),%ebx
- andl %r15d,%edi
- xorl %r12d,%r14d
- xorl %r11d,%edi
- xorl %r13d,%r14d
- leal (%r9,%rdi,1),%r9d
- movl %ecx,%r12d
- addl 12+64(%rsp),%r8d
- andl %ebx,%r12d
- rorxl $25,%ebx,%r13d
- rorxl $11,%ebx,%edi
- leal (%r9,%r14,1),%r9d
- leal (%r8,%r12,1),%r8d
- andnl %edx,%ebx,%r12d
- xorl %edi,%r13d
- rorxl $6,%ebx,%r14d
- leal (%r8,%r12,1),%r8d
- xorl %r14d,%r13d
- movl %r9d,%edi
- rorxl $22,%r9d,%r12d
- leal (%r8,%r13,1),%r8d
- xorl %r10d,%edi
- rorxl $13,%r9d,%r14d
- rorxl $2,%r9d,%r13d
- leal (%rax,%r8,1),%eax
- andl %edi,%r15d
- xorl %r12d,%r14d
- xorl %r10d,%r15d
- xorl %r13d,%r14d
- leal (%r8,%r15,1),%r8d
- movl %ebx,%r12d
- addl 32+64(%rsp),%edx
- andl %eax,%r12d
- rorxl $25,%eax,%r13d
- rorxl $11,%eax,%r15d
- leal (%r8,%r14,1),%r8d
- leal (%rdx,%r12,1),%edx
- andnl %ecx,%eax,%r12d
- xorl %r15d,%r13d
- rorxl $6,%eax,%r14d
- leal (%rdx,%r12,1),%edx
- xorl %r14d,%r13d
- movl %r8d,%r15d
- rorxl $22,%r8d,%r12d
- leal (%rdx,%r13,1),%edx
- xorl %r9d,%r15d
- rorxl $13,%r8d,%r14d
- rorxl $2,%r8d,%r13d
- leal (%r11,%rdx,1),%r11d
- andl %r15d,%edi
- xorl %r12d,%r14d
- xorl %r9d,%edi
- xorl %r13d,%r14d
- leal (%rdx,%rdi,1),%edx
- movl %eax,%r12d
- addl 36+64(%rsp),%ecx
- andl %r11d,%r12d
- rorxl $25,%r11d,%r13d
- rorxl $11,%r11d,%edi
- leal (%rdx,%r14,1),%edx
- leal (%rcx,%r12,1),%ecx
- andnl %ebx,%r11d,%r12d
- xorl %edi,%r13d
- rorxl $6,%r11d,%r14d
- leal (%rcx,%r12,1),%ecx
- xorl %r14d,%r13d
- movl %edx,%edi
- rorxl $22,%edx,%r12d
- leal (%rcx,%r13,1),%ecx
- xorl %r8d,%edi
- rorxl $13,%edx,%r14d
- rorxl $2,%edx,%r13d
- leal (%r10,%rcx,1),%r10d
- andl %edi,%r15d
- xorl %r12d,%r14d
- xorl %r8d,%r15d
- xorl %r13d,%r14d
- leal (%rcx,%r15,1),%ecx
- movl %r11d,%r12d
- addl 40+64(%rsp),%ebx
- andl %r10d,%r12d
- rorxl $25,%r10d,%r13d
- rorxl $11,%r10d,%r15d
- leal (%rcx,%r14,1),%ecx
- leal (%rbx,%r12,1),%ebx
- andnl %eax,%r10d,%r12d
- xorl %r15d,%r13d
- rorxl $6,%r10d,%r14d
- leal (%rbx,%r12,1),%ebx
- xorl %r14d,%r13d
- movl %ecx,%r15d
- rorxl $22,%ecx,%r12d
- leal (%rbx,%r13,1),%ebx
- xorl %edx,%r15d
- rorxl $13,%ecx,%r14d
- rorxl $2,%ecx,%r13d
- leal (%r9,%rbx,1),%r9d
- andl %r15d,%edi
- xorl %r12d,%r14d
- xorl %edx,%edi
- xorl %r13d,%r14d
- leal (%rbx,%rdi,1),%ebx
- movl %r10d,%r12d
- addl 44+64(%rsp),%eax
- andl %r9d,%r12d
- rorxl $25,%r9d,%r13d
- rorxl $11,%r9d,%edi
- leal (%rbx,%r14,1),%ebx
- leal (%rax,%r12,1),%eax
- andnl %r11d,%r9d,%r12d
- xorl %edi,%r13d
- rorxl $6,%r9d,%r14d
- leal (%rax,%r12,1),%eax
- xorl %r14d,%r13d
- movl %ebx,%edi
- rorxl $22,%ebx,%r12d
- leal (%rax,%r13,1),%eax
- xorl %ecx,%edi
- rorxl $13,%ebx,%r14d
- rorxl $2,%ebx,%r13d
- leal (%r8,%rax,1),%r8d
- andl %edi,%r15d
- xorl %r12d,%r14d
- xorl %ecx,%r15d
- xorl %r13d,%r14d
- leal (%rax,%r15,1),%eax
- movl %r9d,%r12d
- addl 0(%rsp),%r11d
- andl %r8d,%r12d
- rorxl $25,%r8d,%r13d
- rorxl $11,%r8d,%r15d
- leal (%rax,%r14,1),%eax
- leal (%r11,%r12,1),%r11d
- andnl %r10d,%r8d,%r12d
- xorl %r15d,%r13d
- rorxl $6,%r8d,%r14d
- leal (%r11,%r12,1),%r11d
- xorl %r14d,%r13d
- movl %eax,%r15d
- rorxl $22,%eax,%r12d
- leal (%r11,%r13,1),%r11d
- xorl %ebx,%r15d
- rorxl $13,%eax,%r14d
- rorxl $2,%eax,%r13d
- leal (%rdx,%r11,1),%edx
- andl %r15d,%edi
- xorl %r12d,%r14d
- xorl %ebx,%edi
- xorl %r13d,%r14d
- leal (%r11,%rdi,1),%r11d
- movl %r8d,%r12d
- addl 4(%rsp),%r10d
- andl %edx,%r12d
- rorxl $25,%edx,%r13d
- rorxl $11,%edx,%edi
- leal (%r11,%r14,1),%r11d
- leal (%r10,%r12,1),%r10d
- andnl %r9d,%edx,%r12d
- xorl %edi,%r13d
- rorxl $6,%edx,%r14d
- leal (%r10,%r12,1),%r10d
- xorl %r14d,%r13d
- movl %r11d,%edi
- rorxl $22,%r11d,%r12d
- leal (%r10,%r13,1),%r10d
- xorl %eax,%edi
- rorxl $13,%r11d,%r14d
- rorxl $2,%r11d,%r13d
- leal (%rcx,%r10,1),%ecx
- andl %edi,%r15d
- xorl %r12d,%r14d
- xorl %eax,%r15d
- xorl %r13d,%r14d
- leal (%r10,%r15,1),%r10d
- movl %edx,%r12d
- addl 8(%rsp),%r9d
- andl %ecx,%r12d
- rorxl $25,%ecx,%r13d
- rorxl $11,%ecx,%r15d
- leal (%r10,%r14,1),%r10d
- leal (%r9,%r12,1),%r9d
- andnl %r8d,%ecx,%r12d
- xorl %r15d,%r13d
- rorxl $6,%ecx,%r14d
- leal (%r9,%r12,1),%r9d
- xorl %r14d,%r13d
- movl %r10d,%r15d
- rorxl $22,%r10d,%r12d
- leal (%r9,%r13,1),%r9d
- xorl %r11d,%r15d
- rorxl $13,%r10d,%r14d
- rorxl $2,%r10d,%r13d
- leal (%rbx,%r9,1),%ebx
- andl %r15d,%edi
- xorl %r12d,%r14d
- xorl %r11d,%edi
- xorl %r13d,%r14d
- leal (%r9,%rdi,1),%r9d
- movl %ecx,%r12d
- addl 12(%rsp),%r8d
- andl %ebx,%r12d
- rorxl $25,%ebx,%r13d
- rorxl $11,%ebx,%edi
- leal (%r9,%r14,1),%r9d
- leal (%r8,%r12,1),%r8d
- andnl %edx,%ebx,%r12d
- xorl %edi,%r13d
- rorxl $6,%ebx,%r14d
- leal (%r8,%r12,1),%r8d
- xorl %r14d,%r13d
- movl %r9d,%edi
- rorxl $22,%r9d,%r12d
- leal (%r8,%r13,1),%r8d
- xorl %r10d,%edi
- rorxl $13,%r9d,%r14d
- rorxl $2,%r9d,%r13d
- leal (%rax,%r8,1),%eax
- andl %edi,%r15d
- xorl %r12d,%r14d
- xorl %r10d,%r15d
- xorl %r13d,%r14d
- leal (%r8,%r15,1),%r8d
- movl %ebx,%r12d
- addl 32(%rsp),%edx
- andl %eax,%r12d
- rorxl $25,%eax,%r13d
- rorxl $11,%eax,%r15d
- leal (%r8,%r14,1),%r8d
- leal (%rdx,%r12,1),%edx
- andnl %ecx,%eax,%r12d
- xorl %r15d,%r13d
- rorxl $6,%eax,%r14d
- leal (%rdx,%r12,1),%edx
- xorl %r14d,%r13d
- movl %r8d,%r15d
- rorxl $22,%r8d,%r12d
- leal (%rdx,%r13,1),%edx
- xorl %r9d,%r15d
- rorxl $13,%r8d,%r14d
- rorxl $2,%r8d,%r13d
- leal (%r11,%rdx,1),%r11d
- andl %r15d,%edi
- xorl %r12d,%r14d
- xorl %r9d,%edi
- xorl %r13d,%r14d
- leal (%rdx,%rdi,1),%edx
- movl %eax,%r12d
- addl 36(%rsp),%ecx
- andl %r11d,%r12d
- rorxl $25,%r11d,%r13d
- rorxl $11,%r11d,%edi
- leal (%rdx,%r14,1),%edx
- leal (%rcx,%r12,1),%ecx
- andnl %ebx,%r11d,%r12d
- xorl %edi,%r13d
- rorxl $6,%r11d,%r14d
- leal (%rcx,%r12,1),%ecx
- xorl %r14d,%r13d
- movl %edx,%edi
- rorxl $22,%edx,%r12d
- leal (%rcx,%r13,1),%ecx
- xorl %r8d,%edi
- rorxl $13,%edx,%r14d
- rorxl $2,%edx,%r13d
- leal (%r10,%rcx,1),%r10d
- andl %edi,%r15d
- xorl %r12d,%r14d
- xorl %r8d,%r15d
- xorl %r13d,%r14d
- leal (%rcx,%r15,1),%ecx
- movl %r11d,%r12d
- addl 40(%rsp),%ebx
- andl %r10d,%r12d
- rorxl $25,%r10d,%r13d
- rorxl $11,%r10d,%r15d
- leal (%rcx,%r14,1),%ecx
- leal (%rbx,%r12,1),%ebx
- andnl %eax,%r10d,%r12d
- xorl %r15d,%r13d
- rorxl $6,%r10d,%r14d
- leal (%rbx,%r12,1),%ebx
- xorl %r14d,%r13d
- movl %ecx,%r15d
- rorxl $22,%ecx,%r12d
- leal (%rbx,%r13,1),%ebx
- xorl %edx,%r15d
- rorxl $13,%ecx,%r14d
- rorxl $2,%ecx,%r13d
- leal (%r9,%rbx,1),%r9d
- andl %r15d,%edi
- xorl %r12d,%r14d
- xorl %edx,%edi
- xorl %r13d,%r14d
- leal (%rbx,%rdi,1),%ebx
- movl %r10d,%r12d
- addl 44(%rsp),%eax
- andl %r9d,%r12d
- rorxl $25,%r9d,%r13d
- rorxl $11,%r9d,%edi
- leal (%rbx,%r14,1),%ebx
- leal (%rax,%r12,1),%eax
- andnl %r11d,%r9d,%r12d
- xorl %edi,%r13d
- rorxl $6,%r9d,%r14d
- leal (%rax,%r12,1),%eax
- xorl %r14d,%r13d
- movl %ebx,%edi
- rorxl $22,%ebx,%r12d
- leal (%rax,%r13,1),%eax
- xorl %ecx,%edi
- rorxl $13,%ebx,%r14d
- rorxl $2,%ebx,%r13d
- leal (%r8,%rax,1),%r8d
- andl %edi,%r15d
- xorl %r12d,%r14d
- xorl %ecx,%r15d
- xorl %r13d,%r14d
- leal (%rax,%r15,1),%eax
- movl %r9d,%r12d
- movq 512(%rsp),%rdi
- addl %r14d,%eax
-
- leaq 448(%rsp),%rbp
-
- addl 0(%rdi),%eax
- addl 4(%rdi),%ebx
- addl 8(%rdi),%ecx
- addl 12(%rdi),%edx
- addl 16(%rdi),%r8d
- addl 20(%rdi),%r9d
- addl 24(%rdi),%r10d
- addl 28(%rdi),%r11d
-
- movl %eax,0(%rdi)
- movl %ebx,4(%rdi)
- movl %ecx,8(%rdi)
- movl %edx,12(%rdi)
- movl %r8d,16(%rdi)
- movl %r9d,20(%rdi)
- movl %r10d,24(%rdi)
- movl %r11d,28(%rdi)
-
- cmpq 80(%rbp),%rsi
- je L$done_avx2
-
- xorl %r14d,%r14d
- movl %ebx,%edi
- xorl %ecx,%edi
- movl %r9d,%r12d
- jmp L$ower_avx2
-.p2align 4
-L$ower_avx2:
- addl 0+16(%rbp),%r11d
- andl %r8d,%r12d
- rorxl $25,%r8d,%r13d
- rorxl $11,%r8d,%r15d
- leal (%rax,%r14,1),%eax
- leal (%r11,%r12,1),%r11d
- andnl %r10d,%r8d,%r12d
- xorl %r15d,%r13d
- rorxl $6,%r8d,%r14d
- leal (%r11,%r12,1),%r11d
- xorl %r14d,%r13d
- movl %eax,%r15d
- rorxl $22,%eax,%r12d
- leal (%r11,%r13,1),%r11d
- xorl %ebx,%r15d
- rorxl $13,%eax,%r14d
- rorxl $2,%eax,%r13d
- leal (%rdx,%r11,1),%edx
- andl %r15d,%edi
- xorl %r12d,%r14d
- xorl %ebx,%edi
- xorl %r13d,%r14d
- leal (%r11,%rdi,1),%r11d
- movl %r8d,%r12d
- addl 4+16(%rbp),%r10d
- andl %edx,%r12d
- rorxl $25,%edx,%r13d
- rorxl $11,%edx,%edi
- leal (%r11,%r14,1),%r11d
- leal (%r10,%r12,1),%r10d
- andnl %r9d,%edx,%r12d
- xorl %edi,%r13d
- rorxl $6,%edx,%r14d
- leal (%r10,%r12,1),%r10d
- xorl %r14d,%r13d
- movl %r11d,%edi
- rorxl $22,%r11d,%r12d
- leal (%r10,%r13,1),%r10d
- xorl %eax,%edi
- rorxl $13,%r11d,%r14d
- rorxl $2,%r11d,%r13d
- leal (%rcx,%r10,1),%ecx
- andl %edi,%r15d
- xorl %r12d,%r14d
- xorl %eax,%r15d
- xorl %r13d,%r14d
- leal (%r10,%r15,1),%r10d
- movl %edx,%r12d
- addl 8+16(%rbp),%r9d
- andl %ecx,%r12d
- rorxl $25,%ecx,%r13d
- rorxl $11,%ecx,%r15d
- leal (%r10,%r14,1),%r10d
- leal (%r9,%r12,1),%r9d
- andnl %r8d,%ecx,%r12d
- xorl %r15d,%r13d
- rorxl $6,%ecx,%r14d
- leal (%r9,%r12,1),%r9d
- xorl %r14d,%r13d
- movl %r10d,%r15d
- rorxl $22,%r10d,%r12d
- leal (%r9,%r13,1),%r9d
- xorl %r11d,%r15d
- rorxl $13,%r10d,%r14d
- rorxl $2,%r10d,%r13d
- leal (%rbx,%r9,1),%ebx
- andl %r15d,%edi
- xorl %r12d,%r14d
- xorl %r11d,%edi
- xorl %r13d,%r14d
- leal (%r9,%rdi,1),%r9d
- movl %ecx,%r12d
- addl 12+16(%rbp),%r8d
- andl %ebx,%r12d
- rorxl $25,%ebx,%r13d
- rorxl $11,%ebx,%edi
- leal (%r9,%r14,1),%r9d
- leal (%r8,%r12,1),%r8d
- andnl %edx,%ebx,%r12d
- xorl %edi,%r13d
- rorxl $6,%ebx,%r14d
- leal (%r8,%r12,1),%r8d
- xorl %r14d,%r13d
- movl %r9d,%edi
- rorxl $22,%r9d,%r12d
- leal (%r8,%r13,1),%r8d
- xorl %r10d,%edi
- rorxl $13,%r9d,%r14d
- rorxl $2,%r9d,%r13d
- leal (%rax,%r8,1),%eax
- andl %edi,%r15d
- xorl %r12d,%r14d
- xorl %r10d,%r15d
- xorl %r13d,%r14d
- leal (%r8,%r15,1),%r8d
- movl %ebx,%r12d
- addl 32+16(%rbp),%edx
- andl %eax,%r12d
- rorxl $25,%eax,%r13d
- rorxl $11,%eax,%r15d
- leal (%r8,%r14,1),%r8d
- leal (%rdx,%r12,1),%edx
- andnl %ecx,%eax,%r12d
- xorl %r15d,%r13d
- rorxl $6,%eax,%r14d
- leal (%rdx,%r12,1),%edx
- xorl %r14d,%r13d
- movl %r8d,%r15d
- rorxl $22,%r8d,%r12d
- leal (%rdx,%r13,1),%edx
- xorl %r9d,%r15d
- rorxl $13,%r8d,%r14d
- rorxl $2,%r8d,%r13d
- leal (%r11,%rdx,1),%r11d
- andl %r15d,%edi
- xorl %r12d,%r14d
- xorl %r9d,%edi
- xorl %r13d,%r14d
- leal (%rdx,%rdi,1),%edx
- movl %eax,%r12d
- addl 36+16(%rbp),%ecx
- andl %r11d,%r12d
- rorxl $25,%r11d,%r13d
- rorxl $11,%r11d,%edi
- leal (%rdx,%r14,1),%edx
- leal (%rcx,%r12,1),%ecx
- andnl %ebx,%r11d,%r12d
- xorl %edi,%r13d
- rorxl $6,%r11d,%r14d
- leal (%rcx,%r12,1),%ecx
- xorl %r14d,%r13d
- movl %edx,%edi
- rorxl $22,%edx,%r12d
- leal (%rcx,%r13,1),%ecx
- xorl %r8d,%edi
- rorxl $13,%edx,%r14d
- rorxl $2,%edx,%r13d
- leal (%r10,%rcx,1),%r10d
- andl %edi,%r15d
- xorl %r12d,%r14d
- xorl %r8d,%r15d
- xorl %r13d,%r14d
- leal (%rcx,%r15,1),%ecx
- movl %r11d,%r12d
- addl 40+16(%rbp),%ebx
- andl %r10d,%r12d
- rorxl $25,%r10d,%r13d
- rorxl $11,%r10d,%r15d
- leal (%rcx,%r14,1),%ecx
- leal (%rbx,%r12,1),%ebx
- andnl %eax,%r10d,%r12d
- xorl %r15d,%r13d
- rorxl $6,%r10d,%r14d
- leal (%rbx,%r12,1),%ebx
- xorl %r14d,%r13d
- movl %ecx,%r15d
- rorxl $22,%ecx,%r12d
- leal (%rbx,%r13,1),%ebx
- xorl %edx,%r15d
- rorxl $13,%ecx,%r14d
- rorxl $2,%ecx,%r13d
- leal (%r9,%rbx,1),%r9d
- andl %r15d,%edi
- xorl %r12d,%r14d
- xorl %edx,%edi
- xorl %r13d,%r14d
- leal (%rbx,%rdi,1),%ebx
- movl %r10d,%r12d
- addl 44+16(%rbp),%eax
- andl %r9d,%r12d
- rorxl $25,%r9d,%r13d
- rorxl $11,%r9d,%edi
- leal (%rbx,%r14,1),%ebx
- leal (%rax,%r12,1),%eax
- andnl %r11d,%r9d,%r12d
- xorl %edi,%r13d
- rorxl $6,%r9d,%r14d
- leal (%rax,%r12,1),%eax
- xorl %r14d,%r13d
- movl %ebx,%edi
- rorxl $22,%ebx,%r12d
- leal (%rax,%r13,1),%eax
- xorl %ecx,%edi
- rorxl $13,%ebx,%r14d
- rorxl $2,%ebx,%r13d
- leal (%r8,%rax,1),%r8d
- andl %edi,%r15d
- xorl %r12d,%r14d
- xorl %ecx,%r15d
- xorl %r13d,%r14d
- leal (%rax,%r15,1),%eax
- movl %r9d,%r12d
- leaq -64(%rbp),%rbp
- cmpq %rsp,%rbp
- jae L$ower_avx2
-
- movq 512(%rsp),%rdi
- addl %r14d,%eax
-
- leaq 448(%rsp),%rsp
-
- addl 0(%rdi),%eax
- addl 4(%rdi),%ebx
- addl 8(%rdi),%ecx
- addl 12(%rdi),%edx
- addl 16(%rdi),%r8d
- addl 20(%rdi),%r9d
- leaq 128(%rsi),%rsi
- addl 24(%rdi),%r10d
- movq %rsi,%r12
- addl 28(%rdi),%r11d
- cmpq 64+16(%rsp),%rsi
-
- movl %eax,0(%rdi)
- cmoveq %rsp,%r12
- movl %ebx,4(%rdi)
- movl %ecx,8(%rdi)
- movl %edx,12(%rdi)
- movl %r8d,16(%rdi)
- movl %r9d,20(%rdi)
- movl %r10d,24(%rdi)
- movl %r11d,28(%rdi)
-
- jbe L$oop_avx2
- leaq (%rsp),%rbp
-
-L$done_avx2:
- leaq (%rbp),%rsp
- movq 64+24(%rsp),%rsi
- vzeroupper
- movq (%rsi),%r15
- movq 8(%rsi),%r14
- movq 16(%rsi),%r13
- movq 24(%rsi),%r12
- movq 32(%rsi),%rbp
- movq 40(%rsi),%rbx
- leaq 48(%rsi),%rsp
-L$epilogue_avx2:
- .byte 0xf3,0xc3
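
Editor's note: the many pshufd hunks above change only the spelling of the immediate: $27 and $0x1b, $177 and $0xb1, $14 and $0x0e assemble to identical bytes, so those hunks are cosmetic, presumably a regenerated file from a perlasm that now emits hex. For reference, a minimal intrinsics sketch of what those three shuffles do; this is my illustration, not code from this file.

    /*
     * Hedged sketch: the three pshufd immediates used in the shaext path,
     * written with SSE2 intrinsics.  result[i] = v[(imm >> 2*i) & 3].
     */
    #include <stdio.h>
    #include <emmintrin.h>

    int main(void)
    {
        __m128i v    = _mm_set_epi32(3, 2, 1, 0);      /* element 0 = 0 ... element 3 = 3 */
        __m128i rev  = _mm_shuffle_epi32(v, 0x1b);     /* 27: reverse the four dwords      */
        __m128i swap = _mm_shuffle_epi32(v, 0xb1);     /* 177: swap dwords in each 64-bit half */
        __m128i hi   = _mm_shuffle_epi32(v, 0x0e);     /* 14: move the upper 64 bits down  */
        int out[4];

        _mm_storeu_si128((__m128i *)out, rev);
        printf("rev:  %d %d %d %d\n", out[0], out[1], out[2], out[3]);  /* 3 2 1 0 */
        _mm_storeu_si128((__m128i *)out, swap);
        printf("swap: %d %d %d %d\n", out[0], out[1], out[2], out[3]);  /* 1 0 3 2 */
        _mm_storeu_si128((__m128i *)out, hi);
        printf("hi:   %d %d %d %d\n", out[0], out[1], out[2], out[3]);  /* 2 3 0 0 */
        return 0;
    }
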