Welcome to mirror list, hosted at ThFree Co, Russian Federation.

checkasm.asm « x86 « checkasm « tests - github.com/videolan/dav1d.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 8f19ef97f7121b3bc8482a76b32b2c301dfe80f6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
; Copyright © 2018, VideoLAN and dav1d authors
; Copyright © 2018, Two Orioles, LLC
; All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions are met:
;
; 1. Redistributions of source code must retain the above copyright notice, this
;    list of conditions and the following disclaimer.
;
; 2. Redistributions in binary form must reproduce the above copyright notice,
;    this list of conditions and the following disclaimer in the documentation
;    and/or other materials provided with the distribution.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
; ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
; DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
; ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
; (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
; ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

%include "config.asm"
%undef private_prefix
%define private_prefix checkasm
%include "ext/x86/x86inc.asm"

; Read-only data: reference patterns used by checked_call to detect clobbered
; registers, and the messages it hands to fail_func.
SECTION_RODATA 16

%if ARCH_X86_64
; just random numbers to reduce the chance of incidental match
%if WIN64
; x6-x15: 128-bit patterns. checked_call loads x<N> into vector register N
; before calling the tested function and XORs the register with the same
; constant afterwards; this is only done on WIN64.
x6:  dq 0x1a1b2550a612b48c,0x79445c159ce79064
x7:  dq 0x2eed899d5a28ddcd,0x86b2536fcd8cf636
x8:  dq 0xb0856806085e7943,0x3f2bf84fc0fcca4e
x9:  dq 0xacbd382dcf5b8de2,0xd229e1f5b281303f
x10: dq 0x71aeaff20b095fd9,0xab63e2e11fa38ed9
x11: dq 0x89b0c0765892729a,0x77d410d5c42c882d
x12: dq 0xc45ea11a955d8dd5,0x24b3c1d2a024048b
x13: dq 0x2e8ec680de14b47c,0xdd7b8919edd42786
x14: dq 0x135ce6888fa02cbf,0x11e53e2b2ac655ef
x15: dq 0x011ff554472a7a10,0x6de8f4c914c334d5
; n7-n14: 64-bit patterns. checked_call loads n<N> into register r<N> (as named
; by x86inc) before the call and compares afterwards. It covers r14 down to
; r<free_regs>; free_regs is 7 on WIN64 and 9 otherwise, so n7 and n8 are only
; needed on WIN64.
n7:  dq 0x21f86d66c8ca00ce
n8:  dq 0x75b6ba21077c48ad
%endif
n9:  dq 0xed56bb2dcb3c7736
n10: dq 0x8bda43d3fd1a7e06
n11: dq 0xb64a9c9e5d318408
n12: dq 0xdf9a54b303f1d3a3
n13: dq 0x4a75479abd64e097
n14: dq 0x249214109d5d1c88
%endif

; Zero-terminated messages passed to fail_func as its first argument.
; errmsg_register has one string placeholder, filled with the list of register
; names that checked_call builds on the stack.
errmsg_stack: db "stack corruption", 0
errmsg_register: db "failed to preserve register:%s", 0
errmsg_vzeroupper: db "missing vzeroupper", 0

SECTION .bss

; One dword flag. init_x86 stores a non-zero value here when the XGETBV-based
; dirty-YMM test is usable; checked_call skips that test while it is zero.
check_vzeroupper: resd 1

SECTION .text

; External failure reporter. checked_call calls it with one of the errmsg_*
; strings (plus the register-name list for errmsg_register).
cextern fail_func

; max number of args used by any asm function.
; (max_args % 4) must equal 3 for stack alignment
%define max_args 15

; Choose the register behind the temporary t0 used by init_x86.
; NOTE(review): presumably picked so that t0 is not one of the registers that
; cpuid overwrites (eax/ebx/ecx/edx) - confirm against x86inc's register mapping.
%if UNIX64
    DECLARE_REG_TMP 0
%else
    DECLARE_REG_TMP 4
%endif

;-----------------------------------------------------------------------------
; unsigned checkasm_init_x86(char *name)
;
; Stores a CPU identification string obtained through CPUID into name and
; decides whether checked_call may test for a missing vzeroupper.
;
; name:    output buffer. If the extended leaves up to 0x80000004 exist, the
;          48 bytes returned by leaves 0x80000002-0x80000004 (processor brand
;          string) are stored. Otherwise the 12-byte manufacturer id from
;          leaf 0 is stored, followed by a zero dword at offset 12.
; returns: 0 if leaf 0 reports a maximum basic leaf of 0; otherwise the EAX
;          value returned by CPUID leaf 1 (the cpuid signature).
; side effect: check_vzeroupper is set to 1 when OSXSAVE and AVX are reported,
;          leaf 13 exists, XGETBV with ECX=1 is reported as supported, and
;          that XGETBV does not already report bit 2 (tested as the YMM state
;          below) as set.
;-----------------------------------------------------------------------------
cglobal init_x86, 0, 5
%if ARCH_X86_64
    ; cpuid overwrites ebx, so rbx is saved by hand here.
    ; NOTE(review): presumably rbx is not among the 5 registers cglobal
    ; preserves on x86-64 - confirm against x86inc.
    push          rbx
%endif
    ; t0 = name
    movifnidn      t0, r0mp
    ; Query the highest supported extended leaf.
    mov           eax, 0x80000000
    cpuid
    cmp           eax, 0x80000004
    jb .no_brand ; processor brand string not supported
    ; Brand string: three leaves, 16 bytes each, stored in eax/ebx/ecx/edx order.
    mov           eax, 0x80000002
    cpuid
    mov     [t0+4* 0], eax
    mov     [t0+4* 1], ebx
    mov     [t0+4* 2], ecx
    mov     [t0+4* 3], edx
    mov           eax, 0x80000003
    cpuid
    mov     [t0+4* 4], eax
    mov     [t0+4* 5], ebx
    mov     [t0+4* 6], ecx
    mov     [t0+4* 7], edx
    mov           eax, 0x80000004
    cpuid
    mov     [t0+4* 8], eax
    mov     [t0+4* 9], ebx
    mov     [t0+4*10], ecx
    mov     [t0+4*11], edx
    ; Leaf 0: eax = highest supported basic leaf (consumed at .check_xcr1).
    xor           eax, eax
    cpuid
    jmp .check_xcr1
.no_brand: ; use manufacturer id as a fallback
    xor           eax, eax
    ; Zero the dword right after the 12-byte id so the string is terminated.
    mov      [t0+4*3], eax
    cpuid
    ; The id characters are returned in ebx, edx, ecx (in that order).
    mov      [t0+4*0], ebx
    mov      [t0+4*1], edx
    mov      [t0+4*2], ecx
.check_xcr1:
    ; eax = highest basic leaf here; when it is 0 the function returns 0.
    test          eax, eax
    jz .end2 ; cpuid leaf 1 not supported
    ; name is no longer needed, so t0 is reused as scratch from here on.
    mov           t0d, eax ; max leaf
    mov           eax, 1
    cpuid
    ; Both ecx bits 27 and 28 must be set; eax (signature) is returned as is
    ; on the early exits below.
    and           ecx, 0x18000000
    cmp           ecx, 0x18000000
    jne .end2 ; osxsave/avx not supported
    cmp           t0d, 13 ; cpuid leaf 13 not supported
    jb .end2
    mov           t0d, eax ; cpuid signature
    ; Leaf 13, sub-leaf 1: bit 2 of eax is tested as "xgetbv with ecx=1 works".
    mov           eax, 13
    mov           ecx, 1
    cpuid
    test           al, 0x04
    jz .end ; xcr1 not supported
    ; Read the register selected by ecx=1; bit 2 is the same bit checked_call
    ; later tests to detect a missing vzeroupper.
    mov           ecx, 1
    xgetbv
    test           al, 0x04
    jnz .end ; always-dirty ymm state
    ; Enable the vzeroupper test: ecx is still 1 here.
%if ARCH_X86_64 == 0 && PIC
    LEA           eax, check_vzeroupper
    mov         [eax], ecx
%else
    mov [check_vzeroupper], ecx
%endif
.end:
    ; Return the cpuid signature saved in t0d.
    mov           eax, t0d
.end2:
%if ARCH_X86_64
    pop           rbx
%endif
    RET

; Per-ABI configuration for the x86-64 checked_call.
;   stack_param            address of the first outgoing stack-argument slot
;                          (placed above the 32-byte shadow space on WIN64)
;   num_fn_args            address of the incoming stack argument holding the
;                          tested function's argument count; rsp+stack_offset
;                          is where checked_call reads its return address.
;                          NOTE(review): the 17*8 / 11*8 offsets depend on how
;                          the C-side caller lays out its arguments - confirm
;                          against the call macro in checkasm.h.
;   num_reg_args           number of arguments passed in registers; subtracted
;                          from the argument count to get the stack-arg count
;   free_regs              lowest register number that is filled with an n<N>
;                          pattern; registers r14 down to r<free_regs> are checked
;   clobber_mask_stack_bit clobber-mask bit tested for the stack parameter
;                          currently being copied (bit index == num_reg_args)
;   t0                     temporary that holds the function pointer until the call
%if ARCH_X86_64
%if WIN64
    %define stack_param rsp+32 ; shadow space
    %define num_fn_args rsp+stack_offset+17*8
    %assign num_reg_args 4
    %assign free_regs 7
    %assign clobber_mask_stack_bit 16
    DECLARE_REG_TMP 4
%else
    %define stack_param rsp
    %define num_fn_args rsp+stack_offset+11*8
    %assign num_reg_args 6
    %assign free_regs 9
    %assign clobber_mask_stack_bit 64
    DECLARE_REG_TMP 7
%endif

; CLOBBER_UPPER reg, mask_bit
; If mask_bit is set in the low byte of r9 (the clobber mask), replace reg with
; (zero-extended low 32 bits of reg) OR r8; otherwise leave reg unchanged.
; checked_call sets r8 to 0xdeadbeef00000000 before using this, so the low
; 32 bits of reg are kept and only the upper half is overwritten.
; Inputs:   r8 = pattern, r9 = clobber mask
; Clobbers: r13, flags
%macro CLOBBER_UPPER 2 ; reg, mask_bit
    mov          r13d, %1d
    or            r13, r8
    test          r9b, %2
    cmovnz         %1, r13
%endmacro

;-----------------------------------------------------------------------------
; checked_call (x86-64): call a function through a checking trampoline.
;
; Three values are read from the incoming stack arguments: the tested
; function's argument count n at [num_fn_args], the function pointer at
; [num_fn_args+n*8] and a clobber mask at [num_fn_args+n*8+8]. Then:
;   1. for every argument whose bit is set in the clobber mask, the upper
;      32 bits are replaced with 0xdeadbeef,
;   2. stack arguments are copied to stack_param and 64 bytes of canaries
;      (the inverted return address) are written right above them,
;   3. registers r14 down to r<free_regs> (and vector registers 6-15 on WIN64)
;      are filled with the n<N> / x<N> patterns and the function is called,
;   4. fail_func is called with a message if the canaries changed, if any of
;      those registers changed, or if the YMM state is reported dirty.
; The rax/rdx values produced by the tested function are what this function
; returns, also on the failure paths.
;-----------------------------------------------------------------------------
cglobal checked_call, 2, 15, 16, max_args*8+64+8
    ; r10 = argument count of the tested function
    mov          r10d, [num_fn_args]
    ; r8 = pattern ORed into the upper half of clobbered arguments
    mov            r8, 0xdeadbeef00000000
    mov           r9d, [num_fn_args+r10*8+8] ; clobber_mask
    mov            t0, [num_fn_args+r10*8]   ; func

    ; Clobber the upper halves of 32-bit parameters
    ; (mask bit k corresponds to register argument k)
    CLOBBER_UPPER  r0, 1
    CLOBBER_UPPER  r1, 2
    CLOBBER_UPPER  r2, 4
    CLOBBER_UPPER  r3, 8
%if UNIX64
    CLOBBER_UPPER  r4, 16
    CLOBBER_UPPER  r5, 32
%else ; WIN64
    ; Load the reference patterns into vector registers 6-15.
%assign i 6
%rep 16-6
    mova       m %+ i, [x %+ i]
    %assign i i+1
%endrep
%endif

    ; r10 = max(argument count - num_reg_args, 0); r11 = 0 doubles as the
    ; copy-loop index below.
    xor          r11d, r11d
    sub          r10d, num_reg_args
    cmovs        r10d, r11d ; num stack args

    ; write stack canaries to the area above parameters passed on the stack
    mov           r12, [rsp+stack_offset] ; return address
    not           r12
%assign i 0
%rep 8 ; 64 bytes
    mov [stack_param+(r10+i)*8], r12
    %assign i i+1
%endrep

    test         r10d, r10d
    jz .stack_setup_done ; no stack parameters
.copy_stack_parameter:
    ; Copy incoming stack argument number r11 to its outgoing slot, clobbering
    ; its upper half if the mask says so.
    mov           r12, [stack_param+stack_offset+8+r11*8]
    CLOBBER_UPPER r12, clobber_mask_stack_bit
    ; Shift the mask so the next stack parameter's bit lands on
    ; clobber_mask_stack_bit.
    shr           r9d, 1
    mov [stack_param+r11*8], r12
    inc          r11d
    cmp          r11d, r10d
    jl .copy_stack_parameter
.stack_setup_done:

    ; Fill r14 down to r<free_regs> with their reference patterns. This must
    ; come last because r8-r13 were used as scratch above.
%assign i 14
%rep 15-free_regs
    mov        r %+ i, [n %+ i]
    %assign i i-1
%endrep
    call           t0

    ; check for stack corruption
    ; r0 = number of stack arguments, recomputed the same way as before the call
    mov           r0d, [num_fn_args]
    xor           r3d, r3d
    sub           r0d, num_reg_args
    cmovs         r0d, r3d ; num stack args

    ; r3 = expected canary; r4 accumulates (slot XOR canary) over all 8 slots
    ; and is zero only if every canary is intact.
    mov            r3, [rsp+stack_offset]
    mov            r4, [stack_param+r0*8]
    not            r3
    xor            r4, r3
%assign i 1
%rep 6
    mov            r5, [stack_param+(r0+i)*8]
    xor            r5, r3
    or             r4, r5
    %assign i i+1
%endrep
    ; The last slot is XORed into r3 itself, so on success both r3 and r4 end
    ; up zero; the register check below relies on that.
    xor            r3, [stack_param+(r0+7)*8]
    or             r4, r3
    jz .stack_ok
    ; Save the return value located in rdx:rax first to prevent clobbering.
    mov           r10, rax
    mov           r11, rdx
    lea            r0, [errmsg_stack]
    jmp .fail
.stack_ok:

    ; check for failure to preserve registers
    ; Build a bitmask in r3: each step shifts r3 left by one and appends the
    ; mismatch flag, so bit (N - free_regs) ends up describing register r<N>.
%assign i 14
%rep 15-free_regs
    cmp        r %+ i, [n %+ i]
    setne         r4b
    lea           r3d, [r4+r3*2]
    %assign i i-1
%endrep
%if WIN64
    ; r0 = write cursor for the register-name list, r5 = start of the list.
    lea            r0, [rsp+32] ; account for shadow space
    mov            r5, r0
    test          r3d, r3d
    jz .gpr_ok
%else
    test          r3d, r3d
    jz .gpr_xmm_ok
    ; r0 = write cursor for the register-name list, which starts at rsp.
    mov            r0, rsp
%endif
    ; Append " r<N>" for every register whose bit is set. The name is always
    ; written at the cursor, but the cursor only advances when the bit is set,
    ; so names of preserved registers are overwritten by the next one.
%assign i free_regs
%rep 15-free_regs
%if i < 10
    mov    dword [r0], " r0" + (i << 16)
    lea            r4, [r0+3]
%else
    mov    dword [r0], " r10" + ((i - 10) << 24)
    lea            r4, [r0+4]
%endif
    test          r3b, 1 << (i - free_regs)
    cmovnz         r0, r4
    %assign i i+1
%endrep
%if WIN64 ; xmm registers
.gpr_ok:
    ; XOR each vector register with its reference: all-zero means preserved.
%assign i 6
%rep 16-6
    pxor       m %+ i, [x %+ i]
    %assign i i+1
%endrep
    ; Reduce the ten difference vectors into m6 with saturating packs (a
    ; non-zero element stays non-zero). After the last pack, byte N of m6 is
    ; non-zero exactly when register N (6-15) differed.
    packsswb       m6, m7
    packsswb       m8, m9
    packsswb      m10, m11
    packsswb      m12, m13
    packsswb      m14, m15
    packsswb       m6, m6
    packsswb       m8, m10
    packsswb      m12, m14
    packsswb       m6, m6
    packsswb       m8, m12
    packsswb       m6, m8
    ; r3d = 16-bit mask with bit N set when byte N is zero (register N intact).
    pxor           m7, m7
    pcmpeqb        m6, m7
    pmovmskb      r3d, m6
    cmp           r3d, 0xffff
    je .xmm_ok
    ; Append " xmm<N>" for every register whose bit is clear, using the same
    ; write-then-conditionally-advance scheme as for the GPR names.
    mov           r7d, " xmm"
%assign i 6
%rep 16-6
    mov        [r0+0], r7d
%if i < 10
    mov   byte [r0+4], "0" + i
    lea            r4, [r0+5]
%else
    mov   word [r0+4], "10" + ((i - 10) << 8)
    lea            r4, [r0+6]
%endif
    test          r3d, 1 << i
    cmovz          r0, r4
    %assign i i+1
%endrep
.xmm_ok:
    ; Nothing was appended if the cursor is still at the start of the list.
    cmp            r0, r5
    je .gpr_xmm_ok
    ; Terminate the list; r1 = list (second argument of fail_func).
    mov     byte [r0], 0
    mov           r11, rdx
    mov            r1, r5
%else
    ; Terminate the list; r1 = list (second argument of fail_func).
    mov     byte [r0], 0
    mov           r11, rdx
    mov            r1, rsp
%endif
    mov           r10, rax
    lea            r0, [errmsg_register]
    jmp .fail
.gpr_xmm_ok:
    ; Check for dirty YMM state, i.e. missing vzeroupper
    ; ecx = check_vzeroupper; init_x86 stores 1 there, which is also the
    ; selector passed to xgetbv below.
    mov           ecx, [check_vzeroupper]
    test          ecx, ecx
    jz .ok ; not supported, skip
    ; xgetbv overwrites eax/edx, so keep the return value in r10/r11.
    mov           r10, rax
    mov           r11, rdx
    xgetbv
    test           al, 0x04
    jz .restore_retval ; clean ymm state
    lea            r0, [errmsg_vzeroupper]
    vzeroupper
.fail:
    ; Call fail_func() with a descriptive message to mark it as a failure.
    ; r0 = message, r1 = register-name list (only set for errmsg_register).
    ; eax is zeroed before the call.
    ; NOTE(review): r10/r11 must survive this call - presumably they map to
    ; callee-saved registers under x86inc; confirm.
    xor           eax, eax
    call fail_func
.restore_retval:
    mov           rax, r10
    mov           rdx, r11
.ok:
    RET

; trigger a warmup of vector units
; WARMUP emits a function called warmup that zeroes m0, multiplies it by
; itself and returns. m0 is whatever vector register the preceding INIT_*
; line selects.
; NOTE(review): presumably x86inc appends the instruction-set name to the
; exported symbol, so the two instantiations below do not collide - confirm.
%macro WARMUP 0
cglobal warmup, 0, 0
    xorps          m0, m0
    mulps          m0, m0
    RET
%endmacro

; One instance per vector width: YMM with avx2, ZMM with avx512.
INIT_YMM avx2
WARMUP
INIT_ZMM avx512
WARMUP

%else

; just random numbers to reduce the chance of incidental match
; The 32-bit checked_call loads n3-n6 into registers r3-r6 as immediates
; before calling the tested function and compares the registers against the
; same values afterwards.
%assign n3 0x6549315c
%assign n4 0xe02f3e23
%assign n5 0xb78d0d1d
%assign n6 0x33627ba7

;-----------------------------------------------------------------------------
; void checkasm_checked_call(void *func, ...)
;
; 32-bit variant. Calls func with a copy of the caller's stack parameters and
; then verifies, in this order:
;   1. that registers r3-r6 still hold the n3-n6 patterns,
;   2. that the canaries (inverted return address) above the copied
;      parameters are intact (skipped when step 1 already failed),
;   3. that the YMM state is not reported dirty (only if check_vzeroupper
;      is non-zero).
; On the first failed check fail_func is called with the matching message.
; The eax/edx values produced by func are returned to the caller.
;
; The parameter count is read from [esp+stack_offset+17*4].
; NOTE(review): exactly 27 dwords are removed from the stack at .ok, while
; both push loops below run at least once. This balances only for a count
; between 1 and 26 - confirm that callers never pass 0.
;-----------------------------------------------------------------------------
cglobal checked_call, 1, 7
    mov            r3, [esp+stack_offset]      ; return address
    mov            r1, [esp+stack_offset+17*4] ; num_stack_params
    ; r2 = number of canary dwords = 27 - num_stack_params; r3 = canary value
    mov            r2, 27
    not            r3
    sub            r2, r1
.push_canary:
    push           r3
    dec            r2
    jg .push_canary
.push_parameter:
    ; Each push lowers esp by 4, so the fixed offset walks backwards through
    ; the caller's parameters: the last one is pushed first and the first one
    ; ends up at the lowest address.
    push dword [esp+32*4]
    dec            r1
    jg .push_parameter
    ; Fill the registers under test with their reference patterns.
    mov            r3, n3
    mov            r4, n4
    mov            r5, n5
    mov            r6, n6
    call           r0

    ; check for failure to preserve registers
    ; Collect one mismatch flag per byte of r3, from the top byte down:
    ; r3, r4, r5, r6. A flag may be written into r3 right after its own
    ; compare because the compare result is already in the flags.
    cmp            r3, n3
    setne         r3h
    cmp            r4, n4
    setne         r3b
    shl           r3d, 16
    cmp            r5, n5
    setne         r3h
    cmp            r6, n6
    setne         r3b
    test           r3, r3
    jz .gpr_ok
    ; Build the register-name list at esp+16 and store its address as the
    ; second stack argument for fail_func.
    lea            r1, [esp+16]
    mov       [esp+4], r1
    ; Write " r<N>" at the cursor for each register; the cursor only advances
    ; when that register's flag byte is set, so other names get overwritten.
%assign i 3
%rep 4
    mov    dword [r1], " r0" + (i << 16)
    lea            r4, [r1+3]
    test           r3, 1 << ((6 - i) * 8)
    cmovnz         r1, r4
    %assign i i+1
%endrep
    mov     byte [r1], 0
    ; Keep the tested function's return value in r5/r6 across fail_func.
    mov            r5, eax
    mov            r6, edx
    LEA            r1, errmsg_register
    jmp .fail
.gpr_ok:
    ; check for stack corruption
    ; 27 dwords were pushed before the call, which is why the offsets below
    ; are 27 dwords larger than the ones used at function entry.
    mov            r3, [esp+48*4] ; num_stack_params
    mov            r6, [esp+31*4] ; return address
    ; The first canary sits right after the copied parameters.
    mov            r4, [esp+r3*4]
    ; r3 becomes a negative index that reaches 0 after the last canary
    ; (dword 26 from esp).
    sub            r3, 26
    not            r6
    xor            r4, r6
.check_canary:
    ; r4 accumulates (slot XOR canary); it stays zero only if all are intact.
    mov            r5, [esp+(r3+27)*4]
    xor            r5, r6
    or             r4, r5
    inc            r3
    jl .check_canary
    ; Keep the tested function's return value in r5/r6 from here on; neither
    ; mov changes the flags, and r4 is tested explicitly afterwards.
    mov            r5, eax
    mov            r6, edx
    test           r4, r4
    jz .stack_ok
    LEA            r1, errmsg_stack
    jmp .fail
.stack_ok:
    ; check for dirty YMM state, i.e. missing vzeroupper
    ; ecx = check_vzeroupper; init_x86 stores 1 there, which is also the
    ; selector passed to xgetbv below.
    LEA           ecx, check_vzeroupper
    mov           ecx, [ecx]
    test          ecx, ecx
    jz .ok ; not supported, skip
    xgetbv
    test           al, 0x04
    jz .ok ; clean ymm state
    LEA            r1, errmsg_vzeroupper
    vzeroupper
.fail:
    ; First stack argument of fail_func = message.
    ; NOTE(review): r5/r6 must survive this call - presumably they map to
    ; callee-saved registers under x86inc; confirm.
    mov         [esp], r1
    call fail_func
.ok:
    ; Drop the 27 dwords pushed at entry and return the saved eax/edx.
    add           esp, 27*4
    mov           eax, r5
    mov           edx, r6
    RET

%endif ; ARCH_X86_64