diff options
author | Janne Grunau <janne-vlc@jannau.net> | 2018-09-29 14:57:29 +0300 |
---|---|---|
committer | Henrik Gramner <gramner@twoorioles.com> | 2018-09-29 18:22:24 +0300 |
commit | 35e777f3780327ea055339408bf7f9c1313254ce (patch) | |
tree | 1a2deaf17576a2d8be070e721e62b8358f0173eb | |
parent | e72c2e45748a9f80223d57333772419f00fc4e7e (diff) |
build: add support for arm/aarch64 asm and integrate checkasm
-rw-r--r-- | meson.build | 22 | ||||
-rw-r--r-- | src/arm/32/util.S | 50 | ||||
-rw-r--r-- | src/arm/64/util.S | 62 | ||||
-rw-r--r-- | src/arm/asm.S | 94 | ||||
-rw-r--r-- | src/arm/cpu.c | 32 | ||||
-rw-r--r-- | src/arm/cpu.h | 37 | ||||
-rw-r--r-- | src/cpu.c | 54 | ||||
-rw-r--r-- | src/cpu.h | 10 | ||||
-rw-r--r-- | src/meson.build | 55 | ||||
-rw-r--r-- | src/x86/cpu.c | 19 | ||||
-rw-r--r-- | src/x86/cpu.h | 3 | ||||
-rw-r--r-- | tests/checkasm/arm/checkasm_32.S | 172 | ||||
-rw-r--r-- | tests/checkasm/arm/checkasm_64.S | 170 | ||||
-rw-r--r-- | tests/checkasm/checkasm.c | 2 | ||||
-rw-r--r-- | tests/checkasm/checkasm.h | 28 | ||||
-rw-r--r-- | tests/meson.build | 9 |
16 files changed, 770 insertions, 49 deletions
diff --git a/meson.build b/meson.build index 1d47dfa..c65ca14 100644 --- a/meson.build +++ b/meson.build @@ -59,7 +59,9 @@ endforeach # ASM option is_asm_enabled = (get_option('build_asm') == true and - host_machine.cpu_family().startswith('x86')) + (host_machine.cpu_family().startswith('x86') or + host_machine.cpu_family() == 'aarch64' or + host_machine.cpu_family().startswith('arm'))) cdata.set10('HAVE_ASM', is_asm_enabled) @@ -183,6 +185,21 @@ if host_machine.cpu_family().startswith('x86') endif endif +cdata.set10('ARCH_AARCH64', host_machine.cpu_family() == 'aarch64') +cdata.set10('ARCH_ARM', host_machine.cpu_family().startswith('arm')) +if (is_asm_enabled and + (host_machine.cpu_family() == 'aarch64' or + host_machine.cpu_family().startswith('arm'))) + + as_func_code = '''__asm__ ( +".func meson_test" +".endfunc" +); +''' + have_as_func = cc.compiles(as_func_code) + cdata.set10('HAVE_AS_FUNC', have_as_func) +endif + if host_machine.cpu_family().startswith('x86') cdata.set10('ARCH_X86', true) if host_machine.cpu_family() == 'x86_64' @@ -205,6 +222,7 @@ else endif if cc.symbols_have_underscore_prefix() + cdata.set10('PREFIX', true) cdata_asm.set10('PREFIX', true) endif @@ -216,7 +234,7 @@ config_h_target = configure_file(output: 'config.h', configuration: cdata) # # ASM specific stuff # -if is_asm_enabled +if is_asm_enabled and host_machine.cpu_family().startswith('x86') # Generate config.asm config_asm_target = configure_file(output: 'config.asm', output_format: 'nasm', configuration: cdata_asm) diff --git a/src/arm/32/util.S b/src/arm/32/util.S new file mode 100644 index 0000000..7f4405b --- /dev/null +++ b/src/arm/32/util.S @@ -0,0 +1,50 @@ +/****************************************************************************** + * Copyright © 2018, VideoLAN and dav1d authors + * Copyright © 2015 Martin Storsjo + * Copyright © 2015 Janne Grunau + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ *****************************************************************************/ + +#ifndef __DAV1D_SRC_ARM_32_UTIL_S__ +#define __DAV1D_SRC_ARM_32_UTIL_S__ + +#include "config.h" +#include "src/arm/asm.S" + +.macro movrel rd, val +#if defined(PIC) + ldr \rd, 1f + b 2f +1: +@ FIXME: thumb + .word \val - (2f + 8) +2: + add \rd, \rd, pc +#else + movw \rd, #:lower16:\val + movt \rd, #:upper16:\val +#endif +.endm + +#endif /* __DAV1D_SRC_ARM_32_UTIL_S__ */ diff --git a/src/arm/64/util.S b/src/arm/64/util.S new file mode 100644 index 0000000..53bc676 --- /dev/null +++ b/src/arm/64/util.S @@ -0,0 +1,62 @@ +/****************************************************************************** + * Copyright © 2018, VideoLAN and dav1d authors + * Copyright © 2015 Martin Storsjo + * Copyright © 2015 Janne Grunau + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + *****************************************************************************/ + +#ifndef __DAV1D_SRC_ARM_64_UTIL_S__ +#define __DAV1D_SRC_ARM_64_UTIL_S__ + +#include "config.h" +#include "src/arm/asm.S" + +.macro movrel rd, val, offset=0 +#if defined(PIC) && defined(__APPLE__) + .if \offset < 0 + adrp \rd, \val@PAGE + add \rd, \rd, \val@PAGEOFF + sub \rd, \rd, -(\offset) + .else + adrp \rd, \val+(\offset)@PAGE + add \rd, \rd, \val+(\offset)@PAGEOFF + .endif +#elif defined(PIC) && defined(_WIN32) + .if \offset < 0 + adrp \rd, \val + add \rd, \rd, :lo12:\val + sub \rd, \rd, -(\offset) + .else + adrp \rd, \val+(\offset) + add \rd, \rd, :lo12:\val+(\offset) + .endif +#elif defined(PIC) + adrp \rd, \val+(\offset) + add \rd, \rd, :lo12:\val+(\offset) +#else + ldr \rd, =\val+\offset +#endif +.endm + +#endif /* __DAV1D_SRC_ARM_64_UTIL_S__ */ diff --git a/src/arm/asm.S b/src/arm/asm.S new file mode 100644 index 0000000..4628356 --- /dev/null +++ b/src/arm/asm.S @@ -0,0 +1,94 @@ +/* + * Copyright © 2018, VideoLAN and dav1d authors + * Copyright © 2018, Janne Grunau + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __DAV1D_SRC_ARM_ASM_S__ +#define __DAV1D_SRC_ARM_ASM_S__ + +#include "config.h" + +#ifndef PRIVATE_PREFIX +#define PRIVATE_PREFIX dav1d_ +#endif + +#define PASTE(a,b) a ## b +#define CONCAT(a,b) PASTE(a,b) + +#ifdef PREFIX +#define EXTERN CONCAT(_,PRIVATE_PREFIX) +#else +#define EXTERN PRIVATE_PREFIX +#endif + +.macro function name, export=0, align=2 + .macro endfunc +#ifdef __ELF__ + .size \name, . - \name +#endif +#if HAVE_AS_FUNC + .endfunc +#endif + .purgem endfunc + .endm + .text + .align \align + .if \export + .global EXTERN\name +#ifdef __ELF__ + .type EXTERN\name, %function +#endif +#if HAVE_AS_FUNC + .func EXTERN\name +#endif +EXTERN\name: + .else +#ifdef __ELF__ + .type \name, %function +#endif +#if HAVE_AS_FUNC + .func \name +#endif + .endif +\name: +.endm + +.macro const name, align=2 + .macro endconst +#ifdef __ELF__ + .size \name, . 
- \name +#endif + .purgem endconst + .endm +#if !defined(__MACH__) + .section .rodata +#else + .const_data +#endif + .align \align +\name: +.endm + +#endif /* __DAV1D_SRC_ARM_ASM_S__ */ diff --git a/src/arm/cpu.c b/src/arm/cpu.c new file mode 100644 index 0000000..b0633ee --- /dev/null +++ b/src/arm/cpu.c @@ -0,0 +1,32 @@ +/* + * Copyright © 2018, VideoLAN and dav1d authors + * Copyright © 2018, Janne Grunau + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "src/arm/cpu.h" + +unsigned dav1d_get_cpu_flags_arm(void) { + return DAV1D_ARM_CPU_FLAG_NEON; +} diff --git a/src/arm/cpu.h b/src/arm/cpu.h new file mode 100644 index 0000000..4788901 --- /dev/null +++ b/src/arm/cpu.h @@ -0,0 +1,37 @@ +/* + * Copyright © 2018, VideoLAN and dav1d authors + * Copyright © 2018, Janne Grunau + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __DAV1D_SRC_ARM_CPU_H__ +#define __DAV1D_SRC_ARM_CPU_H__ + +enum CpuFlags { + DAV1D_ARM_CPU_FLAG_NEON = 1 << 0, +}; + +unsigned dav1d_get_cpu_flags_arm(void); + +#endif /* __DAV1D_SRC_ARM_CPU_H__ */ diff --git a/src/cpu.c b/src/cpu.c new file mode 100644 index 0000000..ac6f46c --- /dev/null +++ b/src/cpu.c @@ -0,0 +1,54 @@ +/* + * Copyright © 2018, VideoLAN and dav1d authors + * Copyright © 2018, Two Orioles, LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <stdint.h> + +#include "config.h" +#include "src/cpu.h" + +static unsigned flags_mask = -1; + +unsigned dav1d_get_cpu_flags(void) { + static unsigned flags; + static uint8_t checked = 0; + + if (!checked) { +#if (ARCH_AARCH64 || ARCH_ARM) && HAVE_ASM + flags = dav1d_get_cpu_flags_arm(); /* src/arm/cpu.c is only built when asm is enabled */ +#elif ARCH_X86 && HAVE_ASM + flags = dav1d_get_cpu_flags_x86(); +#else + flags = 0; +#endif + checked = 1; + } + return flags & flags_mask; +} + +void dav1d_set_cpu_flags_mask(const unsigned mask) { + flags_mask = mask; +} diff --git a/src/cpu.h b/src/cpu.h --- a/src/cpu.h +++ b/src/cpu.h @@ -30,11 +30,13 @@ #include "config.h" -#if ARCH_X86 +#if ARCH_AARCH64 || ARCH_ARM +#include "src/arm/cpu.h" +#elif ARCH_X86 #include "src/x86/cpu.h" -#else -#define dav1d_get_cpu_flags 0 -#define dav1d_set_cpu_flags_mask(mask) while (0) #endif +unsigned dav1d_get_cpu_flags(void); +void dav1d_set_cpu_flags_mask(const unsigned mask); + #endif /* __DAV1D_SRC_CPU_H__ */ diff --git a/src/meson.build b/src/meson.build index 577dc23..00bc307 100644 --- a/src/meson.build +++ b/src/meson.build @@ -29,6 +29,7 @@ # libdav1d source files libdav1d_sources = files( 'picture.c', + 'cpu.c', 'data.c', 'ref.c', 'getbits.c', @@ -73,26 +74,42 @@ libdav1d_entrypoints_sources = files( ) # ASM specific sources +libdav1d_nasm_objs = [] if is_asm_enabled - - libdav1d_sources += files( - 'x86/cpu.c', - ) - - libdav1d_tmpl_sources += files( - 'x86/mc_init.c', - ) - - # NASM source files - libdav1d_sources_asm = files( - 'x86/cpuid.asm', - 'x86/mc.asm', - ) - - # Compile the ASM sources with NASM - libdav1d_nasm_objs = nasm_gen.process(libdav1d_sources_asm) -else - libdav1d_nasm_objs = [] + if (host_machine.cpu_family() == 'aarch64' or + host_machine.cpu_family().startswith('arm')) + + libdav1d_sources += files( + 'arm/cpu.c', + ) + libdav1d_tmpl_sources += files( + ) + if host_machine.cpu_family() == 'aarch64' + libdav1d_tmpl_sources += files( + ) + elif 
host_machine.cpu_family().startswith('arm') + libdav1d_tmpl_sources += files( + ) + endif + elif 
host_machine.cpu_family().startswith('x86') + + libdav1d_sources += files( + 'x86/cpu.c', + ) + + libdav1d_tmpl_sources += files( + 'x86/mc_init.c', + ) + + # NASM source files + libdav1d_sources_asm = files( + 'x86/cpuid.asm', + 'x86/mc.asm', + ) + + # Compile the ASM sources with NASM + libdav1d_nasm_objs = nasm_gen.process(libdav1d_sources_asm) + endif endif diff --git a/src/x86/cpu.c b/src/x86/cpu.c index 64b3a5a..ebee4a0 100644 --- a/src/x86/cpu.c +++ b/src/x86/cpu.c @@ -32,7 +32,7 @@ void dav1d_cpu_cpuid(uint32_t *info, int leaf); uint64_t dav1d_cpu_xgetbv(int xcr); -static unsigned get_cpu_flags(void) { +unsigned dav1d_get_cpu_flags_x86(void) { uint32_t info[4] = {0}, n_ids; unsigned flags = 0; @@ -65,20 +65,3 @@ static unsigned get_cpu_flags(void) { return flags; } - -static unsigned flags_mask = -1; - -unsigned dav1d_get_cpu_flags(void) { - static unsigned flags; - static uint8_t checked = 0; - - if (!checked) { - flags = get_cpu_flags(); - checked = 1; - } - return flags & flags_mask; -} - -void dav1d_set_cpu_flags_mask(const unsigned mask) { - flags_mask = mask; -} diff --git a/src/x86/cpu.h b/src/x86/cpu.h index e27b6a8..e630d97 100644 --- a/src/x86/cpu.h +++ b/src/x86/cpu.h @@ -40,7 +40,6 @@ enum CpuFlags { DAV1D_X86_CPU_FLAG_AVX512 = 1 << 8, /* F + CD + BW + DQ + VL */ }; -unsigned dav1d_get_cpu_flags(void); -void dav1d_set_cpu_flags_mask(unsigned mask); +unsigned dav1d_get_cpu_flags_x86(void); #endif /* __DAV1D_SRC_X86_CPU_H__ */ diff --git a/tests/checkasm/arm/checkasm_32.S b/tests/checkasm/arm/checkasm_32.S new file mode 100644 index 0000000..8584191 --- /dev/null +++ b/tests/checkasm/arm/checkasm_32.S @@ -0,0 +1,172 @@ +/****************************************************************************** + * Copyright © 2018, VideoLAN and dav1d authors + * Copyright © 2015 Martin Storsjo + * Copyright © 2015 Janne Grunau + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ *****************************************************************************/ + +#define PRIVATE_PREFIX checkasm_ + +#include "src/arm/asm.S" +#include "src/arm/32/util.S" + +const register_init, align=3 + .quad 0x21f86d66c8ca00ce + .quad 0x75b6ba21077c48ad + .quad 0xed56bb2dcb3c7736 + .quad 0x8bda43d3fd1a7e06 + .quad 0xb64a9c9e5d318408 + .quad 0xdf9a54b303f1d3a3 + .quad 0x4a75479abd64e097 + .quad 0x249214109d5d1c88 +endconst + +const error_message_fpscr + .asciz "failed to preserve register FPSCR, changed bits: %x" +error_message_gpr: + .asciz "failed to preserve register r%d" +error_message_vfp: + .asciz "failed to preserve register d%d" +endconst + +@ max number of args used by any asm function. +#define MAX_ARGS 15 + +#define ARG_STACK 4*(MAX_ARGS - 4) + +@ align the used stack space to 8 to preserve the stack alignment +#define ARG_STACK_A (((ARG_STACK + pushed + 7) & ~7) - pushed) + +.macro clobbercheck variant +.equ pushed, 4*9 +function checked_call_\variant, export=1 + push {r4-r11, lr} +.ifc \variant, vfp + vpush {d8-d15} + fmrx r4, FPSCR + push {r4} +.equ pushed, pushed + 16*4 + 4 +.endif + + movrel r12, register_init +.ifc \variant, vfp + vldm r12, {d8-d15} +.endif + ldm r12, {r4-r11} + + sub sp, sp, #ARG_STACK_A +.equ pos, 0 +.rept MAX_ARGS-4 + ldr r12, [sp, #ARG_STACK_A + pushed + 8 + pos] + str r12, [sp, #pos] +.equ pos, pos + 4 +.endr + + mov r12, r0 + mov r0, r2 + mov r1, r3 + ldrd r2, r3, [sp, #ARG_STACK_A + pushed] + blx r12 + add sp, sp, #ARG_STACK_A + + push {r0, r1} + movrel r12, register_init +.ifc \variant, vfp +.macro check_reg_vfp, dreg, offset + ldrd r2, r3, [r12, #8 * (\offset)] + vmov r0, lr, \dreg + eor r2, r2, r0 + eor r3, r3, lr + orrs r2, r2, r3 + bne 4f +.endm + +.irp n, 8, 9, 10, 11, 12, 13, 14, 15 + @ keep track of the checked double/SIMD register + mov r1, #\n + check_reg_vfp d\n, \n-8 +.endr +.purgem check_reg_vfp + + fmrx r1, FPSCR + ldr r3, [sp, #8] + eor r1, r1, r3 + @ Ignore changes in bits 0-4 and 7 + bic r1, r1, #0x9f 
+ @ Ignore changes in the topmost 5 bits + bics r1, r1, #0xf8000000 + bne 3f +.endif + + @ keep track of the checked GPR + mov r1, #4 +.macro check_reg reg1, reg2= + ldrd r2, r3, [r12], #8 + eors r2, r2, \reg1 + bne 2f + add r1, r1, #1 +.ifnb \reg2 + eors r3, r3, \reg2 + bne 2f +.endif + add r1, r1, #1 +.endm + check_reg r4, r5 + check_reg r6, r7 +@ r9 is a volatile register in the ios ABI +#ifdef __APPLE__ + check_reg r8 +#else + check_reg r8, r9 +#endif + check_reg r10, r11 +.purgem check_reg + + b 0f +4: + movrel r0, error_message_vfp + b 1f +3: + movrel r0, error_message_fpscr + b 1f +2: + movrel r0, error_message_gpr +1: +#ifdef PREFIX + blx _checkasm_fail_func +#else + blx checkasm_fail_func +#endif +0: + pop {r0, r1} +.ifc \variant, vfp + pop {r2} + fmxr FPSCR, r2 + vpop {d8-d15} +.endif + pop {r4-r11, pc} +endfunc +.endm + +clobbercheck vfp diff --git a/tests/checkasm/arm/checkasm_64.S b/tests/checkasm/arm/checkasm_64.S new file mode 100644 index 0000000..8f28cc9 --- /dev/null +++ b/tests/checkasm/arm/checkasm_64.S @@ -0,0 +1,170 @@ +/****************************************************************************** + * Copyright © 2018, VideoLAN and dav1d authors + * Copyright © 2015 Martin Storsjo + * Copyright © 2015 Janne Grunau + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + *****************************************************************************/ + +#define PRIVATE_PREFIX checkasm_ + +#include "src/arm/asm.S" +#include "src/arm/64/util.S" + +const register_init, align=4 + .quad 0x21f86d66c8ca00ce + .quad 0x75b6ba21077c48ad + .quad 0xed56bb2dcb3c7736 + .quad 0x8bda43d3fd1a7e06 + .quad 0xb64a9c9e5d318408 + .quad 0xdf9a54b303f1d3a3 + .quad 0x4a75479abd64e097 + .quad 0x249214109d5d1c88 + .quad 0x1a1b2550a612b48c + .quad 0x79445c159ce79064 + .quad 0x2eed899d5a28ddcd + .quad 0x86b2536fcd8cf636 + .quad 0xb0856806085e7943 + .quad 0x3f2bf84fc0fcca4e + .quad 0xacbd382dcf5b8de2 + .quad 0xd229e1f5b281303f + .quad 0x71aeaff20b095fd9 + .quad 0xab63e2e11fa38ed9 +endconst + + +const error_message + .asciz "failed to preserve register" +endconst + + +// max number of args used by any asm function. +#define MAX_ARGS 15 + +#define CLOBBER_STACK ((8*MAX_ARGS + 15) & ~15) + +function stack_clobber, export=1 + mov x3, sp + mov x2, #CLOBBER_STACK +1: + stp x0, x1, [sp, #-16]! + subs x2, x2, #16 + b.gt 1b + mov sp, x3 + ret +endfunc + +#define ARG_STACK ((8*(MAX_ARGS - 8) + 15) & ~15) + +function checked_call, export=1 + stp x29, x30, [sp, #-16]! 
+ mov x29, sp + stp x19, x20, [sp, #-16]! + stp x21, x22, [sp, #-16]! + stp x23, x24, [sp, #-16]! + stp x25, x26, [sp, #-16]! + stp x27, x28, [sp, #-16]! + stp d8, d9, [sp, #-16]! + stp d10, d11, [sp, #-16]! + stp d12, d13, [sp, #-16]! + stp d14, d15, [sp, #-16]! + + movrel x9, register_init + ldp d8, d9, [x9], #16 + ldp d10, d11, [x9], #16 + ldp d12, d13, [x9], #16 + ldp d14, d15, [x9], #16 + ldp x19, x20, [x9], #16 + ldp x21, x22, [x9], #16 + ldp x23, x24, [x9], #16 + ldp x25, x26, [x9], #16 + ldp x27, x28, [x9], #16 + + sub sp, sp, #ARG_STACK +.equ pos, 0 +.rept MAX_ARGS-8 + // Skip the first 8 args, that are loaded into registers + ldr x9, [x29, #16 + 8*8 + pos] + str x9, [sp, #pos] +.equ pos, pos + 8 +.endr + + mov x12, x0 + ldp x0, x1, [x29, #16] + ldp x2, x3, [x29, #32] + ldp x4, x5, [x29, #48] + ldp x6, x7, [x29, #64] + blr x12 + add sp, sp, #ARG_STACK + stp x0, x1, [sp, #-16]! + movrel x9, register_init + movi v3.8h, #0 + +.macro check_reg_neon reg1, reg2 + ldr q0, [x9], #16 + uzp1 v1.2d, v\reg1\().2d, v\reg2\().2d + eor v0.16b, v0.16b, v1.16b + orr v3.16b, v3.16b, v0.16b +.endm + check_reg_neon 8, 9 + check_reg_neon 10, 11 + check_reg_neon 12, 13 + check_reg_neon 14, 15 + uqxtn v3.8b, v3.8h + umov x3, v3.d[0] + +.macro check_reg reg1, reg2 + ldp x0, x1, [x9], #16 + eor x0, x0, \reg1 + eor x1, x1, \reg2 + orr x3, x3, x0 + orr x3, x3, x1 +.endm + check_reg x19, x20 + check_reg x21, x22 + check_reg x23, x24 + check_reg x25, x26 + check_reg x27, x28 + + cbz x3, 0f + + movrel x0, error_message +#ifdef PREFIX + bl _checkasm_fail_func +#else + bl checkasm_fail_func +#endif +0: + ldp x0, x1, [sp], #16 + ldp d14, d15, [sp], #16 + ldp d12, d13, [sp], #16 + ldp d10, d11, [sp], #16 + ldp d8, d9, [sp], #16 + ldp x27, x28, [sp], #16 + ldp x25, x26, [sp], #16 + ldp x23, x24, [sp], #16 + ldp x21, x22, [sp], #16 + ldp x19, x20, [sp], #16 + ldp x29, x30, [sp], #16 + ret +endfunc diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c index a0b85d9..1d462ba 
100644 --- a/tests/checkasm/checkasm.c +++ b/tests/checkasm/checkasm.c @@ -73,6 +73,8 @@ static const struct { { "AVX", "avx", DAV1D_X86_CPU_FLAG_AVX }, { "AVX2", "avx2", DAV1D_X86_CPU_FLAG_AVX2 }, { "AVX-512", "avx512", DAV1D_X86_CPU_FLAG_AVX512 }, +#elif ARCH_AARCH64 || ARCH_ARM + { "NEON", "neon", DAV1D_ARM_CPU_FLAG_NEON }, #endif { 0 } }; diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h index bd52f1a..6b6d2e0 100644 --- a/tests/checkasm/checkasm.h +++ b/tests/checkasm/checkasm.h @@ -92,6 +92,7 @@ static inline uint64_t readtime(void) { } #define readtime readtime #endif +#endif /* Verifies that clobbered callee-saved registers * are properly saved and restored */ @@ -122,13 +123,34 @@ void checkasm_stack_clobber(uint64_t clobber, ...); #define declare_new(ret, ...)\ ret (*checked_call)(void *, __VA_ARGS__) = (void *)checkasm_checked_call; #define call_new(...) checked_call(func_new, __VA_ARGS__) +#elif ARCH_ARM +/* Use a dummy argument, to offset the real parameters by 2, not only 1. + * This makes sure that potential 8-byte-alignment of parameters is kept + * the same even when the extra parameters have been removed. */ +void checkasm_checked_call_vfp(void *func, int dummy, ...); +#define declare_new(ret, ...)\ + ret (*checked_call)(void *, int dummy, __VA_ARGS__) =\ + (void *)checkasm_checked_call_vfp; +#define call_new(...) 
checked_call(func_new, 0, __VA_ARGS__) +#elif ARCH_AARCH64 && !defined(__APPLE__) +void checkasm_stack_clobber(uint64_t clobber, ...); +#define declare_new(ret, ...)\ + ret (*checked_call)(void *, int, int, int, int, int, int, int,\ + __VA_ARGS__) =\ + (void *)checkasm_checked_call; +#define CLOB (UINT64_C(0xdeadbeefdeadbeef)) +#define call_new(...)\ + (checkasm_stack_clobber(CLOB, CLOB, CLOB, CLOB, CLOB, CLOB,\ + CLOB, CLOB, CLOB, CLOB, CLOB, CLOB,\ + CLOB, CLOB, CLOB, CLOB, CLOB, CLOB,\ + CLOB, CLOB, CLOB, CLOB, CLOB),\ + checked_call(func_new, 0, 0, 0, 0, 0, 0, 0, __VA_ARGS__)) #endif -#else +#else /* HAVE_ASM */ #define declare_new(ret, ...) /* Call the function */ #define call_new(...) ((func_type *)func_new)(__VA_ARGS__) -#endif -#endif +#endif /* HAVE_ASM */ /* Benchmark the function */ #ifdef readtime diff --git a/tests/meson.build b/tests/meson.build index 05f977b..2759367 100644 --- a/tests/meson.build +++ b/tests/meson.build @@ -49,7 +49,14 @@ if is_asm_enabled checkasm_bitdepth_objs += checkasm_bitdepth_lib.extract_all_objects() endforeach - checkasm_nasm_objs = nasm_gen.process(files('checkasm/x86/checkasm.asm')) + checkasm_nasm_objs = [] + if host_machine.cpu_family() == 'aarch64' + checkasm_sources += files('checkasm/arm/checkasm_64.S') + elif host_machine.cpu_family().startswith('arm') + checkasm_sources += files('checkasm/arm/checkasm_32.S') + elif host_machine.cpu_family().startswith('x86') + checkasm_nasm_objs = nasm_gen.process(files('checkasm/x86/checkasm.asm')) + endif checkasm = executable('checkasm', checkasm_sources, checkasm_nasm_objs, objects: [checkasm_bitdepth_objs], |