diff options
author | André Kempe <andre.kempe@arm.com> | 2022-01-28 14:38:38 +0300 |
---|---|---|
committer | André Kempe <andre.kempe@arm.com> | 2022-02-09 15:20:17 +0300 |
commit | a00289b6d8ebf0ffef9b2c08c0c63b7ad330fa47 (patch) | |
tree | b0235318da7a6cb916c60b5d4ac324aa9bcd863f | |
parent | 02b8876eb9c03ac7e31ea03742f12bd7a6ed77d2 (diff) |
arm64: Add Armv8.3-A PAC support to assembly files
This patch adds optional support for Arm Pointer Authentication Codes.
PAC support is turned on or off at compile time using additional
compiler flags. Unless any of these is enabled explicitly, no additional
code will be emitted at all.
-rw-r--r-- | src/arm/64/film_grain.S | 27 | ||||
-rw-r--r-- | src/arm/64/film_grain16.S | 29 | ||||
-rw-r--r-- | src/arm/64/looprestoration.S | 4 | ||||
-rw-r--r-- | src/arm/64/looprestoration16.S | 4 | ||||
-rw-r--r-- | src/arm/asm.S | 105 |
5 files changed, 163 insertions, 6 deletions
diff --git a/src/arm/64/film_grain.S b/src/arm/64/film_grain.S index c2d1416..6cdd7ec 100644 --- a/src/arm/64/film_grain.S +++ b/src/arm/64/film_grain.S @@ -398,12 +398,14 @@ sum_\lag\()_\type\()_\edge\()_start: st1 {v0.16b}, [x0], #16 .endif ldr x30, [sp], #16 + AARCH64_VALIDATE_LINK_REGISTER ret .endif .endm .macro sum_lag1_func type, uv_layout, edge, elems=16 function sum_\type\()_lag1_\edge\()_neon + AARCH64_SIGN_LINK_REGISTER str x30, [sp, #-16]! sum_lag_n_body lag1, \type, \uv_layout, \edge, \elems, store=0 endfunc @@ -541,6 +543,7 @@ endfunc .macro sum_lag2_func type, uv_layout, edge, elems=16 function sum_\type\()_lag2_\edge\()_neon + AARCH64_SIGN_LINK_REGISTER str x30, [sp, #-16]! .ifc \edge, left sub x12, x0, #2*GRAIN_WIDTH @@ -752,6 +755,7 @@ endfunc .macro sum_lag3_func type, uv_layout, edge, elems=16 function sum_\type\()_lag3_\edge\()_neon + AARCH64_SIGN_LINK_REGISTER str x30, [sp, #-16]! .ifc \edge, left sub x11, x0, #3*GRAIN_WIDTH @@ -779,6 +783,7 @@ sum_lag3_func uv_420, 420, mid sum_lag3_func uv_420, 420, right, 9 function generate_grain_rows_neon + AARCH64_SIGN_LINK_REGISTER str x30, [sp, #-16]! 1: get_grain_row v16, v17, v18, v19, v20, v21 @@ -786,10 +791,12 @@ function generate_grain_rows_neon store_grain_row v16, v17, v18, v19, v20, v21 b.gt 1b ldr x30, [sp], #16 + AARCH64_VALIDATE_LINK_REGISTER ret endfunc function generate_grain_rows_44_neon + AARCH64_SIGN_LINK_REGISTER str x30, [sp, #-16]! 1: get_grain_row_44 v16, v17, v18 @@ -797,20 +804,25 @@ function generate_grain_rows_44_neon store_grain_row_44 v16, v17, v18 b.gt 1b ldr x30, [sp], #16 + AARCH64_VALIDATE_LINK_REGISTER ret endfunc function get_grain_row_neon + AARCH64_SIGN_LINK_REGISTER str x30, [sp, #-16]! get_grain_row v16, v17, v18, v19, v20, v21 ldr x30, [sp], #16 + AARCH64_VALIDATE_LINK_REGISTER ret endfunc function get_grain_row_44_neon + AARCH64_SIGN_LINK_REGISTER str x30, [sp, #-16]! 
get_grain_row_44 v16, v17, v18 ldr x30, [sp], #16 + AARCH64_VALIDATE_LINK_REGISTER ret endfunc @@ -854,6 +866,7 @@ endfunc .macro gen_grain_82 type function generate_grain_\type\()_8bpc_neon, export=1 + AARCH64_SIGN_LINK_REGISTER stp x30, x19, [sp, #-96]! .ifc \type, uv_444 @@ -950,6 +963,7 @@ L(generate_grain_\type\()_lag0): b.gt 1b .endif ldp x30, x19, [sp], #96 + AARCH64_VALIDATE_LINK_REGISTER ret L(generate_grain_\type\()_lag1): @@ -990,6 +1004,7 @@ L(generate_grain_\type\()_lag1): b.gt 1b ldp x30, x19, [sp], #96 + AARCH64_VALIDATE_LINK_REGISTER ret L(generate_grain_\type\()_lag2): @@ -1018,6 +1033,7 @@ L(generate_grain_\type\()_lag2): b.gt 1b ldp x30, x19, [sp], #96 + AARCH64_VALIDATE_LINK_REGISTER ret L(generate_grain_\type\()_lag3): @@ -1057,6 +1073,7 @@ L(generate_grain_\type\()_lag3): ldp d10, d11, [sp, #32] ldp d8, d9, [sp, #16] ldp x30, x19, [sp], #96 + AARCH64_VALIDATE_LINK_REGISTER ret L(gen_grain_\type\()_tbl): @@ -1088,6 +1105,7 @@ gen_grain_82 uv_444 .macro gen_grain_44 type function generate_grain_\type\()_8bpc_neon, export=1 + AARCH64_SIGN_LINK_REGISTER stp x30, x19, [sp, #-96]! mov w13, w3 @@ -1164,6 +1182,7 @@ L(generate_grain_\type\()_lag0): b.gt 1b ldp x30, x19, [sp], #96 + AARCH64_VALIDATE_LINK_REGISTER ret L(generate_grain_\type\()_lag1): @@ -1191,6 +1210,7 @@ L(generate_grain_\type\()_lag1): b.gt 1b ldp x30, x19, [sp], #96 + AARCH64_VALIDATE_LINK_REGISTER ret L(generate_grain_\type\()_lag2): @@ -1214,6 +1234,7 @@ L(generate_grain_\type\()_lag2): b.gt 1b ldp x30, x19, [sp], #96 + AARCH64_VALIDATE_LINK_REGISTER ret L(generate_grain_\type\()_lag3): @@ -1249,6 +1270,7 @@ L(generate_grain_\type\()_lag3): ldp d10, d11, [sp, #32] ldp d8, d9, [sp, #16] ldp x30, x19, [sp], #96 + AARCH64_VALIDATE_LINK_REGISTER ret L(gen_grain_\type\()_tbl): @@ -1343,6 +1365,7 @@ endconst // const int h, const ptrdiff_t clip, // const ptrdiff_t type); function fgy_32x32_8bpc_neon, export=1 + AARCH64_SIGN_LINK_REGISTER str x30, [sp, #-16]! 
ldr w11, [x6, #8] // offsets[1][0] ldr w13, [x6, #4] // offsets[0][1] @@ -1524,6 +1547,7 @@ L(loop_\ox\oy): b.gt L(loop_\ox\()0) .endif ldr x30, [sp], #16 + AARCH64_VALIDATE_LINK_REGISTER ret .endm @@ -1553,6 +1577,7 @@ endfunc // const ptrdiff_t type); .macro fguv layout, sx, sy function fguv_32x32_\layout\()_8bpc_neon, export=1 + AARCH64_SIGN_LINK_REGISTER str x30, [sp, #-32]! str d8, [sp, #16] ldp x8, x9, [sp, #32] // offsets, h @@ -1821,6 +1846,7 @@ L(fguv_loop_sx0_csfl\csfl\()_\ox\oy): 9: ldr d8, [sp, #16] ldr x30, [sp], #32 + AARCH64_VALIDATE_LINK_REGISTER ret L(fguv_loop_sx0_tbl): @@ -1969,6 +1995,7 @@ L(fguv_loop_sx1_csfl\csfl\()_\ox\oy): 9: ldr d8, [sp, #16] ldr x30, [sp], #32 + AARCH64_VALIDATE_LINK_REGISTER ret L(fguv_loop_sx1_tbl): diff --git a/src/arm/64/film_grain16.S b/src/arm/64/film_grain16.S index 3c7d07f..7c4ff6d 100644 --- a/src/arm/64/film_grain16.S +++ b/src/arm/64/film_grain16.S @@ -321,12 +321,14 @@ sum_\lag\()_\type\()_\edge\()_start: .endif st1 {v0.8h}, [x0], #16 ldr x30, [sp], #16 + AARCH64_VALIDATE_LINK_REGISTER ret .endif .endm .macro sum_lag1_func type, uv_layout, edge, elems=8 function sum_\type\()_lag1_\edge\()_neon + AARCH64_SIGN_LINK_REGISTER str x30, [sp, #-16]! .ifc \edge, left sub x12, x0, #1*GRAIN_WIDTH*2 @@ -419,6 +421,7 @@ endfunc .macro sum_lag2_func type, uv_layout, edge, elems=8 function sum_\type\()_lag2_\edge\()_neon + AARCH64_SIGN_LINK_REGISTER str x30, [sp, #-16]! .ifc \edge, left sub x12, x0, #2*GRAIN_WIDTH*2 @@ -572,6 +575,7 @@ endfunc .macro sum_lag3_func type, uv_layout, edge, elems=8 function sum_\type\()_lag3_\edge\()_neon + AARCH64_SIGN_LINK_REGISTER str x30, [sp, #-16]! .ifc \edge, left sub x11, x0, #3*GRAIN_WIDTH*2 @@ -599,6 +603,7 @@ sum_lag3_func uv_420, 420, mid sum_lag3_func uv_420, 420, right, 1 function generate_grain_rows_neon + AARCH64_SIGN_LINK_REGISTER str x30, [sp, #-16]! 
1: mov w16, #80 @@ -613,10 +618,12 @@ function generate_grain_rows_neon st1 {v0.s}[0], [x0], #4 b.gt 1b ldr x30, [sp], #16 + AARCH64_VALIDATE_LINK_REGISTER ret endfunc function generate_grain_rows_44_neon + AARCH64_SIGN_LINK_REGISTER str x30, [sp, #-16]! 1: mov w16, #40 @@ -632,10 +639,12 @@ function generate_grain_rows_44_neon add x0, x0, #GRAIN_WIDTH*2-80 b.gt 1b ldr x30, [sp], #16 + AARCH64_VALIDATE_LINK_REGISTER ret endfunc function gen_grain_uv_444_lag0_neon + AARCH64_SIGN_LINK_REGISTER str x30, [sp, #-16]! ld1 {v4.8h}, [x19], #16 gen_grain_uv_lag0_8_start: @@ -654,10 +663,12 @@ gen_grain_uv_lag0_8_add: smax v2.8h, v2.8h, v26.8h st1 {v2.8h}, [x0], #16 ldr x30, [sp], #16 + AARCH64_VALIDATE_LINK_REGISTER ret endfunc function gen_grain_uv_420_lag0_8_neon + AARCH64_SIGN_LINK_REGISTER add x12, x19, #GRAIN_WIDTH*2 str x30, [sp, #-16]! ld1 {v16.8h, v17.8h}, [x19], #32 @@ -670,6 +681,7 @@ function gen_grain_uv_420_lag0_8_neon endfunc function gen_grain_uv_422_lag0_8_neon + AARCH64_SIGN_LINK_REGISTER str x30, [sp, #-16]! ld1 {v16.8h, v17.8h}, [x19], #32 addp v16.8h, v16.8h, v17.8h @@ -679,6 +691,7 @@ endfunc function gen_grain_uv_420_lag0_4_neon add x12, x19, #GRAIN_WIDTH*2 + AARCH64_SIGN_LINK_REGISTER str x30, [sp, #-16]! ld1 {v16.4h, v17.4h}, [x19] ld1 {v18.4h, v19.4h}, [x12] @@ -692,6 +705,7 @@ function gen_grain_uv_420_lag0_4_neon endfunc function gen_grain_uv_422_lag0_4_neon + AARCH64_SIGN_LINK_REGISTER str x30, [sp, #-16]! ld1 {v16.4h, v17.4h}, [x19] add x19, x19, #32 @@ -703,6 +717,7 @@ endfunc .macro gen_grain_82 type function generate_grain_\type\()_16bpc_neon, export=1 + AARCH64_SIGN_LINK_REGISTER stp x30, x19, [sp, #-96]! 
.ifc \type, uv_444 @@ -801,6 +816,7 @@ L(generate_grain_\type\()_lag0): b.gt 1b .endif ldp x30, x19, [sp], #96 + AARCH64_VALIDATE_LINK_REGISTER ret L(generate_grain_\type\()_lag1): @@ -848,6 +864,7 @@ L(generate_grain_\type\()_lag1): b.gt 1b ldp x30, x19, [sp], #96 + AARCH64_VALIDATE_LINK_REGISTER ret L(generate_grain_\type\()_lag2): @@ -881,6 +898,7 @@ L(generate_grain_\type\()_lag2): b.gt 1b ldp x30, x19, [sp], #96 + AARCH64_VALIDATE_LINK_REGISTER ret L(generate_grain_\type\()_lag3): @@ -925,6 +943,7 @@ L(generate_grain_\type\()_lag3): ldp d10, d11, [sp, #32] ldp d8, d9, [sp, #16] ldp x30, x19, [sp], #96 + AARCH64_VALIDATE_LINK_REGISTER ret L(gen_grain_\type\()_tbl): @@ -956,6 +975,7 @@ gen_grain_82 uv_444 .macro gen_grain_44 type function generate_grain_\type\()_16bpc_neon, export=1 + AARCH64_SIGN_LINK_REGISTER stp x30, x19, [sp, #-96]! mov w13, w3 @@ -1034,6 +1054,7 @@ L(generate_grain_\type\()_lag0): b.gt 1b ldp x30, x19, [sp], #96 + AARCH64_VALIDATE_LINK_REGISTER ret L(generate_grain_\type\()_lag1): @@ -1066,6 +1087,7 @@ L(generate_grain_\type\()_lag1): b.gt 1b ldp x30, x19, [sp], #96 + AARCH64_VALIDATE_LINK_REGISTER ret L(generate_grain_\type\()_lag2): @@ -1092,6 +1114,7 @@ L(generate_grain_\type\()_lag2): b.gt 1b ldp x30, x19, [sp], #96 + AARCH64_VALIDATE_LINK_REGISTER ret L(generate_grain_\type\()_lag3): @@ -1130,6 +1153,7 @@ L(generate_grain_\type\()_lag3): ldp d10, d11, [sp, #32] ldp d8, d9, [sp, #16] ldp x30, x19, [sp], #96 + AARCH64_VALIDATE_LINK_REGISTER ret L(gen_grain_\type\()_tbl): @@ -1225,6 +1249,7 @@ endconst // const ptrdiff_t type, // const int bitdepth_max); function fgy_32x32_16bpc_neon, export=1 + AARCH64_SIGN_LINK_REGISTER str x30, [sp, #-80]! 
stp d8, d9, [sp, #16] stp d10, d11, [sp, #32] @@ -1447,6 +1472,7 @@ L(loop_\ox\oy): ldp d10, d11, [sp, #32] ldp d8, d9, [sp, #16] ldr x30, [sp], #80 + AARCH64_VALIDATE_LINK_REGISTER ret .endm @@ -1477,6 +1503,7 @@ endfunc // const int bitdepth_max); .macro fguv layout, sx, sy function fguv_32x32_\layout\()_16bpc_neon, export=1 + AARCH64_SIGN_LINK_REGISTER str x30, [sp, #-80]! stp d8, d9, [sp, #16] stp d10, d11, [sp, #32] @@ -1789,6 +1816,7 @@ L(fguv_loop_sx0_csfl\csfl\()_\ox\oy): ldp d10, d11, [sp, #32] ldp d8, d9, [sp, #16] ldr x30, [sp], #80 + AARCH64_VALIDATE_LINK_REGISTER ret L(fguv_loop_sx0_tbl): @@ -1954,6 +1982,7 @@ L(fguv_loop_sx1_csfl\csfl\()_\ox\oy): ldp d10, d11, [sp, #32] ldp d8, d9, [sp, #16] ldr x30, [sp], #80 + AARCH64_VALIDATE_LINK_REGISTER ret L(fguv_loop_sx1_tbl): diff --git a/src/arm/64/looprestoration.S b/src/arm/64/looprestoration.S index 778448a..a598b72 100644 --- a/src/arm/64/looprestoration.S +++ b/src/arm/64/looprestoration.S @@ -50,6 +50,7 @@ endconst // const int16_t filter[2][8], // const enum LrEdgeFlags edges); function wiener_filter7_8bpc_neon, export=1 + AARCH64_SIGN_LINK_REGISTER stp x29, x30, [sp, #-16]! mov x29, sp ld1 {v0.8h, v1.8h}, [x6] @@ -121,6 +122,7 @@ L(v1_7): mov sp, x29 ldp x29, x30, [sp], #16 + AARCH64_VALIDATE_LINK_REGISTER ret L(no_top_7): @@ -538,6 +540,7 @@ endfunc // const int16_t filter[2][8], // const enum LrEdgeFlags edges); function wiener_filter5_8bpc_neon, export=1 + AARCH64_SIGN_LINK_REGISTER stp x29, x30, [sp, #-16]! 
mov x29, sp ld1 {v0.8h, v1.8h}, [x6] @@ -598,6 +601,7 @@ L(end_5): mov sp, x29 ldp x29, x30, [sp], #16 + AARCH64_VALIDATE_LINK_REGISTER ret L(no_top_5): diff --git a/src/arm/64/looprestoration16.S b/src/arm/64/looprestoration16.S index fcb4f84..8954e60 100644 --- a/src/arm/64/looprestoration16.S +++ b/src/arm/64/looprestoration16.S @@ -52,6 +52,7 @@ endconst // const int bitdepth_max); function wiener_filter7_16bpc_neon, export=1 ldr w8, [sp] + AARCH64_SIGN_LINK_REGISTER stp x29, x30, [sp, #-32]! stp d8, d9, [sp, #16] mov x29, sp @@ -137,6 +138,7 @@ L(v1_7): mov sp, x29 ldp d8, d9, [sp, #16] ldp x29, x30, [sp], #32 + AARCH64_VALIDATE_LINK_REGISTER ret L(no_top_7): @@ -595,6 +597,7 @@ endfunc // const int bitdepth_max); function wiener_filter5_16bpc_neon, export=1 ldr w8, [sp] + AARCH64_SIGN_LINK_REGISTER stp x29, x30, [sp, #-32]! stp d8, d9, [sp, #16] mov x29, sp @@ -669,6 +672,7 @@ L(end_5): mov sp, x29 ldp d8, d9, [sp, #16] ldp x29, x30, [sp], #32 + AARCH64_VALIDATE_LINK_REGISTER ret L(no_top_5): diff --git a/src/arm/asm.S b/src/arm/asm.S index 017c89c..74d37a7 100644 --- a/src/arm/asm.S +++ b/src/arm/asm.S @@ -34,10 +34,78 @@ #define x18 do_not_use_x18 #define w18 do_not_use_w18 -/* Support macros for the Armv8.5-A Branch Target Identification feature which - * requires emitting a .note.gnu.property section with the appropriate - * architecture-dependent feature bits set. - * Read more: "ELF for the Arm® 64-bit Architecture" +/* Support macros for + * - Armv8.3-A Pointer Authentication and + * - Armv8.5-A Branch Target Identification + * features which require emitting a .note.gnu.property section with the + * appropriate architecture-dependent feature bits set. + * + * |AARCH64_SIGN_LINK_REGISTER| and |AARCH64_VALIDATE_LINK_REGISTER| expand to + * PACIxSP and AUTIxSP, respectively. |AARCH64_SIGN_LINK_REGISTER| should be + * used immediately before saving the LR register (x30) to the stack. 
+ * |AARCH64_VALIDATE_LINK_REGISTER| should be used immediately after restoring + * it. Note |AARCH64_SIGN_LINK_REGISTER|'s modifications to LR must be undone + * with |AARCH64_VALIDATE_LINK_REGISTER| before RET. The SP register must also + * have the same value at the two points. For example: + * + * .global f + * f: + * AARCH64_SIGN_LINK_REGISTER + * stp x29, x30, [sp, #-96]! + * mov x29, sp + * ... + * ldp x29, x30, [sp], #96 + * AARCH64_VALIDATE_LINK_REGISTER + * ret + * + * |AARCH64_VALID_CALL_TARGET| expands to BTI 'c'. Either it, or + * |AARCH64_SIGN_LINK_REGISTER|, must be used at every point that may be an + * indirect call target. In particular, all symbols exported from a file must + * begin with one of these macros. For example, a leaf function that does not + * save LR can instead use |AARCH64_VALID_CALL_TARGET|: + * + * .globl return_zero + * return_zero: + * AARCH64_VALID_CALL_TARGET + * mov x0, #0 + * ret + * + * A non-leaf function which does not immediately save LR may need both macros + * because |AARCH64_SIGN_LINK_REGISTER| appears late. For example, the function + * may jump to an alternate implementation before setting up the stack: + * + * .globl with_early_jump + * with_early_jump: + * AARCH64_VALID_CALL_TARGET + * cmp x0, #128 + * b.lt .Lwith_early_jump_128 + * AARCH64_SIGN_LINK_REGISTER + * stp x29, x30, [sp, #-96]! + * mov x29, sp + * ... + * ldp x29, x30, [sp], #96 + * AARCH64_VALIDATE_LINK_REGISTER + * ret + * + * .Lwith_early_jump_128: + * ... + * ret + * + * These annotations are only required with indirect calls. Private symbols that + * are only the target of direct calls do not require annotations. Also note + * that |AARCH64_VALID_CALL_TARGET| is only valid for indirect calls (BLR), not + * indirect jumps (BR). Indirect jumps in assembly are supported through + * |AARCH64_VALID_JUMP_TARGET|. Landing Pads which shall serve for jumps and + * calls can be created using |AARCH64_VALID_JUMP_CALL_TARGET|. 
+ * + * Although not necessary, it is safe to use these macros in 32-bit ARM + * assembly. This may be used to simplify dual 32-bit and 64-bit files. + * + * References: + * - "ELF for the Arm® 64-bit Architecture" + * https://github.com/ARM-software/abi-aa/blob/master/aaelf64/aaelf64.rst + * - "Providing protection for complex software" + * https://developer.arm.com/architectures/learn-the-architecture/providing-protection-for-complex-software */ #if defined(__ARM_FEATURE_BTI_DEFAULT) && (__ARM_FEATURE_BTI_DEFAULT == 1) #define GNU_PROPERTY_AARCH64_BTI (1 << 0) // Has Branch Target Identification @@ -51,7 +119,32 @@ #define AARCH64_VALID_JUMP_TARGET #endif -#if (GNU_PROPERTY_AARCH64_BTI != 0) +#if defined(__ARM_FEATURE_PAC_DEFAULT) + +#if ((__ARM_FEATURE_PAC_DEFAULT & (1 << 0)) != 0) // authentication using key A +#define AARCH64_SIGN_LINK_REGISTER paciasp +#define AARCH64_VALIDATE_LINK_REGISTER autiasp +#elif ((__ARM_FEATURE_PAC_DEFAULT & (1 << 1)) != 0) // authentication using key B +#define AARCH64_SIGN_LINK_REGISTER pacibsp +#define AARCH64_VALIDATE_LINK_REGISTER autibsp +#else +#error Pointer authentication defines no valid key! +#endif +#if ((__ARM_FEATURE_PAC_DEFAULT & (1 << 2)) != 0) // authentication of leaf functions +#error Authentication of leaf functions is enabled but not supported in dav1d! +#endif +#define GNU_PROPERTY_AARCH64_PAC (1 << 1) + +#else + +#define GNU_PROPERTY_AARCH64_PAC 0 +#define AARCH64_SIGN_LINK_REGISTER +#define AARCH64_VALIDATE_LINK_REGISTER + +#endif + + +#if (GNU_PROPERTY_AARCH64_BTI != 0 || GNU_PROPERTY_AARCH64_PAC != 0) .pushsection .note.gnu.property, "a" .balign 8 .long 4 @@ -60,7 +153,7 @@ .asciz "GNU" .long 0xc0000000 /* GNU_PROPERTY_AARCH64_FEATURE_1_AND */ .long 4 - .long GNU_PROPERTY_AARCH64_BTI + .long (GNU_PROPERTY_AARCH64_BTI | GNU_PROPERTY_AARCH64_PAC) .long 0 .popsection #endif |