Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/videolan/dav1d.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/src/arm
diff options
context:
space:
mode:
author: Salome Thirot <salome.thirot@arm.com> 2021-10-15 19:05:03 +0300
committer: Salome Thirot <salome.thirot@arm.com> 2021-10-27 14:09:37 +0300
commit: da0da5488b5a5d43c4fef6d4308a0cd930f7961a (patch)
tree: d95934a5194b64f065a0c5d670d4a47e89caed3e /src/arm
parent: eb0308bcdf1bfd651f2359ed49a2f6dc258aed86 (diff)
arm64: Change br instructions to ret for function returns

Using ret x<n> instead of br x<n> removes the need for a BTI landing pad at the target address in x<n>. Using 'ret' instead of 'br' does not have any performance implications.

Signed-off-by: Jonathan Wright <jonathan.wright@arm.com>
Signed-off-by: Matthew Dalzell <matthew.dalzell@arm.com>
Signed-off-by: Salome Thirot <salome.thirot@arm.com>

Diffstat (limited to 'src/arm')
-rw-r--r-- src/arm/64/itx.S 52
-rw-r--r-- src/arm/64/itx16.S 52
-rw-r--r-- src/arm/64/loopfilter.S 36
-rw-r--r-- src/arm/64/loopfilter16.S 36
-rw-r--r-- src/arm/64/mc.S 8
-rw-r--r-- src/arm/64/mc16.S 8
6 files changed, 96 insertions, 96 deletions
diff --git a/src/arm/64/itx.S b/src/arm/64/itx.S
index 98147a3..ea0de6a 100644
--- a/src/arm/64/itx.S
+++ b/src/arm/64/itx.S
@@ -660,7 +660,7 @@ L(itx_4x4_end):
st1 {v1.s}[0], [x0], x1
st1 {v1.s}[1], [x0], x1
- br x15
+ ret x15
endfunc
.macro def_fn_4x4 txfm1, txfm2
@@ -897,7 +897,7 @@ function inv_txfm_\variant\()add_8x8_neon
blr x5
load_add_store_8x8 x0, x7
- br x15
+ ret x15
endfunc
.endm
@@ -962,7 +962,7 @@ function inv_txfm_add_8x4_neon
blr x5
load_add_store_8x4 x0, x7
- br x15
+ ret x15
endfunc
function inv_txfm_add_4x8_neon
@@ -988,7 +988,7 @@ function inv_txfm_add_4x8_neon
blr x5
load_add_store_4x8 x0, x7
- br x15
+ ret x15
endfunc
.macro def_fn_48 w, h, txfm1, txfm2
@@ -1404,7 +1404,7 @@ function inv_txfm_horz\suffix\()_16x8_neon
st1 {\i}, [x6], #16
.endr
- br x14
+ ret x14
endfunc
.endm
@@ -1419,7 +1419,7 @@ function inv_txfm_add_vert_8x16_neon
.endr
blr x5
load_add_store_8x16 x6, x7
- br x14
+ ret x14
endfunc
function inv_txfm_add_16x16_neon
@@ -1453,7 +1453,7 @@ function inv_txfm_add_16x16_neon
.endr
add sp, sp, #512
- br x15
+ ret x15
endfunc
.macro def_fn_16x16 txfm1, txfm2, eob_half
@@ -1553,7 +1553,7 @@ function inv_txfm_\variant\()add_16x4_neon
add x6, x0, #8
load_add_store_8x4 x6, x7
- br x15
+ ret x15
endfunc
function inv_txfm_\variant\()add_4x16_neon
@@ -1621,7 +1621,7 @@ function inv_txfm_\variant\()add_4x16_neon
load_add_store_4x16 x0, x6
- br x15
+ ret x15
endfunc
.endm
@@ -1731,7 +1731,7 @@ function inv_txfm_\variant\()add_16x8_neon
add x0, x0, #8
load_add_store_8x8 x0, x7
- br x15
+ ret x15
endfunc
function inv_txfm_\variant\()add_8x16_neon
@@ -1804,7 +1804,7 @@ function inv_txfm_\variant\()add_8x16_neon
load_add_store_8x16 x0, x6
- br x15
+ ret x15
endfunc
.endm
@@ -2089,7 +2089,7 @@ function inv_txfm_horz\suffix\()_dct_32x8_neon
store2 v25.8h, v17.8h, \shift
store2 v24.8h, v16.8h, \shift
.purgem store2
- br x14
+ ret x14
endfunc
.endm
@@ -2163,7 +2163,7 @@ function inv_txfm_add_vert_dct_8x32_neon
combine v28.8h, v29.8h, v30.8h, v31.8h, sqsub, x9
.purgem combine
- br x14
+ ret x14
endfunc
const eob_32x32
@@ -2374,7 +2374,7 @@ function inv_txfm_add_dct_dct_32x32_8bpc_neon, export=1
.endr
add sp, sp, #2048
- br x15
+ ret x15
endfunc
function inv_txfm_add_dct_dct_16x32_8bpc_neon, export=1
@@ -2423,7 +2423,7 @@ function inv_txfm_add_dct_dct_16x32_8bpc_neon, export=1
.endr
add sp, sp, #1024
- br x15
+ ret x15
endfunc
function inv_txfm_add_dct_dct_32x16_8bpc_neon, export=1
@@ -2468,7 +2468,7 @@ function inv_txfm_add_dct_dct_32x16_8bpc_neon, export=1
.endr
add sp, sp, #1024
- br x15
+ ret x15
endfunc
function inv_txfm_add_dct_dct_8x32_8bpc_neon, export=1
@@ -2525,7 +2525,7 @@ function inv_txfm_add_dct_dct_8x32_8bpc_neon, export=1
bl inv_txfm_add_vert_dct_8x32_neon
add sp, sp, #512
- br x15
+ ret x15
endfunc
function inv_txfm_add_dct_dct_32x8_8bpc_neon, export=1
@@ -2559,7 +2559,7 @@ function inv_txfm_add_dct_dct_32x8_8bpc_neon, export=1
b.lt 1b
add sp, sp, #512
- br x15
+ ret x15
endfunc
function inv_dct64_step1_neon
@@ -2886,7 +2886,7 @@ function inv_txfm_dct\suffix\()_8h_x64_neon, export=1
bl inv_dct64_step2_neon
- br x14
+ ret x14
endfunc
.endm
@@ -2943,7 +2943,7 @@ function inv_txfm_horz_dct_64x8_neon
cmp x7, x8
b.lt 1b
- br x14
+ ret x14
endfunc
function inv_txfm_add_vert_dct_8x64_neon
@@ -2999,7 +2999,7 @@ function inv_txfm_add_vert_dct_8x64_neon
cmp x7, x8
b.lt 1b
- br x14
+ ret x14
endfunc
function inv_txfm_add_dct_dct_64x64_8bpc_neon, export=1
@@ -3053,7 +3053,7 @@ function inv_txfm_add_dct_dct_64x64_8bpc_neon, export=1
.endr
add sp, x5, #64*32*2
- br x15
+ ret x15
endfunc
function inv_txfm_add_dct_dct_64x32_8bpc_neon, export=1
@@ -3106,7 +3106,7 @@ function inv_txfm_add_dct_dct_64x32_8bpc_neon, export=1
.endr
add sp, x5, #64*32*2
- br x15
+ ret x15
endfunc
function inv_txfm_add_dct_dct_32x64_8bpc_neon, export=1
@@ -3158,7 +3158,7 @@ function inv_txfm_add_dct_dct_32x64_8bpc_neon, export=1
.endr
add sp, x5, #32*32*2
- br x15
+ ret x15
endfunc
function inv_txfm_add_dct_dct_64x16_8bpc_neon, export=1
@@ -3212,7 +3212,7 @@ function inv_txfm_add_dct_dct_64x16_8bpc_neon, export=1
.endr
add sp, x4, #64*16*2
- br x15
+ ret x15
endfunc
function inv_txfm_add_dct_dct_16x64_8bpc_neon, export=1
@@ -3265,5 +3265,5 @@ function inv_txfm_add_dct_dct_16x64_8bpc_neon, export=1
.endr
add sp, x5, #16*32*2
- br x15
+ ret x15
endfunc
diff --git a/src/arm/64/itx16.S b/src/arm/64/itx16.S
index 5e55dd3..d1ac4bc 100644
--- a/src/arm/64/itx16.S
+++ b/src/arm/64/itx16.S
@@ -541,7 +541,7 @@ L(itx_4x4_end):
st1 {v1.d}[0], [x0], x1
st1 {v1.d}[1], [x0], x1
- br x15
+ ret x15
endfunc
.macro def_fn_4x4 txfm1, txfm2
@@ -784,7 +784,7 @@ function inv_txfm_add_8x8_neon
blr x5
load_add_store_8x8 x0, x7
- br x15
+ ret x15
endfunc
.macro def_fn_8x8 txfm1, txfm2, eob_half
@@ -853,7 +853,7 @@ function inv_txfm_add_8x4_neon
blr x5
load_add_store_8x4 x0, x7
- br x15
+ ret x15
endfunc
function inv_txfm_add_4x8_neon
@@ -902,7 +902,7 @@ function inv_txfm_add_4x8_neon
blr x5
load_add_store_4x8 x0, x7
- br x15
+ ret x15
endfunc
.macro def_fn_48 w, h, txfm1, txfm2, eob_half
@@ -1282,7 +1282,7 @@ function inv_txfm_horz\suffix\()_16x4_neon
st1 {\i}, [x6], #16
.endr
- br x14
+ ret x14
endfunc
.endm
@@ -1296,7 +1296,7 @@ function inv_txfm_add_vert_8x16_neon
.endr
blr x5
load_add_store_8x16 x6, x7
- br x14
+ ret x14
endfunc
function inv_txfm_add_16x16_neon
@@ -1338,7 +1338,7 @@ function inv_txfm_add_16x16_neon
.endr
add sp, sp, #512
- br x15
+ ret x15
endfunc
const eob_16x16
@@ -1423,7 +1423,7 @@ function inv_txfm_add_16x4_neon
add x6, x0, #16
load_add_store_8x4 x6, x7
- br x15
+ ret x15
endfunc
function inv_txfm_add_4x16_neon
@@ -1517,7 +1517,7 @@ function inv_txfm_add_4x16_neon
load_add_store_4x16 x0, x6
- br x15
+ ret x15
endfunc
const eob_4x16
@@ -1698,7 +1698,7 @@ function inv_txfm_add_16x8_neon
ldp d12, d13, [sp, #0x20]
ldp d10, d11, [sp, #0x10]
ldp d8, d9, [sp], 0x40
- br x15
+ ret x15
endfunc
function inv_txfm_add_8x16_neon
@@ -1839,7 +1839,7 @@ function inv_txfm_add_8x16_neon
ldp d10, d11, [sp, #0x10]
ldp d8, d9, [sp], 0x20
- br x15
+ ret x15
endfunc
const eob_8x16
@@ -2141,7 +2141,7 @@ function inv_txfm_horz\suffix\()_dct_32x4_neon
store2 v29.4s, v25.4s, v21.4s, v17.4s, \shift
store2 v28.4s, v24.4s, v20.4s, v16.4s, \shift
.purgem store2
- br x14
+ ret x14
endfunc
.endm
@@ -2216,7 +2216,7 @@ function inv_txfm_add_vert_dct_8x32_neon
combine v28.8h, v29.8h, v30.8h, v31.8h, sqsub, x9
.purgem combine
- br x14
+ ret x14
endfunc
const eob_32x32
@@ -2533,7 +2533,7 @@ function inv_txfm_add_dct_dct_32x32_16bpc_neon, export=1
.endr
add sp, sp, #2048
- br x15
+ ret x15
endfunc
function inv_txfm_add_dct_dct_16x32_16bpc_neon, export=1
@@ -2582,7 +2582,7 @@ function inv_txfm_add_dct_dct_16x32_16bpc_neon, export=1
.endr
add sp, sp, #1024
- br x15
+ ret x15
endfunc
function inv_txfm_add_dct_dct_32x16_16bpc_neon, export=1
@@ -2632,7 +2632,7 @@ function inv_txfm_add_dct_dct_32x16_16bpc_neon, export=1
.endr
add sp, sp, #1024
- br x15
+ ret x15
endfunc
function inv_txfm_add_dct_dct_8x32_16bpc_neon, export=1
@@ -2692,7 +2692,7 @@ function inv_txfm_add_dct_dct_8x32_16bpc_neon, export=1
bl inv_txfm_add_vert_dct_8x32_neon
add sp, sp, #512
- br x15
+ ret x15
endfunc
function inv_txfm_add_dct_dct_32x8_16bpc_neon, export=1
@@ -2743,7 +2743,7 @@ function inv_txfm_add_dct_dct_32x8_16bpc_neon, export=1
b.lt 1b
add sp, sp, #512
- br x15
+ ret x15
endfunc
function inv_dct64_step1_neon
@@ -3070,7 +3070,7 @@ function inv_txfm_dct\suffix\()_4s_x64_neon
bl inv_dct64_step2_neon
- br x14
+ ret x14
endfunc
.endm
@@ -3127,7 +3127,7 @@ function inv_txfm_horz_dct_64x4_neon
cmp x7, x8
b.lt 1b
- br x14
+ ret x14
endfunc
function inv_txfm_add_vert_dct_8x64_neon
@@ -3184,7 +3184,7 @@ function inv_txfm_add_vert_dct_8x64_neon
cmp x7, x8
b.lt 1b
- br x14
+ ret x14
endfunc
function inv_txfm_add_dct_dct_64x64_16bpc_neon, export=1
@@ -3238,7 +3238,7 @@ function inv_txfm_add_dct_dct_64x64_16bpc_neon, export=1
.endr
add sp, x5, #64*32*2
- br x15
+ ret x15
endfunc
function inv_txfm_add_dct_dct_64x32_16bpc_neon, export=1
@@ -3291,7 +3291,7 @@ function inv_txfm_add_dct_dct_64x32_16bpc_neon, export=1
.endr
add sp, x5, #64*32*2
- br x15
+ ret x15
endfunc
function inv_txfm_add_dct_dct_32x64_16bpc_neon, export=1
@@ -3341,7 +3341,7 @@ function inv_txfm_add_dct_dct_32x64_16bpc_neon, export=1
.endr
add sp, x5, #32*32*2
- br x15
+ ret x15
endfunc
function inv_txfm_add_dct_dct_64x16_16bpc_neon, export=1
@@ -3395,7 +3395,7 @@ function inv_txfm_add_dct_dct_64x16_16bpc_neon, export=1
.endr
add sp, x4, #64*16*2
- br x15
+ ret x15
endfunc
function inv_txfm_add_dct_dct_16x64_16bpc_neon, export=1
@@ -3448,5 +3448,5 @@ function inv_txfm_add_dct_dct_16x64_16bpc_neon, export=1
.endr
add sp, x5, #16*32*2
- br x15
+ ret x15
endfunc
diff --git a/src/arm/64/loopfilter.S b/src/arm/64/loopfilter.S
index 1d426f1..2b9b5c4 100644
--- a/src/arm/64/loopfilter.S
+++ b/src/arm/64/loopfilter.S
@@ -478,16 +478,16 @@ function lpf_16_wd\wd\()_neon
.if \wd == 16
7:
// Return to a shorter epilogue, writing only the inner 6 pixels
- br x13
+ ret x13
.endif
.if \wd >= 8
8:
// Return to a shorter epilogue, writing only the inner 4 pixels
- br x14
+ ret x14
.endif
9:
// Return directly without writing back any pixels
- br x15
+ ret x15
endfunc
.endm
@@ -532,7 +532,7 @@ function lpf_v_4_16_neon
st1 {v23.16b}, [x16], x1 // p0
st1 {v25.16b}, [x0], x1 // q1
sub x0, x0, x1, lsl #1
- br x15
+ ret x15
endfunc
function lpf_h_4_16_neon
@@ -583,7 +583,7 @@ function lpf_h_4_16_neon
st1 {v25.s}[1], [x16], x1
st1 {v25.s}[3], [x0], x1
add x0, x0, #2
- br x15
+ ret x15
endfunc
function lpf_v_6_16_neon
@@ -607,7 +607,7 @@ function lpf_v_6_16_neon
st1 {v23.16b}, [x16], x1 // p0
st1 {v25.16b}, [x0], x1 // q1
sub x0, x0, x1, lsl #1
- br x15
+ ret x15
endfunc
function lpf_h_6_16_neon
@@ -658,7 +658,7 @@ function lpf_h_6_16_neon
st1 {v25.s}[1], [x16], x1
st1 {v25.s}[3], [x0], x1
add x0, x0, #2
- br x15
+ ret x15
endfunc
function lpf_v_8_16_neon
@@ -686,7 +686,7 @@ function lpf_v_8_16_neon
st1 {v26.16b}, [x0], x1 // q2
sub x0, x0, x1, lsl #1
sub x0, x0, x1
- br x15
+ ret x15
8:
sub x16, x0, x1, lsl #1
@@ -695,7 +695,7 @@ function lpf_v_8_16_neon
st1 {v23.16b}, [x16], x1 // p0
st1 {v25.16b}, [x0], x1 // q1
sub x0, x0, x1, lsl #1
- br x15
+ ret x15
endfunc
function lpf_h_8_16_neon
@@ -746,7 +746,7 @@ function lpf_h_8_16_neon
st1 {v27.d}[0], [x16], x1
st1 {v27.d}[1], [x0], x1
add x0, x0, #4
- br x15
+ ret x15
8:
sub x16, x0, x1, lsl #4
sub x16, x16, #2
@@ -770,7 +770,7 @@ function lpf_h_8_16_neon
st1 {v25.s}[1], [x16], x1
st1 {v25.s}[3], [x0], x1
add x0, x0, #2
- br x15
+ ret x15
endfunc
function lpf_v_16_16_neon
@@ -813,7 +813,7 @@ function lpf_v_16_16_neon
st1 {v11.16b}, [x0], x1 // q5
sub x0, x0, x1, lsl #2
sub x0, x0, x1, lsl #1
- br x15
+ ret x15
7:
sub x16, x0, x1
sub x16, x16, x1, lsl #1
@@ -825,7 +825,7 @@ function lpf_v_16_16_neon
st1 {v26.16b}, [x0], x1 // q2
sub x0, x0, x1, lsl #1
sub x0, x0, x1
- br x15
+ ret x15
8:
sub x16, x0, x1, lsl #1
@@ -834,7 +834,7 @@ function lpf_v_16_16_neon
st1 {v23.16b}, [x16], x1 // p0
st1 {v25.16b}, [x0], x1 // q1
sub x0, x0, x1, lsl #1
- br x15
+ ret x15
endfunc
function lpf_h_16_16_neon
@@ -916,7 +916,7 @@ function lpf_h_16_16_neon
st1 {v30.d}[1], [x0], x1
st1 {v5.d}[1], [x16], x1
st1 {v31.d}[1], [x0], x1
- br x15
+ ret x15
7:
sub x16, x0, x1, lsl #4
@@ -941,7 +941,7 @@ function lpf_h_16_16_neon
st1 {v27.d}[0], [x16], x1
st1 {v27.d}[1], [x0], x1
add x0, x0, #4
- br x15
+ ret x15
8:
sub x16, x0, x1, lsl #4
sub x16, x16, #2
@@ -965,7 +965,7 @@ function lpf_h_16_16_neon
st1 {v25.s}[1], [x16], x1
st1 {v25.s}[3], [x0], x1
add x0, x0, #2
- br x15
+ ret x15
endfunc
// void dav1d_lpf_v_sb_y_8bpc_neon(pixel *dst, const ptrdiff_t stride,
@@ -1096,7 +1096,7 @@ function lpf_\dir\()_sb_\type\()_8bpc_neon, export=1
ldp d12, d13, [sp, #0x20]
ldp d10, d11, [sp, #0x10]
ldp d8, d9, [sp], 0x40
- br x11
+ ret x11
endfunc
.endm
diff --git a/src/arm/64/loopfilter16.S b/src/arm/64/loopfilter16.S
index 8c9f98b..aab0230 100644
--- a/src/arm/64/loopfilter16.S
+++ b/src/arm/64/loopfilter16.S
@@ -364,16 +364,16 @@ function lpf_8_wd\wd\()_neon
.if \wd == 16
7:
// Return to a shorter epilogue, writing only the inner 6 pixels
- br x13
+ ret x13
.endif
.if \wd >= 8
8:
// Return to a shorter epilogue, writing only the inner 4 pixels
- br x14
+ ret x14
.endif
9:
// Return directly without writing back any pixels
- br x15
+ ret x15
endfunc
.endm
@@ -418,7 +418,7 @@ function lpf_v_4_8_neon
st1 {v23.8h}, [x16], x1 // p0
st1 {v25.8h}, [x0], x1 // q1
sub x0, x0, x1, lsl #1
- br x15
+ ret x15
endfunc
function lpf_h_4_8_neon
@@ -453,7 +453,7 @@ function lpf_h_4_8_neon
st1 {v25.d}[0], [x16], x1
st1 {v25.d}[1], [x0], x1
add x0, x0, #4
- br x15
+ ret x15
endfunc
function lpf_v_6_8_neon
@@ -477,7 +477,7 @@ function lpf_v_6_8_neon
st1 {v23.8h}, [x16], x1 // p0
st1 {v25.8h}, [x0], x1 // q1
sub x0, x0, x1, lsl #1
- br x15
+ ret x15
endfunc
function lpf_h_6_8_neon
@@ -512,7 +512,7 @@ function lpf_h_6_8_neon
st1 {v25.d}[0], [x16], x1
st1 {v25.d}[1], [x0], x1
add x0, x0, #4
- br x15
+ ret x15
endfunc
function lpf_v_8_8_neon
@@ -540,7 +540,7 @@ function lpf_v_8_8_neon
st1 {v26.8h}, [x0], x1 // q2
sub x0, x0, x1, lsl #1
sub x0, x0, x1
- br x15
+ ret x15
8:
sub x16, x0, x1, lsl #1
@@ -549,7 +549,7 @@ function lpf_v_8_8_neon
st1 {v23.8h}, [x16], x1 // p0
st1 {v25.8h}, [x0], x1 // q1
sub x0, x0, x1, lsl #1
- br x15
+ ret x15
endfunc
function lpf_h_8_8_neon
@@ -584,7 +584,7 @@ function lpf_h_8_8_neon
st1 {v23.8h}, [x16], x1
st1 {v27.8h}, [x0], x1
add x0, x0, #8
- br x15
+ ret x15
8:
sub x16, x0, x1, lsl #3
sub x16, x16, #4
@@ -600,7 +600,7 @@ function lpf_h_8_8_neon
st1 {v25.d}[0], [x16], x1
st1 {v25.d}[1], [x0], x1
add x0, x0, #4
- br x15
+ ret x15
endfunc
function lpf_v_16_8_neon
@@ -643,7 +643,7 @@ function lpf_v_16_8_neon
st1 {v11.8h}, [x0], x1 // q5
sub x0, x0, x1, lsl #2
sub x0, x0, x1, lsl #1
- br x15
+ ret x15
7:
sub x16, x0, x1
sub x16, x16, x1, lsl #1
@@ -655,7 +655,7 @@ function lpf_v_16_8_neon
st1 {v26.8h}, [x0], x1 // q2
sub x0, x0, x1, lsl #1
sub x0, x0, x1
- br x15
+ ret x15
8:
sub x16, x0, x1, lsl #1
@@ -664,7 +664,7 @@ function lpf_v_16_8_neon
st1 {v23.8h}, [x16], x1 // p0
st1 {v25.8h}, [x0], x1 // q1
sub x0, x0, x1, lsl #1
- br x15
+ ret x15
endfunc
function lpf_h_16_8_neon
@@ -714,7 +714,7 @@ function lpf_h_16_8_neon
st1 {v30.8h}, [x0], x1
st1 {v5.8h}, [x16], x1
st1 {v31.8h}, [x0], x1
- br x15
+ ret x15
7:
sub x16, x0, x1, lsl #3
@@ -731,7 +731,7 @@ function lpf_h_16_8_neon
st1 {v23.8h}, [x16], x1
st1 {v27.8h}, [x0], x1
add x0, x0, #8
- br x15
+ ret x15
8:
sub x16, x0, x1, lsl #3
sub x16, x16, #4
@@ -747,7 +747,7 @@ function lpf_h_16_8_neon
st1 {v25.d}[0], [x16], x1
st1 {v25.d}[1], [x0], x1
add x0, x0, #4
- br x15
+ ret x15
endfunc
// void dav1d_lpf_v_sb_y_16bpc_neon(pixel *dst, const ptrdiff_t stride,
@@ -892,7 +892,7 @@ function lpf_\dir\()_sb_\type\()_16bpc_neon, export=1
ldp d12, d13, [sp, #0x20]
ldp d10, d11, [sp, #0x10]
ldp d8, d9, [sp], 0x40
- br x11
+ ret x11
endfunc
.endm
diff --git a/src/arm/64/mc.S b/src/arm/64/mc.S
index ef7f23b..8a80b39 100644
--- a/src/arm/64/mc.S
+++ b/src/arm/64/mc.S
@@ -1979,7 +1979,7 @@ L(\type\()_8tap_hv):
b 28b
0:
- br x15
+ ret x15
L(\type\()_8tap_filter_2):
ld1 {v28.8b}, [\sr2], \s_strd
@@ -2135,7 +2135,7 @@ L(\type\()_8tap_filter_2):
mov v22.8b, v29.8b
b 48b
0:
- br x15
+ ret x15
L(\type\()_8tap_filter_4):
ld1 {v26.8b}, [\sr2], \s_strd
@@ -2343,7 +2343,7 @@ L(\type\()_8tap_filter_4):
.endif
b 168b
0:
- br x15
+ ret x15
L(\type\()_8tap_filter_8_first):
ld1 {v28.8b, v29.8b}, [\src], \s_strd
@@ -3072,7 +3072,7 @@ function warp_affine_8x8\t\()_8bpc_neon, export=1
add w6, w6, w4
b.gt 1b
- br x15
+ ret x15
endfunc
.endm
diff --git a/src/arm/64/mc16.S b/src/arm/64/mc16.S
index cec82a3..05bfa39 100644
--- a/src/arm/64/mc16.S
+++ b/src/arm/64/mc16.S
@@ -2139,7 +2139,7 @@ L(\type\()_8tap_hv):
b 28b
0:
- br x15
+ ret x15
L(\type\()_8tap_filter_2):
ld1 {v25.8h}, [\sr2], \s_strd
@@ -2304,7 +2304,7 @@ L(\type\()_8tap_filter_2):
mov v22.8b, v25.8b
b 48b
0:
- br x15
+ ret x15
L(\type\()_8tap_filter_4):
ld1 {v24.8h}, [\sr2], \s_strd
@@ -2554,7 +2554,7 @@ L(\type\()_8tap_filter_4):
add \dst, \dst, #16
b 168b
0:
- br x15
+ ret x15
L(\type\()_8tap_filter_8):
ld1 {v4.8h, v5.8h}, [\sr2], \s_strd
@@ -3398,7 +3398,7 @@ function warp_affine_8x8\t\()_16bpc_neon, export=1
ldp d10, d11, [sp, #0x10]
ldp d8, d9, [sp], 0x40
- br x15
+ ret x15
endfunc
.endm