Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/videolan/dav1d.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author David Conrad <david_conrad@apple.com> 2022-06-08 23:50:02 +0300
committer David Conrad <david_conrad@apple.com> 2022-09-15 03:28:22 +0300
commit 1bdb776c71d615e9a1a4890bbc53f979e225e150 (patch)
tree a465f7e5ab722725fe4aa36bdb73e151a933c980
parent 08c708015ec372b6c28d341cba7bbc86843cc17b (diff)
Fix overflow in 8-bit NEON ADST
In 8-bit ADST, it's possible that the final Round2(x[0], 12) can exceed the 16-bit signed range. Specifically, in section 7.13.2.6 (Inverse ADST4 process), the precision requirement is: "It is a requirement of bitstream conformance that all values stored in the s and x arrays by this process are representable by a signed integer using r + 12 bits of precision." For 8 bits, r is 16 for both row and column, so x[] can be 28-bit signed. For values in [134215680, 134217727] (within 2047 of the maximum 28-bit value), the final Round2(x[0], 12) evaluates to 32768, which exceeds the 16-bit signed maximum of 32767. So switch to using sqrshrn, which saturates to 16-bit signed. This is a continuation of commit b53ff29d80a21180e5ad9bbe39a02541151f4f53 ("arm: itx: Do clipping in all narrowing downshifts").
-rw-r--r-- src/arm/64/itx.S 24
1 files changed, 12 insertions, 12 deletions
diff --git a/src/arm/64/itx.S b/src/arm/64/itx.S
index c9650e9..b1b2f8f 100644
--- a/src/arm/64/itx.S
+++ b/src/arm/64/itx.S
@@ -483,10 +483,10 @@ endfunc
add \o1\().4s, v5.4s, v7.4s
sub \o3\().4s, \o3\().4s, v7.4s
- rshrn \o0\().4h, \o0\().4s, #12
- rshrn \o2\().4h, \o2\().4s, #12
- rshrn \o1\().4h, \o1\().4s, #12
- rshrn \o3\().4h, \o3\().4s, #12
+ sqrshrn \o0\().4h, \o0\().4s, #12
+ sqrshrn \o2\().4h, \o2\().4s, #12
+ sqrshrn \o1\().4h, \o1\().4s, #12
+ sqrshrn \o3\().4h, \o3\().4s, #12
.endm
function inv_adst_4h_x4_neon, export=1
@@ -538,21 +538,21 @@ endfunc
sub v4.4s, v4.4s, v2.4s // out3
sub v5.4s, v5.4s, v3.4s
- rshrn v18.4h, v18.4s, #12
- rshrn2 v18.8h, v19.4s, #12
+ sqrshrn v18.4h, v18.4s, #12
+ sqrshrn2 v18.8h, v19.4s, #12
- rshrn \o0\().4h, v16.4s, #12
- rshrn2 \o0\().8h, v17.4s, #12
+ sqrshrn \o0\().4h, v16.4s, #12
+ sqrshrn2 \o0\().8h, v17.4s, #12
.ifc \o2, v17
mov v17.16b, v18.16b
.endif
- rshrn \o1\().4h, v6.4s, #12
- rshrn2 \o1\().8h, v7.4s, #12
+ sqrshrn \o1\().4h, v6.4s, #12
+ sqrshrn2 \o1\().8h, v7.4s, #12
- rshrn \o3\().4h, v4.4s, #12
- rshrn2 \o3\().8h, v5.4s, #12
+ sqrshrn \o3\().4h, v4.4s, #12
+ sqrshrn2 \o3\().8h, v5.4s, #12
.endm
function inv_adst_8h_x4_neon, export=1