diff options
author | David Conrad <david_conrad@apple.com> | 2022-06-08 23:50:02 +0300 |
---|---|---|
committer | David Conrad <david_conrad@apple.com> | 2022-09-15 03:28:22 +0300 |
commit | 1bdb776c71d615e9a1a4890bbc53f979e225e150 (patch) | |
tree | a465f7e5ab722725fe4aa36bdb73e151a933c980 | |
parent | 08c708015ec372b6c28d341cba7bbc86843cc17b (diff) |
Fix overflow in 8-bit NEON ADST
In the 8-bit ADST, the final Round2(x[0], 12) can exceed the range of a
signed 16-bit integer.
Specifically, in section 7.13.2.6 (Inverse ADST4 process), the precision requirement is:
"It is a requirement of bitstream conformance that all values stored in the
s and x arrays by this process are representable by a signed integer using
r + 12 bits of precision."
For 8-bit content, r is 16 for both the row and column transforms, so values
in x[] can require up to 28 bits signed.
For x[0] in [134215680, 134217727] (within 2047 of the maximum signed 28-bit
value), the final Round2(x[0], 12) evaluates to 32768, which exceeds the
signed 16-bit maximum of 32767.
So switch from rshrn to sqrshrn, which saturates the result to the signed 16-bit range.
This is a continuation of commit b53ff29d80a21180e5ad9bbe39a02541151f4f53
("arm: itx: Do clipping in all narrowing downshifts").
-rw-r--r-- | src/arm/64/itx.S | 24 |
1 files changed, 12 insertions, 12 deletions
diff --git a/src/arm/64/itx.S b/src/arm/64/itx.S index c9650e9..b1b2f8f 100644 --- a/src/arm/64/itx.S +++ b/src/arm/64/itx.S @@ -483,10 +483,10 @@ endfunc add \o1\().4s, v5.4s, v7.4s sub \o3\().4s, \o3\().4s, v7.4s - rshrn \o0\().4h, \o0\().4s, #12 - rshrn \o2\().4h, \o2\().4s, #12 - rshrn \o1\().4h, \o1\().4s, #12 - rshrn \o3\().4h, \o3\().4s, #12 + sqrshrn \o0\().4h, \o0\().4s, #12 + sqrshrn \o2\().4h, \o2\().4s, #12 + sqrshrn \o1\().4h, \o1\().4s, #12 + sqrshrn \o3\().4h, \o3\().4s, #12 .endm function inv_adst_4h_x4_neon, export=1 @@ -538,21 +538,21 @@ endfunc sub v4.4s, v4.4s, v2.4s // out3 sub v5.4s, v5.4s, v3.4s - rshrn v18.4h, v18.4s, #12 - rshrn2 v18.8h, v19.4s, #12 + sqrshrn v18.4h, v18.4s, #12 + sqrshrn2 v18.8h, v19.4s, #12 - rshrn \o0\().4h, v16.4s, #12 - rshrn2 \o0\().8h, v17.4s, #12 + sqrshrn \o0\().4h, v16.4s, #12 + sqrshrn2 \o0\().8h, v17.4s, #12 .ifc \o2, v17 mov v17.16b, v18.16b .endif - rshrn \o1\().4h, v6.4s, #12 - rshrn2 \o1\().8h, v7.4s, #12 + sqrshrn \o1\().4h, v6.4s, #12 + sqrshrn2 \o1\().8h, v7.4s, #12 - rshrn \o3\().4h, v4.4s, #12 - rshrn2 \o3\().8h, v5.4s, #12 + sqrshrn \o3\().4h, v4.4s, #12 + sqrshrn2 \o3\().8h, v5.4s, #12 .endm function inv_adst_8h_x4_neon, export=1 |