Welcome to the mirror list, hosted at ThFree Co, Russian Federation.

github.com/videolan/dav1d.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Henrik Gramner <gramner@twoorioles.com> 2022-09-07 03:06:25 +0300
committer Henrik Gramner <henrik@gramner.com> 2022-09-07 03:09:49 +0300
commit 58b15237eef865b2673fc3eecf5a07735db32545 (patch)
tree 0415ea8914888d1adff44db94fcad66334355d82
parent 895fed08e1de3d0a90c9ecd588cf61d8eee8457b (diff)
x86: Fix an alignment issue in 8-bit AVX-512 loop restoration
We don't have a separate 8-bit AVX-512 5-tap Wiener filter so the 7-tap function is used for chroma as well, and in some esoteric edge cases chroma dst pointers may only have a 32-byte alignment despite having a width larger than 32, so use an unaligned store as a workaround.
-rw-r--r-- src/x86/looprestoration_avx512.asm 12
1 file changed, 6 insertions, 6 deletions
diff --git a/src/x86/looprestoration_avx512.asm b/src/x86/looprestoration_avx512.asm
index 5669ce6..1e57177 100644
--- a/src/x86/looprestoration_avx512.asm
+++ b/src/x86/looprestoration_avx512.asm
@@ -329,11 +329,11 @@ ALIGN function_align
packuswb m2, m4
psrlw m2, 8
vpackuswb m2{k2}, m3, m5
- mova [dstq+r10], m2
- add r10, 64
- jl .hv_loop
- mov t6, t5
- mov t5, t4
+ movu [dstq+r10], m2 ; We don't have a separate 5-tap version so the 7-tap
+ add r10, 64 ; function is used for chroma as well, and in some
+ jl .hv_loop ; esoteric edge cases chroma dst pointers may only
+ mov t6, t5 ; have a 32-byte alignment despite having a width
+ mov t5, t4 ; larger than 32, so use an unaligned store here.
mov t4, t3
mov t3, t2
mov t2, t1
@@ -379,7 +379,7 @@ ALIGN function_align
packuswb m0, m2
psrlw m0, 8
vpackuswb m0{k2}, m1, m3
- mova [dstq+r10], m0
+ movu [dstq+r10], m0
add r10, 64
jl .v_loop
mov t6, t5