Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/FFmpeg/FFmpeg.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Thomas Mundt <tmundt75@gmail.com>, 2017-09-19 23:23:23 +0300
committer: James Almer <jamrial@gmail.com>, 2017-09-23 22:19:58 +0300
commit: 40bfaa190c61b6eeff1b76b767c12edd6609967d (patch)
tree: 533340612ea536e60bd9189fb110772e4513a49a /libavfilter/x86
parent: 58ca446672fec10e851b820ce7df64bd2d1f3a70 (diff)
avfilter/interlace: add support for 10 and 12 bit
Reviewed-by: Michael Niedermayer <michael@niedermayer.cc>
Signed-off-by: Thomas Mundt <tmundt75@gmail.com>
Signed-off-by: James Almer <jamrial@gmail.com>
Diffstat (limited to 'libavfilter/x86')
-rw-r--r-- libavfilter/x86/vf_interlace.asm | 80
-rw-r--r-- libavfilter/x86/vf_interlace_init.c | 51
-rw-r--r-- libavfilter/x86/vf_tinterlace_init.c | 51
3 files changed, 147 insertions, 35 deletions
diff --git a/libavfilter/x86/vf_interlace.asm b/libavfilter/x86/vf_interlace.asm
index d0fffd293b..7c0065d4d9 100644
--- a/libavfilter/x86/vf_interlace.asm
+++ b/libavfilter/x86/vf_interlace.asm
@@ -30,27 +30,26 @@ pw_4: times 8 dw 4
SECTION .text
-%macro LOWPASS_LINE 0
-cglobal lowpass_line, 5, 5, 7, dst, h, src, mref, pref
+%macro LOWPASS 1
add dstq, hq
add srcq, hq
add mrefq, srcq
add prefq, srcq
neg hq
- pcmpeqb m6, m6
+ pcmpeq%1 m6, m6
.loop:
mova m0, [mrefq+hq]
mova m1, [mrefq+hq+mmsize]
- pavgb m0, [prefq+hq]
- pavgb m1, [prefq+hq+mmsize]
+ pavg%1 m0, [prefq+hq]
+ pavg%1 m1, [prefq+hq+mmsize]
pxor m0, m6
pxor m1, m6
pxor m2, m6, [srcq+hq]
pxor m3, m6, [srcq+hq+mmsize]
- pavgb m0, m2
- pavgb m1, m3
+ pavg%1 m0, m2
+ pavg%1 m1, m3
pxor m0, m6
pxor m1, m6
mova [dstq+hq], m0
@@ -59,7 +58,15 @@ cglobal lowpass_line, 5, 5, 7, dst, h, src, mref, pref
add hq, 2*mmsize
jl .loop
REP_RET
+%endmacro
+
+%macro LOWPASS_LINE 0
+cglobal lowpass_line, 5, 5, 7, dst, h, src, mref, pref
+ LOWPASS b
+cglobal lowpass_line_16, 5, 5, 7, dst, h, src, mref, pref
+ shl hq, 1
+ LOWPASS w
%endmacro
%macro LOWPASS_LINE_COMPLEX 0
@@ -124,6 +131,65 @@ cglobal lowpass_line_complex, 5, 5, 8, dst, h, src, mref, pref
jg .loop
REP_RET
+cglobal lowpass_line_complex_12, 5, 5, 8, 16, dst, h, src, mref, pref, clip_max
+ movd m7, DWORD clip_maxm
+ SPLATW m7, m7, 0
+ mova [rsp], m7
+.loop:
+ mova m0, [srcq+mrefq]
+ mova m1, [srcq+mrefq+mmsize]
+ mova m2, [srcq+prefq]
+ mova m3, [srcq+prefq+mmsize]
+ paddw m0, m2
+ paddw m1, m3
+ mova m6, m0
+ mova m7, m1
+ mova m2, [srcq]
+ mova m3, [srcq+mmsize]
+ paddw m0, m2
+ paddw m1, m3
+ psllw m2, 1
+ psllw m3, 1
+ paddw m0, m2
+ paddw m1, m3
+ psllw m0, 1
+ psllw m1, 1
+ pcmpgtw m6, m2
+ pcmpgtw m7, m3
+ mova m2, [srcq+2*mrefq]
+ mova m3, [srcq+2*mrefq+mmsize]
+ mova m4, [srcq+2*prefq]
+ mova m5, [srcq+2*prefq+mmsize]
+ paddw m2, m4
+ paddw m3, m5
+ paddw m0, [pw_4]
+ paddw m1, [pw_4]
+ psubusw m0, m2
+ psubusw m1, m3
+ psrlw m0, 3
+ psrlw m1, 3
+ pminsw m0, [rsp]
+ pminsw m1, [rsp]
+ mova m2, m0
+ mova m3, m1
+ pmaxsw m0, [srcq]
+ pmaxsw m1, [srcq+mmsize]
+ pminsw m2, [srcq]
+ pminsw m3, [srcq+mmsize]
+ pand m0, m6
+ pand m1, m7
+ pandn m6, m2
+ pandn m7, m3
+ por m0, m6
+ por m1, m7
+ mova [dstq], m0
+ mova [dstq+mmsize], m1
+
+ add dstq, 2*mmsize
+ add srcq, 2*mmsize
+ sub hd, mmsize
+ jg .loop
+REP_RET
%endmacro
INIT_XMM sse2
diff --git a/libavfilter/x86/vf_interlace_init.c b/libavfilter/x86/vf_interlace_init.c
index c0f04dcd97..70fe86ccff 100644
--- a/libavfilter/x86/vf_interlace_init.c
+++ b/libavfilter/x86/vf_interlace_init.c
@@ -27,27 +27,50 @@
#include "libavfilter/interlace.h"
void ff_lowpass_line_sse2(uint8_t *dstp, ptrdiff_t linesize,
- const uint8_t *srcp,
- ptrdiff_t mref, ptrdiff_t pref);
+ const uint8_t *srcp, ptrdiff_t mref,
+ ptrdiff_t pref, int clip_max);
void ff_lowpass_line_avx (uint8_t *dstp, ptrdiff_t linesize,
- const uint8_t *srcp,
- ptrdiff_t mref, ptrdiff_t pref);
+ const uint8_t *srcp, ptrdiff_t mref,
+ ptrdiff_t pref, int clip_max);
+
+void ff_lowpass_line_16_sse2(uint8_t *dstp, ptrdiff_t linesize,
+ const uint8_t *srcp, ptrdiff_t mref,
+ ptrdiff_t pref, int clip_max);
+void ff_lowpass_line_16_avx (uint8_t *dstp, ptrdiff_t linesize,
+ const uint8_t *srcp, ptrdiff_t mref,
+ ptrdiff_t pref, int clip_max);
void ff_lowpass_line_complex_sse2(uint8_t *dstp, ptrdiff_t linesize,
- const uint8_t *srcp,
- ptrdiff_t mref, ptrdiff_t pref);
+ const uint8_t *srcp, ptrdiff_t mref,
+ ptrdiff_t pref, int clip_max);
+
+void ff_lowpass_line_complex_12_sse2(uint8_t *dstp, ptrdiff_t linesize,
+ const uint8_t *srcp, ptrdiff_t mref,
+ ptrdiff_t pref, int clip_max);
av_cold void ff_interlace_init_x86(InterlaceContext *s)
{
int cpu_flags = av_get_cpu_flags();
- if (EXTERNAL_SSE2(cpu_flags)) {
- if (s->lowpass == VLPF_LIN)
- s->lowpass_line = ff_lowpass_line_sse2;
- else if (s->lowpass == VLPF_CMP)
- s->lowpass_line = ff_lowpass_line_complex_sse2;
+ if (s->csp->comp[0].depth > 8) {
+ if (EXTERNAL_SSE2(cpu_flags)) {
+ if (s->lowpass == VLPF_LIN)
+ s->lowpass_line = ff_lowpass_line_16_sse2;
+ else if (s->lowpass == VLPF_CMP)
+ s->lowpass_line = ff_lowpass_line_complex_12_sse2;
+ }
+ if (EXTERNAL_AVX(cpu_flags))
+ if (s->lowpass == VLPF_LIN)
+ s->lowpass_line = ff_lowpass_line_16_avx;
+ } else {
+ if (EXTERNAL_SSE2(cpu_flags)) {
+ if (s->lowpass == VLPF_LIN)
+ s->lowpass_line = ff_lowpass_line_sse2;
+ else if (s->lowpass == VLPF_CMP)
+ s->lowpass_line = ff_lowpass_line_complex_sse2;
+ }
+ if (EXTERNAL_AVX(cpu_flags))
+ if (s->lowpass == VLPF_LIN)
+ s->lowpass_line = ff_lowpass_line_avx;
}
- if (EXTERNAL_AVX(cpu_flags))
- if (s->lowpass == VLPF_LIN)
- s->lowpass_line = ff_lowpass_line_avx;
}
diff --git a/libavfilter/x86/vf_tinterlace_init.c b/libavfilter/x86/vf_tinterlace_init.c
index 2b10e1b74c..209812964d 100644
--- a/libavfilter/x86/vf_tinterlace_init.c
+++ b/libavfilter/x86/vf_tinterlace_init.c
@@ -28,27 +28,50 @@
#include "libavfilter/tinterlace.h"
void ff_lowpass_line_sse2(uint8_t *dstp, ptrdiff_t linesize,
- const uint8_t *srcp,
- ptrdiff_t mref, ptrdiff_t pref);
+ const uint8_t *srcp, ptrdiff_t mref,
+ ptrdiff_t pref, int clip_max);
void ff_lowpass_line_avx (uint8_t *dstp, ptrdiff_t linesize,
- const uint8_t *srcp,
- ptrdiff_t mref, ptrdiff_t pref);
+ const uint8_t *srcp, ptrdiff_t mref,
+ ptrdiff_t pref, int clip_max);
+
+void ff_lowpass_line_16_sse2(uint8_t *dstp, ptrdiff_t linesize,
+ const uint8_t *srcp, ptrdiff_t mref,
+ ptrdiff_t pref, int clip_max);
+void ff_lowpass_line_16_avx (uint8_t *dstp, ptrdiff_t linesize,
+ const uint8_t *srcp, ptrdiff_t mref,
+ ptrdiff_t pref, int clip_max);
void ff_lowpass_line_complex_sse2(uint8_t *dstp, ptrdiff_t linesize,
- const uint8_t *srcp,
- ptrdiff_t mref, ptrdiff_t pref);
+ const uint8_t *srcp, ptrdiff_t mref,
+ ptrdiff_t pref, int clip_max);
+
+void ff_lowpass_line_complex_12_sse2(uint8_t *dstp, ptrdiff_t linesize,
+ const uint8_t *srcp, ptrdiff_t mref,
+ ptrdiff_t pref, int clip_max);
av_cold void ff_tinterlace_init_x86(TInterlaceContext *s)
{
int cpu_flags = av_get_cpu_flags();
- if (EXTERNAL_SSE2(cpu_flags)) {
- if (!(s->flags & TINTERLACE_FLAG_CVLPF))
- s->lowpass_line = ff_lowpass_line_sse2;
- else
- s->lowpass_line = ff_lowpass_line_complex_sse2;
+ if (s->csp->comp[0].depth > 8) {
+ if (EXTERNAL_SSE2(cpu_flags)) {
+ if (!(s->flags & TINTERLACE_FLAG_CVLPF))
+ s->lowpass_line = ff_lowpass_line_16_sse2;
+ else
+ s->lowpass_line = ff_lowpass_line_complex_12_sse2;
+ }
+ if (EXTERNAL_AVX(cpu_flags))
+ if (!(s->flags & TINTERLACE_FLAG_CVLPF))
+ s->lowpass_line = ff_lowpass_line_16_avx;
+ } else {
+ if (EXTERNAL_SSE2(cpu_flags)) {
+ if (!(s->flags & TINTERLACE_FLAG_CVLPF))
+ s->lowpass_line = ff_lowpass_line_sse2;
+ else
+ s->lowpass_line = ff_lowpass_line_complex_sse2;
+ }
+ if (EXTERNAL_AVX(cpu_flags))
+ if (!(s->flags & TINTERLACE_FLAG_CVLPF))
+ s->lowpass_line = ff_lowpass_line_avx;
}
- if (EXTERNAL_AVX(cpu_flags))
- if (!(s->flags & TINTERLACE_FLAG_CVLPF))
- s->lowpass_line = ff_lowpass_line_avx;
}