Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mpc-hc/LAVFilters.git - Unnamed repository; edit this file 'description' to name the repository.
summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- decoder/LAVVideo/LAVPixFmtConverter.cpp 10
-rw-r--r-- decoder/LAVVideo/LAVPixFmtConverter.h 1
-rw-r--r-- decoder/LAVVideo/LAVVideo.cpp 2
-rw-r--r-- decoder/LAVVideo/pixconv/convert_direct.cpp 93
4 files changed, 103 insertions, 3 deletions
diff --git a/decoder/LAVVideo/LAVPixFmtConverter.cpp b/decoder/LAVVideo/LAVPixFmtConverter.cpp
index 5abf6779..c24903ac 100644
--- a/decoder/LAVVideo/LAVPixFmtConverter.cpp
+++ b/decoder/LAVVideo/LAVPixFmtConverter.cpp
@@ -444,10 +444,15 @@ void CLAVPixFmtConverter::SelectConvertFunctionDirect()
convert_direct = &CLAVPixFmtConverter::convert_p010_nv12_direct_sse4;
else if (cpu & AV_CPU_FLAG_SSE2)
convert_direct = &CLAVPixFmtConverter::convert_p010_nv12_sse2;
+ } else if (m_InputPixFmt == LAVPixFmt_NV12 && m_OutputPixFmt == LAVOutPixFmt_YV12) {
+ if (cpu & AV_CPU_FLAG_SSE4)
+ convert_direct = &CLAVPixFmtConverter::convert_nv12_yv12_direct_sse4;
+ else if (cpu & AV_CPU_FLAG_SSE2)
+ convert_direct = &CLAVPixFmtConverter::convert_nv12_yv12;
}
if (convert_direct != nullptr)
- m_bDirectMode = true;
+ m_bDirectMode = TRUE;
}
HRESULT CLAVPixFmtConverter::Convert(LAVFrame *pFrame, uint8_t *dst, int width, int height, ptrdiff_t dstStride, int planeHeight) {
@@ -490,7 +495,8 @@ HRESULT CLAVPixFmtConverter::Convert(LAVFrame *pFrame, uint8_t *dst, int width,
}
BOOL CLAVPixFmtConverter::IsDirectModeSupported(uintptr_t dst, ptrdiff_t stride) {
- if (FFALIGN(stride, 16) != stride || (dst % 16u))
+ const int stride_align = (m_OutputPixFmt == LAVOutPixFmt_YV12 ? 32 : 16);
+ if (FFALIGN(stride, stride_align) != stride || (dst % 16u))
return false;
return m_bDirectMode;
}
diff --git a/decoder/LAVVideo/LAVPixFmtConverter.h b/decoder/LAVVideo/LAVPixFmtConverter.h
index aa197f35..488c6702 100644
--- a/decoder/LAVVideo/LAVPixFmtConverter.h
+++ b/decoder/LAVVideo/LAVPixFmtConverter.h
@@ -136,6 +136,7 @@ private:
template <int out32> DECLARE_CONV_FUNC(convert_rgb48_rgb);
DECLARE_CONV_FUNC(plane_copy_direct_sse4);
+ DECLARE_CONV_FUNC(convert_nv12_yv12_direct_sse4);
DECLARE_CONV_FUNC(convert_p010_nv12_direct_sse4);
DECLARE_CONV_FUNC(convert_yuv_rgb);
diff --git a/decoder/LAVVideo/LAVVideo.cpp b/decoder/LAVVideo/LAVVideo.cpp
index 474bc69e..2f8a8ffa 100644
--- a/decoder/LAVVideo/LAVVideo.cpp
+++ b/decoder/LAVVideo/LAVVideo.cpp
@@ -687,7 +687,7 @@ HRESULT CLAVVideo::CheckDirectMode()
BOOL bDirect = (pix == LAVPixFmt_NV12 || pix == LAVPixFmt_P010);
if (pix == LAVPixFmt_NV12 && m_Decoder.IsInterlaced() && m_settings.SWDeintMode == SWDeintMode_YADIF)
bDirect = FALSE;
- else if (pix == LAVPixFmt_NV12 && m_pOutput->CurrentMediaType().subtype != MEDIASUBTYPE_NV12)
+ else if (pix == LAVPixFmt_NV12 && m_pOutput->CurrentMediaType().subtype != MEDIASUBTYPE_NV12 && m_pOutput->CurrentMediaType().subtype != MEDIASUBTYPE_YV12)
bDirect = FALSE;
else if (pix == LAVPixFmt_P010 && m_pOutput->CurrentMediaType().subtype != MEDIASUBTYPE_P010 && m_pOutput->CurrentMediaType().subtype != MEDIASUBTYPE_P016 && m_pOutput->CurrentMediaType().subtype != MEDIASUBTYPE_NV12)
bDirect = FALSE;
diff --git a/decoder/LAVVideo/pixconv/convert_direct.cpp b/decoder/LAVVideo/pixconv/convert_direct.cpp
index b2857317..a8017e81 100644
--- a/decoder/LAVVideo/pixconv/convert_direct.cpp
+++ b/decoder/LAVVideo/pixconv/convert_direct.cpp
@@ -89,6 +89,99 @@ DECLARE_CONV_FUNC_IMPL(plane_copy_direct_sse4)
return S_OK;
}
+// Direct-mode NV12 -> YV12 conversion.
+//
+// The luma plane (src[0]) is copied to dst[0] unchanged. The interleaved
+// chroma plane (src[1]) is split into two planar outputs: the low byte of
+// every 16-bit chroma pair is written to dst[2] and the high byte to dst[1].
+// Both chroma outputs use dstStride[1]; the luma output uses dstStride[0].
+//
+// Each row is processed in 64-byte blocks through the PIXCONV_STREAM_LOAD /
+// PIXCONV_PUT_STREAM macros, followed by a tail loop that covers the rest of
+// the row up to `width` in 16-byte (luma) or 32-byte (chroma) steps.
+// NOTE(review): the tail loops step past `width` when it is not a multiple of
+// the step size, so they presumably rely on padded, 16-byte aligned rows -
+// confirm against the callers.
+//
+// Always returns S_OK.
+DECLARE_CONV_FUNC_IMPL(convert_nv12_yv12_direct_sse4)
+{
+  const ptrdiff_t inStride = srcStride[0];
+  const ptrdiff_t outStride = dstStride[0];
+  const ptrdiff_t outChromaStride = dstStride[1];
+  const ptrdiff_t chromaHeight = (height >> 1);
+
+  // Number of bytes per row that the 64-byte block loops may touch
+  const ptrdiff_t stride = min(FFALIGN(width, 64), min(inStride, outStride));
+
+  __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm7;
+  // Mask that keeps the low byte of every 16-bit chroma pair
+  xmm7 = _mm_set1_epi16(0x00FF);
+
+  _mm_sfence();
+
+  ptrdiff_t line, i;
+
+  // Luma plane: plain copy
+  for (line = 0; line < height; line++) {
+    const uint8_t *y = (src[0] + line * inStride);
+    uint8_t *dy = (dst[0] + line * outStride);
+    for (i = 0; i < (stride - 63); i += 64) {
+      PIXCONV_STREAM_LOAD(xmm0, y + i + 0);
+      PIXCONV_STREAM_LOAD(xmm1, y + i + 16);
+      PIXCONV_STREAM_LOAD(xmm2, y + i + 32);
+      PIXCONV_STREAM_LOAD(xmm3, y + i + 48);
+
+      _ReadWriteBarrier();
+
+      PIXCONV_PUT_STREAM(dy + i + 0, xmm0);
+      PIXCONV_PUT_STREAM(dy + i + 16, xmm1);
+      PIXCONV_PUT_STREAM(dy + i + 32, xmm2);
+      PIXCONV_PUT_STREAM(dy + i + 48, xmm3);
+    }
+
+    // Remainder of the row, 16 bytes at a time
+    for (; i < width; i += 16) {
+      PIXCONV_LOAD_ALIGNED(xmm0, y + i);
+      PIXCONV_PUT_STREAM(dy + i, xmm0);
+    }
+  }
+
+  // Chroma plane: de-interleave the byte pairs into two planes
+  for (line = 0; line < chromaHeight; line++) {
+    const uint8_t *uv = (src[1] + line * inStride);
+    uint8_t *dv = (dst[1] + line * outChromaStride);
+    uint8_t *du = (dst[2] + line * outChromaStride);
+    for (i = 0; i < (stride - 63); i += 64) {
+      PIXCONV_STREAM_LOAD(xmm0, uv + i + 0);
+      PIXCONV_STREAM_LOAD(xmm1, uv + i + 16);
+      PIXCONV_STREAM_LOAD(xmm2, uv + i + 32);
+      PIXCONV_STREAM_LOAD(xmm3, uv + i + 48);
+
+      _ReadWriteBarrier();
+
+      // process first pair
+      xmm4 = _mm_srli_epi16(xmm0, 8);
+      xmm5 = _mm_srli_epi16(xmm1, 8);
+      xmm0 = _mm_and_si128(xmm0, xmm7);
+      xmm1 = _mm_and_si128(xmm1, xmm7);
+      xmm0 = _mm_packus_epi16(xmm0, xmm1);
+      xmm4 = _mm_packus_epi16(xmm4, xmm5);
+
+      // 64 interleaved input bytes produce 32 bytes per output plane, hence i >> 1
+      PIXCONV_PUT_STREAM(du + (i >> 1) + 0, xmm0);
+      PIXCONV_PUT_STREAM(dv + (i >> 1) + 0, xmm4);
+
+      // and second pair
+      xmm4 = _mm_srli_epi16(xmm2, 8);
+      xmm5 = _mm_srli_epi16(xmm3, 8);
+      xmm2 = _mm_and_si128(xmm2, xmm7);
+      xmm3 = _mm_and_si128(xmm3, xmm7);
+      xmm2 = _mm_packus_epi16(xmm2, xmm3);
+      xmm4 = _mm_packus_epi16(xmm4, xmm5);
+
+      PIXCONV_PUT_STREAM(du + (i >> 1) + 16, xmm2);
+      PIXCONV_PUT_STREAM(dv + (i >> 1) + 16, xmm4);
+    }
+
+    // Remainder of the row, 32 interleaved bytes at a time
+    for (; i < width; i += 32) {
+      PIXCONV_LOAD_ALIGNED(xmm0, uv + i);
+      // Second half of the 32-byte block; loading uv + i twice would repeat
+      // the first eight chroma pairs in the upper half of both outputs.
+      PIXCONV_LOAD_ALIGNED(xmm1, uv + i + 16);
+
+      xmm4 = _mm_srli_epi16(xmm0, 8);
+      xmm5 = _mm_srli_epi16(xmm1, 8);
+      xmm0 = _mm_and_si128(xmm0, xmm7);
+      xmm1 = _mm_and_si128(xmm1, xmm7);
+      xmm0 = _mm_packus_epi16(xmm0, xmm1);
+      xmm4 = _mm_packus_epi16(xmm4, xmm5);
+
+      PIXCONV_PUT_STREAM(du + (i >> 1), xmm0);
+      PIXCONV_PUT_STREAM(dv + (i >> 1), xmm4);
+    }
+  }
+
+  return S_OK;
+}
+
+
DECLARE_CONV_FUNC_IMPL(convert_p010_nv12_direct_sse4)
{
const ptrdiff_t inStride = srcStride[0];