From 036570bc1479ea3090c072a497f253df42f761e5 Mon Sep 17 00:00:00 2001 From: Hendrik Leppkes Date: Sun, 1 Feb 2015 22:28:29 +0100 Subject: Basic support for P010 as an internal pixel format --- decoder/LAVVideo/LAVPixFmtConverter.cpp | 7 +++++-- decoder/LAVVideo/decoders/ILAVDecoder.h | 1 + decoder/LAVVideo/decoders/avcodec.cpp | 2 ++ decoder/LAVVideo/decoders/pixfmt.cpp | 2 ++ decoder/LAVVideo/pixconv/yuv2rgb.cpp | 32 ++++++++++++++++++++++---------- 5 files changed, 32 insertions(+), 12 deletions(-) (limited to 'decoder') diff --git a/decoder/LAVVideo/LAVPixFmtConverter.cpp b/decoder/LAVVideo/LAVPixFmtConverter.cpp index a1e18387..3cbdec6e 100644 --- a/decoder/LAVVideo/LAVPixFmtConverter.cpp +++ b/decoder/LAVVideo/LAVPixFmtConverter.cpp @@ -41,6 +41,7 @@ * YUV444 - - - - - x x - - - - - - - - x x * YUV444bX - - - - - x x - - - x - - - x x x * NV12 x x - x x - - - - - - - - - - x x + * P010 - - - - - - - x - - - - x - - x x * YUY2 - - - - - - - - - - - - - - - - - * RGB24 - - - - - - - - - - - - - - - x - * RGB32 - - - - - - - - - - - - - - - - x @@ -78,6 +79,7 @@ static LAV_INOUT_PIXFMT_MAP lav_pixfmt_map[] = { // 4:2:0 { LAVPixFmt_YUV420, 8, { PIXOUT_420_8, PIXOUT_420_10, PIXOUT_420_16, PIXOUT_422_16, PIXOUT_422_10, PIXOUT_422_8, PIXOUT_RGB_8, PIXOUT_RGB_16, PIXOUT_444_16, PIXOUT_444_10, PIXOUT_444_8 } }, { LAVPixFmt_NV12, 8, { PIXOUT_420_8, PIXOUT_420_10, PIXOUT_420_16, PIXOUT_422_16, PIXOUT_422_10, PIXOUT_422_8, PIXOUT_RGB_8, PIXOUT_RGB_16, PIXOUT_444_16, PIXOUT_444_10, PIXOUT_444_8 } }, + { LAVPixFmt_P010, 10, { PIXOUT_420_10, PIXOUT_420_16, PIXOUT_420_8, PIXOUT_422_16, PIXOUT_422_10, PIXOUT_422_8, PIXOUT_RGB_8, PIXOUT_RGB_16, PIXOUT_444_16, PIXOUT_444_10, PIXOUT_444_8 } }, { LAVPixFmt_YUV420bX, 10, { PIXOUT_420_10, PIXOUT_420_16, PIXOUT_420_8, PIXOUT_422_16, PIXOUT_422_10, PIXOUT_422_8, PIXOUT_RGB_8, PIXOUT_RGB_16, PIXOUT_444_16, PIXOUT_444_10, PIXOUT_444_8 } }, { LAVPixFmt_YUV420bX, 16, { PIXOUT_420_16, PIXOUT_420_10, PIXOUT_420_8, PIXOUT_422_16, PIXOUT_422_10, PIXOUT_422_8, PIXOUT_RGB_8, PIXOUT_RGB_16, PIXOUT_444_16, PIXOUT_444_10, PIXOUT_444_8 } }, @@ -319,7 +321,8 @@ void CLAVPixFmtConverter::SelectConvertFunction() m_RequiredAlignment = 0; } else if ((m_OutputPixFmt == LAVOutPixFmt_RGB32 && (m_InputPixFmt == LAVPixFmt_RGB32 || m_InputPixFmt == LAVPixFmt_ARGB32)) || (m_OutputPixFmt == LAVOutPixFmt_RGB24 && m_InputPixFmt == LAVPixFmt_RGB24) || (m_OutputPixFmt == LAVOutPixFmt_RGB48 && m_InputPixFmt == LAVPixFmt_RGB48) - || (m_OutputPixFmt == LAVOutPixFmt_NV12 && m_InputPixFmt == LAVPixFmt_NV12)) { + || (m_OutputPixFmt == LAVOutPixFmt_NV12 && m_InputPixFmt == LAVPixFmt_NV12) + || ((m_OutputPixFmt == LAVOutPixFmt_P010 || m_OutputPixFmt == LAVOutPixFmt_P016) && m_InputPixFmt == LAVPixFmt_P010)) { if (cpu & AV_CPU_FLAG_SSE2) convert = &CLAVPixFmtConverter::plane_copy_sse2; else @@ -359,7 +362,7 @@ void CLAVPixFmtConverter::SelectConvertFunction() && (m_InputPixFmt == LAVPixFmt_YUV420 || m_InputPixFmt == LAVPixFmt_YUV420bX || m_InputPixFmt == LAVPixFmt_YUV422 || m_InputPixFmt == LAVPixFmt_YUV422bX || m_InputPixFmt == LAVPixFmt_YUV444 || m_InputPixFmt == LAVPixFmt_YUV444bX - || m_InputPixFmt == LAVPixFmt_NV12)) { + || m_InputPixFmt == LAVPixFmt_NV12 || m_InputPixFmt == LAVPixFmt_P010)) { convert = &CLAVPixFmtConverter::convert_yuv_rgb; if (m_OutputPixFmt == LAVOutPixFmt_RGB32) { m_RequiredAlignment = 4; diff --git a/decoder/LAVVideo/decoders/ILAVDecoder.h b/decoder/LAVVideo/decoders/ILAVDecoder.h index 93660cc3..07681826 100644 --- a/decoder/LAVVideo/decoders/ILAVDecoder.h +++ b/decoder/LAVVideo/decoders/ILAVDecoder.h @@ -45,6 +45,7 @@ typedef enum LAVPixelFormat { /* packed/half-packed YUV */ LAVPixFmt_NV12, ///< YUV 4:2:0, U/V interleaved LAVPixFmt_YUY2, ///< YUV 4:2:2, packed, YUYV order + LAVPixFmt_P010, ///< YUV 4:2:0, 10-bit, U/V interleaved /* RGB */ LAVPixFmt_RGB24, ///< RGB24, in BGR order diff --git a/decoder/LAVVideo/decoders/avcodec.cpp b/decoder/LAVVideo/decoders/avcodec.cpp index bf72aa27..6e77355e 100644 --- a/decoder/LAVVideo/decoders/avcodec.cpp +++ b/decoder/LAVVideo/decoders/avcodec.cpp @@ -236,6 +236,8 @@ static struct PixelFormatMapping { { AV_PIX_FMT_YUVJ411P, LAVPixFmt_YUV422, TRUE }, + { AV_PIX_FMT_P010LE, LAVPixFmt_P010, FALSE, 10 }, + { AV_PIX_FMT_DXVA2_VLD, LAVPixFmt_DXVA2, FALSE }, }; diff --git a/decoder/LAVVideo/decoders/pixfmt.cpp b/decoder/LAVVideo/decoders/pixfmt.cpp index caf0100f..cfd4e1d7 100644 --- a/decoder/LAVVideo/decoders/pixfmt.cpp +++ b/decoder/LAVVideo/decoders/pixfmt.cpp @@ -29,6 +29,7 @@ static LAVPixFmtDesc lav_pixfmt_desc[] = { { 2, 3, { 1, 1, 1 }, { 1, 1, 1 } }, ///< LAVPixFmt_YUV444bX { 1, 2, { 1, 1 }, { 1, 2 } }, ///< LAVPixFmt_NV12 { 2, 1, { 1 }, { 1 } }, ///< LAVPixFmt_YUY2 + { 2, 2, { 1, 1 }, { 1, 2 } }, ///< LAVPixFmt_P010 { 3, 1, { 1 }, { 1 } }, ///< LAVPixFmt_RGB24 { 4, 1, { 1 }, { 1 } }, ///< LAVPixFmt_RGB32 { 4, 1, { 1 }, { 1 } }, ///< LAVPixFmt_ARGB32 @@ -49,6 +50,7 @@ static struct { { LAVPixFmt_YUV444, AV_PIX_FMT_YUV444P }, { LAVPixFmt_NV12, AV_PIX_FMT_NV12 }, { LAVPixFmt_YUY2, AV_PIX_FMT_YUYV422 }, + { LAVPixFmt_P010, AV_PIX_FMT_P010 }, { LAVPixFmt_RGB24, AV_PIX_FMT_BGR24 }, { LAVPixFmt_RGB32, AV_PIX_FMT_BGRA }, { LAVPixFmt_ARGB32, AV_PIX_FMT_BGRA }, diff --git a/decoder/LAVVideo/pixconv/yuv2rgb.cpp b/decoder/LAVVideo/pixconv/yuv2rgb.cpp index af124a2c..b70fb11a 100644 --- a/decoder/LAVVideo/pixconv/yuv2rgb.cpp +++ b/decoder/LAVVideo/pixconv/yuv2rgb.cpp @@ -38,7 +38,11 @@ static int yuv2rgb_convert_pixels(const uint8_t* &srcY, const uint8_t* &srcU, co xmm7 = _mm_setzero_si128 (); // Shift > 0 is for 9/10 bit formats - if (shift > 0) { + if (inputFormat == LAVPixFmt_P010) { + // Load 2 32-bit macro pixels from each line, which contain 4 UV at 16-bit each samples + PIXCONV_LOAD_PIXEL8(xmm0, srcU); + PIXCONV_LOAD_PIXEL8(xmm2, srcU+srcStrideUV); + } else if (shift > 0) { // Load 4 U/V values from line 0/1 into registers PIXCONV_LOAD_4PIXEL16(xmm1, srcU); PIXCONV_LOAD_4PIXEL16(xmm3, srcU+srcStrideUV); @@ -74,8 +78,11 @@ static int yuv2rgb_convert_pixels(const uint8_t* &srcY, const uint8_t* &srcU, co // xmm0/xmm2 contain 4 interleaved U/V samples from two lines each in the 16bit parts, still in their native bitdepth // Chroma upsampling required - if (inputFormat == LAVPixFmt_YUV420 || inputFormat == LAVPixFmt_NV12 || inputFormat == LAVPixFmt_YUV422) { - if (shift > 0 || inputFormat == LAVPixFmt_NV12) { + if (inputFormat == LAVPixFmt_YUV420 || inputFormat == LAVPixFmt_NV12 || inputFormat == LAVPixFmt_YUV422 || inputFormat == LAVPixFmt_P010) { + if (inputFormat == LAVPixFmt_P010) { + srcU += 8; + srcV += 8; + } else if (shift > 0 || inputFormat == LAVPixFmt_NV12) { srcU += 4; srcV += 4; } else { @@ -103,7 +110,7 @@ static int yuv2rgb_convert_pixels(const uint8_t* &srcY, const uint8_t* &srcU, co } // 4:2:0 - upsample to 4:2:2 using 75:25 - if (inputFormat == LAVPixFmt_YUV420 || inputFormat == LAVPixFmt_NV12) { + if (inputFormat == LAVPixFmt_YUV420 || inputFormat == LAVPixFmt_NV12 || inputFormat == LAVPixFmt_P010) { // Too high bitdepth, shift down to 14-bit if (shift >= 7) { xmm0 = _mm_srli_epi16(xmm0, shift-6); @@ -166,7 +173,7 @@ static int yuv2rgb_convert_pixels(const uint8_t* &srcY, const uint8_t* &srcU, co // Shift the result to 12 bit // For 10-bit input, we need to shift one bit off, or we exceed the allowed processing depth // For 8-bit, we need to add one bit - if (inputFormat == LAVPixFmt_YUV420 && shift > 1) { + if ((inputFormat == LAVPixFmt_YUV420 && shift > 1) || inputFormat == LAVPixFmt_P010) { if (shift >= 5) { xmm1 = _mm_srli_epi16(xmm1, 4); xmm3 = _mm_srli_epi16(xmm3, 4); @@ -411,7 +418,7 @@ static int __stdcall yuv2rgb_convert(const uint8_t *srcY, const uint8_t *srcU, c _mm_sfence(); // 4:2:0 needs special handling for the first and the last line - if (inputFormat == LAVPixFmt_YUV420 || inputFormat == LAVPixFmt_NV12) { + if (inputFormat == LAVPixFmt_YUV420 || inputFormat == LAVPixFmt_NV12 || inputFormat == LAVPixFmt_P010) { if (line == 0) { for (ptrdiff_t i = 0; i < endx; i += 4) { yuv2rgb_convert_pixels(y, u, v, rgb, 0, 0, 0, line, coeffs, lineDither, i); @@ -432,7 +439,7 @@ static int __stdcall yuv2rgb_convert(const uint8_t *srcY, const uint8_t *srcU, c lineDither = dithers + (line * 24 * DITHER_STEPS); y = srcY + line * srcStrideY; - if (inputFormat == LAVPixFmt_YUV420 || inputFormat == LAVPixFmt_NV12) { + if (inputFormat == LAVPixFmt_YUV420 || inputFormat == LAVPixFmt_NV12 || inputFormat == LAVPixFmt_P010) { u = srcU + (line >> 1) * srcStrideUV; v = srcV + (line >> 1) * srcStrideUV; } else { @@ -448,12 +455,12 @@ static int __stdcall yuv2rgb_convert(const uint8_t *srcY, const uint8_t *srcU, c yuv2rgb_convert_pixels(y, u, v, rgb, srcStrideY, srcStrideUV, dstStride, line, coeffs, lineDither, 0); } - if (inputFormat == LAVPixFmt_YUV420 || inputFormat == LAVPixFmt_NV12 || lastLineInOddHeight) { + if (inputFormat == LAVPixFmt_YUV420 || inputFormat == LAVPixFmt_NV12 || inputFormat == LAVPixFmt_P010 || lastLineInOddHeight) { if (sliceYEnd == height) { if (dithertype == LAVDither_Random) lineDither = dithers + ((height - 2) * 24 * DITHER_STEPS); y = srcY + (height - 1) * srcStrideY; - if (inputFormat == LAVPixFmt_YUV420 || inputFormat == LAVPixFmt_NV12) { + if (inputFormat == LAVPixFmt_YUV420 || inputFormat == LAVPixFmt_NV12 || inputFormat == LAVPixFmt_P010) { u = srcU + ((height >> 1) - 1) * srcStrideUV; v = srcV + ((height >> 1) - 1) * srcStrideUV; } else { @@ -506,6 +513,10 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv_rgb) if (inputFormat == LAVPixFmt_YUV420bX || inputFormat == LAVPixFmt_YUV422bX || inputFormat == LAVPixFmt_YUV444bX) inputFormat = (LAVPixelFormat)(inputFormat - 1); + // P010 has the data in the high bits, so set shift appropriately + if (inputFormat == LAVPixFmt_P010) + shift = 8; + YUVRGBConversionFunc convFn = m_RGBConvFuncs[outFmt][ditherMode][bYCgCo][inputFormat][shift]; if (convFn == nullptr) { ASSERT(0); @@ -516,7 +527,7 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv_rgb) if (m_NumThreads <= 1) { convFn(src[0], src[1], src[2], dst[0], width, height, srcStride[0], srcStride[1], dstStride[0], 0, height, coeffs, dithers); } else { - const int is_odd = (inputFormat == LAVPixFmt_YUV420 || inputFormat == LAVPixFmt_NV12); + const int is_odd = (inputFormat == LAVPixFmt_YUV420 || inputFormat == LAVPixFmt_NV12 || inputFormat == LAVPixFmt_P010); const ptrdiff_t lines_per_thread = (height / m_NumThreads)&~1; Concurrency::parallel_for(0, m_NumThreads, [&](int i) { @@ -558,6 +569,7 @@ void CLAVPixFmtConverter::InitRGBConvDispatcher() ZeroMemory(&m_RGBConvFuncs, sizeof(m_RGBConvFuncs)); CONV_FUNC(LAVPixFmt_NV12, 0); + CONV_FUNC(LAVPixFmt_P010, 8); CONV_FUNCX(LAVPixFmt_YUV420); CONV_FUNCX(LAVPixFmt_YUV422); -- cgit v1.2.3