From 036570bc1479ea3090c072a497f253df42f761e5 Mon Sep 17 00:00:00 2001
From: Hendrik Leppkes <h.leppkes@gmail.com>
Date: Sun, 1 Feb 2015 22:28:29 +0100
Subject: Basic support for P010 as an internal pixel format

---
 decoder/LAVVideo/LAVPixFmtConverter.cpp |  7 +++++--
 decoder/LAVVideo/decoders/ILAVDecoder.h |  1 +
 decoder/LAVVideo/decoders/avcodec.cpp   |  2 ++
 decoder/LAVVideo/decoders/pixfmt.cpp    |  2 ++
 decoder/LAVVideo/pixconv/yuv2rgb.cpp    | 32 ++++++++++++++++++++++----------
 5 files changed, 32 insertions(+), 12 deletions(-)

(limited to 'decoder')

diff --git a/decoder/LAVVideo/LAVPixFmtConverter.cpp b/decoder/LAVVideo/LAVPixFmtConverter.cpp
index a1e18387..3cbdec6e 100644
--- a/decoder/LAVVideo/LAVPixFmtConverter.cpp
+++ b/decoder/LAVVideo/LAVPixFmtConverter.cpp
@@ -41,6 +41,7 @@
  * YUV444         -       -       -       -       -       x       x       -       -       -       -       -       -       -       -      x       x
  * YUV444bX       -       -       -       -       -       x       x       -       -       -       x       -       -       -       x      x       x
  * NV12           x       x       -       x       x       -       -       -       -       -       -       -       -       -       -      x       x
+ * P010           -       -       -       -       -       -       -       x       -       -       -       -       x       -       -      x       x
  * YUY2           -       -       -       -       -       -       -       -       -       -       -       -       -       -       -      -       -
  * RGB24          -       -       -       -       -       -       -       -       -       -       -       -       -       -       -      x       -
  * RGB32          -       -       -       -       -       -       -       -       -       -       -       -       -       -       -      -       x
@@ -78,6 +79,7 @@ static LAV_INOUT_PIXFMT_MAP lav_pixfmt_map[] = {
   // 4:2:0
   { LAVPixFmt_YUV420, 8,    { PIXOUT_420_8, PIXOUT_420_10, PIXOUT_420_16, PIXOUT_422_16, PIXOUT_422_10, PIXOUT_422_8, PIXOUT_RGB_8, PIXOUT_RGB_16, PIXOUT_444_16, PIXOUT_444_10, PIXOUT_444_8 } },
   { LAVPixFmt_NV12,   8,    { PIXOUT_420_8, PIXOUT_420_10, PIXOUT_420_16, PIXOUT_422_16, PIXOUT_422_10, PIXOUT_422_8, PIXOUT_RGB_8, PIXOUT_RGB_16, PIXOUT_444_16, PIXOUT_444_10, PIXOUT_444_8 } },
+  { LAVPixFmt_P010,   10,   { PIXOUT_420_10, PIXOUT_420_16, PIXOUT_420_8, PIXOUT_422_16, PIXOUT_422_10, PIXOUT_422_8, PIXOUT_RGB_8, PIXOUT_RGB_16, PIXOUT_444_16, PIXOUT_444_10, PIXOUT_444_8 } },
 
   { LAVPixFmt_YUV420bX, 10, { PIXOUT_420_10, PIXOUT_420_16, PIXOUT_420_8, PIXOUT_422_16, PIXOUT_422_10, PIXOUT_422_8, PIXOUT_RGB_8, PIXOUT_RGB_16, PIXOUT_444_16, PIXOUT_444_10, PIXOUT_444_8 } },
   { LAVPixFmt_YUV420bX, 16, { PIXOUT_420_16, PIXOUT_420_10, PIXOUT_420_8, PIXOUT_422_16, PIXOUT_422_10, PIXOUT_422_8, PIXOUT_RGB_8, PIXOUT_RGB_16, PIXOUT_444_16, PIXOUT_444_10, PIXOUT_444_8 } },
@@ -319,7 +321,8 @@ void CLAVPixFmtConverter::SelectConvertFunction()
     m_RequiredAlignment = 0;
   } else if ((m_OutputPixFmt == LAVOutPixFmt_RGB32 && (m_InputPixFmt == LAVPixFmt_RGB32 || m_InputPixFmt == LAVPixFmt_ARGB32))
     || (m_OutputPixFmt == LAVOutPixFmt_RGB24 && m_InputPixFmt == LAVPixFmt_RGB24) || (m_OutputPixFmt == LAVOutPixFmt_RGB48 && m_InputPixFmt == LAVPixFmt_RGB48)
-    || (m_OutputPixFmt == LAVOutPixFmt_NV12 && m_InputPixFmt == LAVPixFmt_NV12)) {
+    || (m_OutputPixFmt == LAVOutPixFmt_NV12 && m_InputPixFmt == LAVPixFmt_NV12)
+    || ((m_OutputPixFmt == LAVOutPixFmt_P010 || m_OutputPixFmt == LAVOutPixFmt_P016) && m_InputPixFmt == LAVPixFmt_P010)) {
     if (cpu & AV_CPU_FLAG_SSE2)
       convert = &CLAVPixFmtConverter::plane_copy_sse2;
     else
@@ -359,7 +362,7 @@ void CLAVPixFmtConverter::SelectConvertFunction()
             && (m_InputPixFmt == LAVPixFmt_YUV420 || m_InputPixFmt == LAVPixFmt_YUV420bX
              || m_InputPixFmt == LAVPixFmt_YUV422 || m_InputPixFmt == LAVPixFmt_YUV422bX
              || m_InputPixFmt == LAVPixFmt_YUV444 || m_InputPixFmt == LAVPixFmt_YUV444bX
-             || m_InputPixFmt == LAVPixFmt_NV12)) {
+             || m_InputPixFmt == LAVPixFmt_NV12   || m_InputPixFmt == LAVPixFmt_P010)) {
       convert = &CLAVPixFmtConverter::convert_yuv_rgb;
       if (m_OutputPixFmt == LAVOutPixFmt_RGB32) {
         m_RequiredAlignment = 4;
diff --git a/decoder/LAVVideo/decoders/ILAVDecoder.h b/decoder/LAVVideo/decoders/ILAVDecoder.h
index 93660cc3..07681826 100644
--- a/decoder/LAVVideo/decoders/ILAVDecoder.h
+++ b/decoder/LAVVideo/decoders/ILAVDecoder.h
@@ -45,6 +45,7 @@ typedef enum LAVPixelFormat {
   /* packed/half-packed YUV */
   LAVPixFmt_NV12,        ///< YUV 4:2:0, U/V interleaved
   LAVPixFmt_YUY2,        ///< YUV 4:2:2, packed, YUYV order
+  LAVPixFmt_P010,        ///< YUV 4:2:0, 10-bit, U/V interleaved
 
   /* RGB */
   LAVPixFmt_RGB24,       ///< RGB24, in BGR order
diff --git a/decoder/LAVVideo/decoders/avcodec.cpp b/decoder/LAVVideo/decoders/avcodec.cpp
index bf72aa27..6e77355e 100644
--- a/decoder/LAVVideo/decoders/avcodec.cpp
+++ b/decoder/LAVVideo/decoders/avcodec.cpp
@@ -236,6 +236,8 @@ static struct PixelFormatMapping {
 
   { AV_PIX_FMT_YUVJ411P,  LAVPixFmt_YUV422, TRUE },
 
+  { AV_PIX_FMT_P010LE, LAVPixFmt_P010, FALSE, 10 },
+
   { AV_PIX_FMT_DXVA2_VLD, LAVPixFmt_DXVA2, FALSE },
 };
 
diff --git a/decoder/LAVVideo/decoders/pixfmt.cpp b/decoder/LAVVideo/decoders/pixfmt.cpp
index caf0100f..cfd4e1d7 100644
--- a/decoder/LAVVideo/decoders/pixfmt.cpp
+++ b/decoder/LAVVideo/decoders/pixfmt.cpp
@@ -29,6 +29,7 @@ static LAVPixFmtDesc lav_pixfmt_desc[] = {
   { 2, 3, { 1, 1, 1 }, { 1, 1, 1 } },       ///< LAVPixFmt_YUV444bX
   { 1, 2, { 1, 1 },    { 1, 2 }    },       ///< LAVPixFmt_NV12
   { 2, 1, { 1 },       { 1 }       },       ///< LAVPixFmt_YUY2
+  { 2, 2, { 1, 1 },    { 1, 2 }    },       ///< LAVPixFmt_P010
   { 3, 1, { 1 },       { 1 }       },       ///< LAVPixFmt_RGB24
   { 4, 1, { 1 },       { 1 }       },       ///< LAVPixFmt_RGB32
   { 4, 1, { 1 },       { 1 }       },       ///< LAVPixFmt_ARGB32
@@ -49,6 +50,7 @@ static struct {
   { LAVPixFmt_YUV444, AV_PIX_FMT_YUV444P },
   { LAVPixFmt_NV12,   AV_PIX_FMT_NV12    },
   { LAVPixFmt_YUY2,   AV_PIX_FMT_YUYV422 },
+  { LAVPixFmt_P010,   AV_PIX_FMT_P010    },
   { LAVPixFmt_RGB24,  AV_PIX_FMT_BGR24   },
   { LAVPixFmt_RGB32,  AV_PIX_FMT_BGRA    },
   { LAVPixFmt_ARGB32, AV_PIX_FMT_BGRA    },
diff --git a/decoder/LAVVideo/pixconv/yuv2rgb.cpp b/decoder/LAVVideo/pixconv/yuv2rgb.cpp
index af124a2c..b70fb11a 100644
--- a/decoder/LAVVideo/pixconv/yuv2rgb.cpp
+++ b/decoder/LAVVideo/pixconv/yuv2rgb.cpp
@@ -38,7 +38,11 @@ static int yuv2rgb_convert_pixels(const uint8_t* &srcY, const uint8_t* &srcU, co
   xmm7 = _mm_setzero_si128 ();
 
   // Shift > 0 is for 9/10 bit formats
-  if (shift > 0) {
+  if (inputFormat == LAVPixFmt_P010) {
+    // Load 2 32-bit macro pixels from each line, which together hold 4 U/V samples of 16 bits each
+    PIXCONV_LOAD_PIXEL8(xmm0, srcU);
+    PIXCONV_LOAD_PIXEL8(xmm2, srcU+srcStrideUV);
+  } else if (shift > 0) {
     // Load 4 U/V values from line 0/1 into registers
     PIXCONV_LOAD_4PIXEL16(xmm1, srcU);
     PIXCONV_LOAD_4PIXEL16(xmm3, srcU+srcStrideUV);
@@ -74,8 +78,11 @@ static int yuv2rgb_convert_pixels(const uint8_t* &srcY, const uint8_t* &srcU, co
   // xmm0/xmm2 contain 4 interleaved U/V samples from two lines each in the 16bit parts, still in their native bitdepth
 
   // Chroma upsampling required
-  if (inputFormat == LAVPixFmt_YUV420 || inputFormat == LAVPixFmt_NV12 || inputFormat == LAVPixFmt_YUV422) {
-    if (shift > 0 || inputFormat == LAVPixFmt_NV12) {
+  if (inputFormat == LAVPixFmt_YUV420 || inputFormat == LAVPixFmt_NV12 || inputFormat == LAVPixFmt_YUV422 || inputFormat == LAVPixFmt_P010) {
+    if (inputFormat == LAVPixFmt_P010) {
+      srcU += 8;
+      srcV += 8;
+    } else if (shift > 0 || inputFormat == LAVPixFmt_NV12) {
       srcU += 4;
       srcV += 4;
     } else {
@@ -103,7 +110,7 @@ static int yuv2rgb_convert_pixels(const uint8_t* &srcY, const uint8_t* &srcU, co
     }
 
     // 4:2:0 - upsample to 4:2:2 using 75:25
-    if (inputFormat == LAVPixFmt_YUV420 || inputFormat == LAVPixFmt_NV12) {
+    if (inputFormat == LAVPixFmt_YUV420 || inputFormat == LAVPixFmt_NV12 || inputFormat == LAVPixFmt_P010) {
       // Too high bitdepth, shift down to 14-bit
       if (shift >= 7) {
         xmm0 = _mm_srli_epi16(xmm0, shift-6);
@@ -166,7 +173,7 @@ static int yuv2rgb_convert_pixels(const uint8_t* &srcY, const uint8_t* &srcU, co
     // Shift the result to 12 bit
     // For 10-bit input, we need to shift one bit off, or we exceed the allowed processing depth
     // For 8-bit, we need to add one bit
-    if (inputFormat == LAVPixFmt_YUV420 && shift > 1) {
+    if ((inputFormat == LAVPixFmt_YUV420 && shift > 1) || inputFormat == LAVPixFmt_P010) {
       if (shift >= 5) {
         xmm1 = _mm_srli_epi16(xmm1, 4);
         xmm3 = _mm_srli_epi16(xmm3, 4);
@@ -411,7 +418,7 @@ static int __stdcall yuv2rgb_convert(const uint8_t *srcY, const uint8_t *srcU, c
   _mm_sfence();
 
   // 4:2:0 needs special handling for the first and the last line
-  if (inputFormat == LAVPixFmt_YUV420 || inputFormat == LAVPixFmt_NV12) {
+  if (inputFormat == LAVPixFmt_YUV420 || inputFormat == LAVPixFmt_NV12 || inputFormat == LAVPixFmt_P010) {
     if (line == 0) {
       for (ptrdiff_t i = 0; i < endx; i += 4) {
         yuv2rgb_convert_pixels<inputFormat, shift, outFmt, 0, dithertype, ycgco>(y, u, v, rgb, 0, 0, 0, line, coeffs, lineDither, i);
@@ -432,7 +439,7 @@ static int __stdcall yuv2rgb_convert(const uint8_t *srcY, const uint8_t *srcU, c
       lineDither = dithers + (line * 24 * DITHER_STEPS);
     y = srcY + line * srcStrideY;
 
-    if (inputFormat == LAVPixFmt_YUV420 || inputFormat == LAVPixFmt_NV12) {
+    if (inputFormat == LAVPixFmt_YUV420 || inputFormat == LAVPixFmt_NV12 || inputFormat == LAVPixFmt_P010) {
       u = srcU + (line >> 1) * srcStrideUV;
       v = srcV + (line >> 1) * srcStrideUV;
     } else {
@@ -448,12 +455,12 @@ static int __stdcall yuv2rgb_convert(const uint8_t *srcY, const uint8_t *srcU, c
     yuv2rgb_convert_pixels<inputFormat, shift, outFmt, 1, dithertype, ycgco>(y, u, v, rgb, srcStrideY, srcStrideUV, dstStride, line, coeffs, lineDither, 0);
   }
 
-  if (inputFormat == LAVPixFmt_YUV420 || inputFormat == LAVPixFmt_NV12 || lastLineInOddHeight) {
+  if (inputFormat == LAVPixFmt_YUV420 || inputFormat == LAVPixFmt_NV12 || inputFormat == LAVPixFmt_P010 || lastLineInOddHeight) {
     if (sliceYEnd == height) {
       if (dithertype == LAVDither_Random)
         lineDither = dithers + ((height - 2) * 24 * DITHER_STEPS);
       y = srcY + (height - 1) * srcStrideY;
-      if (inputFormat == LAVPixFmt_YUV420 || inputFormat == LAVPixFmt_NV12) {
+      if (inputFormat == LAVPixFmt_YUV420 || inputFormat == LAVPixFmt_NV12 || inputFormat == LAVPixFmt_P010) {
         u = srcU + ((height >> 1) - 1)  * srcStrideUV;
         v = srcV + ((height >> 1) - 1)  * srcStrideUV;
       } else {
@@ -506,6 +513,10 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv_rgb)
   if (inputFormat == LAVPixFmt_YUV420bX || inputFormat == LAVPixFmt_YUV422bX || inputFormat == LAVPixFmt_YUV444bX)
     inputFormat = (LAVPixelFormat)(inputFormat - 1);
 
+  // P010 keeps its 10-bit samples in the high bits of each 16-bit word, so process it with the 16-bit shift
+  if (inputFormat == LAVPixFmt_P010)
+    shift = 8;
+
   YUVRGBConversionFunc convFn = m_RGBConvFuncs[outFmt][ditherMode][bYCgCo][inputFormat][shift];
   if (convFn == nullptr) {
     ASSERT(0);
@@ -516,7 +527,7 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv_rgb)
   if (m_NumThreads <= 1) {
     convFn(src[0], src[1], src[2], dst[0], width, height, srcStride[0], srcStride[1], dstStride[0], 0, height, coeffs, dithers);
   } else {
-    const int is_odd = (inputFormat == LAVPixFmt_YUV420 || inputFormat == LAVPixFmt_NV12);
+    const int is_odd = (inputFormat == LAVPixFmt_YUV420 || inputFormat == LAVPixFmt_NV12 || inputFormat == LAVPixFmt_P010);
     const ptrdiff_t lines_per_thread = (height / m_NumThreads)&~1;
 
     Concurrency::parallel_for(0, m_NumThreads, [&](int i) {
@@ -558,6 +569,7 @@ void CLAVPixFmtConverter::InitRGBConvDispatcher()
   ZeroMemory(&m_RGBConvFuncs, sizeof(m_RGBConvFuncs));
 
   CONV_FUNC(LAVPixFmt_NV12,   0);
+  CONV_FUNC(LAVPixFmt_P010,   8);
 
   CONV_FUNCX(LAVPixFmt_YUV420);
   CONV_FUNCX(LAVPixFmt_YUV422);
-- 
cgit v1.2.3