From 93ef8e80912f4868adcda05907a070551cd0947f Mon Sep 17 00:00:00 2001
From: Hendrik Leppkes <h.leppkes@gmail.com>
Date: Sat, 19 Nov 2016 23:57:50 +0100
Subject: Support P016 as an internal pixel format

---
 decoder/LAVVideo/pixconv/yuv2rgb.cpp | 29 +++++++++++++++--------------
 1 file changed, 15 insertions(+), 14 deletions(-)

(limited to 'decoder/LAVVideo/pixconv')

diff --git a/decoder/LAVVideo/pixconv/yuv2rgb.cpp b/decoder/LAVVideo/pixconv/yuv2rgb.cpp
index 901af97c..cfc6e6de 100644
--- a/decoder/LAVVideo/pixconv/yuv2rgb.cpp
+++ b/decoder/LAVVideo/pixconv/yuv2rgb.cpp
@@ -38,7 +38,7 @@ static int yuv2rgb_convert_pixels(const uint8_t* &srcY, const uint8_t* &srcU, co
   xmm7 = _mm_setzero_si128 ();
 
   // Shift > 0 is for 9/10 bit formats
-  if (inputFormat == LAVPixFmt_P010) {
+  if (inputFormat == LAVPixFmt_P010 || inputFormat == LAVPixFmt_P016) {
     // Load 2 32-bit macro pixels from each line, which contain 4 UV at 16-bit each samples
     PIXCONV_LOAD_PIXEL8(xmm0, srcU);
     PIXCONV_LOAD_PIXEL8(xmm2, srcU+srcStrideUV);
@@ -78,8 +78,8 @@ static int yuv2rgb_convert_pixels(const uint8_t* &srcY, const uint8_t* &srcU, co
   // xmm0/xmm2 contain 4 interleaved U/V samples from two lines each in the 16bit parts, still in their native bitdepth
 
   // Chroma upsampling required
-  if (inputFormat == LAVPixFmt_YUV420 || inputFormat == LAVPixFmt_NV12 || inputFormat == LAVPixFmt_YUV422 || inputFormat == LAVPixFmt_P010) {
-    if (inputFormat == LAVPixFmt_P010) {
+  if (inputFormat == LAVPixFmt_YUV420 || inputFormat == LAVPixFmt_NV12 || inputFormat == LAVPixFmt_YUV422 || inputFormat == LAVPixFmt_P010 || inputFormat == LAVPixFmt_P016) {
+    if (inputFormat == LAVPixFmt_P010 || inputFormat == LAVPixFmt_P016) {
       srcU += 8;
       srcV += 8;
     } else if (shift > 0 || inputFormat == LAVPixFmt_NV12) {
@@ -110,7 +110,7 @@ static int yuv2rgb_convert_pixels(const uint8_t* &srcY, const uint8_t* &srcU, co
     }
 
     // 4:2:0 - upsample to 4:2:2 using 75:25
-    if (inputFormat == LAVPixFmt_YUV420 || inputFormat == LAVPixFmt_NV12 || inputFormat == LAVPixFmt_P010) {
+    if (inputFormat == LAVPixFmt_YUV420 || inputFormat == LAVPixFmt_NV12 || inputFormat == LAVPixFmt_P010 || inputFormat == LAVPixFmt_P016) {
       // Too high bitdepth, shift down to 14-bit
       if (shift >= 7) {
         xmm0 = _mm_srli_epi16(xmm0, shift-6);
@@ -173,7 +173,7 @@ static int yuv2rgb_convert_pixels(const uint8_t* &srcY, const uint8_t* &srcU, co
     // Shift the result to 12 bit
     // For 10-bit input, we need to shift one bit off, or we exceed the allowed processing depth
     // For 8-bit, we need to add one bit
-    if ((inputFormat == LAVPixFmt_YUV420 && shift > 1) || inputFormat == LAVPixFmt_P010) {
+    if ((inputFormat == LAVPixFmt_YUV420 && shift > 1) || inputFormat == LAVPixFmt_P010 || inputFormat == LAVPixFmt_P016) {
       if (shift >= 5) {
         xmm1 = _mm_srli_epi16(xmm1, 4);
         xmm3 = _mm_srli_epi16(xmm3, 4);
@@ -421,7 +421,7 @@ static int __stdcall yuv2rgb_convert(const uint8_t *srcY, const uint8_t *srcU, c
   _mm_sfence();
 
   // 4:2:0 needs special handling for the first and the last line
-  if (inputFormat == LAVPixFmt_YUV420 || inputFormat == LAVPixFmt_NV12 || inputFormat == LAVPixFmt_P010) {
+  if (inputFormat == LAVPixFmt_YUV420 || inputFormat == LAVPixFmt_NV12 || inputFormat == LAVPixFmt_P010 || inputFormat == LAVPixFmt_P016) {
     if (line == 0) {
       for (ptrdiff_t i = 0; i < endx; i += 4) {
         yuv2rgb_convert_pixels<inputFormat, shift, outFmt, 0, dithertype, ycgco>(y, u, v, rgb, 0, 0, 0, line, coeffs, lineDither, i);
@@ -442,7 +442,7 @@ static int __stdcall yuv2rgb_convert(const uint8_t *srcY, const uint8_t *srcU, c
       lineDither = dithers + (line * 24 * DITHER_STEPS);
     y = srcY + line * srcStrideY;
 
-    if (inputFormat == LAVPixFmt_YUV420 || inputFormat == LAVPixFmt_NV12 || inputFormat == LAVPixFmt_P010) {
+    if (inputFormat == LAVPixFmt_YUV420 || inputFormat == LAVPixFmt_NV12 || inputFormat == LAVPixFmt_P010 || inputFormat == LAVPixFmt_P016) {
       u = srcU + (line >> 1) * srcStrideUV;
       v = srcV + (line >> 1) * srcStrideUV;
     } else {
@@ -458,12 +458,12 @@ static int __stdcall yuv2rgb_convert(const uint8_t *srcY, const uint8_t *srcU, c
     yuv2rgb_convert_pixels<inputFormat, shift, outFmt, 1, dithertype, ycgco>(y, u, v, rgb, srcStrideY, srcStrideUV, dstStride, line, coeffs, lineDither, 0);
   }
 
-  if (inputFormat == LAVPixFmt_YUV420 || inputFormat == LAVPixFmt_NV12 || inputFormat == LAVPixFmt_P010 || lastLineInOddHeight) {
+  if (inputFormat == LAVPixFmt_YUV420 || inputFormat == LAVPixFmt_NV12 || inputFormat == LAVPixFmt_P010 || inputFormat == LAVPixFmt_P016 || lastLineInOddHeight) {
     if (sliceYEnd == height) {
       if (dithertype == LAVDither_Random)
         lineDither = dithers + ((height - 2) * 24 * DITHER_STEPS);
       y = srcY + (height - 1) * srcStrideY;
-      if (inputFormat == LAVPixFmt_YUV420 || inputFormat == LAVPixFmt_NV12 || inputFormat == LAVPixFmt_P010) {
+      if (inputFormat == LAVPixFmt_YUV420 || inputFormat == LAVPixFmt_NV12 || inputFormat == LAVPixFmt_P010 || inputFormat == LAVPixFmt_P016) {
         u = srcU + ((height >> 1) - 1)  * srcStrideUV;
         v = srcV + ((height >> 1) - 1)  * srcStrideUV;
       } else {
@@ -518,8 +518,8 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv_rgb)
   if (inputFormat == LAVPixFmt_YUV420bX || inputFormat == LAVPixFmt_YUV422bX || inputFormat == LAVPixFmt_YUV444bX)
     inputFormat = (LAVPixelFormat)(inputFormat - 1);
 
-  // P010 has the data in the high bits, so set shift appropriately
-  if (inputFormat == LAVPixFmt_P010)
+  // P010/P016 have their data in the high bits of each 16-bit sample, so set shift appropriately
+  if (inputFormat == LAVPixFmt_P010 || inputFormat == LAVPixFmt_P016)
     shift = 8;
 
   YUVRGBConversionFunc convFn = m_RGBConvFuncs[outFmt][ditherMode][bYCgCo][inputFormat][shift];
@@ -532,7 +532,7 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv_rgb)
   if (m_NumThreads <= 1) {
     convFn(src[0], src[1], src[2], dst[0], width, height, srcStride[0], srcStride[1], dstStride[0], 0, height, coeffs, dithers);
   } else {
-    const int is_odd = (inputFormat == LAVPixFmt_YUV420 || inputFormat == LAVPixFmt_NV12 || inputFormat == LAVPixFmt_P010);
+    const int is_odd = (inputFormat == LAVPixFmt_YUV420 || inputFormat == LAVPixFmt_NV12 || inputFormat == LAVPixFmt_P010 || inputFormat == LAVPixFmt_P016);
     const ptrdiff_t lines_per_thread = (height / m_NumThreads)&~1;
 
     Concurrency::parallel_for(0, m_NumThreads, [&](int i) {
@@ -573,8 +573,9 @@ void CLAVPixFmtConverter::InitRGBConvDispatcher()
 {
   ZeroMemory(&m_RGBConvFuncs, sizeof(m_RGBConvFuncs));
 
-  CONV_FUNC(LAVPixFmt_NV12,   0);
-  CONV_FUNC(LAVPixFmt_P010,   8);
+  CONV_FUNC(LAVPixFmt_NV12, 0);
+  CONV_FUNC(LAVPixFmt_P010, 8);
+  CONV_FUNC(LAVPixFmt_P016, 8);
 
   CONV_FUNCX(LAVPixFmt_YUV420);
   CONV_FUNCX(LAVPixFmt_YUV422);
-- 
cgit v1.2.3