From e5580bd5127a40380e966e9da3607b7f7c98c0a9 Mon Sep 17 00:00:00 2001
From: Hendrik Leppkes <h.leppkes@gmail.com>
Date: Fri, 15 Nov 2013 09:59:16 +0100
Subject: Move plane/stride calculation out of the individual pixfmt converters

---
 decoder/LAVVideo/LAVPixFmtConverter.cpp       |  46 +++-
 decoder/LAVVideo/LAVPixFmtConverter.h         |  40 +---
 decoder/LAVVideo/pixconv/convert_generic.cpp  | 288 ++++++++++++--------------
 decoder/LAVVideo/pixconv/interleave.cpp       |   6 +-
 decoder/LAVVideo/pixconv/rgb2rgb_unscaled.cpp |  10 +-
 decoder/LAVVideo/pixconv/yuv2rgb.cpp          |  10 +-
 decoder/LAVVideo/pixconv/yuv2yuv_unscaled.cpp | 148 ++++++-------
 decoder/LAVVideo/pixconv/yuv420_yuy2.cpp      |   6 +-
 decoder/LAVVideo/pixconv/yuv444_ayuv.cpp      |   8 +-
 9 files changed, 265 insertions(+), 297 deletions(-)

(limited to 'decoder/LAVVideo')

diff --git a/decoder/LAVVideo/LAVPixFmtConverter.cpp b/decoder/LAVVideo/LAVPixFmtConverter.cpp
index 5c690f98..69ed713d 100644
--- a/decoder/LAVVideo/LAVPixFmtConverter.cpp
+++ b/decoder/LAVVideo/LAVPixFmtConverter.cpp
@@ -407,6 +407,41 @@ void CLAVPixFmtConverter::SelectConvertFunction()
   }
 }
 
+// Converts pFrame into dst. dstStride is not in bytes (it is scaled by codedbytes below). If dst fails the
+// converter's alignment needs, converts into an internal aligned buffer and re-strides into dst afterwards.
+HRESULT CLAVPixFmtConverter::Convert(LAVFrame *pFrame, uint8_t *dst, int width, int height, int dstStride) {
+  uint8_t *out = dst;
+  int outStride = dstStride, i;
+  // Check if we have proper pixel alignment and the dst memory is actually aligned
+  if (m_RequiredAlignment && (FFALIGN(dstStride, m_RequiredAlignment) != dstStride || ((uintptr_t)dst % 16u))) {
+    outStride = FFALIGN(dstStride, m_RequiredAlignment);
+    size_t requiredSize = (outStride * height * lav_pixfmt_desc[m_OutputPixFmt].bpp) << 3;
+    if (requiredSize > m_nAlignedBufferSize || !m_pAlignedBuffer) {
+      DbgLog((LOG_TRACE, 10, L"::Convert(): Conversion requires a bigger stride (need: %d, have: %d), allocating buffer...", outStride, dstStride));
+      av_freep(&m_pAlignedBuffer);
+      m_pAlignedBuffer = (uint8_t *)av_malloc(requiredSize+FF_INPUT_BUFFER_PADDING_SIZE);
+      // Record the size only once the allocation succeeded, so a failed allocation is retried on the next call
+      m_nAlignedBufferSize = m_pAlignedBuffer ? requiredSize : 0;
+      if (!m_pAlignedBuffer) return E_OUTOFMEMORY;
+    }
+    out = m_pAlignedBuffer;
+  }
+
+  // Per-plane pointers and byte strides handed to the converter; each plane starts where the previous one ends
+  const int byteStride = outStride * lav_pixfmt_desc[m_OutputPixFmt].codedbytes;
+  uint8_t *dstArray[4] = {out};
+  int dstStrideArray[4] = {byteStride};
+  for (i = 1; i < lav_pixfmt_desc[m_OutputPixFmt].planes; ++i) {
+    dstArray[i] = dstArray[i-1] + dstStrideArray[i-1] * (height / lav_pixfmt_desc[m_OutputPixFmt].planeHeight[i-1]);
+    dstStrideArray[i] = byteStride / lav_pixfmt_desc[m_OutputPixFmt].planeWidth[i];
+  }
+
+  HRESULT hr = (this->*convert)(pFrame->data, pFrame->stride, dstArray, dstStrideArray, width, height, m_InputPixFmt, m_InBpp, m_OutputPixFmt);
+  if (out != dst)
+    ChangeStride(out, outStride, dst, dstStride, width, height, m_OutputPixFmt);
+  return hr;
+}
+
 DECLARE_CONV_FUNC_IMPL(plane_copy)
 {
   LAVOutPixFmtDesc desc = lav_pixfmt_desc[outputFormat];
@@ -414,18 +449,17 @@ DECLARE_CONV_FUNC_IMPL(plane_copy)
   int plane, line;
 
   const int widthBytes = width * desc.codedbytes;
-  const int dstStrideBytes = dstStride * desc.codedbytes;
   const int planes = max(desc.planes, 1);
 
   for (plane = 0; plane < planes; plane++) {
-    const int planeWidth     = widthBytes     / desc.planeWidth[plane];
-    const int planeHeight    = height         / desc.planeHeight[plane];
-    const int dstPlaneStride = dstStrideBytes / desc.planeWidth[plane];
+    const int planeWidth     = widthBytes / desc.planeWidth[plane];
+    const int planeHeight    = height     / desc.planeHeight[plane];
     const uint8_t *srcBuf = src[plane];
+    uint8_t *dstBuf = dst[plane];
     for (line = 0; line < planeHeight; ++line) {
-      memcpy(dst, srcBuf, planeWidth);
+      memcpy(dstBuf, srcBuf, planeWidth);
       srcBuf += srcStride[plane];
-      dst += dstPlaneStride;
+      dstBuf += dstStride[plane];
     }
   }
 
diff --git a/decoder/LAVVideo/LAVPixFmtConverter.h b/decoder/LAVVideo/LAVPixFmtConverter.h
index 5a3d88b2..b0faff7d 100644
--- a/decoder/LAVVideo/LAVPixFmtConverter.h
+++ b/decoder/LAVVideo/LAVPixFmtConverter.h
@@ -22,7 +22,7 @@
 #include "LAVVideoSettings.h"
 #include "decoders/ILAVDecoder.h"
 
-#define CONV_FUNC_PARAMS (const uint8_t* const src[4], const int srcStride[4], uint8_t *dst, int dstStride, int width, int height, LAVPixelFormat inputFormat, int bpp, LAVOutPixFmts outputFormat)
+#define CONV_FUNC_PARAMS (const uint8_t* const src[4], const int srcStride[4], uint8_t* dst[4], int dstStride[4], int width, int height, LAVPixelFormat inputFormat, int bpp, LAVOutPixFmts outputFormat)
 
 #define DECLARE_CONV_FUNC(name) \
   HRESULT name CONV_FUNC_PARAMS
@@ -73,27 +73,7 @@ public:
   void GetMediaType(CMediaType *mt, int index, LONG biWidth, LONG biHeight, DWORD dwAspectX, DWORD dwAspectY, REFERENCE_TIME rtAvgTime, BOOL bInterlaced = TRUE, BOOL bVIH1 = FALSE);
   BOOL IsAllowedSubtype(const GUID *guid);
 
-  inline HRESULT Convert(LAVFrame *pFrame, uint8_t *dst, int width, int height, int dstStride) {
-    uint8_t *out = dst;
-    int outStride = dstStride;
-    // Check if we have proper pixel alignment and the dst memory is actually aligned
-    if (m_RequiredAlignment && (FFALIGN(dstStride, m_RequiredAlignment) != dstStride || ((uintptr_t)dst % 16u))) {
-      outStride = FFALIGN(dstStride, m_RequiredAlignment);
-      size_t requiredSize = (outStride * height * lav_pixfmt_desc[m_OutputPixFmt].bpp) << 3;
-      if (requiredSize > m_nAlignedBufferSize) {
-        DbgLog((LOG_TRACE, 10, L"::Convert(): Conversion requires a bigger stride (need: %d, have: %d), allocating buffer...", outStride, dstStride));
-        av_freep(&m_pAlignedBuffer);
-        m_nAlignedBufferSize = requiredSize;
-        m_pAlignedBuffer = (uint8_t *)av_malloc(m_nAlignedBufferSize+FF_INPUT_BUFFER_PADDING_SIZE);
-      }
-      out = m_pAlignedBuffer;
-    }
-    HRESULT hr = (this->*convert)(pFrame->data, pFrame->stride, out, outStride, width, height, m_InputPixFmt, m_InBpp, m_OutputPixFmt);
-    if (out != dst) {
-      ChangeStride(out, outStride, dst, dstStride, width, height, m_OutputPixFmt);
-    }
-    return hr;
-  }
+  HRESULT Convert(LAVFrame *pFrame, uint8_t *dst, int width, int height, int dstStride);
 
   BOOL IsRGBConverterActive() { return m_bRGBConverter; }
 
@@ -108,14 +88,14 @@ private:
   void SelectConvertFunction();
 
   // Helper functions for convert_generic
-  HRESULT swscale_scale(enum AVPixelFormat srcPix, enum AVPixelFormat dstPix, const uint8_t* const src[], const int srcStride[], BYTE *pOut, int width, int height, int stride, LAVOutPixFmtDesc pixFmtDesc, bool swapPlanes12 = false);
-  HRESULT ConvertTo422Packed(const uint8_t* const src[4], const int srcStride[4], uint8_t *dst, int width, int height, int dstStride);
-  HRESULT ConvertToAYUV(const uint8_t* const src[4], const int srcStride[4], uint8_t *dst, int width, int height, int dstStride);
-  HRESULT ConvertToPX1X(const uint8_t* const src[4], const int srcStride[4], uint8_t *dst, int width, int height, int dstStride, int chromaVertical);
-  HRESULT ConvertToY410(const uint8_t* const src[4], const int srcStride[4], uint8_t *dst, int width, int height, int dstStride);
-  HRESULT ConvertToY416(const uint8_t* const src[4], const int srcStride[4], uint8_t *dst, int width, int height, int dstStride);
-  HRESULT ConvertTov210(const uint8_t* const src[4], const int srcStride[4], uint8_t *dst, int width, int height, int dstStride);
-  HRESULT ConvertTov410(const uint8_t* const src[4], const int srcStride[4], uint8_t *dst, int width, int height, int dstStride);
+  HRESULT swscale_scale(enum AVPixelFormat srcPix, enum AVPixelFormat dstPix, const uint8_t* const src[], const int srcStride[], uint8_t* dst[], int width, int height, int dstStride[], LAVOutPixFmtDesc pixFmtDesc, bool swapPlanes12 = false);
+  HRESULT ConvertTo422Packed(const uint8_t* const src[4], const int srcStride[4], uint8_t *dst[4], int width, int height, int dstStride[4]);
+  HRESULT ConvertToAYUV(const uint8_t* const src[4], const int srcStride[4], uint8_t *dst[4], int width, int height, int dstStride[4]);
+  HRESULT ConvertToPX1X(const uint8_t* const src[4], const int srcStride[4], uint8_t *dst[4], int width, int height, int dstStride[4], int chromaVertical);
+  HRESULT ConvertToY410(const uint8_t* const src[4], const int srcStride[4], uint8_t *dst[4], int width, int height, int dstStride[4]);
+  HRESULT ConvertToY416(const uint8_t* const src[4], const int srcStride[4], uint8_t *dst[4], int width, int height, int dstStride[4]);
+  HRESULT ConvertTov210(const uint8_t* const src[4], const int srcStride[4], uint8_t *dst[4], int width, int height, int dstStride[4]);
+  HRESULT ConvertTov410(const uint8_t* const src[4], const int srcStride[4], uint8_t *dst[4], int width, int height, int dstStride[4]);
 
   void DestroySWScale() { if (m_pSwsContext) sws_freeContext(m_pSwsContext); m_pSwsContext = NULL; if (m_rgbCoeffs) _aligned_free(m_rgbCoeffs); m_rgbCoeffs = NULL; if (m_pRandomDithers) _aligned_free(m_pRandomDithers); m_pRandomDithers = NULL; };
   SwsContext *GetSWSContext(int width, int height, enum AVPixelFormat srcPix, enum AVPixelFormat dstPix, int flags);
diff --git a/decoder/LAVVideo/pixconv/convert_generic.cpp b/decoder/LAVVideo/pixconv/convert_generic.cpp
index 96c38051..4046ac81 100644
--- a/decoder/LAVVideo/pixconv/convert_generic.cpp
+++ b/decoder/LAVVideo/pixconv/convert_generic.cpp
@@ -67,10 +67,10 @@ DECLARE_CONV_FUNC_IMPL(convert_generic)
     hr = ConvertToY416(src, srcStride, dst, width, height, dstStride);
     break;
   case LAVOutPixFmt_RGB32:
-    hr = swscale_scale(inputFmt, AV_PIX_FMT_BGRA, src, srcStride, dst, width, height, dstStride * 4, lav_pixfmt_desc[m_OutputPixFmt]);
+    hr = swscale_scale(inputFmt, AV_PIX_FMT_BGRA, src, srcStride, dst, width, height, dstStride, lav_pixfmt_desc[m_OutputPixFmt]);
     break;
   case LAVOutPixFmt_RGB24:
-    hr = swscale_scale(inputFmt, AV_PIX_FMT_BGR24, src, srcStride, dst, width, height, dstStride * 3, lav_pixfmt_desc[m_OutputPixFmt]);
+    hr = swscale_scale(inputFmt, AV_PIX_FMT_BGR24, src, srcStride, dst, width, height, dstStride, lav_pixfmt_desc[m_OutputPixFmt]);
     break;
   case LAVOutPixFmt_v210:
     hr = ConvertTov210(src, srcStride, dst, width, height, dstStride);
@@ -85,7 +85,7 @@ DECLARE_CONV_FUNC_IMPL(convert_generic)
     hr = swscale_scale(inputFmt, AV_PIX_FMT_YUV444P, src, srcStride, dst, width, height, dstStride, lav_pixfmt_desc[m_OutputPixFmt], true);
     break;
   case LAVOutPixFmt_RGB48:
-    hr = swscale_scale(inputFmt, AV_PIX_FMT_RGB48LE, src, srcStride, dst, width, height, dstStride * 6, lav_pixfmt_desc[m_OutputPixFmt], true);
+    hr = swscale_scale(inputFmt, AV_PIX_FMT_RGB48LE, src, srcStride, dst, width, height, dstStride, lav_pixfmt_desc[m_OutputPixFmt], true);
     break;
   default:
     ASSERT(0);
@@ -137,27 +137,15 @@ inline SwsContext *CLAVPixFmtConverter::GetSWSContext(int width, int height, enu
   return m_pSwsContext;
 }
 
-HRESULT CLAVPixFmtConverter::swscale_scale(enum AVPixelFormat srcPix, enum AVPixelFormat dstPix, const uint8_t* const src[], const int srcStride[], BYTE *pOut, int width, int height, int stride, LAVOutPixFmtDesc pixFmtDesc, bool swapPlanes12)
+HRESULT CLAVPixFmtConverter::swscale_scale(enum AVPixelFormat srcPix, enum AVPixelFormat dstPix, const uint8_t* const src[], const int srcStride[], uint8_t* dst[], int width, int height, int dstStride[], LAVOutPixFmtDesc pixFmtDesc, bool swapPlanes12)
 {
-  uint8_t *dst[4];
-  int     dstStride[4];
-  int     i, ret;
+  int ret;
 
   SwsContext *ctx = GetSWSContext(width, height, srcPix, dstPix, SWS_BILINEAR);
   CheckPointer(m_pSwsContext, E_POINTER);
 
-  memset(dst, 0, sizeof(dst));
-  memset(dstStride, 0, sizeof(dstStride));
-
-  dst[0] = pOut;
-  dstStride[0] = stride;
-  for (i = 1; i < pixFmtDesc.planes; ++i) {
-    dst[i] = dst[i-1] + (stride / pixFmtDesc.planeWidth[i-1]) * (height / pixFmtDesc.planeHeight[i-1]);
-    dstStride[i] = stride / pixFmtDesc.planeWidth[i];
-  }
-
   if (swapPlanes12) {
-    BYTE *tmp = dst[1];
+    uint8_t *tmp = dst[1];
     dst[1] = dst[2];
     dst[2] = tmp;
   }
@@ -166,7 +154,7 @@ HRESULT CLAVPixFmtConverter::swscale_scale(enum AVPixelFormat srcPix, enum AVPix
   return S_OK;
 }
 
-HRESULT CLAVPixFmtConverter::ConvertTo422Packed(const uint8_t* const src[4], const int srcStride[4], BYTE *pOut, int width, int height, int dstStride)
+HRESULT CLAVPixFmtConverter::ConvertTo422Packed(const uint8_t* const src[4], const int srcStride[4], uint8_t* dst[], int width, int height, int dstStride[])
 {
   const BYTE *y = NULL;
   const BYTE *u = NULL;
@@ -176,28 +164,28 @@ HRESULT CLAVPixFmtConverter::ConvertTo422Packed(const uint8_t* const src[4], con
   BYTE *pTmpBuffer = NULL;
 
   if (m_InputPixFmt != LAVPixFmt_YUV422) {
-    uint8_t *dst[4] = {NULL};
-    int     dstStride[4] = {0};
+    uint8_t *tmp[4] = {NULL};
+    int     tmpStride[4] = {0};
     int scaleStride = FFALIGN(width, 32);
 
     pTmpBuffer = (BYTE *)av_malloc(height * scaleStride * 2);
 
-    dst[0] = pTmpBuffer;
-    dst[1] = dst[0] + (height * scaleStride);
-    dst[2] = dst[1] + (height * scaleStride / 2);
-    dst[3] = NULL;
+    tmp[0] = pTmpBuffer;
+    tmp[1] = tmp[0] + (height * scaleStride);
+    tmp[2] = tmp[1] + (height * scaleStride / 2);
+    tmp[3] = NULL;
 
-    dstStride[0] = scaleStride;
-    dstStride[1] = scaleStride / 2;
-    dstStride[2] = scaleStride / 2;
-    dstStride[3] = 0;
+    tmpStride[0] = scaleStride;
+    tmpStride[1] = scaleStride / 2;
+    tmpStride[2] = scaleStride / 2;
+    tmpStride[3] = 0;
 
     SwsContext *ctx = GetSWSContext(width, height, GetFFInput(), AV_PIX_FMT_YUV422P, SWS_FAST_BILINEAR);
-    sws_scale(ctx, src, srcStride, 0, height, dst, dstStride);
+    sws_scale(ctx, src, srcStride, 0, height, tmp, tmpStride);
 
-    y = dst[0];
-    u = dst[1];
-    v = dst[2];
+    y = tmp[0];
+    u = tmp[1];
+    v = tmp[2];
     sourceStride = scaleStride;
   }  else {
     y = src[0];
@@ -206,12 +194,10 @@ HRESULT CLAVPixFmtConverter::ConvertTo422Packed(const uint8_t* const src[4], con
     sourceStride = srcStride[0];
   }
 
-  dstStride <<= 1;
-
 #define YUV422_PACK_YUY2(offset) *idst++ = y[(i+offset) * 2] | (u[i+offset] << 8) | (y[(i+offset) * 2 + 1] << 16) | (v[i+offset] << 24);
 #define YUV422_PACK_UYVY(offset) *idst++ = u[i+offset] | (y[(i+offset) * 2] << 8) | (v[i+offset] << 16) | (y[(i+offset) * 2 + 1] << 24);
 
-  BYTE *out = pOut;
+  uint8_t *out = dst[0];
   int halfwidth = width >> 1;
   int halfstride = sourceStride >> 1;
 
@@ -234,7 +220,7 @@ HRESULT CLAVPixFmtConverter::ConvertTo422Packed(const uint8_t* const src[4], con
       y += sourceStride;
       u += halfstride;
       v += halfstride;
-      out += dstStride;
+      out += dstStride[0];
     }
   } else {
     for (line = 0; line < height; ++line) {
@@ -255,7 +241,7 @@ HRESULT CLAVPixFmtConverter::ConvertTo422Packed(const uint8_t* const src[4], con
       y += sourceStride;
       u += halfstride;
       v += halfstride;
-      out += dstStride;
+      out += dstStride[0];
     }
   }
 
@@ -264,7 +250,7 @@ HRESULT CLAVPixFmtConverter::ConvertTo422Packed(const uint8_t* const src[4], con
   return S_OK;
 }
 
-HRESULT CLAVPixFmtConverter::ConvertToAYUV(const uint8_t* const src[4], const int srcStride[4], BYTE *pOut, int width, int height, int dstStride)
+HRESULT CLAVPixFmtConverter::ConvertToAYUV(const uint8_t* const src[4], const int srcStride[4], uint8_t* dst[], int width, int height, int dstStride[])
 {
   const BYTE *y = NULL;
   const BYTE *u = NULL;
@@ -274,27 +260,27 @@ HRESULT CLAVPixFmtConverter::ConvertToAYUV(const uint8_t* const src[4], const in
   BYTE *pTmpBuffer = NULL;
 
   if (m_InputPixFmt != LAVPixFmt_YUV444) {
-    uint8_t *dst[4] = {NULL};
-    int     swStride[4] = {0};
-    int scaleStride = FFALIGN(dstStride, 32);
+    uint8_t *tmp[4] = {NULL};
+    int     tmpStride[4] = {0};
+    int scaleStride = FFALIGN(width, 32);
 
     pTmpBuffer = (BYTE *)av_malloc(height * scaleStride * 3);
 
-    dst[0] = pTmpBuffer;
-    dst[1] = dst[0] + (height * scaleStride);
-    dst[2] = dst[1] + (height * scaleStride);
-    dst[3] = NULL;
-    swStride[0] = scaleStride;
-    swStride[1] = scaleStride;
-    swStride[2] = scaleStride;
-    swStride[3] = 0;
+    tmp[0] = pTmpBuffer;
+    tmp[1] = tmp[0] + (height * scaleStride);
+    tmp[2] = tmp[1] + (height * scaleStride);
+    tmp[3] = NULL;
+    tmpStride[0] = scaleStride;
+    tmpStride[1] = scaleStride;
+    tmpStride[2] = scaleStride;
+    tmpStride[3] = 0;
 
     SwsContext *ctx = GetSWSContext(width, height, GetFFInput(), AV_PIX_FMT_YUV444P, SWS_POINT);
-    sws_scale(ctx, src, srcStride, 0, height, dst, swStride);
+    sws_scale(ctx, src, srcStride, 0, height, tmp, tmpStride);
 
-    y = dst[0];
-    u = dst[1];
-    v = dst[2];
+    y = tmp[0];
+    u = tmp[1];
+    v = tmp[2];
     sourceStride = scaleStride;
   } else {
     y = src[0];
@@ -305,7 +291,7 @@ HRESULT CLAVPixFmtConverter::ConvertToAYUV(const uint8_t* const src[4], const in
 
 #define YUV444_PACK_AYUV(offset) *idst++ = v[i+offset] | (u[i+offset] << 8) | (y[i+offset] << 16) | (0xff << 24);
 
-  BYTE *out = pOut;
+  BYTE *out = dst[0];
   for (line = 0; line < height; ++line) {
     int32_t *idst = (int32_t *)out;
     for (i = 0; i < (width-7); i+=8) {
@@ -324,7 +310,7 @@ HRESULT CLAVPixFmtConverter::ConvertToAYUV(const uint8_t* const src[4], const in
     y += sourceStride;
     u += sourceStride;
     v += sourceStride;
-    out += dstStride << 2;
+    out += dstStride[0];
   }
 
   av_freep(&pTmpBuffer);
@@ -332,7 +318,7 @@ HRESULT CLAVPixFmtConverter::ConvertToAYUV(const uint8_t* const src[4], const in
   return S_OK;
 }
 
-HRESULT CLAVPixFmtConverter::ConvertToPX1X(const uint8_t* const src[4], const int srcStride[4], BYTE *pOut, int width, int height, int dstStride, int chromaVertical)
+HRESULT CLAVPixFmtConverter::ConvertToPX1X(const uint8_t* const src[4], const int srcStride[4], uint8_t* dst[], int width, int height, int dstStride[], int chromaVertical)
 {
   const BYTE *y = NULL;
   const BYTE *u = NULL;
@@ -342,33 +328,30 @@ HRESULT CLAVPixFmtConverter::ConvertToPX1X(const uint8_t* const src[4], const in
 
   int shift = 0;
 
-  // Stride needs to be doubled for 16-bit per pixel
-  dstStride <<= 1;
-
   BYTE *pTmpBuffer = NULL;
 
   if ((chromaVertical == 1 && m_InputPixFmt != LAVPixFmt_YUV422bX) || (chromaVertical == 2 && m_InputPixFmt != LAVPixFmt_YUV420bX)) {
-    uint8_t *dst[4] = {NULL};
-    int     dstStride[4] = {0};
+    uint8_t *tmp[4] = {NULL};
+    int     tmpStride[4] = {0};
     int scaleStride = FFALIGN(width, 32) * 2;
 
     pTmpBuffer = (BYTE *)av_malloc(height * scaleStride * 2);
 
-    dst[0] = pTmpBuffer;
-    dst[1] = dst[0] + (height * scaleStride);
-    dst[2] = dst[1] + ((height / chromaVertical) * (scaleStride / 2));
-    dst[3] = NULL;
-    dstStride[0] = scaleStride;
-    dstStride[1] = scaleStride / 2;
-    dstStride[2] = scaleStride / 2;
-    dstStride[3] = 0;
+    tmp[0] = pTmpBuffer;
+    tmp[1] = tmp[0] + (height * scaleStride);
+    tmp[2] = tmp[1] + ((height / chromaVertical) * (scaleStride / 2));
+    tmp[3] = NULL;
+    tmpStride[0] = scaleStride;
+    tmpStride[1] = scaleStride / 2;
+    tmpStride[2] = scaleStride / 2;
+    tmpStride[3] = 0;
 
     SwsContext *ctx = GetSWSContext(width, height, GetFFInput(), chromaVertical == 1 ? AV_PIX_FMT_YUV422P16LE : AV_PIX_FMT_YUV420P16LE, SWS_POINT);
-    sws_scale(ctx, src, srcStride, 0, height, dst, dstStride);
+    sws_scale(ctx, src, srcStride, 0, height, tmp, tmpStride);
 
-    y = dst[0];
-    u = dst[1];
-    v = dst[2];
+    y = tmp[0];
+    u = tmp[1];
+    v = tmp[2];
     sourceStride = scaleStride;
   } else {
     y = src[0];
@@ -380,7 +363,7 @@ HRESULT CLAVPixFmtConverter::ConvertToPX1X(const uint8_t* const src[4], const in
   }
 
   // copy Y
-  BYTE *pLineOut = pOut;
+  BYTE *pLineOut = dst[0];
   const BYTE *pLineIn = y;
   for (line = 0; line < height; ++line) {
     if (shift == 0) {
@@ -394,14 +377,14 @@ HRESULT CLAVPixFmtConverter::ConvertToPX1X(const uint8_t* const src[4], const in
         *idst++ = yv;
       }
     }
-    pLineOut += dstStride;
+    pLineOut += dstStride[0];
     pLineIn += sourceStride;
   }
 
   sourceStride >>= 2;
 
   // Merge U/V
-  BYTE *out = pLineOut;
+  BYTE *out = dst[1];
   const int16_t *uc = (int16_t *)u;
   const int16_t *vc = (int16_t *)v;
   for (line = 0; line < height/chromaVertical; ++line) {
@@ -417,7 +400,7 @@ HRESULT CLAVPixFmtConverter::ConvertToPX1X(const uint8_t* const src[4], const in
     }
     uc += sourceStride;
     vc += sourceStride;
-    out += dstStride;
+    out += dstStride[1];
   }
 
   av_freep(&pTmpBuffer);
@@ -443,7 +426,7 @@ HRESULT CLAVPixFmtConverter::ConvertToPX1X(const uint8_t* const src[4], const in
     out += dstStride; \
   }
 
-HRESULT CLAVPixFmtConverter::ConvertToY410(const uint8_t* const src[4], const int srcStride[4], BYTE *pOut, int width, int height, int dstStride)
+HRESULT CLAVPixFmtConverter::ConvertToY410(const uint8_t* const src[4], const int srcStride[4], uint8_t* dst[], int width, int height, int dstStride[])
 {
   const int16_t *y = NULL;
   const int16_t *u = NULL;
@@ -454,27 +437,27 @@ HRESULT CLAVPixFmtConverter::ConvertToY410(const uint8_t* const src[4], const in
   BYTE *pTmpBuffer = NULL;
 
   if (m_InputPixFmt != LAVPixFmt_YUV444bX || m_InBpp > 10) {
-    uint8_t *dst[4] = {NULL};
-    int     dstStride[4] = {0};
+    uint8_t *tmp[4] = {NULL};
+    int     tmpStride[4] = {0};
     int scaleStride = FFALIGN(width, 32);
 
     pTmpBuffer = (BYTE *)av_malloc(height * scaleStride * 6);
 
-    dst[0] = pTmpBuffer;
-    dst[1] = dst[0] + (height * scaleStride * 2);
-    dst[2] = dst[1] + (height * scaleStride * 2);
-    dst[3] = NULL;
-    dstStride[0] = scaleStride * 2;
-    dstStride[1] = scaleStride * 2;
-    dstStride[2] = scaleStride * 2;
-    dstStride[3] = 0;
+    tmp[0] = pTmpBuffer;
+    tmp[1] = tmp[0] + (height * scaleStride * 2);
+    tmp[2] = tmp[1] + (height * scaleStride * 2);
+    tmp[3] = NULL;
+    tmpStride[0] = scaleStride * 2;
+    tmpStride[1] = scaleStride * 2;
+    tmpStride[2] = scaleStride * 2;
+    tmpStride[3] = 0;
 
     SwsContext *ctx = GetSWSContext(width, height, GetFFInput(), AV_PIX_FMT_YUV444P10LE, SWS_POINT);
-    sws_scale(ctx, src, srcStride, 0, height, dst, dstStride);
+    sws_scale(ctx, src, srcStride, 0, height, tmp, tmpStride);
 
-    y = (int16_t *)dst[0];
-    u = (int16_t *)dst[1];
-    v = (int16_t *)dst[2];
+    y = (int16_t *)tmp[0];
+    u = (int16_t *)tmp[1];
+    v = (int16_t *)tmp[2];
     sourceStride = scaleStride;
   } else {
     y = (int16_t *)src[0];
@@ -485,13 +468,10 @@ HRESULT CLAVPixFmtConverter::ConvertToY410(const uint8_t* const src[4], const in
     b9Bit = (m_InBpp == 9);
   }
 
-  // 32-bit per pixel
-  dstStride *= 4;
-
 #define YUV444_Y410_PACK \
   *idst++ = (uv & 0x3FF) | ((yv & 0x3FF) << 10) | ((vv & 0x3FF) << 20) | (3 << 30);
 
-  BYTE *out = pOut;
+  BYTE *out = dst[0];
   YUV444_PACKED_LOOP_HEAD_LE(width, height, y, u, v, out)
     if (b9Bit) {
       yv <<= 1;
@@ -499,14 +479,14 @@ HRESULT CLAVPixFmtConverter::ConvertToY410(const uint8_t* const src[4], const in
       vv <<= 1;
     }
     YUV444_Y410_PACK
-  YUV444_PACKED_LOOP_END(y, u, v, out, sourceStride, dstStride)
+  YUV444_PACKED_LOOP_END(y, u, v, out, sourceStride, dstStride[0])
 
   av_freep(&pTmpBuffer);
 
   return S_OK;
 }
 
-HRESULT CLAVPixFmtConverter::ConvertToY416(const uint8_t* const src[4], const int srcStride[4], BYTE *pOut, int width, int height, int dstStride)
+HRESULT CLAVPixFmtConverter::ConvertToY416(const uint8_t* const src[4], const int srcStride[4], uint8_t* dst[], int width, int height, int dstStride[])
 {
   const int16_t *y = NULL;
   const int16_t *u = NULL;
@@ -516,27 +496,27 @@ HRESULT CLAVPixFmtConverter::ConvertToY416(const uint8_t* const src[4], const in
   BYTE *pTmpBuffer = NULL;
 
   if (m_InputPixFmt != LAVPixFmt_YUV444bX || m_InBpp != 16) {
-    uint8_t *dst[4] = {NULL};
-    int     dstStride[4] = {0};
+    uint8_t *tmp[4] = {NULL};
+    int     tmpStride[4] = {0};
     int scaleStride = FFALIGN(width, 32);
 
     pTmpBuffer = (BYTE *)av_malloc(height * scaleStride * 6);
 
-    dst[0] = pTmpBuffer;
-    dst[1] = dst[0] + (height * scaleStride * 2);
-    dst[2] = dst[1] + (height * scaleStride * 2);
-    dst[3] = NULL;
-    dstStride[0] = scaleStride * 2;
-    dstStride[1] = scaleStride * 2;
-    dstStride[2] = scaleStride * 2;
-    dstStride[3] = 0;
+    tmp[0] = pTmpBuffer;
+    tmp[1] = tmp[0] + (height * scaleStride * 2);
+    tmp[2] = tmp[1] + (height * scaleStride * 2);
+    tmp[3] = NULL;
+    tmpStride[0] = scaleStride * 2;
+    tmpStride[1] = scaleStride * 2;
+    tmpStride[2] = scaleStride * 2;
+    tmpStride[3] = 0;
 
     SwsContext *ctx = GetSWSContext(width, height, GetFFInput(), AV_PIX_FMT_YUV444P16LE, SWS_POINT);
-    sws_scale(ctx, src, srcStride, 0, height, dst, dstStride);
+    sws_scale(ctx, src, srcStride, 0, height, tmp, tmpStride);
 
-    y = (int16_t *)dst[0];
-    u = (int16_t *)dst[1];
-    v = (int16_t *)dst[2];
+    y = (int16_t *)tmp[0];
+    u = (int16_t *)tmp[1];
+    v = (int16_t *)tmp[2];
     sourceStride = scaleStride;
   } else {
     y = (int16_t *)src[0];
@@ -545,24 +525,21 @@ HRESULT CLAVPixFmtConverter::ConvertToY416(const uint8_t* const src[4], const in
     sourceStride = srcStride[0] / 2;
   }
 
-  // 64-bit per pixel
-  dstStride <<= 3;
-
 #define YUV444_Y416_PACK \
   *idst++ = 0xFFFF | (vv << 16); \
   *idst++ = yv | (uv << 16);
 
-  BYTE *out = pOut;
+  BYTE *out = dst[0];
   YUV444_PACKED_LOOP_HEAD_LE(width, height, y, u, v, out)
     YUV444_Y416_PACK
-  YUV444_PACKED_LOOP_END(y, u, v, out, sourceStride, dstStride)
+  YUV444_PACKED_LOOP_END(y, u, v, out, sourceStride, dstStride[0])
 
   av_freep(&pTmpBuffer);
 
   return S_OK;
 }
 
-HRESULT CLAVPixFmtConverter::ConvertTov210(const uint8_t* const src[4], const int srcStride[4], BYTE *pOut, int width, int height, int dstStride)
+HRESULT CLAVPixFmtConverter::ConvertTov210(const uint8_t* const src[4], const int srcStride[4], uint8_t* dst[], int width, int height, int dstStride[])
 {
   const int16_t *y = NULL;
   const int16_t *u = NULL;
@@ -573,27 +550,27 @@ HRESULT CLAVPixFmtConverter::ConvertTov210(const uint8_t* const src[4], const in
   BYTE *pTmpBuffer = NULL;
 
   if (m_InputPixFmt != LAVPixFmt_YUV422bX || m_InBpp != 10) {
-    uint8_t *dst[4] = {NULL};
-    int     dstStride[4] = {0};
+    uint8_t *tmp[4] = {NULL};
+    int     tmpStride[4] = {0};
     int scaleStride = FFALIGN(width, 32);
 
     pTmpBuffer = (BYTE *)av_malloc(height * scaleStride * 6);
 
-    dst[0] = pTmpBuffer;
-    dst[1] = dst[0] + (height * scaleStride * 2);
-    dst[2] = dst[1] + (height * scaleStride * 2);
-    dst[3] = NULL;
-    dstStride[0] = scaleStride * 2;
-    dstStride[1] = scaleStride;
-    dstStride[2] = scaleStride;
-    dstStride[3] = 0;
+    tmp[0] = pTmpBuffer;
+    tmp[1] = tmp[0] + (height * scaleStride * 2);
+    tmp[2] = tmp[1] + (height * scaleStride * 2);
+    tmp[3] = NULL;
+    tmpStride[0] = scaleStride * 2;
+    tmpStride[1] = scaleStride;
+    tmpStride[2] = scaleStride;
+    tmpStride[3] = 0;
 
     SwsContext *ctx = GetSWSContext(width, height, GetFFInput(), AV_PIX_FMT_YUV422P10LE, SWS_POINT);
-    sws_scale(ctx, src, srcStride, 0, height, dst, dstStride);
+    sws_scale(ctx, src, srcStride, 0, height, tmp, tmpStride);
 
-    y = (int16_t *)dst[0];
-    u = (int16_t *)dst[1];
-    v = (int16_t *)dst[2];
+    y = (int16_t *)tmp[0];
+    u = (int16_t *)tmp[1];
+    v = (int16_t *)tmp[2];
     srcyStride = scaleStride;
     srcuvStride = scaleStride >> 1;
   } else {
@@ -604,10 +581,10 @@ HRESULT CLAVPixFmtConverter::ConvertTov210(const uint8_t* const src[4], const in
     srcuvStride = srcStride[1] >> 1;
   }
 
-  // 32-bit per pixel
-  dstStride = ((dstStride + 47) / 48) * 128;
+  // Calculate v210 stride
+  int outStride = (((dstStride[0] >> 2) + 47) / 48) * 128;
 
-  BYTE *pdst = pOut;
+  BYTE *pdst = dst[0];
   int32_t *p = (int32_t *)pdst;
   int w;
 
@@ -644,7 +621,7 @@ HRESULT CLAVPixFmtConverter::ConvertTov210(const uint8_t* const src[4], const in
       }
     }
 
-    pdst += dstStride;
+    pdst += outStride;
     memset(p, 0, pdst - (BYTE *)p);
     p = (int32_t *)pdst;
     y += srcyStride - width;
@@ -656,7 +633,7 @@ HRESULT CLAVPixFmtConverter::ConvertTov210(const uint8_t* const src[4], const in
   return S_OK;
 }
 
-HRESULT CLAVPixFmtConverter::ConvertTov410(const uint8_t* const src[4], const int srcStride[4], BYTE *pOut, int width, int height, int dstStride)
+HRESULT CLAVPixFmtConverter::ConvertTov410(const uint8_t* const src[4], const int srcStride[4], uint8_t* dst[], int width, int height, int dstStride[])
 {
   const int16_t *y = NULL;
   const int16_t *u = NULL;
@@ -667,27 +644,27 @@ HRESULT CLAVPixFmtConverter::ConvertTov410(const uint8_t* const src[4], const in
   BYTE *pTmpBuffer = NULL;
 
   if (m_InputPixFmt != LAVPixFmt_YUV444bX || m_InBpp > 10) {
-    uint8_t *dst[4] = {NULL};
-    int     dstStride[4] = {0};
+    uint8_t *tmp[4] = {NULL};
+    int     tmpStride[4] = {0};
     int scaleStride = FFALIGN(width, 32);
 
     pTmpBuffer = (BYTE *)av_malloc(height * scaleStride * 6);
 
-    dst[0] = pTmpBuffer;
-    dst[1] = dst[0] + (height * scaleStride * 2);
-    dst[2] = dst[1] + (height * scaleStride * 2);
-    dst[3] = NULL;
-    dstStride[0] = scaleStride * 2;
-    dstStride[1] = scaleStride * 2;
-    dstStride[2] = scaleStride * 2;
-    dstStride[3] = 0;
+    tmp[0] = pTmpBuffer;
+    tmp[1] = tmp[0] + (height * scaleStride * 2);
+    tmp[2] = tmp[1] + (height * scaleStride * 2);
+    tmp[3] = NULL;
+    tmpStride[0] = scaleStride * 2;
+    tmpStride[1] = scaleStride * 2;
+    tmpStride[2] = scaleStride * 2;
+    tmpStride[3] = 0;
 
     SwsContext *ctx = GetSWSContext(width, height, GetFFInput(), AV_PIX_FMT_YUV444P10LE, SWS_POINT);
-    sws_scale(ctx, src, srcStride, 0, height, dst, dstStride);
+    sws_scale(ctx, src, srcStride, 0, height, tmp, tmpStride);
 
-    y = (int16_t *)dst[0];
-    u = (int16_t *)dst[1];
-    v = (int16_t *)dst[2];
+    y = (int16_t *)tmp[0];
+    u = (int16_t *)tmp[1];
+    v = (int16_t *)tmp[2];
     sourceStride = scaleStride;
   } else {
     y = (int16_t *)src[0];
@@ -698,13 +675,10 @@ HRESULT CLAVPixFmtConverter::ConvertTov410(const uint8_t* const src[4], const in
     b9Bit = (m_InBpp == 9);
   }
 
-  // 32-bit per pixel
-  dstStride *= 4;
-
 #define YUV444_v410_PACK \
   *idst++ = ((uv & 0x3FF) << 2) | ((yv & 0x3FF) << 12) | ((vv & 0x3FF) << 22);
 
-  BYTE *out = pOut;
+  BYTE *out = dst[0];
   YUV444_PACKED_LOOP_HEAD_LE(width, height, y, u, v, out)
     if (b9Bit) {
       yv <<= 1;
@@ -712,7 +686,7 @@ HRESULT CLAVPixFmtConverter::ConvertTov410(const uint8_t* const src[4], const in
       vv <<= 1;
     }
     YUV444_v410_PACK
-  YUV444_PACKED_LOOP_END(y, u, v, out, sourceStride, dstStride)
+  YUV444_PACKED_LOOP_END(y, u, v, out, sourceStride, dstStride[0])
 
   av_freep(&pTmpBuffer);
 
diff --git a/decoder/LAVVideo/pixconv/interleave.cpp b/decoder/LAVVideo/pixconv/interleave.cpp
index b9c132fa..699d157e 100644
--- a/decoder/LAVVideo/pixconv/interleave.cpp
+++ b/decoder/LAVVideo/pixconv/interleave.cpp
@@ -30,8 +30,8 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv444_y410)
   const uint16_t *u = (const uint16_t *)src[1];
   const uint16_t *v = (const uint16_t *)src[2];
 
-  ptrdiff_t inStride = srcStride[0] >> 1;
-  ptrdiff_t outStride = dstStride << 2;
+  const ptrdiff_t inStride = srcStride[0] >> 1;
+  const ptrdiff_t outStride = dstStride[0];
   int shift = 10 - bpp;
 
   ptrdiff_t line, i;
@@ -44,7 +44,7 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv444_y410)
   _mm_sfence();
 
   for (line = 0; line < height; ++line) {
-    __m128i *dst128 = (__m128i *)(dst + line * outStride);
+    __m128i *dst128 = (__m128i *)(dst[0] + line * outStride);
 
     for (i = 0; i < width; i+=8) {
       PIXCONV_LOAD_PIXEL8_ALIGNED(xmm0, (y+i));
diff --git a/decoder/LAVVideo/pixconv/rgb2rgb_unscaled.cpp b/decoder/LAVVideo/pixconv/rgb2rgb_unscaled.cpp
index 47a75a91..b7a0c16d 100644
--- a/decoder/LAVVideo/pixconv/rgb2rgb_unscaled.cpp
+++ b/decoder/LAVVideo/pixconv/rgb2rgb_unscaled.cpp
@@ -28,7 +28,7 @@ DECLARE_CONV_FUNC_IMPL(convert_rgb48_rgb32_ssse3)
 {
   const uint16_t *rgb = (const uint16_t *)src[0];
   const ptrdiff_t inStride = srcStride[0] >> 1;
-  const ptrdiff_t outStride = dstStride * 4;
+  const ptrdiff_t outStride = dstStride[0];
   ptrdiff_t line, i;
 
   int processWidth = width * 3;
@@ -43,7 +43,7 @@ DECLARE_CONV_FUNC_IMPL(convert_rgb48_rgb32_ssse3)
 
   _mm_sfence();
   for (line = 0; line < height; line++) {
-    __m128i *dst128 = (__m128i *)(dst + line * outStride);
+    __m128i *dst128 = (__m128i *)(dst[0] + line * outStride);
 
     // Load dithering coefficients for this line
     if (ditherMode == LAVDither_Random) {
@@ -96,7 +96,7 @@ DECLARE_CONV_FUNC_IMPL(convert_rgb48_rgb)
   // Dither to RGB24/32 with SSE2
   const uint16_t *rgb = (const uint16_t *)dstBS[0];
   const ptrdiff_t inStride = srcStride[0] >> 1;
-  const ptrdiff_t outStride = dstStride * (out32 ? 4 : 3);
+  const ptrdiff_t outStride = dstStride[0];
   ptrdiff_t line, i;
   int processWidth = width * 3;
 
@@ -117,7 +117,7 @@ DECLARE_CONV_FUNC_IMPL(convert_rgb48_rgb)
     if (out32) {
       dst128 = (__m128i *)rgb24buffer;
     } else {
-      dst128 = (__m128i *)(dst + line * outStride);
+      dst128 = (__m128i *)(dst[0] + line * outStride);
     }
 
     // Load dithering coefficients for this line
@@ -143,7 +143,7 @@ DECLARE_CONV_FUNC_IMPL(convert_rgb48_rgb)
     rgb += inStride;
     if (out32) {
       uint32_t *src24 = (uint32_t *)rgb24buffer;
-      uint32_t *dst32 = (uint32_t *)(dst + line * outStride);
+      uint32_t *dst32 = (uint32_t *)(dst[0] + line * outStride);
       for (i = 0; i < width; i += 4) {
         uint32_t sa = src24[0];
         uint32_t sb = src24[1];
diff --git a/decoder/LAVVideo/pixconv/yuv2rgb.cpp b/decoder/LAVVideo/pixconv/yuv2rgb.cpp
index 5e68dc2f..4a5003ce 100644
--- a/decoder/LAVVideo/pixconv/yuv2rgb.cpp
+++ b/decoder/LAVVideo/pixconv/yuv2rgb.cpp
@@ -407,8 +407,6 @@ static int __stdcall yuv2rgb_process_lines(const uint8_t *srcY, const uint8_t *s
   const uint8_t *v = srcV;
   uint8_t *rgb = dst;
 
-  dstStride *= (3 + out32);
-
   ptrdiff_t line = sliceYStart;
   ptrdiff_t lastLine = sliceYEnd;
 
@@ -577,15 +575,15 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv_rgb)
   const uint16_t *dithers = (ditherMode == LAVDither_Random) ? GetRandomDitherCoeffs(height, DITHER_STEPS * 3, 4, 0) : NULL;
   if (ditherMode == LAVDither_Random && dithers != NULL) {
     if (m_ColorProps.VideoTransferMatrix == 7) {
-      yuv2rgb_dispatch<out32, 1, 1>(src, srcStride, dst, dstStride, width, height, inputFormat, bpp, m_NumThreads, coeffs, dithers);
+      yuv2rgb_dispatch<out32, 1, 1>(src, srcStride, dst[0], dstStride[0], width, height, inputFormat, bpp, m_NumThreads, coeffs, dithers);
     } else {
-      yuv2rgb_dispatch<out32, 1, 0>(src, srcStride, dst, dstStride, width, height, inputFormat, bpp, m_NumThreads, coeffs, dithers);
+      yuv2rgb_dispatch<out32, 1, 0>(src, srcStride, dst[0], dstStride[0], width, height, inputFormat, bpp, m_NumThreads, coeffs, dithers);
     }
   } else {
     if (m_ColorProps.VideoTransferMatrix == 7) {
-      yuv2rgb_dispatch<out32, 0, 1>(src, srcStride, dst, dstStride, width, height, inputFormat, bpp, m_NumThreads, coeffs, NULL);
+      yuv2rgb_dispatch<out32, 0, 1>(src, srcStride, dst[0], dstStride[0], width, height, inputFormat, bpp, m_NumThreads, coeffs, NULL);
     } else {
-      yuv2rgb_dispatch<out32, 0, 0>(src, srcStride, dst, dstStride, width, height, inputFormat, bpp, m_NumThreads, coeffs, NULL);
+      yuv2rgb_dispatch<out32, 0, 0>(src, srcStride, dst[0], dstStride[0], width, height, inputFormat, bpp, m_NumThreads, coeffs, NULL);
     }
   }
 
diff --git a/decoder/LAVVideo/pixconv/yuv2yuv_unscaled.cpp b/decoder/LAVVideo/pixconv/yuv2yuv_unscaled.cpp
index 6591d75d..ba116abd 100644
--- a/decoder/LAVVideo/pixconv/yuv2yuv_unscaled.cpp
+++ b/decoder/LAVVideo/pixconv/yuv2yuv_unscaled.cpp
@@ -31,13 +31,14 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv_yv_nv12_dither_le)
   const uint16_t *u = (const uint16_t *)src[1];
   const uint16_t *v = (const uint16_t *)src[2];
 
-  const ptrdiff_t inYStride = srcStride[0] >> 1;
-  const ptrdiff_t inUVStride = srcStride[1] >> 1;
+  const ptrdiff_t inYStride   = srcStride[0] >> 1;
+  const ptrdiff_t inUVStride  = srcStride[1] >> 1;
 
-  ptrdiff_t outLumaStride    = dstStride;
-  ptrdiff_t outChromaStride  = dstStride;
-  ptrdiff_t chromaWidth      = width;
-  ptrdiff_t chromaHeight     = height;
+  const ptrdiff_t outYStride  = dstStride[0];
+  const ptrdiff_t outUVStride = dstStride[1];
+
+  ptrdiff_t chromaWidth       = width;
+  ptrdiff_t chromaHeight      = height;
 
   LAVDitherMode ditherMode = m_pSettings->GetDitherMode();
   const uint16_t *dithers = GetRandomDitherCoeffs(height, 4, 8, 0);
@@ -46,19 +47,13 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv_yv_nv12_dither_le)
 
   if (inputFormat == LAVPixFmt_YUV420bX)
     chromaHeight = chromaHeight >> 1;
-  if (inputFormat == LAVPixFmt_YUV420bX || inputFormat == LAVPixFmt_YUV422bX) {
+  if (inputFormat == LAVPixFmt_YUV420bX || inputFormat == LAVPixFmt_YUV422bX)
     chromaWidth = (chromaWidth + 1) >> 1;
-    outChromaStride = outChromaStride >> 1;
-  }
 
   ptrdiff_t line, i;
 
   __m128i xmm0,xmm1,xmm2,xmm3,xmm4,xmm5,xmm6,xmm7;
 
-  uint8_t *dstY = dst;
-  uint8_t *dstV = dstY + outLumaStride * height;
-  uint8_t *dstU = dstV + outChromaStride * chromaHeight;
-
   _mm_sfence();
 
   // Process Y
@@ -74,7 +69,7 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv_yv_nv12_dither_le)
       xmm4 = xmm5 = xmm6 = xmm7;
     }
 
-    __m128i *dst128Y = (__m128i *)(dstY + line * outLumaStride);
+    __m128i *dst128Y = (__m128i *)(dst[0] + line * outYStride);
 
     for (i = 0; i < width; i+=32) {
       // Load pixels into registers, and apply dithering
@@ -92,9 +87,9 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv_yv_nv12_dither_le)
 
     // Process U/V for chromaHeight lines
     if (line < chromaHeight) {
-      __m128i *dst128UV = (__m128i *)(dstV + line * outLumaStride);
-      __m128i *dst128U = (__m128i *)(dstU + line * outChromaStride);
-      __m128i *dst128V = (__m128i *)(dstV + line * outChromaStride);
+      __m128i *dst128UV = (__m128i *)(dst[1] + line * outUVStride);
+      __m128i *dst128U = (__m128i *)(dst[2] + line * outUVStride);
+      __m128i *dst128V = (__m128i *)(dst[1] + line * outUVStride);
 
        for (i = 0; i < chromaWidth; i+=16) {
         PIXCONV_LOAD_PIXEL16_DITHER(xmm0, xmm4, (u+i+0), bpp);  /* U0U0U0U0 */
@@ -137,11 +132,12 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv420_px1x_le)
   const uint16_t *u = (const uint16_t *)src[1];
   const uint16_t *v = (const uint16_t *)src[2];
 
-  const ptrdiff_t inYStride = srcStride[0] >> 1;
-  const ptrdiff_t inUVStride = srcStride[1] >> 1;
-  const ptrdiff_t outStride = dstStride << 1;
-  const ptrdiff_t uvHeight = (outputFormat == LAVOutPixFmt_P010 || outputFormat == LAVOutPixFmt_P016) ? (height >> 1) : height;
-  const ptrdiff_t uvWidth = (width + 1) >> 1;
+  const ptrdiff_t inYStride   = srcStride[0] >> 1;
+  const ptrdiff_t inUVStride  = srcStride[1] >> 1;
+  const ptrdiff_t outYStride  = dstStride[0];
+  const ptrdiff_t outUVStride = dstStride[1];
+  const ptrdiff_t uvHeight    = (outputFormat == LAVOutPixFmt_P010 || outputFormat == LAVOutPixFmt_P016) ? (height >> 1) : height;
+  const ptrdiff_t uvWidth     = (width + 1) >> 1;
 
   ptrdiff_t line, i;
   __m128i xmm0,xmm1,xmm2;
@@ -150,7 +146,7 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv420_px1x_le)
 
   // Process Y
   for (line = 0; line < height; ++line) {
-    __m128i *dst128Y = (__m128i *)(dst + line * outStride);
+    __m128i *dst128Y = (__m128i *)(dst[0] + line * outYStride);
 
     for (i = 0; i < width; i+=16) {
       // Load 8 pixels into register
@@ -164,11 +160,9 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv420_px1x_le)
     y += inYStride;
   }
 
-  BYTE *dstUV = dst + (height * outStride);
-
   // Process UV
   for (line = 0; line < uvHeight; ++line) {
-    __m128i *dst128UV = (__m128i *)(dstUV + line * outStride);
+    __m128i *dst128UV = (__m128i *)(dst[1] + line * outUVStride);
 
     for (i = 0; i < uvWidth; i+=8) {
       // Load 8 pixels into register
@@ -198,23 +192,18 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv_yv)
 
   const ptrdiff_t inLumaStride    = srcStride[0];
   const ptrdiff_t inChromaStride  = srcStride[1];
-  const ptrdiff_t outLumaStride   = dstStride;
-  ptrdiff_t outChromaStride = dstStride;
+
+  const ptrdiff_t outLumaStride   = dstStride[0];
+  const ptrdiff_t outChromaStride = dstStride[1];
 
   ptrdiff_t line;
-  ptrdiff_t chromaWidth  = width;
-  ptrdiff_t chromaHeight = height;
+  ptrdiff_t chromaWidth       = width;
+  ptrdiff_t chromaHeight      = height;
 
   if (inputFormat == LAVPixFmt_YUV420)
     chromaHeight = chromaHeight >> 1;
-  if (inputFormat == LAVPixFmt_YUV420 || inputFormat == LAVPixFmt_YUV422) {
+  if (inputFormat == LAVPixFmt_YUV420 || inputFormat == LAVPixFmt_YUV422)
     chromaWidth = (chromaWidth + 1) >> 1;
-    outChromaStride = outChromaStride >> 1;
-  }
-
-  uint8_t *dstY = dst;
-  uint8_t *dstV = dstY + height * outLumaStride;
-  uint8_t *dstU = dstV + chromaHeight * outChromaStride;
 
   // Copy planes
 
@@ -223,12 +212,12 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv_yv)
   // Y
-  if ((outLumaStride % 16) == 0 && ((intptr_t)dst % 16u) == 0) {
+  if ((outLumaStride % 16) == 0 && ((intptr_t)dst[0] % 16u) == 0) {
     for(line = 0; line < height; ++line) {
-      PIXCONV_MEMCPY_ALIGNED(dstY + outLumaStride * line, y, width);
+      PIXCONV_MEMCPY_ALIGNED(dst[0] + outLumaStride * line, y, width);
       y += inLumaStride;
     }
   } else {
     for(line = 0; line < height; ++line) {
-      memcpy(dstY + outLumaStride * line, y, width);
+      memcpy(dst[0] + outLumaStride * line, y, width);
       y += inLumaStride;
     }
   }
@@ -237,16 +226,16 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv_yv)
-  if ((outChromaStride % 16) == 0 && ((intptr_t)dst % 16u) == 0) {
+  if ((outChromaStride % 16) == 0 && ((intptr_t)dst[1] % 16u) == 0 && ((intptr_t)dst[2] % 16u) == 0) {
     for(line = 0; line < chromaHeight; ++line) {
       PIXCONV_MEMCPY_ALIGNED_TWO(
-        dstU + outChromaStride * line, u,
-        dstV + outChromaStride * line, v,
+        dst[2] + outChromaStride * line, u,
+        dst[1] + outChromaStride * line, v,
         chromaWidth);
       u += inChromaStride;
       v += inChromaStride;
     }
   } else {
     for(line = 0; line < chromaHeight; ++line) {
-      memcpy(dstU + outChromaStride * line, u, chromaWidth);
-      memcpy(dstV + outChromaStride * line, v, chromaWidth);
+      memcpy(dst[2] + outChromaStride * line, u, chromaWidth);
+      memcpy(dst[1] + outChromaStride * line, v, chromaWidth);
       u += inChromaStride;
       v += inChromaStride;
     }
@@ -263,14 +252,13 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv420_nv12)
 
   const ptrdiff_t inLumaStride    = srcStride[0];
   const ptrdiff_t inChromaStride  = srcStride[1];
-  const ptrdiff_t outStride       = dstStride;
+
+  const ptrdiff_t outLumaStride   = dstStride[0];
+  const ptrdiff_t outChromaStride = dstStride[1];
 
   const ptrdiff_t chromaWidth     = (width + 1) >> 1;
   const ptrdiff_t chromaHeight    = height >> 1;
 
-  uint8_t *dstY = dst;
-  uint8_t *dstUV = dstY + height * outStride;
-
   ptrdiff_t line,i;
   __m128i xmm0,xmm1,xmm2,xmm3;
 
@@ -278,13 +266,13 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv420_nv12)
 
   // Y
   for(line = 0; line < height; ++line) {
-    PIXCONV_MEMCPY_ALIGNED32(dstY + outStride * line, y, width);
+    PIXCONV_MEMCPY_ALIGNED32(dst[0] + outLumaStride * line, y, width);
     y += inLumaStride;
   }
 
   // U/V
   for(line = 0; line < chromaHeight; ++line) {
-    __m128i *dst128UV = (__m128i *)(dstUV + line * outStride);
+    __m128i *dst128UV = (__m128i *)(dst[1] + line * outChromaStride);
 
     for (i = 0; i < chromaWidth; i+=16) {
       PIXCONV_LOAD_PIXEL8_ALIGNED(xmm0, (v+i));  /* VVVV */
@@ -313,7 +301,8 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv422_yuy2_uyvy)
 
   const ptrdiff_t inLumaStride    = srcStride[0];
   const ptrdiff_t inChromaStride  = srcStride[1];
-  const ptrdiff_t outStride       = dstStride << 1;
+
+  const ptrdiff_t outStride       = dstStride[0];
 
   const ptrdiff_t chromaWidth     = (width + 1) >> 1;
 
@@ -323,7 +312,7 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv422_yuy2_uyvy)
   _mm_sfence();
 
   for (line = 0;  line < height; ++line) {
-    __m128i *dst128 = (__m128i *)(dst + line * outStride);
+    __m128i *dst128 = (__m128i *)(dst[0] + line * outStride);
 
     for (i = 0; i < chromaWidth; i+=16) {
       // Load pixels
@@ -386,7 +375,7 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv422_yuy2_uyvy_dither_le)
 
   const ptrdiff_t inLumaStride    = srcStride[0] >> 1;
   const ptrdiff_t inChromaStride  = srcStride[1] >> 1;
-  const ptrdiff_t outStride       = dstStride << 1;
+  const ptrdiff_t outStride       = dstStride[0];
   const ptrdiff_t chromaWidth     = (width + 1) >> 1;
 
   LAVDitherMode ditherMode = m_pSettings->GetDitherMode();
@@ -400,7 +389,7 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv422_yuy2_uyvy_dither_le)
   _mm_sfence();
 
   for (line = 0;  line < height; ++line) {
-    __m128i *dst128 = (__m128i *)(dst + line * outStride);
+    __m128i *dst128 = (__m128i *)(dst[0] + line * outStride);
 
     // Load dithering coefficients for this line
     if (ditherMode == LAVDither_Random) {
@@ -459,15 +448,11 @@ DECLARE_CONV_FUNC_IMPL(convert_nv12_yv12)
   const uint8_t *y  = src[0];
   const uint8_t *uv = src[1];
 
-  const ptrdiff_t inStride = srcStride[0];
-  const ptrdiff_t outLumaStride = dstStride;
-  const ptrdiff_t outChromaStride = dstStride >> 1;
-
-  const ptrdiff_t chromaHeight = height >> 1;
-
-  uint8_t *dstY = dst;
-  uint8_t *dstV = dstY + height * outLumaStride;
-  uint8_t *dstU = dstV + chromaHeight * outChromaStride;
+  const ptrdiff_t inLumaStride    = srcStride[0];
+  const ptrdiff_t inChromaStride  = srcStride[1];
+  const ptrdiff_t outLumaStride   = dstStride[0];
+  const ptrdiff_t outChromaStride = dstStride[1];
+  const ptrdiff_t chromaHeight    = height >> 1;
 
   ptrdiff_t line, i;
   __m128i xmm0,xmm1,xmm2,xmm3,xmm7;
@@ -478,13 +463,13 @@ DECLARE_CONV_FUNC_IMPL(convert_nv12_yv12)
 
   // Copy the y
   for (line = 0; line < height; line++) {
-    PIXCONV_MEMCPY_ALIGNED(dstY + outLumaStride * line, y, width);
-    y += inStride;
+    PIXCONV_MEMCPY_ALIGNED(dst[0] + outLumaStride * line, y, width);
+    y += inLumaStride;
   }
 
   for (line = 0; line < chromaHeight; line++) {
-    __m128i *dstV128 = (__m128i *)(dstV + outChromaStride * line);
-    __m128i *dstU128 = (__m128i *)(dstU + outChromaStride * line);
+    __m128i *dstV128 = (__m128i *)(dst[1] + outChromaStride * line);
+    __m128i *dstU128 = (__m128i *)(dst[2] + outChromaStride * line);
 
     for (i = 0; i < width; i+=32) {
       PIXCONV_LOAD_PIXEL8_ALIGNED(xmm0, uv+i+0);
@@ -505,7 +490,7 @@ DECLARE_CONV_FUNC_IMPL(convert_nv12_yv12)
       _mm_stream_si128(dstU128++, xmm0);
       _mm_stream_si128(dstV128++, xmm2);
     }
-    uv += inStride;
+    uv += inChromaStride;
   }
 
   return S_OK;
@@ -516,39 +501,38 @@ DECLARE_CONV_FUNC_IMPL(convert_nv12_nv12)
   const uint8_t *y  = src[0];
   const uint8_t *uv = src[1];
 
-  const ptrdiff_t inStride = srcStride[0];
-  const ptrdiff_t outStride = dstStride;
-  const ptrdiff_t chromaHeight = (height >> 1);
-
-  uint8_t *dstY = dst;
-  uint8_t *dstUV = dstY + height * outStride;
+  const ptrdiff_t inLumaStride    = srcStride[0];
+  const ptrdiff_t inChromaStride  = srcStride[1];
+  const ptrdiff_t outLumaStride   = dstStride[0];
+  const ptrdiff_t outChromaStride = dstStride[1];
+  const ptrdiff_t chromaHeight    = height >> 1;
 
   ptrdiff_t line;
 
   _mm_sfence();
 
   // Use SSE2 copy when the stride is aligned
-  if ((outStride % 16) == 0) {
+  if ((dstStride[0] % 16) == 0) {
     // Copy the data
     for (line = 0; line < height; line++) {
-      PIXCONV_MEMCPY_ALIGNED(dstY + outStride * line, y, width);
-      y += inStride;
+      PIXCONV_MEMCPY_ALIGNED(dst[0] + outLumaStride * line, y, width);
+      y += inLumaStride;
     }
 
     for (line = 0; line < chromaHeight; line++) {
-      PIXCONV_MEMCPY_ALIGNED(dstUV + outStride * line, uv, width);
-      uv += inStride;
+      PIXCONV_MEMCPY_ALIGNED(dst[1] + outChromaStride * line, uv, width);
+      uv += inChromaStride;
     }
   } else {
     // Copy the data
     for (line = 0; line < height; line++) {
-      memcpy(dstY + outStride * line, y, width);
-      y += inStride;
+      memcpy(dst[0] + outLumaStride * line, y, width);
+      y += inLumaStride;
     }
 
     for (line = 0; line < chromaHeight; line++) {
-      memcpy(dstUV + outStride * line, uv, width);
-      uv += inStride;
+      memcpy(dst[1] + outChromaStride * line, uv, width);
+      uv += inChromaStride;
     }
   }
 
diff --git a/decoder/LAVVideo/pixconv/yuv420_yuy2.cpp b/decoder/LAVVideo/pixconv/yuv420_yuy2.cpp
index c5859623..869992a3 100644
--- a/decoder/LAVVideo/pixconv/yuv420_yuy2.cpp
+++ b/decoder/LAVVideo/pixconv/yuv420_yuy2.cpp
@@ -168,8 +168,6 @@ static int __stdcall yuv420yuy2_process_lines(const uint8_t *srcY, const uint8_t
   const uint8_t *v = srcV;
   uint8_t *yuy2 = dst;
 
-  dstStride *= 2;
-
   // Processing starts at line 1, and ends at height - 1. The first and last line have special handling
   ptrdiff_t line = 1;
   const ptrdiff_t lastLine = height - 1;
@@ -253,9 +251,9 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv420_yuy2)
   LAVDitherMode ditherMode = m_pSettings->GetDitherMode();
   const uint16_t *dithers = (ditherMode == LAVDither_Random) ? GetRandomDitherCoeffs(height, DITHER_STEPS * 2, bpp - 8 + 2, 0) : NULL;
   if (ditherMode == LAVDither_Random && dithers != NULL) {
-    yuv420yuy2_dispatch<uyvy, 1>(inputFormat, bpp, src[0], src[1], src[2], dst, width, height, srcStride[0], srcStride[1], dstStride, dithers);
+    yuv420yuy2_dispatch<uyvy, 1>(inputFormat, bpp, src[0], src[1], src[2], dst[0], width, height, srcStride[0], srcStride[1], dstStride[0], dithers);
   } else {
-    yuv420yuy2_dispatch<uyvy, 0>(inputFormat, bpp, src[0], src[1], src[2], dst, width, height, srcStride[0], srcStride[1], dstStride, NULL);
+    yuv420yuy2_dispatch<uyvy, 0>(inputFormat, bpp, src[0], src[1], src[2], dst[0], width, height, srcStride[0], srcStride[1], dstStride[0], NULL);
   }
 
   return S_OK;
diff --git a/decoder/LAVVideo/pixconv/yuv444_ayuv.cpp b/decoder/LAVVideo/pixconv/yuv444_ayuv.cpp
index a84e2940..40d7ea16 100644
--- a/decoder/LAVVideo/pixconv/yuv444_ayuv.cpp
+++ b/decoder/LAVVideo/pixconv/yuv444_ayuv.cpp
@@ -39,7 +39,7 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv444_ayuv)
   const uint8_t *v = (const uint8_t *)src[2];
 
   const ptrdiff_t inStride = srcStride[0];
-  const ptrdiff_t outStride = dstStride << 2;
+  const ptrdiff_t outStride = dstStride[0];
 
   ptrdiff_t line, i;
 
@@ -50,7 +50,7 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv444_ayuv)
   _mm_sfence();
 
   for (line = 0; line < height; ++line) {
-    __m128i *dst128 = (__m128i *)(dst + line * outStride);
+    __m128i *dst128 = (__m128i *)(dst[0] + line * outStride);
 
     for (i = 0; i < width; i+=16) {
       // Load pixels into registers
@@ -95,7 +95,7 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv444_ayuv_dither_le)
   const uint16_t *v = (const uint16_t *)src[2];
 
   const ptrdiff_t inStride = srcStride[0] >> 1;
-  const ptrdiff_t outStride = dstStride << 2;
+  const ptrdiff_t outStride = dstStride[0];
 
   LAVDitherMode ditherMode = m_pSettings->GetDitherMode();
   const uint16_t *dithers = GetRandomDitherCoeffs(height, 3, 8, 0);
@@ -121,7 +121,7 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv444_ayuv_dither_le)
       xmm4 = xmm5 = xmm6;
     }
 
-    __m128i *dst128 = (__m128i *)(dst + line * outStride);
+    __m128i *dst128 = (__m128i *)(dst[0] + line * outStride);
 
     for (i = 0; i < width; i+=8) {
       // Load pixels into registers, and apply dithering
-- 
cgit v1.2.3