Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mpc-hc/LAVFilters.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Hendrik Leppkes <h.leppkes@gmail.com> 2013-11-15 12:59:16 +0400
committer Hendrik Leppkes <h.leppkes@gmail.com> 2013-11-15 13:12:58 +0400
commit e5580bd5127a40380e966e9da3607b7f7c98c0a9 (patch)
tree 2d5df536e81f8dfc150621bfacbcd367f9765ae9 /decoder/LAVVideo
parent 73db2eca710995b9c7c8df19559f2744ffab1ff1 (diff)
Move plane/stride calculation out of the individual pixfmt converters
Diffstat (limited to 'decoder/LAVVideo')
-rw-r--r-- decoder/LAVVideo/LAVPixFmtConverter.cpp 46
-rw-r--r-- decoder/LAVVideo/LAVPixFmtConverter.h 40
-rw-r--r-- decoder/LAVVideo/pixconv/convert_generic.cpp 288
-rw-r--r-- decoder/LAVVideo/pixconv/interleave.cpp 6
-rw-r--r-- decoder/LAVVideo/pixconv/rgb2rgb_unscaled.cpp 10
-rw-r--r-- decoder/LAVVideo/pixconv/yuv2rgb.cpp 10
-rw-r--r-- decoder/LAVVideo/pixconv/yuv2yuv_unscaled.cpp 148
-rw-r--r-- decoder/LAVVideo/pixconv/yuv420_yuy2.cpp 6
-rw-r--r-- decoder/LAVVideo/pixconv/yuv444_ayuv.cpp 8
9 files changed, 265 insertions, 297 deletions
diff --git a/decoder/LAVVideo/LAVPixFmtConverter.cpp b/decoder/LAVVideo/LAVPixFmtConverter.cpp
index 5c690f98..69ed713d 100644
--- a/decoder/LAVVideo/LAVPixFmtConverter.cpp
+++ b/decoder/LAVVideo/LAVPixFmtConverter.cpp
@@ -407,6 +407,41 @@ void CLAVPixFmtConverter::SelectConvertFunction()
}
}
+HRESULT CLAVPixFmtConverter::Convert(LAVFrame *pFrame, uint8_t *dst, int width, int height, int dstStride) {
+ uint8_t *out = dst;
+ int outStride = dstStride, i;
+ // Check if we have proper pixel alignment and the dst memory is actually aligned
+ if (m_RequiredAlignment && (FFALIGN(dstStride, m_RequiredAlignment) != dstStride || ((uintptr_t)dst % 16u))) {
+ outStride = FFALIGN(dstStride, m_RequiredAlignment);
+ size_t requiredSize = (outStride * height * lav_pixfmt_desc[m_OutputPixFmt].bpp) << 3;
+ if (requiredSize > m_nAlignedBufferSize) {
+ DbgLog((LOG_TRACE, 10, L"::Convert(): Conversion requires a bigger stride (need: %d, have: %d), allocating buffer...", outStride, dstStride));
+ av_freep(&m_pAlignedBuffer);
+ m_nAlignedBufferSize = requiredSize;
+ m_pAlignedBuffer = (uint8_t *)av_malloc(m_nAlignedBufferSize+FF_INPUT_BUFFER_PADDING_SIZE);
+ }
+ out = m_pAlignedBuffer;
+ }
+
+ uint8_t *dstArray[4] = {0};
+ int dstStrideArray[4] = {0};
+ int byteStride = outStride * lav_pixfmt_desc[m_OutputPixFmt].codedbytes;
+
+ dstArray[0] = out;
+ dstStrideArray[0] = byteStride;
+
+ for (i = 1; i < lav_pixfmt_desc[m_OutputPixFmt].planes; ++i) {
+ dstArray[i] = dstArray[i-1] + dstStrideArray[i-1] * (height / lav_pixfmt_desc[m_OutputPixFmt].planeHeight[i-1]);
+ dstStrideArray[i] = byteStride / lav_pixfmt_desc[m_OutputPixFmt].planeWidth[i];
+ }
+
+ HRESULT hr = (this->*convert)(pFrame->data, pFrame->stride, dstArray, dstStrideArray, width, height, m_InputPixFmt, m_InBpp, m_OutputPixFmt);
+ if (out != dst) {
+ ChangeStride(out, outStride, dst, dstStride, width, height, m_OutputPixFmt);
+ }
+ return hr;
+}
+
DECLARE_CONV_FUNC_IMPL(plane_copy)
{
LAVOutPixFmtDesc desc = lav_pixfmt_desc[outputFormat];
@@ -414,18 +449,17 @@ DECLARE_CONV_FUNC_IMPL(plane_copy)
int plane, line;
const int widthBytes = width * desc.codedbytes;
- const int dstStrideBytes = dstStride * desc.codedbytes;
const int planes = max(desc.planes, 1);
for (plane = 0; plane < planes; plane++) {
- const int planeWidth = widthBytes / desc.planeWidth[plane];
- const int planeHeight = height / desc.planeHeight[plane];
- const int dstPlaneStride = dstStrideBytes / desc.planeWidth[plane];
+ const int planeWidth = widthBytes / desc.planeWidth[plane];
+ const int planeHeight = height / desc.planeHeight[plane];
const uint8_t *srcBuf = src[plane];
+ uint8_t *dstBuf = dst[plane];
for (line = 0; line < planeHeight; ++line) {
- memcpy(dst, srcBuf, planeWidth);
+ memcpy(dstBuf, srcBuf, planeWidth);
srcBuf += srcStride[plane];
- dst += dstPlaneStride;
+ dstBuf += dstStride[plane];
}
}
diff --git a/decoder/LAVVideo/LAVPixFmtConverter.h b/decoder/LAVVideo/LAVPixFmtConverter.h
index 5a3d88b2..b0faff7d 100644
--- a/decoder/LAVVideo/LAVPixFmtConverter.h
+++ b/decoder/LAVVideo/LAVPixFmtConverter.h
@@ -22,7 +22,7 @@
#include "LAVVideoSettings.h"
#include "decoders/ILAVDecoder.h"
-#define CONV_FUNC_PARAMS (const uint8_t* const src[4], const int srcStride[4], uint8_t *dst, int dstStride, int width, int height, LAVPixelFormat inputFormat, int bpp, LAVOutPixFmts outputFormat)
+#define CONV_FUNC_PARAMS (const uint8_t* const src[4], const int srcStride[4], uint8_t* dst[4], int dstStride[4], int width, int height, LAVPixelFormat inputFormat, int bpp, LAVOutPixFmts outputFormat)
#define DECLARE_CONV_FUNC(name) \
HRESULT name CONV_FUNC_PARAMS
@@ -73,27 +73,7 @@ public:
void GetMediaType(CMediaType *mt, int index, LONG biWidth, LONG biHeight, DWORD dwAspectX, DWORD dwAspectY, REFERENCE_TIME rtAvgTime, BOOL bInterlaced = TRUE, BOOL bVIH1 = FALSE);
BOOL IsAllowedSubtype(const GUID *guid);
- inline HRESULT Convert(LAVFrame *pFrame, uint8_t *dst, int width, int height, int dstStride) {
- uint8_t *out = dst;
- int outStride = dstStride;
- // Check if we have proper pixel alignment and the dst memory is actually aligned
- if (m_RequiredAlignment && (FFALIGN(dstStride, m_RequiredAlignment) != dstStride || ((uintptr_t)dst % 16u))) {
- outStride = FFALIGN(dstStride, m_RequiredAlignment);
- size_t requiredSize = (outStride * height * lav_pixfmt_desc[m_OutputPixFmt].bpp) << 3;
- if (requiredSize > m_nAlignedBufferSize) {
- DbgLog((LOG_TRACE, 10, L"::Convert(): Conversion requires a bigger stride (need: %d, have: %d), allocating buffer...", outStride, dstStride));
- av_freep(&m_pAlignedBuffer);
- m_nAlignedBufferSize = requiredSize;
- m_pAlignedBuffer = (uint8_t *)av_malloc(m_nAlignedBufferSize+FF_INPUT_BUFFER_PADDING_SIZE);
- }
- out = m_pAlignedBuffer;
- }
- HRESULT hr = (this->*convert)(pFrame->data, pFrame->stride, out, outStride, width, height, m_InputPixFmt, m_InBpp, m_OutputPixFmt);
- if (out != dst) {
- ChangeStride(out, outStride, dst, dstStride, width, height, m_OutputPixFmt);
- }
- return hr;
- }
+ HRESULT Convert(LAVFrame *pFrame, uint8_t *dst, int width, int height, int dstStride);
BOOL IsRGBConverterActive() { return m_bRGBConverter; }
@@ -108,14 +88,14 @@ private:
void SelectConvertFunction();
// Helper functions for convert_generic
- HRESULT swscale_scale(enum AVPixelFormat srcPix, enum AVPixelFormat dstPix, const uint8_t* const src[], const int srcStride[], BYTE *pOut, int width, int height, int stride, LAVOutPixFmtDesc pixFmtDesc, bool swapPlanes12 = false);
- HRESULT ConvertTo422Packed(const uint8_t* const src[4], const int srcStride[4], uint8_t *dst, int width, int height, int dstStride);
- HRESULT ConvertToAYUV(const uint8_t* const src[4], const int srcStride[4], uint8_t *dst, int width, int height, int dstStride);
- HRESULT ConvertToPX1X(const uint8_t* const src[4], const int srcStride[4], uint8_t *dst, int width, int height, int dstStride, int chromaVertical);
- HRESULT ConvertToY410(const uint8_t* const src[4], const int srcStride[4], uint8_t *dst, int width, int height, int dstStride);
- HRESULT ConvertToY416(const uint8_t* const src[4], const int srcStride[4], uint8_t *dst, int width, int height, int dstStride);
- HRESULT ConvertTov210(const uint8_t* const src[4], const int srcStride[4], uint8_t *dst, int width, int height, int dstStride);
- HRESULT ConvertTov410(const uint8_t* const src[4], const int srcStride[4], uint8_t *dst, int width, int height, int dstStride);
+ HRESULT swscale_scale(enum AVPixelFormat srcPix, enum AVPixelFormat dstPix, const uint8_t* const src[], const int srcStride[], uint8_t* dst[], int width, int height, int dstStride[], LAVOutPixFmtDesc pixFmtDesc, bool swapPlanes12 = false);
+ HRESULT ConvertTo422Packed(const uint8_t* const src[4], const int srcStride[4], uint8_t *dst[4], int width, int height, int dstStride[4]);
+ HRESULT ConvertToAYUV(const uint8_t* const src[4], const int srcStride[4], uint8_t *dst[4], int width, int height, int dstStride[4]);
+ HRESULT ConvertToPX1X(const uint8_t* const src[4], const int srcStride[4], uint8_t *dst[4], int width, int height, int dstStride[4], int chromaVertical);
+ HRESULT ConvertToY410(const uint8_t* const src[4], const int srcStride[4], uint8_t *dst[4], int width, int height, int dstStride[4]);
+ HRESULT ConvertToY416(const uint8_t* const src[4], const int srcStride[4], uint8_t *dst[4], int width, int height, int dstStride[4]);
+ HRESULT ConvertTov210(const uint8_t* const src[4], const int srcStride[4], uint8_t *dst[4], int width, int height, int dstStride[4]);
+ HRESULT ConvertTov410(const uint8_t* const src[4], const int srcStride[4], uint8_t *dst[4], int width, int height, int dstStride[4]);
void DestroySWScale() { if (m_pSwsContext) sws_freeContext(m_pSwsContext); m_pSwsContext = NULL; if (m_rgbCoeffs) _aligned_free(m_rgbCoeffs); m_rgbCoeffs = NULL; if (m_pRandomDithers) _aligned_free(m_pRandomDithers); m_pRandomDithers = NULL; };
SwsContext *GetSWSContext(int width, int height, enum AVPixelFormat srcPix, enum AVPixelFormat dstPix, int flags);
diff --git a/decoder/LAVVideo/pixconv/convert_generic.cpp b/decoder/LAVVideo/pixconv/convert_generic.cpp
index 96c38051..4046ac81 100644
--- a/decoder/LAVVideo/pixconv/convert_generic.cpp
+++ b/decoder/LAVVideo/pixconv/convert_generic.cpp
@@ -67,10 +67,10 @@ DECLARE_CONV_FUNC_IMPL(convert_generic)
hr = ConvertToY416(src, srcStride, dst, width, height, dstStride);
break;
case LAVOutPixFmt_RGB32:
- hr = swscale_scale(inputFmt, AV_PIX_FMT_BGRA, src, srcStride, dst, width, height, dstStride * 4, lav_pixfmt_desc[m_OutputPixFmt]);
+ hr = swscale_scale(inputFmt, AV_PIX_FMT_BGRA, src, srcStride, dst, width, height, dstStride, lav_pixfmt_desc[m_OutputPixFmt]);
break;
case LAVOutPixFmt_RGB24:
- hr = swscale_scale(inputFmt, AV_PIX_FMT_BGR24, src, srcStride, dst, width, height, dstStride * 3, lav_pixfmt_desc[m_OutputPixFmt]);
+ hr = swscale_scale(inputFmt, AV_PIX_FMT_BGR24, src, srcStride, dst, width, height, dstStride, lav_pixfmt_desc[m_OutputPixFmt]);
break;
case LAVOutPixFmt_v210:
hr = ConvertTov210(src, srcStride, dst, width, height, dstStride);
@@ -85,7 +85,7 @@ DECLARE_CONV_FUNC_IMPL(convert_generic)
hr = swscale_scale(inputFmt, AV_PIX_FMT_YUV444P, src, srcStride, dst, width, height, dstStride, lav_pixfmt_desc[m_OutputPixFmt], true);
break;
case LAVOutPixFmt_RGB48:
- hr = swscale_scale(inputFmt, AV_PIX_FMT_RGB48LE, src, srcStride, dst, width, height, dstStride * 6, lav_pixfmt_desc[m_OutputPixFmt], true);
+ hr = swscale_scale(inputFmt, AV_PIX_FMT_RGB48LE, src, srcStride, dst, width, height, dstStride, lav_pixfmt_desc[m_OutputPixFmt], true);
break;
default:
ASSERT(0);
@@ -137,27 +137,15 @@ inline SwsContext *CLAVPixFmtConverter::GetSWSContext(int width, int height, enu
return m_pSwsContext;
}
-HRESULT CLAVPixFmtConverter::swscale_scale(enum AVPixelFormat srcPix, enum AVPixelFormat dstPix, const uint8_t* const src[], const int srcStride[], BYTE *pOut, int width, int height, int stride, LAVOutPixFmtDesc pixFmtDesc, bool swapPlanes12)
+HRESULT CLAVPixFmtConverter::swscale_scale(enum AVPixelFormat srcPix, enum AVPixelFormat dstPix, const uint8_t* const src[], const int srcStride[], uint8_t* dst[], int width, int height, int dstStride[], LAVOutPixFmtDesc pixFmtDesc, bool swapPlanes12)
{
- uint8_t *dst[4];
- int dstStride[4];
- int i, ret;
+ int ret;
SwsContext *ctx = GetSWSContext(width, height, srcPix, dstPix, SWS_BILINEAR);
CheckPointer(m_pSwsContext, E_POINTER);
- memset(dst, 0, sizeof(dst));
- memset(dstStride, 0, sizeof(dstStride));
-
- dst[0] = pOut;
- dstStride[0] = stride;
- for (i = 1; i < pixFmtDesc.planes; ++i) {
- dst[i] = dst[i-1] + (stride / pixFmtDesc.planeWidth[i-1]) * (height / pixFmtDesc.planeHeight[i-1]);
- dstStride[i] = stride / pixFmtDesc.planeWidth[i];
- }
-
if (swapPlanes12) {
- BYTE *tmp = dst[1];
+ uint8_t *tmp = dst[1];
dst[1] = dst[2];
dst[2] = tmp;
}
@@ -166,7 +154,7 @@ HRESULT CLAVPixFmtConverter::swscale_scale(enum AVPixelFormat srcPix, enum AVPix
return S_OK;
}
-HRESULT CLAVPixFmtConverter::ConvertTo422Packed(const uint8_t* const src[4], const int srcStride[4], BYTE *pOut, int width, int height, int dstStride)
+HRESULT CLAVPixFmtConverter::ConvertTo422Packed(const uint8_t* const src[4], const int srcStride[4], uint8_t* dst[], int width, int height, int dstStride[])
{
const BYTE *y = NULL;
const BYTE *u = NULL;
@@ -176,28 +164,28 @@ HRESULT CLAVPixFmtConverter::ConvertTo422Packed(const uint8_t* const src[4], con
BYTE *pTmpBuffer = NULL;
if (m_InputPixFmt != LAVPixFmt_YUV422) {
- uint8_t *dst[4] = {NULL};
- int dstStride[4] = {0};
+ uint8_t *tmp[4] = {NULL};
+ int tmpStride[4] = {0};
int scaleStride = FFALIGN(width, 32);
pTmpBuffer = (BYTE *)av_malloc(height * scaleStride * 2);
- dst[0] = pTmpBuffer;
- dst[1] = dst[0] + (height * scaleStride);
- dst[2] = dst[1] + (height * scaleStride / 2);
- dst[3] = NULL;
+ tmp[0] = pTmpBuffer;
+ tmp[1] = tmp[0] + (height * scaleStride);
+ tmp[2] = tmp[1] + (height * scaleStride / 2);
+ tmp[3] = NULL;
- dstStride[0] = scaleStride;
- dstStride[1] = scaleStride / 2;
- dstStride[2] = scaleStride / 2;
- dstStride[3] = 0;
+ tmpStride[0] = scaleStride;
+ tmpStride[1] = scaleStride / 2;
+ tmpStride[2] = scaleStride / 2;
+ tmpStride[3] = 0;
SwsContext *ctx = GetSWSContext(width, height, GetFFInput(), AV_PIX_FMT_YUV422P, SWS_FAST_BILINEAR);
- sws_scale(ctx, src, srcStride, 0, height, dst, dstStride);
+ sws_scale(ctx, src, srcStride, 0, height, tmp, tmpStride);
- y = dst[0];
- u = dst[1];
- v = dst[2];
+ y = tmp[0];
+ u = tmp[1];
+ v = tmp[2];
sourceStride = scaleStride;
} else {
y = src[0];
@@ -206,12 +194,10 @@ HRESULT CLAVPixFmtConverter::ConvertTo422Packed(const uint8_t* const src[4], con
sourceStride = srcStride[0];
}
- dstStride <<= 1;
-
#define YUV422_PACK_YUY2(offset) *idst++ = y[(i+offset) * 2] | (u[i+offset] << 8) | (y[(i+offset) * 2 + 1] << 16) | (v[i+offset] << 24);
#define YUV422_PACK_UYVY(offset) *idst++ = u[i+offset] | (y[(i+offset) * 2] << 8) | (v[i+offset] << 16) | (y[(i+offset) * 2 + 1] << 24);
- BYTE *out = pOut;
+ uint8_t *out = dst[0];
int halfwidth = width >> 1;
int halfstride = sourceStride >> 1;
@@ -234,7 +220,7 @@ HRESULT CLAVPixFmtConverter::ConvertTo422Packed(const uint8_t* const src[4], con
y += sourceStride;
u += halfstride;
v += halfstride;
- out += dstStride;
+ out += dstStride[0];
}
} else {
for (line = 0; line < height; ++line) {
@@ -255,7 +241,7 @@ HRESULT CLAVPixFmtConverter::ConvertTo422Packed(const uint8_t* const src[4], con
y += sourceStride;
u += halfstride;
v += halfstride;
- out += dstStride;
+ out += dstStride[0];
}
}
@@ -264,7 +250,7 @@ HRESULT CLAVPixFmtConverter::ConvertTo422Packed(const uint8_t* const src[4], con
return S_OK;
}
-HRESULT CLAVPixFmtConverter::ConvertToAYUV(const uint8_t* const src[4], const int srcStride[4], BYTE *pOut, int width, int height, int dstStride)
+HRESULT CLAVPixFmtConverter::ConvertToAYUV(const uint8_t* const src[4], const int srcStride[4], uint8_t* dst[], int width, int height, int dstStride[])
{
const BYTE *y = NULL;
const BYTE *u = NULL;
@@ -274,27 +260,27 @@ HRESULT CLAVPixFmtConverter::ConvertToAYUV(const uint8_t* const src[4], const in
BYTE *pTmpBuffer = NULL;
if (m_InputPixFmt != LAVPixFmt_YUV444) {
- uint8_t *dst[4] = {NULL};
- int swStride[4] = {0};
- int scaleStride = FFALIGN(dstStride, 32);
+ uint8_t *tmp[4] = {NULL};
+ int tmpStride[4] = {0};
+ int scaleStride = FFALIGN(width, 32);
pTmpBuffer = (BYTE *)av_malloc(height * scaleStride * 3);
- dst[0] = pTmpBuffer;
- dst[1] = dst[0] + (height * scaleStride);
- dst[2] = dst[1] + (height * scaleStride);
- dst[3] = NULL;
- swStride[0] = scaleStride;
- swStride[1] = scaleStride;
- swStride[2] = scaleStride;
- swStride[3] = 0;
+ tmp[0] = pTmpBuffer;
+ tmp[1] = tmp[0] + (height * scaleStride);
+ tmp[2] = tmp[1] + (height * scaleStride);
+ tmp[3] = NULL;
+ tmpStride[0] = scaleStride;
+ tmpStride[1] = scaleStride;
+ tmpStride[2] = scaleStride;
+ tmpStride[3] = 0;
SwsContext *ctx = GetSWSContext(width, height, GetFFInput(), AV_PIX_FMT_YUV444P, SWS_POINT);
- sws_scale(ctx, src, srcStride, 0, height, dst, swStride);
+ sws_scale(ctx, src, srcStride, 0, height, tmp, tmpStride);
- y = dst[0];
- u = dst[1];
- v = dst[2];
+ y = tmp[0];
+ u = tmp[1];
+ v = tmp[2];
sourceStride = scaleStride;
} else {
y = src[0];
@@ -305,7 +291,7 @@ HRESULT CLAVPixFmtConverter::ConvertToAYUV(const uint8_t* const src[4], const in
#define YUV444_PACK_AYUV(offset) *idst++ = v[i+offset] | (u[i+offset] << 8) | (y[i+offset] << 16) | (0xff << 24);
- BYTE *out = pOut;
+ BYTE *out = dst[0];
for (line = 0; line < height; ++line) {
int32_t *idst = (int32_t *)out;
for (i = 0; i < (width-7); i+=8) {
@@ -324,7 +310,7 @@ HRESULT CLAVPixFmtConverter::ConvertToAYUV(const uint8_t* const src[4], const in
y += sourceStride;
u += sourceStride;
v += sourceStride;
- out += dstStride << 2;
+ out += dstStride[0];
}
av_freep(&pTmpBuffer);
@@ -332,7 +318,7 @@ HRESULT CLAVPixFmtConverter::ConvertToAYUV(const uint8_t* const src[4], const in
return S_OK;
}
-HRESULT CLAVPixFmtConverter::ConvertToPX1X(const uint8_t* const src[4], const int srcStride[4], BYTE *pOut, int width, int height, int dstStride, int chromaVertical)
+HRESULT CLAVPixFmtConverter::ConvertToPX1X(const uint8_t* const src[4], const int srcStride[4], uint8_t* dst[], int width, int height, int dstStride[], int chromaVertical)
{
const BYTE *y = NULL;
const BYTE *u = NULL;
@@ -342,33 +328,30 @@ HRESULT CLAVPixFmtConverter::ConvertToPX1X(const uint8_t* const src[4], const in
int shift = 0;
- // Stride needs to be doubled for 16-bit per pixel
- dstStride <<= 1;
-
BYTE *pTmpBuffer = NULL;
if ((chromaVertical == 1 && m_InputPixFmt != LAVPixFmt_YUV422bX) || (chromaVertical == 2 && m_InputPixFmt != LAVPixFmt_YUV420bX)) {
- uint8_t *dst[4] = {NULL};
- int dstStride[4] = {0};
+ uint8_t *tmp[4] = {NULL};
+ int tmpStride[4] = {0};
int scaleStride = FFALIGN(width, 32) * 2;
pTmpBuffer = (BYTE *)av_malloc(height * scaleStride * 2);
- dst[0] = pTmpBuffer;
- dst[1] = dst[0] + (height * scaleStride);
- dst[2] = dst[1] + ((height / chromaVertical) * (scaleStride / 2));
- dst[3] = NULL;
- dstStride[0] = scaleStride;
- dstStride[1] = scaleStride / 2;
- dstStride[2] = scaleStride / 2;
- dstStride[3] = 0;
+ tmp[0] = pTmpBuffer;
+ tmp[1] = tmp[0] + (height * scaleStride);
+ tmp[2] = tmp[1] + ((height / chromaVertical) * (scaleStride / 2));
+ tmp[3] = NULL;
+ tmpStride[0] = scaleStride;
+ tmpStride[1] = scaleStride / 2;
+ tmpStride[2] = scaleStride / 2;
+ tmpStride[3] = 0;
SwsContext *ctx = GetSWSContext(width, height, GetFFInput(), chromaVertical == 1 ? AV_PIX_FMT_YUV422P16LE : AV_PIX_FMT_YUV420P16LE, SWS_POINT);
- sws_scale(ctx, src, srcStride, 0, height, dst, dstStride);
+ sws_scale(ctx, src, srcStride, 0, height, tmp, tmpStride);
- y = dst[0];
- u = dst[1];
- v = dst[2];
+ y = tmp[0];
+ u = tmp[1];
+ v = tmp[2];
sourceStride = scaleStride;
} else {
y = src[0];
@@ -380,7 +363,7 @@ HRESULT CLAVPixFmtConverter::ConvertToPX1X(const uint8_t* const src[4], const in
}
// copy Y
- BYTE *pLineOut = pOut;
+ BYTE *pLineOut = dst[0];
const BYTE *pLineIn = y;
for (line = 0; line < height; ++line) {
if (shift == 0) {
@@ -394,14 +377,14 @@ HRESULT CLAVPixFmtConverter::ConvertToPX1X(const uint8_t* const src[4], const in
*idst++ = yv;
}
}
- pLineOut += dstStride;
+ pLineOut += dstStride[0];
pLineIn += sourceStride;
}
sourceStride >>= 2;
// Merge U/V
- BYTE *out = pLineOut;
+ BYTE *out = dst[1];
const int16_t *uc = (int16_t *)u;
const int16_t *vc = (int16_t *)v;
for (line = 0; line < height/chromaVertical; ++line) {
@@ -417,7 +400,7 @@ HRESULT CLAVPixFmtConverter::ConvertToPX1X(const uint8_t* const src[4], const in
}
uc += sourceStride;
vc += sourceStride;
- out += dstStride;
+ out += dstStride[1];
}
av_freep(&pTmpBuffer);
@@ -443,7 +426,7 @@ HRESULT CLAVPixFmtConverter::ConvertToPX1X(const uint8_t* const src[4], const in
out += dstStride; \
}
-HRESULT CLAVPixFmtConverter::ConvertToY410(const uint8_t* const src[4], const int srcStride[4], BYTE *pOut, int width, int height, int dstStride)
+HRESULT CLAVPixFmtConverter::ConvertToY410(const uint8_t* const src[4], const int srcStride[4], uint8_t* dst[], int width, int height, int dstStride[])
{
const int16_t *y = NULL;
const int16_t *u = NULL;
@@ -454,27 +437,27 @@ HRESULT CLAVPixFmtConverter::ConvertToY410(const uint8_t* const src[4], const in
BYTE *pTmpBuffer = NULL;
if (m_InputPixFmt != LAVPixFmt_YUV444bX || m_InBpp > 10) {
- uint8_t *dst[4] = {NULL};
- int dstStride[4] = {0};
+ uint8_t *tmp[4] = {NULL};
+ int tmpStride[4] = {0};
int scaleStride = FFALIGN(width, 32);
pTmpBuffer = (BYTE *)av_malloc(height * scaleStride * 6);
- dst[0] = pTmpBuffer;
- dst[1] = dst[0] + (height * scaleStride * 2);
- dst[2] = dst[1] + (height * scaleStride * 2);
- dst[3] = NULL;
- dstStride[0] = scaleStride * 2;
- dstStride[1] = scaleStride * 2;
- dstStride[2] = scaleStride * 2;
- dstStride[3] = 0;
+ tmp[0] = pTmpBuffer;
+ tmp[1] = tmp[0] + (height * scaleStride * 2);
+ tmp[2] = tmp[1] + (height * scaleStride * 2);
+ tmp[3] = NULL;
+ tmpStride[0] = scaleStride * 2;
+ tmpStride[1] = scaleStride * 2;
+ tmpStride[2] = scaleStride * 2;
+ tmpStride[3] = 0;
SwsContext *ctx = GetSWSContext(width, height, GetFFInput(), AV_PIX_FMT_YUV444P10LE, SWS_POINT);
- sws_scale(ctx, src, srcStride, 0, height, dst, dstStride);
+ sws_scale(ctx, src, srcStride, 0, height, tmp, tmpStride);
- y = (int16_t *)dst[0];
- u = (int16_t *)dst[1];
- v = (int16_t *)dst[2];
+ y = (int16_t *)tmp[0];
+ u = (int16_t *)tmp[1];
+ v = (int16_t *)tmp[2];
sourceStride = scaleStride;
} else {
y = (int16_t *)src[0];
@@ -485,13 +468,10 @@ HRESULT CLAVPixFmtConverter::ConvertToY410(const uint8_t* const src[4], const in
b9Bit = (m_InBpp == 9);
}
- // 32-bit per pixel
- dstStride *= 4;
-
#define YUV444_Y410_PACK \
*idst++ = (uv & 0x3FF) | ((yv & 0x3FF) << 10) | ((vv & 0x3FF) << 20) | (3 << 30);
- BYTE *out = pOut;
+ BYTE *out = dst[0];
YUV444_PACKED_LOOP_HEAD_LE(width, height, y, u, v, out)
if (b9Bit) {
yv <<= 1;
@@ -499,14 +479,14 @@ HRESULT CLAVPixFmtConverter::ConvertToY410(const uint8_t* const src[4], const in
vv <<= 1;
}
YUV444_Y410_PACK
- YUV444_PACKED_LOOP_END(y, u, v, out, sourceStride, dstStride)
+ YUV444_PACKED_LOOP_END(y, u, v, out, sourceStride, dstStride[0])
av_freep(&pTmpBuffer);
return S_OK;
}
-HRESULT CLAVPixFmtConverter::ConvertToY416(const uint8_t* const src[4], const int srcStride[4], BYTE *pOut, int width, int height, int dstStride)
+HRESULT CLAVPixFmtConverter::ConvertToY416(const uint8_t* const src[4], const int srcStride[4], uint8_t* dst[], int width, int height, int dstStride[])
{
const int16_t *y = NULL;
const int16_t *u = NULL;
@@ -516,27 +496,27 @@ HRESULT CLAVPixFmtConverter::ConvertToY416(const uint8_t* const src[4], const in
BYTE *pTmpBuffer = NULL;
if (m_InputPixFmt != LAVPixFmt_YUV444bX || m_InBpp != 16) {
- uint8_t *dst[4] = {NULL};
- int dstStride[4] = {0};
+ uint8_t *tmp[4] = {NULL};
+ int tmpStride[4] = {0};
int scaleStride = FFALIGN(width, 32);
pTmpBuffer = (BYTE *)av_malloc(height * scaleStride * 6);
- dst[0] = pTmpBuffer;
- dst[1] = dst[0] + (height * scaleStride * 2);
- dst[2] = dst[1] + (height * scaleStride * 2);
- dst[3] = NULL;
- dstStride[0] = scaleStride * 2;
- dstStride[1] = scaleStride * 2;
- dstStride[2] = scaleStride * 2;
- dstStride[3] = 0;
+ tmp[0] = pTmpBuffer;
+ tmp[1] = tmp[0] + (height * scaleStride * 2);
+ tmp[2] = tmp[1] + (height * scaleStride * 2);
+ tmp[3] = NULL;
+ tmpStride[0] = scaleStride * 2;
+ tmpStride[1] = scaleStride * 2;
+ tmpStride[2] = scaleStride * 2;
+ tmpStride[3] = 0;
SwsContext *ctx = GetSWSContext(width, height, GetFFInput(), AV_PIX_FMT_YUV444P16LE, SWS_POINT);
- sws_scale(ctx, src, srcStride, 0, height, dst, dstStride);
+ sws_scale(ctx, src, srcStride, 0, height, tmp, tmpStride);
- y = (int16_t *)dst[0];
- u = (int16_t *)dst[1];
- v = (int16_t *)dst[2];
+ y = (int16_t *)tmp[0];
+ u = (int16_t *)tmp[1];
+ v = (int16_t *)tmp[2];
sourceStride = scaleStride;
} else {
y = (int16_t *)src[0];
@@ -545,24 +525,21 @@ HRESULT CLAVPixFmtConverter::ConvertToY416(const uint8_t* const src[4], const in
sourceStride = srcStride[0] / 2;
}
- // 64-bit per pixel
- dstStride <<= 3;
-
#define YUV444_Y416_PACK \
*idst++ = 0xFFFF | (vv << 16); \
*idst++ = yv | (uv << 16);
- BYTE *out = pOut;
+ BYTE *out = dst[0];
YUV444_PACKED_LOOP_HEAD_LE(width, height, y, u, v, out)
YUV444_Y416_PACK
- YUV444_PACKED_LOOP_END(y, u, v, out, sourceStride, dstStride)
+ YUV444_PACKED_LOOP_END(y, u, v, out, sourceStride, dstStride[0])
av_freep(&pTmpBuffer);
return S_OK;
}
-HRESULT CLAVPixFmtConverter::ConvertTov210(const uint8_t* const src[4], const int srcStride[4], BYTE *pOut, int width, int height, int dstStride)
+HRESULT CLAVPixFmtConverter::ConvertTov210(const uint8_t* const src[4], const int srcStride[4], uint8_t* dst[], int width, int height, int dstStride[])
{
const int16_t *y = NULL;
const int16_t *u = NULL;
@@ -573,27 +550,27 @@ HRESULT CLAVPixFmtConverter::ConvertTov210(const uint8_t* const src[4], const in
BYTE *pTmpBuffer = NULL;
if (m_InputPixFmt != LAVPixFmt_YUV422bX || m_InBpp != 10) {
- uint8_t *dst[4] = {NULL};
- int dstStride[4] = {0};
+ uint8_t *tmp[4] = {NULL};
+ int tmpStride[4] = {0};
int scaleStride = FFALIGN(width, 32);
pTmpBuffer = (BYTE *)av_malloc(height * scaleStride * 6);
- dst[0] = pTmpBuffer;
- dst[1] = dst[0] + (height * scaleStride * 2);
- dst[2] = dst[1] + (height * scaleStride * 2);
- dst[3] = NULL;
- dstStride[0] = scaleStride * 2;
- dstStride[1] = scaleStride;
- dstStride[2] = scaleStride;
- dstStride[3] = 0;
+ tmp[0] = pTmpBuffer;
+ tmp[1] = tmp[0] + (height * scaleStride * 2);
+ tmp[2] = tmp[1] + (height * scaleStride * 2);
+ tmp[3] = NULL;
+ tmpStride[0] = scaleStride * 2;
+ tmpStride[1] = scaleStride;
+ tmpStride[2] = scaleStride;
+ tmpStride[3] = 0;
SwsContext *ctx = GetSWSContext(width, height, GetFFInput(), AV_PIX_FMT_YUV422P10LE, SWS_POINT);
- sws_scale(ctx, src, srcStride, 0, height, dst, dstStride);
+ sws_scale(ctx, src, srcStride, 0, height, tmp, tmpStride);
- y = (int16_t *)dst[0];
- u = (int16_t *)dst[1];
- v = (int16_t *)dst[2];
+ y = (int16_t *)tmp[0];
+ u = (int16_t *)tmp[1];
+ v = (int16_t *)tmp[2];
srcyStride = scaleStride;
srcuvStride = scaleStride >> 1;
} else {
@@ -604,10 +581,10 @@ HRESULT CLAVPixFmtConverter::ConvertTov210(const uint8_t* const src[4], const in
srcuvStride = srcStride[1] >> 1;
}
- // 32-bit per pixel
- dstStride = ((dstStride + 47) / 48) * 128;
+ // Calculate v210 stride
+ int outStride = (((dstStride[0] >> 2) + 47) / 48) * 128;
- BYTE *pdst = pOut;
+ BYTE *pdst = dst[0];
int32_t *p = (int32_t *)pdst;
int w;
@@ -644,7 +621,7 @@ HRESULT CLAVPixFmtConverter::ConvertTov210(const uint8_t* const src[4], const in
}
}
- pdst += dstStride;
+ pdst += outStride;
memset(p, 0, pdst - (BYTE *)p);
p = (int32_t *)pdst;
y += srcyStride - width;
@@ -656,7 +633,7 @@ HRESULT CLAVPixFmtConverter::ConvertTov210(const uint8_t* const src[4], const in
return S_OK;
}
-HRESULT CLAVPixFmtConverter::ConvertTov410(const uint8_t* const src[4], const int srcStride[4], BYTE *pOut, int width, int height, int dstStride)
+HRESULT CLAVPixFmtConverter::ConvertTov410(const uint8_t* const src[4], const int srcStride[4], uint8_t* dst[], int width, int height, int dstStride[])
{
const int16_t *y = NULL;
const int16_t *u = NULL;
@@ -667,27 +644,27 @@ HRESULT CLAVPixFmtConverter::ConvertTov410(const uint8_t* const src[4], const in
BYTE *pTmpBuffer = NULL;
if (m_InputPixFmt != LAVPixFmt_YUV444bX || m_InBpp > 10) {
- uint8_t *dst[4] = {NULL};
- int dstStride[4] = {0};
+ uint8_t *tmp[4] = {NULL};
+ int tmpStride[4] = {0};
int scaleStride = FFALIGN(width, 32);
pTmpBuffer = (BYTE *)av_malloc(height * scaleStride * 6);
- dst[0] = pTmpBuffer;
- dst[1] = dst[0] + (height * scaleStride * 2);
- dst[2] = dst[1] + (height * scaleStride * 2);
- dst[3] = NULL;
- dstStride[0] = scaleStride * 2;
- dstStride[1] = scaleStride * 2;
- dstStride[2] = scaleStride * 2;
- dstStride[3] = 0;
+ tmp[0] = pTmpBuffer;
+ tmp[1] = tmp[0] + (height * scaleStride * 2);
+ tmp[2] = tmp[1] + (height * scaleStride * 2);
+ tmp[3] = NULL;
+ tmpStride[0] = scaleStride * 2;
+ tmpStride[1] = scaleStride * 2;
+ tmpStride[2] = scaleStride * 2;
+ tmpStride[3] = 0;
SwsContext *ctx = GetSWSContext(width, height, GetFFInput(), AV_PIX_FMT_YUV444P10LE, SWS_POINT);
- sws_scale(ctx, src, srcStride, 0, height, dst, dstStride);
+ sws_scale(ctx, src, srcStride, 0, height, tmp, tmpStride);
- y = (int16_t *)dst[0];
- u = (int16_t *)dst[1];
- v = (int16_t *)dst[2];
+ y = (int16_t *)tmp[0];
+ u = (int16_t *)tmp[1];
+ v = (int16_t *)tmp[2];
sourceStride = scaleStride;
} else {
y = (int16_t *)src[0];
@@ -698,13 +675,10 @@ HRESULT CLAVPixFmtConverter::ConvertTov410(const uint8_t* const src[4], const in
b9Bit = (m_InBpp == 9);
}
- // 32-bit per pixel
- dstStride *= 4;
-
#define YUV444_v410_PACK \
*idst++ = ((uv & 0x3FF) << 2) | ((yv & 0x3FF) << 12) | ((vv & 0x3FF) << 22);
- BYTE *out = pOut;
+ BYTE *out = dst[0];
YUV444_PACKED_LOOP_HEAD_LE(width, height, y, u, v, out)
if (b9Bit) {
yv <<= 1;
@@ -712,7 +686,7 @@ HRESULT CLAVPixFmtConverter::ConvertTov410(const uint8_t* const src[4], const in
vv <<= 1;
}
YUV444_v410_PACK
- YUV444_PACKED_LOOP_END(y, u, v, out, sourceStride, dstStride)
+ YUV444_PACKED_LOOP_END(y, u, v, out, sourceStride, dstStride[0])
av_freep(&pTmpBuffer);
diff --git a/decoder/LAVVideo/pixconv/interleave.cpp b/decoder/LAVVideo/pixconv/interleave.cpp
index b9c132fa..699d157e 100644
--- a/decoder/LAVVideo/pixconv/interleave.cpp
+++ b/decoder/LAVVideo/pixconv/interleave.cpp
@@ -30,8 +30,8 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv444_y410)
const uint16_t *u = (const uint16_t *)src[1];
const uint16_t *v = (const uint16_t *)src[2];
- ptrdiff_t inStride = srcStride[0] >> 1;
- ptrdiff_t outStride = dstStride << 2;
+ const ptrdiff_t inStride = srcStride[0] >> 1;
+ const ptrdiff_t outStride = dstStride[0];
int shift = 10 - bpp;
ptrdiff_t line, i;
@@ -44,7 +44,7 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv444_y410)
_mm_sfence();
for (line = 0; line < height; ++line) {
- __m128i *dst128 = (__m128i *)(dst + line * outStride);
+ __m128i *dst128 = (__m128i *)(dst[0] + line * outStride);
for (i = 0; i < width; i+=8) {
PIXCONV_LOAD_PIXEL8_ALIGNED(xmm0, (y+i));
diff --git a/decoder/LAVVideo/pixconv/rgb2rgb_unscaled.cpp b/decoder/LAVVideo/pixconv/rgb2rgb_unscaled.cpp
index 47a75a91..b7a0c16d 100644
--- a/decoder/LAVVideo/pixconv/rgb2rgb_unscaled.cpp
+++ b/decoder/LAVVideo/pixconv/rgb2rgb_unscaled.cpp
@@ -28,7 +28,7 @@ DECLARE_CONV_FUNC_IMPL(convert_rgb48_rgb32_ssse3)
{
const uint16_t *rgb = (const uint16_t *)src[0];
const ptrdiff_t inStride = srcStride[0] >> 1;
- const ptrdiff_t outStride = dstStride * 4;
+ const ptrdiff_t outStride = dstStride[0];
ptrdiff_t line, i;
int processWidth = width * 3;
@@ -43,7 +43,7 @@ DECLARE_CONV_FUNC_IMPL(convert_rgb48_rgb32_ssse3)
_mm_sfence();
for (line = 0; line < height; line++) {
- __m128i *dst128 = (__m128i *)(dst + line * outStride);
+ __m128i *dst128 = (__m128i *)(dst[0] + line * outStride);
// Load dithering coefficients for this line
if (ditherMode == LAVDither_Random) {
@@ -96,7 +96,7 @@ DECLARE_CONV_FUNC_IMPL(convert_rgb48_rgb)
// Dither to RGB24/32 with SSE2
const uint16_t *rgb = (const uint16_t *)dstBS[0];
const ptrdiff_t inStride = srcStride[0] >> 1;
- const ptrdiff_t outStride = dstStride * (out32 ? 4 : 3);
+ const ptrdiff_t outStride = dstStride[0];
ptrdiff_t line, i;
int processWidth = width * 3;
@@ -117,7 +117,7 @@ DECLARE_CONV_FUNC_IMPL(convert_rgb48_rgb)
if (out32) {
dst128 = (__m128i *)rgb24buffer;
} else {
- dst128 = (__m128i *)(dst + line * outStride);
+ dst128 = (__m128i *)(dst[0] + line * outStride);
}
// Load dithering coefficients for this line
@@ -143,7 +143,7 @@ DECLARE_CONV_FUNC_IMPL(convert_rgb48_rgb)
rgb += inStride;
if (out32) {
uint32_t *src24 = (uint32_t *)rgb24buffer;
- uint32_t *dst32 = (uint32_t *)(dst + line * outStride);
+ uint32_t *dst32 = (uint32_t *)(dst[0] + line * outStride);
for (i = 0; i < width; i += 4) {
uint32_t sa = src24[0];
uint32_t sb = src24[1];
diff --git a/decoder/LAVVideo/pixconv/yuv2rgb.cpp b/decoder/LAVVideo/pixconv/yuv2rgb.cpp
index 5e68dc2f..4a5003ce 100644
--- a/decoder/LAVVideo/pixconv/yuv2rgb.cpp
+++ b/decoder/LAVVideo/pixconv/yuv2rgb.cpp
@@ -407,8 +407,6 @@ static int __stdcall yuv2rgb_process_lines(const uint8_t *srcY, const uint8_t *s
const uint8_t *v = srcV;
uint8_t *rgb = dst;
- dstStride *= (3 + out32);
-
ptrdiff_t line = sliceYStart;
ptrdiff_t lastLine = sliceYEnd;
@@ -577,15 +575,15 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv_rgb)
const uint16_t *dithers = (ditherMode == LAVDither_Random) ? GetRandomDitherCoeffs(height, DITHER_STEPS * 3, 4, 0) : NULL;
if (ditherMode == LAVDither_Random && dithers != NULL) {
if (m_ColorProps.VideoTransferMatrix == 7) {
- yuv2rgb_dispatch<out32, 1, 1>(src, srcStride, dst, dstStride, width, height, inputFormat, bpp, m_NumThreads, coeffs, dithers);
+ yuv2rgb_dispatch<out32, 1, 1>(src, srcStride, dst[0], dstStride[0], width, height, inputFormat, bpp, m_NumThreads, coeffs, dithers);
} else {
- yuv2rgb_dispatch<out32, 1, 0>(src, srcStride, dst, dstStride, width, height, inputFormat, bpp, m_NumThreads, coeffs, dithers);
+ yuv2rgb_dispatch<out32, 1, 0>(src, srcStride, dst[0], dstStride[0], width, height, inputFormat, bpp, m_NumThreads, coeffs, dithers);
}
} else {
if (m_ColorProps.VideoTransferMatrix == 7) {
- yuv2rgb_dispatch<out32, 0, 1>(src, srcStride, dst, dstStride, width, height, inputFormat, bpp, m_NumThreads, coeffs, NULL);
+ yuv2rgb_dispatch<out32, 0, 1>(src, srcStride, dst[0], dstStride[0], width, height, inputFormat, bpp, m_NumThreads, coeffs, NULL);
} else {
- yuv2rgb_dispatch<out32, 0, 0>(src, srcStride, dst, dstStride, width, height, inputFormat, bpp, m_NumThreads, coeffs, NULL);
+ yuv2rgb_dispatch<out32, 0, 0>(src, srcStride, dst[0], dstStride[0], width, height, inputFormat, bpp, m_NumThreads, coeffs, NULL);
}
}
diff --git a/decoder/LAVVideo/pixconv/yuv2yuv_unscaled.cpp b/decoder/LAVVideo/pixconv/yuv2yuv_unscaled.cpp
index 6591d75d..ba116abd 100644
--- a/decoder/LAVVideo/pixconv/yuv2yuv_unscaled.cpp
+++ b/decoder/LAVVideo/pixconv/yuv2yuv_unscaled.cpp
@@ -31,13 +31,14 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv_yv_nv12_dither_le)
const uint16_t *u = (const uint16_t *)src[1];
const uint16_t *v = (const uint16_t *)src[2];
- const ptrdiff_t inYStride = srcStride[0] >> 1;
- const ptrdiff_t inUVStride = srcStride[1] >> 1;
+ const ptrdiff_t inYStride = srcStride[0] >> 1;
+ const ptrdiff_t inUVStride = srcStride[1] >> 1;
- ptrdiff_t outLumaStride = dstStride;
- ptrdiff_t outChromaStride = dstStride;
- ptrdiff_t chromaWidth = width;
- ptrdiff_t chromaHeight = height;
+ const ptrdiff_t outYStride = dstStride[0];
+ const ptrdiff_t outUVStride = dstStride[1];
+
+ ptrdiff_t chromaWidth = width;
+ ptrdiff_t chromaHeight = height;
LAVDitherMode ditherMode = m_pSettings->GetDitherMode();
const uint16_t *dithers = GetRandomDitherCoeffs(height, 4, 8, 0);
@@ -46,19 +47,13 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv_yv_nv12_dither_le)
if (inputFormat == LAVPixFmt_YUV420bX)
chromaHeight = chromaHeight >> 1;
- if (inputFormat == LAVPixFmt_YUV420bX || inputFormat == LAVPixFmt_YUV422bX) {
+ if (inputFormat == LAVPixFmt_YUV420bX || inputFormat == LAVPixFmt_YUV422bX)
chromaWidth = (chromaWidth + 1) >> 1;
- outChromaStride = outChromaStride >> 1;
- }
ptrdiff_t line, i;
__m128i xmm0,xmm1,xmm2,xmm3,xmm4,xmm5,xmm6,xmm7;
- uint8_t *dstY = dst;
- uint8_t *dstV = dstY + outLumaStride * height;
- uint8_t *dstU = dstV + outChromaStride * chromaHeight;
-
_mm_sfence();
// Process Y
@@ -74,7 +69,7 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv_yv_nv12_dither_le)
xmm4 = xmm5 = xmm6 = xmm7;
}
- __m128i *dst128Y = (__m128i *)(dstY + line * outLumaStride);
+ __m128i *dst128Y = (__m128i *)(dst[0] + line * outYStride);
for (i = 0; i < width; i+=32) {
// Load pixels into registers, and apply dithering
@@ -92,9 +87,9 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv_yv_nv12_dither_le)
// Process U/V for chromaHeight lines
if (line < chromaHeight) {
- __m128i *dst128UV = (__m128i *)(dstV + line * outLumaStride);
- __m128i *dst128U = (__m128i *)(dstU + line * outChromaStride);
- __m128i *dst128V = (__m128i *)(dstV + line * outChromaStride);
+ __m128i *dst128UV = (__m128i *)(dst[1] + line * outUVStride);
+ __m128i *dst128U = (__m128i *)(dst[2] + line * outUVStride);
+ __m128i *dst128V = (__m128i *)(dst[1] + line * outUVStride);
for (i = 0; i < chromaWidth; i+=16) {
PIXCONV_LOAD_PIXEL16_DITHER(xmm0, xmm4, (u+i+0), bpp); /* U0U0U0U0 */
@@ -137,11 +132,12 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv420_px1x_le)
const uint16_t *u = (const uint16_t *)src[1];
const uint16_t *v = (const uint16_t *)src[2];
- const ptrdiff_t inYStride = srcStride[0] >> 1;
- const ptrdiff_t inUVStride = srcStride[1] >> 1;
- const ptrdiff_t outStride = dstStride << 1;
- const ptrdiff_t uvHeight = (outputFormat == LAVOutPixFmt_P010 || outputFormat == LAVOutPixFmt_P016) ? (height >> 1) : height;
- const ptrdiff_t uvWidth = (width + 1) >> 1;
+ const ptrdiff_t inYStride = srcStride[0] >> 1;
+ const ptrdiff_t inUVStride = srcStride[1] >> 1;
+ const ptrdiff_t outYStride = dstStride[0];
+ const ptrdiff_t outUVStride = dstStride[1];
+ const ptrdiff_t uvHeight = (outputFormat == LAVOutPixFmt_P010 || outputFormat == LAVOutPixFmt_P016) ? (height >> 1) : height;
+ const ptrdiff_t uvWidth = (width + 1) >> 1;
ptrdiff_t line, i;
__m128i xmm0,xmm1,xmm2;
@@ -150,7 +146,7 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv420_px1x_le)
// Process Y
for (line = 0; line < height; ++line) {
- __m128i *dst128Y = (__m128i *)(dst + line * outStride);
+ __m128i *dst128Y = (__m128i *)(dst[0] + line * outYStride);
for (i = 0; i < width; i+=16) {
// Load 8 pixels into register
@@ -164,11 +160,9 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv420_px1x_le)
y += inYStride;
}
- BYTE *dstUV = dst + (height * outStride);
-
// Process UV
for (line = 0; line < uvHeight; ++line) {
- __m128i *dst128UV = (__m128i *)(dstUV + line * outStride);
+ __m128i *dst128UV = (__m128i *)(dst[1] + line * outUVStride);
for (i = 0; i < uvWidth; i+=8) {
// Load 8 pixels into register
@@ -198,23 +192,18 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv_yv)
const ptrdiff_t inLumaStride = srcStride[0];
const ptrdiff_t inChromaStride = srcStride[1];
- const ptrdiff_t outLumaStride = dstStride;
- ptrdiff_t outChromaStride = dstStride;
+
+ const ptrdiff_t outLumaStride = dstStride[0];
+ const ptrdiff_t outChromaStride = dstStride[1];
ptrdiff_t line;
- ptrdiff_t chromaWidth = width;
- ptrdiff_t chromaHeight = height;
+ ptrdiff_t chromaWidth = width;
+ ptrdiff_t chromaHeight = height;
if (inputFormat == LAVPixFmt_YUV420)
chromaHeight = chromaHeight >> 1;
- if (inputFormat == LAVPixFmt_YUV420 || inputFormat == LAVPixFmt_YUV422) {
+ if (inputFormat == LAVPixFmt_YUV420 || inputFormat == LAVPixFmt_YUV422)
chromaWidth = (chromaWidth + 1) >> 1;
- outChromaStride = outChromaStride >> 1;
- }
-
- uint8_t *dstY = dst;
- uint8_t *dstV = dstY + height * outLumaStride;
- uint8_t *dstU = dstV + chromaHeight * outChromaStride;
// Copy planes
@@ -223,12 +212,12 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv_yv)
// Y
if ((outLumaStride % 16) == 0 && ((intptr_t)dst % 16u) == 0) {
for(line = 0; line < height; ++line) {
- PIXCONV_MEMCPY_ALIGNED(dstY + outLumaStride * line, y, width);
+ PIXCONV_MEMCPY_ALIGNED(dst[0] + outLumaStride * line, y, width);
y += inLumaStride;
}
} else {
for(line = 0; line < height; ++line) {
- memcpy(dstY + outLumaStride * line, y, width);
+ memcpy(dst[0] + outLumaStride * line, y, width);
y += inLumaStride;
}
}
@@ -237,16 +226,16 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv_yv)
if ((outChromaStride % 16) == 0 && ((intptr_t)dst % 16u) == 0) {
for(line = 0; line < chromaHeight; ++line) {
PIXCONV_MEMCPY_ALIGNED_TWO(
- dstU + outChromaStride * line, u,
- dstV + outChromaStride * line, v,
+ dst[2] + outChromaStride * line, u,
+ dst[1] + outChromaStride * line, v,
chromaWidth);
u += inChromaStride;
v += inChromaStride;
}
} else {
for(line = 0; line < chromaHeight; ++line) {
- memcpy(dstU + outChromaStride * line, u, chromaWidth);
- memcpy(dstV + outChromaStride * line, v, chromaWidth);
+ memcpy(dst[2] + outChromaStride * line, u, chromaWidth);
+ memcpy(dst[1] + outChromaStride * line, v, chromaWidth);
u += inChromaStride;
v += inChromaStride;
}
@@ -263,14 +252,13 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv420_nv12)
const ptrdiff_t inLumaStride = srcStride[0];
const ptrdiff_t inChromaStride = srcStride[1];
- const ptrdiff_t outStride = dstStride;
+
+ const ptrdiff_t outLumaStride = dstStride[0];
+ const ptrdiff_t outChromaStride = dstStride[1];
const ptrdiff_t chromaWidth = (width + 1) >> 1;
const ptrdiff_t chromaHeight = height >> 1;
- uint8_t *dstY = dst;
- uint8_t *dstUV = dstY + height * outStride;
-
ptrdiff_t line,i;
__m128i xmm0,xmm1,xmm2,xmm3;
@@ -278,13 +266,13 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv420_nv12)
// Y
for(line = 0; line < height; ++line) {
- PIXCONV_MEMCPY_ALIGNED32(dstY + outStride * line, y, width);
+ PIXCONV_MEMCPY_ALIGNED32(dst[0] + outLumaStride * line, y, width);
y += inLumaStride;
}
// U/V
for(line = 0; line < chromaHeight; ++line) {
- __m128i *dst128UV = (__m128i *)(dstUV + line * outStride);
+ __m128i *dst128UV = (__m128i *)(dst[1] + line * outChromaStride);
for (i = 0; i < chromaWidth; i+=16) {
PIXCONV_LOAD_PIXEL8_ALIGNED(xmm0, (v+i)); /* VVVV */
@@ -313,7 +301,8 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv422_yuy2_uyvy)
const ptrdiff_t inLumaStride = srcStride[0];
const ptrdiff_t inChromaStride = srcStride[1];
- const ptrdiff_t outStride = dstStride << 1;
+
+ const ptrdiff_t outStride = dstStride[0];
const ptrdiff_t chromaWidth = (width + 1) >> 1;
@@ -323,7 +312,7 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv422_yuy2_uyvy)
_mm_sfence();
for (line = 0; line < height; ++line) {
- __m128i *dst128 = (__m128i *)(dst + line * outStride);
+ __m128i *dst128 = (__m128i *)(dst[0] + line * outStride);
for (i = 0; i < chromaWidth; i+=16) {
// Load pixels
@@ -386,7 +375,7 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv422_yuy2_uyvy_dither_le)
const ptrdiff_t inLumaStride = srcStride[0] >> 1;
const ptrdiff_t inChromaStride = srcStride[1] >> 1;
- const ptrdiff_t outStride = dstStride << 1;
+ const ptrdiff_t outStride = dstStride[0];
const ptrdiff_t chromaWidth = (width + 1) >> 1;
LAVDitherMode ditherMode = m_pSettings->GetDitherMode();
@@ -400,7 +389,7 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv422_yuy2_uyvy_dither_le)
_mm_sfence();
for (line = 0; line < height; ++line) {
- __m128i *dst128 = (__m128i *)(dst + line * outStride);
+ __m128i *dst128 = (__m128i *)(dst[0] + line * outStride);
// Load dithering coefficients for this line
if (ditherMode == LAVDither_Random) {
@@ -459,15 +448,11 @@ DECLARE_CONV_FUNC_IMPL(convert_nv12_yv12)
const uint8_t *y = src[0];
const uint8_t *uv = src[1];
- const ptrdiff_t inStride = srcStride[0];
- const ptrdiff_t outLumaStride = dstStride;
- const ptrdiff_t outChromaStride = dstStride >> 1;
-
- const ptrdiff_t chromaHeight = height >> 1;
-
- uint8_t *dstY = dst;
- uint8_t *dstV = dstY + height * outLumaStride;
- uint8_t *dstU = dstV + chromaHeight * outChromaStride;
+ const ptrdiff_t inLumaStride = srcStride[0];
+ const ptrdiff_t inChromaStride = srcStride[1];
+ const ptrdiff_t outLumaStride = dstStride[0];
+ const ptrdiff_t outChromaStride = dstStride[1];
+ const ptrdiff_t chromaHeight = height >> 1;
ptrdiff_t line, i;
__m128i xmm0,xmm1,xmm2,xmm3,xmm7;
@@ -478,13 +463,13 @@ DECLARE_CONV_FUNC_IMPL(convert_nv12_yv12)
// Copy the y
for (line = 0; line < height; line++) {
- PIXCONV_MEMCPY_ALIGNED(dstY + outLumaStride * line, y, width);
- y += inStride;
+ PIXCONV_MEMCPY_ALIGNED(dst[0] + outLumaStride * line, y, width);
+ y += inLumaStride;
}
for (line = 0; line < chromaHeight; line++) {
- __m128i *dstV128 = (__m128i *)(dstV + outChromaStride * line);
- __m128i *dstU128 = (__m128i *)(dstU + outChromaStride * line);
+ __m128i *dstV128 = (__m128i *)(dst[1] + outChromaStride * line);
+ __m128i *dstU128 = (__m128i *)(dst[2] + outChromaStride * line);
for (i = 0; i < width; i+=32) {
PIXCONV_LOAD_PIXEL8_ALIGNED(xmm0, uv+i+0);
@@ -505,7 +490,7 @@ DECLARE_CONV_FUNC_IMPL(convert_nv12_yv12)
_mm_stream_si128(dstU128++, xmm0);
_mm_stream_si128(dstV128++, xmm2);
}
- uv += inStride;
+ uv += inChromaStride;
}
return S_OK;
@@ -516,39 +501,38 @@ DECLARE_CONV_FUNC_IMPL(convert_nv12_nv12)
const uint8_t *y = src[0];
const uint8_t *uv = src[1];
- const ptrdiff_t inStride = srcStride[0];
- const ptrdiff_t outStride = dstStride;
- const ptrdiff_t chromaHeight = (height >> 1);
-
- uint8_t *dstY = dst;
- uint8_t *dstUV = dstY + height * outStride;
+ const ptrdiff_t inLumaStride = srcStride[0];
+ const ptrdiff_t inChromaStride = srcStride[1];
+ const ptrdiff_t outLumaStride = dstStride[0];
+ const ptrdiff_t outChromaStride = dstStride[1];
+ const ptrdiff_t chromaHeight = height >> 1;
ptrdiff_t line;
_mm_sfence();
// Use SSE2 copy when the stride is aligned
- if ((outStride % 16) == 0) {
+ if ((dstStride[0] % 16) == 0) {
// Copy the data
for (line = 0; line < height; line++) {
- PIXCONV_MEMCPY_ALIGNED(dstY + outStride * line, y, width);
- y += inStride;
+ PIXCONV_MEMCPY_ALIGNED(dst[0] + outLumaStride * line, y, width);
+ y += inLumaStride;
}
for (line = 0; line < chromaHeight; line++) {
- PIXCONV_MEMCPY_ALIGNED(dstUV + outStride * line, uv, width);
- uv += inStride;
+ PIXCONV_MEMCPY_ALIGNED(dst[1] + outChromaStride * line, uv, width);
+ uv += inChromaStride;
}
} else {
// Copy the data
for (line = 0; line < height; line++) {
- memcpy(dstY + outStride * line, y, width);
- y += inStride;
+ memcpy(dst[0] + outLumaStride * line, y, width);
+ y += inLumaStride;
}
for (line = 0; line < chromaHeight; line++) {
- memcpy(dstUV + outStride * line, uv, width);
- uv += inStride;
+ memcpy(dst[1] + outChromaStride * line, uv, width);
+ uv += inChromaStride;
}
}
diff --git a/decoder/LAVVideo/pixconv/yuv420_yuy2.cpp b/decoder/LAVVideo/pixconv/yuv420_yuy2.cpp
index c5859623..869992a3 100644
--- a/decoder/LAVVideo/pixconv/yuv420_yuy2.cpp
+++ b/decoder/LAVVideo/pixconv/yuv420_yuy2.cpp
@@ -168,8 +168,6 @@ static int __stdcall yuv420yuy2_process_lines(const uint8_t *srcY, const uint8_t
const uint8_t *v = srcV;
uint8_t *yuy2 = dst;
- dstStride *= 2;
-
// Processing starts at line 1, and ends at height - 1. The first and last line have special handling
ptrdiff_t line = 1;
const ptrdiff_t lastLine = height - 1;
@@ -253,9 +251,9 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv420_yuy2)
LAVDitherMode ditherMode = m_pSettings->GetDitherMode();
const uint16_t *dithers = (ditherMode == LAVDither_Random) ? GetRandomDitherCoeffs(height, DITHER_STEPS * 2, bpp - 8 + 2, 0) : NULL;
if (ditherMode == LAVDither_Random && dithers != NULL) {
- yuv420yuy2_dispatch<uyvy, 1>(inputFormat, bpp, src[0], src[1], src[2], dst, width, height, srcStride[0], srcStride[1], dstStride, dithers);
+ yuv420yuy2_dispatch<uyvy, 1>(inputFormat, bpp, src[0], src[1], src[2], dst[0], width, height, srcStride[0], srcStride[1], dstStride[0], dithers);
} else {
- yuv420yuy2_dispatch<uyvy, 0>(inputFormat, bpp, src[0], src[1], src[2], dst, width, height, srcStride[0], srcStride[1], dstStride, NULL);
+ yuv420yuy2_dispatch<uyvy, 0>(inputFormat, bpp, src[0], src[1], src[2], dst[0], width, height, srcStride[0], srcStride[1], dstStride[0], NULL);
}
return S_OK;
diff --git a/decoder/LAVVideo/pixconv/yuv444_ayuv.cpp b/decoder/LAVVideo/pixconv/yuv444_ayuv.cpp
index a84e2940..40d7ea16 100644
--- a/decoder/LAVVideo/pixconv/yuv444_ayuv.cpp
+++ b/decoder/LAVVideo/pixconv/yuv444_ayuv.cpp
@@ -39,7 +39,7 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv444_ayuv)
const uint8_t *v = (const uint8_t *)src[2];
const ptrdiff_t inStride = srcStride[0];
- const ptrdiff_t outStride = dstStride << 2;
+ const ptrdiff_t outStride = dstStride[0];
ptrdiff_t line, i;
@@ -50,7 +50,7 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv444_ayuv)
_mm_sfence();
for (line = 0; line < height; ++line) {
- __m128i *dst128 = (__m128i *)(dst + line * outStride);
+ __m128i *dst128 = (__m128i *)(dst[0] + line * outStride);
for (i = 0; i < width; i+=16) {
// Load pixels into registers
@@ -95,7 +95,7 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv444_ayuv_dither_le)
const uint16_t *v = (const uint16_t *)src[2];
const ptrdiff_t inStride = srcStride[0] >> 1;
- const ptrdiff_t outStride = dstStride << 2;
+ const ptrdiff_t outStride = dstStride[0];
LAVDitherMode ditherMode = m_pSettings->GetDitherMode();
const uint16_t *dithers = GetRandomDitherCoeffs(height, 3, 8, 0);
@@ -121,7 +121,7 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv444_ayuv_dither_le)
xmm4 = xmm5 = xmm6;
}
- __m128i *dst128 = (__m128i *)(dst + line * outStride);
+ __m128i *dst128 = (__m128i *)(dst[0] + line * outStride);
for (i = 0; i < width; i+=8) {
// Load pixels into registers, and apply dithering