From e5580bd5127a40380e966e9da3607b7f7c98c0a9 Mon Sep 17 00:00:00 2001 From: Hendrik Leppkes Date: Fri, 15 Nov 2013 09:59:16 +0100 Subject: Move plane/stride calculation out of the individual pixfmt converters --- decoder/LAVVideo/LAVPixFmtConverter.cpp | 46 +++- decoder/LAVVideo/LAVPixFmtConverter.h | 40 +--- decoder/LAVVideo/pixconv/convert_generic.cpp | 288 ++++++++++++-------------- decoder/LAVVideo/pixconv/interleave.cpp | 6 +- decoder/LAVVideo/pixconv/rgb2rgb_unscaled.cpp | 10 +- decoder/LAVVideo/pixconv/yuv2rgb.cpp | 10 +- decoder/LAVVideo/pixconv/yuv2yuv_unscaled.cpp | 148 ++++++------- decoder/LAVVideo/pixconv/yuv420_yuy2.cpp | 6 +- decoder/LAVVideo/pixconv/yuv444_ayuv.cpp | 8 +- 9 files changed, 265 insertions(+), 297 deletions(-) (limited to 'decoder/LAVVideo') diff --git a/decoder/LAVVideo/LAVPixFmtConverter.cpp b/decoder/LAVVideo/LAVPixFmtConverter.cpp index 5c690f98..69ed713d 100644 --- a/decoder/LAVVideo/LAVPixFmtConverter.cpp +++ b/decoder/LAVVideo/LAVPixFmtConverter.cpp @@ -407,6 +407,41 @@ void CLAVPixFmtConverter::SelectConvertFunction() } } +HRESULT CLAVPixFmtConverter::Convert(LAVFrame *pFrame, uint8_t *dst, int width, int height, int dstStride) { + uint8_t *out = dst; + int outStride = dstStride, i; + // Check if we have proper pixel alignment and the dst memory is actually aligned + if (m_RequiredAlignment && (FFALIGN(dstStride, m_RequiredAlignment) != dstStride || ((uintptr_t)dst % 16u))) { + outStride = FFALIGN(dstStride, m_RequiredAlignment); + size_t requiredSize = (outStride * height * lav_pixfmt_desc[m_OutputPixFmt].bpp) << 3; + if (requiredSize > m_nAlignedBufferSize) { + DbgLog((LOG_TRACE, 10, L"::Convert(): Conversion requires a bigger stride (need: %d, have: %d), allocating buffer...", outStride, dstStride)); + av_freep(&m_pAlignedBuffer); + m_nAlignedBufferSize = requiredSize; + m_pAlignedBuffer = (uint8_t *)av_malloc(m_nAlignedBufferSize+FF_INPUT_BUFFER_PADDING_SIZE); + } + out = m_pAlignedBuffer; + } + + uint8_t *dstArray[4] = {0}; + int dstStrideArray[4] = {0}; + int byteStride = outStride * lav_pixfmt_desc[m_OutputPixFmt].codedbytes; + + dstArray[0] = out; + dstStrideArray[0] = byteStride; + + for (i = 1; i < lav_pixfmt_desc[m_OutputPixFmt].planes; ++i) { + dstArray[i] = dstArray[i-1] + dstStrideArray[i-1] * (height / lav_pixfmt_desc[m_OutputPixFmt].planeHeight[i-1]); + dstStrideArray[i] = byteStride / lav_pixfmt_desc[m_OutputPixFmt].planeWidth[i]; + } + + HRESULT hr = (this->*convert)(pFrame->data, pFrame->stride, dstArray, dstStrideArray, width, height, m_InputPixFmt, m_InBpp, m_OutputPixFmt); + if (out != dst) { + ChangeStride(out, outStride, dst, dstStride, width, height, m_OutputPixFmt); + } + return hr; +} + DECLARE_CONV_FUNC_IMPL(plane_copy) { LAVOutPixFmtDesc desc = lav_pixfmt_desc[outputFormat]; @@ -414,18 +449,17 @@ DECLARE_CONV_FUNC_IMPL(plane_copy) int plane, line; const int widthBytes = width * desc.codedbytes; - const int dstStrideBytes = dstStride * desc.codedbytes; const int planes = max(desc.planes, 1); for (plane = 0; plane < planes; plane++) { - const int planeWidth = widthBytes / desc.planeWidth[plane]; - const int planeHeight = height / desc.planeHeight[plane]; - const int dstPlaneStride = dstStrideBytes / desc.planeWidth[plane]; + const int planeWidth = widthBytes / desc.planeWidth[plane]; + const int planeHeight = height / desc.planeHeight[plane]; const uint8_t *srcBuf = src[plane]; + uint8_t *dstBuf = dst[plane]; for (line = 0; line < planeHeight; ++line) { - memcpy(dst, srcBuf, planeWidth); + memcpy(dstBuf, srcBuf, planeWidth); srcBuf += srcStride[plane]; - dst += dstPlaneStride; + dstBuf += dstStride[plane]; } } diff --git a/decoder/LAVVideo/LAVPixFmtConverter.h b/decoder/LAVVideo/LAVPixFmtConverter.h index 5a3d88b2..b0faff7d 100644 --- a/decoder/LAVVideo/LAVPixFmtConverter.h +++ b/decoder/LAVVideo/LAVPixFmtConverter.h @@ -22,7 +22,7 @@ #include "LAVVideoSettings.h" #include "decoders/ILAVDecoder.h" -#define CONV_FUNC_PARAMS (const uint8_t* const src[4], const int srcStride[4], uint8_t *dst, int dstStride, int width, int height, LAVPixelFormat inputFormat, int bpp, LAVOutPixFmts outputFormat) +#define CONV_FUNC_PARAMS (const uint8_t* const src[4], const int srcStride[4], uint8_t* dst[4], int dstStride[4], int width, int height, LAVPixelFormat inputFormat, int bpp, LAVOutPixFmts outputFormat) #define DECLARE_CONV_FUNC(name) \ HRESULT name CONV_FUNC_PARAMS @@ -73,27 +73,7 @@ public: void GetMediaType(CMediaType *mt, int index, LONG biWidth, LONG biHeight, DWORD dwAspectX, DWORD dwAspectY, REFERENCE_TIME rtAvgTime, BOOL bInterlaced = TRUE, BOOL bVIH1 = FALSE); BOOL IsAllowedSubtype(const GUID *guid); - inline HRESULT Convert(LAVFrame *pFrame, uint8_t *dst, int width, int height, int dstStride) { - uint8_t *out = dst; - int outStride = dstStride; - // Check if we have proper pixel alignment and the dst memory is actually aligned - if (m_RequiredAlignment && (FFALIGN(dstStride, m_RequiredAlignment) != dstStride || ((uintptr_t)dst % 16u))) { - outStride = FFALIGN(dstStride, m_RequiredAlignment); - size_t requiredSize = (outStride * height * lav_pixfmt_desc[m_OutputPixFmt].bpp) << 3; - if (requiredSize > m_nAlignedBufferSize) { - DbgLog((LOG_TRACE, 10, L"::Convert(): Conversion requires a bigger stride (need: %d, have: %d), allocating buffer...", outStride, dstStride)); - av_freep(&m_pAlignedBuffer); - m_nAlignedBufferSize = requiredSize; - m_pAlignedBuffer = (uint8_t *)av_malloc(m_nAlignedBufferSize+FF_INPUT_BUFFER_PADDING_SIZE); - } - out = m_pAlignedBuffer; - } - HRESULT hr = (this->*convert)(pFrame->data, pFrame->stride, out, outStride, width, height, m_InputPixFmt, m_InBpp, m_OutputPixFmt); - if (out != dst) { - ChangeStride(out, outStride, dst, dstStride, width, height, m_OutputPixFmt); - } - return hr; - } + HRESULT Convert(LAVFrame *pFrame, uint8_t *dst, int width, int height, int dstStride); BOOL IsRGBConverterActive() { return m_bRGBConverter; } @@ -108,14 +88,14 @@ private: void SelectConvertFunction(); // Helper functions for convert_generic - HRESULT swscale_scale(enum AVPixelFormat srcPix, enum AVPixelFormat dstPix, const uint8_t* const src[], const int srcStride[], BYTE *pOut, int width, int height, int stride, LAVOutPixFmtDesc pixFmtDesc, bool swapPlanes12 = false); - HRESULT ConvertTo422Packed(const uint8_t* const src[4], const int srcStride[4], uint8_t *dst, int width, int height, int dstStride); - HRESULT ConvertToAYUV(const uint8_t* const src[4], const int srcStride[4], uint8_t *dst, int width, int height, int dstStride); - HRESULT ConvertToPX1X(const uint8_t* const src[4], const int srcStride[4], uint8_t *dst, int width, int height, int dstStride, int chromaVertical); - HRESULT ConvertToY410(const uint8_t* const src[4], const int srcStride[4], uint8_t *dst, int width, int height, int dstStride); - HRESULT ConvertToY416(const uint8_t* const src[4], const int srcStride[4], uint8_t *dst, int width, int height, int dstStride); - HRESULT ConvertTov210(const uint8_t* const src[4], const int srcStride[4], uint8_t *dst, int width, int height, int dstStride); - HRESULT ConvertTov410(const uint8_t* const src[4], const int srcStride[4], uint8_t *dst, int width, int height, int dstStride); + HRESULT swscale_scale(enum AVPixelFormat srcPix, enum AVPixelFormat dstPix, const uint8_t* const src[], const int srcStride[], uint8_t* dst[], int width, int height, int dstStride[], LAVOutPixFmtDesc pixFmtDesc, bool swapPlanes12 = false); + HRESULT ConvertTo422Packed(const uint8_t* const src[4], const int srcStride[4], uint8_t *dst[4], int width, int height, int dstStride[4]); + HRESULT ConvertToAYUV(const uint8_t* const src[4], const int srcStride[4], uint8_t *dst[4], int width, int height, int dstStride[4]); + HRESULT ConvertToPX1X(const uint8_t* const src[4], const int srcStride[4], uint8_t *dst[4], int width, int height, int dstStride[4], int chromaVertical); + HRESULT ConvertToY410(const uint8_t* const src[4], const int srcStride[4], uint8_t *dst[4], int width, int height, int dstStride[4]); + HRESULT ConvertToY416(const uint8_t* const src[4], const int srcStride[4], uint8_t *dst[4], int width, int height, int dstStride[4]); + HRESULT ConvertTov210(const uint8_t* const src[4], const int srcStride[4], uint8_t *dst[4], int width, int height, int dstStride[4]); + HRESULT ConvertTov410(const uint8_t* const src[4], const int srcStride[4], uint8_t *dst[4], int width, int height, int dstStride[4]); void DestroySWScale() { if (m_pSwsContext) sws_freeContext(m_pSwsContext); m_pSwsContext = NULL; if (m_rgbCoeffs) _aligned_free(m_rgbCoeffs); m_rgbCoeffs = NULL; if (m_pRandomDithers) _aligned_free(m_pRandomDithers); m_pRandomDithers = NULL; }; SwsContext *GetSWSContext(int width, int height, enum AVPixelFormat srcPix, enum AVPixelFormat dstPix, int flags); diff --git a/decoder/LAVVideo/pixconv/convert_generic.cpp b/decoder/LAVVideo/pixconv/convert_generic.cpp index 96c38051..4046ac81 100644 --- a/decoder/LAVVideo/pixconv/convert_generic.cpp +++ b/decoder/LAVVideo/pixconv/convert_generic.cpp @@ -67,10 +67,10 @@ DECLARE_CONV_FUNC_IMPL(convert_generic) hr = ConvertToY416(src, srcStride, dst, width, height, dstStride); break; case LAVOutPixFmt_RGB32: - hr = swscale_scale(inputFmt, AV_PIX_FMT_BGRA, src, srcStride, dst, width, height, dstStride * 4, lav_pixfmt_desc[m_OutputPixFmt]); + hr = swscale_scale(inputFmt, AV_PIX_FMT_BGRA, src, srcStride, dst, width, height, dstStride, lav_pixfmt_desc[m_OutputPixFmt]); break; case LAVOutPixFmt_RGB24: - hr = swscale_scale(inputFmt, AV_PIX_FMT_BGR24, src, srcStride, dst, width, height, dstStride * 3, lav_pixfmt_desc[m_OutputPixFmt]); + hr = swscale_scale(inputFmt, AV_PIX_FMT_BGR24, src, srcStride, dst, width, height, dstStride, lav_pixfmt_desc[m_OutputPixFmt]); break; case LAVOutPixFmt_v210: hr = ConvertTov210(src, srcStride, dst, width, height, dstStride); @@ -85,7 +85,7 @@ DECLARE_CONV_FUNC_IMPL(convert_generic) hr = swscale_scale(inputFmt, AV_PIX_FMT_YUV444P, src, srcStride, dst, width, height, dstStride, lav_pixfmt_desc[m_OutputPixFmt], true); break; case LAVOutPixFmt_RGB48: - hr = swscale_scale(inputFmt, AV_PIX_FMT_RGB48LE, src, srcStride, dst, width, height, dstStride * 6, lav_pixfmt_desc[m_OutputPixFmt], true); + hr = swscale_scale(inputFmt, AV_PIX_FMT_RGB48LE, src, srcStride, dst, width, height, dstStride, lav_pixfmt_desc[m_OutputPixFmt], true); break; default: ASSERT(0); @@ -137,27 +137,15 @@ inline SwsContext *CLAVPixFmtConverter::GetSWSContext(int width, int height, enu return m_pSwsContext; } -HRESULT CLAVPixFmtConverter::swscale_scale(enum AVPixelFormat srcPix, enum AVPixelFormat dstPix, const uint8_t* const src[], const int srcStride[], BYTE *pOut, int width, int height, int stride, LAVOutPixFmtDesc pixFmtDesc, bool swapPlanes12) +HRESULT CLAVPixFmtConverter::swscale_scale(enum AVPixelFormat srcPix, enum AVPixelFormat dstPix, const uint8_t* const src[], const int srcStride[], uint8_t* dst[], int width, int height, int dstStride[], LAVOutPixFmtDesc pixFmtDesc, bool swapPlanes12) { - uint8_t *dst[4]; - int dstStride[4]; - int i, ret; + int ret; SwsContext *ctx = GetSWSContext(width, height, srcPix, dstPix, SWS_BILINEAR); CheckPointer(m_pSwsContext, E_POINTER); - memset(dst, 0, sizeof(dst)); - memset(dstStride, 0, sizeof(dstStride)); - - dst[0] = pOut; - dstStride[0] = stride; - for (i = 1; i < pixFmtDesc.planes; ++i) { - dst[i] = dst[i-1] + (stride / pixFmtDesc.planeWidth[i-1]) * (height / pixFmtDesc.planeHeight[i-1]); - dstStride[i] = stride / pixFmtDesc.planeWidth[i]; - } - if (swapPlanes12) { - BYTE *tmp = dst[1]; + uint8_t *tmp = dst[1]; dst[1] = dst[2]; dst[2] = tmp; } @@ -166,7 +154,7 @@ HRESULT CLAVPixFmtConverter::swscale_scale(enum AVPixelFormat srcPix, enum AVPix return S_OK; } -HRESULT CLAVPixFmtConverter::ConvertTo422Packed(const uint8_t* const src[4], const int srcStride[4], BYTE *pOut, int width, int height, int dstStride) +HRESULT CLAVPixFmtConverter::ConvertTo422Packed(const uint8_t* const src[4], const int srcStride[4], uint8_t* dst[], int width, int height, int dstStride[]) { const BYTE *y = NULL; const BYTE *u = NULL; @@ -176,28 +164,28 @@ HRESULT CLAVPixFmtConverter::ConvertTo422Packed(const uint8_t* const src[4], con BYTE *pTmpBuffer = NULL; if (m_InputPixFmt != LAVPixFmt_YUV422) { - uint8_t *dst[4] = {NULL}; - int dstStride[4] = {0}; + uint8_t *tmp[4] = {NULL}; + int tmpStride[4] = {0}; int scaleStride = FFALIGN(width, 32); pTmpBuffer = (BYTE *)av_malloc(height * scaleStride * 2); - dst[0] = pTmpBuffer; - dst[1] = dst[0] + (height * scaleStride); - dst[2] = dst[1] + (height * scaleStride / 2); - dst[3] = NULL; + tmp[0] = pTmpBuffer; + tmp[1] = tmp[0] + (height * scaleStride); + tmp[2] = tmp[1] + (height * scaleStride / 2); + tmp[3] = NULL; - dstStride[0] = scaleStride; - dstStride[1] = scaleStride / 2; - dstStride[2] = scaleStride / 2; - dstStride[3] = 0; + tmpStride[0] = scaleStride; + tmpStride[1] = scaleStride / 2; + tmpStride[2] = scaleStride / 2; + tmpStride[3] = 0; SwsContext *ctx = GetSWSContext(width, height, GetFFInput(), AV_PIX_FMT_YUV422P, SWS_FAST_BILINEAR); - sws_scale(ctx, src, srcStride, 0, height, dst, dstStride); + sws_scale(ctx, src, srcStride, 0, height, tmp, tmpStride); - y = dst[0]; - u = dst[1]; - v = dst[2]; + y = tmp[0]; + u = tmp[1]; + v = tmp[2]; sourceStride = scaleStride; } else { y = src[0]; @@ -206,12 +194,10 @@ HRESULT CLAVPixFmtConverter::ConvertTo422Packed(const uint8_t* const src[4], con sourceStride = srcStride[0]; } - dstStride <<= 1; - #define YUV422_PACK_YUY2(offset) *idst++ = y[(i+offset) * 2] | (u[i+offset] << 8) | (y[(i+offset) * 2 + 1] << 16) | (v[i+offset] << 24); #define YUV422_PACK_UYVY(offset) *idst++ = u[i+offset] | (y[(i+offset) * 2] << 8) | (v[i+offset] << 16) | (y[(i+offset) * 2 + 1] << 24); - BYTE *out = pOut; + uint8_t *out = dst[0]; int halfwidth = width >> 1; int halfstride = sourceStride >> 1; @@ -234,7 +220,7 @@ HRESULT CLAVPixFmtConverter::ConvertTo422Packed(const uint8_t* const src[4], con y += sourceStride; u += halfstride; v += halfstride; - out += dstStride; + out += dstStride[0]; } } else { for (line = 0; line < height; ++line) { @@ -255,7 +241,7 @@ HRESULT CLAVPixFmtConverter::ConvertTo422Packed(const uint8_t* const src[4], con y += sourceStride; u += halfstride; v += halfstride; - out += dstStride; + out += dstStride[0]; } } @@ -264,7 +250,7 @@ HRESULT CLAVPixFmtConverter::ConvertTo422Packed(const uint8_t* const src[4], con return S_OK; } -HRESULT CLAVPixFmtConverter::ConvertToAYUV(const uint8_t* const src[4], const int srcStride[4], BYTE *pOut, int width, int height, int dstStride) +HRESULT CLAVPixFmtConverter::ConvertToAYUV(const uint8_t* const src[4], const int srcStride[4], uint8_t* dst[], int width, int height, int dstStride[]) { const BYTE *y = NULL; const BYTE *u = NULL; @@ -274,27 +260,27 @@ HRESULT CLAVPixFmtConverter::ConvertToAYUV(const uint8_t* const src[4], const in BYTE *pTmpBuffer = NULL; if (m_InputPixFmt != LAVPixFmt_YUV444) { - uint8_t *dst[4] = {NULL}; - int swStride[4] = {0}; - int scaleStride = FFALIGN(dstStride, 32); + uint8_t *tmp[4] = {NULL}; + int tmpStride[4] = {0}; + int scaleStride = FFALIGN(width, 32); pTmpBuffer = (BYTE *)av_malloc(height * scaleStride * 3); - dst[0] = pTmpBuffer; - dst[1] = dst[0] + (height * scaleStride); - dst[2] = dst[1] + (height * scaleStride); - dst[3] = NULL; - swStride[0] = scaleStride; - swStride[1] = scaleStride; - swStride[2] = scaleStride; - swStride[3] = 0; + tmp[0] = pTmpBuffer; + tmp[1] = tmp[0] + (height * scaleStride); + tmp[2] = tmp[1] + (height * scaleStride); + tmp[3] = NULL; + tmpStride[0] = scaleStride; + tmpStride[1] = scaleStride; + tmpStride[2] = scaleStride; + tmpStride[3] = 0; SwsContext *ctx = GetSWSContext(width, height, GetFFInput(), AV_PIX_FMT_YUV444P, SWS_POINT); - sws_scale(ctx, src, srcStride, 0, height, dst, swStride); + sws_scale(ctx, src, srcStride, 0, height, tmp, tmpStride); - y = dst[0]; - u = dst[1]; - v = dst[2]; + y = tmp[0]; + u = tmp[1]; + v = tmp[2]; sourceStride = scaleStride; } else { y = src[0]; @@ -305,7 +291,7 @@ HRESULT CLAVPixFmtConverter::ConvertToAYUV(const uint8_t* const src[4], const in #define YUV444_PACK_AYUV(offset) *idst++ = v[i+offset] | (u[i+offset] << 8) | (y[i+offset] << 16) | (0xff << 24); - BYTE *out = pOut; + BYTE *out = dst[0]; for (line = 0; line < height; ++line) { int32_t *idst = (int32_t *)out; for (i = 0; i < (width-7); i+=8) { @@ -324,7 +310,7 @@ HRESULT CLAVPixFmtConverter::ConvertToAYUV(const uint8_t* const src[4], const in y += sourceStride; u += sourceStride; v += sourceStride; - out += dstStride << 2; + out += dstStride[0]; } av_freep(&pTmpBuffer); @@ -332,7 +318,7 @@ HRESULT CLAVPixFmtConverter::ConvertToAYUV(const uint8_t* const src[4], const in return S_OK; } -HRESULT CLAVPixFmtConverter::ConvertToPX1X(const uint8_t* const src[4], const int srcStride[4], BYTE *pOut, int width, int height, int dstStride, int chromaVertical) +HRESULT CLAVPixFmtConverter::ConvertToPX1X(const uint8_t* const src[4], const int srcStride[4], uint8_t* dst[], int width, int height, int dstStride[], int chromaVertical) { const BYTE *y = NULL; const BYTE *u = NULL; @@ -342,33 +328,30 @@ HRESULT CLAVPixFmtConverter::ConvertToPX1X(const uint8_t* const src[4], const in int shift = 0; - // Stride needs to be doubled for 16-bit per pixel - dstStride <<= 1; - BYTE *pTmpBuffer = NULL; if ((chromaVertical == 1 && m_InputPixFmt != LAVPixFmt_YUV422bX) || (chromaVertical == 2 && m_InputPixFmt != LAVPixFmt_YUV420bX)) { - uint8_t *dst[4] = {NULL}; - int dstStride[4] = {0}; + uint8_t *tmp[4] = {NULL}; + int tmpStride[4] = {0}; int scaleStride = FFALIGN(width, 32) * 2; pTmpBuffer = (BYTE *)av_malloc(height * scaleStride * 2); - dst[0] = pTmpBuffer; - dst[1] = dst[0] + (height * scaleStride); - dst[2] = dst[1] + ((height / chromaVertical) * (scaleStride / 2)); - dst[3] = NULL; - dstStride[0] = scaleStride; - dstStride[1] = scaleStride / 2; - dstStride[2] = scaleStride / 2; - dstStride[3] = 0; + tmp[0] = pTmpBuffer; + tmp[1] = tmp[0] + (height * scaleStride); + tmp[2] = tmp[1] + ((height / chromaVertical) * (scaleStride / 2)); + tmp[3] = NULL; + tmpStride[0] = scaleStride; + tmpStride[1] = scaleStride / 2; + tmpStride[2] = scaleStride / 2; + tmpStride[3] = 0; SwsContext *ctx = GetSWSContext(width, height, GetFFInput(), chromaVertical == 1 ? AV_PIX_FMT_YUV422P16LE : AV_PIX_FMT_YUV420P16LE, SWS_POINT); - sws_scale(ctx, src, srcStride, 0, height, dst, dstStride); + sws_scale(ctx, src, srcStride, 0, height, tmp, tmpStride); - y = dst[0]; - u = dst[1]; - v = dst[2]; + y = tmp[0]; + u = tmp[1]; + v = tmp[2]; sourceStride = scaleStride; } else { y = src[0]; @@ -380,7 +363,7 @@ HRESULT CLAVPixFmtConverter::ConvertToPX1X(const uint8_t* const src[4], const in } // copy Y - BYTE *pLineOut = pOut; + BYTE *pLineOut = dst[0]; const BYTE *pLineIn = y; for (line = 0; line < height; ++line) { if (shift == 0) { @@ -394,14 +377,14 @@ HRESULT CLAVPixFmtConverter::ConvertToPX1X(const uint8_t* const src[4], const in *idst++ = yv; } } - pLineOut += dstStride; + pLineOut += dstStride[0]; pLineIn += sourceStride; } sourceStride >>= 2; // Merge U/V - BYTE *out = pLineOut; + BYTE *out = dst[1]; const int16_t *uc = (int16_t *)u; const int16_t *vc = (int16_t *)v; for (line = 0; line < height/chromaVertical; ++line) { @@ -417,7 +400,7 @@ HRESULT CLAVPixFmtConverter::ConvertToPX1X(const uint8_t* const src[4], const in } uc += sourceStride; vc += sourceStride; - out += dstStride; + out += dstStride[1]; } av_freep(&pTmpBuffer); @@ -443,7 +426,7 @@ HRESULT CLAVPixFmtConverter::ConvertToPX1X(const uint8_t* const src[4], const in out += dstStride; \ } -HRESULT CLAVPixFmtConverter::ConvertToY410(const uint8_t* const src[4], const int srcStride[4], BYTE *pOut, int width, int height, int dstStride) +HRESULT CLAVPixFmtConverter::ConvertToY410(const uint8_t* const src[4], const int srcStride[4], uint8_t* dst[], int width, int height, int dstStride[]) { const int16_t *y = NULL; const int16_t *u = NULL; @@ -454,27 +437,27 @@ HRESULT CLAVPixFmtConverter::ConvertToY410(const uint8_t* const src[4], const in BYTE *pTmpBuffer = NULL; if (m_InputPixFmt != LAVPixFmt_YUV444bX || m_InBpp > 10) { - uint8_t *dst[4] = {NULL}; - int dstStride[4] = {0}; + uint8_t *tmp[4] = {NULL}; + int tmpStride[4] = {0}; int scaleStride = FFALIGN(width, 32); pTmpBuffer = (BYTE *)av_malloc(height * scaleStride * 6); - dst[0] = pTmpBuffer; - dst[1] = dst[0] + (height * scaleStride * 2); - dst[2] = dst[1] + (height * scaleStride * 2); - dst[3] = NULL; - dstStride[0] = scaleStride * 2; - dstStride[1] = scaleStride * 2; - dstStride[2] = scaleStride * 2; - dstStride[3] = 0; + tmp[0] = pTmpBuffer; + tmp[1] = tmp[0] + (height * scaleStride * 2); + tmp[2] = tmp[1] + (height * scaleStride * 2); + tmp[3] = NULL; + tmpStride[0] = scaleStride * 2; + tmpStride[1] = scaleStride * 2; + tmpStride[2] = scaleStride * 2; + tmpStride[3] = 0; SwsContext *ctx = GetSWSContext(width, height, GetFFInput(), AV_PIX_FMT_YUV444P10LE, SWS_POINT); - sws_scale(ctx, src, srcStride, 0, height, dst, dstStride); + sws_scale(ctx, src, srcStride, 0, height, tmp, tmpStride); - y = (int16_t *)dst[0]; - u = (int16_t *)dst[1]; - v = (int16_t *)dst[2]; + y = (int16_t *)tmp[0]; + u = (int16_t *)tmp[1]; + v = (int16_t *)tmp[2]; sourceStride = scaleStride; } else { y = (int16_t *)src[0]; @@ -485,13 +468,10 @@ HRESULT CLAVPixFmtConverter::ConvertToY410(const uint8_t* const src[4], const in b9Bit = (m_InBpp == 9); } - // 32-bit per pixel - dstStride *= 4; - #define YUV444_Y410_PACK \ *idst++ = (uv & 0x3FF) | ((yv & 0x3FF) << 10) | ((vv & 0x3FF) << 20) | (3 << 30); - BYTE *out = pOut; + BYTE *out = dst[0]; YUV444_PACKED_LOOP_HEAD_LE(width, height, y, u, v, out) if (b9Bit) { yv <<= 1; @@ -499,14 +479,14 @@ HRESULT CLAVPixFmtConverter::ConvertToY410(const uint8_t* const src[4], const in vv <<= 1; } YUV444_Y410_PACK - YUV444_PACKED_LOOP_END(y, u, v, out, sourceStride, dstStride) + YUV444_PACKED_LOOP_END(y, u, v, out, sourceStride, dstStride[0]) av_freep(&pTmpBuffer); return S_OK; } -HRESULT CLAVPixFmtConverter::ConvertToY416(const uint8_t* const src[4], const int srcStride[4], BYTE *pOut, int width, int height, int dstStride) +HRESULT CLAVPixFmtConverter::ConvertToY416(const uint8_t* const src[4], const int srcStride[4], uint8_t* dst[], int width, int height, int dstStride[]) { const int16_t *y = NULL; const int16_t *u = NULL; @@ -516,27 +496,27 @@ HRESULT CLAVPixFmtConverter::ConvertToY416(const uint8_t* const src[4], const in BYTE *pTmpBuffer = NULL; if (m_InputPixFmt != LAVPixFmt_YUV444bX || m_InBpp != 16) { - uint8_t *dst[4] = {NULL}; - int dstStride[4] = {0}; + uint8_t *tmp[4] = {NULL}; + int tmpStride[4] = {0}; int scaleStride = FFALIGN(width, 32); pTmpBuffer = (BYTE *)av_malloc(height * scaleStride * 6); - dst[0] = pTmpBuffer; - dst[1] = dst[0] + (height * scaleStride * 2); - dst[2] = dst[1] + (height * scaleStride * 2); - dst[3] = NULL; - dstStride[0] = scaleStride * 2; - dstStride[1] = scaleStride * 2; - dstStride[2] = scaleStride * 2; - dstStride[3] = 0; + tmp[0] = pTmpBuffer; + tmp[1] = tmp[0] + (height * scaleStride * 2); + tmp[2] = tmp[1] + (height * scaleStride * 2); + tmp[3] = NULL; + tmpStride[0] = scaleStride * 2; + tmpStride[1] = scaleStride * 2; + tmpStride[2] = scaleStride * 2; + tmpStride[3] = 0; SwsContext *ctx = GetSWSContext(width, height, GetFFInput(), AV_PIX_FMT_YUV444P16LE, SWS_POINT); - sws_scale(ctx, src, srcStride, 0, height, dst, dstStride); + sws_scale(ctx, src, srcStride, 0, height, tmp, tmpStride); - y = (int16_t *)dst[0]; - u = (int16_t *)dst[1]; - v = (int16_t *)dst[2]; + y = (int16_t *)tmp[0]; + u = (int16_t *)tmp[1]; + v = (int16_t *)tmp[2]; sourceStride = scaleStride; } else { y = (int16_t *)src[0]; @@ -545,24 +525,21 @@ HRESULT CLAVPixFmtConverter::ConvertToY416(const uint8_t* const src[4], const in sourceStride = srcStride[0] / 2; } - // 64-bit per pixel - dstStride <<= 3; - #define YUV444_Y416_PACK \ *idst++ = 0xFFFF | (vv << 16); \ *idst++ = yv | (uv << 16); - BYTE *out = pOut; + BYTE *out = dst[0]; YUV444_PACKED_LOOP_HEAD_LE(width, height, y, u, v, out) YUV444_Y416_PACK - YUV444_PACKED_LOOP_END(y, u, v, out, sourceStride, dstStride) + YUV444_PACKED_LOOP_END(y, u, v, out, sourceStride, dstStride[0]) av_freep(&pTmpBuffer); return S_OK; } -HRESULT CLAVPixFmtConverter::ConvertTov210(const uint8_t* const src[4], const int srcStride[4], BYTE *pOut, int width, int height, int dstStride) +HRESULT CLAVPixFmtConverter::ConvertTov210(const uint8_t* const src[4], const int srcStride[4], uint8_t* dst[], int width, int height, int dstStride[]) { const int16_t *y = NULL; const int16_t *u = NULL; @@ -573,27 +550,27 @@ HRESULT CLAVPixFmtConverter::ConvertTov210(const uint8_t* const src[4], const in BYTE *pTmpBuffer = NULL; if (m_InputPixFmt != LAVPixFmt_YUV422bX || m_InBpp != 10) { - uint8_t *dst[4] = {NULL}; - int dstStride[4] = {0}; + uint8_t *tmp[4] = {NULL}; + int tmpStride[4] = {0}; int scaleStride = FFALIGN(width, 32); pTmpBuffer = (BYTE *)av_malloc(height * scaleStride * 6); - dst[0] = pTmpBuffer; - dst[1] = dst[0] + (height * scaleStride * 2); - dst[2] = dst[1] + (height * scaleStride * 2); - dst[3] = NULL; - dstStride[0] = scaleStride * 2; - dstStride[1] = scaleStride; - dstStride[2] = scaleStride; - dstStride[3] = 0; + tmp[0] = pTmpBuffer; + tmp[1] = tmp[0] + (height * scaleStride * 2); + tmp[2] = tmp[1] + (height * scaleStride * 2); + tmp[3] = NULL; + tmpStride[0] = scaleStride * 2; + tmpStride[1] = scaleStride; + tmpStride[2] = scaleStride; + tmpStride[3] = 0; SwsContext *ctx = GetSWSContext(width, height, GetFFInput(), AV_PIX_FMT_YUV422P10LE, SWS_POINT); - sws_scale(ctx, src, srcStride, 0, height, dst, dstStride); + sws_scale(ctx, src, srcStride, 0, height, tmp, tmpStride); - y = (int16_t *)dst[0]; - u = (int16_t *)dst[1]; - v = (int16_t *)dst[2]; + y = (int16_t *)tmp[0]; + u = (int16_t *)tmp[1]; + v = (int16_t *)tmp[2]; srcyStride = scaleStride; srcuvStride = scaleStride >> 1; } else { @@ -604,10 +581,10 @@ HRESULT CLAVPixFmtConverter::ConvertTov210(const uint8_t* const src[4], const in srcuvStride = srcStride[1] >> 1; } - // 32-bit per pixel - dstStride = ((dstStride + 47) / 48) * 128; + // Calculate v210 stride + int outStride = (((dstStride[0] >> 2) + 47) / 48) * 128; - BYTE *pdst = pOut; + BYTE *pdst = dst[0]; int32_t *p = (int32_t *)pdst; int w; @@ -644,7 +621,7 @@ HRESULT CLAVPixFmtConverter::ConvertTov210(const uint8_t* const src[4], const in } } - pdst += dstStride; + pdst += outStride; memset(p, 0, pdst - (BYTE *)p); p = (int32_t *)pdst; y += srcyStride - width; @@ -656,7 +633,7 @@ HRESULT CLAVPixFmtConverter::ConvertTov210(const uint8_t* const src[4], const in return S_OK; } -HRESULT CLAVPixFmtConverter::ConvertTov410(const uint8_t* const src[4], const int srcStride[4], BYTE *pOut, int width, int height, int dstStride) +HRESULT CLAVPixFmtConverter::ConvertTov410(const uint8_t* const src[4], const int srcStride[4], uint8_t* dst[], int width, int height, int dstStride[]) { const int16_t *y = NULL; const int16_t *u = NULL; @@ -667,27 +644,27 @@ HRESULT CLAVPixFmtConverter::ConvertTov410(const uint8_t* const src[4], const in BYTE *pTmpBuffer = NULL; if (m_InputPixFmt != LAVPixFmt_YUV444bX || m_InBpp > 10) { - uint8_t *dst[4] = {NULL}; - int dstStride[4] = {0}; + uint8_t *tmp[4] = {NULL}; + int tmpStride[4] = {0}; int scaleStride = FFALIGN(width, 32); pTmpBuffer = (BYTE *)av_malloc(height * scaleStride * 6); - dst[0] = pTmpBuffer; - dst[1] = dst[0] + (height * scaleStride * 2); - dst[2] = dst[1] + (height * scaleStride * 2); - dst[3] = NULL; - dstStride[0] = scaleStride * 2; - dstStride[1] = scaleStride * 2; - dstStride[2] = scaleStride * 2; - dstStride[3] = 0; + tmp[0] = pTmpBuffer; + tmp[1] = tmp[0] + (height * scaleStride * 2); + tmp[2] = tmp[1] + (height * scaleStride * 2); + tmp[3] = NULL; + tmpStride[0] = scaleStride * 2; + tmpStride[1] = scaleStride * 2; + tmpStride[2] = scaleStride * 2; + tmpStride[3] = 0; SwsContext *ctx = GetSWSContext(width, height, GetFFInput(), AV_PIX_FMT_YUV444P10LE, SWS_POINT); - sws_scale(ctx, src, srcStride, 0, height, dst, dstStride); + sws_scale(ctx, src, srcStride, 0, height, tmp, tmpStride); - y = (int16_t *)dst[0]; - u = (int16_t *)dst[1]; - v = (int16_t *)dst[2]; + y = (int16_t *)tmp[0]; + u = (int16_t *)tmp[1]; + v = (int16_t *)tmp[2]; sourceStride = scaleStride; } else { y = (int16_t *)src[0]; @@ -698,13 +675,10 @@ HRESULT CLAVPixFmtConverter::ConvertTov410(const uint8_t* const src[4], const in b9Bit = (m_InBpp == 9); } - // 32-bit per pixel - dstStride *= 4; - #define YUV444_v410_PACK \ *idst++ = ((uv & 0x3FF) << 2) | ((yv & 0x3FF) << 12) | ((vv & 0x3FF) << 22); - BYTE *out = pOut; + BYTE *out = dst[0]; YUV444_PACKED_LOOP_HEAD_LE(width, height, y, u, v, out) if (b9Bit) { yv <<= 1; @@ -712,7 +686,7 @@ HRESULT CLAVPixFmtConverter::ConvertTov410(const uint8_t* const src[4], const in vv <<= 1; } YUV444_v410_PACK - YUV444_PACKED_LOOP_END(y, u, v, out, sourceStride, dstStride) + YUV444_PACKED_LOOP_END(y, u, v, out, sourceStride, dstStride[0]) av_freep(&pTmpBuffer); diff --git a/decoder/LAVVideo/pixconv/interleave.cpp b/decoder/LAVVideo/pixconv/interleave.cpp index b9c132fa..699d157e 100644 --- a/decoder/LAVVideo/pixconv/interleave.cpp +++ b/decoder/LAVVideo/pixconv/interleave.cpp @@ -30,8 +30,8 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv444_y410) const uint16_t *u = (const uint16_t *)src[1]; const uint16_t *v = (const uint16_t *)src[2]; - ptrdiff_t inStride = srcStride[0] >> 1; - ptrdiff_t outStride = dstStride << 2; + const ptrdiff_t inStride = srcStride[0] >> 1; + const ptrdiff_t outStride = dstStride[0]; int shift = 10 - bpp; ptrdiff_t line, i; @@ -44,7 +44,7 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv444_y410) _mm_sfence(); for (line = 0; line < height; ++line) { - __m128i *dst128 = (__m128i *)(dst + line * outStride); + __m128i *dst128 = (__m128i *)(dst[0] + line * outStride); for (i = 0; i < width; i+=8) { PIXCONV_LOAD_PIXEL8_ALIGNED(xmm0, (y+i)); diff --git a/decoder/LAVVideo/pixconv/rgb2rgb_unscaled.cpp b/decoder/LAVVideo/pixconv/rgb2rgb_unscaled.cpp index 47a75a91..b7a0c16d 100644 --- a/decoder/LAVVideo/pixconv/rgb2rgb_unscaled.cpp +++ b/decoder/LAVVideo/pixconv/rgb2rgb_unscaled.cpp @@ -28,7 +28,7 @@ DECLARE_CONV_FUNC_IMPL(convert_rgb48_rgb32_ssse3) { const uint16_t *rgb = (const uint16_t *)src[0]; const ptrdiff_t inStride = srcStride[0] >> 1; - const ptrdiff_t outStride = dstStride * 4; + const ptrdiff_t outStride = dstStride[0]; ptrdiff_t line, i; int processWidth = width * 3; @@ -43,7 +43,7 @@ DECLARE_CONV_FUNC_IMPL(convert_rgb48_rgb32_ssse3) _mm_sfence(); for (line = 0; line < height; line++) { - __m128i *dst128 = (__m128i *)(dst + line * outStride); + __m128i *dst128 = (__m128i *)(dst[0] + line * outStride); // Load dithering coefficients for this line if (ditherMode == LAVDither_Random) { @@ -96,7 +96,7 @@ DECLARE_CONV_FUNC_IMPL(convert_rgb48_rgb) // Dither to RGB24/32 with SSE2 const uint16_t *rgb = (const uint16_t *)dstBS[0]; const ptrdiff_t inStride = srcStride[0] >> 1; - const ptrdiff_t outStride = dstStride * (out32 ? 4 : 3); + const ptrdiff_t outStride = dstStride[0]; ptrdiff_t line, i; int processWidth = width * 3; @@ -117,7 +117,7 @@ DECLARE_CONV_FUNC_IMPL(convert_rgb48_rgb) if (out32) { dst128 = (__m128i *)rgb24buffer; } else { - dst128 = (__m128i *)(dst + line * outStride); + dst128 = (__m128i *)(dst[0] + line * outStride); } // Load dithering coefficients for this line @@ -143,7 +143,7 @@ DECLARE_CONV_FUNC_IMPL(convert_rgb48_rgb) rgb += inStride; if (out32) { uint32_t *src24 = (uint32_t *)rgb24buffer; - uint32_t *dst32 = (uint32_t *)(dst + line * outStride); + uint32_t *dst32 = (uint32_t *)(dst[0] + line * outStride); for (i = 0; i < width; i += 4) { uint32_t sa = src24[0]; uint32_t sb = src24[1]; diff --git a/decoder/LAVVideo/pixconv/yuv2rgb.cpp b/decoder/LAVVideo/pixconv/yuv2rgb.cpp index 5e68dc2f..4a5003ce 100644 --- a/decoder/LAVVideo/pixconv/yuv2rgb.cpp +++ b/decoder/LAVVideo/pixconv/yuv2rgb.cpp @@ -407,8 +407,6 @@ static int __stdcall yuv2rgb_process_lines(const uint8_t *srcY, const uint8_t *s const uint8_t *v = srcV; uint8_t *rgb = dst; - dstStride *= (3 + out32); - ptrdiff_t line = sliceYStart; ptrdiff_t lastLine = sliceYEnd; @@ -577,15 +575,15 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv_rgb) const uint16_t *dithers = (ditherMode == LAVDither_Random) ? GetRandomDitherCoeffs(height, DITHER_STEPS * 3, 4, 0) : NULL; if (ditherMode == LAVDither_Random && dithers != NULL) { if (m_ColorProps.VideoTransferMatrix == 7) { - yuv2rgb_dispatch(src, srcStride, dst, dstStride, width, height, inputFormat, bpp, m_NumThreads, coeffs, dithers); + yuv2rgb_dispatch(src, srcStride, dst[0], dstStride[0], width, height, inputFormat, bpp, m_NumThreads, coeffs, dithers); } else { - yuv2rgb_dispatch(src, srcStride, dst, dstStride, width, height, inputFormat, bpp, m_NumThreads, coeffs, dithers); + yuv2rgb_dispatch(src, srcStride, dst[0], dstStride[0], width, height, inputFormat, bpp, m_NumThreads, coeffs, dithers); } } else { if (m_ColorProps.VideoTransferMatrix == 7) { - yuv2rgb_dispatch(src, srcStride, dst, dstStride, width, height, inputFormat, bpp, m_NumThreads, coeffs, NULL); + yuv2rgb_dispatch(src, srcStride, dst[0], dstStride[0], width, height, inputFormat, bpp, m_NumThreads, coeffs, NULL); } else { - yuv2rgb_dispatch(src, srcStride, dst, dstStride, width, height, inputFormat, bpp, m_NumThreads, coeffs, NULL); + yuv2rgb_dispatch(src, srcStride, dst[0], dstStride[0], width, height, inputFormat, bpp, m_NumThreads, coeffs, NULL); } } diff --git a/decoder/LAVVideo/pixconv/yuv2yuv_unscaled.cpp b/decoder/LAVVideo/pixconv/yuv2yuv_unscaled.cpp index 6591d75d..ba116abd 100644 --- a/decoder/LAVVideo/pixconv/yuv2yuv_unscaled.cpp +++ b/decoder/LAVVideo/pixconv/yuv2yuv_unscaled.cpp @@ -31,13 +31,14 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv_yv_nv12_dither_le) const uint16_t *u = (const uint16_t *)src[1]; const uint16_t *v = (const uint16_t *)src[2]; - const ptrdiff_t inYStride = srcStride[0] >> 1; - const ptrdiff_t inUVStride = srcStride[1] >> 1; + const ptrdiff_t inYStride = srcStride[0] >> 1; + const ptrdiff_t inUVStride = srcStride[1] >> 1; - ptrdiff_t outLumaStride = dstStride; - ptrdiff_t outChromaStride = dstStride; - ptrdiff_t chromaWidth = width; - ptrdiff_t chromaHeight = height; + const ptrdiff_t outYStride = dstStride[0]; + const ptrdiff_t outUVStride = dstStride[1]; + + ptrdiff_t chromaWidth = width; + ptrdiff_t chromaHeight = height; LAVDitherMode ditherMode = m_pSettings->GetDitherMode(); const uint16_t *dithers = GetRandomDitherCoeffs(height, 4, 8, 0); @@ -46,19 +47,13 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv_yv_nv12_dither_le) if (inputFormat == LAVPixFmt_YUV420bX) chromaHeight = chromaHeight >> 1; - if (inputFormat == LAVPixFmt_YUV420bX || inputFormat == LAVPixFmt_YUV422bX) { + if (inputFormat == LAVPixFmt_YUV420bX || inputFormat == LAVPixFmt_YUV422bX) chromaWidth = (chromaWidth + 1) >> 1; - outChromaStride = outChromaStride >> 1; - } ptrdiff_t line, i; __m128i xmm0,xmm1,xmm2,xmm3,xmm4,xmm5,xmm6,xmm7; - uint8_t *dstY = dst; - uint8_t *dstV = dstY + outLumaStride * height; - uint8_t *dstU = dstV + outChromaStride * chromaHeight; - _mm_sfence(); // Process Y @@ -74,7 +69,7 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv_yv_nv12_dither_le) xmm4 = xmm5 = xmm6 = xmm7; } - __m128i *dst128Y = (__m128i *)(dstY + line * outLumaStride); + __m128i *dst128Y = (__m128i *)(dst[0] + line * outYStride); for (i = 0; i < width; i+=32) { // Load pixels into registers, and apply dithering @@ -92,9 +87,9 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv_yv_nv12_dither_le) // Process U/V for chromaHeight lines if (line < chromaHeight) { - __m128i *dst128UV = (__m128i *)(dstV + line * outLumaStride); - __m128i *dst128U = (__m128i *)(dstU + line * outChromaStride); - __m128i *dst128V = (__m128i *)(dstV + line * outChromaStride); + __m128i *dst128UV = (__m128i *)(dst[1] + line * outUVStride); + __m128i *dst128U = (__m128i *)(dst[2] + line * outUVStride); + __m128i *dst128V = (__m128i *)(dst[1] + line * outUVStride); for (i = 0; i < chromaWidth; i+=16) { PIXCONV_LOAD_PIXEL16_DITHER(xmm0, xmm4, (u+i+0), bpp); /* U0U0U0U0 */ @@ -137,11 +132,12 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv420_px1x_le) const uint16_t *u = (const uint16_t *)src[1]; const uint16_t *v = (const uint16_t *)src[2]; - const ptrdiff_t inYStride = srcStride[0] >> 1; - const ptrdiff_t inUVStride = srcStride[1] >> 1; - const ptrdiff_t outStride = dstStride << 1; - const ptrdiff_t uvHeight = (outputFormat == LAVOutPixFmt_P010 || outputFormat == LAVOutPixFmt_P016) ? (height >> 1) : height; - const ptrdiff_t uvWidth = (width + 1) >> 1; + const ptrdiff_t inYStride = srcStride[0] >> 1; + const ptrdiff_t inUVStride = srcStride[1] >> 1; + const ptrdiff_t outYStride = dstStride[0]; + const ptrdiff_t outUVStride = dstStride[1]; + const ptrdiff_t uvHeight = (outputFormat == LAVOutPixFmt_P010 || outputFormat == LAVOutPixFmt_P016) ? (height >> 1) : height; + const ptrdiff_t uvWidth = (width + 1) >> 1; ptrdiff_t line, i; __m128i xmm0,xmm1,xmm2; @@ -150,7 +146,7 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv420_px1x_le) // Process Y for (line = 0; line < height; ++line) { - __m128i *dst128Y = (__m128i *)(dst + line * outStride); + __m128i *dst128Y = (__m128i *)(dst[0] + line * outYStride); for (i = 0; i < width; i+=16) { // Load 8 pixels into register @@ -164,11 +160,9 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv420_px1x_le) y += inYStride; } - BYTE *dstUV = dst + (height * outStride); - // Process UV for (line = 0; line < uvHeight; ++line) { - __m128i *dst128UV = (__m128i *)(dstUV + line * outStride); + __m128i *dst128UV = (__m128i *)(dst[1] + line * outUVStride); for (i = 0; i < uvWidth; i+=8) { // Load 8 pixels into register @@ -198,23 +192,18 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv_yv) const ptrdiff_t inLumaStride = srcStride[0]; const ptrdiff_t inChromaStride = srcStride[1]; - const ptrdiff_t outLumaStride = dstStride; - ptrdiff_t outChromaStride = dstStride; + + const ptrdiff_t outLumaStride = dstStride[0]; + const ptrdiff_t outChromaStride = dstStride[1]; ptrdiff_t line; - ptrdiff_t chromaWidth = width; - ptrdiff_t chromaHeight = height; + ptrdiff_t chromaWidth = width; + ptrdiff_t chromaHeight = height; if (inputFormat == LAVPixFmt_YUV420) chromaHeight = chromaHeight >> 1; - if (inputFormat == LAVPixFmt_YUV420 || inputFormat == LAVPixFmt_YUV422) { + if (inputFormat == LAVPixFmt_YUV420 || inputFormat == LAVPixFmt_YUV422) chromaWidth = (chromaWidth + 1) >> 1; - outChromaStride = outChromaStride >> 1; - } - - uint8_t *dstY = dst; - uint8_t *dstV = dstY + height * outLumaStride; - uint8_t *dstU = dstV + chromaHeight * outChromaStride; // Copy planes @@ -223,12 +212,12 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv_yv) // Y if ((outLumaStride % 16) == 0 && ((intptr_t)dst % 16u) == 0) { for(line = 0; line < height; ++line) { - PIXCONV_MEMCPY_ALIGNED(dstY + outLumaStride * line, y, width); + PIXCONV_MEMCPY_ALIGNED(dst[0] + outLumaStride * line, y, width); y += inLumaStride; } } else { for(line = 0; line < height; ++line) { - memcpy(dstY + outLumaStride * line, y, width); + memcpy(dst[0] + outLumaStride * line, y, width); y += inLumaStride; } } @@ -237,16 +226,16 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv_yv) if ((outChromaStride % 16) == 0 && ((intptr_t)dst % 16u) == 0) { for(line = 0; line < chromaHeight; ++line) { PIXCONV_MEMCPY_ALIGNED_TWO( - dstU + outChromaStride * line, u, - dstV + outChromaStride * line, v, + dst[2] + outChromaStride * line, u, + dst[1] + outChromaStride * line, v, chromaWidth); u += inChromaStride; v += inChromaStride; } } else { for(line = 0; line < chromaHeight; ++line) { - memcpy(dstU + outChromaStride * line, u, chromaWidth); - memcpy(dstV + outChromaStride * line, v, chromaWidth); + memcpy(dst[2] + outChromaStride * line, u, chromaWidth); + memcpy(dst[1] + outChromaStride * line, v, chromaWidth); u += inChromaStride; v += inChromaStride; } @@ -263,14 +252,13 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv420_nv12) const ptrdiff_t inLumaStride = srcStride[0]; const ptrdiff_t inChromaStride = srcStride[1]; - const ptrdiff_t outStride = dstStride; + + const ptrdiff_t outLumaStride = dstStride[0]; + const ptrdiff_t outChromaStride = dstStride[1]; const ptrdiff_t chromaWidth = (width + 1) >> 1; const ptrdiff_t chromaHeight = height >> 1; - uint8_t *dstY = dst; - uint8_t *dstUV = dstY + height * outStride; - ptrdiff_t line,i; __m128i xmm0,xmm1,xmm2,xmm3; @@ -278,13 +266,13 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv420_nv12) // Y for(line = 0; line < height; ++line) { - PIXCONV_MEMCPY_ALIGNED32(dstY + outStride * line, y, width); + PIXCONV_MEMCPY_ALIGNED32(dst[0] + outLumaStride * line, y, width); y += inLumaStride; } // U/V for(line = 0; line < chromaHeight; ++line) { - __m128i *dst128UV = (__m128i *)(dstUV + line * outStride); + __m128i *dst128UV = (__m128i *)(dst[1] + line * outChromaStride); for (i = 0; i < chromaWidth; i+=16) { PIXCONV_LOAD_PIXEL8_ALIGNED(xmm0, (v+i)); /* VVVV */ @@ -313,7 +301,8 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv422_yuy2_uyvy) const ptrdiff_t inLumaStride = srcStride[0]; const ptrdiff_t inChromaStride = srcStride[1]; - const ptrdiff_t outStride = dstStride << 1; + + const ptrdiff_t outStride = dstStride[0]; const ptrdiff_t chromaWidth = (width + 1) >> 1; @@ -323,7 +312,7 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv422_yuy2_uyvy) _mm_sfence(); for (line = 0; line < height; ++line) { - __m128i *dst128 = (__m128i *)(dst + line * outStride); + __m128i *dst128 = (__m128i *)(dst[0] + line * outStride); for (i = 0; i < chromaWidth; i+=16) { // Load pixels @@ -386,7 +375,7 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv422_yuy2_uyvy_dither_le) const ptrdiff_t inLumaStride = srcStride[0] >> 1; const ptrdiff_t inChromaStride = srcStride[1] >> 1; - const ptrdiff_t outStride = dstStride << 1; + const ptrdiff_t outStride = dstStride[0]; const ptrdiff_t chromaWidth = (width + 1) >> 1; LAVDitherMode ditherMode = m_pSettings->GetDitherMode(); @@ -400,7 +389,7 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv422_yuy2_uyvy_dither_le) _mm_sfence(); for (line = 0; line < height; ++line) { - __m128i *dst128 = (__m128i *)(dst + line * outStride); + __m128i *dst128 = (__m128i *)(dst[0] + line * outStride); // Load dithering coefficients for this line if (ditherMode == LAVDither_Random) { @@ -459,15 +448,11 @@ DECLARE_CONV_FUNC_IMPL(convert_nv12_yv12) const uint8_t *y = src[0]; const uint8_t *uv = src[1]; - const ptrdiff_t inStride = srcStride[0]; - const ptrdiff_t outLumaStride = dstStride; - const ptrdiff_t outChromaStride = dstStride >> 1; - - const ptrdiff_t chromaHeight = height >> 1; - - uint8_t *dstY = dst; - uint8_t *dstV = dstY + height * outLumaStride; - uint8_t *dstU = dstV + chromaHeight * outChromaStride; + const ptrdiff_t inLumaStride = srcStride[0]; + const ptrdiff_t inChromaStride = srcStride[1]; + const ptrdiff_t outLumaStride = dstStride[0]; + const ptrdiff_t outChromaStride = dstStride[1]; + const ptrdiff_t chromaHeight = height >> 1; ptrdiff_t line, i; __m128i xmm0,xmm1,xmm2,xmm3,xmm7; @@ -478,13 +463,13 @@ DECLARE_CONV_FUNC_IMPL(convert_nv12_yv12) // Copy the y for (line = 0; line < height; line++) { - PIXCONV_MEMCPY_ALIGNED(dstY + outLumaStride * line, y, width); - y += inStride; + PIXCONV_MEMCPY_ALIGNED(dst[0] + outLumaStride * line, y, width); + y += inLumaStride; } for (line = 0; line < chromaHeight; line++) { - __m128i *dstV128 = (__m128i *)(dstV + outChromaStride * line); - __m128i *dstU128 = (__m128i *)(dstU + outChromaStride * line); + __m128i *dstV128 = (__m128i *)(dst[1] + outChromaStride * line); + __m128i *dstU128 = (__m128i *)(dst[2] + outChromaStride * line); for (i = 0; i < width; i+=32) { PIXCONV_LOAD_PIXEL8_ALIGNED(xmm0, uv+i+0); @@ -505,7 +490,7 @@ DECLARE_CONV_FUNC_IMPL(convert_nv12_yv12) _mm_stream_si128(dstU128++, xmm0); _mm_stream_si128(dstV128++, xmm2); } - uv += inStride; + uv += inChromaStride; } return S_OK; @@ -516,39 +501,38 @@ DECLARE_CONV_FUNC_IMPL(convert_nv12_nv12) const uint8_t *y = src[0]; const uint8_t *uv = src[1]; - const ptrdiff_t inStride = srcStride[0]; - const ptrdiff_t outStride = dstStride; - const ptrdiff_t chromaHeight = (height >> 1); - - uint8_t *dstY = dst; - uint8_t *dstUV = dstY + height * outStride; + const ptrdiff_t inLumaStride = srcStride[0]; + const ptrdiff_t inChromaStride = srcStride[1]; + const ptrdiff_t outLumaStride = dstStride[0]; + const ptrdiff_t outChromaStride = dstStride[1]; + const ptrdiff_t chromaHeight = height >> 1; ptrdiff_t line; _mm_sfence(); // Use SSE2 copy when the stride is aligned - if ((outStride % 16) == 0) { + if ((dstStride[0] % 16) == 0) { // Copy the data for (line = 0; line < height; line++) { - PIXCONV_MEMCPY_ALIGNED(dstY + outStride * line, y, width); - y += inStride; + PIXCONV_MEMCPY_ALIGNED(dst[0] + outLumaStride * line, y, width); + y += inLumaStride; } for (line = 0; line < chromaHeight; line++) { - PIXCONV_MEMCPY_ALIGNED(dstUV + outStride * line, uv, width); - uv += inStride; + PIXCONV_MEMCPY_ALIGNED(dst[1] + outChromaStride * line, uv, width); + uv += inChromaStride; } } else { // Copy the data for (line = 0; line < height; line++) { - memcpy(dstY + outStride * line, y, width); - y += inStride; + memcpy(dst[0] + outLumaStride * line, y, width); + y += inLumaStride; } for (line = 0; line < chromaHeight; line++) { - memcpy(dstUV + outStride * line, uv, width); - uv += inStride; + memcpy(dst[1] + outChromaStride * line, uv, width); + uv += inChromaStride; } } diff --git a/decoder/LAVVideo/pixconv/yuv420_yuy2.cpp b/decoder/LAVVideo/pixconv/yuv420_yuy2.cpp index c5859623..869992a3 100644 --- a/decoder/LAVVideo/pixconv/yuv420_yuy2.cpp +++ b/decoder/LAVVideo/pixconv/yuv420_yuy2.cpp @@ -168,8 +168,6 @@ static int __stdcall yuv420yuy2_process_lines(const uint8_t *srcY, const uint8_t const uint8_t *v = srcV; uint8_t *yuy2 = dst; - dstStride *= 2; - // Processing starts at line 1, and ends at height - 1. The first and last line have special handling ptrdiff_t line = 1; const ptrdiff_t lastLine = height - 1; @@ -253,9 +251,9 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv420_yuy2) LAVDitherMode ditherMode = m_pSettings->GetDitherMode(); const uint16_t *dithers = (ditherMode == LAVDither_Random) ? GetRandomDitherCoeffs(height, DITHER_STEPS * 2, bpp - 8 + 2, 0) : NULL; if (ditherMode == LAVDither_Random && dithers != NULL) { - yuv420yuy2_dispatch(inputFormat, bpp, src[0], src[1], src[2], dst, width, height, srcStride[0], srcStride[1], dstStride, dithers); + yuv420yuy2_dispatch(inputFormat, bpp, src[0], src[1], src[2], dst[0], width, height, srcStride[0], srcStride[1], dstStride[0], dithers); } else { - yuv420yuy2_dispatch(inputFormat, bpp, src[0], src[1], src[2], dst, width, height, srcStride[0], srcStride[1], dstStride, NULL); + yuv420yuy2_dispatch(inputFormat, bpp, src[0], src[1], src[2], dst[0], width, height, srcStride[0], srcStride[1], dstStride[0], NULL); } return S_OK; diff --git a/decoder/LAVVideo/pixconv/yuv444_ayuv.cpp b/decoder/LAVVideo/pixconv/yuv444_ayuv.cpp index a84e2940..40d7ea16 100644 --- a/decoder/LAVVideo/pixconv/yuv444_ayuv.cpp +++ b/decoder/LAVVideo/pixconv/yuv444_ayuv.cpp @@ -39,7 +39,7 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv444_ayuv) const uint8_t *v = (const uint8_t *)src[2]; const ptrdiff_t inStride = srcStride[0]; - const ptrdiff_t outStride = dstStride << 2; + const ptrdiff_t outStride = dstStride[0]; ptrdiff_t line, i; @@ -50,7 +50,7 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv444_ayuv) _mm_sfence(); for (line = 0; line < height; ++line) { - __m128i *dst128 = (__m128i *)(dst + line * outStride); + __m128i *dst128 = (__m128i *)(dst[0] + line * outStride); for (i = 0; i < width; i+=16) { // Load pixels into registers @@ -95,7 +95,7 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv444_ayuv_dither_le) const uint16_t *v = (const uint16_t *)src[2]; const ptrdiff_t inStride = srcStride[0] >> 1; - const ptrdiff_t outStride = dstStride << 2; + const ptrdiff_t outStride = dstStride[0]; LAVDitherMode ditherMode = m_pSettings->GetDitherMode(); const uint16_t *dithers = GetRandomDitherCoeffs(height, 3, 8, 0); @@ -121,7 +121,7 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv444_ayuv_dither_le) xmm4 = xmm5 = xmm6; } - __m128i *dst128 = (__m128i *)(dst + line * outStride); + __m128i *dst128 = (__m128i *)(dst[0] + line * outStride); for (i = 0; i < width; i+=8) { // Load pixels into registers, and apply dithering -- cgit v1.2.3