From 9aa533700849631331600fc3c3eeaf93d1141910 Mon Sep 17 00:00:00 2001
From: Hendrik Leppkes
Date: Sun, 16 Feb 2014 00:02:58 +0100
Subject: Move multithreading out of the template function to reduce binary size.

---
 decoder/LAVVideo/pixconv/yuv2rgb.cpp | 35 ++++++++++++++---------------------
 1 file changed, 14 insertions(+), 21 deletions(-)

(limited to 'decoder/LAVVideo/pixconv')

diff --git a/decoder/LAVVideo/pixconv/yuv2rgb.cpp b/decoder/LAVVideo/pixconv/yuv2rgb.cpp
index 454a99c2..aa68d919 100644
--- a/decoder/LAVVideo/pixconv/yuv2rgb.cpp
+++ b/decoder/LAVVideo/pixconv/yuv2rgb.cpp
@@ -394,7 +394,7 @@ static int yuv2rgb_convert_pixels(const uint8_t* &srcY, const uint8_t* &srcU, co
 }
 
 template
-static int __stdcall yuv2rgb_process_lines(const uint8_t *srcY, const uint8_t *srcU, const uint8_t *srcV, uint8_t *dst, int width, int height, ptrdiff_t srcStrideY, ptrdiff_t srcStrideUV, ptrdiff_t dstStride, ptrdiff_t sliceYStart, ptrdiff_t sliceYEnd, RGBCoeffs *coeffs, const uint16_t *dithers)
+static int __stdcall yuv2rgb_convert(const uint8_t *srcY, const uint8_t *srcU, const uint8_t *srcV, uint8_t *dst, int width, int height, ptrdiff_t srcStrideY, ptrdiff_t srcStrideUV, ptrdiff_t dstStride, ptrdiff_t sliceYStart, ptrdiff_t sliceYEnd, RGBCoeffs *coeffs, const uint16_t *dithers)
 {
   const uint8_t *y = srcY;
   const uint8_t *u = srcU;
@@ -467,24 +467,6 @@ static int __stdcall yuv2rgb_process_lines(const uint8_t *srcY, const uint8_t *s
   return 0;
 }
 
-template
-inline int yuv2rgb_convert(const uint8_t *srcY, const uint8_t *srcU, const uint8_t *srcV, uint8_t *dst, int width, int height, ptrdiff_t srcStrideY, ptrdiff_t srcStrideUV, ptrdiff_t dstStride, RGBCoeffs *coeffs, const uint16_t *dithers, int threads)
-{
-  if (threads <= 1) {
-    yuv2rgb_process_lines(srcY, srcU, srcV, dst, width, height, srcStrideY, srcStrideUV, dstStride, 0, height, coeffs, dithers);
-  } else {
-    const int is_odd = (inputFormat == LAVPixFmt_YUV420 || inputFormat == LAVPixFmt_NV12);
-    const ptrdiff_t lines_per_thread = (height / threads)&~1;
-
-    Concurrency::parallel_for(0, threads, [&](int i) {
-      const ptrdiff_t starty = (i * lines_per_thread);
-      const ptrdiff_t endy = (i == (threads-1)) ? height : starty + lines_per_thread + is_odd;
-      yuv2rgb_process_lines(srcY, srcU, srcV, dst, width, height, srcStrideY, srcStrideUV, dstStride, starty + (i ? is_odd : 0), endy, coeffs, dithers);
-    });
-  }
-  return 0;
-}
-
 template
 DECLARE_CONV_FUNC_IMPL(convert_yuv_rgb)
 {
@@ -516,8 +498,19 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv_rgb)
     return E_FAIL;
   }
 
-  // run conversion
-  convFn(src[0], src[1], src[2], dst[0], width, height, srcStride[0], srcStride[1], dstStride[0], coeffs, dithers, m_NumThreads);
+  // run conversion, threaded
+  if (m_NumThreads <= 1) {
+    convFn(src[0], src[1], src[2], dst[0], width, height, srcStride[0], srcStride[1], dstStride[0], 0, height, coeffs, dithers);
+  } else {
+    const int is_odd = (inputFormat == LAVPixFmt_YUV420 || inputFormat == LAVPixFmt_NV12);
+    const ptrdiff_t lines_per_thread = (height / m_NumThreads)&~1;
+
+    Concurrency::parallel_for(0, m_NumThreads, [&](int i) {
+      const ptrdiff_t starty = (i * lines_per_thread);
+      const ptrdiff_t endy = (i == (m_NumThreads - 1)) ? height : starty + lines_per_thread + is_odd;
+      convFn(src[0], src[1], src[2], dst[0], width, height, srcStride[0], srcStride[1], dstStride[0], starty + (i ? is_odd : 0), endy, coeffs, dithers);
+    });
+  }
 
   return S_OK;
 }
-- 
cgit v1.2.3