From f140a99f8b3178e0fb2dc6fee0c63a2e298ebdad Mon Sep 17 00:00:00 2001 From: Michael Niedermayer Date: Tue, 16 Jun 2015 23:35:23 +0200 Subject: swscale/output: Factorize rgb48 and 64bit code Signed-off-by: Michael Niedermayer --- libswscale/output.c | 282 +++++++++++++--------------------------------------- 1 file changed, 71 insertions(+), 211 deletions(-) (limited to 'libswscale') diff --git a/libswscale/output.c b/libswscale/output.c index 6048e2bbd7..56ed5f62eb 100644 --- a/libswscale/output.c +++ b/libswscale/output.c @@ -684,7 +684,7 @@ yuv2rgba64_X_c_template(SwsContext *c, const int16_t *lumFilter, const int16_t *chrFilter, const int32_t **chrUSrc, const int32_t **chrVSrc, int chrFilterSize, const int32_t **alpSrc, uint16_t *dest, int dstW, - int y, enum AVPixelFormat target, int hasAlpha) + int y, enum AVPixelFormat target, int hasAlpha, int eightbytes) { int i; int A1 = 0xffff<<14, A2 = 0xffff<<14; @@ -744,12 +744,19 @@ yuv2rgba64_X_c_template(SwsContext *c, const int16_t *lumFilter, output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14); output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14); output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14); - output_pixel(&dest[3], av_clip_uintp2(A1 , 30) >> 14); - output_pixel(&dest[4], av_clip_uintp2(R_B + Y2, 30) >> 14); - output_pixel(&dest[5], av_clip_uintp2( G + Y2, 30) >> 14); - output_pixel(&dest[6], av_clip_uintp2(B_R + Y2, 30) >> 14); - output_pixel(&dest[7], av_clip_uintp2(A2 , 30) >> 14); - dest += 8; + if (eightbytes) { + output_pixel(&dest[3], av_clip_uintp2(A1 , 30) >> 14); + output_pixel(&dest[4], av_clip_uintp2(R_B + Y2, 30) >> 14); + output_pixel(&dest[5], av_clip_uintp2( G + Y2, 30) >> 14); + output_pixel(&dest[6], av_clip_uintp2(B_R + Y2, 30) >> 14); + output_pixel(&dest[7], av_clip_uintp2(A2 , 30) >> 14); + dest += 8; + } else { + output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14); + output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14); + output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14); + dest += 6; + } } } @@ -758,7 +765,7 @@ yuv2rgba64_2_c_template(SwsContext *c, const int32_t *buf[2], const int32_t *ubuf[2], const int32_t *vbuf[2], const int32_t *abuf[2], uint16_t *dest, int dstW, int yalpha, int uvalpha, int y, - enum AVPixelFormat target, int hasAlpha) + enum AVPixelFormat target, int hasAlpha, int eightbytes) { const int32_t *buf0 = buf[0], *buf1 = buf[1], *ubuf0 = ubuf[0], *ubuf1 = ubuf[1], @@ -799,12 +806,19 @@ yuv2rgba64_2_c_template(SwsContext *c, const int32_t *buf[2], output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14); output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14); output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14); - output_pixel(&dest[3], av_clip_uintp2(A1 , 30) >> 14); - output_pixel(&dest[4], av_clip_uintp2(R_B + Y2, 30) >> 14); - output_pixel(&dest[5], av_clip_uintp2( G + Y2, 30) >> 14); - output_pixel(&dest[6], av_clip_uintp2(B_R + Y2, 30) >> 14); - output_pixel(&dest[7], av_clip_uintp2(A2 , 30) >> 14); - dest += 8; + if (eightbytes) { + output_pixel(&dest[3], av_clip_uintp2(A1 , 30) >> 14); + output_pixel(&dest[4], av_clip_uintp2(R_B + Y2, 30) >> 14); + output_pixel(&dest[5], av_clip_uintp2( G + Y2, 30) >> 14); + output_pixel(&dest[6], av_clip_uintp2(B_R + Y2, 30) >> 14); + output_pixel(&dest[7], av_clip_uintp2(A2 , 30) >> 14); + dest += 8; + } else { + output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14); + output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14); + output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14); + dest += 6; + } } } @@ -812,7 +826,7 @@ static av_always_inline void yuv2rgba64_1_c_template(SwsContext *c, const int32_t *buf0, const int32_t *ubuf[2], const int32_t *vbuf[2], const int32_t *abuf0, uint16_t *dest, int dstW, - int uvalpha, int y, enum AVPixelFormat target, int hasAlpha) + int uvalpha, int y, enum AVPixelFormat target, int hasAlpha, int eightbytes) { const int32_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0]; int i; @@ -848,12 +862,19 @@ yuv2rgba64_1_c_template(SwsContext *c, const int32_t *buf0, output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14); output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14); output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14); - output_pixel(&dest[3], av_clip_uintp2(A1 , 30) >> 14); - output_pixel(&dest[4], av_clip_uintp2(R_B + Y2, 30) >> 14); - output_pixel(&dest[5], av_clip_uintp2( G + Y2, 30) >> 14); - output_pixel(&dest[6], av_clip_uintp2(B_R + Y2, 30) >> 14); - output_pixel(&dest[7], av_clip_uintp2(A2 , 30) >> 14); - dest += 8; + if (eightbytes) { + output_pixel(&dest[3], av_clip_uintp2(A1 , 30) >> 14); + output_pixel(&dest[4], av_clip_uintp2(R_B + Y2, 30) >> 14); + output_pixel(&dest[5], av_clip_uintp2( G + Y2, 30) >> 14); + output_pixel(&dest[6], av_clip_uintp2(B_R + Y2, 30) >> 14); + output_pixel(&dest[7], av_clip_uintp2(A2 , 30) >> 14); + dest += 8; + } else { + output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14); + output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14); + output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14); + dest += 6; + } } } else { const int32_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1]; @@ -887,180 +908,19 @@ yuv2rgba64_1_c_template(SwsContext *c, const int32_t *buf0, output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14); output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14); output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14); - output_pixel(&dest[3], av_clip_uintp2(A1 , 30) >> 14); - output_pixel(&dest[4], av_clip_uintp2(R_B + Y2, 30) >> 14); - output_pixel(&dest[5], av_clip_uintp2( G + Y2, 30) >> 14); - output_pixel(&dest[6], av_clip_uintp2(B_R + Y2, 30) >> 14); - output_pixel(&dest[7], av_clip_uintp2(A2 , 30) >> 14); - dest += 8; - } - } -} - -static av_always_inline void -yuv2rgb48_X_c_template(SwsContext *c, const int16_t *lumFilter, - const int32_t **lumSrc, int lumFilterSize, - const int16_t *chrFilter, const int32_t **chrUSrc, - const int32_t **chrVSrc, int chrFilterSize, - const int32_t **alpSrc, uint16_t *dest, int dstW, - int y, enum AVPixelFormat target, int hasAlpha) -{ - int i; - - for (i = 0; i < ((dstW + 1) >> 1); i++) { - int j; - int Y1 = -0x40000000; - int Y2 = -0x40000000; - int U = -128 << 23; // 19 - int V = -128 << 23; - int R, G, B; - - for (j = 0; j < lumFilterSize; j++) { - Y1 += lumSrc[j][i * 2] * (unsigned)lumFilter[j]; - Y2 += lumSrc[j][i * 2 + 1] * (unsigned)lumFilter[j]; - } - for (j = 0; j < chrFilterSize; j++) {; - U += chrUSrc[j][i] * (unsigned)chrFilter[j]; - V += chrVSrc[j][i] * (unsigned)chrFilter[j]; - } - - // 8bit: 12+15=27; 16-bit: 12+19=31 - Y1 >>= 14; // 10 - Y1 += 0x10000; - Y2 >>= 14; - Y2 += 0x10000; - U >>= 14; - V >>= 14; - - // 8bit: 27 -> 17bit, 16bit: 31 - 14 = 17bit - Y1 -= c->yuv2rgb_y_offset; - Y2 -= c->yuv2rgb_y_offset; - Y1 *= c->yuv2rgb_y_coeff; - Y2 *= c->yuv2rgb_y_coeff; - Y1 += 1 << 13; // 21 - Y2 += 1 << 13; - // 8bit: 17 + 13bit = 30bit, 16bit: 17 + 13bit = 30bit - - R = V * c->yuv2rgb_v2r_coeff; - G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff; - B = U * c->yuv2rgb_u2b_coeff; - - // 8bit: 30 - 22 = 8bit, 16bit: 30bit - 14 = 16bit - output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14); - output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14); - output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14); - output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14); - output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14); - output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14); - dest += 6; - } -} - -static av_always_inline void -yuv2rgb48_2_c_template(SwsContext *c, const int32_t *buf[2], - const int32_t *ubuf[2], const int32_t *vbuf[2], - const int32_t *abuf[2], uint16_t *dest, int dstW, - int yalpha, int uvalpha, int y, - enum AVPixelFormat target, int hasAlpha) -{ - const int32_t *buf0 = buf[0], *buf1 = buf[1], - *ubuf0 = ubuf[0], *ubuf1 = ubuf[1], - *vbuf0 = vbuf[0], *vbuf1 = vbuf[1]; - int yalpha1 = 4096 - yalpha; - int uvalpha1 = 4096 - uvalpha; - int i; - - for (i = 0; i < ((dstW + 1) >> 1); i++) { - int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 14; - int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 14; - int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha + (-128 << 23)) >> 14; - int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha + (-128 << 23)) >> 14; - int R, G, B; - - Y1 -= c->yuv2rgb_y_offset; - Y2 -= c->yuv2rgb_y_offset; - Y1 *= c->yuv2rgb_y_coeff; - Y2 *= c->yuv2rgb_y_coeff; - Y1 += 1 << 13; - Y2 += 1 << 13; - - R = V * c->yuv2rgb_v2r_coeff; - G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff; - B = U * c->yuv2rgb_u2b_coeff; - - output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14); - output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14); - output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14); - output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14); - output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14); - output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14); - dest += 6; - } -} - -static av_always_inline void -yuv2rgb48_1_c_template(SwsContext *c, const int32_t *buf0, - const int32_t *ubuf[2], const int32_t *vbuf[2], - const int32_t *abuf0, uint16_t *dest, int dstW, - int uvalpha, int y, enum AVPixelFormat target, int hasAlpha) -{ - const int32_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0]; - int i; - - if (uvalpha < 2048) { - for (i = 0; i < ((dstW + 1) >> 1); i++) { - int Y1 = (buf0[i * 2] ) >> 2; - int Y2 = (buf0[i * 2 + 1]) >> 2; - int U = (ubuf0[i] + (-128 << 11)) >> 2; - int V = (vbuf0[i] + (-128 << 11)) >> 2; - int R, G, B; - - Y1 -= c->yuv2rgb_y_offset; - Y2 -= c->yuv2rgb_y_offset; - Y1 *= c->yuv2rgb_y_coeff; - Y2 *= c->yuv2rgb_y_coeff; - Y1 += 1 << 13; - Y2 += 1 << 13; - - R = V * c->yuv2rgb_v2r_coeff; - G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff; - B = U * c->yuv2rgb_u2b_coeff; - - output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14); - output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14); - output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14); - output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14); - output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14); - output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14); - dest += 6; - } - } else { - const int32_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1]; - for (i = 0; i < ((dstW + 1) >> 1); i++) { - int Y1 = (buf0[i * 2] ) >> 2; - int Y2 = (buf0[i * 2 + 1]) >> 2; - int U = (ubuf0[i] + ubuf1[i] + (-128 << 12)) >> 3; - int V = (vbuf0[i] + vbuf1[i] + (-128 << 12)) >> 3; - int R, G, B; - - Y1 -= c->yuv2rgb_y_offset; - Y2 -= c->yuv2rgb_y_offset; - Y1 *= c->yuv2rgb_y_coeff; - Y2 *= c->yuv2rgb_y_coeff; - Y1 += 1 << 13; - Y2 += 1 << 13; - - R = V * c->yuv2rgb_v2r_coeff; - G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff; - B = U * c->yuv2rgb_u2b_coeff; - - output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14); - output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14); - output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14); - output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14); - output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14); - output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14); - dest += 6; + if (eightbytes) { + output_pixel(&dest[3], av_clip_uintp2(A1 , 30) >> 14); + output_pixel(&dest[4], av_clip_uintp2(R_B + Y2, 30) >> 14); + output_pixel(&dest[5], av_clip_uintp2( G + Y2, 30) >> 14); + output_pixel(&dest[6], av_clip_uintp2(B_R + Y2, 30) >> 14); + output_pixel(&dest[7], av_clip_uintp2(A2 , 30) >> 14); + dest += 8; + } else { + output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14); + output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14); + output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14); + dest += 6; + } } } } @@ -1069,7 +929,7 @@ yuv2rgb48_1_c_template(SwsContext *c, const int32_t *buf0, #undef r_b #undef b_r -#define YUV2PACKED16WRAPPER(name, base, ext, fmt, hasAlpha) \ +#define YUV2PACKED16WRAPPER(name, base, ext, fmt, hasAlpha, eightbytes) \ static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \ const int16_t **_lumSrc, int lumFilterSize, \ const int16_t *chrFilter, const int16_t **_chrUSrc, \ @@ -1084,7 +944,7 @@ static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \ uint16_t *dest = (uint16_t *) _dest; \ name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \ chrFilter, chrUSrc, chrVSrc, chrFilterSize, \ - alpSrc, dest, dstW, y, fmt, hasAlpha); \ + alpSrc, dest, dstW, y, fmt, hasAlpha, eightbytes); \ } \ \ static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \ @@ -1098,7 +958,7 @@ static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \ **abuf = (const int32_t **) _abuf; \ uint16_t *dest = (uint16_t *) _dest; \ name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \ - dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha); \ + dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha, eightbytes); \ } \ \ static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \ @@ -1112,21 +972,21 @@ static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \ *abuf0 = (const int32_t *) _abuf0; \ uint16_t *dest = (uint16_t *) _dest; \ name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \ - dstW, uvalpha, y, fmt, hasAlpha); \ + dstW, uvalpha, y, fmt, hasAlpha, eightbytes); \ } -YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48be, AV_PIX_FMT_RGB48BE, 0) -YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48le, AV_PIX_FMT_RGB48LE, 0) -YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48be, AV_PIX_FMT_BGR48BE, 0) -YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48le, AV_PIX_FMT_BGR48LE, 0) -YUV2PACKED16WRAPPER(yuv2, rgba64, rgba64be, AV_PIX_FMT_RGBA64BE, 1) -YUV2PACKED16WRAPPER(yuv2, rgba64, rgba64le, AV_PIX_FMT_RGBA64LE, 1) -YUV2PACKED16WRAPPER(yuv2, rgba64, rgbx64be, AV_PIX_FMT_RGBA64BE, 0) -YUV2PACKED16WRAPPER(yuv2, rgba64, rgbx64le, AV_PIX_FMT_RGBA64LE, 0) -YUV2PACKED16WRAPPER(yuv2, rgba64, bgra64be, AV_PIX_FMT_BGRA64BE, 1) -YUV2PACKED16WRAPPER(yuv2, rgba64, bgra64le, AV_PIX_FMT_BGRA64LE, 1) -YUV2PACKED16WRAPPER(yuv2, rgba64, bgrx64be, AV_PIX_FMT_BGRA64BE, 0) -YUV2PACKED16WRAPPER(yuv2, rgba64, bgrx64le, AV_PIX_FMT_BGRA64LE, 0) +YUV2PACKED16WRAPPER(yuv2, rgba64, rgb48be, AV_PIX_FMT_RGB48BE, 0, 0) +YUV2PACKED16WRAPPER(yuv2, rgba64, rgb48le, AV_PIX_FMT_RGB48LE, 0, 0) +YUV2PACKED16WRAPPER(yuv2, rgba64, bgr48be, AV_PIX_FMT_BGR48BE, 0, 0) +YUV2PACKED16WRAPPER(yuv2, rgba64, bgr48le, AV_PIX_FMT_BGR48LE, 0, 0) +YUV2PACKED16WRAPPER(yuv2, rgba64, rgba64be, AV_PIX_FMT_RGBA64BE, 1, 1) +YUV2PACKED16WRAPPER(yuv2, rgba64, rgba64le, AV_PIX_FMT_RGBA64LE, 1, 1) +YUV2PACKED16WRAPPER(yuv2, rgba64, rgbx64be, AV_PIX_FMT_RGBA64BE, 0, 1) +YUV2PACKED16WRAPPER(yuv2, rgba64, rgbx64le, AV_PIX_FMT_RGBA64LE, 0, 1) +YUV2PACKED16WRAPPER(yuv2, rgba64, bgra64be, AV_PIX_FMT_BGRA64BE, 1, 1) +YUV2PACKED16WRAPPER(yuv2, rgba64, bgra64le, AV_PIX_FMT_BGRA64LE, 1, 1) +YUV2PACKED16WRAPPER(yuv2, rgba64, bgrx64be, AV_PIX_FMT_BGRA64BE, 0, 1) +YUV2PACKED16WRAPPER(yuv2, rgba64, bgrx64le, AV_PIX_FMT_BGRA64LE, 0, 1) /* * Write out 2 RGB pixels in the target pixel format. This function takes a -- cgit v1.2.3