Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mpc-hc/LAVFilters.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHendrik Leppkes <h.leppkes@gmail.com>2012-02-05 02:16:15 +0400
committerHendrik Leppkes <h.leppkes@gmail.com>2012-02-05 02:18:59 +0400
commitfe0d04f11b22b03268d80035ef9575793c3af778 (patch)
tree24496cf46a849cf46ff9df04a5c539c92aea492c /decoder/LAVVideo/pixconv
parentb3c9732acb19a9a045ae56b7ff0ac68f55a2607c (diff)
Implement random dithering in YUV420 -> YV12/NV12
Diffstat (limited to 'decoder/LAVVideo/pixconv')
-rw-r--r--decoder/LAVVideo/pixconv/yuv2yuv_unscaled.cpp53
1 files changed, 35 insertions, 18 deletions
diff --git a/decoder/LAVVideo/pixconv/yuv2yuv_unscaled.cpp b/decoder/LAVVideo/pixconv/yuv2yuv_unscaled.cpp
index 8a4d1571..7c1aacb9 100644
--- a/decoder/LAVVideo/pixconv/yuv2yuv_unscaled.cpp
+++ b/decoder/LAVVideo/pixconv/yuv2yuv_unscaled.cpp
@@ -41,6 +41,9 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv_yv_nv12_dither_le)
int chromaWidth = width;
int chromaHeight = height;
+ LAVDitherMode ditherMode = m_pSettings->GetDitherMode();
+ const uint16_t *dithers = GetRandomDitherCoeffs(height, 2, 8, 0);
+
if (inputFormat == LAVPixFmt_YUV420bX)
chromaHeight = chromaHeight >> 1;
if (inputFormat == LAVPixFmt_YUV420bX || inputFormat == LAVPixFmt_YUV422bX) {
@@ -50,25 +53,29 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv_yv_nv12_dither_le)
int line, i;
- __m128i xmm0,xmm1,xmm2,xmm3,xmm4,xmm5;
+ __m128i xmm0,xmm1,xmm2,xmm3,xmm4,xmm5,xmm6,xmm7;
uint8_t *dstY = dst;
uint8_t *dstV = dst + outLumaStride * height;
uint8_t *dstU = dstV + outChromaStride * chromaHeight;
- xmm5 = _mm_set1_epi32(0xff00ff00);
-
// Process Y
for (line = 0; line < height; ++line) {
// Load dithering coefficients for this line
- PIXCONV_LOAD_DITHER_COEFFS(xmm4,line,8,dithers);
+ if (ditherMode == LAVDither_Random) {
+ xmm4 = _mm_load_si128((const __m128i *)(dithers + (line << 4) + 0));
+ xmm5 = _mm_load_si128((const __m128i *)(dithers + (line << 4) + 8));
+ } else {
+ PIXCONV_LOAD_DITHER_COEFFS(xmm4,line,8,dithers);
+ xmm5 = xmm4;
+ }
__m128i *dst128Y = (__m128i *)(dst + line * outLumaStride);
for (i = 0; i < width; i+=16) {
// Load pixels into registers, and apply dithering
PIXCONV_LOAD_PIXEL16_DITHER(xmm0, xmm4, (y+i), shift); /* Y0Y0Y0Y0 */
- PIXCONV_LOAD_PIXEL16_DITHER(xmm1, xmm4, (y+i+8), shift); /* Y0Y0Y0Y0 */
+ PIXCONV_LOAD_PIXEL16_DITHER(xmm1, xmm5, (y+i+8), shift); /* Y0Y0Y0Y0 */
xmm0 = _mm_packus_epi16(xmm0, xmm1); /* YYYYYYYY */
// Write data back
@@ -78,11 +85,20 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv_yv_nv12_dither_le)
y += inYStride;
}
- // Process U & V
+ xmm7 = _mm_set1_epi32(0xff00ff00);
+ // Process U & V
for (line = 0; line < chromaHeight; ++line) {
// Load dithering coefficients for this line
- PIXCONV_LOAD_DITHER_COEFFS(xmm4,line,8,dithers);
+ if (ditherMode == LAVDither_Random) {
+ xmm4 = _mm_load_si128((const __m128i *)(dithers + (line << 5) + 0));
+ xmm5 = _mm_load_si128((const __m128i *)(dithers + (line << 5) + 8));
+ xmm6 = _mm_load_si128((const __m128i *)(dithers + (line << 5) + 16));
+ xmm7 = _mm_load_si128((const __m128i *)(dithers + (line << 5) + 24));
+ } else {
+ PIXCONV_LOAD_DITHER_COEFFS(xmm4,line,8,dithers);
+ xmm5 = xmm6 = xmm7 = xmm4;
+ }
__m128i *dst128UV = (__m128i *)(dstV + line * outLumaStride);
__m128i *dst128U = (__m128i *)(dstU + line * outChromaStride);
@@ -90,24 +106,25 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv_yv_nv12_dither_le)
for (i = 0; i < chromaWidth; i+=16) {
PIXCONV_LOAD_PIXEL16_DITHER(xmm0, xmm4, (u+i), shift); /* U0U0U0U0 */
- PIXCONV_LOAD_PIXEL16_DITHER(xmm1, xmm4, (u+i+8), shift); /* U0U0U0U0 */
- PIXCONV_LOAD_PIXEL16_DITHER_HIGH(xmm2, xmm4, (v+i), shift); /* 0V0V0V0V */
- PIXCONV_LOAD_PIXEL16_DITHER_HIGH(xmm3, xmm4, (v+i+8), shift); /* 0V0V0V0V */
+ PIXCONV_LOAD_PIXEL16_DITHER(xmm1, xmm5, (u+i+8), shift); /* U0U0U0U0 */
+ PIXCONV_LOAD_PIXEL16_DITHER(xmm2, xmm6, (v+i), shift); /* V0V0V0V0 */
+ PIXCONV_LOAD_PIXEL16_DITHER(xmm3, xmm7, (v+i+8), shift); /* V0V0V0V0 */
if (nv12) {
- xmm2 = _mm_and_si128(xmm2, xmm5);
- xmm3 = _mm_and_si128(xmm3, xmm5);
- xmm0 = _mm_or_si128(xmm0, xmm2); /* UVUVUVUV */
- xmm1 = _mm_or_si128(xmm1, xmm3); /* UVUVUVUV */
+ xmm0 = _mm_packus_epi16(xmm0, xmm1);
+ xmm2 = _mm_packus_epi16(xmm2, xmm3);
+
+ xmm1 = xmm0;
+ xmm0 = _mm_unpacklo_epi8(xmm0, xmm2);
+ xmm1 = _mm_unpackhi_epi8(xmm1, xmm2);
+
_mm_stream_si128(dst128UV++, xmm0);
_mm_stream_si128(dst128UV++, xmm1);
} else {
xmm0 = _mm_packus_epi16(xmm0, xmm1); /* UUUUUUUU */
- _mm_stream_si128(dst128U++, xmm0);
-
- xmm2 = _mm_srli_epi16(xmm2, 8);
- xmm3 = _mm_srli_epi16(xmm3, 8);
xmm2 = _mm_packus_epi16(xmm2, xmm3); /* VVVVVVVV */
+
+ _mm_stream_si128(dst128U++, xmm0);
_mm_stream_si128(dst128V++, xmm2);
}
}