Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mpc-hc/LAVFilters.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Hendrik Leppkes <h.leppkes@gmail.com> 2012-07-05 15:57:12 +0400
committer Hendrik Leppkes <h.leppkes@gmail.com> 2012-07-05 18:16:48 +0400
commit 691f56aa69f9f7efc7641932281b9aaa62ef865a (patch)
tree 55f7ee75e61f7a7dce0da2b686f399897779a883 /decoder/LAVVideo/pixconv
parent 2bf951f323555e448255e6959d30b0a95aa39004 (diff)
Unify shift/bpp parameters of SSE2 pixel loading macros
Diffstat (limited to 'decoder/LAVVideo/pixconv')
-rw-r--r-- decoder/LAVVideo/pixconv/interleave.cpp 9
-rw-r--r-- decoder/LAVVideo/pixconv/pixconv_sse2_templates.h 36
-rw-r--r-- decoder/LAVVideo/pixconv/yuv2yuv_unscaled.cpp 49
-rw-r--r-- decoder/LAVVideo/pixconv/yuv444_ayuv.cpp 9
4 files changed, 48 insertions, 55 deletions
diff --git a/decoder/LAVVideo/pixconv/interleave.cpp b/decoder/LAVVideo/pixconv/interleave.cpp
index cbd8773a..00280dff 100644
--- a/decoder/LAVVideo/pixconv/interleave.cpp
+++ b/decoder/LAVVideo/pixconv/interleave.cpp
@@ -47,9 +47,12 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv444_y410)
__m128i *dst128 = (__m128i *)(dst + line * outStride);
for (i = 0; i < width; i+=8) {
- PIXCONV_LOAD_PIXEL16(xmm0, (y+i), shift);
- PIXCONV_LOAD_PIXEL16(xmm1, (u+i), shift);
- PIXCONV_LOAD_PIXEL16(xmm2, (v+i), shift+4); // +4 so its directly aligned properly (data from bit 14 to bit 4)
+ PIXCONV_LOAD_PIXEL8_ALIGNED(xmm0, (y+i));
+ xmm0 = _mm_slli_epi16(xmm0, shift);
+ PIXCONV_LOAD_PIXEL8_ALIGNED(xmm1, (u+i));
+ xmm1 = _mm_slli_epi16(xmm1, shift);
+ PIXCONV_LOAD_PIXEL8_ALIGNED(xmm2, (v+i));
+ xmm2 = _mm_slli_epi16(xmm2, shift+4); // +4 so its directly aligned properly (data from bit 14 to bit 4)
xmm3 = _mm_unpacklo_epi16(xmm1, xmm2); // 0VVVVV00000UUUUU
xmm4 = _mm_unpackhi_epi16(xmm1, xmm2); // 0VVVVV00000UUUUU
diff --git a/decoder/LAVVideo/pixconv/pixconv_sse2_templates.h b/decoder/LAVVideo/pixconv/pixconv_sse2_templates.h
index d5d82e82..366e87c2 100644
--- a/decoder/LAVVideo/pixconv/pixconv_sse2_templates.h
+++ b/decoder/LAVVideo/pixconv/pixconv_sse2_templates.h
@@ -19,6 +19,8 @@
#pragma once
+#include <emmintrin.h>
+
// Load the dithering coefficients for this line
// reg - register to load coefficients into
// line - index of line to process (0 based)
@@ -28,16 +30,23 @@
reg = _mm_load_si128((const __m128i *)name); \
reg = _mm_srli_epi16(reg, 8-bits); /* shift to the required dithering strength */
+// Load 8 16-bit pixels into a register, using aligned memory access
+// reg - register to store pixels in
+// src - memory pointer of the source
+// bpp - bit depth of the pixels
+#define PIXCONV_LOAD_PIXEL16(reg,src,bpp) \
+ reg = _mm_load_si128((const __m128i *)(src)); /* load (aligned) */ \
+ reg = _mm_slli_epi16(reg, 16-bpp); /* shift to 16-bit */
+
// Load 8 16-bit pixels into a register, and dither them to 8 bit
// The 8-bit pixels will be in the high-bytes of the 8 16-bit parts
// NOTE: the low-bytes are clobbered, and not empty.
// reg - register to store pixels in
// dreg - register with dithering coefficients
// src - memory pointer of the source
-// shift - shift offset to 8-bit (ie. 2 for 10bit)
-#define PIXCONV_LOAD_PIXEL16_DITHER_HIGH(reg,dreg,src,shift) \
- reg = _mm_load_si128((const __m128i *)(src)); /* load (aligned) */ \
- reg = _mm_slli_epi16(reg, 8-shift); /* shift to 16-bit */ \
+// bpp - bit depth of the pixels
+#define PIXCONV_LOAD_PIXEL16_DITHER_HIGH(reg,dreg,src,bpp) \
+ PIXCONV_LOAD_PIXEL16(reg,src,bpp) \
reg = _mm_adds_epu16(reg, dreg); /* dither */
// Load 8 16-bit pixels into a register, and dither them to 8 bit
@@ -45,9 +54,9 @@
// reg - register to store pixels in
// dreg - register with dithering coefficients
// src - memory pointer of the source
-// shift - shift offset to 8-bit (ie. 2 for 10bit)
-#define PIXCONV_LOAD_PIXEL16_DITHER(reg,dreg,src,shift) \
- PIXCONV_LOAD_PIXEL16_DITHER_HIGH(reg,dreg,src,shift) \
+// bpp - bit depth of the pixels
+#define PIXCONV_LOAD_PIXEL16_DITHER(reg,dreg,src,bpp) \
+ PIXCONV_LOAD_PIXEL16_DITHER_HIGH(reg,dreg,src,bpp) \
reg = _mm_srli_epi16(reg, 8); /* shift to 8-bit */
// Load 8 16-bit pixels into a register, and dither them to 8 bit
@@ -55,9 +64,9 @@
// reg - register to store pixels in
// dreg - register with dithering coefficients
// src - memory pointer of the source
-// shift - shift offset to 16-bit (ie. 6 for 10bit)
-#define PIXCONV_LOAD_PIXEL16_DITHER_PACKED(reg,dreg,zero,src,shift) \
- PIXCONV_LOAD_PIXEL16_DITHER(reg,dreg,src,shift) /* load unpacked */ \
+// bpp - bit depth of the pixels
+#define PIXCONV_LOAD_PIXEL16_DITHER_PACKED(reg,dreg,zero,src,bpp) \
+ PIXCONV_LOAD_PIXEL16_DITHER(reg,dreg,src,bpp) /* load unpacked */ \
reg = _mm_packus_epi16(reg, zero); /* pack */
// Load 16 8-bit pixels into a register
@@ -72,13 +81,6 @@
#define PIXCONV_LOAD_PIXEL8_ALIGNED(reg,src) \
reg = _mm_load_si128((const __m128i *)(src)); /* load (aligned) */
-// Load 8 16-bit pixels into a register, using aligned memory access
-// reg - register to store pixels in
-// src - memory pointer of the source
-#define PIXCONV_LOAD_PIXEL16(reg,src,shift) \
- reg = _mm_load_si128((const __m128i *)(src)); /* load (aligned) */ \
- reg = _mm_slli_epi16(reg, shift); /* shift to 16-bit */
-
// Load 4 8-bit pixels into the register
// reg - register to store pixels in
// src - source memory
diff --git a/decoder/LAVVideo/pixconv/yuv2yuv_unscaled.cpp b/decoder/LAVVideo/pixconv/yuv2yuv_unscaled.cpp
index c3c9a16e..c91d6f63 100644
--- a/decoder/LAVVideo/pixconv/yuv2yuv_unscaled.cpp
+++ b/decoder/LAVVideo/pixconv/yuv2yuv_unscaled.cpp
@@ -34,8 +34,6 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv_yv_nv12_dither_le)
const int inYStride = srcStride[0] >> 1;
const int inUVStride = srcStride[1] >> 1;
- const int shift = bpp - 8;
-
int outLumaStride = dstStride;
int outChromaStride = dstStride;
int chromaWidth = width;
@@ -80,12 +78,12 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv_yv_nv12_dither_le)
for (i = 0; i < width; i+=32) {
// Load pixels into registers, and apply dithering
- PIXCONV_LOAD_PIXEL16_DITHER(xmm0, xmm4, (y+i+ 0), shift); /* Y0Y0Y0Y0 */
- PIXCONV_LOAD_PIXEL16_DITHER(xmm1, xmm5, (y+i+ 8), shift); /* Y0Y0Y0Y0 */
- PIXCONV_LOAD_PIXEL16_DITHER(xmm2, xmm6, (y+i+16), shift); /* Y0Y0Y0Y0 */
- PIXCONV_LOAD_PIXEL16_DITHER(xmm3, xmm7, (y+i+24), shift); /* Y0Y0Y0Y0 */
- xmm0 = _mm_packus_epi16(xmm0, xmm1); /* YYYYYYYY */
- xmm2 = _mm_packus_epi16(xmm2, xmm3); /* YYYYYYYY */
+ PIXCONV_LOAD_PIXEL16_DITHER(xmm0, xmm4, (y+i+ 0), bpp); /* Y0Y0Y0Y0 */
+ PIXCONV_LOAD_PIXEL16_DITHER(xmm1, xmm5, (y+i+ 8), bpp); /* Y0Y0Y0Y0 */
+ PIXCONV_LOAD_PIXEL16_DITHER(xmm2, xmm6, (y+i+16), bpp); /* Y0Y0Y0Y0 */
+ PIXCONV_LOAD_PIXEL16_DITHER(xmm3, xmm7, (y+i+24), bpp); /* Y0Y0Y0Y0 */
+ xmm0 = _mm_packus_epi16(xmm0, xmm1); /* YYYYYYYY */
+ xmm2 = _mm_packus_epi16(xmm2, xmm3); /* YYYYYYYY */
// Write data back
_mm_stream_si128(dst128Y++, xmm0);
@@ -99,13 +97,13 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv_yv_nv12_dither_le)
__m128i *dst128V = (__m128i *)(dstV + line * outChromaStride);
for (i = 0; i < chromaWidth; i+=16) {
- PIXCONV_LOAD_PIXEL16_DITHER(xmm0, xmm4, (u+i), shift); /* U0U0U0U0 */
- PIXCONV_LOAD_PIXEL16_DITHER(xmm1, xmm5, (u+i+8), shift); /* U0U0U0U0 */
- PIXCONV_LOAD_PIXEL16_DITHER(xmm2, xmm6, (v+i), shift); /* V0V0V0V0 */
- PIXCONV_LOAD_PIXEL16_DITHER(xmm3, xmm7, (v+i+8), shift); /* V0V0V0V0 */
+ PIXCONV_LOAD_PIXEL16_DITHER(xmm0, xmm4, (u+i+0), bpp); /* U0U0U0U0 */
+ PIXCONV_LOAD_PIXEL16_DITHER(xmm1, xmm5, (u+i+8), bpp); /* U0U0U0U0 */
+ PIXCONV_LOAD_PIXEL16_DITHER(xmm2, xmm6, (v+i+0), bpp); /* V0V0V0V0 */
+ PIXCONV_LOAD_PIXEL16_DITHER(xmm3, xmm7, (v+i+8), bpp); /* V0V0V0V0 */
- xmm0 = _mm_packus_epi16(xmm0, xmm1); /* UUUUUUUU */
- xmm2 = _mm_packus_epi16(xmm2, xmm3); /* VVVVVVVV */
+ xmm0 = _mm_packus_epi16(xmm0, xmm1); /* UUUUUUUU */
+ xmm2 = _mm_packus_epi16(xmm2, xmm3); /* VVVVVVVV */
if (nv12) {
xmm1 = xmm0;
xmm0 = _mm_unpacklo_epi8(xmm0, xmm2);
@@ -133,7 +131,6 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv_yv_nv12_dither_le)
template HRESULT CLAVPixFmtConverter::convert_yuv_yv_nv12_dither_le<0>CONV_FUNC_PARAMS;
template HRESULT CLAVPixFmtConverter::convert_yuv_yv_nv12_dither_le<1>CONV_FUNC_PARAMS;
-template <int shift>
DECLARE_CONV_FUNC_IMPL(convert_yuv420_px1x_le)
{
const uint16_t *y = (const uint16_t *)src[0];
@@ -157,8 +154,8 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv420_px1x_le)
for (i = 0; i < width; i+=16) {
// Load 8 pixels into register
- PIXCONV_LOAD_PIXEL16(xmm0, (y+i), shift); /* YYYY */
- PIXCONV_LOAD_PIXEL16(xmm1, (y+i+8), shift); /* YYYY */
+ PIXCONV_LOAD_PIXEL16(xmm0, (y+i+0), bpp); /* YYYY */
+ PIXCONV_LOAD_PIXEL16(xmm1, (y+i+8), bpp); /* YYYY */
// and write them out
_mm_stream_si128(dst128Y++, xmm0);
_mm_stream_si128(dst128Y++, xmm1);
@@ -175,8 +172,8 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv420_px1x_le)
for (i = 0; i < uvWidth; i+=8) {
// Load 8 pixels into register
- PIXCONV_LOAD_PIXEL16(xmm0, (v+i), shift); /* VVVV */
- PIXCONV_LOAD_PIXEL16(xmm1, (u+i), shift); /* UUUU */
+ PIXCONV_LOAD_PIXEL16(xmm0, (v+i), bpp); /* VVVV */
+ PIXCONV_LOAD_PIXEL16(xmm1, (u+i), bpp); /* UUUU */
xmm2 = xmm0;
xmm0 = _mm_unpacklo_epi16(xmm1, xmm0); /* UVUV */
@@ -193,11 +190,6 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv420_px1x_le)
return S_OK;
}
-// Force creation of these two variants
-template HRESULT CLAVPixFmtConverter::convert_yuv420_px1x_le<0>CONV_FUNC_PARAMS;
-template HRESULT CLAVPixFmtConverter::convert_yuv420_px1x_le<6>CONV_FUNC_PARAMS;
-template HRESULT CLAVPixFmtConverter::convert_yuv420_px1x_le<7>CONV_FUNC_PARAMS;
-
DECLARE_CONV_FUNC_IMPL(convert_yuv_yv)
{
const uint8_t *y = src[0];
@@ -396,7 +388,6 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv422_yuy2_uyvy_dither_le)
const int inChromaStride = srcStride[1] >> 1;
const int outStride = dstStride << 1;
const int chromaWidth = (width + 1) >> 1;
- const int shift = bpp - 8;
LAVDitherMode ditherMode = m_pSettings->GetDitherMode();
const uint16_t *dithers = GetRandomDitherCoeffs(height, 4, 8, 0);
@@ -424,10 +415,10 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv422_yuy2_uyvy_dither_le)
for (i = 0; i < chromaWidth; i+=8) {
// Load pixels
- PIXCONV_LOAD_PIXEL16_DITHER(xmm0, xmm4, (y+(i*2)+0), shift); /* YYYY */
- PIXCONV_LOAD_PIXEL16_DITHER(xmm1, xmm5, (y+(i*2)+8), shift); /* YYYY */
- PIXCONV_LOAD_PIXEL16_DITHER(xmm2, xmm6, (u+i), shift); /* UUUU */
- PIXCONV_LOAD_PIXEL16_DITHER(xmm3, xmm7, (v+i), shift); /* VVVV */
+ PIXCONV_LOAD_PIXEL16_DITHER(xmm0, xmm4, (y+(i*2)+0), bpp); /* YYYY */
+ PIXCONV_LOAD_PIXEL16_DITHER(xmm1, xmm5, (y+(i*2)+8), bpp); /* YYYY */
+ PIXCONV_LOAD_PIXEL16_DITHER(xmm2, xmm6, (u+i), bpp); /* UUUU */
+ PIXCONV_LOAD_PIXEL16_DITHER(xmm3, xmm7, (v+i), bpp); /* VVVV */
// Pack Ys
xmm0 = _mm_packus_epi16(xmm0, xmm1);
diff --git a/decoder/LAVVideo/pixconv/yuv444_ayuv.cpp b/decoder/LAVVideo/pixconv/yuv444_ayuv.cpp
index 0c3f328f..926fe3be 100644
--- a/decoder/LAVVideo/pixconv/yuv444_ayuv.cpp
+++ b/decoder/LAVVideo/pixconv/yuv444_ayuv.cpp
@@ -102,9 +102,6 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv444_ayuv_dither_le)
if (dithers == NULL)
ditherMode = LAVDither_Ordered;
- // Number of bits to shift to reach 8
- int shift = bpp - 8;
-
int line, i;
__m128i xmm0,xmm1,xmm2,xmm3,xmm4,xmm5,xmm6,xmm7;
@@ -128,9 +125,9 @@ DECLARE_CONV_FUNC_IMPL(convert_yuv444_ayuv_dither_le)
for (i = 0; i < width; i+=8) {
// Load pixels into registers, and apply dithering
- PIXCONV_LOAD_PIXEL16_DITHER(xmm0, xmm4, (y+i), shift); /* Y0Y0Y0Y0 */
- PIXCONV_LOAD_PIXEL16_DITHER_HIGH(xmm1, xmm5, (u+i), shift); /* U0U0U0U0 */
- PIXCONV_LOAD_PIXEL16_DITHER(xmm2, xmm6, (v+i), shift); /* V0V0V0V0 */
+ PIXCONV_LOAD_PIXEL16_DITHER(xmm0, xmm4, (y+i), bpp); /* Y0Y0Y0Y0 */
+ PIXCONV_LOAD_PIXEL16_DITHER_HIGH(xmm1, xmm5, (u+i), bpp); /* U0U0U0U0 */
+ PIXCONV_LOAD_PIXEL16_DITHER(xmm2, xmm6, (v+i), bpp); /* V0V0V0V0 */
// Interlave into AYUV
xmm0 = _mm_or_si128(xmm0, xmm7); /* YAYAYAYA */