Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mpc-hc/LAVFilters.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Hendrik Leppkes <h.leppkes@gmail.com> 2013-05-09 19:25:07 +0400
committer Hendrik Leppkes <h.leppkes@gmail.com> 2013-05-09 19:27:32 +0400
commit 4724973327077d99e73460531986effe97b5504a (patch)
tree 868af667785c79239ab6fca3e837065c2348ef3c /decoder
parent 1e1e72543f3e54ccd542d55d080dcd16e9245456 (diff)
Add SSSE3 RGB48->RGB32/RGB24 converter
Diffstat (limited to 'decoder')
-rw-r--r-- decoder/LAVVideo/LAVPixFmtConverter.cpp 7
-rw-r--r-- decoder/LAVVideo/LAVPixFmtConverter.h 3
-rw-r--r-- decoder/LAVVideo/LAVVideo.vcxproj 1
-rw-r--r-- decoder/LAVVideo/LAVVideo.vcxproj.filters 3
-rw-r--r-- decoder/LAVVideo/pixconv/rgb2rgb_unscaled.cpp 130
5 files changed, 143 insertions, 1 deletions
diff --git a/decoder/LAVVideo/LAVPixFmtConverter.cpp b/decoder/LAVVideo/LAVPixFmtConverter.cpp
index 6362e870..b60540f6 100644
--- a/decoder/LAVVideo/LAVPixFmtConverter.cpp
+++ b/decoder/LAVVideo/LAVPixFmtConverter.cpp
@@ -45,7 +45,7 @@
* RGB24 - - - - - - - - - - - - - - - x -
* RGB32 - - - - - - - - - - - - - - - - x
* ARGB32 - - - - - - - - - - - - - - - - x
- * RGB48 - - - - - - - - - - - - - - - - -
+ * RGB48 - - - - - - - - - - - - - - - x x
*
* Every processing path has a swscale fallback (even those with a "-" above), every combination of input/output is possible, just not optimized (ugly and/or slow)
*/
@@ -322,6 +322,11 @@ void CLAVPixFmtConverter::SelectConvertFunction()
|| (m_OutputPixFmt == LAVOutPixFmt_RGB24 && m_InputPixFmt == LAVPixFmt_RGB24)) {
convert = &CLAVPixFmtConverter::plane_copy;
m_RequiredAlignment = 0;
+ } else if (m_InputPixFmt == LAVPixFmt_RGB48 && (m_OutputPixFmt == LAVOutPixFmt_RGB32 || m_OutputPixFmt == LAVOutPixFmt_RGB24) && (cpu & AV_CPU_FLAG_SSSE3)) {
+ if (m_OutputPixFmt == LAVOutPixFmt_RGB32)
+ convert = &CLAVPixFmtConverter::convert_rgb48_rgb32_ssse3;
+ else
+ convert = &CLAVPixFmtConverter::convert_rgb48_rgb24_ssse3;
} else if (cpu & AV_CPU_FLAG_SSE2) {
if (m_OutputPixFmt == LAVOutPixFmt_AYUV && m_InputPixFmt == LAVPixFmt_YUV444bX) {
convert = &CLAVPixFmtConverter::convert_yuv444_ayuv_dither_le;
diff --git a/decoder/LAVVideo/LAVPixFmtConverter.h b/decoder/LAVVideo/LAVPixFmtConverter.h
index 7ac3e228..1b4895bf 100644
--- a/decoder/LAVVideo/LAVPixFmtConverter.h
+++ b/decoder/LAVVideo/LAVPixFmtConverter.h
@@ -143,6 +143,9 @@ private:
template <int uyvy> DECLARE_CONV_FUNC(convert_yuv422_yuy2_uyvy_dither_le);
template <int nv12> DECLARE_CONV_FUNC(convert_yuv_yv_nv12_dither_le);
+ DECLARE_CONV_FUNC(convert_rgb48_rgb32_ssse3);
+ DECLARE_CONV_FUNC(convert_rgb48_rgb24_ssse3);
+
template <int out32> DECLARE_CONV_FUNC(convert_yuv_rgb);
RGBCoeffs* getRGBCoeffs(int width, int height);
const uint16_t* GetRandomDitherCoeffs(int height, int coeffs, int bits, int line);
diff --git a/decoder/LAVVideo/LAVVideo.vcxproj b/decoder/LAVVideo/LAVVideo.vcxproj
index 8e4c760a..01678879 100644
--- a/decoder/LAVVideo/LAVVideo.vcxproj
+++ b/decoder/LAVVideo/LAVVideo.vcxproj
@@ -116,6 +116,7 @@
<ClCompile Include="pixconv\convert_generic.cpp" />
<ClCompile Include="pixconv\interleave.cpp" />
<ClCompile Include="pixconv\pixconv.cpp" />
+ <ClCompile Include="pixconv\rgb2rgb_unscaled.cpp" />
<ClCompile Include="pixconv\yuv2rgb.cpp" />
<ClCompile Include="pixconv\yuv2yuv_unscaled.cpp" />
<ClCompile Include="pixconv\yuv420_yuy2.cpp" />
diff --git a/decoder/LAVVideo/LAVVideo.vcxproj.filters b/decoder/LAVVideo/LAVVideo.vcxproj.filters
index 4433b291..7164bbf5 100644
--- a/decoder/LAVVideo/LAVVideo.vcxproj.filters
+++ b/decoder/LAVVideo/LAVVideo.vcxproj.filters
@@ -153,6 +153,9 @@
<ClCompile Include="VideoInputPin.cpp">
<Filter>Source Files</Filter>
</ClCompile>
+ <ClCompile Include="pixconv\rgb2rgb_unscaled.cpp">
+ <Filter>Source Files\pixconv</Filter>
+ </ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="stdafx.h">
diff --git a/decoder/LAVVideo/pixconv/rgb2rgb_unscaled.cpp b/decoder/LAVVideo/pixconv/rgb2rgb_unscaled.cpp
new file mode 100644
index 00000000..c7219659
--- /dev/null
+++ b/decoder/LAVVideo/pixconv/rgb2rgb_unscaled.cpp
@@ -0,0 +1,130 @@
+/*
+ * Copyright (C) 2010-2013 Hendrik Leppkes
+ * http://www.1f0.de
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include "stdafx.h"
+
+#include <emmintrin.h>
+
+#include "pixconv_internal.h"
+#include "pixconv_sse2_templates.h"
+
+/*
+ * Convert 16-bit-per-channel packed RGB (3 x uint16_t per pixel) into
+ * 8-bit-per-channel 32-bit pixels, with dithering, using SSSE3 byte shuffles.
+ *
+ * Each inner-loop iteration consumes 24 input samples (8 pixels) and emits
+ * two 16-byte stores (8 output pixels). The fourth byte of every output
+ * pixel is written as zero.
+ *
+ * The inner loop always works on whole groups of 8 pixels, so when width is
+ * not a multiple of 8 it reads and writes past the nominal end of the line.
+ * NOTE(review): this presumably relies on padded/aligned source and
+ * destination buffers provided by the caller - confirm.
+ *
+ * Always returns S_OK.
+ */
+DECLARE_CONV_FUNC_IMPL(convert_rgb48_rgb32_ssse3)
+{
+  const uint16_t *rgb = (const uint16_t *)src[0];
+  // rgb is advanced in uint16_t units, hence the halved stride
+  // (assumes srcStride is expressed in bytes - TODO confirm)
+  const ptrdiff_t inStride = srcStride[0] >> 1;
+  // 4 output bytes per pixel (assumes dstStride is expressed in pixels - TODO confirm)
+  const ptrdiff_t outStride = dstStride * 4;
+  ptrdiff_t line, i;
+
+  // Number of 16-bit samples per line (3 per pixel)
+  int processWidth = width * 3;
+
+  // Fall back to ordered dithering when no random coefficients are available
+  LAVDitherMode ditherMode = m_pSettings->GetDitherMode();
+  const uint16_t *dithers = GetRandomDitherCoeffs(height, 4, 8, 0);
+  if (dithers == NULL)
+    ditherMode = LAVDither_Ordered;
+
+  __m128i xmm0,xmm1,xmm2,xmm3,xmm4,xmm5,xmm6,xmm7;
+  // Takes two consecutive 3-word pixels (bytes 0-5 and 6-11) and expands each
+  // to 4 words; the -1 entries make _mm_shuffle_epi8 emit zero for the 4th word.
+  __m128i mask = _mm_setr_epi8(0,1,2,3,4,5,-1,-1,6,7,8,9,10,11,-1,-1);
+
+  _mm_sfence();
+  for (line = 0; line < height; line++) {
+    __m128i *dst128 = (__m128i *)(dst + line * outStride);
+
+    // Load dithering coefficients for this line
+    if (ditherMode == LAVDither_Random) {
+      // 32 coefficients are reserved per line (line << 5); the first 24 are used,
+      // one vector for each of the three loads in the inner loop
+      xmm5 = _mm_load_si128((const __m128i *)(dithers + (line << 5) +  0));
+      xmm6 = _mm_load_si128((const __m128i *)(dithers + (line << 5) +  8));
+      xmm7 = _mm_load_si128((const __m128i *)(dithers + (line << 5) + 16));
+    } else {
+      PIXCONV_LOAD_DITHER_COEFFS(xmm7,line,8,dithers);
+      xmm5 = xmm6 = xmm7;
+    }
+    for (i = 0; i < processWidth; i += 24) {
+      // The result of _mm_adds_epu16 must be assigned back; the intrinsic does
+      // not modify its operands, so dropping the return value silently
+      // disables dithering.
+      PIXCONV_LOAD_ALIGNED(xmm0, (rgb + i));      /* load */
+      xmm0 = _mm_adds_epu16(xmm0, xmm5);          /* apply dithering coefficients (saturating) */
+      xmm0 = _mm_srli_epi16(xmm0, 8);             /* shift to 8-bit */
+      PIXCONV_LOAD_ALIGNED(xmm1, (rgb + i + 8));  /* load */
+      xmm1 = _mm_adds_epu16(xmm1, xmm6);          /* apply dithering coefficients (saturating) */
+      xmm1 = _mm_srli_epi16(xmm1, 8);             /* shift to 8-bit */
+      PIXCONV_LOAD_ALIGNED(xmm2, (rgb + i + 16)); /* load */
+      xmm2 = _mm_adds_epu16(xmm2, xmm7);          /* apply dithering coefficients (saturating) */
+      xmm2 = _mm_srli_epi16(xmm2, 8);             /* shift to 8-bit */
+
+      // Regroup the 24 samples into four vectors of two 4-word pixels each:
+      // samples 0-5, 6-11, 12-17 and 18-23 of this group
+      xmm3 = _mm_shuffle_epi8(xmm0, mask);
+      xmm4 = _mm_shuffle_epi8(_mm_alignr_epi8(xmm1, xmm0, 12), mask);
+      xmm0 = _mm_shuffle_epi8(_mm_alignr_epi8(xmm2, xmm1, 8), mask);
+      xmm1 = _mm_shuffle_epi8(_mm_alignr_epi8(xmm2, xmm2, 4), mask);
+
+      // Narrow 16-bit lanes (already <= 255 after the shift) to bytes: 4 pixels per vector
+      xmm3 = _mm_packus_epi16(xmm3, xmm4);
+      xmm0 = _mm_packus_epi16(xmm0, xmm1);
+
+      // Non-temporal stores; the destination address must be 16-byte aligned
+      _mm_stream_si128(dst128++, xmm3);
+      _mm_stream_si128(dst128++, xmm0);
+    }
+
+    rgb += inStride;
+  }
+
+  return S_OK;
+}
+
+/*
+ * Convert 16-bit-per-channel packed RGB (3 x uint16_t per pixel) into
+ * 8-bit-per-channel packed 24-bit pixels, with dithering.
+ *
+ * The channel layout is unchanged, so every sample is simply dithered and
+ * narrowed to a byte. Each inner-loop iteration consumes 16 input samples
+ * and emits one 16-byte store.
+ *
+ * The inner loop always works on whole groups of 16 samples, so when
+ * width * 3 is not a multiple of 16 it reads and writes past the nominal
+ * end of the line.
+ * NOTE(review): this presumably relies on padded/aligned source and
+ * destination buffers provided by the caller - confirm.
+ *
+ * Always returns S_OK.
+ */
+DECLARE_CONV_FUNC_IMPL(convert_rgb48_rgb24_ssse3)
+{
+  const uint16_t *rgb = (const uint16_t *)src[0];
+  // rgb is advanced in uint16_t units, hence the halved stride
+  // (assumes srcStride is expressed in bytes - TODO confirm)
+  const ptrdiff_t inStride = srcStride[0] >> 1;
+  // 3 output bytes per pixel (assumes dstStride is expressed in pixels - TODO confirm)
+  const ptrdiff_t outStride = dstStride * 3;
+  ptrdiff_t line, i;
+
+  // Number of 16-bit samples per line (3 per pixel)
+  int processWidth = width * 3;
+
+  // Fall back to ordered dithering when no random coefficients are available
+  LAVDitherMode ditherMode = m_pSettings->GetDitherMode();
+  const uint16_t *dithers = GetRandomDitherCoeffs(height, 2, 8, 0);
+  if (dithers == NULL)
+    ditherMode = LAVDither_Ordered;
+
+  __m128i xmm0,xmm1,xmm6,xmm7;
+
+  _mm_sfence();
+  for (line = 0; line < height; line++) {
+    __m128i *dst128 = (__m128i *)(dst + line * outStride);
+
+    // Load dithering coefficients for this line
+    if (ditherMode == LAVDither_Random) {
+      // 16 coefficients per line (line << 4), one vector for each of the two loads
+      xmm6 = _mm_load_si128((const __m128i *)(dithers + (line << 4) + 0));
+      xmm7 = _mm_load_si128((const __m128i *)(dithers + (line << 4) + 8));
+    } else {
+      PIXCONV_LOAD_DITHER_COEFFS(xmm7,line,8,dithers);
+      xmm6 = xmm7;
+    }
+    for (i = 0; i < processWidth; i += 16) {
+      // The result of _mm_adds_epu16 must be assigned back; the intrinsic does
+      // not modify its operands, so dropping the return value silently
+      // disables dithering.
+      PIXCONV_LOAD_ALIGNED(xmm0, (rgb + i));      /* load */
+      xmm0 = _mm_adds_epu16(xmm0, xmm6);          /* apply dithering coefficients (saturating) */
+      xmm0 = _mm_srli_epi16(xmm0, 8);             /* shift to 8-bit */
+      PIXCONV_LOAD_ALIGNED(xmm1, (rgb + i + 8));  /* load */
+      xmm1 = _mm_adds_epu16(xmm1, xmm7);          /* apply dithering coefficients (saturating) */
+      xmm1 = _mm_srli_epi16(xmm1, 8);             /* shift to 8-bit */
+
+      // Narrow 16-bit lanes (already <= 255 after the shift) to 16 bytes
+      xmm0 = _mm_packus_epi16(xmm0, xmm1);
+      // Non-temporal store; the destination address must be 16-byte aligned
+      _mm_stream_si128(dst128++, xmm0);
+    }
+
+    rgb += inStride;
+  }
+
+  return S_OK;
+}