diff options
author | XhmikosR <xhmikosr@users.sourceforge.net> | 2010-08-02 12:54:35 +0400 |
---|---|---|
committer | XhmikosR <xhmikosr@users.sourceforge.net> | 2010-08-02 12:54:35 +0400 |
commit | c5d184664fec4340b57082cd1cc31220d9f1220b (patch) | |
tree | 34c6103e95b6538a8e71d1ba87897536b7b51015 /src/filters/transform | |
parent | aef6f86e8400fa5ee1e9d9ddc6190412ef09e519 (diff) |
updated ffmpeg (thanks to Aleksoid for finding the conflict in avcore\utils.c for Debug VS2010 builds)
git-svn-id: https://mpc-hc.svn.sourceforge.net/svnroot/mpc-hc/trunk@2180 10f7b99b-c216-0410-bff0-8a66a9350fd8
Diffstat (limited to 'src/filters/transform')
29 files changed, 1179 insertions, 1035 deletions
diff --git a/src/filters/transform/MPCVideoDec/MPCVideoDec.vcproj b/src/filters/transform/MPCVideoDec/MPCVideoDec.vcproj index 1a57edd70..e189fd1d4 100644 --- a/src/filters/transform/MPCVideoDec/MPCVideoDec.vcproj +++ b/src/filters/transform/MPCVideoDec/MPCVideoDec.vcproj @@ -45,7 +45,7 @@ <Tool
Name="VCCLCompilerTool"
AdditionalOptions="/MP"
- AdditionalIncludeDirectories="..\..\..\..\include;ffmpeg;ffmpeg\libavcodec;ffmpeg\libavutil;..\..\BaseClasses;"$(DXSDK_DIR)Include""
+ AdditionalIncludeDirectories="..\..\..\..\include;ffmpeg;ffmpeg\libavcodec;ffmpeg\libavcore;ffmpeg\libavutil;..\..\BaseClasses;"$(DXSDK_DIR)Include""
PreprocessorDefinitions="REGISTER_FILTER;WIN32;_DEBUG;_USRDLL"
/>
<Tool
@@ -115,7 +115,7 @@ <Tool
Name="VCCLCompilerTool"
AdditionalOptions="/MP"
- AdditionalIncludeDirectories="..\..\..\..\include;ffmpeg;ffmpeg\libavcodec;ffmpeg\libavutil;..\..\BaseClasses;"$(DXSDK_DIR)Include""
+ AdditionalIncludeDirectories="..\..\..\..\include;ffmpeg;ffmpeg\libavcodec;ffmpeg\libavcore;ffmpeg\libavutil;..\..\BaseClasses;"$(DXSDK_DIR)Include""
PreprocessorDefinitions="REGISTER_FILTER;WIN32;_DEBUG;_USRDLL"
DebugInformationFormat="3"
/>
@@ -185,7 +185,7 @@ <Tool
Name="VCCLCompilerTool"
AdditionalOptions="/MP"
- AdditionalIncludeDirectories="..\..\..\..\include;ffmpeg;ffmpeg\libavcodec;ffmpeg\libavutil;..\..\BaseClasses;"$(DXSDK_DIR)Include""
+ AdditionalIncludeDirectories="..\..\..\..\include;ffmpeg;ffmpeg\libavcodec;ffmpeg\libavcore;ffmpeg\libavutil;..\..\BaseClasses;"$(DXSDK_DIR)Include""
PreprocessorDefinitions="REGISTER_FILTER;WIN32;NDEBUG;_USRDLL"
CompileAs="2"
/>
@@ -260,7 +260,7 @@ <Tool
Name="VCCLCompilerTool"
AdditionalOptions="/MP"
- AdditionalIncludeDirectories="..\..\..\..\include;ffmpeg;ffmpeg\libavcodec;ffmpeg\libavutil;..\..\BaseClasses;"$(DXSDK_DIR)Include""
+ AdditionalIncludeDirectories="..\..\..\..\include;ffmpeg;ffmpeg\libavcodec;ffmpeg\libavcore;ffmpeg\libavutil;..\..\BaseClasses;"$(DXSDK_DIR)Include""
PreprocessorDefinitions="REGISTER_FILTER;WIN32;NDEBUG;_USRDLL"
EnableEnhancedInstructionSet="0"
/>
@@ -334,7 +334,7 @@ <Tool
Name="VCCLCompilerTool"
AdditionalOptions="/MP"
- AdditionalIncludeDirectories="..\..\..\..\include;ffmpeg;ffmpeg\libavcodec;ffmpeg\libavutil;..\..\BaseClasses;"$(DXSDK_DIR)Include""
+ AdditionalIncludeDirectories="..\..\..\..\include;ffmpeg;ffmpeg\libavcodec;ffmpeg\libavcore;ffmpeg\libavutil;..\..\BaseClasses;"$(DXSDK_DIR)Include""
PreprocessorDefinitions="WIN32;_DEBUG"
/>
<Tool
@@ -393,7 +393,7 @@ <Tool
Name="VCCLCompilerTool"
AdditionalOptions="/MP"
- AdditionalIncludeDirectories="..\..\..\..\include;ffmpeg;ffmpeg\libavcodec;ffmpeg\libavutil;..\..\BaseClasses;"$(DXSDK_DIR)Include""
+ AdditionalIncludeDirectories="..\..\..\..\include;ffmpeg;ffmpeg\libavcodec;ffmpeg\libavcore;ffmpeg\libavutil;..\..\BaseClasses;"$(DXSDK_DIR)Include""
PreprocessorDefinitions="_WIN64;_DEBUG;"
DebugInformationFormat="3"
/>
@@ -452,7 +452,7 @@ <Tool
Name="VCCLCompilerTool"
AdditionalOptions="/MP"
- AdditionalIncludeDirectories="..\..\..\..\include;ffmpeg;ffmpeg\libavcodec;ffmpeg\libavutil;..\..\BaseClasses;"$(DXSDK_DIR)Include""
+ AdditionalIncludeDirectories="..\..\..\..\include;ffmpeg;ffmpeg\libavcodec;ffmpeg\libavcore;ffmpeg\libavutil;..\..\BaseClasses;"$(DXSDK_DIR)Include""
PreprocessorDefinitions="WIN32;NDEBUG"
UsePrecompiledHeader="0"
/>
@@ -513,7 +513,7 @@ <Tool
Name="VCCLCompilerTool"
AdditionalOptions="/MP"
- AdditionalIncludeDirectories="..\..\..\..\include;ffmpeg;ffmpeg\libavcodec;ffmpeg\libavutil;..\..\BaseClasses;"$(DXSDK_DIR)Include""
+ AdditionalIncludeDirectories="..\..\..\..\include;ffmpeg;ffmpeg\libavcodec;ffmpeg\libavcore;ffmpeg\libavutil;..\..\BaseClasses;"$(DXSDK_DIR)Include""
PreprocessorDefinitions="_WIN64;NDEBUG"
EnableEnhancedInstructionSet="0"
UsePrecompiledHeader="0"
diff --git a/src/filters/transform/MPCVideoDec/MPCVideoDec.vcxproj b/src/filters/transform/MPCVideoDec/MPCVideoDec.vcxproj index b312e3b72..5b50054f3 100644 --- a/src/filters/transform/MPCVideoDec/MPCVideoDec.vcxproj +++ b/src/filters/transform/MPCVideoDec/MPCVideoDec.vcxproj @@ -152,7 +152,7 @@ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug Filter|Win32'">
<ClCompile>
<AdditionalOptions>/MP %(AdditionalOptions)</AdditionalOptions>
- <AdditionalIncludeDirectories>..\..\..\..\include;ffmpeg;ffmpeg\libavcodec;ffmpeg\libavutil;..\..\BaseClasses;$(DXSDK_DIR)Include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <AdditionalIncludeDirectories>..\..\..\..\include;ffmpeg;ffmpeg\libavcodec;ffmpeg\libavcore;ffmpeg\libavutil;..\..\BaseClasses;$(DXSDK_DIR)Include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>REGISTER_FILTER;WIN32;_DEBUG;_USRDLL;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ClCompile>
<Link>
@@ -170,7 +170,7 @@ </Midl>
<ClCompile>
<AdditionalOptions>/MP %(AdditionalOptions)</AdditionalOptions>
- <AdditionalIncludeDirectories>..\..\..\..\include;ffmpeg;ffmpeg\libavcodec;ffmpeg\libavutil;..\..\BaseClasses;$(DXSDK_DIR)Include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <AdditionalIncludeDirectories>..\..\..\..\include;ffmpeg;ffmpeg\libavcodec;ffmpeg\libavcore;ffmpeg\libavutil;..\..\BaseClasses;$(DXSDK_DIR)Include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>REGISTER_FILTER;WIN32;_DEBUG;_USRDLL;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
</ClCompile>
@@ -186,7 +186,7 @@ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release Filter|Win32'">
<ClCompile>
<AdditionalOptions>/MP %(AdditionalOptions)</AdditionalOptions>
- <AdditionalIncludeDirectories>..\..\..\..\include;ffmpeg;ffmpeg\libavcodec;ffmpeg\libavutil;..\..\BaseClasses;$(DXSDK_DIR)Include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <AdditionalIncludeDirectories>..\..\..\..\include;ffmpeg;ffmpeg\libavcodec;ffmpeg\libavcore;ffmpeg\libavutil;..\..\BaseClasses;$(DXSDK_DIR)Include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>REGISTER_FILTER;WIN32;NDEBUG;_USRDLL;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<CompileAs>CompileAsCpp</CompileAs>
</ClCompile>
@@ -209,7 +209,7 @@ </Midl>
<ClCompile>
<AdditionalOptions>/MP %(AdditionalOptions)</AdditionalOptions>
- <AdditionalIncludeDirectories>..\..\..\..\include;ffmpeg;ffmpeg\libavcodec;ffmpeg\libavutil;..\..\BaseClasses;$(DXSDK_DIR)Include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <AdditionalIncludeDirectories>..\..\..\..\include;ffmpeg;ffmpeg\libavcodec;ffmpeg\libavcore;ffmpeg\libavutil;..\..\BaseClasses;$(DXSDK_DIR)Include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>REGISTER_FILTER;WIN32;NDEBUG;_USRDLL;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<EnableEnhancedInstructionSet>NotSet</EnableEnhancedInstructionSet>
</ClCompile>
@@ -230,7 +230,7 @@ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile>
<AdditionalOptions>/MP %(AdditionalOptions)</AdditionalOptions>
- <AdditionalIncludeDirectories>..\..\..\..\include;ffmpeg;ffmpeg\libavcodec;ffmpeg\libavutil;..\..\BaseClasses;$(DXSDK_DIR)Include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <AdditionalIncludeDirectories>..\..\..\..\include;ffmpeg;ffmpeg\libavcodec;ffmpeg\libavcore;ffmpeg\libavutil;..\..\BaseClasses;$(DXSDK_DIR)Include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>WIN32;_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ClCompile>
<ProjectReference />
@@ -246,7 +246,7 @@ </Midl>
<ClCompile>
<AdditionalOptions>/MP %(AdditionalOptions)</AdditionalOptions>
- <AdditionalIncludeDirectories>..\..\..\..\include;ffmpeg;ffmpeg\libavcodec;ffmpeg\libavutil;..\..\BaseClasses;$(DXSDK_DIR)Include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <AdditionalIncludeDirectories>..\..\..\..\include;ffmpeg;ffmpeg\libavcodec;ffmpeg\libavcore;ffmpeg\libavutil;..\..\BaseClasses;$(DXSDK_DIR)Include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>_WIN64;_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
</ClCompile>
@@ -260,7 +260,7 @@ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<ClCompile>
<AdditionalOptions>/MP %(AdditionalOptions)</AdditionalOptions>
- <AdditionalIncludeDirectories>..\..\..\..\include;ffmpeg;ffmpeg\libavcodec;ffmpeg\libavutil;..\..\BaseClasses;$(DXSDK_DIR)Include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <AdditionalIncludeDirectories>..\..\..\..\include;ffmpeg;ffmpeg\libavcodec;ffmpeg\libavcore;ffmpeg\libavutil;..\..\BaseClasses;$(DXSDK_DIR)Include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>WIN32;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PrecompiledHeader>
</PrecompiledHeader>
@@ -279,7 +279,7 @@ </Midl>
<ClCompile>
<AdditionalOptions>/MP %(AdditionalOptions)</AdditionalOptions>
- <AdditionalIncludeDirectories>..\..\..\..\include;ffmpeg;ffmpeg\libavcodec;ffmpeg\libavutil;..\..\BaseClasses;$(DXSDK_DIR)Include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <AdditionalIncludeDirectories>..\..\..\..\include;ffmpeg;ffmpeg\libavcodec;ffmpeg\libavcore;ffmpeg\libavutil;..\..\BaseClasses;$(DXSDK_DIR)Include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>_WIN64;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<EnableEnhancedInstructionSet>NotSet</EnableEnhancedInstructionSet>
<PrecompiledHeader>
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/Makefile b/src/filters/transform/MPCVideoDec/ffmpeg/Makefile index 1497e5ab7..f5b7c3b0b 100644 --- a/src/filters/transform/MPCVideoDec/ffmpeg/Makefile +++ b/src/filters/transform/MPCVideoDec/ffmpeg/Makefile @@ -1,4 +1,5 @@ LAVC_DIR = libavcodec
+LAVCORE_DIR=libavcore
LAVU_DIR = libavutil
LSWS_DIR = libswscale
PNG_DIR = ../../../../thirdparty/libpng
@@ -17,6 +18,7 @@ OUT_DIRS = ../../../../../bin/obj/Release_x64/libavcodec_gcc/ \ ../../../../../bin/obj/Release_x64/libavcodec_gcc/libavcodec \
../../../../../bin/obj/Release_x64/libavcodec_gcc/libavcodec/amr_float \
../../../../../bin/obj/Release_x64/libavcodec_gcc/libavcodec/x86 \
+ ../../../../../bin/obj/Release_x64/libavcodec_gcc/libavcore \
../../../../../bin/obj/Release_x64/libavcodec_gcc/libavutil \
../../../../../bin/obj/Release_x64/libavcodec_gcc/libswscale \
$(SLIB_DIR)
@@ -25,12 +27,13 @@ OUT_DIRS = ../../../../../bin/obj/Release_Win32/libavcodec_gcc/ \ ../../../../../bin/obj/Release_Win32/libavcodec_gcc/libavcodec \
../../../../../bin/obj/Release_Win32/libavcodec_gcc/libavcodec/amr_float \
../../../../../bin/obj/Release_Win32/libavcodec_gcc/libavcodec/x86 \
+ ../../../../../bin/obj/Release_Win32/libavcodec_gcc/libavcore \
../../../../../bin/obj/Release_Win32/libavcodec_gcc/libavutil \
../../../../../bin/obj/Release_Win32/libavcodec_gcc/libswscale \
$(SLIB_DIR)
endif
-CFLAGS+= -I. -I.. -I$(LAVC_DIR) -I$(LAVU_DIR) -I$(LSWS_DIR) -I$(ZLIB_DIR) -I$(PNG_DIR) \
+CFLAGS+= -I. -I.. -I$(LAVC_DIR) -I$(LAVCORE_DIR) -I$(LAVU_DIR) -I$(LSWS_DIR) -I$(ZLIB_DIR) -I$(PNG_DIR) \
-DHAVE_AV_CONFIG_H -D_ISOC99_SOURCE -D_POSIX_C_SOURCE=200112 -std=gnu99
SRCS_C=\
@@ -156,6 +159,9 @@ SRCS_C=\ $(LAVC_DIR)/x86/vp6dsp_sse2.c \
$(LAVC_DIR)/x86/vp8dsp-init.c \
\
+ $(LAVCORE_DIR)/avcore_utils.c \
+ $(LAVCORE_DIR)/imgutils.c \
+\
$(LAVU_DIR)/crc.c \
$(LAVU_DIR)/intfloat_readwrite.c \
$(LAVU_DIR)/inverse.c \
@@ -213,6 +219,7 @@ clean: $(OUT_DIR)$(LAVC_DIR)/*.o $(OUT_DIR)$(LAVC_DIR)/*.d \
$(OUT_DIR)$(LAVC_DIR)/x86/*.o $(OUT_DIR)$(LAVC_DIR)/x86/*.d \
$(OUT_DIR)$(LAVC_DIR)/amr_float/*.o $(OUT_DIR)$(LAVC_DIR)/amr_float/*.d \
+ $(OUT_DIR)$(LAVCORE_DIR)/*.o $(OUT_DIR)$(LAVCORE_DIR)/*.d \
$(OUT_DIR)$(LAVU_DIR)/*.o $(OUT_DIR)$(LAVU_DIR)/*.d \
$(OUT_DIR)$(LSWS_DIR)/*.o $(OUT_DIR)$(LSWS_DIR)/*.d \
$(ZLIB_DIR)/*.o $(ZLIB_DIR)/*.d $(PNG_DIR)/*.o $(SLIB)
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/custom_code.txt b/src/filters/transform/MPCVideoDec/ffmpeg/custom_code.txt index 3ad7a140e..74108e99b 100644 --- a/src/filters/transform/MPCVideoDec/ffmpeg/custom_code.txt +++ b/src/filters/transform/MPCVideoDec/ffmpeg/custom_code.txt @@ -13,5 +13,6 @@ The following files have MPC-specific custom code (compared to ffdshow): * libavcodec/mpc_helper.c
* libavcodec/mpeg12.c
* libavcodec/vp3.c
+* libavcore/avcore_utils.c (renamed from utils.c to avoid conflicts in MSVC2010)
* libavutil/internal.h
* libavutil/log.h
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec.vcproj b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec.vcproj index 0d276b5ba..110c08cba 100644 --- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec.vcproj +++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec.vcproj @@ -1591,6 +1591,26 @@ </FileConfiguration>
</File>
</Filter>
+ <Filter
+ Name="libavcore"
+ >
+ <File
+ RelativePath=".\libavcore\avcore.h"
+ >
+ </File>
+ <File
+ RelativePath=".\libavcore\avcore_utils.c"
+ >
+ </File>
+ <File
+ RelativePath=".\libavcore\imgutils.c"
+ >
+ </File>
+ <File
+ RelativePath=".\libavcore\imgutils.h"
+ >
+ </File>
+ </Filter>
<File
RelativePath=".\array_allocator.h"
>
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec.vcxproj b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec.vcxproj index fe19a7491..141c87d5c 100644 --- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec.vcxproj +++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec.vcxproj @@ -126,6 +126,8 @@ <ClInclude Include="libavcodec\vp8dsp.h" />
<ClInclude Include="libavcodec\wmv2.h" />
<ClInclude Include="libavcodec\xiph.h" />
+ <ClInclude Include="libavcore\avcore.h" />
+ <ClInclude Include="libavcore\imgutils.h" />
<ClInclude Include="libavutil\attributes.h" />
<ClInclude Include="libavutil\avconfig.h" />
<ClInclude Include="libavutil\avstring.h" />
@@ -276,6 +278,8 @@ <ClCompile Include="libavcodec\wmv2.c" />
<ClCompile Include="libavcodec\wmv2dec.c" />
<ClCompile Include="libavcodec\xiph.c" />
+ <ClCompile Include="libavcore\imgutils.c" />
+ <ClCompile Include="libavcore\avcore_utils.c" />
<ClCompile Include="libavutil\crc.c" />
<ClCompile Include="libavutil\intfloat_readwrite.c">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec.vcxproj.filters b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec.vcxproj.filters index 9cf688ad0..67bf63abb 100644 --- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec.vcxproj.filters +++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec.vcxproj.filters @@ -13,6 +13,9 @@ <Filter Include="libavcodec\amr_float">
<UniqueIdentifier>{dcef6bb8-4262-415c-935a-c8dd0f056c4c}</UniqueIdentifier>
</Filter>
+ <Filter Include="libavcore">
+ <UniqueIdentifier>{bb994511-43fc-42df-8ba2-b6186af844b2}</UniqueIdentifier>
+ </Filter>
</ItemGroup>
<ItemGroup>
<ClInclude Include="libswscale\asmalign.h">
@@ -427,6 +430,12 @@ <ClInclude Include="libavcodec\AVPaletteControl.h">
<Filter>libavcodec</Filter>
</ClInclude>
+ <ClInclude Include="libavcore\avcore.h">
+ <Filter>libavcore</Filter>
+ </ClInclude>
+ <ClInclude Include="libavcore\imgutils.h">
+ <Filter>libavcore</Filter>
+ </ClInclude>
</ItemGroup>
<ItemGroup>
<ClCompile Include="libswscale\isP4HT.c">
@@ -813,5 +822,11 @@ <ClCompile Include="libavcodec\avpacket.c">
<Filter>libavcodec</Filter>
</ClCompile>
+ <ClCompile Include="libavcore\imgutils.c">
+ <Filter>libavcore</Filter>
+ </ClCompile>
+ <ClCompile Include="libavcore\avcore_utils.c">
+ <Filter>libavcore</Filter>
+ </ClCompile>
</ItemGroup>
</Project>
\ No newline at end of file diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/imgconvert.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/imgconvert.c index 3db5fb47d..293fa3c8a 100644 --- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/imgconvert.c +++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/imgconvert.c @@ -35,9 +35,9 @@ #include "internal.h"
#include "imgconvert.h"
#include "libavutil/pixdesc.h"
+#include "libavcore/imgutils.h"
#if HAVE_MMX
-#include "x86/mmx.h"
#include "x86/dsputil_mmx.h"
#endif
@@ -748,144 +748,18 @@ int ff_set_systematic_pal(uint32_t pal[256], enum PixelFormat pix_fmt){ return 0;
}
+#if LIBAVCODEC_VERSION_MAJOR < 53
int ff_fill_linesize(AVPicture *picture, enum PixelFormat pix_fmt, int width)
{
- int i;
- const AVPixFmtDescriptor *desc = &av_pix_fmt_descriptors[pix_fmt];
- int max_plane_step [4];
- int max_plane_step_comp[4];
-
- memset(picture->linesize, 0, sizeof(picture->linesize));
-
- if (desc->flags & PIX_FMT_HWACCEL)
- return -1;
-
- if (desc->flags & PIX_FMT_BITSTREAM) {
- picture->linesize[0] = (width * (desc->comp[0].step_minus1+1) + 7) >> 3;
- return 0;
- }
-
- memset(max_plane_step , 0, sizeof(max_plane_step ));
- memset(max_plane_step_comp, 0, sizeof(max_plane_step_comp));
- for (i = 0; i < 4; i++) {
- const AVComponentDescriptor *comp = &(desc->comp[i]);
- if ((comp->step_minus1+1) > max_plane_step[comp->plane]) {
- max_plane_step [comp->plane] = comp->step_minus1+1;
- max_plane_step_comp[comp->plane] = i;
- }
- }
-
- for (i = 0; i < 4; i++) {
- int s = (max_plane_step_comp[i] == 1 || max_plane_step_comp[i] == 2) ? desc->log2_chroma_w : 0;
- picture->linesize[i] = max_plane_step[i] * (((width + (1 << s) - 1)) >> s);
- }
-
- return 0;
+ return av_fill_image_linesizes(picture->linesize, pix_fmt, width);
}
int ff_fill_pointer(AVPicture *picture, uint8_t *ptr, enum PixelFormat pix_fmt,
int height)
{
- int size, h2, size2;
- const AVPixFmtDescriptor *desc = &av_pix_fmt_descriptors[pix_fmt];
-
- size = picture->linesize[0] * height;
- switch(pix_fmt) {
- case PIX_FMT_YUV420P:
- case PIX_FMT_YUV422P:
- case PIX_FMT_YUV444P:
- case PIX_FMT_YUV410P:
- case PIX_FMT_YUV411P:
- case PIX_FMT_YUV440P:
- case PIX_FMT_YUVJ420P:
- case PIX_FMT_YUVJ422P:
- case PIX_FMT_YUVJ444P:
- case PIX_FMT_YUVJ440P:
- case PIX_FMT_YUV420P16LE:
- case PIX_FMT_YUV422P16LE:
- case PIX_FMT_YUV444P16LE:
- case PIX_FMT_YUV420P16BE:
- case PIX_FMT_YUV422P16BE:
- case PIX_FMT_YUV444P16BE:
- h2 = (height + (1 << desc->log2_chroma_h) - 1) >> desc->log2_chroma_h;
- size2 = picture->linesize[1] * h2;
- picture->data[0] = ptr;
- picture->data[1] = picture->data[0] + size;
- picture->data[2] = picture->data[1] + size2;
- picture->data[3] = NULL;
- return size + 2 * size2;
- case PIX_FMT_YUVA420P:
- h2 = (height + (1 << desc->log2_chroma_h) - 1) >> desc->log2_chroma_h;
- size2 = picture->linesize[1] * h2;
- picture->data[0] = ptr;
- picture->data[1] = picture->data[0] + size;
- picture->data[2] = picture->data[1] + size2;
- picture->data[3] = picture->data[1] + size2 + size2;
- return 2 * size + 2 * size2;
- case PIX_FMT_NV12:
- case PIX_FMT_NV21:
- h2 = (height + (1 << desc->log2_chroma_h) - 1) >> desc->log2_chroma_h;
- size2 = picture->linesize[1] * h2;
- picture->data[0] = ptr;
- picture->data[1] = picture->data[0] + size;
- picture->data[2] = NULL;
- picture->data[3] = NULL;
- return size + size2;
- case PIX_FMT_RGB24:
- case PIX_FMT_BGR24:
- case PIX_FMT_ARGB:
- case PIX_FMT_ABGR:
- case PIX_FMT_RGBA:
- case PIX_FMT_BGRA:
- case PIX_FMT_RGB48BE:
- case PIX_FMT_RGB48LE:
- case PIX_FMT_GRAY16BE:
- case PIX_FMT_GRAY16LE:
- case PIX_FMT_BGR444BE:
- case PIX_FMT_BGR444LE:
- case PIX_FMT_BGR555BE:
- case PIX_FMT_BGR555LE:
- case PIX_FMT_BGR565BE:
- case PIX_FMT_BGR565LE:
- case PIX_FMT_RGB444BE:
- case PIX_FMT_RGB444LE:
- case PIX_FMT_RGB555BE:
- case PIX_FMT_RGB555LE:
- case PIX_FMT_RGB565BE:
- case PIX_FMT_RGB565LE:
- case PIX_FMT_YUYV422:
- case PIX_FMT_UYVY422:
- case PIX_FMT_UYYVYY411:
- case PIX_FMT_RGB4:
- case PIX_FMT_BGR4:
- case PIX_FMT_MONOWHITE:
- case PIX_FMT_MONOBLACK:
- case PIX_FMT_Y400A:
- picture->data[0] = ptr;
- picture->data[1] = NULL;
- picture->data[2] = NULL;
- picture->data[3] = NULL;
- return size;
- case PIX_FMT_PAL8:
- case PIX_FMT_RGB8:
- case PIX_FMT_BGR8:
- case PIX_FMT_RGB4_BYTE:
- case PIX_FMT_BGR4_BYTE:
- case PIX_FMT_GRAY8:
- size2 = (size + 3) & ~3;
- picture->data[0] = ptr;
- picture->data[1] = ptr + size2; /* palette is stored here as 256 32 bit words */
- picture->data[2] = NULL;
- picture->data[3] = NULL;
- return size2 + 256 * 4;
- default:
- picture->data[0] = NULL;
- picture->data[1] = NULL;
- picture->data[2] = NULL;
- picture->data[3] = NULL;
- return -1;
- }
+ return av_fill_image_pointers(picture->data, pix_fmt, height, ptr, picture->linesize);
}
+#endif
void ff_img_copy_plane(uint8_t *dst, int dst_wrap,
const uint8_t *src, int src_wrap,
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/imgconvert.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/imgconvert.h index 48e2f1271..f09fcbfc0 100644 --- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/imgconvert.h +++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/imgconvert.h @@ -27,9 +27,13 @@ #include <stdint.h> #include "avcodec.h" +#if LIBAVCODEC_VERSION_MAJOR < 53 +attribute_deprecated int ff_fill_linesize(AVPicture *picture, enum PixelFormat pix_fmt, int width); +attribute_deprecated int ff_fill_pointer(AVPicture *picture, uint8_t *ptr, enum PixelFormat pix_fmt, int height); +#endif int ff_get_plane_bytewidth(enum PixelFormat pix_fmt, int width, int plane); diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mpeg12.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mpeg12.c index 99ff3fa62..93268052a 100644 --- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mpeg12.c +++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mpeg12.c @@ -1151,7 +1151,7 @@ typedef struct Mpeg1Context { MpegEncContext mpeg_enc_ctx;
int mpeg_enc_ctx_allocated; /* true if decoding context allocated */
int repeat_field; /* true if we must repeat the field */
- AVPanScan pan_scan; /** some temporary storage for the panscan */
+ AVPanScan pan_scan; /**< some temporary storage for the panscan */
int slice_count;
int swap_uv;//indicate VCR2
int save_aspect_info;
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/utils.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/utils.c index 1391625c0..78925d915 100644 --- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/utils.c +++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/utils.c @@ -28,6 +28,7 @@ #include "libavutil/avstring.h"
#include "libavutil/crc.h"
#include "libavutil/pixdesc.h"
+#include "libavcore/imgutils.h"
#include "avcodec.h"
#include "dsputil.h"
#include "imgconvert.h"
@@ -281,7 +282,7 @@ int avcodec_default_get_buffer(AVCodecContext *s, AVFrame *pic){ do {
// NOTE: do not align linesizes individually, this breaks e.g. assumptions
// that linesize[0] == 2*linesize[1] in the MPEG-encoder for 4:2:2
- ff_fill_linesize(&picture, s->pix_fmt, w);
+ av_fill_image_linesizes(picture.linesize, s->pix_fmt, w);
// increase alignment of w for next try (rhs gives the lowest bit set in w)
w += w & ~(w-1);
@@ -291,7 +292,7 @@ int avcodec_default_get_buffer(AVCodecContext *s, AVFrame *pic){ }
} while (unaligned);
- tmpsize = ff_fill_pointer(&picture, NULL, s->pix_fmt, h);
+ tmpsize = av_fill_image_pointers(picture.data, s->pix_fmt, h, NULL, picture.linesize);
if (tmpsize < 0)
return -1;
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp5.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp5.c index 3ebff017a..bb9b62e07 100644 --- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp5.c +++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp5.c @@ -42,7 +42,7 @@ static int vp5_parse_header(VP56Context *s, const uint8_t *buf, int buf_size, vp56_init_range_decoder(&s->c, buf, buf_size); s->framep[VP56_FRAME_CURRENT]->key_frame = !vp56_rac_get(c); vp56_rac_get(c); - vp56_init_dequant(s, vp56_rac_gets(c, 6)); + ff_vp56_init_dequant(s, vp56_rac_gets(c, 6)); if (s->framep[VP56_FRAME_CURRENT]->key_frame) { vp56_rac_gets(c, 8); @@ -254,7 +254,7 @@ static av_cold int vp5_decode_init(AVCodecContext *avctx) { VP56Context *s = avctx->priv_data; - vp56_init(avctx, 1, 0); + ff_vp56_init(avctx, 1, 0); s->vp56_coord_div = vp5_coord_div; s->parse_vector_adjustment = vp5_parse_vector_adjustment; s->parse_coeff = vp5_parse_coeff; @@ -273,8 +273,8 @@ AVCodec vp5_decoder = { sizeof(VP56Context), vp5_decode_init, NULL, - vp56_free, - vp56_decode_frame, + ff_vp56_free, + ff_vp56_decode_frame, /*.capabilities = */CODEC_CAP_DR1, /*.next = */NULL, /*.flush = */NULL, diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp56.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp56.c index 188bfcfbf..3f5569eb8 100644 --- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp56.c +++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp56.c @@ -28,7 +28,7 @@ #include "vp56data.h" -void vp56_init_dequant(VP56Context *s, int quantizer) +void ff_vp56_init_dequant(VP56Context *s, int quantizer) { s->quantizer = quantizer; s->dequant_dc = vp56_dc_dequant[quantizer] << 2; @@ -481,7 +481,7 @@ static int vp56_size_changed(AVCodecContext *avctx) return 0; } -int vp56_decode_frame(AVCodecContext *avctx, void *data, int *data_size, +int ff_vp56_decode_frame(AVCodecContext *avctx, void *data, int *data_size, const uint8_t *buf, int 
buf_size) { VP56Context *s = avctx->priv_data; @@ -638,7 +638,7 @@ int vp56_decode_frame(AVCodecContext *avctx, void *data, int *data_size, return buf_size; } -av_cold void vp56_init(AVCodecContext *avctx, int flip, int has_alpha) +av_cold void ff_vp56_init(AVCodecContext *avctx, int flip, int has_alpha) { VP56Context *s = avctx->priv_data; int i; @@ -677,7 +677,7 @@ av_cold void vp56_init(AVCodecContext *avctx, int flip, int has_alpha) } } -av_cold int vp56_free(AVCodecContext *avctx) +av_cold int ff_vp56_free(AVCodecContext *avctx) { VP56Context *s = avctx->priv_data; diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp56.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp56.h index a4ef49ede..69518fa73 100644 --- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp56.h +++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp56.h @@ -56,7 +56,7 @@ typedef struct { bits left) in order to eliminate a negate in cache refilling */ const uint8_t *buffer; const uint8_t *end; - unsigned long code_word; + unsigned int code_word; } VP56RangeCoder; typedef struct { @@ -170,10 +170,10 @@ struct vp56_context { }; -void vp56_init(AVCodecContext *avctx, int flip, int has_alpha); -int vp56_free(AVCodecContext *avctx); -void vp56_init_dequant(VP56Context *s, int quantizer); -int vp56_decode_frame(AVCodecContext *avctx, void *data, int *data_size, +void ff_vp56_init(AVCodecContext *avctx, int flip, int has_alpha); +int ff_vp56_free(AVCodecContext *avctx); +void ff_vp56_init_dequant(VP56Context *s, int quantizer); +int ff_vp56_decode_frame(AVCodecContext *avctx, void *data, int *data_size, const uint8_t *buf, int buf_size); @@ -191,25 +191,12 @@ static inline void vp56_init_range_decoder(VP56RangeCoder *c, c->code_word = bytestream_get_be16(&c->buffer); } -static inline int vp56_rac_get_prob(VP56RangeCoder *c, uint8_t prob) +static av_always_inline unsigned int vp56_rac_renorm(VP56RangeCoder *c) { - /* Don't put c->high in a local variable; if we do 
that, gcc gets - * the stupids and turns the code below into a branch again. */ + int shift = ff_h264_norm_shift[c->high] - 1; int bits = c->bits; - unsigned long code_word = c->code_word; - unsigned int low = 1 + (((c->high - 1) * prob) >> 8); - unsigned int low_shift = low << 8; - int bit = code_word >= low_shift; - int shift; + unsigned int code_word = c->code_word; - /* Incantation to convince GCC to turn these into conditional moves - * instead of branches -- faster, as this branch is basically - * unpredictable. */ - c->high = bit ? c->high - low : low; - code_word = bit ? code_word - low_shift : code_word; - - /* normalize */ - shift = ff_h264_norm_shift[c->high] - 1; c->high <<= shift; code_word <<= shift; bits += shift; @@ -218,29 +205,62 @@ static inline int vp56_rac_get_prob(VP56RangeCoder *c, uint8_t prob) bits -= 8; } c->bits = bits; - c->code_word = code_word; + return code_word; +} + +#if ARCH_X86 +#include "x86/vp56_arith.h" +#endif + +#ifndef vp56_rac_get_prob +#define vp56_rac_get_prob vp56_rac_get_prob +static inline int vp56_rac_get_prob(VP56RangeCoder *c, uint8_t prob) +{ + unsigned int code_word = vp56_rac_renorm(c); + unsigned int low = 1 + (((c->high - 1) * prob) >> 8); + unsigned int low_shift = low << 8; + int bit = code_word >= low_shift; + + c->high = bit ? c->high - low : low; + c->code_word = bit ? 
code_word - low_shift : code_word; + return bit; } +#endif + +// branchy variant, to be used where there's a branch based on the bit decoded +static av_always_inline int vp56_rac_get_prob_branchy(VP56RangeCoder *c, int prob) +{ + unsigned long code_word = vp56_rac_renorm(c); + unsigned low = 1 + (((c->high - 1) * prob) >> 8); + unsigned low_shift = low << 8; + + if (code_word >= low_shift) { + c->high -= low; + c->code_word = code_word - low_shift; + return 1; + } + + c->high = low; + c->code_word = code_word; + return 0; +} static inline int vp56_rac_get(VP56RangeCoder *c) { + unsigned int code_word = vp56_rac_renorm(c); /* equiprobable */ int low = (c->high + 1) >> 1; unsigned int low_shift = low << 8; - int bit = c->code_word >= low_shift; + int bit = code_word >= low_shift; if (bit) { - c->high = (c->high - low) << 1; - c->code_word -= low_shift; + c->high -= low; + code_word -= low_shift; } else { - c->high = low << 1; + c->high = low; } - /* normalize */ - c->code_word <<= 1; - if (++c->bits == 0 && c->buffer < c->end) { - c->bits = -8; - c->code_word |= *c->buffer++; - } + c->code_word = code_word; return bit; } diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp6.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp6.c index ce0ec642a..4c2aec67a 100644 --- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp6.c +++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp6.c @@ -54,7 +54,7 @@ static int vp6_parse_header(VP56Context *s, const uint8_t *buf, int buf_size, int separated_coeff = buf[0] & 1; s->framep[VP56_FRAME_CURRENT]->key_frame = !(buf[0] & 0x80); - vp56_init_dequant(s, (buf[0] >> 1) & 0x3F); + ff_vp56_init_dequant(s, (buf[0] >> 1) & 0x3F); if (s->framep[VP56_FRAME_CURRENT]->key_frame) { sub_version = buf[1] >> 3; @@ -576,8 +576,8 @@ static av_cold int vp6_decode_init(AVCodecContext *avctx) { VP56Context *s = avctx->priv_data; - vp56_init(avctx, avctx->codec->id == CODEC_ID_VP6, - avctx->codec->id == CODEC_ID_VP6A); + 
ff_vp56_init(avctx, avctx->codec->id == CODEC_ID_VP6, + avctx->codec->id == CODEC_ID_VP6A); s->vp56_coord_div = vp6_coord_div; s->parse_vector_adjustment = vp6_parse_vector_adjustment; s->filter = vp6_filter; @@ -594,7 +594,7 @@ static av_cold int vp6_decode_free(AVCodecContext *avctx) VP56Context *s = avctx->priv_data; int pt, ct, cg; - vp56_free(avctx); + ff_vp56_free(avctx); for (pt=0; pt<2; pt++) { free_vlc(&s->dccv_vlc[pt]); @@ -614,7 +614,7 @@ AVCodec vp6_decoder = { vp6_decode_init, NULL, vp6_decode_free, - vp56_decode_frame, + ff_vp56_decode_frame, /*.capabilities = */CODEC_CAP_DR1, /*.next = */NULL, /*.flush = */NULL, @@ -632,7 +632,7 @@ AVCodec vp6f_decoder = { vp6_decode_init, NULL, vp6_decode_free, - vp56_decode_frame, + ff_vp56_decode_frame, /*.capabilities = */CODEC_CAP_DR1, /*.next = */NULL, /*.flush = */NULL, @@ -650,7 +650,7 @@ AVCodec vp6a_decoder = { vp6_decode_init, NULL, vp6_decode_free, - vp56_decode_frame, + ff_vp56_decode_frame, /*.capabilities = */CODEC_CAP_DR1, /*.next = */NULL, /*.flush = */NULL, diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp8.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp8.c index 2dd086b70..6524a7141 100644 --- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp8.c +++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp8.c @@ -3,6 +3,7 @@ * * Copyright (C) 2010 David Conrad * Copyright (C) 2010 Ronald S. Bultje + * Copyright (C) 2010 Jason Garrett-Glaser * * This file is part of FFmpeg. 
* @@ -198,8 +199,6 @@ typedef struct { } prob[2]; } VP8Context; -#define RL24(p) (AV_RL16(p) + ((p)[2] << 16)) - static void vp8_decode_flush(AVCodecContext *avctx) { VP8Context *s = avctx->priv_data; @@ -211,6 +210,7 @@ static void vp8_decode_flush(AVCodecContext *avctx) memset(s->framep, 0, sizeof(s->framep)); av_freep(&s->macroblocks_base); + av_freep(&s->filter_strength); av_freep(&s->intra4x4_pred_mode_base); av_freep(&s->top_nnz); av_freep(&s->edge_emu_buffer); @@ -307,7 +307,7 @@ static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size) return -1; for (i = 0; i < s->num_coeff_partitions-1; i++) { - int size = RL24(sizes + 3*i); + int size = AV_RL24(sizes + 3*i); if (buf_size - size < 0) return -1; @@ -402,7 +402,7 @@ static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size) s->keyframe = !(buf[0] & 1); s->profile = (buf[0]>>1) & 7; s->invisible = !(buf[0] & 0x10); - header_size = RL24(buf) >> 5; + header_size = AV_RL24(buf) >> 5; buf += 3; buf_size -= 3; @@ -420,8 +420,8 @@ static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size) } if (s->keyframe) { - if (RL24(buf) != 0x2a019d) { - av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", RL24(buf)); + if (AV_RL24(buf) != 0x2a019d) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf)); return AVERROR_INVALIDDATA; } width = AV_RL16(buf+3) & 0x3fff; @@ -495,7 +495,7 @@ static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size) for (j = 0; j < 8; j++) for (k = 0; k < 3; k++) for (l = 0; l < NUM_DCT_TOKENS-1; l++) - if (vp56_rac_get_prob(c, vp8_token_update_probs[i][j][k][l])) + if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) s->prob->token[i][j][k][l] = vp8_rac_get_uint(c, 8); if ((s->mbskip_enabled = vp8_rac_get(c))) @@ -516,15 +516,15 @@ static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size) // 17.2 MV probability update for (i = 0; i < 2; i++) for (j = 
0; j < 19; j++) - if (vp56_rac_get_prob(c, vp8_mv_update_prob[i][j])) + if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j])) s->prob->mvc[i][j] = vp8_rac_get_nn(c); } return 0; } -static inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src, - int mb_x, int mb_y) +static av_always_inline +void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src, int mb_x, int mb_y) { #define MARGIN (16 << 2) dst->x = av_clip(src->x, -((mb_x << 6) + MARGIN), @@ -533,8 +533,9 @@ static inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src, ((s->mb_height - 1 - mb_y) << 6) + MARGIN); } -static void find_near_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, - VP56mv near[2], VP56mv *best, uint8_t cnt[4]) +static av_always_inline +void find_near_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, + VP56mv near[2], VP56mv *best, uint8_t cnt[4]) { VP8Macroblock *mb_edge[3] = { mb + 2 /* top */, mb - 1 /* left */, @@ -589,7 +590,7 @@ static void find_near_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, if (cnt[CNT_NEAREST] >= cnt[CNT_ZERO]) best_idx = CNT_NEAREST; - clamp_mv(s, best, &near_mv[best_idx], mb_x, mb_y); + mb->mv = near_mv[best_idx]; near[0] = near_mv[CNT_NEAREST]; near[1] = near_mv[CNT_NEAR]; } @@ -599,9 +600,9 @@ static void find_near_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, */ static int read_mv_component(VP56RangeCoder *c, const uint8_t *p) { - int x = 0; + int bit, x = 0; - if (vp56_rac_get_prob(c, p[0])) { + if (vp56_rac_get_prob_branchy(c, p[0])) { int i; for (i = 0; i < 3; i++) @@ -610,13 +611,23 @@ static int read_mv_component(VP56RangeCoder *c, const uint8_t *p) x += vp56_rac_get_prob(c, p[9 + i]) << i; if (!(x & 0xFFF0) || vp56_rac_get_prob(c, p[12])) x += 8; - } else - x = vp8_rac_get_tree(c, vp8_small_mvtree, &p[2]); + } else { + // small_mvtree + const uint8_t *ps = p+2; + bit = vp56_rac_get_prob(c, *ps); + ps += 1 + 3*bit; + x += 4*bit; + bit = vp56_rac_get_prob(c, *ps); + ps += 1 + 
bit; + x += 2*bit; + x += vp56_rac_get_prob(c, *ps); + } return (x && vp56_rac_get_prob(c, p[1])) ? -x : x; } -static const uint8_t *get_submv_prob(uint32_t left, uint32_t top) +static av_always_inline +const uint8_t *get_submv_prob(uint32_t left, uint32_t top) { if (left == top) return vp8_submv_prob[4-!!left]; @@ -629,8 +640,8 @@ static const uint8_t *get_submv_prob(uint32_t left, uint32_t top) * Split motion vector prediction, 16.4. * @returns the number of motion vectors parsed (2, 4 or 16) */ -static int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, - VP8Macroblock *mb, VP56mv *base_mv) +static av_always_inline +int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb) { int part_idx = mb->partitioning = vp8_rac_get_tree(c, vp8_mbsplit_tree, vp8_mbsplit_prob); @@ -663,11 +674,11 @@ static int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, switch (vp8_rac_get_tree(c, vp8_submv_ref_tree, submv_prob)) { case VP8_SUBMVMODE_NEW4X4: - mb->bmv[n].y = base_mv->y + read_mv_component(c, s->prob->mvc[0]); - mb->bmv[n].x = base_mv->x + read_mv_component(c, s->prob->mvc[1]); + mb->bmv[n].y = mb->mv.y + read_mv_component(c, s->prob->mvc[0]); + mb->bmv[n].x = mb->mv.x + read_mv_component(c, s->prob->mvc[1]); break; case VP8_SUBMVMODE_ZERO4X4: - AV_WN32A(&mb->bmv[n], 0); + AV_ZERO32(&mb->bmv[n]); break; case VP8_SUBMVMODE_LEFT4X4: AV_WN32A(&mb->bmv[n], left); @@ -681,8 +692,9 @@ static int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, return num; } -static inline void decode_intra4x4_modes(VP56RangeCoder *c, uint8_t *intra4x4, - int stride, int keyframe) +static av_always_inline +void decode_intra4x4_modes(VP56RangeCoder *c, uint8_t *intra4x4, + int stride, int keyframe) { int x, y, t, l, i; @@ -703,8 +715,9 @@ static inline void decode_intra4x4_modes(VP56RangeCoder *c, uint8_t *intra4x4, } } -static void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, - uint8_t *intra4x4, uint8_t *segment) +static av_always_inline +void 
decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, + uint8_t *intra4x4, uint8_t *segment) { VP56RangeCoder *c = &s->c; @@ -724,13 +737,13 @@ static void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra); mb->ref_frame = VP56_FRAME_CURRENT; - } else if (vp56_rac_get_prob(c, s->prob->intra)) { + } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) { VP56mv near[2], best; uint8_t cnt[4] = { 0 }; uint8_t p[4]; // inter MB, 16.2 - if (vp56_rac_get_prob(c, s->prob->last)) + if (vp56_rac_get_prob_branchy(c, s->prob->last)) mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ? VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN; else @@ -746,10 +759,11 @@ static void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_mvinter, p); switch (mb->mode) { case VP8_MVMODE_SPLIT: - mb->mv = mb->bmv[decode_splitmvs(s, c, mb, &best) - 1]; + clamp_mv(s, &mb->mv, &mb->mv, mb_x, mb_y); + mb->mv = mb->bmv[decode_splitmvs(s, c, mb) - 1]; break; case VP8_MVMODE_ZERO: - AV_WN32A(&mb->mv, 0); + AV_ZERO32(&mb->mv); break; case VP8_MVMODE_NEAREST: clamp_mv(s, &mb->mv, &near[0], mb_x, mb_y); @@ -758,8 +772,9 @@ static void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, clamp_mv(s, &mb->mv, &near[1], mb_x, mb_y); break; case VP8_MVMODE_NEW: - mb->mv.y = best.y + read_mv_component(c, s->prob->mvc[0]); - mb->mv.x = best.x + read_mv_component(c, s->prob->mvc[1]); + clamp_mv(s, &mb->mv, &mb->mv, mb_x, mb_y); + mb->mv.y += + read_mv_component(c, s->prob->mvc[0]); + mb->mv.x += + read_mv_component(c, s->prob->mvc[1]); break; } if (mb->mode != VP8_MVMODE_SPLIT) { @@ -776,7 +791,7 @@ static void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c); mb->ref_frame = VP56_FRAME_CURRENT; 
mb->partitioning = VP8_SPLITMVMODE_NONE; - AV_WN32A(&mb->bmv[0], 0); + AV_ZERO32(&mb->bmv[0]); } } @@ -795,41 +810,67 @@ static int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16], uint8_t probs[8][3][NUM_DCT_TOKENS-1], int i, int zero_nhood, int16_t qmul[2]) { - int token, nonzero = 0; - int offset = 0; + uint8_t *token_prob; + int nonzero = 0; + int coeff; - for (; i < 16; i++) { - token = vp8_rac_get_tree_with_offset(c, vp8_coeff_tree, probs[vp8_coeff_band[i]][zero_nhood], offset); + do { + token_prob = probs[vp8_coeff_band[i]][zero_nhood]; - if (token == DCT_EOB) - break; - else if (token >= DCT_CAT1) { - int cat = token-DCT_CAT1; - token = vp8_rac_get_coeff(c, vp8_dct_cat_prob[cat]); - token += 3 + (2<<cat); - } + if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB + return nonzero; - // after the first token, the non-zero prediction context becomes - // based on the last decoded coeff - if (!token) { +skip_eob: + if (!vp56_rac_get_prob_branchy(c, token_prob[1])) { // DCT_0 zero_nhood = 0; - offset = 1; - continue; - } else if (token == 1) + token_prob = probs[vp8_coeff_band[++i]][0]; + if (i < 16) + goto skip_eob; + return nonzero; // invalid input; blocks should end with EOB + } + + if (!vp56_rac_get_prob_branchy(c, token_prob[2])) { // DCT_1 + coeff = 1; zero_nhood = 1; - else + } else { zero_nhood = 2; + if (!vp56_rac_get_prob_branchy(c, token_prob[3])) { // DCT 2,3,4 + coeff = vp56_rac_get_prob(c, token_prob[4]); + if (coeff) + coeff += vp56_rac_get_prob(c, token_prob[5]); + coeff += 2; + } else { + // DCT_CAT* + if (!vp56_rac_get_prob_branchy(c, token_prob[6])) { + if (!vp56_rac_get_prob_branchy(c, token_prob[7])) { // DCT_CAT1 + coeff = 5 + vp56_rac_get_prob(c, vp8_dct_cat1_prob[0]); + } else { // DCT_CAT2 + coeff = 7; + coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[0]) << 1; + coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[1]); + } + } else { // DCT_CAT3 and up + int a = vp56_rac_get_prob(c, token_prob[8]); + int b = 
vp56_rac_get_prob(c, token_prob[9+a]); + int cat = (a<<1) + b; + coeff = 3 + (8<<cat); + coeff += vp8_rac_get_coeff(c, vp8_dct_cat_prob[cat]); + } + } + } + // todo: full [16] qmat? load into register? - block[zigzag_scan[i]] = (vp8_rac_get(c) ? -token : token) * qmul[!!i]; - nonzero = i+1; - offset = 0; - } + block[zigzag_scan[i]] = (vp8_rac_get(c) ? -coeff : coeff) * qmul[!!i]; + nonzero = ++i; + } while (i < 16); + return nonzero; } -static void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, - uint8_t t_nnz[9], uint8_t l_nnz[9]) +static av_always_inline +void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, + uint8_t t_nnz[9], uint8_t l_nnz[9]) { LOCAL_ALIGNED_16(DCTELEM, dc,[16]); int i, x, y, luma_start = 0, luma_ctx = 3; @@ -926,21 +967,22 @@ void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_ } } -static int check_intra_pred_mode(int mode, int mb_x, int mb_y) +static av_always_inline +int check_intra_pred_mode(int mode, int mb_x, int mb_y) { if (mode == DC_PRED8x8) { - if (!(mb_x|mb_y)) - mode = DC_128_PRED8x8; - else if (!mb_y) + if (!mb_x) { + mode = mb_y ? 
TOP_DC_PRED8x8 : DC_128_PRED8x8; + } else if (!mb_y) { mode = LEFT_DC_PRED8x8; - else if (!mb_x) - mode = TOP_DC_PRED8x8; + } } return mode; } -static void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, - uint8_t *intra4x4, int mb_x, int mb_y) +static av_always_inline +void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, + uint8_t *intra4x4, int mb_x, int mb_y) { int x, y, mode, nnz, tr; @@ -1022,11 +1064,12 @@ static void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, * @param linesize size of a single line of plane data, including padding * @param mc_func motion compensation function pointers (bilinear or sixtap MC) */ -static inline void vp8_mc(VP8Context *s, int luma, - uint8_t *dst, uint8_t *src, const VP56mv *mv, - int x_off, int y_off, int block_w, int block_h, - int width, int height, int linesize, - vp8_mc_func mc_func[3][3]) +static av_always_inline +void vp8_mc(VP8Context *s, int luma, + uint8_t *dst, uint8_t *src, const VP56mv *mv, + int x_off, int y_off, int block_w, int block_h, + int width, int height, int linesize, + vp8_mc_func mc_func[3][3]) { if (AV_RN32A(mv)) { static const uint8_t idx[8] = { 0, 1, 2, 1, 2, 1, 2, 1 }; @@ -1050,11 +1093,12 @@ static inline void vp8_mc(VP8Context *s, int luma, mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0); } -static inline void vp8_mc_part(VP8Context *s, uint8_t *dst[3], - AVFrame *ref_frame, int x_off, int y_off, - int bx_off, int by_off, - int block_w, int block_h, - int width, int height, VP56mv *mv) +static av_always_inline +void vp8_mc_part(VP8Context *s, uint8_t *dst[3], + AVFrame *ref_frame, int x_off, int y_off, + int bx_off, int by_off, + int block_w, int block_h, + int width, int height, VP56mv *mv) { VP56mv uvmv = *mv; @@ -1085,7 +1129,7 @@ static inline void vp8_mc_part(VP8Context *s, uint8_t *dst[3], /* Fetch pixels for estimated mv 4 macroblocks ahead. * Optimized for 64-byte cache lines. 
Inspired by ffh264 prefetch_motion. */ -static inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref) +static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref) { /* Don't prefetch refs that haven't been used very often this frame. */ if (s->ref_count[ref-1] > (mb_xy >> 5)) { @@ -1103,8 +1147,9 @@ static inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, i /** * Apply motion vectors to prediction buffer, chapter 18. */ -static void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, - int mb_x, int mb_y) +static av_always_inline +void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, + int mb_x, int mb_y) { int x_off = mb_x << 4, y_off = mb_y << 4; int width = 16*s->mb_width, height = 16*s->mb_height; @@ -1187,7 +1232,7 @@ static void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, } } -static void idct_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb) +static av_always_inline void idct_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb) { int x, y, ch; @@ -1238,7 +1283,7 @@ static void idct_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb) } } -static void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f ) +static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f ) { int interior_limit, filter_level; @@ -1278,7 +1323,7 @@ static void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStren f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT; } -static void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y) +static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y) { int mbedge_lim, bedge_lim, hev_thresh; int filter_level = f->filter_level; @@ -1347,7 +1392,7 @@ static void 
filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int } } -static void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y) +static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y) { int mbedge_lim, bedge_lim; int filter_level = f->filter_level; @@ -1416,7 +1461,7 @@ static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size, VP8Context *s = avctx->priv_data; int ret, mb_x, mb_y, i, y, referenced; enum AVDiscard skip_thresh; - AVFrame *curframe = NULL; + AVFrame *av_uninit(curframe); if ((ret = decode_frame_header(s, buf, buf_size)) < 0) return ret; diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp8data.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp8data.h index 9f56ab63b..1bdac16be 100644 --- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp8data.h +++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp8data.h @@ -118,16 +118,6 @@ static const int8_t vp8_pred16x16_tree_mvinter[4][2] = { { -VP8_MVMODE_NEW, -VP8_MVMODE_SPLIT } // '1110', '1111' }; -static const int8_t vp8_small_mvtree[7][2] = { - { 1, 4 }, - { 2, 3 }, - { -0, -1 }, // '000', '001' - { -2, -3 }, // '010', '011' - { 5, 6 }, - { -4, -5 }, // '100', '101' - { -6, -7 } // '110', '111' -}; - static const uint8_t vp8_mbsplits[5][16] = { { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 }, @@ -337,21 +327,6 @@ static const uint8_t vp8_coeff_band[16] = 0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7 }; -static const int8_t vp8_coeff_tree[NUM_DCT_TOKENS-1][2] = -{ - { -DCT_EOB, 1 }, // '0' - { -DCT_0, 2 }, // '10' - { -DCT_1, 3 }, // '110' - { 4, 6 }, - { -DCT_2, 5 }, // '11100' - { -DCT_3, -DCT_4 }, // '111010', '111011' - { 7, 8 }, - { -DCT_CAT1, -DCT_CAT2 }, // '111100', '111101' - { 9, 10 }, - { -DCT_CAT3, -DCT_CAT4 }, // '1111100', '1111101' - { -DCT_CAT5, -DCT_CAT6 }, // '1111110', '1111111' -}; - static const uint8_t vp8_dct_cat1_prob[] 
= { 159, 0 }; static const uint8_t vp8_dct_cat2_prob[] = { 165, 145, 0 }; static const uint8_t vp8_dct_cat3_prob[] = { 173, 148, 140, 0 }; @@ -359,10 +334,9 @@ static const uint8_t vp8_dct_cat4_prob[] = { 176, 155, 140, 135, 0 }; static const uint8_t vp8_dct_cat5_prob[] = { 180, 157, 141, 134, 130, 0 }; static const uint8_t vp8_dct_cat6_prob[] = { 254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129, 0 }; -static const uint8_t * const vp8_dct_cat_prob[6] = +// only used for cat3 and above; cat 1 and 2 are referenced directly +static const uint8_t * const vp8_dct_cat_prob[] = { - vp8_dct_cat1_prob, - vp8_dct_cat2_prob, vp8_dct_cat3_prob, vp8_dct_cat4_prob, vp8_dct_cat5_prob, diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/idct_mmx.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/idct_mmx.c index d1859080c..b158c71d2 100644 --- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/idct_mmx.c +++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/idct_mmx.c @@ -23,8 +23,8 @@ #include "libavutil/common.h"
#include "libavcodec/dsputil.h"
+#include "libavutil/x86_cpu.h"
#include "dsputil_mmx.h"
-#include "mmx.h"
#define ROW_SHIFT 11
#define COL_SHIFT 6
@@ -87,104 +87,115 @@ static inline void idct_row (int16_t * row, int offset, static inline void mmxext_row_head (int16_t * const row, const int offset,
const int16_t * const table)
{
- movq_m2r (*(row+offset), mm2); /* mm2 = x6 x4 x2 x0 */
+ __asm__ volatile(
+ "movq (%0), %%mm2 \n\t" /* mm2 = x6 x4 x2 x0 */
- movq_m2r (*(row+offset+4), mm5); /* mm5 = x7 x5 x3 x1 */
- movq_r2r (mm2, mm0); /* mm0 = x6 x4 x2 x0 */
+ "movq 8(%0), %%mm5 \n\t" /* mm5 = x7 x5 x3 x1 */
+ "movq %%mm2, %%mm0 \n\t" /* mm0 = x6 x4 x2 x0 */
- movq_m2r (*table, mm3); /* mm3 = -C2 -C4 C2 C4 */
- movq_r2r (mm5, mm6); /* mm6 = x7 x5 x3 x1 */
+ "movq (%1), %%mm3 \n\t" /* mm3 = -C2 -C4 C2 C4 */
+ "movq %%mm5, %%mm6 \n\t" /* mm6 = x7 x5 x3 x1 */
- movq_m2r (*(table+4), mm4); /* mm4 = C6 C4 C6 C4 */
- pmaddwd_r2r (mm0, mm3); /* mm3 = -C4*x4-C2*x6 C4*x0+C2*x2 */
+ "movq 8(%1), %%mm4 \n\t" /* mm4 = C6 C4 C6 C4 */
+ "pmaddwd %%mm0, %%mm3 \n\t" /* mm3 = -C4*x4-C2*x6 C4*x0+C2*x2 */
- pshufw_r2r (mm2, mm2, 0x4e); /* mm2 = x2 x0 x6 x4 */
+ "pshufw $0x4e, %%mm2, %%mm2 \n\t" /* mm2 = x2 x0 x6 x4 */
+ :: "r" ((row+offset)), "r" (table)
+ );
}
static inline void mmxext_row (const int16_t * const table,
const int32_t * const rounder)
{
- movq_m2r (*(table+8), mm1); /* mm1 = -C5 -C1 C3 C1 */
- pmaddwd_r2r (mm2, mm4); /* mm4 = C4*x0+C6*x2 C4*x4+C6*x6 */
+ __asm__ volatile (
+ "movq 16(%0), %%mm1 \n\t" /* mm1 = -C5 -C1 C3 C1 */
+ "pmaddwd %%mm2, %%mm4 \n\t" /* mm4 = C4*x0+C6*x2 C4*x4+C6*x6 */
- pmaddwd_m2r (*(table+16), mm0); /* mm0 = C4*x4-C6*x6 C4*x0-C6*x2 */
- pshufw_r2r (mm6, mm6, 0x4e); /* mm6 = x3 x1 x7 x5 */
+ "pmaddwd 32(%0), %%mm0 \n\t" /* mm0 = C4*x4-C6*x6 C4*x0-C6*x2 */
+ "pshufw $0x4e, %%mm6, %%mm6 \n\t" /* mm6 = x3 x1 x7 x5 */
- movq_m2r (*(table+12), mm7); /* mm7 = -C7 C3 C7 C5 */
- pmaddwd_r2r (mm5, mm1); /* mm1 = -C1*x5-C5*x7 C1*x1+C3*x3 */
+ "movq 24(%0), %%mm7 \n\t" /* mm7 = -C7 C3 C7 C5 */
+ "pmaddwd %%mm5, %%mm1 \n\t" /* mm1= -C1*x5-C5*x7 C1*x1+C3*x3 */
- paddd_m2r (*rounder, mm3); /* mm3 += rounder */
- pmaddwd_r2r (mm6, mm7); /* mm7 = C3*x1-C7*x3 C5*x5+C7*x7 */
+ "paddd (%1), %%mm3 \n\t" /* mm3 += rounder */
+ "pmaddwd %%mm6, %%mm7 \n\t" /* mm7 = C3*x1-C7*x3 C5*x5+C7*x7 */
- pmaddwd_m2r (*(table+20), mm2); /* mm2 = C4*x0-C2*x2 -C4*x4+C2*x6 */
- paddd_r2r (mm4, mm3); /* mm3 = a1 a0 + rounder */
+ "pmaddwd 40(%0), %%mm2 \n\t" /* mm2= C4*x0-C2*x2 -C4*x4+C2*x6 */
+ "paddd %%mm4, %%mm3 \n\t" /* mm3 = a1 a0 + rounder */
- pmaddwd_m2r (*(table+24), mm5); /* mm5 = C3*x5-C1*x7 C5*x1-C1*x3 */
- movq_r2r (mm3, mm4); /* mm4 = a1 a0 + rounder */
+ "pmaddwd 48(%0), %%mm5 \n\t" /* mm5 = C3*x5-C1*x7 C5*x1-C1*x3 */
+ "movq %%mm3, %%mm4 \n\t" /* mm4 = a1 a0 + rounder */
- pmaddwd_m2r (*(table+28), mm6); /* mm6 = C7*x1-C5*x3 C7*x5+C3*x7 */
- paddd_r2r (mm7, mm1); /* mm1 = b1 b0 */
+ "pmaddwd 56(%0), %%mm6 \n\t" /* mm6 = C7*x1-C5*x3 C7*x5+C3*x7 */
+ "paddd %%mm7, %%mm1 \n\t" /* mm1 = b1 b0 */
- paddd_m2r (*rounder, mm0); /* mm0 += rounder */
- psubd_r2r (mm1, mm3); /* mm3 = a1-b1 a0-b0 + rounder */
+ "paddd (%1), %%mm0 \n\t" /* mm0 += rounder */
+ "psubd %%mm1, %%mm3 \n\t" /* mm3 = a1-b1 a0-b0 + rounder */
- psrad_i2r (ROW_SHIFT, mm3); /* mm3 = y6 y7 */
- paddd_r2r (mm4, mm1); /* mm1 = a1+b1 a0+b0 + rounder */
+ "psrad $" AV_STRINGIFY(ROW_SHIFT) ", %%mm3 \n\t" /* mm3 = y6 y7 */
+ "paddd %%mm4, %%mm1 \n\t" /* mm1 = a1+b1 a0+b0 + rounder */
- paddd_r2r (mm2, mm0); /* mm0 = a3 a2 + rounder */
- psrad_i2r (ROW_SHIFT, mm1); /* mm1 = y1 y0 */
+ "paddd %%mm2, %%mm0 \n\t" /* mm0 = a3 a2 + rounder */
+ "psrad $" AV_STRINGIFY(ROW_SHIFT) ", %%mm1 \n\t" /* mm1 = y1 y0 */
- paddd_r2r (mm6, mm5); /* mm5 = b3 b2 */
- movq_r2r (mm0, mm4); /* mm4 = a3 a2 + rounder */
+ "paddd %%mm6, %%mm5 \n\t" /* mm5 = b3 b2 */
+ "movq %%mm0, %%mm4 \n\t" /* mm4 = a3 a2 + rounder */
- paddd_r2r (mm5, mm0); /* mm0 = a3+b3 a2+b2 + rounder */
- psubd_r2r (mm5, mm4); /* mm4 = a3-b3 a2-b2 + rounder */
+ "paddd %%mm5, %%mm0 \n\t" /* mm0 = a3+b3 a2+b2 + rounder */
+ "psubd %%mm5, %%mm4 \n\t" /* mm4 = a3-b3 a2-b2 + rounder */
+ : : "r" (table), "r" (rounder));
}
static inline void mmxext_row_tail (int16_t * const row, const int store)
{
- psrad_i2r (ROW_SHIFT, mm0); /* mm0 = y3 y2 */
+ __asm__ volatile (
+ "psrad $" AV_STRINGIFY(ROW_SHIFT) ", %%mm0 \n\t" /* mm0 = y3 y2 */
- psrad_i2r (ROW_SHIFT, mm4); /* mm4 = y4 y5 */
+ "psrad $" AV_STRINGIFY(ROW_SHIFT) ", %%mm4 \n\t" /* mm4 = y4 y5 */
- packssdw_r2r (mm0, mm1); /* mm1 = y3 y2 y1 y0 */
+ "packssdw %%mm0, %%mm1 \n\t" /* mm1 = y3 y2 y1 y0 */
- packssdw_r2r (mm3, mm4); /* mm4 = y6 y7 y4 y5 */
+ "packssdw %%mm3, %%mm4 \n\t" /* mm4 = y6 y7 y4 y5 */
- movq_r2m (mm1, *(row+store)); /* save y3 y2 y1 y0 */
- pshufw_r2r (mm4, mm4, 0xb1); /* mm4 = y7 y6 y5 y4 */
+ "movq %%mm1, (%0) \n\t" /* save y3 y2 y1 y0 */
+ "pshufw $0xb1, %%mm4, %%mm4 \n\t" /* mm4 = y7 y6 y5 y4 */
- /* slot */
+ /* slot */
- movq_r2m (mm4, *(row+store+4)); /* save y7 y6 y5 y4 */
+ "movq %%mm4, 8(%0) \n\t" /* save y7 y6 y5 y4 */
+ :: "r" (row+store)
+ );
}
static inline void mmxext_row_mid (int16_t * const row, const int store,
const int offset,
const int16_t * const table)
{
- movq_m2r (*(row+offset), mm2); /* mm2 = x6 x4 x2 x0 */
- psrad_i2r (ROW_SHIFT, mm0); /* mm0 = y3 y2 */
+ __asm__ volatile (
+ "movq (%0,%1), %%mm2 \n\t" /* mm2 = x6 x4 x2 x0 */
+ "psrad $" AV_STRINGIFY(ROW_SHIFT) ", %%mm0 \n\t" /* mm0 = y3 y2 */
- movq_m2r (*(row+offset+4), mm5); /* mm5 = x7 x5 x3 x1 */
- psrad_i2r (ROW_SHIFT, mm4); /* mm4 = y4 y5 */
+ "movq 8(%0,%1), %%mm5 \n\t" /* mm5 = x7 x5 x3 x1 */
+ "psrad $" AV_STRINGIFY(ROW_SHIFT) ", %%mm4 \n\t" /* mm4 = y4 y5 */
- packssdw_r2r (mm0, mm1); /* mm1 = y3 y2 y1 y0 */
- movq_r2r (mm5, mm6); /* mm6 = x7 x5 x3 x1 */
+ "packssdw %%mm0, %%mm1 \n\t" /* mm1 = y3 y2 y1 y0 */
+ "movq %%mm5, %%mm6 \n\t" /* mm6 = x7 x5 x3 x1 */
- packssdw_r2r (mm3, mm4); /* mm4 = y6 y7 y4 y5 */
- movq_r2r (mm2, mm0); /* mm0 = x6 x4 x2 x0 */
+ "packssdw %%mm3, %%mm4 \n\t" /* mm4 = y6 y7 y4 y5 */
+ "movq %%mm2, %%mm0 \n\t" /* mm0 = x6 x4 x2 x0 */
- movq_r2m (mm1, *(row+store)); /* save y3 y2 y1 y0 */
- pshufw_r2r (mm4, mm4, 0xb1); /* mm4 = y7 y6 y5 y4 */
+ "movq %%mm1, (%0,%2) \n\t" /* save y3 y2 y1 y0 */
+ "pshufw $0xb1, %%mm4, %%mm4\n\t" /* mm4 = y7 y6 y5 y4 */
- movq_m2r (*table, mm3); /* mm3 = -C2 -C4 C2 C4 */
- movq_r2m (mm4, *(row+store+4)); /* save y7 y6 y5 y4 */
+ "movq (%3), %%mm3 \n\t" /* mm3 = -C2 -C4 C2 C4 */
+ "movq %%mm4, 8(%0,%2) \n\t" /* save y7 y6 y5 y4 */
- pmaddwd_r2r (mm0, mm3); /* mm3 = -C4*x4-C2*x6 C4*x0+C2*x2 */
+ "pmaddwd %%mm0, %%mm3 \n\t" /* mm3= -C4*x4-C2*x6 C4*x0+C2*x2 */
- movq_m2r (*(table+4), mm4); /* mm4 = C6 C4 C6 C4 */
- pshufw_r2r (mm2, mm2, 0x4e); /* mm2 = x2 x0 x6 x4 */
+ "movq 8(%3), %%mm4 \n\t" /* mm4 = C6 C4 C6 C4 */
+ "pshufw $0x4e, %%mm2, %%mm2\n\t" /* mm2 = x2 x0 x6 x4 */
+ :: "r" (row), "r" ((x86_reg) (2*offset)), "r" ((x86_reg) (2*store)), "r" (table)
+ );
}
@@ -202,119 +213,132 @@ static inline void mmxext_row_mid (int16_t * const row, const int store, static inline void mmx_row_head (int16_t * const row, const int offset,
const int16_t * const table)
{
- movq_m2r (*(row+offset), mm2); /* mm2 = x6 x4 x2 x0 */
+ __asm__ volatile (
+ "movq (%0), %%mm2 \n\t" /* mm2 = x6 x4 x2 x0 */
- movq_m2r (*(row+offset+4), mm5); /* mm5 = x7 x5 x3 x1 */
- movq_r2r (mm2, mm0); /* mm0 = x6 x4 x2 x0 */
+ "movq 8(%0), %%mm5 \n\t" /* mm5 = x7 x5 x3 x1 */
+ "movq %%mm2, %%mm0 \n\t" /* mm0 = x6 x4 x2 x0 */
- movq_m2r (*table, mm3); /* mm3 = C6 C4 C2 C4 */
- movq_r2r (mm5, mm6); /* mm6 = x7 x5 x3 x1 */
+ "movq (%1), %%mm3 \n\t" /* mm3 = C6 C4 C2 C4 */
+ "movq %%mm5, %%mm6 \n\t" /* mm6 = x7 x5 x3 x1 */
- punpckldq_r2r (mm0, mm0); /* mm0 = x2 x0 x2 x0 */
+ "punpckldq %%mm0, %%mm0 \n\t" /* mm0 = x2 x0 x2 x0 */
- movq_m2r (*(table+4), mm4); /* mm4 = -C2 -C4 C6 C4 */
- pmaddwd_r2r (mm0, mm3); /* mm3 = C4*x0+C6*x2 C4*x0+C2*x2 */
+ "movq 8(%1), %%mm4 \n\t" /* mm4 = -C2 -C4 C6 C4 */
+ "pmaddwd %%mm0, %%mm3 \n\t" /* mm3 = C4*x0+C6*x2 C4*x0+C2*x2 */
- movq_m2r (*(table+8), mm1); /* mm1 = -C7 C3 C3 C1 */
- punpckhdq_r2r (mm2, mm2); /* mm2 = x6 x4 x6 x4 */
+ "movq 16(%1), %%mm1 \n\t" /* mm1 = -C7 C3 C3 C1 */
+ "punpckhdq %%mm2, %%mm2 \n\t" /* mm2 = x6 x4 x6 x4 */
+ :: "r" ((row+offset)), "r" (table)
+ );
}
static inline void mmx_row (const int16_t * const table,
const int32_t * const rounder)
{
- pmaddwd_r2r (mm2, mm4); /* mm4 = -C4*x4-C2*x6 C4*x4+C6*x6 */
- punpckldq_r2r (mm5, mm5); /* mm5 = x3 x1 x3 x1 */
+ __asm__ volatile (
+ "pmaddwd %%mm2, %%mm4 \n\t" /* mm4 = -C4*x4-C2*x6 C4*x4+C6*x6 */
+ "punpckldq %%mm5, %%mm5 \n\t" /* mm5 = x3 x1 x3 x1 */
- pmaddwd_m2r (*(table+16), mm0); /* mm0 = C4*x0-C2*x2 C4*x0-C6*x2 */
- punpckhdq_r2r (mm6, mm6); /* mm6 = x7 x5 x7 x5 */
+ "pmaddwd 32(%0), %%mm0 \n\t" /* mm0 = C4*x0-C2*x2 C4*x0-C6*x2 */
+ "punpckhdq %%mm6, %%mm6 \n\t" /* mm6 = x7 x5 x7 x5 */
- movq_m2r (*(table+12), mm7); /* mm7 = -C5 -C1 C7 C5 */
- pmaddwd_r2r (mm5, mm1); /* mm1 = C3*x1-C7*x3 C1*x1+C3*x3 */
+ "movq 24(%0), %%mm7 \n\t" /* mm7 = -C5 -C1 C7 C5 */
+ "pmaddwd %%mm5, %%mm1 \n\t" /* mm1 = C3*x1-C7*x3 C1*x1+C3*x3 */
- paddd_m2r (*rounder, mm3); /* mm3 += rounder */
- pmaddwd_r2r (mm6, mm7); /* mm7 = -C1*x5-C5*x7 C5*x5+C7*x7 */
+ "paddd (%1), %%mm3 \n\t" /* mm3 += rounder */
+ "pmaddwd %%mm6, %%mm7 \n\t" /* mm7 = -C1*x5-C5*x7 C5*x5+C7*x7 */
- pmaddwd_m2r (*(table+20), mm2); /* mm2 = C4*x4-C6*x6 -C4*x4+C2*x6 */
- paddd_r2r (mm4, mm3); /* mm3 = a1 a0 + rounder */
+ "pmaddwd 40(%0), %%mm2 \n\t" /* mm2 = C4*x4-C6*x6 -C4*x4+C2*x6 */
+ "paddd %%mm4, %%mm3 \n\t" /* mm3 = a1 a0 + rounder */
- pmaddwd_m2r (*(table+24), mm5); /* mm5 = C7*x1-C5*x3 C5*x1-C1*x3 */
- movq_r2r (mm3, mm4); /* mm4 = a1 a0 + rounder */
+ "pmaddwd 48(%0), %%mm5 \n\t" /* mm5 = C7*x1-C5*x3 C5*x1-C1*x3 */
+ "movq %%mm3, %%mm4 \n\t" /* mm4 = a1 a0 + rounder */
- pmaddwd_m2r (*(table+28), mm6); /* mm6 = C3*x5-C1*x7 C7*x5+C3*x7 */
- paddd_r2r (mm7, mm1); /* mm1 = b1 b0 */
+ "pmaddwd 56(%0), %%mm6 \n\t" /* mm6 = C3*x5-C1*x7 C7*x5+C3*x7 */
+ "paddd %%mm7, %%mm1 \n\t" /* mm1 = b1 b0 */
- paddd_m2r (*rounder, mm0); /* mm0 += rounder */
- psubd_r2r (mm1, mm3); /* mm3 = a1-b1 a0-b0 + rounder */
+ "paddd (%1), %%mm0 \n\t" /* mm0 += rounder */
+ "psubd %%mm1, %%mm3 \n\t" /* mm3 = a1-b1 a0-b0 + rounder */
- psrad_i2r (ROW_SHIFT, mm3); /* mm3 = y6 y7 */
- paddd_r2r (mm4, mm1); /* mm1 = a1+b1 a0+b0 + rounder */
+ "psrad $" AV_STRINGIFY(ROW_SHIFT) ", %%mm3 \n\t" /* mm3 = y6 y7 */
+ "paddd %%mm4, %%mm1 \n\t" /* mm1 = a1+b1 a0+b0 + rounder */
- paddd_r2r (mm2, mm0); /* mm0 = a3 a2 + rounder */
- psrad_i2r (ROW_SHIFT, mm1); /* mm1 = y1 y0 */
+ "paddd %%mm2, %%mm0 \n\t" /* mm0 = a3 a2 + rounder */
+ "psrad $" AV_STRINGIFY(ROW_SHIFT) ", %%mm1 \n\t" /* mm1 = y1 y0 */
- paddd_r2r (mm6, mm5); /* mm5 = b3 b2 */
- movq_r2r (mm0, mm7); /* mm7 = a3 a2 + rounder */
+ "paddd %%mm6, %%mm5 \n\t" /* mm5 = b3 b2 */
+ "movq %%mm0, %%mm7 \n\t" /* mm7 = a3 a2 + rounder */
- paddd_r2r (mm5, mm0); /* mm0 = a3+b3 a2+b2 + rounder */
- psubd_r2r (mm5, mm7); /* mm7 = a3-b3 a2-b2 + rounder */
+ "paddd %%mm5, %%mm0 \n\t" /* mm0 = a3+b3 a2+b2 + rounder */
+ "psubd %%mm5, %%mm7 \n\t" /* mm7 = a3-b3 a2-b2 + rounder */
+ :: "r" (table), "r" (rounder)
+ );
}
static inline void mmx_row_tail (int16_t * const row, const int store)
{
- psrad_i2r (ROW_SHIFT, mm0); /* mm0 = y3 y2 */
+ __asm__ volatile (
+ "psrad $" AV_STRINGIFY(ROW_SHIFT) ", %%mm0 \n\t" /* mm0 = y3 y2 */
- psrad_i2r (ROW_SHIFT, mm7); /* mm7 = y4 y5 */
+ "psrad $" AV_STRINGIFY(ROW_SHIFT) ", %%mm7 \n\t" /* mm7 = y4 y5 */
- packssdw_r2r (mm0, mm1); /* mm1 = y3 y2 y1 y0 */
+ "packssdw %%mm0, %%mm1 \n\t" /* mm1 = y3 y2 y1 y0 */
- packssdw_r2r (mm3, mm7); /* mm7 = y6 y7 y4 y5 */
+ "packssdw %%mm3, %%mm7 \n\t" /* mm7 = y6 y7 y4 y5 */
- movq_r2m (mm1, *(row+store)); /* save y3 y2 y1 y0 */
- movq_r2r (mm7, mm4); /* mm4 = y6 y7 y4 y5 */
+ "movq %%mm1, (%0) \n\t" /* save y3 y2 y1 y0 */
+ "movq %%mm7, %%mm4 \n\t" /* mm4 = y6 y7 y4 y5 */
- pslld_i2r (16, mm7); /* mm7 = y7 0 y5 0 */
+ "pslld $16, %%mm7 \n\t" /* mm7 = y7 0 y5 0 */
- psrld_i2r (16, mm4); /* mm4 = 0 y6 0 y4 */
+ "psrld $16, %%mm4 \n\t" /* mm4 = 0 y6 0 y4 */
- por_r2r (mm4, mm7); /* mm7 = y7 y6 y5 y4 */
+ "por %%mm4, %%mm7 \n\t" /* mm7 = y7 y6 y5 y4 */
- /* slot */
+ /* slot */
- movq_r2m (mm7, *(row+store+4)); /* save y7 y6 y5 y4 */
+ "movq %%mm7, 8(%0) \n\t" /* save y7 y6 y5 y4 */
+ :: "r" (row+store)
+ );
}
static inline void mmx_row_mid (int16_t * const row, const int store,
const int offset, const int16_t * const table)
{
- movq_m2r (*(row+offset), mm2); /* mm2 = x6 x4 x2 x0 */
- psrad_i2r (ROW_SHIFT, mm0); /* mm0 = y3 y2 */
- movq_m2r (*(row+offset+4), mm5); /* mm5 = x7 x5 x3 x1 */
- psrad_i2r (ROW_SHIFT, mm7); /* mm7 = y4 y5 */
+ __asm__ volatile (
+ "movq (%0,%1), %%mm2 \n\t" /* mm2 = x6 x4 x2 x0 */
+ "psrad $" AV_STRINGIFY(ROW_SHIFT) ", %%mm0 \n\t" /* mm0 = y3 y2 */
- packssdw_r2r (mm0, mm1); /* mm1 = y3 y2 y1 y0 */
- movq_r2r (mm5, mm6); /* mm6 = x7 x5 x3 x1 */
+ "movq 8(%0,%1), %%mm5 \n\t" /* mm5 = x7 x5 x3 x1 */
+ "psrad $" AV_STRINGIFY(ROW_SHIFT) ", %%mm7 \n\t" /* mm7 = y4 y5 */
- packssdw_r2r (mm3, mm7); /* mm7 = y6 y7 y4 y5 */
- movq_r2r (mm2, mm0); /* mm0 = x6 x4 x2 x0 */
+ "packssdw %%mm0, %%mm1 \n\t" /* mm1 = y3 y2 y1 y0 */
+ "movq %%mm5, %%mm6 \n\t" /* mm6 = x7 x5 x3 x1 */
- movq_r2m (mm1, *(row+store)); /* save y3 y2 y1 y0 */
- movq_r2r (mm7, mm1); /* mm1 = y6 y7 y4 y5 */
+ "packssdw %%mm3, %%mm7 \n\t" /* mm7 = y6 y7 y4 y5 */
+ "movq %%mm2, %%mm0 \n\t" /* mm0 = x6 x4 x2 x0 */
- punpckldq_r2r (mm0, mm0); /* mm0 = x2 x0 x2 x0 */
- psrld_i2r (16, mm7); /* mm7 = 0 y6 0 y4 */
+ "movq %%mm1, (%0,%2) \n\t" /* save y3 y2 y1 y0 */
+ "movq %%mm7, %%mm1 \n\t" /* mm1 = y6 y7 y4 y5 */
- movq_m2r (*table, mm3); /* mm3 = C6 C4 C2 C4 */
- pslld_i2r (16, mm1); /* mm1 = y7 0 y5 0 */
+ "punpckldq %%mm0, %%mm0 \n\t" /* mm0 = x2 x0 x2 x0 */
+ "psrld $16, %%mm7 \n\t" /* mm7 = 0 y6 0 y4 */
- movq_m2r (*(table+4), mm4); /* mm4 = -C2 -C4 C6 C4 */
- por_r2r (mm1, mm7); /* mm7 = y7 y6 y5 y4 */
+ "movq (%3), %%mm3 \n\t" /* mm3 = C6 C4 C2 C4 */
+ "pslld $16, %%mm1 \n\t" /* mm1 = y7 0 y5 0 */
- movq_m2r (*(table+8), mm1); /* mm1 = -C7 C3 C3 C1 */
- punpckhdq_r2r (mm2, mm2); /* mm2 = x6 x4 x6 x4 */
+ "movq 8(%3), %%mm4 \n\t" /* mm4 = -C2 -C4 C6 C4 */
+ "por %%mm1, %%mm7 \n\t" /* mm7 = y7 y6 y5 y4 */
- movq_r2m (mm7, *(row+store+4)); /* save y7 y6 y5 y4 */
- pmaddwd_r2r (mm0, mm3); /* mm3 = C4*x0+C6*x2 C4*x0+C2*x2 */
+ "movq 16(%3), %%mm1 \n\t" /* mm1 = -C7 C3 C3 C1 */
+ "punpckhdq %%mm2, %%mm2 \n\t" /* mm2 = x6 x4 x6 x4 */
+
+ "movq %%mm7, 8(%0,%2) \n\t" /* save y7 y6 y5 y4 */
+ "pmaddwd %%mm0, %%mm3 \n\t" /* mm3 = C4*x0+C6*x2 C4*x0+C2*x2 */
+ : : "r" (row), "r" ((x86_reg) (2*offset)), "r" ((x86_reg) (2*store)), "r" (table)
+ );
}
@@ -398,140 +422,145 @@ static inline void idct_col (int16_t * const col, const int offset) #define T3 43790
#define C4 23170
- DECLARE_ALIGNED(8, static const short, t1_vector)[] = {T1,T1,T1,T1};
- DECLARE_ALIGNED(8, static const short, t2_vector)[] = {T2,T2,T2,T2};
- DECLARE_ALIGNED(8, static const short, t3_vector)[] = {T3,T3,T3,T3};
- DECLARE_ALIGNED(8, static const short, c4_vector)[] = {C4,C4,C4,C4};
+ DECLARE_ALIGNED(8, static const short, t1_vector)[] = { /* despite the name, holds all four constant rows; the asm below addresses it through %0 */
+ T1,T1,T1,T1, /* loaded from (%0) */
+ T2,T2,T2,T2, /* loaded from 8(%0) */
+ T3,T3,T3,T3, /* loaded from 16(%0); T3 (43790) exceeds 32767, the asm comments treat the stored value as "T3-1" and add x back after pmulhw */
+ C4,C4,C4,C4 /* loaded from 24(%0) */
+ };
/* column code adapted from Peter Gubanov */
/* http://www.elecard.com/peter/idct.shtml */
- movq_m2r (*t1_vector, mm0); /* mm0 = T1 */
+ __asm__ volatile (
+ "movq (%0), %%mm0 \n\t" /* mm0 = T1 */
- movq_m2r (*(col+offset+1*8), mm1); /* mm1 = x1 */
- movq_r2r (mm0, mm2); /* mm2 = T1 */
+ "movq 2*8(%1), %%mm1 \n\t" /* mm1 = x1 */
+ "movq %%mm0, %%mm2 \n\t" /* mm2 = T1 */
- movq_m2r (*(col+offset+7*8), mm4); /* mm4 = x7 */
- pmulhw_r2r (mm1, mm0); /* mm0 = T1*x1 */
+ "movq 7*2*8(%1), %%mm4 \n\t" /* mm4 = x7 */
+ "pmulhw %%mm1, %%mm0 \n\t" /* mm0 = T1*x1 */
- movq_m2r (*t3_vector, mm5); /* mm5 = T3 */
- pmulhw_r2r (mm4, mm2); /* mm2 = T1*x7 */
+ "movq 16(%0), %%mm5 \n\t" /* mm5 = T3 */
+ "pmulhw %%mm4, %%mm2 \n\t" /* mm2 = T1*x7 */
- movq_m2r (*(col+offset+5*8), mm6); /* mm6 = x5 */
- movq_r2r (mm5, mm7); /* mm7 = T3-1 */
+ "movq 2*5*8(%1), %%mm6 \n\t" /* mm6 = x5 */
+ "movq %%mm5, %%mm7 \n\t" /* mm7 = T3-1 */
- movq_m2r (*(col+offset+3*8), mm3); /* mm3 = x3 */
- psubsw_r2r (mm4, mm0); /* mm0 = v17 */
+ "movq 3*8*2(%1), %%mm3 \n\t" /* mm3 = x3 */
+ "psubsw %%mm4, %%mm0 \n\t" /* mm0 = v17 */
- movq_m2r (*t2_vector, mm4); /* mm4 = T2 */
- pmulhw_r2r (mm3, mm5); /* mm5 = (T3-1)*x3 */
+ "movq 8(%0), %%mm4 \n\t" /* mm4 = T2 */
+ "pmulhw %%mm3, %%mm5 \n\t" /* mm5 = (T3-1)*x3 */
- paddsw_r2r (mm2, mm1); /* mm1 = u17 */
- pmulhw_r2r (mm6, mm7); /* mm7 = (T3-1)*x5 */
+ "paddsw %%mm2, %%mm1 \n\t" /* mm1 = u17 */
+ "pmulhw %%mm6, %%mm7 \n\t" /* mm7 = (T3-1)*x5 */
- /* slot */
+ /* slot */
- movq_r2r (mm4, mm2); /* mm2 = T2 */
- paddsw_r2r (mm3, mm5); /* mm5 = T3*x3 */
+ "movq %%mm4, %%mm2 \n\t" /* mm2 = T2 */
+ "paddsw %%mm3, %%mm5 \n\t" /* mm5 = T3*x3 */
- pmulhw_m2r (*(col+offset+2*8), mm4);/* mm4 = T2*x2 */
- paddsw_r2r (mm6, mm7); /* mm7 = T3*x5 */
+ "pmulhw 2*8*2(%1), %%mm4 \n\t" /* mm4 = T2*x2 */
+ "paddsw %%mm6, %%mm7 \n\t" /* mm7 = T3*x5 */
- psubsw_r2r (mm6, mm5); /* mm5 = v35 */
- paddsw_r2r (mm3, mm7); /* mm7 = u35 */
+ "psubsw %%mm6, %%mm5 \n\t" /* mm5 = v35 */
+ "paddsw %%mm3, %%mm7 \n\t" /* mm7 = u35 */
- movq_m2r (*(col+offset+6*8), mm3); /* mm3 = x6 */
- movq_r2r (mm0, mm6); /* mm6 = v17 */
+ "movq 6*8*2(%1), %%mm3 \n\t" /* mm3 = x6 */
+ "movq %%mm0, %%mm6 \n\t" /* mm6 = v17 */
- pmulhw_r2r (mm3, mm2); /* mm2 = T2*x6 */
- psubsw_r2r (mm5, mm0); /* mm0 = b3 */
+ "pmulhw %%mm3, %%mm2 \n\t" /* mm2 = T2*x6 */
+ "psubsw %%mm5, %%mm0 \n\t" /* mm0 = b3 */
- psubsw_r2r (mm3, mm4); /* mm4 = v26 */
- paddsw_r2r (mm6, mm5); /* mm5 = v12 */
+ "psubsw %%mm3, %%mm4 \n\t" /* mm4 = v26 */
+ "paddsw %%mm6, %%mm5 \n\t" /* mm5 = v12 */
- movq_r2m (mm0, *(col+offset+3*8)); /* save b3 in scratch0 */
- movq_r2r (mm1, mm6); /* mm6 = u17 */
+ "movq %%mm0, 3*8*2(%1)\n\t" /* save b3 in scratch0 */
+ "movq %%mm1, %%mm6 \n\t" /* mm6 = u17 */
- paddsw_m2r (*(col+offset+2*8), mm2);/* mm2 = u26 */
- paddsw_r2r (mm7, mm6); /* mm6 = b0 */
+ "paddsw 2*8*2(%1), %%mm2 \n\t" /* mm2 = u26 */
+ "paddsw %%mm7, %%mm6 \n\t" /* mm6 = b0 */
- psubsw_r2r (mm7, mm1); /* mm1 = u12 */
- movq_r2r (mm1, mm7); /* mm7 = u12 */
+ "psubsw %%mm7, %%mm1 \n\t" /* mm1 = u12 */
+ "movq %%mm1, %%mm7 \n\t" /* mm7 = u12 */
- movq_m2r (*(col+offset+0*8), mm3); /* mm3 = x0 */
- paddsw_r2r (mm5, mm1); /* mm1 = u12+v12 */
+ "movq 0*8(%1), %%mm3 \n\t" /* mm3 = x0 */
+ "paddsw %%mm5, %%mm1 \n\t" /* mm1 = u12+v12 */
- movq_m2r (*c4_vector, mm0); /* mm0 = C4/2 */
- psubsw_r2r (mm5, mm7); /* mm7 = u12-v12 */
+ "movq 24(%0), %%mm0 \n\t" /* mm0 = C4/2 */
+ "psubsw %%mm5, %%mm7 \n\t" /* mm7 = u12-v12 */
- movq_r2m (mm6, *(col+offset+5*8)); /* save b0 in scratch1 */
- pmulhw_r2r (mm0, mm1); /* mm1 = b1/2 */
+ "movq %%mm6, 5*8*2(%1)\n\t" /* save b0 in scratch1 */
+ "pmulhw %%mm0, %%mm1 \n\t" /* mm1 = b1/2 */
- movq_r2r (mm4, mm6); /* mm6 = v26 */
- pmulhw_r2r (mm0, mm7); /* mm7 = b2/2 */
+ "movq %%mm4, %%mm6 \n\t" /* mm6 = v26 */
+ "pmulhw %%mm0, %%mm7 \n\t" /* mm7 = b2/2 */
- movq_m2r (*(col+offset+4*8), mm5); /* mm5 = x4 */
- movq_r2r (mm3, mm0); /* mm0 = x0 */
+ "movq 4*8*2(%1), %%mm5 \n\t" /* mm5 = x4 */
+ "movq %%mm3, %%mm0 \n\t" /* mm0 = x0 */
- psubsw_r2r (mm5, mm3); /* mm3 = v04 */
- paddsw_r2r (mm5, mm0); /* mm0 = u04 */
+ "psubsw %%mm5, %%mm3 \n\t" /* mm3 = v04 */
+ "paddsw %%mm5, %%mm0 \n\t" /* mm0 = u04 */
- paddsw_r2r (mm3, mm4); /* mm4 = a1 */
- movq_r2r (mm0, mm5); /* mm5 = u04 */
+ "paddsw %%mm3, %%mm4 \n\t" /* mm4 = a1 */
+ "movq %%mm0, %%mm5 \n\t" /* mm5 = u04 */
- psubsw_r2r (mm6, mm3); /* mm3 = a2 */
- paddsw_r2r (mm2, mm5); /* mm5 = a0 */
+ "psubsw %%mm6, %%mm3 \n\t" /* mm3 = a2 */
+ "paddsw %%mm2, %%mm5 \n\t" /* mm5 = a0 */
- paddsw_r2r (mm1, mm1); /* mm1 = b1 */
- psubsw_r2r (mm2, mm0); /* mm0 = a3 */
+ "paddsw %%mm1, %%mm1 \n\t" /* mm1 = b1 */
+ "psubsw %%mm2, %%mm0 \n\t" /* mm0 = a3 */
- paddsw_r2r (mm7, mm7); /* mm7 = b2 */
- movq_r2r (mm3, mm2); /* mm2 = a2 */
+ "paddsw %%mm7, %%mm7 \n\t" /* mm7 = b2 */
+ "movq %%mm3, %%mm2 \n\t" /* mm2 = a2 */
- movq_r2r (mm4, mm6); /* mm6 = a1 */
- paddsw_r2r (mm7, mm3); /* mm3 = a2+b2 */
+ "movq %%mm4, %%mm6 \n\t" /* mm6 = a1 */
+ "paddsw %%mm7, %%mm3 \n\t" /* mm3 = a2+b2 */
- psraw_i2r (COL_SHIFT, mm3); /* mm3 = y2 */
- paddsw_r2r (mm1, mm4); /* mm4 = a1+b1 */
+ "psraw $" AV_STRINGIFY(COL_SHIFT) ", %%mm3\n\t" /* mm3 = y2 */
+ "paddsw %%mm1, %%mm4\n\t" /* mm4 = a1+b1 */
- psraw_i2r (COL_SHIFT, mm4); /* mm4 = y1 */
- psubsw_r2r (mm1, mm6); /* mm6 = a1-b1 */
+ "psraw $" AV_STRINGIFY(COL_SHIFT) ", %%mm4\n\t" /* mm4 = y1 */
+ "psubsw %%mm1, %%mm6 \n\t" /* mm6 = a1-b1 */
- movq_m2r (*(col+offset+5*8), mm1); /* mm1 = b0 */
- psubsw_r2r (mm7, mm2); /* mm2 = a2-b2 */
+ "movq 5*8*2(%1), %%mm1 \n\t" /* mm1 = b0 */
+ "psubsw %%mm7, %%mm2 \n\t" /* mm2 = a2-b2 */
- psraw_i2r (COL_SHIFT, mm6); /* mm6 = y6 */
- movq_r2r (mm5, mm7); /* mm7 = a0 */
+ "psraw $" AV_STRINGIFY(COL_SHIFT) ", %%mm6\n\t" /* mm6 = y6 */
+ "movq %%mm5, %%mm7 \n\t" /* mm7 = a0 */
- movq_r2m (mm4, *(col+offset+1*8)); /* save y1 */
- psraw_i2r (COL_SHIFT, mm2); /* mm2 = y5 */
+ "movq %%mm4, 1*8*2(%1)\n\t" /* save y1 */
+ "psraw $" AV_STRINGIFY(COL_SHIFT) ", %%mm2\n\t" /* mm2 = y5 */
- movq_r2m (mm3, *(col+offset+2*8)); /* save y2 */
- paddsw_r2r (mm1, mm5); /* mm5 = a0+b0 */
+ "movq %%mm3, 2*8*2(%1)\n\t" /* save y2 */
+ "paddsw %%mm1, %%mm5 \n\t" /* mm5 = a0+b0 */
- movq_m2r (*(col+offset+3*8), mm4); /* mm4 = b3 */
- psubsw_r2r (mm1, mm7); /* mm7 = a0-b0 */
+ "movq 3*8*2(%1), %%mm4 \n\t" /* mm4 = b3 */
+ "psubsw %%mm1, %%mm7 \n\t" /* mm7 = a0-b0 */
- psraw_i2r (COL_SHIFT, mm5); /* mm5 = y0 */
- movq_r2r (mm0, mm3); /* mm3 = a3 */
+ "psraw $" AV_STRINGIFY(COL_SHIFT) ", %%mm5\n\t" /* mm5 = y0 */
+ "movq %%mm0, %%mm3 \n\t" /* mm3 = a3 */
- movq_r2m (mm2, *(col+offset+5*8)); /* save y5 */
- psubsw_r2r (mm4, mm3); /* mm3 = a3-b3 */
+ "movq %%mm2, 5*8*2(%1)\n\t" /* save y5 */
+ "psubsw %%mm4, %%mm3 \n\t" /* mm3 = a3-b3 */
- psraw_i2r (COL_SHIFT, mm7); /* mm7 = y7 */
- paddsw_r2r (mm0, mm4); /* mm4 = a3+b3 */
+ "psraw $" AV_STRINGIFY(COL_SHIFT) ", %%mm7\n\t" /* mm7 = y7 */
+ "paddsw %%mm0, %%mm4 \n\t" /* mm4 = a3+b3 */
- movq_r2m (mm5, *(col+offset+0*8)); /* save y0 */
- psraw_i2r (COL_SHIFT, mm3); /* mm3 = y4 */
+ "movq %%mm5, 0*8*2(%1)\n\t" /* save y0 */
+ "psraw $" AV_STRINGIFY(COL_SHIFT) ", %%mm3\n\t" /* mm3 = y4 */
- movq_r2m (mm6, *(col+offset+6*8)); /* save y6 */
- psraw_i2r (COL_SHIFT, mm4); /* mm4 = y3 */
+ "movq %%mm6, 6*8*2(%1)\n\t" /* save y6 */
+ "psraw $" AV_STRINGIFY(COL_SHIFT) ", %%mm4\n\t" /* mm4 = y3 */
- movq_r2m (mm7, *(col+offset+7*8)); /* save y7 */
+ "movq %%mm7, 7*8*2(%1)\n\t" /* save y7 */
- movq_r2m (mm3, *(col+offset+4*8)); /* save y4 */
+ "movq %%mm3, 4*8*2(%1)\n\t" /* save y4 */
- movq_r2m (mm4, *(col+offset+3*8)); /* save y3 */
+ "movq %%mm4, 3*8*2(%1)\n\t" /* save y3 */
+ :: "r" (t1_vector), "r" (col+offset)
+ );
#undef T1
#undef T2
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/mmx.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/mmx.h deleted file mode 100644 index ef064e3e3..000000000 --- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/mmx.h +++ /dev/null @@ -1,267 +0,0 @@ -/*
- * mmx.h
- * Copyright (C) 1997-2001 H. Dietz and R. Fisher
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef AVCODEC_X86_MMX_H
-#define AVCODEC_X86_MMX_H
-
-#warning Everything in this header is deprecated, use plain __asm__()! New code using this header will be rejected.
-
-
-#define mmx_i2r(op,imm,reg) \
- __asm__ volatile (#op " %0, %%" #reg \
- : /* nothing */ \
- : "i" (imm) )
-
-#define mmx_m2r(op,mem,reg) \
- __asm__ volatile (#op " %0, %%" #reg \
- : /* nothing */ \
- : "m" (mem))
-
-#define mmx_r2m(op,reg,mem) \
- __asm__ volatile (#op " %%" #reg ", %0" \
- : "=m" (mem) \
- : /* nothing */ )
-
-#define mmx_r2r(op,regs,regd) \
- __asm__ volatile (#op " %" #regs ", %" #regd)
-
-
-#define emms() __asm__ volatile ("emms")
-
-#define movd_m2r(var,reg) mmx_m2r (movd, var, reg)
-#define movd_r2m(reg,var) mmx_r2m (movd, reg, var)
-#define movd_r2r(regs,regd) mmx_r2r (movd, regs, regd)
-
-#define movq_m2r(var,reg) mmx_m2r (movq, var, reg)
-#define movq_r2m(reg,var) mmx_r2m (movq, reg, var)
-#define movq_r2r(regs,regd) mmx_r2r (movq, regs, regd)
-
-#define packssdw_m2r(var,reg) mmx_m2r (packssdw, var, reg)
-#define packssdw_r2r(regs,regd) mmx_r2r (packssdw, regs, regd)
-#define packsswb_m2r(var,reg) mmx_m2r (packsswb, var, reg)
-#define packsswb_r2r(regs,regd) mmx_r2r (packsswb, regs, regd)
-
-#define packuswb_m2r(var,reg) mmx_m2r (packuswb, var, reg)
-#define packuswb_r2r(regs,regd) mmx_r2r (packuswb, regs, regd)
-
-#define paddb_m2r(var,reg) mmx_m2r (paddb, var, reg)
-#define paddb_r2r(regs,regd) mmx_r2r (paddb, regs, regd)
-#define paddd_m2r(var,reg) mmx_m2r (paddd, var, reg)
-#define paddd_r2r(regs,regd) mmx_r2r (paddd, regs, regd)
-#define paddw_m2r(var,reg) mmx_m2r (paddw, var, reg)
-#define paddw_r2r(regs,regd) mmx_r2r (paddw, regs, regd)
-
-#define paddsb_m2r(var,reg) mmx_m2r (paddsb, var, reg)
-#define paddsb_r2r(regs,regd) mmx_r2r (paddsb, regs, regd)
-#define paddsw_m2r(var,reg) mmx_m2r (paddsw, var, reg)
-#define paddsw_r2r(regs,regd) mmx_r2r (paddsw, regs, regd)
-
-#define paddusb_m2r(var,reg) mmx_m2r (paddusb, var, reg)
-#define paddusb_r2r(regs,regd) mmx_r2r (paddusb, regs, regd)
-#define paddusw_m2r(var,reg) mmx_m2r (paddusw, var, reg)
-#define paddusw_r2r(regs,regd) mmx_r2r (paddusw, regs, regd)
-
-#define pand_m2r(var,reg) mmx_m2r (pand, var, reg)
-#define pand_r2r(regs,regd) mmx_r2r (pand, regs, regd)
-
-#define pandn_m2r(var,reg) mmx_m2r (pandn, var, reg)
-#define pandn_r2r(regs,regd) mmx_r2r (pandn, regs, regd)
-
-#define pcmpeqb_m2r(var,reg) mmx_m2r (pcmpeqb, var, reg)
-#define pcmpeqb_r2r(regs,regd) mmx_r2r (pcmpeqb, regs, regd)
-#define pcmpeqd_m2r(var,reg) mmx_m2r (pcmpeqd, var, reg)
-#define pcmpeqd_r2r(regs,regd) mmx_r2r (pcmpeqd, regs, regd)
-#define pcmpeqw_m2r(var,reg) mmx_m2r (pcmpeqw, var, reg)
-#define pcmpeqw_r2r(regs,regd) mmx_r2r (pcmpeqw, regs, regd)
-
-#define pcmpgtb_m2r(var,reg) mmx_m2r (pcmpgtb, var, reg)
-#define pcmpgtb_r2r(regs,regd) mmx_r2r (pcmpgtb, regs, regd)
-#define pcmpgtd_m2r(var,reg) mmx_m2r (pcmpgtd, var, reg)
-#define pcmpgtd_r2r(regs,regd) mmx_r2r (pcmpgtd, regs, regd)
-#define pcmpgtw_m2r(var,reg) mmx_m2r (pcmpgtw, var, reg)
-#define pcmpgtw_r2r(regs,regd) mmx_r2r (pcmpgtw, regs, regd)
-
-#define pmaddwd_m2r(var,reg) mmx_m2r (pmaddwd, var, reg)
-#define pmaddwd_r2r(regs,regd) mmx_r2r (pmaddwd, regs, regd)
-
-#define pmulhw_m2r(var,reg) mmx_m2r (pmulhw, var, reg)
-#define pmulhw_r2r(regs,regd) mmx_r2r (pmulhw, regs, regd)
-
-#define pmullw_m2r(var,reg) mmx_m2r (pmullw, var, reg)
-#define pmullw_r2r(regs,regd) mmx_r2r (pmullw, regs, regd)
-
-#define por_m2r(var,reg) mmx_m2r (por, var, reg)
-#define por_r2r(regs,regd) mmx_r2r (por, regs, regd)
-
-#define pslld_i2r(imm,reg) mmx_i2r (pslld, imm, reg)
-#define pslld_m2r(var,reg) mmx_m2r (pslld, var, reg)
-#define pslld_r2r(regs,regd) mmx_r2r (pslld, regs, regd)
-#define psllq_i2r(imm,reg) mmx_i2r (psllq, imm, reg)
-#define psllq_m2r(var,reg) mmx_m2r (psllq, var, reg)
-#define psllq_r2r(regs,regd) mmx_r2r (psllq, regs, regd)
-#define psllw_i2r(imm,reg) mmx_i2r (psllw, imm, reg)
-#define psllw_m2r(var,reg) mmx_m2r (psllw, var, reg)
-#define psllw_r2r(regs,regd) mmx_r2r (psllw, regs, regd)
-
-#define psrad_i2r(imm,reg) mmx_i2r (psrad, imm, reg)
-#define psrad_m2r(var,reg) mmx_m2r (psrad, var, reg)
-#define psrad_r2r(regs,regd) mmx_r2r (psrad, regs, regd)
-#define psraw_i2r(imm,reg) mmx_i2r (psraw, imm, reg)
-#define psraw_m2r(var,reg) mmx_m2r (psraw, var, reg)
-#define psraw_r2r(regs,regd) mmx_r2r (psraw, regs, regd)
-
-#define psrld_i2r(imm,reg) mmx_i2r (psrld, imm, reg)
-#define psrld_m2r(var,reg) mmx_m2r (psrld, var, reg)
-#define psrld_r2r(regs,regd) mmx_r2r (psrld, regs, regd)
-#define psrlq_i2r(imm,reg) mmx_i2r (psrlq, imm, reg)
-#define psrlq_m2r(var,reg) mmx_m2r (psrlq, var, reg)
-#define psrlq_r2r(regs,regd) mmx_r2r (psrlq, regs, regd)
-#define psrlw_i2r(imm,reg) mmx_i2r (psrlw, imm, reg)
-#define psrlw_m2r(var,reg) mmx_m2r (psrlw, var, reg)
-#define psrlw_r2r(regs,regd) mmx_r2r (psrlw, regs, regd)
-
-#define psubb_m2r(var,reg) mmx_m2r (psubb, var, reg)
-#define psubb_r2r(regs,regd) mmx_r2r (psubb, regs, regd)
-#define psubd_m2r(var,reg) mmx_m2r (psubd, var, reg)
-#define psubd_r2r(regs,regd) mmx_r2r (psubd, regs, regd)
-#define psubw_m2r(var,reg) mmx_m2r (psubw, var, reg)
-#define psubw_r2r(regs,regd) mmx_r2r (psubw, regs, regd)
-
-#define psubsb_m2r(var,reg) mmx_m2r (psubsb, var, reg)
-#define psubsb_r2r(regs,regd) mmx_r2r (psubsb, regs, regd)
-#define psubsw_m2r(var,reg) mmx_m2r (psubsw, var, reg)
-#define psubsw_r2r(regs,regd) mmx_r2r (psubsw, regs, regd)
-
-#define psubusb_m2r(var,reg) mmx_m2r (psubusb, var, reg)
-#define psubusb_r2r(regs,regd) mmx_r2r (psubusb, regs, regd)
-#define psubusw_m2r(var,reg) mmx_m2r (psubusw, var, reg)
-#define psubusw_r2r(regs,regd) mmx_r2r (psubusw, regs, regd)
-
-#define punpckhbw_m2r(var,reg) mmx_m2r (punpckhbw, var, reg)
-#define punpckhbw_r2r(regs,regd) mmx_r2r (punpckhbw, regs, regd)
-#define punpckhdq_m2r(var,reg) mmx_m2r (punpckhdq, var, reg)
-#define punpckhdq_r2r(regs,regd) mmx_r2r (punpckhdq, regs, regd)
-#define punpckhwd_m2r(var,reg) mmx_m2r (punpckhwd, var, reg)
-#define punpckhwd_r2r(regs,regd) mmx_r2r (punpckhwd, regs, regd)
-
-#define punpcklbw_m2r(var,reg) mmx_m2r (punpcklbw, var, reg)
-#define punpcklbw_r2r(regs,regd) mmx_r2r (punpcklbw, regs, regd)
-#define punpckldq_m2r(var,reg) mmx_m2r (punpckldq, var, reg)
-#define punpckldq_r2r(regs,regd) mmx_r2r (punpckldq, regs, regd)
-#define punpcklwd_m2r(var,reg) mmx_m2r (punpcklwd, var, reg)
-#define punpcklwd_r2r(regs,regd) mmx_r2r (punpcklwd, regs, regd)
-
-#define pxor_m2r(var,reg) mmx_m2r (pxor, var, reg)
-#define pxor_r2r(regs,regd) mmx_r2r (pxor, regs, regd)
-
-
-/* 3DNOW extensions */
-
-#define pavgusb_m2r(var,reg) mmx_m2r (pavgusb, var, reg)
-#define pavgusb_r2r(regs,regd) mmx_r2r (pavgusb, regs, regd)
-
-
-/* AMD MMX extensions - also available in intel SSE */
-
-
-#define mmx_m2ri(op,mem,reg,imm) \
- __asm__ volatile (#op " %1, %0, %%" #reg \
- : /* nothing */ \
- : "m" (mem), "i" (imm))
-#define mmx_r2ri(op,regs,regd,imm) \
- __asm__ volatile (#op " %0, %%" #regs ", %%" #regd \
- : /* nothing */ \
- : "i" (imm) )
-
-#define mmx_fetch(mem,hint) \
- __asm__ volatile ("prefetch" #hint " %0" \
- : /* nothing */ \
- : "m" (mem))
-
-
-#define maskmovq(regs,maskreg) mmx_r2ri (maskmovq, regs, maskreg)
-
-#define movntq_r2m(mmreg,var) mmx_r2m (movntq, mmreg, var)
-
-#define pavgb_m2r(var,reg) mmx_m2r (pavgb, var, reg)
-#define pavgb_r2r(regs,regd) mmx_r2r (pavgb, regs, regd)
-#define pavgw_m2r(var,reg) mmx_m2r (pavgw, var, reg)
-#define pavgw_r2r(regs,regd) mmx_r2r (pavgw, regs, regd)
-
-#define pextrw_r2r(mmreg,reg,imm) mmx_r2ri (pextrw, mmreg, reg, imm)
-
-#define pinsrw_r2r(reg,mmreg,imm) mmx_r2ri (pinsrw, reg, mmreg, imm)
-
-#define pmaxsw_m2r(var,reg) mmx_m2r (pmaxsw, var, reg)
-#define pmaxsw_r2r(regs,regd) mmx_r2r (pmaxsw, regs, regd)
-
-#define pmaxub_m2r(var,reg) mmx_m2r (pmaxub, var, reg)
-#define pmaxub_r2r(regs,regd) mmx_r2r (pmaxub, regs, regd)
-
-#define pminsw_m2r(var,reg) mmx_m2r (pminsw, var, reg)
-#define pminsw_r2r(regs,regd) mmx_r2r (pminsw, regs, regd)
-
-#define pminub_m2r(var,reg) mmx_m2r (pminub, var, reg)
-#define pminub_r2r(regs,regd) mmx_r2r (pminub, regs, regd)
-
-#define pmovmskb(mmreg,reg) \
- __asm__ volatile ("movmskps %" #mmreg ", %" #reg)
-
-#define pmulhuw_m2r(var,reg) mmx_m2r (pmulhuw, var, reg)
-#define pmulhuw_r2r(regs,regd) mmx_r2r (pmulhuw, regs, regd)
-
-#define prefetcht0(mem) mmx_fetch (mem, t0)
-#define prefetcht1(mem) mmx_fetch (mem, t1)
-#define prefetcht2(mem) mmx_fetch (mem, t2)
-#define prefetchnta(mem) mmx_fetch (mem, nta)
-
-#define psadbw_m2r(var,reg) mmx_m2r (psadbw, var, reg)
-#define psadbw_r2r(regs,regd) mmx_r2r (psadbw, regs, regd)
-
-#define pshufw_m2r(var,reg,imm) mmx_m2ri(pshufw, var, reg, imm)
-#define pshufw_r2r(regs,regd,imm) mmx_r2ri(pshufw, regs, regd, imm)
-
-#define sfence() __asm__ volatile ("sfence\n\t")
-
-/* SSE2 */
-#define pshufhw_m2r(var,reg,imm) mmx_m2ri(pshufhw, var, reg, imm)
-#define pshufhw_r2r(regs,regd,imm) mmx_r2ri(pshufhw, regs, regd, imm)
-#define pshuflw_m2r(var,reg,imm) mmx_m2ri(pshuflw, var, reg, imm)
-#define pshuflw_r2r(regs,regd,imm) mmx_r2ri(pshuflw, regs, regd, imm)
-
-#define pshufd_r2r(regs,regd,imm) mmx_r2ri(pshufd, regs, regd, imm)
-
-#define movdqa_m2r(var,reg) mmx_m2r (movdqa, var, reg)
-#define movdqa_r2m(reg,var) mmx_r2m (movdqa, reg, var)
-#define movdqa_r2r(regs,regd) mmx_r2r (movdqa, regs, regd)
-#define movdqu_m2r(var,reg) mmx_m2r (movdqu, var, reg)
-#define movdqu_r2m(reg,var) mmx_r2m (movdqu, reg, var)
-#define movdqu_r2r(regs,regd) mmx_r2r (movdqu, regs, regd)
-
-#define pmullw_r2m(reg,var) mmx_r2m (pmullw, reg, var)
-
-#define pslldq_i2r(imm,reg) mmx_i2r (pslldq, imm, reg)
-#define psrldq_i2r(imm,reg) mmx_i2r (psrldq, imm, reg)
-
-#define punpcklqdq_r2r(regs,regd) mmx_r2r (punpcklqdq, regs, regd)
-#define punpckhqdq_r2r(regs,regd) mmx_r2r (punpckhqdq, regs, regd)
-
-
-#endif /* AVCODEC_X86_MMX_H */
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp56_arith.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp56_arith.h new file mode 100644 index 000000000..95f96e365 --- /dev/null +++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp56_arith.h @@ -0,0 +1,54 @@ +/**
+ * VP5 and VP6 compatible video decoder (arith decoder)
+ *
+ * Copyright (C) 2006 Aurelien Jacobs <aurel@gnuage.org>
+ * Copyright (C) 2010 Eli Friedman
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_X86_VP56_ARITH_H
+#define AVCODEC_X86_VP56_ARITH_H
+
+#if HAVE_FAST_CMOV
+#define vp56_rac_get_prob vp56_rac_get_prob
+static inline int vp56_rac_get_prob(VP56RangeCoder *c, uint8_t prob) /* cmov variant: returns 1 if the code word from vp56_rac_renorm(c) is >= the prob-scaled threshold, else 0; updates c->high and c->code_word */
+{
+ unsigned int code_word = vp56_rac_renorm(c);
+ unsigned int high = c->high;
+ unsigned int low = 1 + (((high - 1) * prob) >> 8); /* threshold: (high - 1) scaled by prob/256, plus 1 */
+ unsigned int low_shift = low << 8; /* low shifted left by 8: the value actually compared against code_word */
+ int bit = 0; /* must start at 0: setae below writes only the low byte (%b0) */
+
+ __asm__( /* operands: %0 = bit, %1 = high, %2 = code_word, %3 = low_shift, %4 = low */
+ "subl %4, %1 \n\t" /* high -= low */
+ "subl %3, %2 \n\t" /* code_word -= low_shift; CF is set when code_word < low_shift */
+ "leal (%2, %3), %3 \n\t" /* %3 = original code_word; lea is used because it leaves the flags untouched */
+ "setae %b0 \n\t" /* bit = (code_word >= low_shift) */
+ "cmovb %4, %1 \n\t" /* code_word < low_shift: high = low */
+ "cmovb %3, %2 \n\t" /* code_word < low_shift: restore the original code_word */
+ : "+q"(bit), "+r"(high), "+r"(code_word), "+r"(low_shift)
+ : "r"(low)
+ );
+
+ c->high = high;
+ c->code_word = code_word;
+ return bit;
+}
+#endif
+
+#endif /* AVCODEC_X86_VP56_ARITH_H */
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp8dsp-init.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp8dsp-init.c index e06da5e42..dd7dc696e 100644 --- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp8dsp-init.c +++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp8dsp-init.c @@ -350,7 +350,6 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c) #endif c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_sse2; - c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_sse2; c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_sse2; c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_sse2; @@ -362,6 +361,8 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c) if (mm_flags & FF_MM_SSE2) { c->vp8_idct_dc_add4y = ff_vp8_idct_dc_add4y_sse2; + c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_sse2; + c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_sse2; c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_sse2; @@ -396,6 +397,7 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c) if (mm_flags & FF_MM_SSE4) { c->vp8_idct_dc_add = ff_vp8_idct_dc_add_sse4; + c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_sse4; c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_sse4; c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_sse4; } diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp8dsp.asm b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp8dsp.asm index 4aa901e27..4f430d80c 100644 --- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp8dsp.asm +++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp8dsp.asm @@ -145,6 +145,10 @@ filter_h6_shuf3: db 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11 pw_20091: times 4 dw 20091 pw_17734: times 4 dw 17734 +pb_27_63: times 8 db 27, 63 +pb_18_63: times 8 db 18, 63 +pb_9_63: times 8 db 9, 63 + cextern pb_1 cextern pw_3 cextern pb_3 @@ -438,48 +442,43 @@ 
cglobal put_vp8_epel4_h6_mmxext, 6, 6 jg .nextrow REP_RET -; 4x4 block, H-only 4-tap filter INIT_XMM -cglobal put_vp8_epel8_h4_sse2, 6, 6, 8 - shl r5d, 4 +cglobal put_vp8_epel8_h4_sse2, 6, 6, 10 + shl r5d, 5 %ifdef PIC - lea r11, [fourtap_filter_hw_m] + lea r11, [fourtap_filter_v_m] %endif - mova m5, [fourtap_filter_hw+r5-16] ; set up 4tap filter in words - mova m6, [fourtap_filter_hw+r5] + lea r5, [fourtap_filter_v+r5-32] pxor m7, m7 - + mova m4, [pw_64] + mova m5, [r5+ 0] + mova m6, [r5+16] +%ifdef m8 + mova m8, [r5+32] + mova m9, [r5+48] +%endif .nextrow - movh m0, [r2-1] - punpcklbw m0, m7 ; ABCDEFGH - mova m1, m0 - mova m2, m0 - mova m3, m0 - psrldq m1, 2 ; BCDEFGH - psrldq m2, 4 ; CDEFGH - psrldq m3, 6 ; DEFGH - punpcklwd m0, m1 ; ABBCCDDE - punpcklwd m2, m3 ; CDDEEFFG - pmaddwd m0, m5 - pmaddwd m2, m6 - paddd m0, m2 - - movh m1, [r2+3] - punpcklbw m1, m7 ; ABCDEFGH - mova m2, m1 - mova m3, m1 - mova m4, m1 - psrldq m2, 2 ; BCDEFGH - psrldq m3, 4 ; CDEFGH - psrldq m4, 6 ; DEFGH - punpcklwd m1, m2 ; ABBCCDDE - punpcklwd m3, m4 ; CDDEEFFG - pmaddwd m1, m5 - pmaddwd m3, m6 - paddd m1, m3 - - packssdw m0, m1 - paddsw m0, [pw_64] + movq m0, [r2-1] + movq m1, [r2-0] + movq m2, [r2+1] + movq m3, [r2+2] + punpcklbw m0, m7 + punpcklbw m1, m7 + punpcklbw m2, m7 + punpcklbw m3, m7 + pmullw m0, m5 + pmullw m1, m6 +%ifdef m8 + pmullw m2, m8 + pmullw m3, m9 +%else + pmullw m2, [r5+32] + pmullw m3, [r5+48] +%endif + paddsw m0, m1 + paddsw m2, m3 + paddsw m0, m2 + paddsw m0, m4 psraw m0, 7 packuswb m0, m7 movh [r0], m0 ; store @@ -491,62 +490,57 @@ cglobal put_vp8_epel8_h4_sse2, 6, 6, 8 jg .nextrow REP_RET -cglobal put_vp8_epel8_h6_sse2, 6, 6, 8 +cglobal put_vp8_epel8_h6_sse2, 6, 6, 14 lea r5d, [r5*3] + shl r5d, 4 %ifdef PIC - lea r11, [sixtap_filter_hw_m] + lea r11, [sixtap_filter_v_m] %endif - lea r5, [sixtap_filter_hw+r5*8] + lea r5, [sixtap_filter_v+r5-96] pxor m7, m7 - + mova m6, [pw_64] +%ifdef m8 + mova m8, [r5+ 0] + mova m9, [r5+16] + mova m10, [r5+32] + mova m11, 
[r5+48] + mova m12, [r5+64] + mova m13, [r5+80] +%endif .nextrow - movu m0, [r2-2] - mova m6, m0 - mova m4, m0 - punpcklbw m0, m7 ; ABCDEFGHI - mova m1, m0 - mova m2, m0 - mova m3, m0 - psrldq m1, 2 ; BCDEFGH - psrldq m2, 4 ; CDEFGH - psrldq m3, 6 ; DEFGH - psrldq m4, 4 - punpcklbw m4, m7 ; EFGH - mova m5, m4 - psrldq m5, 2 ; FGH - punpcklwd m0, m1 ; ABBCCDDE - punpcklwd m2, m3 ; CDDEEFFG - punpcklwd m4, m5 ; EFFGGHHI - pmaddwd m0, [r5-48] - pmaddwd m2, [r5-32] - pmaddwd m4, [r5-16] - paddd m0, m2 - paddd m0, m4 - - psrldq m6, 4 - mova m4, m6 - punpcklbw m6, m7 ; ABCDEFGHI - mova m1, m6 - mova m2, m6 - mova m3, m6 - psrldq m1, 2 ; BCDEFGH - psrldq m2, 4 ; CDEFGH - psrldq m3, 6 ; DEFGH - psrldq m4, 4 - punpcklbw m4, m7 ; EFGH - mova m5, m4 - psrldq m5, 2 ; FGH - punpcklwd m6, m1 ; ABBCCDDE - punpcklwd m2, m3 ; CDDEEFFG - punpcklwd m4, m5 ; EFFGGHHI - pmaddwd m6, [r5-48] - pmaddwd m2, [r5-32] - pmaddwd m4, [r5-16] - paddd m6, m2 - paddd m6, m4 - - packssdw m0, m6 - paddsw m0, [pw_64] + movq m0, [r2-2] + movq m1, [r2-1] + movq m2, [r2-0] + movq m3, [r2+1] + movq m4, [r2+2] + movq m5, [r2+3] + punpcklbw m0, m7 + punpcklbw m1, m7 + punpcklbw m2, m7 + punpcklbw m3, m7 + punpcklbw m4, m7 + punpcklbw m5, m7 +%ifdef m8 + pmullw m0, m8 + pmullw m1, m9 + pmullw m2, m10 + pmullw m3, m11 + pmullw m4, m12 + pmullw m5, m13 +%else + pmullw m0, [r5+ 0] + pmullw m1, [r5+16] + pmullw m2, [r5+32] + pmullw m3, [r5+48] + pmullw m4, [r5+64] + pmullw m5, [r5+80] +%endif + paddsw m1, m4 + paddsw m0, m5 + paddsw m1, m2 + paddsw m0, m3 + paddsw m0, m1 + paddsw m0, m6 psraw m0, 7 packuswb m0, m7 movh [r0], m0 ; store @@ -1360,44 +1354,123 @@ cglobal vp8_luma_dc_wht_mmx, 2,3 movd [%7+%9*2], m%4 %endmacro -%macro SPLATB_REG 3-4 +; write 4 or 8 words in the mmx/xmm registers as 8 lines +; 1 and 2 are the registers to write, this can be the same (for SSE2) +; for pre-SSE4: +; 3 is a general-purpose register that we will clobber +; for SSE4: +; 3 is a pointer to the destination's 5th line +; 4 is 
a pointer to the destination's 4th line +; 5/6 is -stride and +stride +%macro WRITE_2x4W 6 + movd %3, %1 + punpckhdq %1, %1 + mov [%4+%5*4], %3w + shr %3, 16 + add %4, %6 + mov [%4+%5*4], %3w + + movd %3, %1 + add %4, %5 + mov [%4+%5*2], %3w + shr %3, 16 + mov [%4+%5 ], %3w + + movd %3, %2 + punpckhdq %2, %2 + mov [%4 ], %3w + shr %3, 16 + mov [%4+%6 ], %3w + + movd %3, %2 + add %4, %6 + mov [%4+%6 ], %3w + shr %3, 16 + mov [%4+%6*2], %3w + add %4, %5 +%endmacro + +%macro WRITE_8W_SSE2 5 + movd %2, %1 + psrldq %1, 4 + mov [%3+%4*4], %2w + shr %2, 16 + add %3, %5 + mov [%3+%4*4], %2w + + movd %2, %1 + psrldq %1, 4 + add %3, %4 + mov [%3+%4*2], %2w + shr %2, 16 + mov [%3+%4 ], %2w + + movd %2, %1 + psrldq %1, 4 + mov [%3 ], %2w + shr %2, 16 + mov [%3+%5 ], %2w + + movd %2, %1 + add %3, %5 + mov [%3+%5 ], %2w + shr %2, 16 + mov [%3+%5*2], %2w +%endmacro + +%macro WRITE_8W_SSE4 5 + pextrw [%3+%4*4], %1, 0 + pextrw [%2+%4*4], %1, 1 + pextrw [%3+%4*2], %1, 2 + pextrw [%3+%4 ], %1, 3 + pextrw [%3 ], %1, 4 + pextrw [%2 ], %1, 5 + pextrw [%2+%5 ], %1, 6 + pextrw [%2+%5*2], %1, 7 +%endmacro + +%macro SPLATB_REG_MMX 2-3 movd %1, %2 -%ifidn %3, ssse3 - pshufb %1, %4 -%else punpcklbw %1, %1 -%if mmsize == 16 ; sse2 - pshuflw %1, %1, 0x0 - punpcklqdq %1, %1 -%elifidn %3, mmx punpcklwd %1, %1 punpckldq %1, %1 -%else ; mmxext +%endmacro + +%macro SPLATB_REG_MMXEXT 2-3 + movd %1, %2 + punpcklbw %1, %1 pshufw %1, %1, 0x0 -%endif -%endif +%endmacro + +%macro SPLATB_REG_SSE2 2-3 + movd %1, %2 + punpcklbw %1, %1 + pshuflw %1, %1, 0x0 + punpcklqdq %1, %1 +%endmacro + +%macro SPLATB_REG_SSSE3 3 + movd %1, %2 + pshufb %1, %3 %endmacro %macro SIMPLE_LOOPFILTER 3 cglobal vp8_%2_loop_filter_simple_%1, 3, %3 -%ifidn %2, h - mov r5, rsp ; backup stack pointer - and rsp, ~(mmsize-1) ; align stack -%endif %if mmsize == 8 ; mmx/mmxext mov r3, 2 %endif -%ifidn %1, ssse3 +%ifnidn %1, sse2 +%if mmsize == 16 pxor m0, m0 %endif - SPLATB_REG m7, r2, %1, m0 ; splat "flim" into register +%endif + 
SPLATB_REG m7, r2, m0 ; splat "flim" into register ; set up indexes to address 4 rows mov r2, r1 neg r1 %ifidn %2, h lea r0, [r0+4*r2-2] - sub rsp, mmsize*2 ; (aligned) storage space for saving p1/q1 %endif %if mmsize == 8 ; mmx / mmxext @@ -1418,9 +1491,6 @@ cglobal vp8_%2_loop_filter_simple_%1, 3, %3 READ_16x4_INTERLEAVED 0, 1, 2, 3, 4, 5, 6, r0, r4, r1, r2, r3 %endif TRANSPOSE4x4W 0, 1, 2, 3, 4 - - mova [rsp], m0 ; store p1 - mova [rsp+mmsize], m3 ; store q1 %endif ; simple_limit @@ -1491,17 +1561,21 @@ cglobal vp8_%2_loop_filter_simple_%1, 3, %3 mova [r0], m4 mova [r0+r1], m6 %else ; h - mova m0, [rsp] ; p1 - SWAP 2, 4 ; p0 - SWAP 1, 6 ; q0 - mova m3, [rsp+mmsize] ; q1 + inc r0 + SBUTTERFLY bw, 6, 4, 0 - TRANSPOSE4x4B 0, 1, 2, 3, 4 %if mmsize == 16 ; sse2 - add r3, r1 ; change from r4*8*stride to r0+8*stride - WRITE_4x4D 0, 1, 2, 3, r0, r4, r3, r1, r2, 16 +%ifidn %1, sse4 + inc r4 +%endif + WRITE_8W m6, r4, r0, r1, r2 + lea r4, [r3+r1+1] +%ifidn %1, sse4 + inc r3 +%endif + WRITE_8W m4, r3, r4, r1, r2 %else ; mmx/mmxext - WRITE_4x2D 0, 1, 2, 3, r0, r4, r1, r2 + WRITE_2x4W m6, m4, r4, r0, r1, r2 %endif %endif @@ -1510,34 +1584,33 @@ cglobal vp8_%2_loop_filter_simple_%1, 3, %3 %ifidn %2, v add r0, 8 ; advance 8 cols = pixels %else ; h - lea r0, [r0+r2*8] ; advance 8 rows = lines + lea r0, [r0+r2*8-1] ; advance 8 rows = lines %endif dec r3 jg .next8px -%ifidn %2, v REP_RET -%else ; h - mov rsp, r5 ; restore stack pointer - RET -%endif %else ; sse2 -%ifidn %2, h - mov rsp, r5 ; restore stack pointer -%endif RET %endif %endmacro INIT_MMX +%define SPLATB_REG SPLATB_REG_MMX SIMPLE_LOOPFILTER mmx, v, 4 -SIMPLE_LOOPFILTER mmx, h, 6 +SIMPLE_LOOPFILTER mmx, h, 5 +%define SPLATB_REG SPLATB_REG_MMXEXT SIMPLE_LOOPFILTER mmxext, v, 4 -SIMPLE_LOOPFILTER mmxext, h, 6 +SIMPLE_LOOPFILTER mmxext, h, 5 INIT_XMM +%define SPLATB_REG SPLATB_REG_SSE2 +%define WRITE_8W WRITE_8W_SSE2 SIMPLE_LOOPFILTER sse2, v, 3 -SIMPLE_LOOPFILTER sse2, h, 6 +SIMPLE_LOOPFILTER sse2, h, 5 +%define 
SPLATB_REG SPLATB_REG_SSSE3 SIMPLE_LOOPFILTER ssse3, v, 3 -SIMPLE_LOOPFILTER ssse3, h, 6 +SIMPLE_LOOPFILTER ssse3, h, 5 +%define WRITE_8W WRITE_8W_SSE4 +SIMPLE_LOOPFILTER sse4, h, 5 ;----------------------------------------------------------------------------- ; void vp8_h/v_loop_filter<size>_inner_<opt>(uint8_t *dst, [uint8_t *v,] int stride, @@ -1573,15 +1646,17 @@ cglobal vp8_%2_loop_filter16y_inner_%1, 5, %3, %5 %define stack_reg hev_thr_reg %endif -%ifidn %1, ssse3 +%ifnidn %1, sse2 +%if mmsize == 16 pxor m7, m7 %endif +%endif %ifndef m8 ; mmx/mmxext or sse2 on x86-32 ; splat function arguments - SPLATB_REG m0, E_reg, %1, m7 ; E - SPLATB_REG m1, I_reg, %1, m7 ; I - SPLATB_REG m2, hev_thr_reg, %1, m7 ; hev_thresh + SPLATB_REG m0, E_reg, m7 ; E + SPLATB_REG m1, I_reg, m7 ; I + SPLATB_REG m2, hev_thr_reg, m7 ; hev_thresh ; align stack mov stack_reg, rsp ; backup stack pointer @@ -1614,9 +1689,9 @@ cglobal vp8_%2_loop_filter16y_inner_%1, 5, %3, %5 %define q0backup m8 ; splat function arguments - SPLATB_REG flim_E, E_reg, %1, m7 ; E - SPLATB_REG flim_I, I_reg, %1, m7 ; I - SPLATB_REG hev_thr, hev_thr_reg, %1, m7 ; hev_thresh + SPLATB_REG flim_E, E_reg, m7 ; E + SPLATB_REG flim_I, I_reg, m7 ; I + SPLATB_REG hev_thr, hev_thr_reg, m7 ; hev_thresh %endif %if mmsize == 8 && %4 == 16 ; mmx/mmxext @@ -2028,17 +2103,20 @@ cglobal vp8_%2_loop_filter16y_inner_%1, 5, %3, %5 %endmacro INIT_MMX +%define SPLATB_REG SPLATB_REG_MMX INNER_LOOPFILTER mmx, v, 6, 16, 0 INNER_LOOPFILTER mmx, h, 6, 16, 0 -INNER_LOOPFILTER mmxext, v, 6, 16, 0 -INNER_LOOPFILTER mmxext, h, 6, 16, 0 - INNER_LOOPFILTER mmx, v, 6, 8, 0 INNER_LOOPFILTER mmx, h, 6, 8, 0 + +%define SPLATB_REG SPLATB_REG_MMXEXT +INNER_LOOPFILTER mmxext, v, 6, 16, 0 +INNER_LOOPFILTER mmxext, h, 6, 16, 0 INNER_LOOPFILTER mmxext, v, 6, 8, 0 INNER_LOOPFILTER mmxext, h, 6, 8, 0 INIT_XMM +%define SPLATB_REG SPLATB_REG_SSE2 INNER_LOOPFILTER sse2, v, 5, 16, 13 %ifdef m8 INNER_LOOPFILTER sse2, h, 5, 16, 13 @@ -2048,6 +2126,7 @@ 
INNER_LOOPFILTER sse2, h, 6, 16, 13 INNER_LOOPFILTER sse2, v, 6, 8, 13 INNER_LOOPFILTER sse2, h, 6, 8, 13 +%define SPLATB_REG SPLATB_REG_SSSE3 INNER_LOOPFILTER ssse3, v, 5, 16, 13 %ifdef m8 INNER_LOOPFILTER ssse3, h, 5, 16, 13 @@ -2062,67 +2141,6 @@ INNER_LOOPFILTER ssse3, h, 6, 8, 13 ; int flimE, int flimI, int hev_thr); ;----------------------------------------------------------------------------- -; write 4 or 8 words in the mmx/xmm registers as 8 lines -; 1 and 2 are the registers to write, this can be the same (for SSE2) -; for pre-SSE4: -; 3 is a general-purpose register that we will clobber -; for SSE4: -; 3 is a pointer to the destination's 5th line -; 4 is a pointer to the destination's 4th line -; 5/6 is -stride and +stride -; 7 is optimization string -%macro WRITE_8W 7 -%ifidn %7, sse4 - pextrw [%4+%5*4], %1, 0 - pextrw [%3+%5*4], %1, 1 - pextrw [%4+%5*2], %1, 2 - pextrw [%4+%5 ], %1, 3 - pextrw [%4 ], %1, 4 - pextrw [%3 ], %1, 5 - pextrw [%3+%6 ], %1, 6 - pextrw [%3+%6*2], %1, 7 -%else - movd %3, %1 -%if mmsize == 8 - punpckhdq %1, %1 -%else - psrldq %1, 4 -%endif - mov [%4+%5*4], %3w - shr %3, 16 - add %4, %6 - mov [%4+%5*4], %3w - - movd %3, %1 -%if mmsize == 16 - psrldq %1, 4 -%endif - add %4, %5 - mov [%4+%5*2], %3w - shr %3, 16 - mov [%4+%5 ], %3w - - movd %3, %2 -%if mmsize == 8 - punpckhdq %2, %2 -%else - psrldq %2, 4 -%endif - mov [%4 ], %3w - shr %3, 16 - mov [%4+%6 ], %3w - - movd %3, %2 - add %4, %6 - mov [%4+%6 ], %3w - shr %3, 16 - mov [%4+%6*2], %3w -%if mmsize == 8 - add %4, %5 -%endif -%endif -%endmacro - %macro MBEDGE_LOOPFILTER 5 %if %4 == 8 ; chroma cglobal vp8_%2_loop_filter8uv_mbedge_%1, 6, %3, %5 @@ -2152,24 +2170,35 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5 %define stack_reg hev_thr_reg %endif -%ifidn %1, ssse3 +%define ssse3_or_higher 0 +%ifnidn %1, sse2 +%if mmsize == 16 +%define ssse3_or_higher 1 +%endif +%endif + +%if ssse3_or_higher pxor m7, m7 %endif %ifndef m8 ; mmx/mmxext or sse2 on x86-32 ; splat function 
arguments - SPLATB_REG m0, E_reg, %1, m7 ; E - SPLATB_REG m1, I_reg, %1, m7 ; I - SPLATB_REG m2, hev_thr_reg, %1, m7 ; hev_thresh + SPLATB_REG m0, E_reg, m7 ; E + SPLATB_REG m1, I_reg, m7 ; I + SPLATB_REG m2, hev_thr_reg, m7 ; hev_thresh ; align stack mov stack_reg, rsp ; backup stack pointer and rsp, ~(mmsize-1) ; align stack +%if mmsize == 16 + sub rsp, mmsize * 7 +%else sub rsp, mmsize * 8 ; stack layout: [0]=E, [1]=I, [2]=hev_thr ; [3]=hev() result ; [4]=filter tmp result ; [5]/[6] = p2/q2 backup ; [7]=lim_res sign result +%endif %define flim_E [rsp] %define flim_I [rsp+mmsize] @@ -2180,7 +2209,11 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5 %define q0backup [rsp+mmsize*4] %define p2backup [rsp+mmsize*5] %define q2backup [rsp+mmsize*6] +%if mmsize == 16 +%define lim_sign [rsp] +%else %define lim_sign [rsp+mmsize*7] +%endif mova flim_E, m0 mova flim_I, m1 @@ -2197,12 +2230,12 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5 %define q0backup m8 %define p2backup m13 %define q2backup m14 -%define lim_sign m15 +%define lim_sign m9 ; splat function arguments - SPLATB_REG flim_E, E_reg, %1, m7 ; E - SPLATB_REG flim_I, I_reg, %1, m7 ; I - SPLATB_REG hev_thr, hev_thr_reg, %1, m7 ; hev_thresh + SPLATB_REG flim_E, E_reg, m7 ; E + SPLATB_REG flim_I, I_reg, m7 ; I + SPLATB_REG hev_thr, hev_thr_reg, m7 ; hev_thresh %endif %if mmsize == 8 && %4 == 16 ; mmx/mmxext @@ -2543,7 +2576,11 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5 paddusb m4, m1 ; q0-f1 ; filter_mbedge (m2-m5 = p1-q1; lim_res carries w) +%if ssse3_or_higher + mova m7, [pb_1] +%else mova m7, [pw_63] +%endif %ifdef m8 SWAP 1, 8 %else @@ -2552,15 +2589,40 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5 pxor m0, m0 mova m6, m1 pcmpgtb m0, m1 ; which are negative +%if ssse3_or_higher + punpcklbw m6, m7 ; interleave with "1" for rounding + punpckhbw m1, m7 +%else punpcklbw m6, m0 ; signed byte->word punpckhbw m1, m0 +%endif mova lim_sign, m0 +%if ssse3_or_higher + mova m7, [pb_27_63] 
+%ifndef m8 + mova lim_res, m1 +%endif +%ifdef m10 + SWAP 0, 10 ; don't lose lim_sign copy +%endif + mova m0, m7 + pmaddubsw m7, m6 + SWAP 6, 7 + pmaddubsw m0, m1 + SWAP 1, 0 +%ifdef m10 + SWAP 0, 10 +%else + mova m0, lim_sign +%endif +%else mova mask_res, m6 ; backup for later in filter mova lim_res, m1 pmullw m6, [pw_27] pmullw m1, [pw_27] paddw m6, m7 paddw m1, m7 +%endif psraw m6, 7 psraw m1, 7 packsswb m6, m1 ; a0 @@ -2568,18 +2630,39 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5 psubb m1, m6 pand m1, m0 ; -a0 pandn m0, m6 ; +a0 +%if ssse3_or_higher + mova m6, [pb_18_63] ; pipelining +%endif psubusb m3, m1 paddusb m4, m1 paddusb m3, m0 ; p0+a0 psubusb m4, m0 ; q0-a0 - mova m6, mask_res +%if ssse3_or_higher + SWAP 6, 7 +%ifdef m10 + SWAP 1, 10 +%else mova m1, lim_res +%endif + mova m0, m7 + pmaddubsw m7, m6 + SWAP 6, 7 + pmaddubsw m0, m1 + SWAP 1, 0 +%ifdef m10 + SWAP 0, 10 +%endif mova m0, lim_sign +%else + mova m6, mask_res + mova m1, lim_res pmullw m6, [pw_18] pmullw m1, [pw_18] paddw m6, m7 paddw m1, m7 +%endif + mova m0, lim_sign psraw m6, 7 psraw m1, 7 packsswb m6, m1 ; a1 @@ -2587,11 +2670,27 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5 psubb m1, m6 pand m1, m0 ; -a1 pandn m0, m6 ; +a1 +%if ssse3_or_higher + mova m6, [pb_9_63] +%endif psubusb m2, m1 paddusb m5, m1 paddusb m2, m0 ; p1+a1 psubusb m5, m0 ; q1-a1 +%if ssse3_or_higher + SWAP 6, 7 +%ifdef m10 + SWAP 1, 10 +%else + mova m1, lim_res +%endif + mova m0, m7 + pmaddubsw m7, m6 + SWAP 6, 7 + pmaddubsw m0, m1 + SWAP 1, 0 +%else %ifdef m8 SWAP 6, 12 SWAP 1, 8 @@ -2603,8 +2702,9 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5 pmullw m1, [pw_9] paddw m6, m7 paddw m1, m7 -%ifdef m15 - SWAP 7, 15 +%endif +%ifdef m9 + SWAP 7, 9 %else mova m7, lim_sign %endif @@ -2656,17 +2756,20 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5 %if mmsize == 8 ; mmx/mmxext (h) WRITE_4x2D 1, 2, 3, 4, dst_reg, dst2_reg, mstride_reg, stride_reg add dst_reg, 4 - WRITE_8W m5, m6, dst2_reg, dst_reg, 
mstride_reg, stride_reg, %4 + WRITE_2x4W m5, m6, dst2_reg, dst_reg, mstride_reg, stride_reg %else ; sse2 (h) lea dst8_reg, [dst8_reg+mstride_reg+1] WRITE_4x4D 1, 2, 3, 4, dst_reg, dst2_reg, dst8_reg, mstride_reg, stride_reg, %4 lea dst_reg, [dst2_reg+mstride_reg+4] lea dst8_reg, [dst8_reg+mstride_reg+4] - WRITE_8W m5, m5, dst2_reg, dst_reg, mstride_reg, stride_reg, %2 -%ifidn %2, sse4 - lea dst_reg, [dst8_reg+ stride_reg] +%ifidn %1, sse4 + add dst2_reg, 4 +%endif + WRITE_8W m5, dst2_reg, dst_reg, mstride_reg, stride_reg +%ifidn %1, sse4 + lea dst2_reg, [dst8_reg+ stride_reg] %endif - WRITE_8W m6, m6, dst2_reg, dst8_reg, mstride_reg, stride_reg, %2 + WRITE_8W m6, dst2_reg, dst8_reg, mstride_reg, stride_reg %endif %endif @@ -2696,38 +2799,44 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5 %endmacro INIT_MMX +%define SPLATB_REG SPLATB_REG_MMX MBEDGE_LOOPFILTER mmx, v, 6, 16, 0 MBEDGE_LOOPFILTER mmx, h, 6, 16, 0 -MBEDGE_LOOPFILTER mmxext, v, 6, 16, 0 -MBEDGE_LOOPFILTER mmxext, h, 6, 16, 0 - MBEDGE_LOOPFILTER mmx, v, 6, 8, 0 MBEDGE_LOOPFILTER mmx, h, 6, 8, 0 + +%define SPLATB_REG SPLATB_REG_MMXEXT +MBEDGE_LOOPFILTER mmxext, v, 6, 16, 0 +MBEDGE_LOOPFILTER mmxext, h, 6, 16, 0 MBEDGE_LOOPFILTER mmxext, v, 6, 8, 0 MBEDGE_LOOPFILTER mmxext, h, 6, 8, 0 INIT_XMM -MBEDGE_LOOPFILTER sse2, v, 5, 16, 16 +%define SPLATB_REG SPLATB_REG_SSE2 +%define WRITE_8W WRITE_8W_SSE2 +MBEDGE_LOOPFILTER sse2, v, 5, 16, 15 %ifdef m8 -MBEDGE_LOOPFILTER sse2, h, 5, 16, 16 +MBEDGE_LOOPFILTER sse2, h, 5, 16, 15 %else -MBEDGE_LOOPFILTER sse2, h, 6, 16, 16 +MBEDGE_LOOPFILTER sse2, h, 6, 16, 15 %endif -MBEDGE_LOOPFILTER sse2, v, 6, 8, 16 -MBEDGE_LOOPFILTER sse2, h, 6, 8, 16 +MBEDGE_LOOPFILTER sse2, v, 6, 8, 15 +MBEDGE_LOOPFILTER sse2, h, 6, 8, 15 -MBEDGE_LOOPFILTER ssse3, v, 5, 16, 16 +%define SPLATB_REG SPLATB_REG_SSSE3 +MBEDGE_LOOPFILTER ssse3, v, 5, 16, 15 %ifdef m8 -MBEDGE_LOOPFILTER ssse3, h, 5, 16, 16 +MBEDGE_LOOPFILTER ssse3, h, 5, 16, 15 %else -MBEDGE_LOOPFILTER ssse3, h, 6, 16, 16 
+MBEDGE_LOOPFILTER ssse3, h, 6, 16, 15 %endif -MBEDGE_LOOPFILTER ssse3, v, 6, 8, 16 -MBEDGE_LOOPFILTER ssse3, h, 6, 8, 16 +MBEDGE_LOOPFILTER ssse3, v, 6, 8, 15 +MBEDGE_LOOPFILTER ssse3, h, 6, 8, 15 +%define WRITE_8W WRITE_8W_SSE4 %ifdef m8 -MBEDGE_LOOPFILTER sse4, h, 5, 16, 16 +MBEDGE_LOOPFILTER sse4, h, 5, 16, 15 %else -MBEDGE_LOOPFILTER sse4, h, 6, 16, 16 +MBEDGE_LOOPFILTER sse4, h, 6, 16, 15 %endif -MBEDGE_LOOPFILTER sse4, h, 6, 8, 16 +MBEDGE_LOOPFILTER sse4, h, 6, 8, 15 diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcore/avcore.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcore/avcore.h new file mode 100644 index 000000000..5a2a7c73f --- /dev/null +++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcore/avcore.h @@ -0,0 +1,58 @@ +/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCORE_AVCORE_H
+#define AVCORE_AVCORE_H
+
+/**
+ * @file
+ * shared media utilities for the libav* libraries
+ */
+
+#include <libavutil/avutil.h>
+
+#define LIBAVCORE_VERSION_MAJOR 0
+#define LIBAVCORE_VERSION_MINOR 2
+#define LIBAVCORE_VERSION_MICRO 0
+
+#define LIBAVCORE_VERSION_INT AV_VERSION_INT(LIBAVCORE_VERSION_MAJOR, \
+ LIBAVCORE_VERSION_MINOR, \
+ LIBAVCORE_VERSION_MICRO)
+#define LIBAVCORE_VERSION AV_VERSION(LIBAVCORE_VERSION_MAJOR, \
+ LIBAVCORE_VERSION_MINOR, \
+ LIBAVCORE_VERSION_MICRO)
+#define LIBAVCORE_BUILD LIBAVCORE_VERSION_INT
+
+#define LIBAVCORE_IDENT "Lavcore" AV_STRINGIFY(LIBAVCORE_VERSION)
+
+/**
+ * Return the LIBAVCORE_VERSION_INT constant.
+ */
+unsigned avcore_version(void);
+
+/**
+ * Return the libavcore build-time configuration.
+ */
+const char *avcore_configuration(void);
+
+/**
+ * Return the libavcore license.
+ */
+const char *avcore_license(void);
+
+#endif /* AVCORE_AVCORE_H */
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcore/avcore_utils.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcore/avcore_utils.c new file mode 100644 index 000000000..badb9ee36 --- /dev/null +++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcore/avcore_utils.c @@ -0,0 +1,43 @@ +/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "avcore.h"
+
+/**
+ * @file
+ * various utility functions
+ */
+
+/**
+ * Report the version this libavcore was compiled as.
+ *
+ * @return the LIBAVCORE_VERSION_INT constant
+ */
+unsigned avcore_version(void)
+{
+    const unsigned version = LIBAVCORE_VERSION_INT;
+
+    return version;
+}
+
+#if 0
+const char *avcore_configuration(void)
+{
+ return FFMPEG_CONFIGURATION;
+}
+
+const char *avcore_license(void)
+{
+#define LICENSE_PREFIX "libavcore license: "
+ return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1;
+}
+#endif
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcore/imgutils.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcore/imgutils.c new file mode 100644 index 000000000..b14dd32a8 --- /dev/null +++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcore/imgutils.c @@ -0,0 +1,97 @@ +/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * misc image utilities
+ */
+
+#include "imgutils.h"
+#include "libavutil/pixdesc.h"
+
+/**
+ * Compute the line size of every plane of an image.
+ *
+ * @param linesizes array receiving one line size per plane; it is cleared
+ *                  to zero first, so it is all zero on failure
+ * @param pix_fmt   pixel format of the image
+ * @param width     image width
+ * @return 0 on success, AVERROR(EINVAL) if pix_fmt is out of range or
+ *         describes a hardware accelerated format
+ */
+int av_fill_image_linesizes(int linesizes[4], enum PixelFormat pix_fmt, int width)
+{
+    int i;
+    const AVPixFmtDescriptor *desc;
+    int max_step     [4];       /* max pixel step for each plane */
+    int max_step_comp[4];       /* the component for each plane which has the max pixel step */
+
+    memset(linesizes, 0, 4*sizeof(linesizes[0]));
+
+    /* Validate pix_fmt before using it as an index: PIX_FMT_NONE (-1) or any
+     * other out-of-range value would read outside av_pix_fmt_descriptors[]. */
+    if ((unsigned)pix_fmt >= PIX_FMT_NB)
+        return AVERROR(EINVAL);
+    desc = &av_pix_fmt_descriptors[pix_fmt];
+
+    if (desc->flags & PIX_FMT_HWACCEL)
+        return AVERROR(EINVAL);
+
+    if (desc->flags & PIX_FMT_BITSTREAM) {
+        /* the step is counted in bits here: round the line up to whole bytes */
+        linesizes[0] = (width * (desc->comp[0].step_minus1+1) + 7) >> 3;
+        return 0;
+    }
+
+    memset(max_step     , 0, sizeof(max_step     ));
+    memset(max_step_comp, 0, sizeof(max_step_comp));
+    for (i = 0; i < 4; i++) {
+        const AVComponentDescriptor *comp = &(desc->comp[i]);
+        if ((comp->step_minus1+1) > max_step[comp->plane]) {
+            max_step     [comp->plane] = comp->step_minus1+1;
+            max_step_comp[comp->plane] = i;
+        }
+    }
+
+    for (i = 0; i < 4; i++) {
+        /* planes led by component 1 or 2 are horizontally subsampled by
+         * log2_chroma_w; the width is rounded up before shifting */
+        int s = (max_step_comp[i] == 1 || max_step_comp[i] == 2) ? desc->log2_chroma_w : 0;
+        linesizes[i] = max_step[i] * (((width + (1 << s) - 1)) >> s);
+    }
+
+    return 0;
+}
+
+/**
+ * Set up the per-plane data pointers of an image stored contiguously in ptr.
+ *
+ * @param data      array receiving one pointer per plane; it is cleared to
+ *                  zero first, so entries of absent planes stay zero
+ * @param pix_fmt   pixel format of the image
+ * @param height    image height, in lines of plane 0
+ * @param ptr       start of the buffer holding the image
+ * @param linesizes line size of each plane
+ * @return the number of bytes covered by all planes (including the
+ *         256-entry palette for paletted formats), AVERROR(EINVAL) if
+ *         pix_fmt is out of range or describes a hardware accelerated format
+ */
+int av_fill_image_pointers(uint8_t *data[4], enum PixelFormat pix_fmt, int height,
+                           uint8_t *ptr, const int linesizes[4])
+{
+    int i, total_size, size[4], has_plane[4];
+    const AVPixFmtDescriptor *desc;
+
+    memset(data     , 0, sizeof(data[0])*4);
+    memset(size     , 0, sizeof(size));
+    memset(has_plane, 0, sizeof(has_plane));
+
+    /* Validate pix_fmt before using it as an index: PIX_FMT_NONE (-1) or any
+     * other out-of-range value would read outside av_pix_fmt_descriptors[]. */
+    if ((unsigned)pix_fmt >= PIX_FMT_NB)
+        return AVERROR(EINVAL);
+    desc = &av_pix_fmt_descriptors[pix_fmt];
+
+    if (desc->flags & PIX_FMT_HWACCEL)
+        return AVERROR(EINVAL);
+
+    data[0] = ptr;
+    size[0] = linesizes[0] * height;
+
+    if (desc->flags & PIX_FMT_PAL) {
+        size[0] = (size[0] + 3) & ~3;   /* keep the palette 4-byte aligned */
+        data[1] = ptr + size[0];        /* palette is stored here as 256 32-bit words */
+        return size[0] + 256 * 4;
+    }
+
+    for (i = 0; i < 4; i++)
+        has_plane[desc->comp[i].plane] = 1;
+
+    total_size = size[0];
+    /* The bound check must come first: with four planes present the old
+     * order (has_plane[i] && i < 4) read has_plane[4], one past the array. */
+    for (i = 1; i < 4 && has_plane[i]; i++) {
+        /* planes 1 and 2 are vertically subsampled by log2_chroma_h */
+        int h, s = (i == 1 || i == 2) ? desc->log2_chroma_h : 0;
+        data[i] = data[i-1] + size[i-1];
+        h = (height + (1 << s) - 1) >> s;
+        size[i] = h * linesizes[i];
+        total_size += size[i];
+    }
+
+    return total_size;
+}
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcore/imgutils.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcore/imgutils.h new file mode 100644 index 000000000..b8024de8e --- /dev/null +++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcore/imgutils.h @@ -0,0 +1,53 @@ +/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCORE_IMGUTILS_H
+#define AVCORE_IMGUTILS_H
+
+/**
+ * @file
+ * misc image utilities
+ */
+
+#include "libavutil/pixfmt.h"
+#include "avcore.h"
+
+/**
+ * Fill plane linesizes for an image with pixel format pix_fmt and
+ * width width.
+ *
+ * @param linesizes array to be filled with the linesize for each plane
+ * @return >= 0 in case of success, a negative error code otherwise
+ */
+int av_fill_image_linesizes(int linesizes[4], enum PixelFormat pix_fmt, int width);
+
+/**
+ * Fill plane data pointers for an image with pixel format pix_fmt and
+ * height height.
+ *
+ * @param data pointers array to be filled with the pointer for each image plane
+ * @param ptr the pointer to a buffer which will contain the image
+ * @param linesizes the array containing the linesize for each
+ * plane, should be filled by av_fill_image_linesizes()
+ * @return the size in bytes required for the image buffer, a negative
+ * error code in case of failure
+ */
+int av_fill_image_pointers(uint8_t *data[4], enum PixelFormat pix_fmt, int height,
+ uint8_t *ptr, const int linesizes[4]);
+
+#endif /* AVCORE_IMGUTILS_H */
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/log.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/log.c index b40d0e591..b9f4e902a 100644 --- a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/log.c +++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/log.c @@ -63,6 +63,7 @@ void av_log_default_callback(void* ptr, int level, const char* fmt, va_list vl) print_prefix= line[strlen(line)-1] == '\n';
if(print_prefix && !strcmp(line, prev)){
count++;
+ fprintf(stderr, " Last message repeated %d times\r", count);
return;
}
if(count>0){
diff --git a/src/filters/transform/MpaDecFilter/MpaDecFilter.vcproj b/src/filters/transform/MpaDecFilter/MpaDecFilter.vcproj index d55f2230b..9a06fb0f2 100644 --- a/src/filters/transform/MpaDecFilter/MpaDecFilter.vcproj +++ b/src/filters/transform/MpaDecFilter/MpaDecFilter.vcproj @@ -45,7 +45,7 @@ <Tool
Name="VCCLCompilerTool"
AdditionalOptions="/MP"
- AdditionalIncludeDirectories="..\..\..\..\include;libflac\include;..\mpcvideodec\ffmpeg;..\mpcvideodec\ffmpeg\libavcodec;..\mpcvideodec\ffmpeg\libavutil;..\..\BaseClasses"
+ AdditionalIncludeDirectories="..\..\..\..\include;libflac\include;..\mpcvideodec\ffmpeg;..\mpcvideodec\ffmpeg\libavcodec;..\mpcvideodec\ffmpeg\libavcore;..\mpcvideodec\ffmpeg\libavutil;..\..\BaseClasses"
PreprocessorDefinitions="REGISTER_FILTER;WIN32;_DEBUG;_USRDLL;FLAC__NO_DLL"
/>
<Tool
@@ -117,7 +117,7 @@ <Tool
Name="VCCLCompilerTool"
AdditionalOptions="/MP"
- AdditionalIncludeDirectories="..\..\..\..\include;libflac\include;..\mpcvideodec\ffmpeg;..\mpcvideodec\ffmpeg\libavcodec;..\mpcvideodec\ffmpeg\libavutil;..\..\BaseClasses"
+ AdditionalIncludeDirectories="..\..\..\..\include;libflac\include;..\mpcvideodec\ffmpeg;..\mpcvideodec\ffmpeg\libavcodec;..\mpcvideodec\ffmpeg\libavcore;..\mpcvideodec\ffmpeg\libavutil;..\..\BaseClasses"
PreprocessorDefinitions="REGISTER_FILTER;WIN32;_DEBUG;_USRDLL;FLAC__NO_DLL"
DebugInformationFormat="3"
/>
@@ -189,7 +189,7 @@ <Tool
Name="VCCLCompilerTool"
AdditionalOptions="/MP"
- AdditionalIncludeDirectories="..\..\..\..\include;libflac\include;..\mpcvideodec\ffmpeg;..\mpcvideodec\ffmpeg\libavcodec;..\mpcvideodec\ffmpeg\libavutil;..\..\BaseClasses"
+ AdditionalIncludeDirectories="..\..\..\..\include;libflac\include;..\mpcvideodec\ffmpeg;..\mpcvideodec\ffmpeg\libavcodec;..\mpcvideodec\ffmpeg\libavcore;..\mpcvideodec\ffmpeg\libavutil;..\..\BaseClasses"
PreprocessorDefinitions="REGISTER_FILTER;WIN32;NDEBUG;_USRDLL;FLAC__NO_DLL"
/>
<Tool
@@ -263,7 +263,7 @@ <Tool
Name="VCCLCompilerTool"
AdditionalOptions="/MP"
- AdditionalIncludeDirectories="..\..\..\..\include;libflac\include;..\mpcvideodec\ffmpeg;..\mpcvideodec\ffmpeg\libavcodec;..\mpcvideodec\ffmpeg\libavutil;..\..\BaseClasses"
+ AdditionalIncludeDirectories="..\..\..\..\include;libflac\include;..\mpcvideodec\ffmpeg;..\mpcvideodec\ffmpeg\libavcodec;..\mpcvideodec\ffmpeg\libavcore;..\mpcvideodec\ffmpeg\libavutil;..\..\BaseClasses"
PreprocessorDefinitions="REGISTER_FILTER;WIN32;NDEBUG;_USRDLL;FLAC__NO_DLL"
EnableEnhancedInstructionSet="0"
/>
@@ -336,7 +336,7 @@ <Tool
Name="VCCLCompilerTool"
AdditionalOptions="/MP"
- AdditionalIncludeDirectories="..\..\..\..\include;libflac\include;..\mpcvideodec\ffmpeg;..\mpcvideodec\ffmpeg\libavcodec;..\mpcvideodec\ffmpeg\libavutil;..\..\BaseClasses"
+ AdditionalIncludeDirectories="..\..\..\..\include;libflac\include;..\mpcvideodec\ffmpeg;..\mpcvideodec\ffmpeg\libavcodec;..\mpcvideodec\ffmpeg\libavcore;..\mpcvideodec\ffmpeg\libavutil;..\..\BaseClasses"
PreprocessorDefinitions="WIN32;_DEBUG;FLAC__NO_DLL"
/>
<Tool
@@ -395,7 +395,7 @@ <Tool
Name="VCCLCompilerTool"
AdditionalOptions="/MP"
- AdditionalIncludeDirectories="..\..\..\..\include;libflac\include;..\mpcvideodec\ffmpeg;..\mpcvideodec\ffmpeg\libavcodec;..\mpcvideodec\ffmpeg\libavutil;..\..\BaseClasses"
+ AdditionalIncludeDirectories="..\..\..\..\include;libflac\include;..\mpcvideodec\ffmpeg;..\mpcvideodec\ffmpeg\libavcodec;..\mpcvideodec\ffmpeg\libavcore;..\mpcvideodec\ffmpeg\libavutil;..\..\BaseClasses"
PreprocessorDefinitions="_WIN64;_DEBUG;FLAC__NO_DLL"
DebugInformationFormat="3"
/>
@@ -454,7 +454,7 @@ <Tool
Name="VCCLCompilerTool"
AdditionalOptions="/MP"
- AdditionalIncludeDirectories="..\..\..\..\include;libflac\include;..\mpcvideodec\ffmpeg;..\mpcvideodec\ffmpeg\libavcodec;..\mpcvideodec\ffmpeg\libavutil;..\..\BaseClasses"
+ AdditionalIncludeDirectories="..\..\..\..\include;libflac\include;..\mpcvideodec\ffmpeg;..\mpcvideodec\ffmpeg\libavcodec;..\mpcvideodec\ffmpeg\libavcore;..\mpcvideodec\ffmpeg\libavutil;..\..\BaseClasses"
PreprocessorDefinitions="WIN32;NDEBUG;FLAC__NO_DLL"
UsePrecompiledHeader="0"
/>
@@ -514,7 +514,7 @@ <Tool
Name="VCCLCompilerTool"
AdditionalOptions="/MP"
- AdditionalIncludeDirectories="..\..\..\..\include;libflac\include;..\mpcvideodec\ffmpeg;..\mpcvideodec\ffmpeg\libavcodec;..\mpcvideodec\ffmpeg\libavutil;..\..\BaseClasses"
+ AdditionalIncludeDirectories="..\..\..\..\include;libflac\include;..\mpcvideodec\ffmpeg;..\mpcvideodec\ffmpeg\libavcodec;..\mpcvideodec\ffmpeg\libavcore;..\mpcvideodec\ffmpeg\libavutil;..\..\BaseClasses"
PreprocessorDefinitions="_WIN64;NDEBUG;FLAC__NO_DLL"
EnableEnhancedInstructionSet="0"
UsePrecompiledHeader="0"
diff --git a/src/filters/transform/MpaDecFilter/MpaDecFilter.vcxproj b/src/filters/transform/MpaDecFilter/MpaDecFilter.vcxproj index 439e607d2..f99038e12 100644 --- a/src/filters/transform/MpaDecFilter/MpaDecFilter.vcxproj +++ b/src/filters/transform/MpaDecFilter/MpaDecFilter.vcxproj @@ -152,7 +152,7 @@ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug Filter|Win32'">
<ClCompile>
<AdditionalOptions>/MP %(AdditionalOptions)</AdditionalOptions>
- <AdditionalIncludeDirectories>..\..\..\..\include;libflac\include;..\mpcvideodec\ffmpeg;..\mpcvideodec\ffmpeg\libavcodec;..\mpcvideodec\ffmpeg\libavutil;..\..\BaseClasses;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <AdditionalIncludeDirectories>..\..\..\..\include;libflac\include;..\mpcvideodec\ffmpeg;..\mpcvideodec\ffmpeg\libavcodec;..\mpcvideodec\ffmpeg\libavcore;..\mpcvideodec\ffmpeg\libavutil;..\..\BaseClasses;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>REGISTER_FILTER;WIN32;_DEBUG;_USRDLL;FLAC__NO_DLL;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ClCompile>
<Link>
@@ -173,7 +173,7 @@ </Midl>
<ClCompile>
<AdditionalOptions>/MP %(AdditionalOptions)</AdditionalOptions>
- <AdditionalIncludeDirectories>..\..\..\..\include;libflac\include;..\mpcvideodec\ffmpeg;..\mpcvideodec\ffmpeg\libavcodec;..\mpcvideodec\ffmpeg\libavutil;..\..\BaseClasses;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <AdditionalIncludeDirectories>..\..\..\..\include;libflac\include;..\mpcvideodec\ffmpeg;..\mpcvideodec\ffmpeg\libavcodec;..\mpcvideodec\ffmpeg\libavcore;..\mpcvideodec\ffmpeg\libavutil;..\..\BaseClasses;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>REGISTER_FILTER;WIN32;_DEBUG;_USRDLL;FLAC__NO_DLL;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
</ClCompile>
@@ -192,7 +192,7 @@ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release Filter|Win32'">
<ClCompile>
<AdditionalOptions>/MP %(AdditionalOptions)</AdditionalOptions>
- <AdditionalIncludeDirectories>..\..\..\..\include;libflac\include;..\mpcvideodec\ffmpeg;..\mpcvideodec\ffmpeg\libavcodec;..\mpcvideodec\ffmpeg\libavutil;..\..\BaseClasses;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <AdditionalIncludeDirectories>..\..\..\..\include;libflac\include;..\mpcvideodec\ffmpeg;..\mpcvideodec\ffmpeg\libavcodec;..\mpcvideodec\ffmpeg\libavcore;..\mpcvideodec\ffmpeg\libavutil;..\..\BaseClasses;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>REGISTER_FILTER;WIN32;NDEBUG;_USRDLL;FLAC__NO_DLL;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ClCompile>
<Link>
@@ -214,7 +214,7 @@ </Midl>
<ClCompile>
<AdditionalOptions>/MP %(AdditionalOptions)</AdditionalOptions>
- <AdditionalIncludeDirectories>..\..\..\..\include;libflac\include;..\mpcvideodec\ffmpeg;..\mpcvideodec\ffmpeg\libavcodec;..\mpcvideodec\ffmpeg\libavutil;..\..\BaseClasses;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <AdditionalIncludeDirectories>..\..\..\..\include;libflac\include;..\mpcvideodec\ffmpeg;..\mpcvideodec\ffmpeg\libavcodec;..\mpcvideodec\ffmpeg\libavcore;..\mpcvideodec\ffmpeg\libavutil;..\..\BaseClasses;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>REGISTER_FILTER;WIN32;NDEBUG;_USRDLL;FLAC__NO_DLL;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<EnableEnhancedInstructionSet>NotSet</EnableEnhancedInstructionSet>
</ClCompile>
@@ -234,7 +234,7 @@ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile>
<AdditionalOptions>/MP %(AdditionalOptions)</AdditionalOptions>
- <AdditionalIncludeDirectories>..\..\..\..\include;libflac\include;..\mpcvideodec\ffmpeg;..\mpcvideodec\ffmpeg\libavcodec;..\mpcvideodec\ffmpeg\libavutil;..\..\BaseClasses;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <AdditionalIncludeDirectories>..\..\..\..\include;libflac\include;..\mpcvideodec\ffmpeg;..\mpcvideodec\ffmpeg\libavcodec;..\mpcvideodec\ffmpeg\libavcore;..\mpcvideodec\ffmpeg\libavutil;..\..\BaseClasses;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>WIN32;_DEBUG;FLAC__NO_DLL;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ClCompile>
<Lib>
@@ -249,7 +249,7 @@ </Midl>
<ClCompile>
<AdditionalOptions>/MP %(AdditionalOptions)</AdditionalOptions>
- <AdditionalIncludeDirectories>..\..\..\..\include;libflac\include;..\mpcvideodec\ffmpeg;..\mpcvideodec\ffmpeg\libavcodec;..\mpcvideodec\ffmpeg\libavutil;..\..\BaseClasses;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <AdditionalIncludeDirectories>..\..\..\..\include;libflac\include;..\mpcvideodec\ffmpeg;..\mpcvideodec\ffmpeg\libavcodec;..\mpcvideodec\ffmpeg\libavcore;..\mpcvideodec\ffmpeg\libavutil;..\..\BaseClasses;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>_WIN64;_DEBUG;FLAC__NO_DLL;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
</ClCompile>
@@ -262,7 +262,7 @@ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<ClCompile>
<AdditionalOptions>/MP %(AdditionalOptions)</AdditionalOptions>
- <AdditionalIncludeDirectories>..\..\..\..\include;libflac\include;..\mpcvideodec\ffmpeg;..\mpcvideodec\ffmpeg\libavcodec;..\mpcvideodec\ffmpeg\libavutil;..\..\BaseClasses;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <AdditionalIncludeDirectories>..\..\..\..\include;libflac\include;..\mpcvideodec\ffmpeg;..\mpcvideodec\ffmpeg\libavcodec;..\mpcvideodec\ffmpeg\libavcore;..\mpcvideodec\ffmpeg\libavutil;..\..\BaseClasses;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>WIN32;NDEBUG;FLAC__NO_DLL;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PrecompiledHeader>
</PrecompiledHeader>
@@ -280,7 +280,7 @@ </Midl>
<ClCompile>
<AdditionalOptions>/MP %(AdditionalOptions)</AdditionalOptions>
- <AdditionalIncludeDirectories>..\..\..\..\include;libflac\include;..\mpcvideodec\ffmpeg;..\mpcvideodec\ffmpeg\libavcodec;..\mpcvideodec\ffmpeg\libavutil;..\..\BaseClasses;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <AdditionalIncludeDirectories>..\..\..\..\include;libflac\include;..\mpcvideodec\ffmpeg;..\mpcvideodec\ffmpeg\libavcodec;..\mpcvideodec\ffmpeg\libavcore;..\mpcvideodec\ffmpeg\libavutil;..\..\BaseClasses;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>_WIN64;NDEBUG;FLAC__NO_DLL;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<EnableEnhancedInstructionSet>NotSet</EnableEnhancedInstructionSet>
<PrecompiledHeader>
|