github.com/mpc-hc/mpc-hc.git
Diffstat (limited to 'src/filters/transform/MPCVideoDec')
-rw-r--r--  src/filters/transform/MPCVideoDec/FfmpegContext.c | 38
-rw-r--r--  src/filters/transform/MPCVideoDec/FfmpegContext.h | 9
-rw-r--r--  src/filters/transform/MPCVideoDec/MPCVideoDec.vcproj | 10
-rw-r--r--  src/filters/transform/MPCVideoDec/MPCVideoDec.vcxproj | 22
-rw-r--r--  src/filters/transform/MPCVideoDec/MPCVideoDecFilter.cpp | 17
-rw-r--r--  src/filters/transform/MPCVideoDec/MPCVideoDecFilter.h | 2
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/Makefile | 23
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/Makefile_2010 | 23
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/config.h | 61
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/custom_code.txt | 2
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcodec.def | 4
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcodec.vcproj | 24
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcodec.vcxproj | 4
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcodec.vcxproj.filters | 12
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/ac3dec.c | 4
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/adpcm.c | 4
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/audioconvert.h | 8
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/avcodec.h | 173
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/dsputil.c | 59
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/dsputil.h | 56
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/flvdec.c | 2
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h263.h | 2
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h263dec.c | 7
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264.c | 15
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264.h | 12
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264_ps.c | 2
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/imgconvert.c | 53
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/libamr.c | 4
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mjpegdec.c | 6
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mjpegdec.h | 2
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mlpdec.c | 4
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mpeg12.c | 4
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/nellymoserdec.c | 4
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/rv10.c | 6
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/rv34.c | 4
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/rv34.h | 2
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/rv40.c | 2
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/sp5xdec.c | 106
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/svq1dec.c | 4
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/svq3.c | 4
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/utils.c | 66
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vc1dec.c | 4
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vc1dsp.c | 4
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp3.c | 27
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp56.c | 7
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp56.h | 2
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp56dsp.c | 2
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp8.c | 8
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/config.asm | 1
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_h264_template_mmx.c | 304
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_h264_template_ssse3.c | 208
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_mmx.c | 342
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_mmx.h | 37
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft.c | 13
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_mmx.asm | 45
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264_chromamc.asm | 671
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264_deblock.asm (renamed from src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264_deblock_sse2.asm) | 240
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264_idct.asm | 865
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264_idct_sse2.asm | 54
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264_intrapred_init.c | 103
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264_qpel_mmx.c | 1209
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264_weight.asm | 375
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264_weight_sse2.asm | 170
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264dsp_mmx.c | 2422
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/idct_sse2_xvid.c | 4
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/mpegvideo_mmx.c | 13
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/simple_idct_mmx.c | 4
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vc1dsp_mmx.c | 15
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vc1dsp_yasm.asm | 6
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp3dsp.asm | 618
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp3dsp_mmx.c | 436
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp3dsp_mmx.h | 36
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp3dsp_sse2.c | 187
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp3dsp_sse2.h | 31
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp56dsp.asm | 10
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp56dsp_init.c | 7
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp8dsp-init.c | 17
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp8dsp.asm | 26
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcore/avcore.h | 12
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcore/imgutils.c | 110
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavcore/imgutils.h | 70
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavutil/avstring.c | 102
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavutil/avutil.h | 2
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavutil/common.h | 17
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavutil/cpu.c | 68
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavutil/cpu.h | 52
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavutil/eval.c | 523
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavutil/internal.h | 2
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavutil/intfloat_readwrite.c | 2
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavutil/lls.c | 2
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavutil/log.c | 8
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavutil/pixdesc.c | 33
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavutil/x86/bswap.h | 14
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/libavutil/x86/cpu.c (renamed from src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/cpuid.c) | 52
-rw-r--r--  src/filters/transform/MPCVideoDec/ffmpeg/makefile_c.inc | 2
95 files changed, 5914 insertions, 4546 deletions
diff --git a/src/filters/transform/MPCVideoDec/FfmpegContext.c b/src/filters/transform/MPCVideoDec/FfmpegContext.c
index f2f902dd4..2a94030e5 100644
--- a/src/filters/transform/MPCVideoDec/FfmpegContext.c
+++ b/src/filters/transform/MPCVideoDec/FfmpegContext.c
@@ -56,11 +56,11 @@ const byte ZZ_SCAN8[64] =
// FIXME : remove duplicate declaration with ffmpeg ??
typedef struct Mpeg1Context {
MpegEncContext mpeg_enc_ctx;
- int mpeg_enc_ctx_allocated; /* true if decoding context allocated */
- int repeat_field; /* true if we must repeat the field */
- AVPanScan pan_scan; /** some temporary storage for the panscan */
+ int mpeg_enc_ctx_allocated; /* true if decoding context allocated */
+ int repeat_field; /* true if we must repeat the field */
+ AVPanScan pan_scan; /** some temporary storage for the panscan */
int slice_count;
- int swap_uv;//indicate VCR2
+ int swap_uv; //indicate VCR2
int save_aspect_info;
int save_width, save_height, save_progressive_seq;
AVRational frame_rate_ext; ///< MPEG-2 specific framerate modificator
@@ -68,19 +68,26 @@ typedef struct Mpeg1Context {
DXVA_SliceInfo* pSliceInfo;
} Mpeg1Context;
-
-int IsVista()
+BOOL IsVistaOrAbove()
{
+ //only check once then cache the result
+ static BOOL checked = FALSE;
+ static BOOL result = FALSE;
OSVERSIONINFO osver;
- osver.dwOSVersionInfoSize = sizeof( OSVERSIONINFO );
+ if (!checked)
+ {
+ checked = TRUE;
+
+ osver.dwOSVersionInfoSize = sizeof( OSVERSIONINFO );
- if ( GetVersionEx( &osver ) &&
+ if (GetVersionEx( &osver ) &&
osver.dwPlatformId == VER_PLATFORM_WIN32_NT &&
(osver.dwMajorVersion >= 6 ) )
- return 1;
+ result = TRUE;
+ }
- return 0;
+ return result;
}
char* GetFFMpegPictureType(int nType)
@@ -162,6 +169,7 @@ int FFH264CheckCompatibility(int nWidth, int nHeight, struct AVCodecContext* pAV
int video_is_level51 = 0;
int no_level51_support = 1;
int too_much_ref_frames = 0;
+ int profile_higher_than_high = 0;
int max_ref_frames = 0;
int max_ref_frames_dpb41 = min(11, 8388608/(nWidth * nHeight) );
@@ -176,12 +184,13 @@ int FFH264CheckCompatibility(int nWidth, int nHeight, struct AVCodecContext* pAV
if (cur_sps != NULL)
{
video_is_level51 = cur_sps->level_idc >= 51 ? 1 : 0;
+ profile_higher_than_high = (cur_sps->profile_idc > 100);
max_ref_frames = max_ref_frames_dpb41; // default value is calculate
if (nPCIVendor == PCIV_nVidia)
{
// nVidia cards support level 5.1 since drivers v6.14.11.7800 for XP and drivers v7.15.11.7800 for Vista/7
- if (IsVista())
+ if (IsVistaOrAbove())
{
if (DriverVersionCheck(VideoDriverVersion, 7, 15, 11, 7800))
{
@@ -191,7 +200,8 @@ int FFH264CheckCompatibility(int nWidth, int nHeight, struct AVCodecContext* pAV
if(nWidth >= 1280) {
max_ref_frames = 16;
}
- else {
+ else
+ {
max_ref_frames = 11;
}
}
@@ -231,7 +241,7 @@ int FFH264CheckCompatibility(int nWidth, int nHeight, struct AVCodecContext* pAV
}
}
- return (video_is_level51 * no_level51_support * DXVA_UNSUPPORTED_LEVEL) + (too_much_ref_frames * DXVA_TOO_MUCH_REF_FRAMES);
+ return (video_is_level51 * no_level51_support * DXVA_UNSUPPORTED_LEVEL) + (too_much_ref_frames * DXVA_TOO_MANY_REF_FRAMES) + (profile_higher_than_high * DXVA_PROFILE_HIGHER_THAN_HIGH);
}
@@ -430,7 +440,7 @@ void FFH264UpdateRefFramesList (DXVA_PicParams_H264* pDXVAPicParams, struct AVCo
else if (i >= h->short_ref_count && i < h->long_ref_count)
{
// Long list reference frames
- pic = h->short_ref[h->short_ref_count + h->long_ref_count - i - 1];
+ pic = h->short_ref[h->short_ref_count + h->long_ref_count - i - 1];
AssociatedFlag = 1;
}
else
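The rewritten check above calls GetVersionEx() once and caches the answer in function-local statics. As a standalone sketch (names follow the patch; the zero-initialization of osver is an addition for safety, the patch itself leaves it uninitialized until dwOSVersionInfoSize is set):

#include <windows.h>

BOOL IsVistaOrAbove()
{
    /* Checked once, then cached -- GetVersionEx is not re-queried. */
    static BOOL checked = FALSE;
    static BOOL result  = FALSE;

    if (!checked)
    {
        OSVERSIONINFO osver = {0};
        checked = TRUE;
        osver.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
        if (GetVersionEx(&osver) &&
            osver.dwPlatformId == VER_PLATFORM_WIN32_NT &&
            osver.dwMajorVersion >= 6)   /* Vista is NT 6.0 */
            result = TRUE;
    }
    return result;
}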
diff --git a/src/filters/transform/MPCVideoDec/FfmpegContext.h b/src/filters/transform/MPCVideoDec/FfmpegContext.h
index e817a8f2d..958b9a16d 100644
--- a/src/filters/transform/MPCVideoDec/FfmpegContext.h
+++ b/src/filters/transform/MPCVideoDec/FfmpegContext.h
@@ -36,9 +36,10 @@ enum PCI_Vendors
};
// Bitmasks for DXVA compatibility check
-#define DXVA_UNSUPPORTED_LEVEL 1
-#define DXVA_TOO_MUCH_REF_FRAMES 2
-#define DXVA_INCOMPATIBLE_SAR 4
+#define DXVA_UNSUPPORTED_LEVEL 1
+#define DXVA_TOO_MANY_REF_FRAMES 2
+#define DXVA_INCOMPATIBLE_SAR 4
+#define DXVA_PROFILE_HIGHER_THAN_HIGH 8
// === H264 functions
void FFH264DecodeBuffer (struct AVCodecContext* pAVCtx, BYTE* pBuffer, UINT nSize, int* pFramePOC, int* pOutPOC, REFERENCE_TIME* pOutrtStart);
@@ -59,7 +60,7 @@ HRESULT FFMpeg2DecodeFrame (DXVA_PictureParameters* pPicParams, DXVA_QmatrixDa
struct AVCodecContext* pAVCtx, struct AVFrame* pFrame, int* nNextCodecIndex, int* nFieldType, int* nSliceType, BYTE* pBuffer, UINT nSize);
// === Common functions
-int IsVista();
+BOOL IsVistaOrAbove();
char* GetFFMpegPictureType(int nType);
int FFIsInterlaced(struct AVCodecContext* pAVCtx, int nHeight);
unsigned long FFGetMBNumber(struct AVCodecContext* pAVCtx);
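The DXVA_* values above are bit flags, so FFH264CheckCompatibility() can report several problems at once in one return value. A sketch of how a caller might decompose the mask (the ReportDXVAIncompatibility helper is hypothetical, not part of the patch):

#include <stdio.h>

/* Values mirror FfmpegContext.h above. */
#define DXVA_UNSUPPORTED_LEVEL        1
#define DXVA_TOO_MANY_REF_FRAMES      2
#define DXVA_INCOMPATIBLE_SAR         4
#define DXVA_PROFILE_HIGHER_THAN_HIGH 8

static void ReportDXVAIncompatibility(int nCompat)
{
    if (nCompat & DXVA_UNSUPPORTED_LEVEL)
        printf("level >= 5.1 and the driver does not support it\n");
    if (nCompat & DXVA_TOO_MANY_REF_FRAMES)
        printf("too many reference frames for DXVA decoding\n");
    if (nCompat & DXVA_INCOMPATIBLE_SAR)
        printf("incompatible sample aspect ratio\n");
    if (nCompat & DXVA_PROFILE_HIGHER_THAN_HIGH)
        printf("H.264 profile_idc > 100 (beyond High)\n");
}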
diff --git a/src/filters/transform/MPCVideoDec/MPCVideoDec.vcproj b/src/filters/transform/MPCVideoDec/MPCVideoDec.vcproj
index e189fd1d4..1590b20f6 100644
--- a/src/filters/transform/MPCVideoDec/MPCVideoDec.vcproj
+++ b/src/filters/transform/MPCVideoDec/MPCVideoDec.vcproj
@@ -1,7 +1,7 @@
<?xml version="1.0" encoding="windows-1250"?>
<VisualStudioProject
ProjectType="Visual C++"
- Version="9,00"
+ Version="9.00"
Name="MPCVideoDec"
ProjectGUID="{58E36BF5-4A06-47E4-BD40-4CCEF8C634DF}"
RootNamespace="MPCVideoDec"
@@ -44,7 +44,6 @@
/>
<Tool
Name="VCCLCompilerTool"
- AdditionalOptions="/MP"
AdditionalIncludeDirectories="..\..\..\..\include;ffmpeg;ffmpeg\libavcodec;ffmpeg\libavcore;ffmpeg\libavutil;..\..\BaseClasses;&quot;$(DXSDK_DIR)Include&quot;"
PreprocessorDefinitions="REGISTER_FILTER;WIN32;_DEBUG;_USRDLL"
/>
@@ -114,7 +113,6 @@
/>
<Tool
Name="VCCLCompilerTool"
- AdditionalOptions="/MP"
AdditionalIncludeDirectories="..\..\..\..\include;ffmpeg;ffmpeg\libavcodec;ffmpeg\libavcore;ffmpeg\libavutil;..\..\BaseClasses;&quot;$(DXSDK_DIR)Include&quot;"
PreprocessorDefinitions="REGISTER_FILTER;WIN32;_DEBUG;_USRDLL"
DebugInformationFormat="3"
@@ -184,7 +182,6 @@
/>
<Tool
Name="VCCLCompilerTool"
- AdditionalOptions="/MP"
AdditionalIncludeDirectories="..\..\..\..\include;ffmpeg;ffmpeg\libavcodec;ffmpeg\libavcore;ffmpeg\libavutil;..\..\BaseClasses;&quot;$(DXSDK_DIR)Include&quot;"
PreprocessorDefinitions="REGISTER_FILTER;WIN32;NDEBUG;_USRDLL"
CompileAs="2"
@@ -259,7 +256,6 @@
/>
<Tool
Name="VCCLCompilerTool"
- AdditionalOptions="/MP"
AdditionalIncludeDirectories="..\..\..\..\include;ffmpeg;ffmpeg\libavcodec;ffmpeg\libavcore;ffmpeg\libavutil;..\..\BaseClasses;&quot;$(DXSDK_DIR)Include&quot;"
PreprocessorDefinitions="REGISTER_FILTER;WIN32;NDEBUG;_USRDLL"
EnableEnhancedInstructionSet="0"
@@ -333,7 +329,6 @@
/>
<Tool
Name="VCCLCompilerTool"
- AdditionalOptions="/MP"
AdditionalIncludeDirectories="..\..\..\..\include;ffmpeg;ffmpeg\libavcodec;ffmpeg\libavcore;ffmpeg\libavutil;..\..\BaseClasses;&quot;$(DXSDK_DIR)Include&quot;"
PreprocessorDefinitions="WIN32;_DEBUG"
/>
@@ -392,7 +387,6 @@
/>
<Tool
Name="VCCLCompilerTool"
- AdditionalOptions="/MP"
AdditionalIncludeDirectories="..\..\..\..\include;ffmpeg;ffmpeg\libavcodec;ffmpeg\libavcore;ffmpeg\libavutil;..\..\BaseClasses;&quot;$(DXSDK_DIR)Include&quot;"
PreprocessorDefinitions="_WIN64;_DEBUG;"
DebugInformationFormat="3"
@@ -451,7 +445,6 @@
/>
<Tool
Name="VCCLCompilerTool"
- AdditionalOptions="/MP"
AdditionalIncludeDirectories="..\..\..\..\include;ffmpeg;ffmpeg\libavcodec;ffmpeg\libavcore;ffmpeg\libavutil;..\..\BaseClasses;&quot;$(DXSDK_DIR)Include&quot;"
PreprocessorDefinitions="WIN32;NDEBUG"
UsePrecompiledHeader="0"
@@ -512,7 +505,6 @@
/>
<Tool
Name="VCCLCompilerTool"
- AdditionalOptions="/MP"
AdditionalIncludeDirectories="..\..\..\..\include;ffmpeg;ffmpeg\libavcodec;ffmpeg\libavcore;ffmpeg\libavutil;..\..\BaseClasses;&quot;$(DXSDK_DIR)Include&quot;"
PreprocessorDefinitions="_WIN64;NDEBUG"
EnableEnhancedInstructionSet="0"
diff --git a/src/filters/transform/MPCVideoDec/MPCVideoDec.vcxproj b/src/filters/transform/MPCVideoDec/MPCVideoDec.vcxproj
index a8bec74b7..d33a1a37f 100644
--- a/src/filters/transform/MPCVideoDec/MPCVideoDec.vcxproj
+++ b/src/filters/transform/MPCVideoDec/MPCVideoDec.vcxproj
@@ -33,14 +33,6 @@
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
- <ProjectConfiguration Include="Template|Win32">
- <Configuration>Template</Configuration>
- <Platform>Win32</Platform>
- </ProjectConfiguration>
- <ProjectConfiguration Include="Template|x64">
- <Configuration>Template</Configuration>
- <Platform>x64</Platform>
- </ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{58E36BF5-4A06-47E4-BD40-4CCEF8C634DF}</ProjectGuid>
@@ -68,9 +60,6 @@
<UseOfMfc>Static</UseOfMfc>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Template|x64'" Label="Configuration">
- <ConfigurationType>Application</ConfigurationType>
- </PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
<ConfigurationType>StaticLibrary</ConfigurationType>
<UseOfMfc>Static</UseOfMfc>
@@ -114,9 +103,6 @@
<Import Project="..\..\..\common.props" />
<Import Project="..\..\..\debug.props" />
</ImportGroup>
- <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Template|x64'" Label="PropertySheets">
- <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
- </ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
<Import Project="..\..\..\common.props" />
@@ -151,7 +137,6 @@
</PropertyGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug Filter|Win32'">
<ClCompile>
- <AdditionalOptions>/MP %(AdditionalOptions)</AdditionalOptions>
<AdditionalIncludeDirectories>..\..\..\..\include;ffmpeg;ffmpeg\libavcodec;ffmpeg\libavcore;ffmpeg\libavutil;..\..\BaseClasses;$(DXSDK_DIR)Include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>REGISTER_FILTER;WIN32;_DEBUG;_USRDLL;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ClCompile>
@@ -169,7 +154,6 @@
<TargetEnvironment>X64</TargetEnvironment>
</Midl>
<ClCompile>
- <AdditionalOptions>/MP %(AdditionalOptions)</AdditionalOptions>
<AdditionalIncludeDirectories>..\..\..\..\include;ffmpeg;ffmpeg\libavcodec;ffmpeg\libavcore;ffmpeg\libavutil;..\..\BaseClasses;$(DXSDK_DIR)Include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>REGISTER_FILTER;WIN32;_DEBUG;_USRDLL;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
@@ -185,7 +169,6 @@
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release Filter|Win32'">
<ClCompile>
- <AdditionalOptions>/MP %(AdditionalOptions)</AdditionalOptions>
<AdditionalIncludeDirectories>..\..\..\..\include;ffmpeg;ffmpeg\libavcodec;ffmpeg\libavcore;ffmpeg\libavutil;..\..\BaseClasses;$(DXSDK_DIR)Include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>REGISTER_FILTER;WIN32;NDEBUG;_USRDLL;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<CompileAs>CompileAsCpp</CompileAs>
@@ -208,7 +191,6 @@
<TargetEnvironment>X64</TargetEnvironment>
</Midl>
<ClCompile>
- <AdditionalOptions>/MP %(AdditionalOptions)</AdditionalOptions>
<AdditionalIncludeDirectories>..\..\..\..\include;ffmpeg;ffmpeg\libavcodec;ffmpeg\libavcore;ffmpeg\libavutil;..\..\BaseClasses;$(DXSDK_DIR)Include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>REGISTER_FILTER;WIN32;NDEBUG;_USRDLL;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<EnableEnhancedInstructionSet>NotSet</EnableEnhancedInstructionSet>
@@ -229,7 +211,6 @@
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile>
- <AdditionalOptions>/MP %(AdditionalOptions)</AdditionalOptions>
<AdditionalIncludeDirectories>..\..\..\..\include;ffmpeg;ffmpeg\libavcodec;ffmpeg\libavcore;ffmpeg\libavutil;..\..\BaseClasses;$(DXSDK_DIR)Include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>WIN32;_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ClCompile>
@@ -245,7 +226,6 @@
<TargetEnvironment>X64</TargetEnvironment>
</Midl>
<ClCompile>
- <AdditionalOptions>/MP %(AdditionalOptions)</AdditionalOptions>
<AdditionalIncludeDirectories>..\..\..\..\include;ffmpeg;ffmpeg\libavcodec;ffmpeg\libavcore;ffmpeg\libavutil;..\..\BaseClasses;$(DXSDK_DIR)Include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>_WIN64;_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
@@ -259,7 +239,6 @@
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<ClCompile>
- <AdditionalOptions>/MP %(AdditionalOptions)</AdditionalOptions>
<AdditionalIncludeDirectories>..\..\..\..\include;ffmpeg;ffmpeg\libavcodec;ffmpeg\libavcore;ffmpeg\libavutil;..\..\BaseClasses;$(DXSDK_DIR)Include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>WIN32;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PrecompiledHeader>
@@ -278,7 +257,6 @@
<TargetEnvironment>X64</TargetEnvironment>
</Midl>
<ClCompile>
- <AdditionalOptions>/MP %(AdditionalOptions)</AdditionalOptions>
<AdditionalIncludeDirectories>..\..\..\..\include;ffmpeg;ffmpeg\libavcodec;ffmpeg\libavcore;ffmpeg\libavutil;..\..\BaseClasses;$(DXSDK_DIR)Include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>_WIN64;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<EnableEnhancedInstructionSet>NotSet</EnableEnhancedInstructionSet>
diff --git a/src/filters/transform/MPCVideoDec/MPCVideoDecFilter.cpp b/src/filters/transform/MPCVideoDec/MPCVideoDecFilter.cpp
index 2a2ff9dbe..13a66a4d9 100644
--- a/src/filters/transform/MPCVideoDec/MPCVideoDecFilter.cpp
+++ b/src/filters/transform/MPCVideoDec/MPCVideoDecFilter.cpp
@@ -558,14 +558,13 @@ CMPCVideoDecFilter::CMPCVideoDecFilter(LPUNKNOWN lpunk, HRESULT* phr)
: CBaseVideoFilter(NAME("MPC - Video decoder"), lpunk, phr, __uuidof(this))
{
HWND hWnd = NULL;
- for (int i=0; i<countof(ffCodecs); i++)
+
+ if(IsVistaOrAbove())
{
- if(ffCodecs[i].nFFCodec == CODEC_ID_H264)
+ for (int i=0; i<countof(ffCodecs); i++)
{
- if(IsVista())
- {
+ if(ffCodecs[i].nFFCodec == CODEC_ID_H264)
ffCodecs[i].DXVAModes = &DXVA_H264_VISTA;
- }
}
}
@@ -1096,7 +1095,7 @@ HRESULT CMPCVideoDecFilter::SetMediaType(PIN_DIRECTION direction,const CMediaTyp
m_pAVCtx->error_recognition = m_nErrorRecognition;
m_pAVCtx->idct_algo = m_nIDCTAlgo;
m_pAVCtx->skip_loop_filter = (AVDiscard)m_nDiscardMode;
- m_pAVCtx->dsp_mask = FF_MM_FORCE | m_pCpuId->GetFeatures();
+ m_pAVCtx->dsp_mask = AV_CPU_FLAG_FORCE | m_pCpuId->GetFeatures();
m_pAVCtx->postgain = 1.0f;
m_pAVCtx->debug_mv = 0;
@@ -1142,7 +1141,7 @@ HRESULT CMPCVideoDecFilter::SetMediaType(PIN_DIRECTION direction,const CMediaTyp
break;
case 2 :
// skip reference frame check
- if(nCompat != DXVA_TOO_MUCH_REF_FRAMES) m_bDXVACompatible = false;
+ if(nCompat != DXVA_TOO_MANY_REF_FRAMES) m_bDXVACompatible = false;
break;
}
}
@@ -1151,7 +1150,7 @@ HRESULT CMPCVideoDecFilter::SetMediaType(PIN_DIRECTION direction,const CMediaTyp
break;
case CODEC_ID_MPEG2VIDEO :
// DSP is disable for DXVA decoding (to keep default idct_permutation)
- m_pAVCtx->dsp_mask ^= FF_MM_FORCE;
+ m_pAVCtx->dsp_mask ^= AV_CPU_FLAG_FORCE;
break;
}
@@ -1954,7 +1953,7 @@ BOOL CMPCVideoDecFilter::IsSupportedDecoderConfig(const D3DFORMAT nD3DFormat, co
{
bool bRet = false;
- bRet = (nD3DFormat == MAKEFOURCC('N', 'V', '1', '2'));
+ bRet = (nD3DFormat == MAKEFOURCC('N', 'V', '1', '2') || nD3DFormat == MAKEFOURCC('I', 'M', 'C', '3'));
bIsPrefered = (config.ConfigBitstreamRaw == ffCodecs[m_nCodecNb].DXVAModes->PreferedConfigBitstream);
LOG (_T("IsSupportedDecoderConfig 0x%08x %d"), nD3DFormat, bRet);
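A condensed sketch of the dsp_mask handling after the FF_MM_FORCE to AV_CPU_FLAG_FORCE rename (the helper name and parameters are illustrative, not from the patch; it assumes this tree's headers):

#include "libavcodec/avcodec.h"
#include "libavutil/cpu.h"

void ConfigureDspMask(AVCodecContext *pAVCtx, unsigned nCpuFeatures, int bDXVAMpeg2)
{
    /* Force exactly the detected feature set instead of letting
       libavcodec probe the CPU itself. */
    pAVCtx->dsp_mask = AV_CPU_FLAG_FORCE | nCpuFeatures;

    /* For DXVA MPEG-2 decoding, drop the FORCE bit so the default
       idct_permutation is kept (mirrors the hunk above). */
    if (bDXVAMpeg2)
        pAVCtx->dsp_mask ^= AV_CPU_FLAG_FORCE;
}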
diff --git a/src/filters/transform/MPCVideoDec/MPCVideoDecFilter.h b/src/filters/transform/MPCVideoDec/MPCVideoDecFilter.h
index 2e4062cda..b4e2fbf12 100644
--- a/src/filters/transform/MPCVideoDec/MPCVideoDecFilter.h
+++ b/src/filters/transform/MPCVideoDec/MPCVideoDecFilter.h
@@ -30,7 +30,7 @@
#include "IMPCVideoDecFilter.h"
#include "MPCVideoDecSettingsWnd.h"
-#include "../../../decss/DeCSSInputPin.h"
+#include "../../../DeCSS/DeCSSInputPin.h"
#include "DXVADecoder.h"
#include "TlibavcodecExt.h"
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/Makefile b/src/filters/transform/MPCVideoDec/ffmpeg/Makefile
index 666048770..9dda0709c 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/Makefile
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/Makefile
@@ -20,6 +20,7 @@ OUT_DIRS = ../../../../../bin/obj/Release_x64/libavcodec_gcc/ \
../../../../../bin/obj/Release_x64/libavcodec_gcc/libavcodec/x86 \
../../../../../bin/obj/Release_x64/libavcodec_gcc/libavcore \
../../../../../bin/obj/Release_x64/libavcodec_gcc/libavutil \
+ ../../../../../bin/obj/Release_x64/libavcodec_gcc/libavutil/x86 \
../../../../../bin/obj/Release_x64/libavcodec_gcc/libswscale \
$(SLIB_DIR)
else
@@ -29,6 +30,7 @@ OUT_DIRS = ../../../../../bin/obj/Release_Win32/libavcodec_gcc/ \
../../../../../bin/obj/Release_Win32/libavcodec_gcc/libavcodec/x86 \
../../../../../bin/obj/Release_Win32/libavcodec_gcc/libavcore \
../../../../../bin/obj/Release_Win32/libavcodec_gcc/libavutil \
+ ../../../../../bin/obj/Release_Win32/libavcodec_gcc/libavutil/x86 \
../../../../../bin/obj/Release_Win32/libavcodec_gcc/libswscale \
$(SLIB_DIR)
endif
@@ -36,6 +38,8 @@ endif
CFLAGS+= -I. -I.. -I$(LAVC_DIR) -I$(LAVCORE_DIR) -I$(LAVU_DIR) -I$(LSWS_DIR) -I$(ZLIB_DIR) -I$(PNG_DIR) \
-DHAVE_AV_CONFIG_H -D_ISOC99_SOURCE -D_POSIX_C_SOURCE=200112 -std=gnu99
+YASMFLAGS+= -Pconfig.asm
+
SRCS_C=\
$(LAVC_DIR)/aac_ac3_parser.c \
$(LAVC_DIR)/ac3.c \
@@ -139,13 +143,14 @@ SRCS_C=\
$(LAVC_DIR)/amr_float/interf_dec.c \
$(LAVC_DIR)/amr_float/sp_dec.c \
\
- $(LAVC_DIR)/x86/cpuid.c \
$(LAVC_DIR)/x86/dsputil_mmx.c \
$(LAVC_DIR)/x86/fdct_mmx.c \
$(LAVC_DIR)/x86/fft.c \
$(LAVC_DIR)/x86/fft_3dn.c \
$(LAVC_DIR)/x86/fft_3dn2.c \
$(LAVC_DIR)/x86/fft_sse.c \
+ $(LAVC_DIR)/x86/h264_intrapred_init.c \
+ $(LAVC_DIR)/x86/h264dsp_mmx.c \
$(LAVC_DIR)/x86/idct_mmx.c \
$(LAVC_DIR)/x86/idct_mmx_xvid.c \
$(LAVC_DIR)/x86/idct_sse2_xvid.c \
@@ -153,15 +158,16 @@ SRCS_C=\
$(LAVC_DIR)/x86/mpegvideo_mmx.c \
$(LAVC_DIR)/x86/simple_idct_mmx.c \
$(LAVC_DIR)/x86/vc1dsp_mmx.c \
- $(LAVC_DIR)/x86/vp3dsp_mmx.c \
- $(LAVC_DIR)/x86/vp3dsp_sse2.c \
$(LAVC_DIR)/x86/vp56dsp_init.c \
$(LAVC_DIR)/x86/vp8dsp-init.c \
\
$(LAVCORE_DIR)/avcore_utils.c \
$(LAVCORE_DIR)/imgutils.c \
\
+ $(LAVU_DIR)/avstring.c \
+ $(LAVU_DIR)/cpu.c \
$(LAVU_DIR)/crc.c \
+ $(LAVU_DIR)/eval.c \
$(LAVU_DIR)/intfloat_readwrite.c \
$(LAVU_DIR)/inverse.c \
$(LAVU_DIR)/log.c \
@@ -173,6 +179,8 @@ SRCS_C=\
$(LAVU_DIR)/random_seed.c \
$(LAVU_DIR)/rational.c \
\
+ $(LAVU_DIR)/x86/cpu.c \
+\
$(LSWS_DIR)/rgb2rgb.c \
$(LSWS_DIR)/swscale.c \
$(LSWS_DIR)/sww32thread.c \
@@ -181,11 +189,13 @@ SRCS_C=\
SRCS_YASM=\
$(LAVC_DIR)/x86/dsputil_yasm.asm \
$(LAVC_DIR)/x86/fft_mmx.asm \
- $(LAVC_DIR)/x86/h264_deblock_sse2.asm \
- $(LAVC_DIR)/x86/h264_idct_sse2.asm \
+ $(LAVC_DIR)/x86/h264_chromamc.asm \
+ $(LAVC_DIR)/x86/h264_deblock.asm \
+ $(LAVC_DIR)/x86/h264_idct.asm \
$(LAVC_DIR)/x86/h264_intrapred.asm \
- $(LAVC_DIR)/x86/h264_weight_sse2.asm \
+ $(LAVC_DIR)/x86/h264_weight.asm \
$(LAVC_DIR)/x86/vc1dsp_yasm.asm \
+ $(LAVC_DIR)/x86/vp3dsp.asm \
$(LAVC_DIR)/x86/vp56dsp.asm \
$(LAVC_DIR)/x86/vp8dsp.asm \
$(LAVC_DIR)/x86/x86util.asm
@@ -222,5 +232,6 @@ clean:
$(OUT_DIR)$(LAVC_DIR)/amr_float/*.o $(OUT_DIR)$(LAVC_DIR)/amr_float/*.d \
$(OUT_DIR)$(LAVCORE_DIR)/*.o $(OUT_DIR)$(LAVCORE_DIR)/*.d \
$(OUT_DIR)$(LAVU_DIR)/*.o $(OUT_DIR)$(LAVU_DIR)/*.d \
+ $(OUT_DIR)$(LAVU_DIR)/x86/*.o $(OUT_DIR)$(LAVU_DIR)/x86/*.d \
$(OUT_DIR)$(LSWS_DIR)/*.o $(OUT_DIR)$(LSWS_DIR)/*.d \
$(ZLIB_DIR)/*.o $(ZLIB_DIR)/*.d $(PNG_DIR)/*.o $(SLIB)
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/Makefile_2010 b/src/filters/transform/MPCVideoDec/ffmpeg/Makefile_2010
index 27b4f57f1..d0dc4b5ec 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/Makefile_2010
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/Makefile_2010
@@ -20,6 +20,7 @@ OUT_DIRS = ../../../../../bin10/obj/Release_x64/libavcodec_gcc/ \
../../../../../bin10/obj/Release_x64/libavcodec_gcc/libavcodec/x86 \
../../../../../bin10/obj/Release_x64/libavcodec_gcc/libavcore \
../../../../../bin10/obj/Release_x64/libavcodec_gcc/libavutil \
+ ../../../../../bin10/obj/Release_x64/libavcodec_gcc/libavutil/x86 \
../../../../../bin10/obj/Release_x64/libavcodec_gcc/libswscale \
$(SLIB_DIR)
else
@@ -29,6 +30,7 @@ OUT_DIRS = ../../../../../bin10/obj/Release_Win32/libavcodec_gcc/ \
../../../../../bin10/obj/Release_Win32/libavcodec_gcc/libavcodec/x86 \
../../../../../bin10/obj/Release_Win32/libavcodec_gcc/libavcore \
../../../../../bin10/obj/Release_Win32/libavcodec_gcc/libavutil \
+ ../../../../../bin10/obj/Release_Win32/libavcodec_gcc/libavutil/x86 \
../../../../../bin10/obj/Release_Win32/libavcodec_gcc/libswscale \
$(SLIB_DIR)
endif
@@ -36,6 +38,8 @@ endif
CFLAGS+= -I. -I.. -I$(LAVC_DIR) -I$(LAVCORE_DIR) -I$(LAVU_DIR) -I$(LSWS_DIR) -I$(ZLIB_DIR) -I$(PNG_DIR) \
-DHAVE_AV_CONFIG_H -D_ISOC99_SOURCE -D_POSIX_C_SOURCE=200112 -std=gnu99
+YASMFLAGS+= -Pconfig.asm
+
SRCS_C=\
$(LAVC_DIR)/aac_ac3_parser.c \
$(LAVC_DIR)/ac3.c \
@@ -139,13 +143,14 @@ SRCS_C=\
$(LAVC_DIR)/amr_float/interf_dec.c \
$(LAVC_DIR)/amr_float/sp_dec.c \
\
- $(LAVC_DIR)/x86/cpuid.c \
$(LAVC_DIR)/x86/dsputil_mmx.c \
$(LAVC_DIR)/x86/fdct_mmx.c \
$(LAVC_DIR)/x86/fft.c \
$(LAVC_DIR)/x86/fft_3dn.c \
$(LAVC_DIR)/x86/fft_3dn2.c \
$(LAVC_DIR)/x86/fft_sse.c \
+ $(LAVC_DIR)/x86/h264_intrapred_init.c \
+ $(LAVC_DIR)/x86/h264dsp_mmx.c \
$(LAVC_DIR)/x86/idct_mmx.c \
$(LAVC_DIR)/x86/idct_mmx_xvid.c \
$(LAVC_DIR)/x86/idct_sse2_xvid.c \
@@ -153,15 +158,16 @@ SRCS_C=\
$(LAVC_DIR)/x86/mpegvideo_mmx.c \
$(LAVC_DIR)/x86/simple_idct_mmx.c \
$(LAVC_DIR)/x86/vc1dsp_mmx.c \
- $(LAVC_DIR)/x86/vp3dsp_mmx.c \
- $(LAVC_DIR)/x86/vp3dsp_sse2.c \
$(LAVC_DIR)/x86/vp56dsp_init.c \
$(LAVC_DIR)/x86/vp8dsp-init.c \
\
$(LAVCORE_DIR)/avcore_utils.c \
$(LAVCORE_DIR)/imgutils.c \
\
+ $(LAVU_DIR)/avstring.c \
+ $(LAVU_DIR)/cpu.c \
$(LAVU_DIR)/crc.c \
+ $(LAVU_DIR)/eval.c \
$(LAVU_DIR)/intfloat_readwrite.c \
$(LAVU_DIR)/inverse.c \
$(LAVU_DIR)/log.c \
@@ -173,6 +179,8 @@ SRCS_C=\
$(LAVU_DIR)/random_seed.c \
$(LAVU_DIR)/rational.c \
\
+ $(LAVU_DIR)/x86/cpu.c \
+\
$(LSWS_DIR)/rgb2rgb.c \
$(LSWS_DIR)/swscale.c \
$(LSWS_DIR)/sww32thread.c \
@@ -181,11 +189,13 @@ SRCS_C=\
SRCS_YASM=\
$(LAVC_DIR)/x86/dsputil_yasm.asm \
$(LAVC_DIR)/x86/fft_mmx.asm \
- $(LAVC_DIR)/x86/h264_deblock_sse2.asm \
- $(LAVC_DIR)/x86/h264_idct_sse2.asm \
+ $(LAVC_DIR)/x86/h264_chromamc.asm \
+ $(LAVC_DIR)/x86/h264_deblock.asm \
+ $(LAVC_DIR)/x86/h264_idct.asm \
$(LAVC_DIR)/x86/h264_intrapred.asm \
- $(LAVC_DIR)/x86/h264_weight_sse2.asm \
+ $(LAVC_DIR)/x86/h264_weight.asm \
$(LAVC_DIR)/x86/vc1dsp_yasm.asm \
+ $(LAVC_DIR)/x86/vp3dsp.asm \
$(LAVC_DIR)/x86/vp56dsp.asm \
$(LAVC_DIR)/x86/vp8dsp.asm \
$(LAVC_DIR)/x86/x86util.asm
@@ -222,5 +232,6 @@ clean:
$(OUT_DIR)$(LAVC_DIR)/amr_float/*.o $(OUT_DIR)$(LAVC_DIR)/amr_float/*.d \
$(OUT_DIR)$(LAVCORE_DIR)/*.o $(OUT_DIR)$(LAVCORE_DIR)/*.d \
$(OUT_DIR)$(LAVU_DIR)/*.o $(OUT_DIR)$(LAVU_DIR)/*.d \
+ $(OUT_DIR)$(LAVU_DIR)/x86/*.o $(OUT_DIR)$(LAVU_DIR)/x86/*.d \
$(OUT_DIR)$(LSWS_DIR)/*.o $(OUT_DIR)$(LSWS_DIR)/*.d \
$(ZLIB_DIR)/*.o $(ZLIB_DIR)/*.d $(PNG_DIR)/*.o $(SLIB)
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/config.h b/src/filters/transform/MPCVideoDec/ffmpeg/config.h
index 507cca37f..5ca6a5486 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/config.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/config.h
@@ -14,8 +14,6 @@
#ifdef ARCH_X86_64
#define HAVE_FAST_64BIT 1
- #define HAVE_CMOV 1
- #define HAVE_FAST_CMOV 1
#define HAVE_STRUCT_TIMESPEC 1
#else
#define ARCH_X86_32 1
@@ -36,8 +34,6 @@
#define ARCH_X86_32 0
#define ARCH_X86_64 0
#define HAVE_FAST_64BIT 0
- #define HAVE_CMOV 0
- #define HAVE_FAST_CMOV 0
#define restrict
#define __asm__ __asm
@@ -50,6 +46,7 @@
#define FFMPEG_CONFIGURATION "ffdshow custom"
#define FFMPEG_LICENSE "GPL version 2 or later"
+
#define CC_TYPE "gcc"
#define CC_VERSION __VERSION__
@@ -57,9 +54,9 @@
// MPC custom code for linking with MSVC
#if defined(__GNUC__) && ARCH_X86_64
-#define EXTERN_PREFIX ""
+ #define EXTERN_PREFIX ""
#else
-#define EXTERN_PREFIX "_"
+ #define EXTERN_PREFIX "_"
#endif
#define EXTERN_ASM _
@@ -96,9 +93,11 @@
#define HAVE_ALTIVEC_H 0
#define HAVE_BIGENDIAN 0
#define HAVE_BSWAP 1
+#define HAVE_CMOV 1
#define HAVE_EBP_AVAILABLE 1
#define HAVE_EBX_AVAILABLE 1
#define HAVE_FAST_CLZ 0
+#define HAVE_FAST_CMOV 1
#define HAVE_FAST_UNALIGNED 1
#define HAVE_LOCAL_ALIGNED_16 1
#define HAVE_LOCAL_ALIGNED_8 1
@@ -112,36 +111,34 @@
#ifdef __GNUC__
#define HAVE_ATTRIBUTE_PACKED 1
#define HAVE_ATTRIBUTE_MAY_ALIAS 1
+
+ #define HAVE_EXP2 1
+ #define HAVE_EXP2F 1
+ #define HAVE_LLRINT 1
+ #define HAVE_LOG2 1
+ #define HAVE_LOG2F 1
+ #define HAVE_LRINT 1
+ #define HAVE_LRINTF 1
+ #define HAVE_ROUND 1
+ #define HAVE_ROUNDF 1
+ #define HAVE_TRUNCF 1
#else
#define HAVE_ATTRIBUTE_PACKED 0
#define HAVE_ATTRIBUTE_MAY_ALIAS 0
#define EMULATE_FAST_INT
-#endif
-#ifdef __GNUC__
-#define HAVE_EXP2 1
-#define HAVE_EXP2F 1
-#define HAVE_LLRINT 1
-#define HAVE_LOG2 1
-#define HAVE_LOG2F 1
-#define HAVE_LRINT 1
-#define HAVE_LRINTF 1
-#define HAVE_ROUND 1
-#define HAVE_ROUNDF 1
-#define HAVE_TRUNCF 1
-#else
-#define HAVE_EXP2 1
-#define HAVE_EXP2F 1
-#define HAVE_LLRINT 0
-#define HAVE_LOG2 1
-#define HAVE_LOG2F 1
-#define HAVE_LRINT 0
-#define HAVE_LRINTF 0
-#define HAVE_ROUND 0
-#define HAVE_ROUNDF 1
-#define HAVE_TRUNCF 1
-#define rint(x) (int)(x+0.5)
-#define cbrtf(x) pow((float)x, (float)1.0/3)
+ #define HAVE_EXP2 1
+ #define HAVE_EXP2F 1
+ #define HAVE_LLRINT 0
+ #define HAVE_LOG2 1
+ #define HAVE_LOG2F 1
+ #define HAVE_LRINT 0
+ #define HAVE_LRINTF 0
+ #define HAVE_ROUND 0
+ #define HAVE_ROUNDF 1
+ #define HAVE_TRUNCF 1
+ #define rint(x) (int)(x+0.5)
+ #define cbrtf(x) pow((float)x, (float)1.0/3)
#endif
#define CONFIG_DCT 0
@@ -149,8 +146,8 @@
#define CONFIG_GPL 1
#define CONFIG_GRAY 1
-#define CONFIG_H264PRED 1
#define CONFIG_H264DSP 1
+#define CONFIG_H264PRED 1
#define CONFIG_HARDCODED_TABLES 0
#define CONFIG_HUFFMAN 0
#define CONFIG_LIBAMR_NB 1
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/custom_code.txt b/src/filters/transform/MPCVideoDec/ffmpeg/custom_code.txt
index d0bb2b9a1..564cc184f 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/custom_code.txt
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/custom_code.txt
@@ -8,6 +8,7 @@ The following files have MPC-specific custom code (compared to ffdshow):
* libavcodec/bitstream.c
* libavcodec/CompilatorVersion.c
* libavcodec/dsputil.c
+* libavcodec/dxva.h
* libavcodec/h264.c
* libavcodec/mpc_helper.c
* libavcodec/mpeg12.c
@@ -15,3 +16,4 @@ The following files have MPC-specific custom code (compared to ffdshow):
* libavcore/avcore_utils.c (renamed from utils.c to avoid conflicts in MSVC2010)
* libavutil/internal.h
* libavutil/log.h
+* libswscale
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec.def b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec.def
index 6ee8d8d24..782a16c50 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec.def
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec.def
@@ -6,7 +6,9 @@ EXPORTS
avcodec_alloc_frame
avcodec_close
avcodec_decode_video
- avcodec_decode_audio
+ avcodec_decode_video2
+ avcodec_decode_audio2
+ avcodec_decode_audio3
avcodec_default_get_buffer
avcodec_default_reget_buffer
avcodec_default_release_buffer
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec.vcproj b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec.vcproj
index b10994323..757205029 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec.vcproj
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec.vcproj
@@ -1,7 +1,7 @@
<?xml version="1.0" encoding="Windows-1252"?>
<VisualStudioProject
ProjectType="Visual C++"
- Version="9,00"
+ Version="9.00"
Name="libavcodec"
OwnerKey="{EAF909A5-FA59-4C3D-9431-0FCC20D5BCF9}"
ProjectGUID="{5CAF881C-9349-4EE2-9697-982C10795033}"
@@ -44,7 +44,6 @@
/>
<Tool
Name="VCCLCompilerTool"
- AdditionalOptions="/MP"
Optimization="0"
AdditionalIncludeDirectories="..\..\..\..\thirdparty\zlib;.;libswscale;..\..\..\..\..\include"
PreprocessorDefinitions="WIN32;_CRT_SECURE_NO_DEPRECATE;_CRT_NON_CONFORMING_SWPRINTFS;_CRT_NONSTDC_NO_WARNINGS;_DEBUG;_WINDOWS;HAVE_AV_CONFIG_H;H264_MERGE_TESTING"
@@ -108,7 +107,6 @@
/>
<Tool
Name="VCCLCompilerTool"
- AdditionalOptions="/MP"
Optimization="0"
AdditionalIncludeDirectories="..\..\..\..\thirdparty\zlib;.;libswscale;..\..\..\..\..\include"
PreprocessorDefinitions="_WIN64;_CRT_SECURE_NO_DEPRECATE;_CRT_NON_CONFORMING_SWPRINTFS;_CRT_NONSTDC_NO_WARNINGS;_DEBUG;_WINDOWS;HAVE_AV_CONFIG_H"
@@ -175,7 +173,6 @@
/>
<Tool
Name="VCCLCompilerTool"
- AdditionalOptions="/MP"
InlineFunctionExpansion="2"
EnableIntrinsicFunctions="true"
FavorSizeOrSpeed="1"
@@ -191,7 +188,7 @@
AssemblerListingLocation="$(IntDir)\"
ObjectFile="$(IntDir)\"
ProgramDataBaseFileName="$(IntDir)\"
- WarningLevel="1"
+ WarningLevel="0"
/>
<Tool
Name="VCManagedResourceCompilerTool"
@@ -248,7 +245,6 @@
/>
<Tool
Name="VCCLCompilerTool"
- AdditionalOptions="/MP"
Optimization="3"
InlineFunctionExpansion="2"
EnableIntrinsicFunctions="true"
@@ -1199,6 +1195,10 @@
>
</File>
<File
+ RelativePath=".\libavutil\avstring.c"
+ >
+ </File>
+ <File
RelativePath=".\libavutil\avstring.h"
>
</File>
@@ -1215,6 +1215,14 @@
>
</File>
<File
+ RelativePath=".\libavutil\cpu.c"
+ >
+ </File>
+ <File
+ RelativePath=".\libavutil\cpu.h"
+ >
+ </File>
+ <File
RelativePath=".\libavutil\crc.c"
>
</File>
@@ -1231,6 +1239,10 @@
>
</File>
<File
+ RelativePath=".\libavutil\eval.c"
+ >
+ </File>
+ <File
RelativePath=".\libavutil\eval.h"
>
</File>
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec.vcxproj b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec.vcxproj
index e190e548e..b0fa70df9 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec.vcxproj
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec.vcxproj
@@ -133,6 +133,7 @@
<ClInclude Include="libavutil\avutil.h" />
<ClInclude Include="libavutil\bswap.h" />
<ClInclude Include="libavutil\common.h" />
+ <ClInclude Include="libavutil\cpu.h" />
<ClInclude Include="libavutil\crc.h" />
<ClInclude Include="libavutil\crc_data.h" />
<ClInclude Include="libavutil\error.h" />
@@ -275,7 +276,10 @@
<ClCompile Include="libavcodec\xiph.c" />
<ClCompile Include="libavcore\imgutils.c" />
<ClCompile Include="libavcore\avcore_utils.c" />
+ <ClCompile Include="libavutil\avstring.c" />
+ <ClCompile Include="libavutil\cpu.c" />
<ClCompile Include="libavutil\crc.c" />
+ <ClCompile Include="libavutil\eval.c" />
<ClCompile Include="libavutil\intfloat_readwrite.c">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec.vcxproj.filters b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec.vcxproj.filters
index d23ef9256..2dd323b4b 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec.vcxproj.filters
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec.vcxproj.filters
@@ -51,6 +51,9 @@
<ClInclude Include="libavutil\common.h">
<Filter>libavutil</Filter>
</ClInclude>
+ <ClInclude Include="libavutil\cpu.h">
+ <Filter>libavutil</Filter>
+ </ClInclude>
<ClInclude Include="libavutil\crc.h">
<Filter>libavutil</Filter>
</ClInclude>
@@ -459,9 +462,18 @@
<ClCompile Include="libswscale\yuv2rgb_template.c">
<Filter>libswscale</Filter>
</ClCompile>
+ <ClCompile Include="libavutil\avstring.c">
+ <Filter>libavutil</Filter>
+ </ClCompile>
+ <ClCompile Include="libavutil\cpu.c">
+ <Filter>libavutil</Filter>
+ </ClCompile>
<ClCompile Include="libavutil\crc.c">
<Filter>libavutil</Filter>
</ClCompile>
+ <ClCompile Include="libavutil\eval.c">
+ <Filter>libavutil</Filter>
+ </ClCompile>
<ClCompile Include="libavutil\lfg.c">
<Filter>libavutil</Filter>
</ClCompile>
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/ac3dec.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/ac3dec.c
index 126424440..5992715b8 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/ac3dec.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/ac3dec.c
@@ -1311,8 +1311,10 @@ static int decode_audio_block(AC3DecodeContext *s, int blk)
* Decode a single AC-3 frame.
*/
static int ac3_decode_frame(AVCodecContext * avctx, void *data, int *data_size,
- const uint8_t *buf, int buf_size)
+ AVPacket *avpkt)
{
+ const uint8_t *buf = avpkt->data;
+ int buf_size = avpkt->size;
AC3DecodeContext *s = avctx->priv_data;
int16_t *out_samples = (int16_t *)data;
int blk, ch, err;
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/adpcm.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/adpcm.c
index 6fda2f8bf..6ecd98e95 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/adpcm.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/adpcm.c
@@ -365,8 +365,10 @@ static void xa_decode(short *out, const unsigned char *in,
static int adpcm_decode_frame(AVCodecContext *avctx,
void *data, int *data_size,
- const uint8_t *buf, int buf_size)
+ AVPacket *avpkt)
{
+ const uint8_t *buf = avpkt->data;
+ int buf_size = avpkt->size;
ADPCMContext *c = avctx->priv_data;
ADPCMChannelStatus *cs;
int n, m, channel, i;
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/audioconvert.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/audioconvert.h
index 81b6cded3..349065edc 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/audioconvert.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/audioconvert.h
@@ -29,6 +29,7 @@
*/
+#include "libavutil/cpu.h"
#include "avcodec.h"
@@ -60,6 +61,11 @@ enum SampleFormat avcodec_get_sample_fmt(const char* name);
const char *avcodec_get_channel_name(int channel_id);
/**
+ * @return channel layout that matches name, 0 if no match
+ */
+int64_t avcodec_get_channel_layout(const char *name);
+
+/**
* Return description of channel layout
*/
void avcodec_get_channel_layout_string(char *buf, int buf_size, int nb_channels, int64_t channel_layout);
@@ -88,7 +94,7 @@ typedef struct AVAudioConvert AVAudioConvert;
* @param in_fmt Input sample format
* @param in_channels Number of input channels
* @param[in] matrix Channel mixing matrix (of dimension in_channel*out_channels). Set to NULL to ignore.
- * @param flags See FF_MM_xx
+ * @param flags See AV_CPU_FLAG_xx
* @return NULL on error
*/
AVAudioConvert *av_audio_convert_alloc(enum SampleFormat out_fmt, int out_channels,
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/avcodec.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/avcodec.h
index a841de108..c5f35eda2 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/avcodec.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/avcodec.h
@@ -42,10 +42,11 @@
#include <errno.h>
#include "libavutil/avutil.h"
+#include "libavutil/cpu.h"
#define LIBAVCODEC_VERSION_MAJOR 52
-#define LIBAVCODEC_VERSION_MINOR 85
-#define LIBAVCODEC_VERSION_MICRO 1
+#define LIBAVCODEC_VERSION_MINOR 87
+#define LIBAVCODEC_VERSION_MICRO 5
#define LIBAVCODEC_VERSION_INT AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
LIBAVCODEC_VERSION_MINOR, \
@@ -57,6 +58,17 @@
#define LIBAVCODEC_IDENT "Lavc" AV_STRINGIFY(LIBAVCODEC_VERSION)
+/**
+ * Those FF_API_* defines are not part of public API.
+ * They may change, break or disappear at any time.
+ */
+#ifndef FF_API_PALETTE_CONTROL
+#define FF_API_PALETTE_CONTROL (LIBAVCODEC_VERSION_MAJOR < 54)
+#endif
+#ifndef FF_API_MM_FLAGS
+#define FF_API_MM_FLAGS (LIBAVCODEC_VERSION_MAJOR < 53)
+#endif
+
#define AV_NOPTS_VALUE INT64_C(0x8000000000000000)
#define AV_TIME_BASE 1000000
static const AVRational AV_TIME_BASE_Q={1, AV_TIME_BASE};
@@ -1399,27 +1411,25 @@ typedef struct AVCodecContext {
* result into program crash.)
*/
unsigned dsp_mask;
-#define FF_MM_FORCE 0x80000000 /* Force usage of selected flags (OR) */
- /* lower 16 bits - CPU features */
-#define FF_MM_MMX 0x0001 ///< standard MMX
-#define FF_MM_3DNOW 0x0004 ///< AMD 3DNOW
-#if LIBAVCODEC_VERSION_MAJOR < 53
-#define FF_MM_MMXEXT 0x0002 ///< SSE integer functions or AMD MMX ext
+
+#if FF_API_MM_FLAGS
+#define FF_MM_FORCE AV_CPU_FLAG_FORCE
+#define FF_MM_MMX AV_CPU_FLAG_MMX
+#define FF_MM_3DNOW AV_CPU_FLAG_3DNOW
+#define FF_MM_MMXEXT AV_CPU_FLAG_MMX2
+#define FF_MM_MMX2 AV_CPU_FLAG_MMX2
+#define FF_MM_SSE AV_CPU_FLAG_SSE
+#define FF_MM_SSE2 AV_CPU_FLAG_SSE2
+#define FF_MM_SSE2SLOW AV_CPU_FLAG_SSE2SLOW
+#define FF_MM_3DNOWEXT AV_CPU_FLAG_3DNOWEXT
+#define FF_MM_SSE3 AV_CPU_FLAG_SSE3
+#define FF_MM_SSE3SLOW AV_CPU_FLAG_SSE3SLOW
+#define FF_MM_SSSE3 AV_CPU_FLAG_SSSE3
+#define FF_MM_SSE4 AV_CPU_FLAG_SSE4
+#define FF_MM_SSE42 AV_CPU_FLAG_SSE42
+#define FF_MM_IWMMXT AV_CPU_FLAG_IWMMXT
+#define FF_MM_ALTIVEC AV_CPU_FLAG_ALTIVEC
#endif
-#define FF_MM_MMX2 0x0002 ///< SSE integer functions or AMD MMX ext
-#define FF_MM_SSE 0x0008 ///< SSE functions
-#define FF_MM_SSE2 0x0010 ///< PIV SSE2 functions
-#define FF_MM_SSE2SLOW 0x40000000 ///< SSE2 supported, but usually not faster
- ///< than regular MMX/SSE (e.g. Core1)
-#define FF_MM_3DNOWEXT 0x0020 ///< AMD 3DNowExt
-#define FF_MM_SSE3 0x0040 ///< Prescott SSE3 functions
-#define FF_MM_SSE3SLOW 0x20000000 ///< SSE3 supported, but usually not faster
- ///< than regular MMX/SSE (e.g. Core1)
-#define FF_MM_SSSE3 0x0080 ///< Conroe SSSE3 functions
-#define FF_MM_SSE4 0x0100 ///< Penryn SSE4.1 functions
-#define FF_MM_SSE42 0x0200 ///< Nehalem SSE4.2 functions
-#define FF_MM_IWMMXT 0x0100 ///< XScale IWMMXT
-#define FF_MM_ALTIVEC 0x0001 ///< standard AltiVec
/**
* bits per sample/pixel from the demuxer (needed for huffyuv).
@@ -1776,12 +1786,14 @@ typedef struct AVCodecContext {
*/
int lmax;
+#if FF_API_PALETTE_CONTROL
/**
* palette control structure
* - encoding: ??? (no palette-enabled encoder yet)
* - decoding: Set by user.
*/
struct AVPaletteControl *palctrl;
+#endif
/**
* noise reduction strength
@@ -2616,8 +2628,7 @@ typedef struct AVCodec {
int (*init)(AVCodecContext *);
int (*encode)(AVCodecContext *, uint8_t *buf, int buf_size, void *data);
int (*close)(AVCodecContext *);
- int (*decode)(AVCodecContext *, void *outdata, int *outdata_size,
- const uint8_t *buf, int buf_size);
+ int (*decode)(AVCodecContext *, void *outdata, int *outdata_size, AVPacket *avpkt);
/**
* Codec capabilities.
* see CODEC_CAP_*
@@ -2834,59 +2845,91 @@ void avcodec_get_encoder_info(AVCodecContext *avctx,int *xvid_build,int *divx_ve
*/
FF_EXPORT int avcodec_open(AVCodecContext *avctx, AVCodec *codec);
+#if LIBAVCODEC_VERSION_MAJOR < 53
/**
- * @deprecated Use avcodec_decode_audio2() instead.
+ * Decode an audio frame from buf into samples.
+ * Wrapper function which calls avcodec_decode_audio3.
+ *
+ * @deprecated Use avcodec_decode_audio3 instead.
+ * @param avctx the codec context
+ * @param[out] samples the output buffer
+ * @param[in,out] frame_size_ptr the output buffer size in bytes
+ * @param[in] buf the input buffer
+ * @param[in] buf_size the input buffer size in bytes
+ * @return On error a negative value is returned, otherwise the number of bytes
+ * used or zero if no frame could be decompressed.
*/
-attribute_deprecated int avcodec_decode_audio(AVCodecContext *avctx, int16_t *samples,
+FF_EXPORT int avcodec_decode_audio2(AVCodecContext *avctx, int16_t *samples,
int *frame_size_ptr,
const uint8_t *buf, int buf_size);
+#endif
/**
- * Decodes an audio frame from buf into samples.
- * The avcodec_decode_audio2() function decodes an audio frame from the input
- * buffer buf of size buf_size. To decode it, it makes use of the
- * audio codec which was coupled with avctx using avcodec_open(). The
- * resulting decoded frame is stored in output buffer samples. If no frame
- * could be decompressed, frame_size_ptr is zero. Otherwise, it is the
+ * Decode the audio frame of size avpkt->size from avpkt->data into samples.
+ * Some decoders may support multiple frames in a single AVPacket, such
+ * decoders would then just decode the first frame. In this case,
+ * avcodec_decode_audio3 has to be called again with an AVPacket that contains
+ * the remaining data in order to decode the second frame etc.
+ * If no frame
+ * could be outputted, frame_size_ptr is zero. Otherwise, it is the
* decompressed frame size in bytes.
*
* @warning You must set frame_size_ptr to the allocated size of the
- * output buffer before calling avcodec_decode_audio2().
+ * output buffer before calling avcodec_decode_audio3().
*
* @warning The input buffer must be FF_INPUT_BUFFER_PADDING_SIZE larger than
* the actual read bytes because some optimized bitstream readers read 32 or 64
* bits at once and could read over the end.
*
- * @warning The end of the input buffer buf should be set to 0 to ensure that
+ * @warning The end of the input buffer avpkt->data should be set to 0 to ensure that
* no overreading happens for damaged MPEG streams.
*
- * @note You might have to align the input buffer buf and output buffer
+ * @note You might have to align the input buffer avpkt->data and output buffer
* samples. The alignment requirements depend on the CPU: On some CPUs it isn't
* necessary at all, on others it won't work at all if not aligned and on others
- * it will work but it will have an impact on performance. In practice, the
- * bitstream should have 4 byte alignment at minimum and all sample data should
- * be 16 byte aligned unless the CPU doesn't need it (AltiVec and SSE do). If
- * the linesize is not a multiple of 16 then there's no sense in aligning the
- * start of the buffer to 16.
+ * it will work but it will have an impact on performance.
+ *
+ * In practice, avpkt->data should have 4 byte alignment at minimum and
+ * samples should be 16 byte aligned unless the CPU doesn't need it
+ * (AltiVec and SSE do).
*
* @param avctx the codec context
- * @param[out] samples the output buffer
+ * @param[out] samples the output buffer, sample type in avctx->sample_fmt
* @param[in,out] frame_size_ptr the output buffer size in bytes
+ * @param[in] avpkt The input AVPacket containing the input buffer.
+ * You can create such packet with av_init_packet() and by then setting
+ * data and size, some decoders might in addition need other fields.
+ * All decoders are designed to use the least fields possible though.
+ * @return On error a negative value is returned, otherwise the number of bytes
+ * used or zero if no frame data was decompressed (used) from the input AVPacket.
+ */
+int avcodec_decode_audio3(AVCodecContext *avctx, int16_t *samples,
+ int *frame_size_ptr,
+ AVPacket *avpkt);
+
+#if LIBAVCODEC_VERSION_MAJOR < 53
+/**
+ * Decode a video frame from buf into picture.
+ * Wrapper function which calls avcodec_decode_video2.
+ *
+ * @deprecated Use avcodec_decode_video2 instead.
+ * @param avctx the codec context
+ * @param[out] picture The AVFrame in which the decoded video frame will be stored.
* @param[in] buf the input buffer
- * @param[in] buf_size the input buffer size in bytes
+ * @param[in] buf_size the size of the input buffer in bytes
+ * @param[in,out] got_picture_ptr Zero if no frame could be decompressed, otherwise, it is nonzero.
* @return On error a negative value is returned, otherwise the number of bytes
* used or zero if no frame could be decompressed.
*/
-FF_EXPORT int avcodec_decode_audio2(AVCodecContext *avctx, int16_t *samples,
- int *frame_size_ptr,
+FF_EXPORT int avcodec_decode_video(AVCodecContext *avctx, AVFrame *picture,
+ int *got_picture_ptr,
const uint8_t *buf, int buf_size);
+#endif
/**
- * Decodes a video frame from buf into picture.
- * The avcodec_decode_video() function decodes a video frame from the input
- * buffer buf of size buf_size. To decode it, it makes use of the
- * video codec which was coupled with avctx using avcodec_open(). The
- * resulting decoded frame is stored in picture.
+ * Decode the video frame of size avpkt->size from avpkt->data into picture.
+ * Some decoders may support multiple frames in a single AVPacket, such
+ * decoders would then just decode the first frame.
*
* @warning The input buffer must be FF_INPUT_BUFFER_PADDING_SIZE larger than
* the actual read bytes because some optimized bitstream readers read 32 or 64
@@ -2895,29 +2938,37 @@ FF_EXPORT int avcodec_decode_audio2(AVCodecContext *avctx, int16_t *samples,
* @warning The end of the input buffer buf should be set to 0 to ensure that
* no overreading happens for damaged MPEG streams.
*
- * @note You might have to align the input buffer buf and output buffer
- * samples. The alignment requirements depend on the CPU: on some CPUs it isn't
+ * @note You might have to align the input buffer avpkt->data.
+ * The alignment requirements depend on the CPU: on some CPUs it isn't
* necessary at all, on others it won't work at all if not aligned and on others
- * it will work but it will have an impact on performance. In practice, the
- * bitstream should have 4 byte alignment at minimum and all sample data should
- * be 16 byte aligned unless the CPU doesn't need it (AltiVec and SSE do). If
- * the linesize is not a multiple of 16 then there's no sense in aligning the
- * start of the buffer to 16.
+ * it will work but it will have an impact on performance.
+ *
+ * In practice, avpkt->data should have 4 byte alignment at minimum.
*
* @note Some codecs have a delay between input and output, these need to be
- * feeded with buf=NULL, buf_size=0 at the end to return the remaining frames.
+ * fed with avpkt->data=NULL, avpkt->size=0 at the end to return the remaining frames.
*
* @param avctx the codec context
* @param[out] picture The AVFrame in which the decoded video frame will be stored.
- * @param[in] buf the input buffer
- * @param[in] buf_size the size of the input buffer in bytes
+ * Use avcodec_alloc_frame to get an AVFrame, the codec will
+ * allocate memory for the actual bitmap.
+ * with default get/release_buffer(), the decoder frees/reuses the bitmap as it sees fit.
+ * with overridden get/release_buffer() (needs CODEC_CAP_DR1) the user decides into what buffer the decoder
+ * decodes and the decoder tells the user once it does not need the data anymore,
+ * the user app can at this point free/reuse/keep the memory as it sees fit.
+ *
+ * @param[in] avpkt The input AVpacket containing the input buffer.
+ * You can create such packet with av_init_packet() and by then setting
+ * data and size, some decoders might in addition need other fields like
+ * flags&AV_PKT_FLAG_KEY. All decoders are designed to use the least
+ * fields possible.
* @param[in,out] got_picture_ptr Zero if no frame could be decompressed, otherwise, it is nonzero.
* @return On error a negative value is returned, otherwise the number of bytes
* used or zero if no frame could be decompressed.
*/
-FF_EXPORT int avcodec_decode_video(AVCodecContext *avctx, AVFrame *picture,
+int avcodec_decode_video2(AVCodecContext *avctx, AVFrame *picture,
int *got_picture_ptr,
- const uint8_t *buf, int buf_size);
+ AVPacket *avpkt);
int avcodec_parse_frame(AVCodecContext *avctx, uint8_t **pdata,
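Taken together, the documentation added above amounts to the following calling pattern for the AVPacket-based API. A minimal sketch against this tree's lavc 52.87 headers (error handling trimmed; the DecodeOneFrame wrapper is illustrative):

#include "libavcodec/avcodec.h"

int DecodeOneFrame(AVCodecContext *ctx, AVFrame *picture,
                   uint8_t *buf, int buf_size)
{
    AVPacket avpkt;
    int got_picture = 0;
    int used;

    av_init_packet(&avpkt);
    avpkt.data = buf;   /* must have FF_INPUT_BUFFER_PADDING_SIZE padding */
    avpkt.size = buf_size;

    /* Returns bytes consumed, or a negative value on error. */
    used = avcodec_decode_video2(ctx, picture, &got_picture, &avpkt);
    if (used < 0)
        return used;
    return got_picture;   /* nonzero once a frame is available */
}

Pass avpkt.data=NULL, avpkt.size=0 at end of stream to flush codecs that delay output, as the new doc comment notes.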
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/dsputil.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/dsputil.c
index e4a4a7ad6..3d6b46d75 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/dsputil.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/dsputil.c
@@ -27,6 +27,7 @@
* DSP utils
*/
+#include "libavcore/imgutils.h"
#include "avcodec.h"
#include "dsputil.h"
#include "simple_idct.h"
@@ -121,6 +122,9 @@ void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_s
int j;
j = src_scantable[i];
st->permutated[i] = permutation[j];
+#if ARCH_PPC
+ st->inverse[j] = i;
+#endif
}
end=-1;
@@ -1158,7 +1162,7 @@ CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels8_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels8_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels8_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels8_xy2_c, 8)\
-CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_c , OPNAME ## _pixels8_c , 8)\
+av_unused CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_c , OPNAME ## _pixels8_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels8_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels8_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels8_xy2_c, 8)\
@@ -1172,6 +1176,9 @@ PIXOP2(put, op_put)
#undef op_avg
#undef op_put
+#define put_no_rnd_pixels8_c put_pixels8_c
+#define put_no_rnd_pixels16_c put_pixels16_c
+
#define avg2(a,b) ((a+b+1)>>1)
#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
@@ -1754,10 +1761,6 @@ static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dst
}\
}\
\
-static void OPNAME ## qpel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
- OPNAME ## pixels8_c(dst, src, stride, 8);\
-}\
-\
static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t half[64];\
put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
@@ -1936,9 +1939,6 @@ static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
}\
-static void OPNAME ## qpel16_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
- OPNAME ## pixels16_c(dst, src, stride, 16);\
-}\
\
static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t half[256];\
@@ -2133,6 +2133,13 @@ QPEL_MC(0, avg_ , _ , op_avg)
#undef op_put
#undef op_put_no_rnd
+#define put_qpel8_mc00_c ff_put_pixels8x8_c
+#define avg_qpel8_mc00_c ff_avg_pixels8x8_c
+#define put_qpel16_mc00_c ff_put_pixels16x16_c
+#define avg_qpel16_mc00_c ff_avg_pixels16x16_c
+#define put_no_rnd_qpel8_mc00_c ff_put_pixels8x8_c
+#define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_c
+
#if 1
#define H264_LOWPASS(OPNAME, OP, OP2) \
static av_unused void OPNAME ## h264_qpel2_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
@@ -2399,7 +2406,7 @@ static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t
}\
#define H264_MC(OPNAME, SIZE) \
-static void OPNAME ## h264_qpel ## SIZE ## _mc00_c (uint8_t *dst, uint8_t *src, int stride){\
+static av_unused void OPNAME ## h264_qpel ## SIZE ## _mc00_c (uint8_t *dst, uint8_t *src, int stride){\
OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\
}\
\
@@ -2557,6 +2564,11 @@ H264_MC(avg_, 16)
#undef op2_put
#endif
+#define put_h264_qpel8_mc00_c ff_put_pixels8x8_c
+#define avg_h264_qpel8_mc00_c ff_avg_pixels8x8_c
+#define put_h264_qpel16_mc00_c ff_put_pixels16x16_c
+#define avg_h264_qpel16_mc00_c ff_avg_pixels16x16_c
+
static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
int i;
@@ -2575,31 +2587,18 @@ static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int
}
}
-#if CONFIG_CAVS_DECODER
-/* AVS specific */
-void ff_put_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
+void ff_put_pixels8x8_c(uint8_t *dst, uint8_t *src, int stride) {
put_pixels8_c(dst, src, stride, 8);
}
-void ff_avg_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
+void ff_avg_pixels8x8_c(uint8_t *dst, uint8_t *src, int stride) {
avg_pixels8_c(dst, src, stride, 8);
}
-void ff_put_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
+void ff_put_pixels16x16_c(uint8_t *dst, uint8_t *src, int stride) {
put_pixels16_c(dst, src, stride, 16);
}
-void ff_avg_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
+void ff_avg_pixels16x16_c(uint8_t *dst, uint8_t *src, int stride) {
avg_pixels16_c(dst, src, stride, 16);
}
-#endif /* CONFIG_CAVS_DECODER */
-
-#if CONFIG_VC1_DECODER
-/* VC-1 specific */
-void ff_put_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) {
- put_pixels8_c(dst, src, stride, 8);
-}
-void ff_avg_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) {
- avg_pixels8_c(dst, src, stride, 8);
-}
-#endif /* CONFIG_VC1_DECODER */
#if CONFIG_RV40_DECODER
static void put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
@@ -2645,10 +2644,6 @@ static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int
}
}
-static void put_mspel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){
- put_pixels8_c(dst, src, stride, 8);
-}
-
static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
uint8_t half[64];
wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
@@ -4352,7 +4347,7 @@ av_cold void attribute_align_arg dsputil_init(DSPContext* c, AVCodecContext *avc
c->avg_rv40_qpel_pixels_tab[1][15] = avg_rv40_qpel8_mc33_c;
#endif
- c->put_mspel_pixels_tab[0]= put_mspel8_mc00_c;
+ c->put_mspel_pixels_tab[0]= ff_put_pixels8x8_c;
c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
@@ -4456,7 +4451,7 @@ av_cold void attribute_align_arg dsputil_init(DSPContext* c, AVCodecContext *avc
c->sv_fmul_scalar[1] = sv_fmul_scalar_4_c;
#endif
- c->shrink[0]= ff_img_copy_plane;
+ c->shrink[0]= av_image_copy_plane;
c->shrink[1]= ff_shrink22;
c->shrink[2]= ff_shrink44;
c->shrink[3]= ff_shrink88;
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/dsputil.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/dsputil.h
index cfd1b7f33..8c1499165 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/dsputil.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/dsputil.h
@@ -82,6 +82,11 @@ extern const uint8_t ff_zigzag248_direct[64];
extern uint32_t ff_squareTbl[512];
extern uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP];
+void ff_put_pixels8x8_c(uint8_t *dst, uint8_t *src, int stride);
+void ff_avg_pixels8x8_c(uint8_t *dst, uint8_t *src, int stride);
+void ff_put_pixels16x16_c(uint8_t *dst, uint8_t *src, int stride);
+void ff_avg_pixels16x16_c(uint8_t *dst, uint8_t *src, int stride);
+
/* VP3 DSP functions */
void ff_vp3_idct_c(DCTELEM *block/* align 16*/);
void ff_vp3_idct_put_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
@@ -91,22 +96,15 @@ void ff_vp3_idct_dc_add_c(uint8_t *dest/*align 8*/, int line_size, const DCTELEM
void ff_vp3_v_loop_filter_c(uint8_t *src, int stride, int *bounding_values);
void ff_vp3_h_loop_filter_c(uint8_t *src, int stride, int *bounding_values);
-/* VP6 DSP functions */
-void ff_vp6_filter_diag4_c(uint8_t *dst, uint8_t *src, int stride,
- const int16_t *h_weights, const int16_t *v_weights);
-
-/* CAVS functions */
-void ff_put_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride);
-void ff_avg_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride);
-void ff_put_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride);
-void ff_avg_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride);
-
-/* VC1 functions */
-void ff_put_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int rnd);
-void ff_avg_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int rnd);
-
/* 1/2^n downscaling functions from imgconvert.c */
+#if LIBAVCODEC_VERSION_MAJOR < 53
+/**
+ * @deprecated Use av_image_copy_plane() instead.
+ */
+attribute_deprecated
void ff_img_copy_plane(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
+#endif
+
void ff_shrink22(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
void ff_shrink44(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
void ff_shrink88(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
@@ -181,6 +179,10 @@ typedef struct ScanTable{
const uint8_t *scantable;
uint8_t permutated[64];
uint8_t raster_end[64];
+#if ARCH_PPC
+ /** Used by dct_quantize_altivec to find last-non-zero */
+ DECLARE_ALIGNED(16, uint8_t, inverse)[64];
+#endif
} ScanTable;
void ff_init_scantable(uint8_t *, ScanTable *st, const uint8_t *src_scantable);
@@ -598,11 +600,15 @@ static inline int get_penalty_factor(int lambda, int lambda2, int type){
*/
#define emms_c()
-/* should be defined by architectures supporting
- one or more MultiMedia extension */
-int mm_support(void);
-
+void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx);
+void dsputil_init_arm(DSPContext* c, AVCodecContext *avctx);
+void dsputil_init_bfin(DSPContext* c, AVCodecContext *avctx);
+void dsputil_init_mlib(DSPContext* c, AVCodecContext *avctx);
+void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx);
void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx);
+void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx);
+void dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx);
+void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx);
void ff_dsputil_init_dwt(DSPContext *c);
void ff_rv30dsp_init(DSPContext* c, AVCodecContext *avctx);
@@ -627,9 +633,19 @@ static inline void emms(void)
#define emms_c() emms()
-#else
+#elif ARCH_ARM
+
+#if HAVE_NEON
+# define STRIDE_ALIGN 16
+#endif
+
+#elif ARCH_PPC
+
+#define STRIDE_ALIGN 16
+
+#elif HAVE_MMI
-#define mm_support() 0
+#define STRIDE_ALIGN 16
#endif
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/flvdec.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/flvdec.c
index 32f595a27..8f05945cb 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/flvdec.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/flvdec.c
@@ -83,7 +83,7 @@ int ff_flv_decode_picture_header(MpegEncContext *s)
width = height = 0;
break;
}
- if(av_check_image_size(width, height, 0, s->avctx))
+ if(av_image_check_size(width, height, 0, s->avctx))
return -1;
s->width = width;
s->height = height;
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h263.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h263.h
index d8b7abe4f..6c05565c7 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h263.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h263.h
@@ -70,7 +70,7 @@ av_const int ff_h263_aspect_to_info(AVRational aspect);
int ff_h263_decode_init(AVCodecContext *avctx);
int ff_h263_decode_frame(AVCodecContext *avctx,
void *data, int *data_size,
- const uint8_t *buf, int buf_size);
+ AVPacket *avpkt);
int ff_h263_decode_end(AVCodecContext *avctx);
void h263_encode_mb(MpegEncContext *s,
DCTELEM block[6][64],
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h263dec.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h263dec.c
index 503c3b6ba..92beb1bd3 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h263dec.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h263dec.c
@@ -25,6 +25,7 @@
* H.263 decoder.
*/
+#include "libavutil/cpu.h"
#include "internal.h"
#include "avcodec.h"
#include "dsputil.h"
@@ -321,8 +322,10 @@ static int decode_slice(MpegEncContext *s){
int ff_h263_decode_frame(AVCodecContext *avctx,
void *data, int *data_size,
- const uint8_t *buf, int buf_size)
+ AVPacket *avpkt)
{
+ const uint8_t *buf = avpkt->data;
+ int buf_size = avpkt->size;
MpegEncContext *s = avctx->priv_data;
int ret;
AVFrame *pict = data;
@@ -542,7 +545,7 @@ retry:
#endif
#if HAVE_MMX
- if(s->codec_id == CODEC_ID_MPEG4 && s->xvid_build>=0 && avctx->idct_algo == FF_IDCT_AUTO && (mm_support() & FF_MM_MMX)){
+ if (s->codec_id == CODEC_ID_MPEG4 && s->xvid_build>=0 && avctx->idct_algo == FF_IDCT_AUTO && (av_get_cpu_flags() & AV_CPU_FLAG_MMX)) {
avctx->idct_algo= FF_IDCT_XVIDMMX;
avctx->coded_width= 0; // force reinit
// dsputil_init(&s->dsp, avctx);
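
The mm_support()/FF_MM_MMX pair used here is replaced by av_get_cpu_flags()/AV_CPU_FLAG_MMX from the newly included libavutil/cpu.h. A short sketch of the replacement pattern, applicable wherever the old call appeared:

    /* Runtime CPU feature check (sketch). */
    int cpu_flags = av_get_cpu_flags();
    if (cpu_flags & AV_CPU_FLAG_MMX) {
        /* MMX is available; safe to select MMX code paths such as FF_IDCT_XVIDMMX */
    }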
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264.c
index bcc6e6b8f..7ab3e6311 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264.c
@@ -1250,14 +1250,9 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
if(is_h264){
- idct_add = h->h264dsp.h264_idct_add;
- idct_dc_add = h->h264dsp.h264_idct_dc_add;
- for(i=16; i<16+8; i++){
- if(h->non_zero_count_cache[ scan8[i] ])
- idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
- else if(h->mb[i*16])
- idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
- }
+ h->h264dsp.h264_idct_add8(dest, block_offset,
+ h->mb, uvlinesize,
+ h->non_zero_count_cache);
}else{
for(i=16; i<16+8; i++){
if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
@@ -2891,8 +2886,10 @@ static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
static int decode_frame(AVCodecContext *avctx,
void *data, int *data_size,
- const uint8_t *buf, int buf_size)
+ AVPacket *avpkt)
{
+ const uint8_t *buf = avpkt->data;
+ int buf_size = avpkt->size;
H264Context *h = avctx->priv_data;
MpegEncContext *s = &h->s;
AVFrame *pict = data;
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264.h
index d87f9d01a..64db7072a 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264.h
@@ -607,12 +607,12 @@ typedef struct H264Context{
int sp_for_switch_flag;
int slice_qs_delta;
int slice_qp_delta;
- unsigned int first_mb_in_slice;
- int bit_offset_to_slice_data;
- int raw_slice_type;
- int64_t outputed_rtstart;
- void* dxva_slice_long;
- int ref_pic_flag;
+ unsigned int first_mb_in_slice;
+ int bit_offset_to_slice_data;
+ int raw_slice_type;
+ int64_t outputed_rtstart;
+ void* dxva_slice_long;
+ int ref_pic_flag;
// <== End patch MPC
}H264Context;
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264_ps.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264_ps.c
index 40b71ec24..96f99ab49 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264_ps.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/h264_ps.c
@@ -344,7 +344,7 @@ int ff_h264_decode_seq_parameter_set(H264Context *h){
sps->mb_width = get_ue_golomb(&s->gb) + 1;
sps->mb_height= get_ue_golomb(&s->gb) + 1;
if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 ||
- av_check_image_size(16*sps->mb_width, 16*sps->mb_height, 0, h->s.avctx)){
+ av_image_check_size(16*sps->mb_width, 16*sps->mb_height, 0, h->s.avctx)){
av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
goto fail;
}
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/imgconvert.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/imgconvert.c
index f6ea7a7f1..eb2cdc376 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/imgconvert.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/imgconvert.c
@@ -37,7 +37,7 @@
#include "libavutil/pixdesc.h"
#include "libavcore/imgutils.h"
-#if HAVE_MMX
+#if HAVE_MMX && HAVE_YASM
#include "x86/dsputil_mmx.h"
#endif
@@ -751,13 +751,13 @@ int ff_set_systematic_pal(uint32_t pal[256], enum PixelFormat pix_fmt){
#if LIBAVCODEC_VERSION_MAJOR < 53
int ff_fill_linesize(AVPicture *picture, enum PixelFormat pix_fmt, int width)
{
- return av_fill_image_linesizes(picture->linesize, pix_fmt, width);
+ return av_image_fill_linesizes(picture->linesize, pix_fmt, width);
}
int ff_fill_pointer(AVPicture *picture, uint8_t *ptr, enum PixelFormat pix_fmt,
int height)
{
- return av_fill_image_pointers(picture->data, pix_fmt, height, ptr, picture->linesize);
+ return av_image_fill_pointers(picture->data, pix_fmt, height, ptr, picture->linesize);
}
#endif
@@ -813,64 +813,33 @@ static int avg_bits_per_pixel(enum PixelFormat pix_fmt)
return bits;
}
+#if LIBAVCODEC_VERSION_MAJOR < 53
void ff_img_copy_plane(uint8_t *dst, int dst_wrap,
const uint8_t *src, int src_wrap,
int width, int height)
{
- if((!dst) || (!src))
- return;
- for(;height > 0; height--) {
- memcpy(dst, src, width);
- dst += dst_wrap;
- src += src_wrap;
- }
+ av_image_copy_plane(dst, dst_wrap, src, src_wrap, width, height);
}
-#if LIBAVCODEC_VERSION_MAJOR < 53
int ff_get_plane_bytewidth(enum PixelFormat pix_fmt, int width, int plane)
{
- return av_get_image_linesize(pix_fmt, width, plane);
+ return av_image_get_linesize(pix_fmt, width, plane);
}
-#endif
void av_picture_data_copy(uint8_t *dst_data[4], int dst_linesize[4],
uint8_t *src_data[4], int src_linesize[4],
enum PixelFormat pix_fmt, int width, int height)
{
- int i;
- const PixFmtInfo *pf = &pix_fmt_info[pix_fmt];
- const AVPixFmtDescriptor *desc = &av_pix_fmt_descriptors[pix_fmt];
-
- switch(pf->pixel_type) {
- case FF_PIXEL_PACKED:
- case FF_PIXEL_PLANAR:
- for(i = 0; i < pf->nb_channels; i++) {
- int h;
- int bwidth = av_get_image_linesize(pix_fmt, width, i);
- h = height;
- if (i == 1 || i == 2) {
- h= -((-height)>>desc->log2_chroma_h);
- }
- ff_img_copy_plane(dst_data[i], dst_linesize[i],
- src_data[i], src_linesize[i],
- bwidth, h);
- }
- break;
- case FF_PIXEL_PALETTE:
- ff_img_copy_plane(dst_data[0], dst_linesize[0],
- src_data[0], src_linesize[0],
- width, height);
- /* copy the palette */
- memcpy(dst_data[1], src_data[1], 4*256);
- break;
- }
+ av_image_copy(dst_data, dst_linesize, src_data, src_linesize,
+ pix_fmt, width, height);
}
+#endif
void av_picture_copy(AVPicture *dst, const AVPicture *src,
enum PixelFormat pix_fmt, int width, int height)
{
- av_picture_data_copy(dst->data, dst->linesize, src->data,
- src->linesize, pix_fmt, width, height);
+ av_image_copy(dst->data, dst->linesize, src->data,
+ src->linesize, pix_fmt, width, height);
}
/* 2x2 -> 1x1 */
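
The bodies above now forward to the libavcore imgutils replacements. A minimal usage sketch of av_image_copy_plane(), assuming dst and src are AVPicture structs and that plane and plane_height (chroma-subsampled where needed) are computed by the caller:

    /* Copy one plane row by row (sketch). Note the fifth argument is the
       bytewidth (useful bytes per row), not the stride. */
    int bytewidth = av_image_get_linesize(pix_fmt, width, plane);
    av_image_copy_plane(dst.data[plane], dst.linesize[plane],
                        src.data[plane], src.linesize[plane],
                        bytewidth, plane_height);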
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/libamr.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/libamr.c
index 695fc4ca7..1c0ba07a8 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/libamr.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/libamr.c
@@ -137,8 +137,10 @@ static av_cold int amr_nb_decode_close(AVCodecContext *avctx)
}
static int amr_nb_decode_frame(AVCodecContext * avctx, void *data,
- int *data_size, const uint8_t * buf, int buf_size)
+ int *data_size, AVPacket *avpkt)
{
+ const uint8_t *buf = avpkt->data;
+ int buf_size = avpkt->size;
AMRContext *s = avctx->priv_data;
const uint8_t *amrData = buf;
static const uint8_t block_size[16] = { 12, 13, 15, 17, 19, 20, 26, 31, 5, 0, 0, 0, 0, 0, 0, 0 };
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mjpegdec.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mjpegdec.c
index fcb4f2011..8095eff9f 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mjpegdec.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mjpegdec.c
@@ -219,7 +219,7 @@ int ff_mjpeg_decode_sof(MJpegDecodeContext *s)
height= s->height;
av_log(s->avctx, AV_LOG_DEBUG, "sof0: picture: %dx%d\n", width, height);
- if(av_check_image_size(width, height, 0, s->avctx))
+ if(av_image_check_size(width, height, 0, s->avctx))
return -1;
nb_components = get_bits(&s->gb, 8);
@@ -1205,8 +1205,10 @@ found:
int ff_mjpeg_decode_frame(AVCodecContext *avctx,
void *data, int *data_size,
- const uint8_t *buf, int buf_size)
+ AVPacket *avpkt)
{
+ const uint8_t *buf = avpkt->data;
+ int buf_size = avpkt->size;
MJpegDecodeContext *s = avctx->priv_data;
const uint8_t *buf_end, *buf_ptr;
int start_code;
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mjpegdec.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mjpegdec.h
index 5a9da5902..bbf734b56 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mjpegdec.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mjpegdec.h
@@ -111,7 +111,7 @@ int ff_mjpeg_decode_init(AVCodecContext *avctx);
int ff_mjpeg_decode_end(AVCodecContext *avctx);
int ff_mjpeg_decode_frame(AVCodecContext *avctx,
void *data, int *data_size,
- const uint8_t *buf, int buf_size);
+ AVPacket *avpkt);
int ff_mjpeg_decode_dqt(MJpegDecodeContext *s);
int ff_mjpeg_decode_dht(MJpegDecodeContext *s);
int ff_mjpeg_decode_sof(MJpegDecodeContext *s);
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mlpdec.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mlpdec.c
index e392d971b..46fc32891 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mlpdec.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mlpdec.c
@@ -943,8 +943,10 @@ static int output_data(MLPDecodeContext *m, unsigned int substr,
* otherwise the number of bytes consumed. */
static int read_access_unit(AVCodecContext *avctx, void* data, int *data_size,
- const uint8_t *buf, int buf_size)
+ AVPacket *avpkt)
{
+ const uint8_t *buf = avpkt->data;
+ int buf_size = avpkt->size;
MLPDecodeContext *m = avctx->priv_data;
GetBitContext gb;
unsigned int length, substr;
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mpeg12.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mpeg12.c
index 93268052a..6aab625c8 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mpeg12.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mpeg12.c
@@ -2235,8 +2235,10 @@ static int decode_chunks(AVCodecContext *avctx,
/* handle buffering and image synchronisation */
static int mpeg_decode_frame(AVCodecContext *avctx,
void *data, int *data_size,
- const uint8_t *buf, int buf_size)
+ AVPacket *avpkt)
{
+ const uint8_t *buf = avpkt->data;
+ int buf_size = avpkt->size;
Mpeg1Context *s = avctx->priv_data;
AVFrame *picture = data;
MpegEncContext *s2 = &s->mpeg_enc_ctx;
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/nellymoserdec.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/nellymoserdec.c
index 1f0a51ff9..6729c13fd 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/nellymoserdec.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/nellymoserdec.c
@@ -154,7 +154,9 @@ static av_cold int decode_init(AVCodecContext * avctx) {
static int decode_tag(AVCodecContext * avctx,
void *data, int *data_size,
- const uint8_t * buf, int buf_size) {
+ AVPacket *avpkt) {
+ const uint8_t *buf = avpkt->data;
+ int buf_size = avpkt->size;
NellyMoserDecodeContext *s = avctx->priv_data;
int blocks, i;
int16_t* samples;
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/rv10.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/rv10.c
index 829c9b3b1..df3b664e7 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/rv10.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/rv10.c
@@ -370,7 +370,7 @@ static int rv20_decode_picture_header(MpegEncContext *s)
}
if(new_w != s->width || new_h != s->height){
av_log(s->avctx, AV_LOG_DEBUG, "attempting to change resolution to %dx%d\n", new_w, new_h);
- if (av_check_image_size(new_w, new_h, 0, s->avctx) < 0)
+ if (av_image_check_size(new_w, new_h, 0, s->avctx) < 0)
return -1;
MPV_common_end(s);
avcodec_set_dimensions(s->avctx, new_w, new_h);
@@ -645,8 +645,10 @@ static int get_slice_offset(AVCodecContext *avctx, const uint8_t *buf, int n)
static int rv10_decode_frame(AVCodecContext *avctx,
void *data, int *data_size,
- const uint8_t *buf, int buf_size)
+ AVPacket *avpkt)
{
+ const uint8_t *buf = avpkt->data;
+ int buf_size = avpkt->size;
MpegEncContext *s = avctx->priv_data;
int i;
AVFrame *pict = data;
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/rv34.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/rv34.c
index ce92c7850..b586aa0b8 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/rv34.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/rv34.c
@@ -1410,8 +1410,10 @@ static int get_slice_offset(AVCodecContext *avctx, const uint8_t *buf, int n)
int ff_rv34_decode_frame(AVCodecContext *avctx,
void *data, int *data_size,
- const uint8_t *buf, int buf_size)
+ AVPacket *avpkt)
{
+ const uint8_t *buf = avpkt->data;
+ int buf_size = avpkt->size;
RV34DecContext *r = avctx->priv_data;
MpegEncContext *s = &r->s;
AVFrame *pict = data;
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/rv34.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/rv34.h
index 3d25af2b1..24a27ce48 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/rv34.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/rv34.h
@@ -124,7 +124,7 @@ typedef struct RV34DecContext{
*/
int ff_rv34_get_start_offset(GetBitContext *gb, int blocks);
int ff_rv34_decode_init(AVCodecContext *avctx);
-int ff_rv34_decode_frame(AVCodecContext *avctx, void *data, int *data_size, const uint8_t *buf, int buf_size);
+int ff_rv34_decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPacket *avpkt);
int ff_rv34_decode_end(AVCodecContext *avctx);
#endif /* AVCODEC_RV34_H */
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/rv40.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/rv40.c
index 13ba5b6ee..157169196 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/rv40.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/rv40.c
@@ -144,7 +144,7 @@ static int rv40_parse_slice_header(RV34DecContext *r, GetBitContext *gb, SliceIn
si->pts = get_bits(gb, 13);
if(!si->type || !get_bits1(gb))
rv40_parse_picture_size(gb, &w, &h);
- if(av_check_image_size(w, h, 0, r->s.avctx) < 0)
+ if(av_image_check_size(w, h, 0, r->s.avctx) < 0)
return -1;
si->width = w;
si->height = h;
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/sp5xdec.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/sp5xdec.c
index a63d52259..9db7d32ed 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/sp5xdec.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/sp5xdec.c
@@ -32,11 +32,11 @@
static int sp5x_decode_frame(AVCodecContext *avctx,
void *data, int *data_size,
- const uint8_t *buf, int buf_size)
+ AVPacket *avpkt)
{
-#if 0
- MJpegDecodeContext *s = avctx->priv_data;
-#endif
+ const uint8_t *buf = avpkt->data;
+ int buf_size = avpkt->size;
+ AVPacket avpkt_recoded;
const int qscale = 5;
const uint8_t *buf_ptr;
uint8_t *recoded;
@@ -47,7 +47,6 @@ static int sp5x_decode_frame(AVCodecContext *avctx,
buf_ptr = buf;
-#if 1
recoded = av_mallocz(buf_size + 1024);
if (!recoded)
return -1;
@@ -88,102 +87,13 @@ static int sp5x_decode_frame(AVCodecContext *avctx,
recoded[j++] = 0xD9;
avctx->flags &= ~CODEC_FLAG_EMU_EDGE;
- i = ff_mjpeg_decode_frame(avctx, data, data_size, recoded, j);
+ av_init_packet(&avpkt_recoded);
+ avpkt_recoded.data = recoded;
+ avpkt_recoded.size = j;
+ i = ff_mjpeg_decode_frame(avctx, data, data_size, &avpkt_recoded);
av_free(recoded);
-#else
- /* SOF */
- s->bits = 8;
- s->width = avctx->coded_width;
- s->height = avctx->coded_height;
- s->nb_components = 3;
- s->component_id[0] = 0;
- s->h_count[0] = 2;
- s->v_count[0] = 2;
- s->quant_index[0] = 0;
- s->component_id[1] = 1;
- s->h_count[1] = 1;
- s->v_count[1] = 1;
- s->quant_index[1] = 1;
- s->component_id[2] = 2;
- s->h_count[2] = 1;
- s->v_count[2] = 1;
- s->quant_index[2] = 1;
- s->h_max = 2;
- s->v_max = 2;
-
- s->qscale_table = av_mallocz((s->width+15)/16);
- avctx->pix_fmt = s->cs_itu601 ? PIX_FMT_YUV420P : PIX_FMT_YUVJ420P;
- s->interlaced = 0;
-
- s->picture.reference = 0;
- if (avctx->get_buffer(avctx, &s->picture) < 0)
- {
- av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
- return -1;
- }
-
- s->picture.pict_type = FF_I_TYPE;
- s->picture.key_frame = 1;
-
- for (i = 0; i < 3; i++)
- s->linesize[i] = s->picture.linesize[i] << s->interlaced;
-
- /* DQT */
- for (i = 0; i < 64; i++)
- {
- j = s->scantable.permutated[i];
- s->quant_matrixes[0][j] = sp5x_quant_table[(qscale * 2) + i];
- }
- s->qscale[0] = FFMAX(
- s->quant_matrixes[0][s->scantable.permutated[1]],
- s->quant_matrixes[0][s->scantable.permutated[8]]) >> 1;
-
- for (i = 0; i < 64; i++)
- {
- j = s->scantable.permutated[i];
- s->quant_matrixes[1][j] = sp5x_quant_table[(qscale * 2) + 1 + i];
- }
- s->qscale[1] = FFMAX(
- s->quant_matrixes[1][s->scantable.permutated[1]],
- s->quant_matrixes[1][s->scantable.permutated[8]]) >> 1;
-
- /* DHT */
-
- /* SOS */
- s->comp_index[0] = 0;
- s->nb_blocks[0] = s->h_count[0] * s->v_count[0];
- s->h_scount[0] = s->h_count[0];
- s->v_scount[0] = s->v_count[0];
- s->dc_index[0] = 0;
- s->ac_index[0] = 0;
-
- s->comp_index[1] = 1;
- s->nb_blocks[1] = s->h_count[1] * s->v_count[1];
- s->h_scount[1] = s->h_count[1];
- s->v_scount[1] = s->v_count[1];
- s->dc_index[1] = 1;
- s->ac_index[1] = 1;
-
- s->comp_index[2] = 2;
- s->nb_blocks[2] = s->h_count[2] * s->v_count[2];
- s->h_scount[2] = s->h_count[2];
- s->v_scount[2] = s->v_count[2];
- s->dc_index[2] = 1;
- s->ac_index[2] = 1;
-
- for (i = 0; i < 3; i++)
- s->last_dc[i] = 1024;
-
- s->mb_width = (s->width * s->h_max * 8 -1) / (s->h_max * 8);
- s->mb_height = (s->height * s->v_max * 8 -1) / (s->v_max * 8);
-
- init_get_bits(&s->gb, buf+14, (buf_size-14)*8);
-
- return mjpeg_decode_scan(s);
-#endif
-
return i;
}
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/svq1dec.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/svq1dec.c
index 74fede36a..2df76316d 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/svq1dec.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/svq1dec.c
@@ -642,8 +642,10 @@ static int svq1_decode_frame_header (GetBitContext *bitbuf,MpegEncContext *s) {
static int svq1_decode_frame(AVCodecContext *avctx,
void *data, int *data_size,
- const uint8_t *buf, int buf_size)
+ AVPacket *avpkt)
{
+ const uint8_t *buf = avpkt->data;
+ int buf_size = avpkt->size;
MpegEncContext *s=avctx->priv_data;
uint8_t *current, *previous;
int result, i, x, y, width, height;
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/svq3.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/svq3.c
index 752400fa5..d7fe4aa5c 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/svq3.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/svq3.c
@@ -917,8 +917,10 @@ static av_cold int svq3_decode_init(AVCodecContext *avctx)
static int svq3_decode_frame(AVCodecContext *avctx,
void *data, int *data_size,
- const uint8_t *buf, int buf_size)
+ AVPacket *avpkt)
{
+ const uint8_t *buf = avpkt->data;
+ int buf_size = avpkt->size;
MpegEncContext *const s = avctx->priv_data;
H264Context *const h = avctx->priv_data;
int m, mb_type;
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/utils.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/utils.c
index 240ae68f1..aad1d9521 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/utils.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/utils.c
@@ -212,7 +212,7 @@ void avcodec_align_dimensions(AVCodecContext *s, int *width, int *height){
#if LIBAVCODEC_VERSION_MAJOR < 53
int avcodec_check_dimensions(void *av_log_ctx, unsigned int w, unsigned int h){
- return av_check_image_size(w, h, 0, av_log_ctx);
+ return av_image_check_size(w, h, 0, av_log_ctx);
}
#endif
@@ -232,7 +232,7 @@ int avcodec_default_get_buffer(AVCodecContext *s, AVFrame *pic){
return -1;
}
- if(av_check_image_size(w, h, 0, s))
+ if(av_image_check_size(w, h, 0, s))
return -1;
if(s->internal_buffer==NULL){
@@ -280,7 +280,7 @@ int avcodec_default_get_buffer(AVCodecContext *s, AVFrame *pic){
do {
// NOTE: do not align linesizes individually, this breaks e.g. assumptions
// that linesize[0] == 2*linesize[1] in the MPEG-encoder for 4:2:2
- av_fill_image_linesizes(picture.linesize, s->pix_fmt, w);
+ av_image_fill_linesizes(picture.linesize, s->pix_fmt, w);
// increase alignment of w for next try (rhs gives the lowest bit set in w)
w += w & ~(w-1);
@@ -290,7 +290,7 @@ int avcodec_default_get_buffer(AVCodecContext *s, AVFrame *pic){
}
} while (unaligned);
- tmpsize = av_fill_image_pointers(picture.data, s->pix_fmt, h, NULL, picture.linesize);
+ tmpsize = av_image_fill_pointers(picture.data, s->pix_fmt, h, NULL, picture.linesize);
if (tmpsize < 0)
return -1;
@@ -489,7 +489,7 @@ int attribute_align_arg avcodec_open(AVCodecContext *avctx, AVCodec *codec)
#define SANE_NB_CHANNELS 128U
if (((avctx->coded_width || avctx->coded_height)
- && av_check_image_size(avctx->coded_width, avctx->coded_height, 0, avctx))
+ && av_image_check_size(avctx->coded_width, avctx->coded_height, 0, avctx))
|| avctx->channels > SANE_NB_CHANNELS) {
ret = AVERROR(EINVAL);
goto free_and_end;
@@ -498,14 +498,13 @@ int attribute_align_arg avcodec_open(AVCodecContext *avctx, AVCodec *codec)
avctx->codec = codec;
avctx->codec_id = codec->id; /* ffdshow custom code */
avctx->frame_number = 0;
- if(avctx->codec->init){
- if(avctx->codec_type == AVMEDIA_TYPE_VIDEO &&
- avctx->codec->max_lowres < avctx->lowres){
- av_log(avctx, AV_LOG_ERROR, "The maximum value for lowres supported by the decoder is %d\n",
- avctx->codec->max_lowres);
- goto free_and_end;
- }
+ if (avctx->codec->max_lowres < avctx->lowres) {
+ av_log(avctx, AV_LOG_ERROR, "The maximum value for lowres supported by the decoder is %d\n",
+ avctx->codec->max_lowres);
+ goto free_and_end;
+ }
+ if(avctx->codec->init){
ret = avctx->codec->init(avctx);
if (ret < 0) {
goto free_and_end;
@@ -548,7 +547,7 @@ int attribute_align_arg avcodec_encode_video(AVCodecContext *avctx, uint8_t *buf
av_log(avctx, AV_LOG_ERROR, "buffer smaller than minimum size\n");
return -1;
}
- if(av_check_image_size(avctx->width, avctx->height, 0, avctx))
+ if(av_image_check_size(avctx->width, avctx->height, 0, avctx))
return -1;
if((avctx->codec->capabilities & CODEC_CAP_DELAY) || pict){
int ret = avctx->codec->encode(avctx, buf, buf_size, pict);
@@ -560,18 +559,34 @@ int attribute_align_arg avcodec_encode_video(AVCodecContext *avctx, uint8_t *buf
return 0;
}
+#if LIBAVCODEC_VERSION_MAJOR < 53
int attribute_align_arg avcodec_decode_video(AVCodecContext *avctx, AVFrame *picture,
int *got_picture_ptr,
const uint8_t *buf, int buf_size)
{
+ AVPacket avpkt;
+ av_init_packet(&avpkt);
+ avpkt.data = buf;
+ avpkt.size = buf_size;
+ // HACK for CorePNG to decode as normal PNG by default
+ avpkt.flags = AV_PKT_FLAG_KEY;
+
+ return avcodec_decode_video2(avctx, picture, got_picture_ptr, &avpkt);
+}
+#endif
+
+int attribute_align_arg avcodec_decode_video2(AVCodecContext *avctx, AVFrame *picture,
+ int *got_picture_ptr,
+ AVPacket *avpkt)
+{
int ret;
*got_picture_ptr= 0;
- if((avctx->coded_width||avctx->coded_height) && av_check_image_size(avctx->coded_width, avctx->coded_height, 0, avctx))
+ if((avctx->coded_width||avctx->coded_height) && av_image_check_size(avctx->coded_width, avctx->coded_height, 0, avctx))
return -1;
- if((avctx->codec->capabilities & CODEC_CAP_DELAY) || buf_size){
+ if((avctx->codec->capabilities & CODEC_CAP_DELAY) || avpkt->size){
ret = avctx->codec->decode(avctx, picture, got_picture_ptr,
- buf, buf_size);
+ avpkt);
emms_c(); //needed to avoid an emms_c() call before every return;
@@ -583,13 +598,27 @@ int attribute_align_arg avcodec_decode_video(AVCodecContext *avctx, AVFrame *pic
return ret;
}
+#if LIBAVCODEC_VERSION_MAJOR < 53
int attribute_align_arg avcodec_decode_audio2(AVCodecContext *avctx, int16_t *samples,
int *frame_size_ptr,
const uint8_t *buf, int buf_size)
{
+ AVPacket avpkt;
+ av_init_packet(&avpkt);
+ avpkt.data = buf;
+ avpkt.size = buf_size;
+
+ return avcodec_decode_audio3(avctx, samples, frame_size_ptr, &avpkt);
+}
+#endif
+
+int attribute_align_arg avcodec_decode_audio3(AVCodecContext *avctx, int16_t *samples,
+ int *frame_size_ptr,
+ AVPacket *avpkt)
+{
int ret;
- if((avctx->codec->capabilities & CODEC_CAP_DELAY) || buf_size){
+ if((avctx->codec->capabilities & CODEC_CAP_DELAY) || avpkt->size){
//FIXME remove the check below _after_ ensuring that all audio decoders check that the available space is enough
if(*frame_size_ptr < AVCODEC_MAX_AUDIO_FRAME_SIZE){
av_log(avctx, AV_LOG_ERROR, "buffer smaller than AVCODEC_MAX_AUDIO_FRAME_SIZE\n");
@@ -601,8 +630,7 @@ int attribute_align_arg avcodec_decode_audio2(AVCodecContext *avctx, int16_t *sa
return -1;
}
- ret = avctx->codec->decode(avctx, samples, frame_size_ptr,
- buf, buf_size);
+ ret = avctx->codec->decode(avctx, samples, frame_size_ptr, avpkt);
avctx->frame_number++;
}else{
ret= 0;
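
As with video, a caller-side sketch of the new audio entry point; buf and buf_size are illustrative, and the output buffer sizing follows the AVCODEC_MAX_AUDIO_FRAME_SIZE check enforced just above:

    /* Decode one audio packet via the AVPacket API (sketch). */
    AVPacket pkt;
    int16_t *samples = av_malloc(AVCODEC_MAX_AUDIO_FRAME_SIZE);
    int out_size = AVCODEC_MAX_AUDIO_FRAME_SIZE; /* in: buffer size; out: bytes decoded */
    int used;

    av_init_packet(&pkt);
    pkt.data = buf;
    pkt.size = buf_size;

    used = avcodec_decode_audio3(avctx, samples, &out_size, &pkt);
    if (used >= 0 && out_size > 0) {
        /* out_size bytes of PCM are now in samples */
    }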
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vc1dec.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vc1dec.c
index 47cd5e811..fe960473f 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vc1dec.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vc1dec.c
@@ -3135,8 +3135,10 @@ static av_cold int vc1_decode_init(AVCodecContext *avctx)
*/
static int vc1_decode_frame(AVCodecContext *avctx,
void *data, int *data_size,
- const uint8_t *buf, int buf_size)
+ AVPacket *avpkt)
{
+ const uint8_t *buf = avpkt->data;
+ int buf_size = avpkt->size;
VC1Context *v = avctx->priv_data;
MpegEncContext *s = &v->s;
AVFrame *pict = data;
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vc1dsp.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vc1dsp.c
index 8634bef69..aab169479 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vc1dsp.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vc1dsp.c
@@ -630,7 +630,7 @@ av_cold void ff_vc1dsp_init(DSPContext* dsp, AVCodecContext *avctx) {
dsp->vc1_v_loop_filter16 = vc1_v_loop_filter16_c;
dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_c;
- dsp->put_vc1_mspel_pixels_tab[ 0] = ff_put_vc1_mspel_mc00_c;
+ dsp->put_vc1_mspel_pixels_tab[ 0] = ff_put_pixels8x8_c;
dsp->put_vc1_mspel_pixels_tab[ 1] = put_vc1_mspel_mc10_c;
dsp->put_vc1_mspel_pixels_tab[ 2] = put_vc1_mspel_mc20_c;
dsp->put_vc1_mspel_pixels_tab[ 3] = put_vc1_mspel_mc30_c;
@@ -647,7 +647,7 @@ av_cold void ff_vc1dsp_init(DSPContext* dsp, AVCodecContext *avctx) {
dsp->put_vc1_mspel_pixels_tab[14] = put_vc1_mspel_mc23_c;
dsp->put_vc1_mspel_pixels_tab[15] = put_vc1_mspel_mc33_c;
- dsp->avg_vc1_mspel_pixels_tab[ 0] = ff_avg_vc1_mspel_mc00_c;
+ dsp->avg_vc1_mspel_pixels_tab[ 0] = ff_avg_pixels8x8_c;
dsp->avg_vc1_mspel_pixels_tab[ 1] = avg_vc1_mspel_mc10_c;
dsp->avg_vc1_mspel_pixels_tab[ 2] = avg_vc1_mspel_mc20_c;
dsp->avg_vc1_mspel_pixels_tab[ 3] = avg_vc1_mspel_mc30_c;
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp3.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp3.c
index 4d0e2a4b0..437ecffa3 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp3.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp3.c
@@ -1329,12 +1329,11 @@ static void vp3_draw_horiz_band(Vp3DecodeContext *s, int y)
return;
h= y - s->last_slice_end;
+ s->last_slice_end= y;
y -= h;
if (!s->flipped_image) {
- if (y == 0)
- h -= s->height - s->avctx->height; // account for non-mod16
- y = s->height - y - h;
+ y = s->avctx->height - y - h;
}
cy = y >> s->chroma_y_shift;
@@ -1345,7 +1344,6 @@ static void vp3_draw_horiz_band(Vp3DecodeContext *s, int y)
emms_c();
s->avctx->draw_horiz_band(s->avctx, &s->current_frame, offset, y, 3, h);
- s->last_slice_end= y + h;
}
/*
@@ -1516,7 +1514,7 @@ static void render_slice(Vp3DecodeContext *s, int slice)
* dispatch (slice - 1);
*/
- vp3_draw_horiz_band(s, FFMIN(64*slice + 64-16, s->height-16));
+ vp3_draw_horiz_band(s, FFMIN((32 << s->chroma_y_shift) * (slice + 1) -16, s->height-16));
}
/*
@@ -1737,8 +1735,10 @@ static int64_t theora_granule_frame(Vp3DecodeContext *s,int64_t granulepos)
*/
static int vp3_decode_frame(AVCodecContext *avctx,
void *data, int *data_size,
- const uint8_t *buf, int buf_size)
+ AVPacket *avpkt)
{
+ const uint8_t *buf = avpkt->data;
+ int buf_size = avpkt->size;
Vp3DecodeContext *s = avctx->priv_data;
GetBitContext gb;
static int counter = 0;
@@ -1868,7 +1868,7 @@ static int vp3_decode_frame(AVCodecContext *avctx,
int row = (s->height >> (3+(i && s->chroma_y_shift))) - 1;
apply_loop_filter(s, i, row, row+1);
}
- vp3_draw_horiz_band(s, s->height);
+ vp3_draw_horiz_band(s, s->avctx->height);
/* MPC Custom code begin */
#if 0
@@ -2009,7 +2009,7 @@ static int theora_decode_header(AVCodecContext *avctx, GetBitContext *gb)
Vp3DecodeContext *s = avctx->priv_data;
int visible_width, visible_height, colorspace;
int offset_x = 0, offset_y = 0;
- AVRational fps;
+ AVRational fps, aspect;
s->theora = get_bits_long(gb, 24);
av_log(avctx, AV_LOG_DEBUG, "Theora bitstream version %X\n", s->theora);
@@ -2025,7 +2025,7 @@ static int theora_decode_header(AVCodecContext *avctx, GetBitContext *gb)
visible_width = s->width = get_bits(gb, 16) << 4;
visible_height = s->height = get_bits(gb, 16) << 4;
- if(av_check_image_size(s->width, s->height, 0, avctx)){
+ if(av_image_check_size(s->width, s->height, 0, avctx)){
av_log(avctx, AV_LOG_ERROR, "Invalid dimensions (%dx%d)\n", s->width, s->height);
s->width= s->height= 0;
return -1;
@@ -2046,8 +2046,13 @@ static int theora_decode_header(AVCodecContext *avctx, GetBitContext *gb)
fps.den, fps.num, 1<<30);
}
- avctx->sample_aspect_ratio.num = get_bits_long(gb, 24);
- avctx->sample_aspect_ratio.den = get_bits_long(gb, 24);
+ aspect.num = get_bits_long(gb, 24);
+ aspect.den = get_bits_long(gb, 24);
+ if (aspect.num && aspect.den) {
+ av_reduce(&avctx->sample_aspect_ratio.num,
+ &avctx->sample_aspect_ratio.den,
+ aspect.num, aspect.den, 1<<30);
+ }
if (s->theora < 0x030200)
s->keyframe_frequency_force=1<<get_bits(gb, 5); /* keyframe frequency force */ /* ffdshow custom code */
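
The aspect-ratio fix above uses av_reduce() to normalize the 24-bit numerator/denominator pair and keep both terms within 1<<30. A one-line sketch of its behaviour, with illustrative values:

    AVRational sar;
    av_reduce(&sar.num, &sar.den, 160, 90, 1 << 30); /* sar becomes 16/9 */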
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp56.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp56.c
index 3f5569eb8..4b937a49e 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp56.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp56.c
@@ -482,11 +482,12 @@ static int vp56_size_changed(AVCodecContext *avctx)
}
int ff_vp56_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
- const uint8_t *buf, int buf_size)
+ AVPacket *avpkt)
{
+ const uint8_t *buf = avpkt->data;
VP56Context *s = avctx->priv_data;
AVFrame *const p = s->framep[VP56_FRAME_CURRENT];
- int remaining_buf_size = buf_size;
+ int remaining_buf_size = avpkt->size;
int is_alpha, av_uninit(alpha_offset);
if (s->has_alpha) {
@@ -635,7 +636,7 @@ int ff_vp56_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
*(AVFrame*)data = *p;
*data_size = sizeof(AVFrame);
- return buf_size;
+ return avpkt->size;
}
av_cold void ff_vp56_init(AVCodecContext *avctx, int flip, int has_alpha)
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp56.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp56.h
index f9500cfb5..da6b1b64b 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp56.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp56.h
@@ -174,7 +174,7 @@ void ff_vp56_init(AVCodecContext *avctx, int flip, int has_alpha);
int ff_vp56_free(AVCodecContext *avctx);
void ff_vp56_init_dequant(VP56Context *s, int quantizer);
int ff_vp56_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
- const uint8_t *buf, int buf_size);
+ AVPacket *avpkt);
/**
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp56dsp.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp56dsp.c
index d67604b01..0fe9e3e55 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp56dsp.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp56dsp.c
@@ -84,7 +84,7 @@ void ff_vp56dsp_init(VP56DSPContext *s, enum CodecID codec)
s->edge_filter_ver = vp6_edge_filter_ver;
if (CONFIG_VP6_DECODER) {
- s->vp6_filter_diag4= ff_vp6_filter_diag4_c;
+ s->vp6_filter_diag4 = ff_vp6_filter_diag4_c;
}
}
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp8.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp8.c
index d8d7cdaa2..de97489a8 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp8.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp8.c
@@ -223,7 +223,7 @@ static void vp8_decode_flush(AVCodecContext *avctx)
static int update_dimensions(VP8Context *s, int width, int height)
{
- if (av_check_image_size(width, height, 0, s->avctx))
+ if (av_image_check_size(width, height, 0, s->avctx))
return AVERROR_INVALIDDATA;
vp8_decode_flush(s->avctx);
@@ -1471,14 +1471,14 @@ static void filter_mb_row_simple(VP8Context *s, int mb_y)
}
static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
- const uint8_t *buf, int buf_size)
+ AVPacket *avpkt)
{
VP8Context *s = avctx->priv_data;
int ret, mb_x, mb_y, i, y, referenced;
enum AVDiscard skip_thresh;
AVFrame *av_uninit(curframe);
- if ((ret = decode_frame_header(s, buf, buf_size)) < 0)
+ if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0)
return ret;
referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT
@@ -1644,7 +1644,7 @@ skip_decode:
*data_size = sizeof(AVFrame);
}
- return buf_size;
+ return avpkt->size;
}
static av_cold int vp8_decode_init(AVCodecContext *avctx)
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/config.asm b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/config.asm
new file mode 100644
index 000000000..8efc2c533
--- /dev/null
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/config.asm
@@ -0,0 +1 @@
+%define ARCH_X86
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_h264_template_mmx.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_h264_template_mmx.c
deleted file mode 100644
index ff359230c..000000000
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_h264_template_mmx.c
+++ /dev/null
@@ -1,304 +0,0 @@
-/*
- * Copyright (c) 2005 Zoltan Hidvegi <hzoli -a- hzoli -d- com>,
- * Loren Merritt
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-/**
- * MMX optimized version of (put|avg)_h264_chroma_mc8.
- * H264_CHROMA_MC8_TMPL must be defined to the desired function name
- * H264_CHROMA_OP must be defined to empty for put and pavgb/pavgusb for avg
- * H264_CHROMA_MC8_MV0 must be defined to a (put|avg)_pixels8 function
- */
-static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y, const uint64_t *rnd_reg)
-{
- DECLARE_ALIGNED(8, uint64_t, AA);
- DECLARE_ALIGNED(8, uint64_t, DD);
- int i;
-
- if(y==0 && x==0) {
- /* no filter needed */
- H264_CHROMA_MC8_MV0(dst, src, stride, h);
- return;
- }
-
- assert(x<8 && y<8 && x>=0 && y>=0);
-
- if(y==0 || x==0)
- {
- /* 1 dimensional filter only */
- const int dxy = x ? 1 : stride;
-
- __asm__ volatile(
- "movd %0, %%mm5\n\t"
- "movq %1, %%mm4\n\t"
- "movq %2, %%mm6\n\t" /* mm6 = rnd >> 3 */
- "punpcklwd %%mm5, %%mm5\n\t"
- "punpckldq %%mm5, %%mm5\n\t" /* mm5 = B = x */
- "pxor %%mm7, %%mm7\n\t"
- "psubw %%mm5, %%mm4\n\t" /* mm4 = A = 8-x */
- :: "rm"(x+y), "m"(ff_pw_8), "m"(*(rnd_reg+1)));
-
- for(i=0; i<h; i++) {
- __asm__ volatile(
- /* mm0 = src[0..7], mm1 = src[1..8] */
- "movq %0, %%mm0\n\t"
- "movq %1, %%mm2\n\t"
- :: "m"(src[0]), "m"(src[dxy]));
-
- __asm__ volatile(
- /* [mm0,mm1] = A * src[0..7] */
- /* [mm2,mm3] = B * src[1..8] */
- "movq %%mm0, %%mm1\n\t"
- "movq %%mm2, %%mm3\n\t"
- "punpcklbw %%mm7, %%mm0\n\t"
- "punpckhbw %%mm7, %%mm1\n\t"
- "punpcklbw %%mm7, %%mm2\n\t"
- "punpckhbw %%mm7, %%mm3\n\t"
- "pmullw %%mm4, %%mm0\n\t"
- "pmullw %%mm4, %%mm1\n\t"
- "pmullw %%mm5, %%mm2\n\t"
- "pmullw %%mm5, %%mm3\n\t"
-
- /* dst[0..7] = (A * src[0..7] + B * src[1..8] + (rnd >> 3)) >> 3 */
- "paddw %%mm6, %%mm0\n\t"
- "paddw %%mm6, %%mm1\n\t"
- "paddw %%mm2, %%mm0\n\t"
- "paddw %%mm3, %%mm1\n\t"
- "psrlw $3, %%mm0\n\t"
- "psrlw $3, %%mm1\n\t"
- "packuswb %%mm1, %%mm0\n\t"
- H264_CHROMA_OP(%0, %%mm0)
- "movq %%mm0, %0\n\t"
- : "=m" (dst[0]));
-
- src += stride;
- dst += stride;
- }
- return;
- }
-
- /* general case, bilinear */
- __asm__ volatile("movd %2, %%mm4\n\t"
- "movd %3, %%mm6\n\t"
- "punpcklwd %%mm4, %%mm4\n\t"
- "punpcklwd %%mm6, %%mm6\n\t"
- "punpckldq %%mm4, %%mm4\n\t" /* mm4 = x words */
- "punpckldq %%mm6, %%mm6\n\t" /* mm6 = y words */
- "movq %%mm4, %%mm5\n\t"
- "pmullw %%mm6, %%mm4\n\t" /* mm4 = x * y */
- "psllw $3, %%mm5\n\t"
- "psllw $3, %%mm6\n\t"
- "movq %%mm5, %%mm7\n\t"
- "paddw %%mm6, %%mm7\n\t"
- "movq %%mm4, %1\n\t" /* DD = x * y */
- "psubw %%mm4, %%mm5\n\t" /* mm5 = B = 8x - xy */
- "psubw %%mm4, %%mm6\n\t" /* mm6 = C = 8y - xy */
- "paddw %4, %%mm4\n\t"
- "psubw %%mm7, %%mm4\n\t" /* mm4 = A = xy - (8x+8y) + 64 */
- "pxor %%mm7, %%mm7\n\t"
- "movq %%mm4, %0\n\t"
- : "=m" (AA), "=m" (DD) : "rm" (x), "rm" (y), "m" (ff_pw_64));
-
- __asm__ volatile(
- /* mm0 = src[0..7], mm1 = src[1..8] */
- "movq %0, %%mm0\n\t"
- "movq %1, %%mm1\n\t"
- : : "m" (src[0]), "m" (src[1]));
-
- for(i=0; i<h; i++) {
- src += stride;
-
- __asm__ volatile(
- /* mm2 = A * src[0..3] + B * src[1..4] */
- /* mm3 = A * src[4..7] + B * src[5..8] */
- "movq %%mm0, %%mm2\n\t"
- "movq %%mm1, %%mm3\n\t"
- "punpckhbw %%mm7, %%mm0\n\t"
- "punpcklbw %%mm7, %%mm1\n\t"
- "punpcklbw %%mm7, %%mm2\n\t"
- "punpckhbw %%mm7, %%mm3\n\t"
- "pmullw %0, %%mm0\n\t"
- "pmullw %0, %%mm2\n\t"
- "pmullw %%mm5, %%mm1\n\t"
- "pmullw %%mm5, %%mm3\n\t"
- "paddw %%mm1, %%mm2\n\t"
- "paddw %%mm0, %%mm3\n\t"
- : : "m" (AA));
-
- __asm__ volatile(
- /* [mm2,mm3] += C * src[0..7] */
- "movq %0, %%mm0\n\t"
- "movq %%mm0, %%mm1\n\t"
- "punpcklbw %%mm7, %%mm0\n\t"
- "punpckhbw %%mm7, %%mm1\n\t"
- "pmullw %%mm6, %%mm0\n\t"
- "pmullw %%mm6, %%mm1\n\t"
- "paddw %%mm0, %%mm2\n\t"
- "paddw %%mm1, %%mm3\n\t"
- : : "m" (src[0]));
-
- __asm__ volatile(
- /* [mm2,mm3] += D * src[1..8] */
- "movq %1, %%mm1\n\t"
- "movq %%mm1, %%mm0\n\t"
- "movq %%mm1, %%mm4\n\t"
- "punpcklbw %%mm7, %%mm0\n\t"
- "punpckhbw %%mm7, %%mm4\n\t"
- "pmullw %2, %%mm0\n\t"
- "pmullw %2, %%mm4\n\t"
- "paddw %%mm0, %%mm2\n\t"
- "paddw %%mm4, %%mm3\n\t"
- "movq %0, %%mm0\n\t"
- : : "m" (src[0]), "m" (src[1]), "m" (DD));
-
- __asm__ volatile(
- /* dst[0..7] = ([mm2,mm3] + rnd) >> 6 */
- "paddw %1, %%mm2\n\t"
- "paddw %1, %%mm3\n\t"
- "psrlw $6, %%mm2\n\t"
- "psrlw $6, %%mm3\n\t"
- "packuswb %%mm3, %%mm2\n\t"
- H264_CHROMA_OP(%0, %%mm2)
- "movq %%mm2, %0\n\t"
- : "=m" (dst[0]) : "m" (*rnd_reg));
- dst+= stride;
- }
-}
-
-static void H264_CHROMA_MC4_TMPL(uint8_t *dst/*align 4*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y, const uint64_t *rnd_reg)
-{
- __asm__ volatile(
- "pxor %%mm7, %%mm7 \n\t"
- "movd %5, %%mm2 \n\t"
- "movd %6, %%mm3 \n\t"
- "movq "MANGLE(ff_pw_8)", %%mm4\n\t"
- "movq "MANGLE(ff_pw_8)", %%mm5\n\t"
- "punpcklwd %%mm2, %%mm2 \n\t"
- "punpcklwd %%mm3, %%mm3 \n\t"
- "punpcklwd %%mm2, %%mm2 \n\t"
- "punpcklwd %%mm3, %%mm3 \n\t"
- "psubw %%mm2, %%mm4 \n\t"
- "psubw %%mm3, %%mm5 \n\t"
-
- "movd (%1), %%mm0 \n\t"
- "movd 1(%1), %%mm6 \n\t"
- "add %3, %1 \n\t"
- "punpcklbw %%mm7, %%mm0 \n\t"
- "punpcklbw %%mm7, %%mm6 \n\t"
- "pmullw %%mm4, %%mm0 \n\t"
- "pmullw %%mm2, %%mm6 \n\t"
- "paddw %%mm0, %%mm6 \n\t"
-
- "1: \n\t"
- "movd (%1), %%mm0 \n\t"
- "movd 1(%1), %%mm1 \n\t"
- "add %3, %1 \n\t"
- "punpcklbw %%mm7, %%mm0 \n\t"
- "punpcklbw %%mm7, %%mm1 \n\t"
- "pmullw %%mm4, %%mm0 \n\t"
- "pmullw %%mm2, %%mm1 \n\t"
- "paddw %%mm0, %%mm1 \n\t"
- "movq %%mm1, %%mm0 \n\t"
- "pmullw %%mm5, %%mm6 \n\t"
- "pmullw %%mm3, %%mm1 \n\t"
- "paddw %4, %%mm6 \n\t"
- "paddw %%mm6, %%mm1 \n\t"
- "psrlw $6, %%mm1 \n\t"
- "packuswb %%mm1, %%mm1 \n\t"
- H264_CHROMA_OP4((%0), %%mm1, %%mm6)
- "movd %%mm1, (%0) \n\t"
- "add %3, %0 \n\t"
- "movd (%1), %%mm6 \n\t"
- "movd 1(%1), %%mm1 \n\t"
- "add %3, %1 \n\t"
- "punpcklbw %%mm7, %%mm6 \n\t"
- "punpcklbw %%mm7, %%mm1 \n\t"
- "pmullw %%mm4, %%mm6 \n\t"
- "pmullw %%mm2, %%mm1 \n\t"
- "paddw %%mm6, %%mm1 \n\t"
- "movq %%mm1, %%mm6 \n\t"
- "pmullw %%mm5, %%mm0 \n\t"
- "pmullw %%mm3, %%mm1 \n\t"
- "paddw %4, %%mm0 \n\t"
- "paddw %%mm0, %%mm1 \n\t"
- "psrlw $6, %%mm1 \n\t"
- "packuswb %%mm1, %%mm1 \n\t"
- H264_CHROMA_OP4((%0), %%mm1, %%mm0)
- "movd %%mm1, (%0) \n\t"
- "add %3, %0 \n\t"
- "sub $2, %2 \n\t"
- "jnz 1b \n\t"
- : "+r"(dst), "+r"(src), "+r"(h)
- : "r"((x86_reg)stride), "m"(*rnd_reg), "m"(x), "m"(y)
- );
-}
-
-#ifdef H264_CHROMA_MC2_TMPL
-static void H264_CHROMA_MC2_TMPL(uint8_t *dst/*align 2*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
-{
- int tmp = ((1<<16)-1)*x + 8;
- int CD= tmp*y;
- int AB= (tmp<<3) - CD;
- __asm__ volatile(
- /* mm5 = {A,B,A,B} */
- /* mm6 = {C,D,C,D} */
- "movd %0, %%mm5\n\t"
- "movd %1, %%mm6\n\t"
- "punpckldq %%mm5, %%mm5\n\t"
- "punpckldq %%mm6, %%mm6\n\t"
- "pxor %%mm7, %%mm7\n\t"
- /* mm0 = src[0,1,1,2] */
- "movd %2, %%mm2\n\t"
- "punpcklbw %%mm7, %%mm2\n\t"
- "pshufw $0x94, %%mm2, %%mm2\n\t"
- :: "r"(AB), "r"(CD), "m"(src[0]));
-
-
- __asm__ volatile(
- "1:\n\t"
- "add %4, %1\n\t"
- /* mm1 = A * src[0,1] + B * src[1,2] */
- "movq %%mm2, %%mm1\n\t"
- "pmaddwd %%mm5, %%mm1\n\t"
- /* mm0 = src[0,1,1,2] */
- "movd (%1), %%mm0\n\t"
- "punpcklbw %%mm7, %%mm0\n\t"
- "pshufw $0x94, %%mm0, %%mm0\n\t"
- /* mm1 += C * src[0,1] + D * src[1,2] */
- "movq %%mm0, %%mm2\n\t"
- "pmaddwd %%mm6, %%mm0\n\t"
- "paddw %3, %%mm1\n\t"
- "paddw %%mm0, %%mm1\n\t"
- /* dst[0,1] = pack((mm1 + 32) >> 6) */
- "psrlw $6, %%mm1\n\t"
- "packssdw %%mm7, %%mm1\n\t"
- "packuswb %%mm7, %%mm1\n\t"
- H264_CHROMA_OP4((%0), %%mm1, %%mm3)
- "movd %%mm1, %%esi\n\t"
- "movw %%si, (%0)\n\t"
- "add %4, %0\n\t"
- "sub $1, %2\n\t"
- "jnz 1b\n\t"
- : "+r" (dst), "+r"(src), "+r"(h)
- : "m" (ff_pw_32), "r"((x86_reg)stride)
- : "%esi");
-
-}
-#endif
-
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_h264_template_ssse3.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_h264_template_ssse3.c
deleted file mode 100644
index 0eceb74f2..000000000
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_h264_template_ssse3.c
+++ /dev/null
@@ -1,208 +0,0 @@
-/*
- * Copyright (c) 2008 Loren Merritt
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-/**
- * SSSE3 optimized version of (put|avg)_h264_chroma_mc8.
- * H264_CHROMA_MC8_TMPL must be defined to the desired function name
- * H264_CHROMA_MC8_MV0 must be defined to a (put|avg)_pixels8 function
- * AVG_OP must be defined to empty for put and the identify for avg
- */
-static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y, int rnd)
-{
- if(y==0 && x==0) {
- /* no filter needed */
- H264_CHROMA_MC8_MV0(dst, src, stride, h);
- return;
- }
-
- assert(x<8 && y<8 && x>=0 && y>=0);
-
- if(y==0 || x==0)
- {
- /* 1 dimensional filter only */
- __asm__ volatile(
- "movd %0, %%xmm7 \n\t"
- "movq %1, %%xmm6 \n\t"
- "pshuflw $0, %%xmm7, %%xmm7 \n\t"
- "movlhps %%xmm6, %%xmm6 \n\t"
- "movlhps %%xmm7, %%xmm7 \n\t"
- :: "r"(255*(x+y)+8), "m"(*(rnd?&ff_pw_4.a:&ff_pw_3))
- );
-
- if(x) {
- __asm__ volatile(
- "1: \n\t"
- "movq (%1), %%xmm0 \n\t"
- "movq 1(%1), %%xmm1 \n\t"
- "movq (%1,%3), %%xmm2 \n\t"
- "movq 1(%1,%3), %%xmm3 \n\t"
- "punpcklbw %%xmm1, %%xmm0 \n\t"
- "punpcklbw %%xmm3, %%xmm2 \n\t"
- "pmaddubsw %%xmm7, %%xmm0 \n\t"
- "pmaddubsw %%xmm7, %%xmm2 \n\t"
- AVG_OP("movq (%0), %%xmm4 \n\t")
- AVG_OP("movhps (%0,%3), %%xmm4 \n\t")
- "paddw %%xmm6, %%xmm0 \n\t"
- "paddw %%xmm6, %%xmm2 \n\t"
- "psrlw $3, %%xmm0 \n\t"
- "psrlw $3, %%xmm2 \n\t"
- "packuswb %%xmm2, %%xmm0 \n\t"
- AVG_OP("pavgb %%xmm4, %%xmm0 \n\t")
- "movq %%xmm0, (%0) \n\t"
- "movhps %%xmm0, (%0,%3) \n\t"
- "sub $2, %2 \n\t"
- "lea (%1,%3,2), %1 \n\t"
- "lea (%0,%3,2), %0 \n\t"
- "jg 1b \n\t"
- :"+r"(dst), "+r"(src), "+r"(h)
- :"r"((x86_reg)stride)
- );
- } else {
- __asm__ volatile(
- "1: \n\t"
- "movq (%1), %%xmm0 \n\t"
- "movq (%1,%3), %%xmm1 \n\t"
- "movdqa %%xmm1, %%xmm2 \n\t"
- "movq (%1,%3,2), %%xmm3 \n\t"
- "punpcklbw %%xmm1, %%xmm0 \n\t"
- "punpcklbw %%xmm3, %%xmm2 \n\t"
- "pmaddubsw %%xmm7, %%xmm0 \n\t"
- "pmaddubsw %%xmm7, %%xmm2 \n\t"
- AVG_OP("movq (%0), %%xmm4 \n\t")
- AVG_OP("movhps (%0,%3), %%xmm4 \n\t")
- "paddw %%xmm6, %%xmm0 \n\t"
- "paddw %%xmm6, %%xmm2 \n\t"
- "psrlw $3, %%xmm0 \n\t"
- "psrlw $3, %%xmm2 \n\t"
- "packuswb %%xmm2, %%xmm0 \n\t"
- AVG_OP("pavgb %%xmm4, %%xmm0 \n\t")
- "movq %%xmm0, (%0) \n\t"
- "movhps %%xmm0, (%0,%3) \n\t"
- "sub $2, %2 \n\t"
- "lea (%1,%3,2), %1 \n\t"
- "lea (%0,%3,2), %0 \n\t"
- "jg 1b \n\t"
- :"+r"(dst), "+r"(src), "+r"(h)
- :"r"((x86_reg)stride)
- );
- }
- return;
- }
-
- /* general case, bilinear */
- __asm__ volatile(
- "movd %0, %%xmm7 \n\t"
- "movd %1, %%xmm6 \n\t"
- "movdqa %2, %%xmm5 \n\t"
- "pshuflw $0, %%xmm7, %%xmm7 \n\t"
- "pshuflw $0, %%xmm6, %%xmm6 \n\t"
- "movlhps %%xmm7, %%xmm7 \n\t"
- "movlhps %%xmm6, %%xmm6 \n\t"
- :: "r"((x*255+8)*(8-y)), "r"((x*255+8)*y), "m"(*(rnd?&ff_pw_32:&ff_pw_28))
- );
-
- __asm__ volatile(
- "movq (%1), %%xmm0 \n\t"
- "movq 1(%1), %%xmm1 \n\t"
- "punpcklbw %%xmm1, %%xmm0 \n\t"
- "add %3, %1 \n\t"
- "1: \n\t"
- "movq (%1), %%xmm1 \n\t"
- "movq 1(%1), %%xmm2 \n\t"
- "movq (%1,%3), %%xmm3 \n\t"
- "movq 1(%1,%3), %%xmm4 \n\t"
- "lea (%1,%3,2), %1 \n\t"
- "punpcklbw %%xmm2, %%xmm1 \n\t"
- "punpcklbw %%xmm4, %%xmm3 \n\t"
- "movdqa %%xmm1, %%xmm2 \n\t"
- "movdqa %%xmm3, %%xmm4 \n\t"
- "pmaddubsw %%xmm7, %%xmm0 \n\t"
- "pmaddubsw %%xmm6, %%xmm1 \n\t"
- "pmaddubsw %%xmm7, %%xmm2 \n\t"
- "pmaddubsw %%xmm6, %%xmm3 \n\t"
- "paddw %%xmm5, %%xmm0 \n\t"
- "paddw %%xmm5, %%xmm2 \n\t"
- "paddw %%xmm0, %%xmm1 \n\t"
- "paddw %%xmm2, %%xmm3 \n\t"
- "movdqa %%xmm4, %%xmm0 \n\t"
- "psrlw $6, %%xmm1 \n\t"
- "psrlw $6, %%xmm3 \n\t"
- AVG_OP("movq (%0), %%xmm2 \n\t")
- AVG_OP("movhps (%0,%3), %%xmm2 \n\t")
- "packuswb %%xmm3, %%xmm1 \n\t"
- AVG_OP("pavgb %%xmm2, %%xmm1 \n\t")
- "movq %%xmm1, (%0)\n\t"
- "movhps %%xmm1, (%0,%3)\n\t"
- "sub $2, %2 \n\t"
- "lea (%0,%3,2), %0 \n\t"
- "jg 1b \n\t"
- :"+r"(dst), "+r"(src), "+r"(h)
- :"r"((x86_reg)stride)
- );
-}
-
-static void H264_CHROMA_MC4_TMPL(uint8_t *dst/*align 4*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
-{
- __asm__ volatile(
- "movd %0, %%mm7 \n\t"
- "movd %1, %%mm6 \n\t"
- "movq %2, %%mm5 \n\t"
- "pshufw $0, %%mm7, %%mm7 \n\t"
- "pshufw $0, %%mm6, %%mm6 \n\t"
- :: "r"((x*255+8)*(8-y)), "r"((x*255+8)*y), "m"(ff_pw_32)
- );
-
- __asm__ volatile(
- "movd (%1), %%mm0 \n\t"
- "punpcklbw 1(%1), %%mm0 \n\t"
- "add %3, %1 \n\t"
- "1: \n\t"
- "movd (%1), %%mm1 \n\t"
- "movd (%1,%3), %%mm3 \n\t"
- "punpcklbw 1(%1), %%mm1 \n\t"
- "punpcklbw 1(%1,%3), %%mm3 \n\t"
- "lea (%1,%3,2), %1 \n\t"
- "movq %%mm1, %%mm2 \n\t"
- "movq %%mm3, %%mm4 \n\t"
- "pmaddubsw %%mm7, %%mm0 \n\t"
- "pmaddubsw %%mm6, %%mm1 \n\t"
- "pmaddubsw %%mm7, %%mm2 \n\t"
- "pmaddubsw %%mm6, %%mm3 \n\t"
- "paddw %%mm5, %%mm0 \n\t"
- "paddw %%mm5, %%mm2 \n\t"
- "paddw %%mm0, %%mm1 \n\t"
- "paddw %%mm2, %%mm3 \n\t"
- "movq %%mm4, %%mm0 \n\t"
- "psrlw $6, %%mm1 \n\t"
- "psrlw $6, %%mm3 \n\t"
- "packuswb %%mm1, %%mm1 \n\t"
- "packuswb %%mm3, %%mm3 \n\t"
- AVG_OP("pavgb (%0), %%mm1 \n\t")
- AVG_OP("pavgb (%0,%3), %%mm3 \n\t")
- "movd %%mm1, (%0)\n\t"
- "movd %%mm3, (%0,%3)\n\t"
- "sub $2, %2 \n\t"
- "lea (%0,%3,2), %0 \n\t"
- "jg 1b \n\t"
- :"+r"(dst), "+r"(src), "+r"(h)
- :"r"((x86_reg)stride)
- );
-}
-
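The template deleted above generated the SSSE3 (put|avg)_h264_chroma_mc8 kernels; the same functionality now comes from the yasm file added further down in this diff (h264_chromamc.asm). As a reference for what these kernels compute, here is a minimal plain-C sketch of the rounded bilinear case, with a hypothetical name, assuming 0 <= x,y < 8 and the H.264 rounding constant 32 as in the assembly:

#include <stdint.h>

/* Plain-C sketch of rounded H.264 8-wide chroma MC: for each output pixel,
 * dst = (A*s[0] + B*s[1] + C*s[stride] + D*s[stride+1] + 32) >> 6,
 * where A=(8-x)(8-y), B=x(8-y), C=(8-x)y, D=xy (matches the register
 * comments in the deleted template). */
void put_chroma_mc8_c(uint8_t *dst, const uint8_t *src,
                      int stride, int h, int x, int y)
{
    const int A = (8 - x) * (8 - y), B = x * (8 - y);
    const int C = (8 - x) * y,       D = x * y;
    for (int i = 0; i < h; i++) {
        for (int j = 0; j < 8; j++)
            dst[j] = (A * src[j]          + B * src[j + 1] +
                      C * src[j + stride] + D * src[j + stride + 1] + 32) >> 6;
        dst += stride;
        src += stride;
    }
}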
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_mmx.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_mmx.c
index c4939ec65..995df0564 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_mmx.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_mmx.c
@@ -22,14 +22,13 @@
* MMX optimization by Nick Kurshev <nickols_k@mail.ru>
*/
+#include "libavutil/cpu.h"
#include "libavutil/x86_cpu.h"
#include "libavcodec/dsputil.h"
#include "libavcodec/h264dsp.h"
#include "libavcodec/mpegvideo.h"
#include "libavcodec/simple_idct.h"
#include "dsputil_mmx.h"
-#include "vp3dsp_mmx.h"
-#include "vp3dsp_sse2.h"
#include "idct_xvid.h"
//#undef NDEBUG
@@ -62,6 +61,7 @@ DECLARE_ALIGNED(8, const uint64_t, ff_pw_96 ) = 0x0060006000600060ULL;
DECLARE_ALIGNED(8, const uint64_t, ff_pw_128) = 0x0080008000800080ULL;
DECLARE_ALIGNED(8, const uint64_t, ff_pw_255) = 0x00ff00ff00ff00ffULL;
+DECLARE_ALIGNED(16, const xmm_reg, ff_pb_0 ) = {0x0000000000000000ULL, 0x0000000000000000ULL};
DECLARE_ALIGNED(16, const xmm_reg, ff_pb_1 ) = {0x0101010101010101ULL, 0x0101010101010101ULL};
DECLARE_ALIGNED(16, const xmm_reg, ff_pb_3 ) = {0x0303030303030303ULL, 0x0303030303030303ULL};
DECLARE_ALIGNED(16, const xmm_reg, ff_pb_4 ) = {0x0404040404040404ULL, 0x0404040404040404ULL};
@@ -70,7 +70,7 @@ DECLARE_ALIGNED(8, const uint64_t, ff_pb_1F ) = 0x1F1F1F1F1F1F1F1FULL;
DECLARE_ALIGNED(8, const uint64_t, ff_pb_3F ) = 0x3F3F3F3F3F3F3F3FULL;
DECLARE_ALIGNED(16, const xmm_reg, ff_pb_80 ) = {0x8080808080808080ULL, 0x8080808080808080ULL};
DECLARE_ALIGNED(8, const uint64_t, ff_pb_81 ) = 0x8181818181818181ULL;
-DECLARE_ALIGNED(8, const uint64_t, ff_pb_A1 ) = 0xA1A1A1A1A1A1A1A1ULL;
+DECLARE_ALIGNED(16, const xmm_reg, ff_pb_A1 ) = {0xA1A1A1A1A1A1A1A1ULL, 0xA1A1A1A1A1A1A1A1ULL};
DECLARE_ALIGNED(16, const xmm_reg, ff_pb_F8 ) = {0xF8F8F8F8F8F8F8F8ULL, 0xF8F8F8F8F8F8F8F8ULL};
DECLARE_ALIGNED(8, const uint64_t, ff_pb_FC ) = 0xFCFCFCFCFCFCFCFCULL;
DECLARE_ALIGNED(16, const xmm_reg, ff_pb_FE ) = {0xFEFEFEFEFEFEFEFEULL, 0xFEFEFEFEFEFEFEFEULL};
@@ -228,7 +228,7 @@ DECLARE_ALIGNED(16, const double, ff_pd_2)[2] = { 2.0, 2.0 };
/***********************************/
/* standard MMX */
-void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size)
+void ff_put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size)
{
const DCTELEM *p;
uint8_t *pix;
@@ -304,7 +304,7 @@ DECLARE_ASM_CONST(8, uint8_t, ff_vector128)[8] =
"movq %%mm3, (%0, %3, 2) \n\t"\
"movq %%mm4, (%0, %1) \n\t"
-void put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size)
+void ff_put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size)
{
x86_reg line_skip = line_size;
x86_reg line_skip3;
@@ -320,7 +320,7 @@ void put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int li
:"memory");
}
-void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size)
+void ff_add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size)
{
const DCTELEM *p;
uint8_t *pix;
@@ -728,35 +728,6 @@ static void h263_v_loop_filter_mmx(uint8_t *src, int stride, int qscale){
}
}
-static inline void transpose4x4(uint8_t *dst, uint8_t *src, int dst_stride, int src_stride){
- __asm__ volatile( //FIXME could save 1 instruction if done as 8x4 ...
- "movd %4, %%mm0 \n\t"
- "movd %5, %%mm1 \n\t"
- "movd %6, %%mm2 \n\t"
- "movd %7, %%mm3 \n\t"
- "punpcklbw %%mm1, %%mm0 \n\t"
- "punpcklbw %%mm3, %%mm2 \n\t"
- "movq %%mm0, %%mm1 \n\t"
- "punpcklwd %%mm2, %%mm0 \n\t"
- "punpckhwd %%mm2, %%mm1 \n\t"
- "movd %%mm0, %0 \n\t"
- "punpckhdq %%mm0, %%mm0 \n\t"
- "movd %%mm0, %1 \n\t"
- "movd %%mm1, %2 \n\t"
- "punpckhdq %%mm1, %%mm1 \n\t"
- "movd %%mm1, %3 \n\t"
-
- : "=m" (*(uint32_t*)(dst + 0*dst_stride)),
- "=m" (*(uint32_t*)(dst + 1*dst_stride)),
- "=m" (*(uint32_t*)(dst + 2*dst_stride)),
- "=m" (*(uint32_t*)(dst + 3*dst_stride))
- : "m" (*(uint32_t*)(src + 0*src_stride)),
- "m" (*(uint32_t*)(src + 1*src_stride)),
- "m" (*(uint32_t*)(src + 2*src_stride)),
- "m" (*(uint32_t*)(src + 3*src_stride))
- );
-}
-
static void h263_h_loop_filter_mmx(uint8_t *src, int stride, int qscale){
if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
const int strength= ff_h263_loop_filter_strength[qscale];
@@ -1820,8 +1791,59 @@ PREFETCH(prefetch_mmx2, prefetcht0)
PREFETCH(prefetch_3dnow, prefetch)
#undef PREFETCH
-#include "h264dsp_mmx.c"
-#include "rv40dsp_mmx.c"
+#include "h264_qpel_mmx.c"
+
+void ff_put_h264_chroma_mc8_mmx_rnd (uint8_t *dst, uint8_t *src,
+ int stride, int h, int x, int y);
+void ff_put_vc1_chroma_mc8_mmx_nornd (uint8_t *dst, uint8_t *src,
+ int stride, int h, int x, int y);
+void ff_put_rv40_chroma_mc8_mmx (uint8_t *dst, uint8_t *src,
+ int stride, int h, int x, int y);
+void ff_avg_h264_chroma_mc8_mmx2_rnd (uint8_t *dst, uint8_t *src,
+ int stride, int h, int x, int y);
+void ff_avg_vc1_chroma_mc8_mmx2_nornd (uint8_t *dst, uint8_t *src,
+ int stride, int h, int x, int y);
+void ff_avg_rv40_chroma_mc8_mmx2 (uint8_t *dst, uint8_t *src,
+ int stride, int h, int x, int y);
+void ff_avg_h264_chroma_mc8_3dnow_rnd (uint8_t *dst, uint8_t *src,
+ int stride, int h, int x, int y);
+void ff_avg_vc1_chroma_mc8_3dnow_nornd(uint8_t *dst, uint8_t *src,
+ int stride, int h, int x, int y);
+void ff_avg_rv40_chroma_mc8_3dnow (uint8_t *dst, uint8_t *src,
+ int stride, int h, int x, int y);
+
+void ff_put_h264_chroma_mc4_mmx (uint8_t *dst, uint8_t *src,
+ int stride, int h, int x, int y);
+void ff_put_rv40_chroma_mc4_mmx (uint8_t *dst, uint8_t *src,
+ int stride, int h, int x, int y);
+void ff_avg_h264_chroma_mc4_mmx2 (uint8_t *dst, uint8_t *src,
+ int stride, int h, int x, int y);
+void ff_avg_rv40_chroma_mc4_mmx2 (uint8_t *dst, uint8_t *src,
+ int stride, int h, int x, int y);
+void ff_avg_h264_chroma_mc4_3dnow (uint8_t *dst, uint8_t *src,
+ int stride, int h, int x, int y);
+void ff_avg_rv40_chroma_mc4_3dnow (uint8_t *dst, uint8_t *src,
+ int stride, int h, int x, int y);
+
+void ff_put_h264_chroma_mc2_mmx2 (uint8_t *dst, uint8_t *src,
+ int stride, int h, int x, int y);
+void ff_avg_h264_chroma_mc2_mmx2 (uint8_t *dst, uint8_t *src,
+ int stride, int h, int x, int y);
+
+void ff_put_h264_chroma_mc8_ssse3_rnd (uint8_t *dst, uint8_t *src,
+ int stride, int h, int x, int y);
+void ff_put_vc1_chroma_mc8_ssse3_nornd(uint8_t *dst, uint8_t *src,
+ int stride, int h, int x, int y);
+void ff_put_h264_chroma_mc4_ssse3 (uint8_t *dst, uint8_t *src,
+ int stride, int h, int x, int y);
+
+void ff_avg_h264_chroma_mc8_ssse3_rnd (uint8_t *dst, uint8_t *src,
+ int stride, int h, int x, int y);
+void ff_avg_vc1_chroma_mc8_ssse3_nornd(uint8_t *dst, uint8_t *src,
+ int stride, int h, int x, int y);
+void ff_avg_h264_chroma_mc4_ssse3 (uint8_t *dst, uint8_t *src,
+ int stride, int h, int x, int y);
+
/* CAVS specific */
void ff_put_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) {
@@ -1851,43 +1873,43 @@ void ff_avg_vc1_mspel_mc00_mmx2(uint8_t *dst, const uint8_t *src, int stride, in
static void ff_libmpeg2mmx_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
{
ff_mmx_idct (block);
- put_pixels_clamped_mmx(block, dest, line_size);
+ ff_put_pixels_clamped_mmx(block, dest, line_size);
}
static void ff_libmpeg2mmx_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
{
ff_mmx_idct (block);
- add_pixels_clamped_mmx(block, dest, line_size);
+ ff_add_pixels_clamped_mmx(block, dest, line_size);
}
static void ff_libmpeg2mmx2_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
{
ff_mmxext_idct (block);
- put_pixels_clamped_mmx(block, dest, line_size);
+ ff_put_pixels_clamped_mmx(block, dest, line_size);
}
static void ff_libmpeg2mmx2_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
{
ff_mmxext_idct (block);
- add_pixels_clamped_mmx(block, dest, line_size);
+ ff_add_pixels_clamped_mmx(block, dest, line_size);
}
#endif
static void ff_idct_xvid_mmx_put(uint8_t *dest, int line_size, DCTELEM *block)
{
ff_idct_xvid_mmx (block);
- put_pixels_clamped_mmx(block, dest, line_size);
+ ff_put_pixels_clamped_mmx(block, dest, line_size);
}
static void ff_idct_xvid_mmx_add(uint8_t *dest, int line_size, DCTELEM *block)
{
ff_idct_xvid_mmx (block);
- add_pixels_clamped_mmx(block, dest, line_size);
+ ff_add_pixels_clamped_mmx(block, dest, line_size);
}
static void ff_idct_xvid_mmx2_put(uint8_t *dest, int line_size, DCTELEM *block)
{
ff_idct_xvid_mmx2 (block);
- put_pixels_clamped_mmx(block, dest, line_size);
+ ff_put_pixels_clamped_mmx(block, dest, line_size);
}
static void ff_idct_xvid_mmx2_add(uint8_t *dest, int line_size, DCTELEM *block)
{
ff_idct_xvid_mmx2 (block);
- add_pixels_clamped_mmx(block, dest, line_size);
+ ff_add_pixels_clamped_mmx(block, dest, line_size);
}
static void vorbis_inverse_coupling_3dnow(float *mag, float *ang, int blocksize)
@@ -2376,6 +2398,19 @@ static void float_to_int16_sse2(int16_t *dst, const float *src, long len){
);
}
+void ff_vp3_idct_mmx(int16_t *input_data);
+void ff_vp3_idct_put_mmx(uint8_t *dest, int line_size, DCTELEM *block);
+void ff_vp3_idct_add_mmx(uint8_t *dest, int line_size, DCTELEM *block);
+
+void ff_vp3_idct_dc_add_mmx2(uint8_t *dest, int line_size, const DCTELEM *block);
+
+void ff_vp3_v_loop_filter_mmx2(uint8_t *src, int stride, int *bounding_values);
+void ff_vp3_h_loop_filter_mmx2(uint8_t *src, int stride, int *bounding_values);
+
+void ff_vp3_idct_sse2(int16_t *input_data);
+void ff_vp3_idct_put_sse2(uint8_t *dest, int line_size, DCTELEM *block);
+void ff_vp3_idct_add_sse2(uint8_t *dest, int line_size, DCTELEM *block);
+
void ff_float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len);
void ff_float_to_int16_interleave6_3dnow(int16_t *dst, const float **src, int len);
void ff_float_to_int16_interleave6_3dn2(int16_t *dst, const float **src, int len);
@@ -2387,20 +2422,8 @@ int32_t ff_scalarproduct_and_madd_int16_ssse3(int16_t *v1, const int16_t *v2, co
void ff_add_hfyu_median_prediction_mmx2(uint8_t *dst, const uint8_t *top, const uint8_t *diff, int w, int *left, int *left_top);
int ff_add_hfyu_left_prediction_ssse3(uint8_t *dst, const uint8_t *src, int w, int left);
int ff_add_hfyu_left_prediction_sse4(uint8_t *dst, const uint8_t *src, int w, int left);
-void ff_x264_deblock_v_luma_sse2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0);
-void ff_x264_deblock_h_luma_sse2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0);
-void ff_x264_deblock_h_luma_intra_mmxext(uint8_t *pix, int stride, int alpha, int beta);
-void ff_x264_deblock_v_luma_intra_sse2(uint8_t *pix, int stride, int alpha, int beta);
-void ff_x264_deblock_h_luma_intra_sse2(uint8_t *pix, int stride, int alpha, int beta);
-
-#if HAVE_YASM && ARCH_X86_32
-void ff_x264_deblock_v8_luma_intra_mmxext(uint8_t *pix, int stride, int alpha, int beta);
-static void ff_x264_deblock_v_luma_intra_mmxext(uint8_t *pix, int stride, int alpha, int beta)
-{
- ff_x264_deblock_v8_luma_intra_mmxext(pix+0, stride, alpha, beta);
- ff_x264_deblock_v8_luma_intra_mmxext(pix+8, stride, alpha, beta);
-}
-#elif !HAVE_YASM
+
+#if !HAVE_YASM
#define ff_float_to_int16_interleave6_sse(a,b,c) float_to_int16_interleave_misc_sse(a,b,c,6)
#define ff_float_to_int16_interleave6_3dnow(a,b,c) float_to_int16_interleave_misc_3dnow(a,b,c,6)
#define ff_float_to_int16_interleave6_3dn2(a,b,c) float_to_int16_interleave_misc_3dnow(a,b,c,6)
@@ -2500,10 +2523,10 @@ float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order);
void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
{
- int mm_flags = mm_support();
+ int mm_flags = av_get_cpu_flags();
if (avctx->dsp_mask) {
- if (avctx->dsp_mask & FF_MM_FORCE)
+ if (avctx->dsp_mask & AV_CPU_FLAG_FORCE)
mm_flags |= (avctx->dsp_mask & 0xffff);
else
mm_flags &= ~(avctx->dsp_mask & 0xffff);
@@ -2511,20 +2534,20 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
#if 0
av_log(avctx, AV_LOG_INFO, "libavcodec: CPU flags:");
- if (mm_flags & FF_MM_MMX)
+ if (mm_flags & AV_CPU_FLAG_MMX)
av_log(avctx, AV_LOG_INFO, " mmx");
- if (mm_flags & FF_MM_MMX2)
+ if (mm_flags & AV_CPU_FLAG_MMX2)
av_log(avctx, AV_LOG_INFO, " mmx2");
- if (mm_flags & FF_MM_3DNOW)
+ if (mm_flags & AV_CPU_FLAG_3DNOW)
av_log(avctx, AV_LOG_INFO, " 3dnow");
- if (mm_flags & FF_MM_SSE)
+ if (mm_flags & AV_CPU_FLAG_SSE)
av_log(avctx, AV_LOG_INFO, " sse");
- if (mm_flags & FF_MM_SSE2)
+ if (mm_flags & AV_CPU_FLAG_SSE2)
av_log(avctx, AV_LOG_INFO, " sse2");
av_log(avctx, AV_LOG_INFO, "\n");
#endif
- if (mm_flags & FF_MM_MMX) {
+ if (mm_flags & AV_CPU_FLAG_MMX) {
const int idct_algo= avctx->idct_algo;
if(avctx->lowres==0){
@@ -2535,7 +2558,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->idct_permutation_type= FF_SIMPLE_IDCT_PERM;
#if CONFIG_GPL
}else if(idct_algo==FF_IDCT_LIBMPEG2MMX){
- if(mm_flags & FF_MM_MMX2){
+ if(mm_flags & AV_CPU_FLAG_MMX2){
c->idct_put= ff_libmpeg2mmx2_idct_put;
c->idct_add= ff_libmpeg2mmx2_idct_add;
c->idct = ff_mmxext_idct;
@@ -2547,8 +2570,8 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
#endif
}else if((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || CONFIG_VP6_DECODER) &&
- idct_algo==FF_IDCT_VP3){
- if(mm_flags & FF_MM_SSE2){
+ idct_algo==FF_IDCT_VP3 && HAVE_YASM){
+ if(mm_flags & AV_CPU_FLAG_SSE2){
c->idct_put= ff_vp3_idct_put_sse2;
c->idct_add= ff_vp3_idct_add_sse2;
c->idct = ff_vp3_idct_sse2;
@@ -2562,12 +2585,12 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
}else if(idct_algo==FF_IDCT_CAVS){
c->idct_permutation_type= FF_TRANSPOSE_IDCT_PERM;
}else if(idct_algo==FF_IDCT_XVIDMMX){
- if(mm_flags & FF_MM_SSE2){
+ if(mm_flags & AV_CPU_FLAG_SSE2){
c->idct_put= ff_idct_xvid_sse2_put;
c->idct_add= ff_idct_xvid_sse2_add;
c->idct = ff_idct_xvid_sse2;
c->idct_permutation_type= FF_SSE2_IDCT_PERM;
- }else if(mm_flags & FF_MM_MMX2){
+ }else if(mm_flags & AV_CPU_FLAG_MMX2){
c->idct_put= ff_idct_xvid_mmx2_put;
c->idct_add= ff_idct_xvid_mmx2_add;
c->idct = ff_idct_xvid_mmx2;
@@ -2579,12 +2602,12 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
}
}
- c->put_pixels_clamped = put_pixels_clamped_mmx;
- c->put_signed_pixels_clamped = put_signed_pixels_clamped_mmx;
- c->add_pixels_clamped = add_pixels_clamped_mmx;
+ c->put_pixels_clamped = ff_put_pixels_clamped_mmx;
+ c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_mmx;
+ c->add_pixels_clamped = ff_add_pixels_clamped_mmx;
c->clear_block = clear_block_mmx;
c->clear_blocks = clear_blocks_mmx;
- if (mm_flags & FF_MM_SSE){
+ if (mm_flags & AV_CPU_FLAG_SSE){
c->clear_block = clear_block_sse;
c->clear_blocks = clear_blocks_sse;
}
@@ -2615,14 +2638,17 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->h263_v_loop_filter= h263_v_loop_filter_mmx;
c->h263_h_loop_filter= h263_h_loop_filter_mmx;
}
- c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_mmx_rnd;
- c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_mmx;
- c->put_no_rnd_vc1_chroma_pixels_tab[0]= put_vc1_chroma_mc8_mmx_nornd;
- c->put_rv40_chroma_pixels_tab[0]= put_rv40_chroma_mc8_mmx;
- c->put_rv40_chroma_pixels_tab[1]= put_rv40_chroma_mc4_mmx;
+#if HAVE_YASM
+ c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_mmx_rnd;
+ c->put_h264_chroma_pixels_tab[1]= ff_put_h264_chroma_mc4_mmx;
+ c->put_no_rnd_vc1_chroma_pixels_tab[0]= ff_put_vc1_chroma_mc8_mmx_nornd;
+
+ c->put_rv40_chroma_pixels_tab[0]= ff_put_rv40_chroma_mc8_mmx;
+ c->put_rv40_chroma_pixels_tab[1]= ff_put_rv40_chroma_mc4_mmx;
+#endif
- if (mm_flags & FF_MM_MMX2) {
+ if (mm_flags & AV_CPU_FLAG_MMX2) {
c->prefetch = prefetch_mmx2;
c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2;
@@ -2647,12 +2673,12 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2;
c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2;
- if (CONFIG_VP3_DECODER) {
+ if (CONFIG_VP3_DECODER && HAVE_YASM) {
c->vp3_v_loop_filter= ff_vp3_v_loop_filter_mmx2;
c->vp3_h_loop_filter= ff_vp3_h_loop_filter_mmx2;
}
}
- if (CONFIG_VP3_DECODER) {
+ if (CONFIG_VP3_DECODER && HAVE_YASM) {
c->vp3_idct_dc_add = ff_vp3_idct_dc_add_mmx2;
}
@@ -2699,21 +2725,21 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, mmx2);
SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, mmx2);
- c->avg_rv40_chroma_pixels_tab[0]= avg_rv40_chroma_mc8_mmx2;
- c->avg_rv40_chroma_pixels_tab[1]= avg_rv40_chroma_mc4_mmx2;
+#if HAVE_YASM
+ c->avg_rv40_chroma_pixels_tab[0]= ff_avg_rv40_chroma_mc8_mmx2;
+ c->avg_rv40_chroma_pixels_tab[1]= ff_avg_rv40_chroma_mc4_mmx2;
- c->avg_no_rnd_vc1_chroma_pixels_tab[0]= avg_vc1_chroma_mc8_mmx2_nornd;
+ c->avg_no_rnd_vc1_chroma_pixels_tab[0]= ff_avg_vc1_chroma_mc8_mmx2_nornd;
- c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_mmx2_rnd;
- c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_mmx2;
- c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_mmx2;
- c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_mmx2;
+ c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_mmx2_rnd;
+ c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_mmx2;
+ c->avg_h264_chroma_pixels_tab[2]= ff_avg_h264_chroma_mc2_mmx2;
+ c->put_h264_chroma_pixels_tab[2]= ff_put_h264_chroma_mc2_mmx2;
-#if HAVE_YASM
c->add_hfyu_median_prediction = ff_add_hfyu_median_prediction_mmx2;
#endif
#if HAVE_7REGS && HAVE_TEN_OPERANDS
- if( mm_flags&FF_MM_3DNOW )
+ if( mm_flags&AV_CPU_FLAG_3DNOW )
c->add_hfyu_median_prediction = add_hfyu_median_prediction_cmov;
#endif
@@ -2721,7 +2747,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
ff_vc1dsp_init_mmx(c, avctx);
c->add_png_paeth_prediction= add_png_paeth_prediction_mmx2;
- } else if (mm_flags & FF_MM_3DNOW) {
+ } else if (mm_flags & AV_CPU_FLAG_3DNOW) {
c->prefetch = prefetch_3dnow;
c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow;
@@ -2772,11 +2798,15 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, 3dnow);
SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, 3dnow);
- c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_3dnow_rnd;
- c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_3dnow;
+#if HAVE_YASM
+ c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_3dnow_rnd;
+ c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_3dnow;
+
+ c->avg_no_rnd_vc1_chroma_pixels_tab[0]= ff_avg_vc1_chroma_mc8_3dnow_nornd;
- c->avg_rv40_chroma_pixels_tab[0]= avg_rv40_chroma_mc8_3dnow;
- c->avg_rv40_chroma_pixels_tab[1]= avg_rv40_chroma_mc4_3dnow;
+ c->avg_rv40_chroma_pixels_tab[0]= ff_avg_rv40_chroma_mc8_3dnow;
+ c->avg_rv40_chroma_pixels_tab[1]= ff_avg_rv40_chroma_mc4_3dnow;
+#endif
}
@@ -2785,13 +2815,13 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->put_h264_qpel_pixels_tab[1][x+y*4] = put_h264_qpel8_mc##x##y##_##CPU;\
c->avg_h264_qpel_pixels_tab[0][x+y*4] = avg_h264_qpel16_mc##x##y##_##CPU;\
c->avg_h264_qpel_pixels_tab[1][x+y*4] = avg_h264_qpel8_mc##x##y##_##CPU;
- if((mm_flags & FF_MM_SSE2) && !(mm_flags & FF_MM_3DNOW)){
+ if((mm_flags & AV_CPU_FLAG_SSE2) && !(mm_flags & AV_CPU_FLAG_3DNOW)){
// these functions are slower than mmx on AMD, but faster on Intel
c->put_pixels_tab[0][0] = put_pixels16_sse2;
c->avg_pixels_tab[0][0] = avg_pixels16_sse2;
H264_QPEL_FUNCS(0, 0, sse2);
}
- if(mm_flags & FF_MM_SSE2){
+ if(mm_flags & AV_CPU_FLAG_SSE2){
H264_QPEL_FUNCS(0, 1, sse2);
H264_QPEL_FUNCS(0, 2, sse2);
H264_QPEL_FUNCS(0, 3, sse2);
@@ -2806,7 +2836,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
H264_QPEL_FUNCS(3, 3, sse2);
}
#if HAVE_SSSE3
- if(mm_flags & FF_MM_SSSE3){
+ if(mm_flags & AV_CPU_FLAG_SSSE3){
H264_QPEL_FUNCS(1, 0, ssse3);
H264_QPEL_FUNCS(1, 1, ssse3);
H264_QPEL_FUNCS(1, 2, ssse3);
@@ -2819,16 +2849,16 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
H264_QPEL_FUNCS(3, 1, ssse3);
H264_QPEL_FUNCS(3, 2, ssse3);
H264_QPEL_FUNCS(3, 3, ssse3);
- c->put_no_rnd_vc1_chroma_pixels_tab[0]= put_vc1_chroma_mc8_ssse3_nornd;
- c->avg_no_rnd_vc1_chroma_pixels_tab[0]= avg_vc1_chroma_mc8_ssse3_nornd;
- c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_ssse3_rnd;
- c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_ssse3_rnd;
- c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_ssse3;
- c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_ssse3;
c->add_png_paeth_prediction= add_png_paeth_prediction_ssse3;
#if HAVE_YASM
+ c->put_no_rnd_vc1_chroma_pixels_tab[0]= ff_put_vc1_chroma_mc8_ssse3_nornd;
+ c->avg_no_rnd_vc1_chroma_pixels_tab[0]= ff_avg_vc1_chroma_mc8_ssse3_nornd;
+ c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_ssse3_rnd;
+ c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_ssse3_rnd;
+ c->put_h264_chroma_pixels_tab[1]= ff_put_h264_chroma_mc4_ssse3;
+ c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_ssse3;
c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_ssse3;
- if (mm_flags & FF_MM_SSE4) // not really sse4, just slow on Conroe
+ if (mm_flags & AV_CPU_FLAG_SSE4) // not really sse4, just slow on Conroe
c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_sse4;
#endif
}
@@ -2838,7 +2868,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
* todo: test if it still causes crashes
*/
#if ARCH_X86_32
- if(mm_flags & FF_MM_3DNOW){
+ if(mm_flags & AV_CPU_FLAG_3DNOW){
c->vorbis_inverse_coupling = vorbis_inverse_coupling_3dnow;
c->vector_fmul = vector_fmul_3dnow;
if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
@@ -2846,14 +2876,14 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->float_to_int16_interleave = float_to_int16_interleave_3dnow;
}
}
- if(mm_flags & FF_MM_3DNOWEXT){
+ if(mm_flags & AV_CPU_FLAG_3DNOWEXT){
c->vector_fmul_reverse = vector_fmul_reverse_3dnow2;
c->vector_fmul_window = vector_fmul_window_3dnow2;
if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
c->float_to_int16_interleave = float_to_int16_interleave_3dn2;
}
}
- if(mm_flags & FF_MM_SSE){
+ if(mm_flags & AV_CPU_FLAG_SSE){
c->vorbis_inverse_coupling = vorbis_inverse_coupling_sse;
c->ac3_downmix = ac3_downmix_sse;
c->vector_fmul = vector_fmul_sse;
@@ -2869,9 +2899,9 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
#endif
#endif
}
- if(mm_flags & FF_MM_3DNOW)
+ if(mm_flags & AV_CPU_FLAG_3DNOW)
c->vector_fmul_add = vector_fmul_add_3dnow; // faster than sse
- if(mm_flags & FF_MM_SSE2){
+ if(mm_flags & AV_CPU_FLAG_SSE2){
c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_sse2;
c->float_to_int16 = float_to_int16_sse2;
c->float_to_int16_interleave = float_to_int16_interleave_sse2;
@@ -2883,92 +2913,6 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
dsputilenc_init_mmx(c, avctx);
}
-#if CONFIG_H264DSP
-void ff_h264dsp_init_x86(H264DSPContext *c)
-{
- int mm_flags = mm_support();
-
- if (mm_flags & FF_MM_MMX) {
- c->h264_idct_dc_add=
- c->h264_idct_add= ff_h264_idct_add_mmx;
- c->h264_idct8_dc_add=
- c->h264_idct8_add= ff_h264_idct8_add_mmx;
-
- c->h264_idct_add16 = ff_h264_idct_add16_mmx;
- c->h264_idct8_add4 = ff_h264_idct8_add4_mmx;
- c->h264_idct_add8 = ff_h264_idct_add8_mmx;
- c->h264_idct_add16intra= ff_h264_idct_add16intra_mmx;
-
- if (mm_flags & FF_MM_MMX2) {
- c->h264_idct_dc_add= ff_h264_idct_dc_add_mmx2;
- c->h264_idct8_dc_add= ff_h264_idct8_dc_add_mmx2;
- c->h264_idct_add16 = ff_h264_idct_add16_mmx2;
- c->h264_idct8_add4 = ff_h264_idct8_add4_mmx2;
- c->h264_idct_add8 = ff_h264_idct_add8_mmx2;
- c->h264_idct_add16intra= ff_h264_idct_add16intra_mmx2;
-
- c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_mmx2;
- c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_mmx2;
- c->h264_v_loop_filter_chroma= h264_v_loop_filter_chroma_mmx2;
- c->h264_h_loop_filter_chroma= h264_h_loop_filter_chroma_mmx2;
- c->h264_v_loop_filter_chroma_intra= h264_v_loop_filter_chroma_intra_mmx2;
- c->h264_h_loop_filter_chroma_intra= h264_h_loop_filter_chroma_intra_mmx2;
- c->h264_loop_filter_strength= h264_loop_filter_strength_mmx2;
-
- c->weight_h264_pixels_tab[0]= ff_h264_weight_16x16_mmx2;
- c->weight_h264_pixels_tab[1]= ff_h264_weight_16x8_mmx2;
- c->weight_h264_pixels_tab[2]= ff_h264_weight_8x16_mmx2;
- c->weight_h264_pixels_tab[3]= ff_h264_weight_8x8_mmx2;
- c->weight_h264_pixels_tab[4]= ff_h264_weight_8x4_mmx2;
- c->weight_h264_pixels_tab[5]= ff_h264_weight_4x8_mmx2;
- c->weight_h264_pixels_tab[6]= ff_h264_weight_4x4_mmx2;
- c->weight_h264_pixels_tab[7]= ff_h264_weight_4x2_mmx2;
-
- c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16x16_mmx2;
- c->biweight_h264_pixels_tab[1]= ff_h264_biweight_16x8_mmx2;
- c->biweight_h264_pixels_tab[2]= ff_h264_biweight_8x16_mmx2;
- c->biweight_h264_pixels_tab[3]= ff_h264_biweight_8x8_mmx2;
- c->biweight_h264_pixels_tab[4]= ff_h264_biweight_8x4_mmx2;
- c->biweight_h264_pixels_tab[5]= ff_h264_biweight_4x8_mmx2;
- c->biweight_h264_pixels_tab[6]= ff_h264_biweight_4x4_mmx2;
- c->biweight_h264_pixels_tab[7]= ff_h264_biweight_4x2_mmx2;
- }
- if(mm_flags & FF_MM_SSE2){
- c->h264_idct8_add = ff_h264_idct8_add_sse2;
- c->h264_idct8_add4= ff_h264_idct8_add4_sse2;
- }
-
-#if HAVE_YASM
- if (mm_flags & FF_MM_MMX2){
-#if ARCH_X86_32
- c->h264_v_loop_filter_luma_intra = ff_x264_deblock_v_luma_intra_mmxext;
- c->h264_h_loop_filter_luma_intra = ff_x264_deblock_h_luma_intra_mmxext;
-#endif
- if( mm_flags&FF_MM_SSE2 ){
- c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16x16_sse2;
- c->biweight_h264_pixels_tab[3]= ff_h264_biweight_8x8_sse2;
-#if ARCH_X86_64 || !defined(__ICC) || __ICC > 1110
- c->h264_v_loop_filter_luma = ff_x264_deblock_v_luma_sse2;
- c->h264_h_loop_filter_luma = ff_x264_deblock_h_luma_sse2;
- c->h264_v_loop_filter_luma_intra = ff_x264_deblock_v_luma_intra_sse2;
- c->h264_h_loop_filter_luma_intra = ff_x264_deblock_h_luma_intra_sse2;
-#endif
-#if CONFIG_GPL
- c->h264_idct_add16 = ff_h264_idct_add16_sse2;
- c->h264_idct_add8 = ff_h264_idct_add8_sse2;
- c->h264_idct_add16intra = ff_h264_idct_add16intra_sse2;
-#endif
- }
- if ( mm_flags&FF_MM_SSSE3 ){
- c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16x16_ssse3;
- c->biweight_h264_pixels_tab[3]= ff_h264_biweight_8x8_ssse3;
- }
- }
-#endif
- }
-}
-#endif /* CONFIG_H264DSP */
-
const char* avcodec_get_current_idct_mmx(AVCodecContext *avctx,DSPContext *c)
{
if (c->idct_put==ff_idct_xvid_mmx_put)
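The switch from mm_support()/FF_MM_* to av_get_cpu_flags()/AV_CPU_FLAG_* in this file is a rename; the dsp_mask handling keeps its meaning: with the FORCE bit set, the low 16 mask bits are OR'ed into the detected flags, otherwise they are cleared from them. A standalone sketch of that logic (the constant values here are stand-ins for the demo, not taken from this diff):

#include <stdio.h>

#define CPU_FLAG_FORCE 0x80000000u /* stand-in playing AV_CPU_FLAG_FORCE's role */
#define CPU_FLAG_MMX   0x0001u     /* hypothetical flag value for the demo */

/* FORCE set: OR the low 16 mask bits into the detected flags.
 * FORCE clear: strip them, i.e. disable those optimizations. */
static unsigned apply_dsp_mask(unsigned detected, unsigned dsp_mask)
{
    if (dsp_mask & CPU_FLAG_FORCE)
        return detected | (dsp_mask & 0xffff);
    return detected & ~(dsp_mask & 0xffff);
}

int main(void)
{
    printf("%#x\n", apply_dsp_mask(0, CPU_FLAG_FORCE | CPU_FLAG_MMX)); /* 0x1 */
    printf("%#x\n", apply_dsp_mask(CPU_FLAG_MMX, CPU_FLAG_MMX));       /* 0 */
    return 0;
}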
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_mmx.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_mmx.h
index 33dafed1f..58256fd40 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_mmx.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/dsputil_mmx.h
@@ -57,7 +57,7 @@ extern const uint64_t ff_pb_7;
extern const uint64_t ff_pb_1F;
extern const uint64_t ff_pb_3F;
extern const uint64_t ff_pb_81;
-extern const uint64_t ff_pb_A1;
+extern const xmm_reg ff_pb_A1;
extern const xmm_reg ff_pb_F8;
extern const uint64_t ff_pb_FC;
extern const xmm_reg ff_pb_FE;
@@ -94,6 +94,35 @@ extern const double ff_pd_2[2];
SBUTTERFLY(a,c,d,dq,q) /* a=aeim d=bfjn */\
SBUTTERFLY(t,b,c,dq,q) /* t=cgko c=dhlp */
+static inline void transpose4x4(uint8_t *dst, uint8_t *src, int dst_stride, int src_stride){
+ __asm__ volatile( //FIXME could save 1 instruction if done as 8x4 ...
+ "movd %4, %%mm0 \n\t"
+ "movd %5, %%mm1 \n\t"
+ "movd %6, %%mm2 \n\t"
+ "movd %7, %%mm3 \n\t"
+ "punpcklbw %%mm1, %%mm0 \n\t"
+ "punpcklbw %%mm3, %%mm2 \n\t"
+ "movq %%mm0, %%mm1 \n\t"
+ "punpcklwd %%mm2, %%mm0 \n\t"
+ "punpckhwd %%mm2, %%mm1 \n\t"
+ "movd %%mm0, %0 \n\t"
+ "punpckhdq %%mm0, %%mm0 \n\t"
+ "movd %%mm0, %1 \n\t"
+ "movd %%mm1, %2 \n\t"
+ "punpckhdq %%mm1, %%mm1 \n\t"
+ "movd %%mm1, %3 \n\t"
+
+ : "=m" (*(uint32_t*)(dst + 0*dst_stride)),
+ "=m" (*(uint32_t*)(dst + 1*dst_stride)),
+ "=m" (*(uint32_t*)(dst + 2*dst_stride)),
+ "=m" (*(uint32_t*)(dst + 3*dst_stride))
+ : "m" (*(uint32_t*)(src + 0*src_stride)),
+ "m" (*(uint32_t*)(src + 1*src_stride)),
+ "m" (*(uint32_t*)(src + 2*src_stride)),
+ "m" (*(uint32_t*)(src + 3*src_stride))
+ );
+}
+
// e,f,g,h can be memory
// out: a,d,t,c
#define TRANSPOSE8x4(a,b,c,d,e,f,g,h,t)\
@@ -158,9 +187,9 @@ extern const double ff_pd_2[2];
void dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx);
void dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx);
-void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
-void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
-void put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
+void ff_add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
+void ff_put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
+void ff_put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
void ff_put_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
void ff_avg_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
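transpose4x4 moves from dsputil_mmx.c into this header as a static inline so other x86 units can share it. A plain-C equivalent of what the MMX sequence computes, with a hypothetical name, is roughly:

#include <stdint.h>

/* Reference behavior of the MMX transpose4x4 above:
 * dst[j][i] = src[i][j] for a 4x4 block of bytes in strided buffers. */
static inline void transpose4x4_c(uint8_t *dst, const uint8_t *src,
                                  int dst_stride, int src_stride)
{
    for (int i = 0; i < 4; i++)
        for (int j = 0; j < 4; j++)
            dst[j * dst_stride + i] = src[i * src_stride + j];
}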
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft.c
index eb5c65ecb..771b1e664 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft.c
@@ -16,25 +16,26 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
+#include "libavutil/cpu.h"
#include "libavcodec/dsputil.h"
#include "fft.h"
av_cold void ff_fft_init_mmx(FFTContext *s)
{
#if HAVE_YASM
- int has_vectors = mm_support();
- if (has_vectors & FF_MM_SSE && HAVE_SSE) {
+ int has_vectors = av_get_cpu_flags();
+ if (has_vectors & AV_CPU_FLAG_SSE && HAVE_SSE) {
/* SSE for P3/P4/K8 */
s->imdct_calc = ff_imdct_calc_sse;
s->imdct_half = ff_imdct_half_sse;
s->fft_permute = ff_fft_permute_sse;
s->fft_calc = ff_fft_calc_sse;
- } else if (has_vectors & FF_MM_3DNOWEXT && HAVE_AMD3DNOWEXT) {
+ } else if (has_vectors & AV_CPU_FLAG_3DNOWEXT && HAVE_AMD3DNOWEXT) {
/* 3DNowEx for K7 */
s->imdct_calc = ff_imdct_calc_3dn2;
s->imdct_half = ff_imdct_half_3dn2;
s->fft_calc = ff_fft_calc_3dn2;
- } else if (has_vectors & FF_MM_3DNOW && HAVE_AMD3DNOW) {
+ } else if (has_vectors & AV_CPU_FLAG_3DNOW && HAVE_AMD3DNOW) {
/* 3DNow! for K6-2/3 */
s->imdct_calc = ff_imdct_calc_3dn;
s->imdct_half = ff_imdct_half_3dn;
@@ -46,8 +47,8 @@ av_cold void ff_fft_init_mmx(FFTContext *s)
#if CONFIG_DCT
av_cold void ff_dct_init_mmx(DCTContext *s)
{
- int has_vectors = mm_support();
- if (has_vectors & FF_MM_SSE && HAVE_SSE)
+ int has_vectors = av_get_cpu_flags();
+ if (has_vectors & AV_CPU_FLAG_SSE && HAVE_SSE)
s->dct32 = ff_dct32_float_sse;
}
#endif
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_mmx.asm b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_mmx.asm
index 31176d6c9..b75ec0cc5 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_mmx.asm
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/fft_mmx.asm
@@ -532,20 +532,15 @@ INIT_XMM
unpckhps xmm0, xmm2
%endmacro
-%macro PREROTATEW 3 ;addr1, addr2, xmm
- movlps %1, %3
- movhps %2, %3
-%endmacro
-
%macro CMUL 6 ;j, xmm0, xmm1, 3, 4, 5
movaps xmm6, [%4+%1*2]
movaps %2, [%4+%1*2+0x10]
movaps %3, xmm6
movaps xmm7, %2
- mulps xmm6, [%5+%1*1]
- mulps %2, [%6+%1*1]
- mulps %3, [%6+%1*1]
- mulps xmm7, [%5+%1*1]
+ mulps xmm6, [%5+%1]
+ mulps %2, [%6+%1]
+ mulps %3, [%6+%1]
+ mulps xmm7, [%5+%1]
subps %2, xmm6
addps %3, xmm7
%endmacro
@@ -576,8 +571,6 @@ cglobal imdct_half_sse, 3,7,8; FFTContext *s, FFTSample *output, const FFTSample
%define rrevtab r10
%define rtcos r11
%define rtsin r12
- push r10
- push r11
push r12
push r13
push r14
@@ -620,21 +613,25 @@ cglobal imdct_half_sse, 3,7,8; FFTContext *s, FFTSample *output, const FFTSample
PREROTATER r4, r3, r2, rtcos, rtsin
%ifdef ARCH_X86_64
- movzx r5, word [rrevtab+r4*1-4]
- movzx r6, word [rrevtab+r4*1-2]
- movzx r13, word [rrevtab+r3*1]
- movzx r14, word [rrevtab+r3*1+2]
- PREROTATEW [r1+r5 *8], [r1+r6 *8], xmm0
- PREROTATEW [r1+r13*8], [r1+r14*8], xmm1
+ movzx r5, word [rrevtab+r4-4]
+ movzx r6, word [rrevtab+r4-2]
+ movzx r13, word [rrevtab+r3]
+ movzx r14, word [rrevtab+r3+2]
+ movlps [r1+r5 *8], xmm0
+ movhps [r1+r6 *8], xmm0
+ movlps [r1+r13*8], xmm1
+ movhps [r1+r14*8], xmm1
add r4, 4
%else
mov r6, [esp]
- movzx r5, word [r6+r4*1-4]
- movzx r4, word [r6+r4*1-2]
- PREROTATEW [r1+r5*8], [r1+r4*8], xmm0
- movzx r5, word [r6+r3*1]
- movzx r4, word [r6+r3*1+2]
- PREROTATEW [r1+r5*8], [r1+r4*8], xmm1
+ movzx r5, word [r6+r4-4]
+ movzx r4, word [r6+r4-2]
+ movlps [r1+r5*8], xmm0
+ movhps [r1+r4*8], xmm0
+ movzx r5, word [r6+r3]
+ movzx r4, word [r6+r3+2]
+ movlps [r1+r5*8], xmm1
+ movhps [r1+r4*8], xmm1
%endif
sub r3, 4
jns .pre
@@ -663,8 +660,6 @@ cglobal imdct_half_sse, 3,7,8; FFTContext *s, FFTSample *output, const FFTSample
pop r14
pop r13
pop r12
- pop r11
- pop r10
%else
add esp, 12
%endif
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264_chromamc.asm b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264_chromamc.asm
new file mode 100644
index 000000000..6df82cc52
--- /dev/null
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264_chromamc.asm
@@ -0,0 +1,671 @@
+;******************************************************************************
+;* MMX/SSSE3-optimized functions for H264 chroma MC
+;* Copyright (c) 2005 Zoltan Hidvegi <hzoli -a- hzoli -d- com>,
+;* 2005-2008 Loren Merritt
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "x86inc.asm"
+%include "x86util.asm"
+
+SECTION_RODATA
+
+rnd_rv40_2d_tbl: times 4 dw 0
+ times 4 dw 16
+ times 4 dw 32
+ times 4 dw 16
+ times 4 dw 32
+ times 4 dw 28
+ times 4 dw 32
+ times 4 dw 28
+ times 4 dw 0
+ times 4 dw 32
+ times 4 dw 16
+ times 4 dw 32
+ times 4 dw 32
+ times 4 dw 28
+ times 4 dw 32
+ times 4 dw 28
+rnd_rv40_1d_tbl: times 4 dw 0
+ times 4 dw 2
+ times 4 dw 4
+ times 4 dw 2
+ times 4 dw 4
+ times 4 dw 3
+ times 4 dw 4
+ times 4 dw 3
+ times 4 dw 0
+ times 4 dw 4
+ times 4 dw 2
+ times 4 dw 4
+ times 4 dw 4
+ times 4 dw 3
+ times 4 dw 4
+ times 4 dw 3
+
+cextern pw_3
+cextern pw_4
+cextern pw_8
+cextern pw_28
+cextern pw_32
+cextern pw_64
+
+SECTION .text
+
+%macro mv0_pixels_mc8 0
+ lea r4, [r2*2 ]
+.next4rows
+ movq mm0, [r1 ]
+ movq mm1, [r1+r2]
+ CHROMAMC_AVG mm0, [r0 ]
+ CHROMAMC_AVG mm1, [r0+r2]
+ movq [r0 ], mm0
+ movq [r0+r2], mm1
+ add r0, r4
+ add r1, r4
+ movq mm0, [r1 ]
+ movq mm1, [r1+r2]
+ CHROMAMC_AVG mm0, [r0 ]
+ CHROMAMC_AVG mm1, [r0+r2]
+ add r1, r4
+ movq [r0 ], mm0
+ movq [r0+r2], mm1
+ add r0, r4
+ sub r3d, 4
+ jne .next4rows
+%endmacro
+
+%macro chroma_mc8_mmx_func 3
+; put/avg_h264_chroma_mc8_mmx_*(uint8_t *dst /*align 8*/, uint8_t *src /*align 1*/,
+; int stride, int h, int mx, int my)
+cglobal %1_%2_chroma_mc8_%3, 6, 7, 0
+%ifdef ARCH_X86_64
+ movsxd r2, r2d
+%endif
+ mov r6d, r5d
+ or r6d, r4d
+ jne .at_least_one_non_zero
+ ; mx == 0 AND my == 0 - no filter needed
+ mv0_pixels_mc8
+ REP_RET
+
+.at_least_one_non_zero
+%ifidn %2, rv40
+%ifdef PIC
+%define rnd_1d_rv40 r11
+%define rnd_2d_rv40 r11
+%else ; no-PIC
+%define rnd_1d_rv40 rnd_rv40_1d_tbl
+%define rnd_2d_rv40 rnd_rv40_2d_tbl
+%endif
+%ifdef ARCH_X86_64
+ mov r10, r5
+ and r10, 6 ; &~1 for mx/my=[0,7]
+ lea r10, [r10*4+r4]
+ sar r10d, 1
+%define rnd_bias r10
+%define dest_reg r0
+%else ; x86-32
+ mov r0, r5
+ and r0, 6 ; &~1 for mx/my=[0,7]
+ lea r0, [r0*4+r4]
+ sar r0d, 1
+%define rnd_bias r0
+%define dest_reg r5
+%endif
+%else ; vc1, h264
+%define rnd_bias 0
+%define dest_reg r0
+%endif
+
+ test r5d, r5d
+ mov r6, 1
+ je .my_is_zero
+ test r4d, r4d
+ mov r6, r2 ; dxy = x ? 1 : stride
+ jne .both_non_zero
+.my_is_zero
+ ; mx == 0 XOR my == 0 - 1 dimensional filter only
+ or r4d, r5d ; x + y
+
+%ifidn %2, rv40
+%ifdef PIC
+ lea r11, [rnd_rv40_1d_tbl]
+%endif
+%ifndef ARCH_X86_64
+ mov r5, r0m
+%endif
+%endif
+
+ movd m5, r4d
+ movq m4, [pw_8]
+ movq m6, [rnd_1d_%2+rnd_bias*8] ; mm6 = rnd >> 3
+ punpcklwd m5, m5
+ punpckldq m5, m5 ; mm5 = B = x
+ pxor m7, m7
+ psubw m4, m5 ; mm4 = A = 8-x
+
+.next1drow
+ movq m0, [r1 ] ; mm0 = src[0..7]
+ movq m2, [r1+r6] ; mm2 = src[1..8]
+
+ movq m1, m0
+ movq m3, m2
+ punpcklbw m0, m7
+ punpckhbw m1, m7
+ punpcklbw m2, m7
+ punpckhbw m3, m7
+ pmullw m0, m4 ; [mm0,mm1] = A * src[0..7]
+ pmullw m1, m4
+ pmullw m2, m5 ; [mm2,mm3] = B * src[1..8]
+ pmullw m3, m5
+
+ paddw m0, m6
+ paddw m1, m6
+ paddw m0, m2
+ paddw m1, m3
+ psrlw m0, 3
+ psrlw m1, 3
+ packuswb m0, m1
+ CHROMAMC_AVG m0, [dest_reg]
+ movq [dest_reg], m0 ; dst[0..7] = (A * src[0..7] + B * src[1..8] + (rnd >> 3)) >> 3
+
+ add dest_reg, r2
+ add r1, r2
+ dec r3d
+ jne .next1drow
+ REP_RET
+
+.both_non_zero ; general case, bilinear
+ movd m4, r4d ; x
+ movd m6, r5d ; y
+%ifidn %2, rv40
+%ifdef PIC
+ lea r11, [rnd_rv40_2d_tbl]
+%endif
+%ifndef ARCH_X86_64
+ mov r5, r0m
+%endif
+%endif
+ mov r6, rsp ; backup stack pointer
+ and rsp, ~(mmsize-1) ; align stack
+ sub rsp, 16 ; AA and DD
+
+ punpcklwd m4, m4
+ punpcklwd m6, m6
+ punpckldq m4, m4 ; mm4 = x words
+ punpckldq m6, m6 ; mm6 = y words
+ movq m5, m4
+ pmullw m4, m6 ; mm4 = x * y
+ psllw m5, 3
+ psllw m6, 3
+ movq m7, m5
+ paddw m7, m6
+ movq [rsp+8], m4 ; DD = x * y
+ psubw m5, m4 ; mm5 = B = 8x - xy
+ psubw m6, m4 ; mm6 = C = 8y - xy
+ paddw m4, [pw_64]
+ psubw m4, m7 ; mm4 = A = xy - (8x+8y) + 64
+ pxor m7, m7
+ movq [rsp ], m4
+
+ movq m0, [r1 ] ; mm0 = src[0..7]
+ movq m1, [r1+1] ; mm1 = src[1..8]
+.next2drow
+ add r1, r2
+
+ movq m2, m0
+ movq m3, m1
+ punpckhbw m0, m7
+ punpcklbw m1, m7
+ punpcklbw m2, m7
+ punpckhbw m3, m7
+ pmullw m0, [rsp]
+ pmullw m2, [rsp]
+ pmullw m1, m5
+ pmullw m3, m5
+ paddw m2, m1 ; mm2 = A * src[0..3] + B * src[1..4]
+ paddw m3, m0 ; mm3 = A * src[4..7] + B * src[5..8]
+
+ movq m0, [r1]
+ movq m1, m0
+ punpcklbw m0, m7
+ punpckhbw m1, m7
+ pmullw m0, m6
+ pmullw m1, m6
+ paddw m2, m0
+ paddw m3, m1 ; [mm2,mm3] += C * src[0..7]
+
+ movq m1, [r1+1]
+ movq m0, m1
+ movq m4, m1
+ punpcklbw m0, m7
+ punpckhbw m4, m7
+ pmullw m0, [rsp+8]
+ pmullw m4, [rsp+8]
+ paddw m2, m0
+ paddw m3, m4 ; [mm2,mm3] += D * src[1..8]
+ movq m0, [r1]
+
+ paddw m2, [rnd_2d_%2+rnd_bias*8]
+ paddw m3, [rnd_2d_%2+rnd_bias*8]
+ psrlw m2, 6
+ psrlw m3, 6
+ packuswb m2, m3
+ CHROMAMC_AVG m2, [dest_reg]
+ movq [dest_reg], m2 ; dst[0..7] = ([mm2,mm3] + rnd) >> 6
+
+ add dest_reg, r2
+ dec r3d
+ jne .next2drow
+ mov rsp, r6 ; restore stack pointer
+ RET
+%endmacro
+
+%macro chroma_mc4_mmx_func 3
+cglobal %1_%2_chroma_mc4_%3, 6, 6, 0
+%ifdef ARCH_X86_64
+ movsxd r2, r2d
+%endif
+ pxor m7, m7
+ movd m2, r4d ; x
+ movd m3, r5d ; y
+ movq m4, [pw_8]
+ movq m5, [pw_8]
+ punpcklwd m2, m2
+ punpcklwd m3, m3
+ punpcklwd m2, m2
+ punpcklwd m3, m3
+ psubw m4, m2
+ psubw m5, m3
+
+%ifidn %2, rv40
+%ifdef PIC
+ lea r11, [rnd_rv40_2d_tbl]
+%define rnd_2d_rv40 r11
+%else
+%define rnd_2d_rv40 rnd_rv40_2d_tbl
+%endif
+ and r5, 6 ; &~1 for mx/my=[0,7]
+ lea r5, [r5*4+r4]
+ sar r5d, 1
+%define rnd_bias r5
+%else ; vc1, h264
+%define rnd_bias 0
+%endif
+
+ movd m0, [r1 ]
+ movd m6, [r1+1]
+ add r1, r2
+ punpcklbw m0, m7
+ punpcklbw m6, m7
+ pmullw m0, m4
+ pmullw m6, m2
+ paddw m6, m0
+
+.next2rows
+ movd m0, [r1 ]
+ movd m1, [r1+1]
+ add r1, r2
+ punpcklbw m0, m7
+ punpcklbw m1, m7
+ pmullw m0, m4
+ pmullw m1, m2
+ paddw m1, m0
+ movq m0, m1
+
+ pmullw m6, m5
+ pmullw m1, m3
+ paddw m6, [rnd_2d_%2+rnd_bias*8]
+ paddw m1, m6
+ psrlw m1, 6
+ packuswb m1, m1
+ CHROMAMC_AVG4 m1, m6, [r0]
+ movd [r0], m1
+ add r0, r2
+
+ movd m6, [r1 ]
+ movd m1, [r1+1]
+ add r1, r2
+ punpcklbw m6, m7
+ punpcklbw m1, m7
+ pmullw m6, m4
+ pmullw m1, m2
+ paddw m1, m6
+ movq m6, m1
+ pmullw m0, m5
+ pmullw m1, m3
+ paddw m0, [rnd_2d_%2+rnd_bias*8]
+ paddw m1, m0
+ psrlw m1, 6
+ packuswb m1, m1
+ CHROMAMC_AVG4 m1, m0, [r0]
+ movd [r0], m1
+ add r0, r2
+ sub r3d, 2
+ jnz .next2rows
+ REP_RET
+%endmacro
+
+%macro chroma_mc2_mmx_func 3
+cglobal %1_%2_chroma_mc2_%3, 6, 7, 0
+%ifdef ARCH_X86_64
+ movsxd r2, r2d
+%endif
+
+ mov r6d, r4d
+ shl r4d, 16
+ sub r4d, r6d
+ add r4d, 8
+ imul r5d, r4d ; x*y<<16 | y*(8-x)
+ shl r4d, 3
+ sub r4d, r5d ; x*(8-y)<<16 | (8-x)*(8-y)
+
+ movd m5, r4d
+ movd m6, r5d
+ punpckldq m5, m5 ; mm5 = {A,B,A,B}
+ punpckldq m6, m6 ; mm6 = {C,D,C,D}
+ pxor m7, m7
+ movd m2, [r1]
+ punpcklbw m2, m7
+ pshufw m2, m2, 0x94 ; mm2 = src[0,1,1,2]
+
+.nextrow
+ add r1, r2
+ movq m1, m2
+ pmaddwd m1, m5 ; mm1 = A * src[0,1] + B * src[1,2]
+ movd m0, [r1]
+ punpcklbw m0, m7
+ pshufw m0, m0, 0x94 ; mm0 = src[0,1,1,2]
+ movq m2, m0
+ pmaddwd m0, m6
+ paddw m1, [rnd_2d_%2]
+ paddw m1, m0 ; mm1 += C * src[0,1] + D * src[1,2]
+ psrlw m1, 6
+ packssdw m1, m7
+ packuswb m1, m7
+ CHROMAMC_AVG4 m1, m3, [r0]
+ movd r5d, m1
+ mov [r0], r5w
+ add r0, r2
+ sub r3d, 1
+ jnz .nextrow
+ REP_RET
+%endmacro
+
+%define rnd_1d_h264 pw_4
+%define rnd_2d_h264 pw_32
+%define rnd_1d_vc1 pw_3
+%define rnd_2d_vc1 pw_28
+
+%macro NOTHING 2-3
+%endmacro
+%macro DIRECT_AVG 2
+ PAVG %1, %2
+%endmacro
+%macro COPY_AVG 3
+ movd %2, %3
+ PAVG %1, %2
+%endmacro
+
+INIT_MMX
+%define CHROMAMC_AVG NOTHING
+%define CHROMAMC_AVG4 NOTHING
+chroma_mc8_mmx_func put, h264, mmx_rnd
+chroma_mc8_mmx_func put, vc1, mmx_nornd
+chroma_mc8_mmx_func put, rv40, mmx
+chroma_mc4_mmx_func put, h264, mmx
+chroma_mc4_mmx_func put, rv40, mmx
+chroma_mc2_mmx_func put, h264, mmx2
+
+%define CHROMAMC_AVG DIRECT_AVG
+%define CHROMAMC_AVG4 COPY_AVG
+%define PAVG pavgb
+chroma_mc8_mmx_func avg, h264, mmx2_rnd
+chroma_mc8_mmx_func avg, vc1, mmx2_nornd
+chroma_mc8_mmx_func avg, rv40, mmx2
+chroma_mc4_mmx_func avg, h264, mmx2
+chroma_mc4_mmx_func avg, rv40, mmx2
+chroma_mc2_mmx_func avg, h264, mmx2
+
+%define PAVG pavgusb
+chroma_mc8_mmx_func avg, h264, 3dnow_rnd
+chroma_mc8_mmx_func avg, vc1, 3dnow_nornd
+chroma_mc8_mmx_func avg, rv40, 3dnow
+chroma_mc4_mmx_func avg, h264, 3dnow
+chroma_mc4_mmx_func avg, rv40, 3dnow
+
+%macro chroma_mc8_ssse3_func 3
+cglobal %1_%2_chroma_mc8_%3, 6, 7, 8
+%ifdef ARCH_X86_64
+ movsxd r2, r2d
+%endif
+ mov r6d, r5d
+ or r6d, r4d
+ jne .at_least_one_non_zero
+ ; mx == 0 AND my == 0 - no filter needed
+ mv0_pixels_mc8
+ REP_RET
+
+.at_least_one_non_zero
+ test r5d, r5d
+ je .my_is_zero
+ test r4d, r4d
+ je .mx_is_zero
+
+ ; general case, bilinear
+ mov r6d, r4d
+ shl r4d, 8
+ sub r4, r6
+ add r4, 8 ; x*255+8 = x<<8 | (8-x)
+ mov r6, 8
+ sub r6d, r5d
+ imul r6, r4 ; (8-y)*(x*255+8) = (8-y)*x<<8 | (8-y)*(8-x)
+ imul r4d, r5d ; y *(x*255+8) = y *x<<8 | y *(8-x)
+
+ movd m7, r6d
+ movd m6, r4d
+ movdqa m5, [rnd_2d_%2]
+ pshuflw m7, m7, 0
+ pshuflw m6, m6, 0
+ movlhps m7, m7
+ movlhps m6, m6
+
+ movq m0, [r1 ]
+ movq m1, [r1 +1]
+ punpcklbw m0, m1
+ add r1, r2
+.next2rows
+ movq m1, [r1 ]
+ movq m2, [r1 +1]
+ movq m3, [r1+r2 ]
+ movq m4, [r1+r2+1]
+ lea r1, [r1+r2*2]
+ punpcklbw m1, m2
+ punpcklbw m3, m4
+ movdqa m2, m1
+ movdqa m4, m3
+ pmaddubsw m0, m7
+ pmaddubsw m1, m6
+ pmaddubsw m2, m7
+ pmaddubsw m3, m6
+ paddw m0, m5
+ paddw m2, m5
+ paddw m1, m0
+ paddw m3, m2
+ movdqa m0, m4
+ psrlw m1, 6
+ psrlw m3, 6
+%ifidn %1, avg
+ movq m2, [r0 ]
+ movhps m2, [r0+r2]
+%endif
+ packuswb m1, m3
+ CHROMAMC_AVG m1, m2
+ movq [r0 ], m1
+ movhps [r0+r2], m1
+ sub r3d, 2
+ lea r0, [r0+r2*2]
+ jg .next2rows
+ REP_RET
+
+.my_is_zero
+ mov r5d, r4d
+ shl r4d, 8
+ add r4, 8
+ sub r4, r5 ; 255*x+8 = x<<8 | (8-x)
+ movd m7, r4d
+ movq m6, [rnd_1d_%2]
+ pshuflw m7, m7, 0
+ movlhps m6, m6
+ movlhps m7, m7
+
+.next2xrows
+ movq m0, [r1 ]
+ movq m1, [r1 +1]
+ movq m2, [r1+r2 ]
+ movq m3, [r1+r2+1]
+ punpcklbw m0, m1
+ punpcklbw m2, m3
+ pmaddubsw m0, m7
+ pmaddubsw m2, m7
+%ifidn %1, avg
+ movq m4, [r0 ]
+ movhps m4, [r0+r2]
+%endif
+ paddw m0, m6
+ paddw m2, m6
+ psrlw m0, 3
+ psrlw m2, 3
+ packuswb m0, m2
+ CHROMAMC_AVG m0, m4
+ movq [r0 ], m0
+ movhps [r0+r2], m0
+ sub r3d, 2
+ lea r0, [r0+r2*2]
+ lea r1, [r1+r2*2]
+ jg .next2xrows
+ REP_RET
+
+.mx_is_zero
+ mov r4d, r5d
+ shl r5d, 8
+ add r5, 8
+ sub r5, r4 ; 255*y+8 = y<<8 | (8-y)
+ movd m7, r5d
+ movq m6, [rnd_1d_%2]
+ pshuflw m7, m7, 0
+ movlhps m6, m6
+ movlhps m7, m7
+
+.next2yrows
+ movq m0, [r1 ]
+ movq m1, [r1+r2 ]
+ movdqa m2, m1
+ movq m3, [r1+r2*2]
+ punpcklbw m0, m1
+ punpcklbw m2, m3
+ pmaddubsw m0, m7
+ pmaddubsw m2, m7
+%ifidn %1, avg
+ movq m4, [r0 ]
+ movhps m4, [r0+r2]
+%endif
+ paddw m0, m6
+ paddw m2, m6
+ psrlw m0, 3
+ psrlw m2, 3
+ packuswb m0, m2
+ CHROMAMC_AVG m0, m4
+ movq [r0 ], m0
+ movhps [r0+r2], m0
+ sub r3d, 2
+ lea r0, [r0+r2*2]
+ lea r1, [r1+r2*2]
+ jg .next2yrows
+ REP_RET
+%endmacro
+
+%macro chroma_mc4_ssse3_func 3
+cglobal %1_%2_chroma_mc4_%3, 6, 7, 0
+%ifdef ARCH_X86_64
+ movsxd r2, r2d
+%endif
+ mov r6, r4
+ shl r4d, 8
+ sub r4d, r6d
+ add r4d, 8 ; x*255+8 = x<<8 | (8-x)
+ mov r6, 8
+ sub r6d, r5d
+ imul r6d, r4d ; (8-y)*(x*255+8) = (8-y)*x<<8 | (8-y)*(8-x)
+ imul r4d, r5d ; y *(x*255+8) = y *x<<8 | y *(8-x)
+
+ movd m7, r6d
+ movd m6, r4d
+ movq m5, [pw_32]
+ pshufw m7, m7, 0
+ pshufw m6, m6, 0
+
+ movd m0, [r1 ]
+ punpcklbw m0, [r1 +1]
+ add r1, r2
+.next2rows
+ movd m1, [r1 ]
+ movd m3, [r1+r2 ]
+ punpcklbw m1, [r1 +1]
+ punpcklbw m3, [r1+r2+1]
+ lea r1, [r1+r2*2]
+ movq m2, m1
+ movq m4, m3
+ pmaddubsw m0, m7
+ pmaddubsw m1, m6
+ pmaddubsw m2, m7
+ pmaddubsw m3, m6
+ paddw m0, m5
+ paddw m2, m5
+ paddw m1, m0
+ paddw m3, m2
+ movq m0, m4
+ psrlw m1, 6
+ psrlw m3, 6
+ packuswb m1, m1
+ packuswb m3, m3
+ CHROMAMC_AVG m1, [r0 ]
+ CHROMAMC_AVG m3, [r0+r2]
+ movd [r0 ], m1
+ movd [r0+r2], m3
+ sub r3d, 2
+ lea r0, [r0+r2*2]
+ jg .next2rows
+ REP_RET
+%endmacro
+
+%define CHROMAMC_AVG NOTHING
+INIT_XMM
+chroma_mc8_ssse3_func put, h264, ssse3_rnd
+chroma_mc8_ssse3_func put, vc1, ssse3_nornd
+INIT_MMX
+chroma_mc4_ssse3_func put, h264, ssse3
+
+%define CHROMAMC_AVG DIRECT_AVG
+%define PAVG pavgb
+INIT_XMM
+chroma_mc8_ssse3_func avg, h264, ssse3_rnd
+chroma_mc8_ssse3_func avg, vc1, ssse3_nornd
+INIT_MMX
+chroma_mc4_ssse3_func avg, h264, ssse3
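A detail worth noting in the mc2 macro above: both 16-bit bilinear weights of a pair are packed into one 32-bit register (x<<16 - x + 8 leaves x in the high half and 8-x in the low half, since 8-x never borrows for x <= 8), so a single imul scales both at once. A small standalone C check of that packing, with hypothetical names:

#include <stdint.h>
#include <assert.h>

/* One 32-bit value carries two 16-bit bilinear weights; valid for
 * 0 <= x, y <= 8, where every product fits in 16 bits without carry. */
static void pack_weights(int x, int y, uint32_t *ab, uint32_t *cd)
{
    uint32_t t = ((uint32_t)x << 16) - x + 8; /* x       : (8-x)       */
    *cd = t * y;                              /* x*y     : (8-x)*y     */
    *ab = (t << 3) - *cd;                     /* x*(8-y) : (8-x)*(8-y) */
}

int main(void)
{
    for (int x = 0; x <= 8; x++)
        for (int y = 0; y <= 8; y++) {
            uint32_t ab, cd;
            pack_weights(x, y, &ab, &cd);
            assert((ab >> 16)     == (uint32_t)(x * (8 - y)));
            assert((ab & 0xffff)  == (uint32_t)((8 - x) * (8 - y)));
            assert((cd >> 16)     == (uint32_t)(x * y));
            assert((cd & 0xffff)  == (uint32_t)((8 - x) * y));
        }
    return 0;
}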
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264_deblock_sse2.asm b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264_deblock.asm
index a9e6dea3d..fb9cacfd1 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264_deblock_sse2.asm
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264_deblock.asm
@@ -4,6 +4,7 @@
;* Copyright (C) 2005-2008 x264 project
;*
;* Authors: Loren Merritt <lorenm@u.washington.edu>
+;* Jason Garrett-Glaser <darkshikari@gmail.com>
;*
;* This file is part of FFmpeg.
;*
@@ -23,12 +24,14 @@
;******************************************************************************
%include "x86inc.asm"
+%include "x86util.asm"
SECTION_RODATA
-pb_00: times 16 db 0x00
-pb_01: times 16 db 0x01
-pb_03: times 16 db 0x03
-pb_a1: times 16 db 0xa1
+
+cextern pb_0
+cextern pb_1
+cextern pb_3
+cextern pb_A1
SECTION .text
@@ -104,7 +107,7 @@ SECTION .text
movd %8, m5
%endmacro
-%macro SBUTTERFLY 4
+%macro SBUTTERFLY3 4
movq %4, %2
punpckl%1 %2, %3
punpckh%1 %4, %3
@@ -120,19 +123,19 @@ SECTION .text
movq m4, %5
movq m5, %6
movq m6, %7
- SBUTTERFLY bw, m0, m1, m7
- SBUTTERFLY bw, m2, m3, m1
- SBUTTERFLY bw, m4, m5, m3
+ SBUTTERFLY3 bw, m0, m1, m7
+ SBUTTERFLY3 bw, m2, m3, m1
+ SBUTTERFLY3 bw, m4, m5, m3
movq [%9+0x10], m1
- SBUTTERFLY bw, m6, %8, m5
- SBUTTERFLY wd, m0, m2, m1
- SBUTTERFLY wd, m4, m6, m2
+ SBUTTERFLY3 bw, m6, %8, m5
+ SBUTTERFLY3 wd, m0, m2, m1
+ SBUTTERFLY3 wd, m4, m6, m2
punpckhdq m0, m4
movq [%9+0x00], m0
- SBUTTERFLY wd, m7, [%9+0x10], m6
- SBUTTERFLY wd, m3, m5, m4
- SBUTTERFLY dq, m7, m3, m0
- SBUTTERFLY dq, m1, m2, m5
+ SBUTTERFLY3 wd, m7, [%9+0x10], m6
+ SBUTTERFLY3 wd, m3, m5, m4
+ SBUTTERFLY3 dq, m7, m3, m0
+ SBUTTERFLY3 dq, m1, m2, m5
punpckldq m6, m4
movq [%9+0x10], m1
movq [%9+0x20], m5
@@ -151,25 +154,25 @@ SECTION .text
movq m4, %5
movq m5, %6
movq m6, %7
- SBUTTERFLY bw, m0, m1, m7
- SBUTTERFLY bw, m2, m3, m1
- SBUTTERFLY bw, m4, m5, m3
- SBUTTERFLY bw, m6, %8, m5
+ SBUTTERFLY3 bw, m0, m1, m7
+ SBUTTERFLY3 bw, m2, m3, m1
+ SBUTTERFLY3 bw, m4, m5, m3
+ SBUTTERFLY3 bw, m6, %8, m5
movq %9, m3
- SBUTTERFLY wd, m0, m2, m3
- SBUTTERFLY wd, m4, m6, m2
- SBUTTERFLY wd, m7, m1, m6
+ SBUTTERFLY3 wd, m0, m2, m3
+ SBUTTERFLY3 wd, m4, m6, m2
+ SBUTTERFLY3 wd, m7, m1, m6
movq %11, m2
movq m2, %9
- SBUTTERFLY wd, m2, m5, m1
- SBUTTERFLY dq, m0, m4, m5
- SBUTTERFLY dq, m7, m2, m4
+ SBUTTERFLY3 wd, m2, m5, m1
+ SBUTTERFLY3 dq, m0, m4, m5
+ SBUTTERFLY3 dq, m7, m2, m4
movq %9, m0
movq %10, m5
movq %13, m7
movq %14, m4
- SBUTTERFLY dq, m3, %11, m0
- SBUTTERFLY dq, m6, m1, m5
+ SBUTTERFLY3 dq, m3, %11, m0
+ SBUTTERFLY3 dq, m6, m1, m5
movq %11, m3
movq %12, m0
movq %15, m6
@@ -235,19 +238,19 @@ SECTION .text
; clobbers: m0,3-6
%macro DEBLOCK_P0_Q0 0
mova m5, m1
- pxor m5, m2 ; p0^q0
- pand m5, [pb_01] ; (p0^q0)&1
+ pxor m5, m2 ; p0^q0
+ pand m5, [pb_1] ; (p0^q0)&1
pcmpeqb m4, m4
pxor m3, m4
- pavgb m3, m0 ; (p1 - q1 + 256)>>1
- pavgb m3, [pb_03] ; (((p1 - q1 + 256)>>1)+4)>>1 = 64+2+(p1-q1)>>2
+ pavgb m3, m0 ; (p1 - q1 + 256)>>1
+ pavgb m3, [pb_3] ; (((p1 - q1 + 256)>>1)+4)>>1 = 64+2+(p1-q1)>>2
pxor m4, m1
- pavgb m4, m2 ; (q0 - p0 + 256)>>1
+ pavgb m4, m2 ; (q0 - p0 + 256)>>1
pavgb m3, m5
- paddusb m3, m4 ; d+128+33
- mova m6, [pb_a1]
+ paddusb m3, m4 ; d+128+33
+ mova m6, [pb_A1]
psubusb m6, m3
- psubusb m3, [pb_a1]
+ psubusb m3, [pb_A1]
pminub m6, m7
pminub m3, m7
psubusb m1, m6
@@ -263,10 +266,10 @@ SECTION .text
%macro LUMA_Q1 6
mova %6, m1
pavgb %6, m2
- pavgb %2, %6 ; avg(p2,avg(p0,q0))
+ pavgb %2, %6 ; avg(p2,avg(p0,q0))
pxor %6, %3
- pand %6, [pb_01] ; (p2^avg(p0,q0))&1
- psubusb %2, %6 ; (p2+((p0+q0+1)>>1))>>1
+ pand %6, [pb_1] ; (p2^avg(p0,q0))&1
+ psubusb %2, %6 ; (p2+((p0+q0+1)>>1))>>1
mova %6, %1
psubusb %6, %5
paddusb %5, %1
@@ -495,6 +498,8 @@ cglobal x264_deblock_h_luma_%1, 0,5
RET
%endmacro ; DEBLOCK_LUMA
+INIT_MMX
+DEBLOCK_LUMA mmxext, v8, 8
INIT_XMM
DEBLOCK_LUMA sse2, v, 16
@@ -517,9 +522,9 @@ DEBLOCK_LUMA sse2, v, 16
mova t3, t2
mova t4, t2
psrlw t2, 1
- pavgb t2, mpb_00
+ pavgb t2, mpb_0
pxor t2, t0
- pand t2, mpb_01
+ pand t2, mpb_1
psubb t0, t2 ; p1' = (p2+p1+p0+q0+2)/4;
mova t1, p2
@@ -528,21 +533,21 @@ DEBLOCK_LUMA sse2, v, 16
psubb t2, q1
paddb t3, t3
psubb t3, t2 ; p2+2*p1+2*p0+2*q0+q1
- pand t2, mpb_01
+ pand t2, mpb_1
psubb t1, t2
pavgb t1, p1
pavgb t1, t5 ; (((p2+q1)/2 + p1+1)/2 + (p0+q0+1)/2 + 1)/2
psrlw t3, 2
- pavgb t3, mpb_00
+ pavgb t3, mpb_0
pxor t3, t1
- pand t3, mpb_01
+ pand t3, mpb_1
psubb t1, t3 ; p0'a = (p2+2*p1+2*p0+2*q0+q1+4)/8
mova t3, p0
mova t2, p0
pxor t3, q1
pavgb t2, q1
- pand t3, mpb_01
+ pand t3, mpb_1
psubb t2, t3
pavgb t2, p1 ; p0'b = (2*p1+p0+q0+2)/4
@@ -562,9 +567,9 @@ DEBLOCK_LUMA sse2, v, 16
paddb t2, t2
paddb t2, t4 ; 2*p3+3*p2+p1+p0+q0
psrlw t2, 2
- pavgb t2, mpb_00
+ pavgb t2, mpb_0
pxor t2, t1
- pand t2, mpb_01
+ pand t2, mpb_1
psubb t1, t2 ; p2' = (2*p3+3*p2+p1+p0+q0+4)/8
pxor t0, p1
@@ -603,8 +608,8 @@ DEBLOCK_LUMA sse2, v, 16
%define mask0 m12
%define mask1p m13
%define mask1q [rsp-24]
- %define mpb_00 m14
- %define mpb_01 m15
+ %define mpb_0 m14
+ %define mpb_1 m15
%else
%define spill(x) [esp+16*x+((stack_offset+4)&15)]
%define p2 [r4+r1]
@@ -614,8 +619,8 @@ DEBLOCK_LUMA sse2, v, 16
%define mask0 spill(2)
%define mask1p spill(3)
%define mask1q spill(4)
- %define mpb_00 [pb_00]
- %define mpb_01 [pb_01]
+ %define mpb_0 [pb_0]
+ %define mpb_1 [pb_1]
%endif
;-----------------------------------------------------------------------------
@@ -638,12 +643,12 @@ cglobal x264_deblock_%2_luma_intra_%1, 4,6,16
mova q0, [r0]
mova q1, [r0+r1]
%ifdef ARCH_X86_64
- pxor mpb_00, mpb_00
- mova mpb_01, [pb_01]
+ pxor mpb_0, mpb_0
+ mova mpb_1, [pb_1]
LOAD_MASK r2d, r3d, t5 ; m5=beta-1, t5=alpha-1, m7=mask0
SWAP 7, 12 ; m12=mask0
- pavgb t5, mpb_00
- pavgb t5, mpb_01 ; alpha/4+1
+ pavgb t5, mpb_0
+ pavgb t5, mpb_1 ; alpha/4+1
movdqa p2, [r4+r1]
movdqa q2, [r0+2*r1]
DIFF_GT2 p0, q0, t5, t0, t3 ; t0 = |p0-q0| > alpha/4+1
@@ -658,8 +663,8 @@ cglobal x264_deblock_%2_luma_intra_%1, 4,6,16
LOAD_MASK r2d, r3d, t5 ; m5=beta-1, t5=alpha-1, m7=mask0
mova m4, t5
mova mask0, m7
- pavgb m4, [pb_00]
- pavgb m4, [pb_01] ; alpha/4+1
+ pavgb m4, [pb_0]
+ pavgb m4, [pb_1] ; alpha/4+1
DIFF_GT2 p0, q0, m4, m6, m7 ; m6 = |p0-q0| > alpha/4+1
pand m6, mask0
DIFF_GT2 p0, p2, m5, m4, m7 ; m4 = |p2-p0| > beta-1
@@ -759,3 +764,126 @@ DEBLOCK_LUMA_INTRA sse2, v
INIT_MMX
DEBLOCK_LUMA_INTRA mmxext, v8
%endif
+
+
+
+INIT_MMX
+
+%macro CHROMA_V_START 0
+ dec r2d ; alpha-1
+ dec r3d ; beta-1
+ mov t5, r0
+ sub t5, r1
+ sub t5, r1
+%endmacro
+
+%macro CHROMA_H_START 0
+ dec r2d
+ dec r3d
+ sub r0, 2
+ lea t6, [r1*3]
+ mov t5, r0
+ add r0, t6
+%endmacro
+
+%define t5 r5
+%define t6 r6
+
+;-----------------------------------------------------------------------------
+; void x264_deblock_v_chroma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
+;-----------------------------------------------------------------------------
+cglobal x264_deblock_v_chroma_mmxext, 5,6
+ CHROMA_V_START
+ movq m0, [t5]
+ movq m1, [t5+r1]
+ movq m2, [r0]
+ movq m3, [r0+r1]
+ call x264_chroma_inter_body_mmxext
+ movq [t5+r1], m1
+ movq [r0], m2
+ RET
+
+;-----------------------------------------------------------------------------
+; void x264_deblock_h_chroma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
+;-----------------------------------------------------------------------------
+cglobal x264_deblock_h_chroma_mmxext, 5,7
+%ifdef ARCH_X86_64
+ %define buf0 [rsp-24]
+ %define buf1 [rsp-16]
+%else
+ %define buf0 r0m
+ %define buf1 r2m
+%endif
+ CHROMA_H_START
+ TRANSPOSE4x8_LOAD PASS8ROWS(t5, r0, r1, t6)
+ movq buf0, m0
+ movq buf1, m3
+ call x264_chroma_inter_body_mmxext
+ movq m0, buf0
+ movq m3, buf1
+ TRANSPOSE8x4_STORE PASS8ROWS(t5, r0, r1, t6)
+ RET
+
+ALIGN 16
+x264_chroma_inter_body_mmxext:
+ LOAD_MASK r2d, r3d
+ movd m6, [r4] ; tc0
+ punpcklbw m6, m6
+ pand m7, m6
+ DEBLOCK_P0_Q0
+ ret
+
+
+
+; in: %1=p0 %2=p1 %3=q1
+; out: p0 = (p0 + q1 + 2*p1 + 2) >> 2
+%macro CHROMA_INTRA_P0 3
+ movq m4, %1
+ pxor m4, %3
+ pand m4, [pb_1] ; m4 = (p0^q1)&1
+ pavgb %1, %3
+ psubusb %1, m4
+ pavgb %1, %2 ; dst = avg(p1, avg(p0,q1) - ((p0^q1)&1))
+%endmacro
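
Extending the same idiom, a scalar model of CHROMA_INTRA_P0 (a sketch, not
the SIMD code itself): subtracting the correction bit cancels the first
pavgb's upward rounding, so the two chained averages hit the exact formula
from the comment above.

#include <assert.h>
#include <stdint.h>

static uint8_t pavgb(uint8_t a, uint8_t b) { return (a + b + 1) >> 1; }

int main(void)
{
    for (int p0 = 0; p0 < 256; p0++)
        for (int p1 = 0; p1 < 256; p1++)
            for (int q1 = 0; q1 < 256; q1++) {
                uint8_t t = pavgb(p0, q1) - ((p0 ^ q1) & 1);
                assert(pavgb(t, p1) == ((p0 + q1 + 2 * p1 + 2) >> 2));
            }
    return 0;
}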
+
+%define t5 r4
+%define t6 r5
+
+;-----------------------------------------------------------------------------
+; void x264_deblock_v_chroma_intra( uint8_t *pix, int stride, int alpha, int beta )
+;-----------------------------------------------------------------------------
+cglobal x264_deblock_v_chroma_intra_mmxext, 4,5
+ CHROMA_V_START
+ movq m0, [t5]
+ movq m1, [t5+r1]
+ movq m2, [r0]
+ movq m3, [r0+r1]
+ call x264_chroma_intra_body_mmxext
+ movq [t5+r1], m1
+ movq [r0], m2
+ RET
+
+;-----------------------------------------------------------------------------
+; void x264_deblock_h_chroma_intra( uint8_t *pix, int stride, int alpha, int beta )
+;-----------------------------------------------------------------------------
+cglobal x264_deblock_h_chroma_intra_mmxext, 4,6
+ CHROMA_H_START
+ TRANSPOSE4x8_LOAD PASS8ROWS(t5, r0, r1, t6)
+ call x264_chroma_intra_body_mmxext
+ TRANSPOSE8x4_STORE PASS8ROWS(t5, r0, r1, t6)
+ RET
+
+ALIGN 16
+x264_chroma_intra_body_mmxext:
+ LOAD_MASK r2d, r3d
+ movq m5, m1
+ movq m6, m2
+ CHROMA_INTRA_P0 m1, m0, m3
+ CHROMA_INTRA_P0 m2, m3, m0
+ psubb m1, m5
+ psubb m2, m6
+ pand m1, m7
+ pand m2, m7
+ paddb m1, m5
+ paddb m2, m6
+ ret
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264_idct.asm b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264_idct.asm
new file mode 100644
index 000000000..3311ab559
--- /dev/null
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264_idct.asm
@@ -0,0 +1,865 @@
+;*****************************************************************************
+;* MMX/SSE2-optimized H.264 iDCT
+;*****************************************************************************
+;* Copyright (C) 2004-2005 Michael Niedermayer, Loren Merritt
+;* Copyright (C) 2003-2008 x264 project
+;*
+;* Authors: Laurent Aimar <fenrir@via.ecp.fr>
+;* Loren Merritt <lorenm@u.washington.edu>
+;* Holger Lubitz <hal@duncan.ol.sub.de>
+;* Min Chen <chenm001.163.com>
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;*****************************************************************************
+
+%include "x86inc.asm"
+%include "x86util.asm"
+
+SECTION_RODATA
+
+; FIXME this table is a duplicate from h264data.h, and will be removed once the tables from h264 have been split
+scan8_mem: db 4+1*8, 5+1*8, 4+2*8, 5+2*8
+ db 6+1*8, 7+1*8, 6+2*8, 7+2*8
+ db 4+3*8, 5+3*8, 4+4*8, 5+4*8
+ db 6+3*8, 7+3*8, 6+4*8, 7+4*8
+ db 1+1*8, 2+1*8
+ db 1+2*8, 2+2*8
+ db 1+4*8, 2+4*8
+ db 1+5*8, 2+5*8
+%ifdef PIC
+%define scan8 r11
+%else
+%define scan8 scan8_mem
+%endif
+
+cextern pw_32
+
+SECTION .text
+
+; %1=uint8_t *dst, %2=int16_t *block, %3=int stride
+%macro IDCT4_ADD 3
+ ; Load dct coeffs
+ movq m0, [%2]
+ movq m1, [%2+8]
+ movq m2, [%2+16]
+ movq m3, [%2+24]
+
+ IDCT4_1D 0, 1, 2, 3, 4, 5
+ mova m6, [pw_32]
+ TRANSPOSE4x4W 0, 1, 2, 3, 4
+ paddw m0, m6
+ IDCT4_1D 0, 1, 2, 3, 4, 5
+ pxor m7, m7
+
+ STORE_DIFFx2 m0, m1, m4, m5, m7, 6, %1, %3
+ lea %1, [%1+%3*2]
+ STORE_DIFFx2 m2, m3, m4, m5, m7, 6, %1, %3
+%endmacro
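
IDCT4_1D is the standard H.264 4-point butterfly; a scalar sketch of one
pass (the macro runs one pass, transposes, folds in the +32 rounding term,
runs the second pass, then stores pixel + (residual >> 6)):

#include <stdint.h>

static void idct4_1d(int16_t d[4])
{
    int z0 = d[0] + d[2];            /* even part */
    int z1 = d[0] - d[2];
    int z2 = (d[1] >> 1) - d[3];     /* odd part: half-weight taps */
    int z3 = d[1] + (d[3] >> 1);
    d[0] = z0 + z3;
    d[1] = z1 + z2;
    d[2] = z1 - z2;
    d[3] = z0 - z3;
}

int main(void)
{
    int16_t row[4] = { 64, 0, 0, 0 };   /* DC-only input */
    idct4_1d(row);                      /* -> 64 64 64 64 */
    return row[0] == 64 && row[3] == 64 ? 0 : 1;
}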
+
+INIT_MMX
+; ff_h264_idct_add_mmx(uint8_t *dst, int16_t *block, int stride)
+cglobal h264_idct_add_mmx, 3, 3, 0
+ IDCT4_ADD r0, r1, r2
+ RET
+
+%macro IDCT8_1D 2
+ mova m4, m5
+ mova m0, m1
+ psraw m4, 1
+ psraw m1, 1
+ paddw m4, m5
+ paddw m1, m0
+ paddw m4, m7
+ paddw m1, m5
+ psubw m4, m0
+ paddw m1, m3
+
+ psubw m0, m3
+ psubw m5, m3
+ paddw m0, m7
+ psubw m5, m7
+ psraw m3, 1
+ psraw m7, 1
+ psubw m0, m3
+ psubw m5, m7
+
+ mova m3, m4
+ mova m7, m1
+ psraw m1, 2
+ psraw m3, 2
+ paddw m3, m0
+ psraw m0, 2
+ paddw m1, m5
+ psraw m5, 2
+ psubw m0, m4
+ psubw m7, m5
+
+ mova m4, m2
+ mova m5, m6
+ psraw m4, 1
+ psraw m6, 1
+ psubw m4, m5
+ paddw m6, m2
+
+ mova m2, %1
+ mova m5, %2
+ SUMSUB_BA m5, m2
+ SUMSUB_BA m6, m5
+ SUMSUB_BA m4, m2
+ SUMSUB_BA m7, m6
+ SUMSUB_BA m0, m4
+ SUMSUB_BA m3, m2
+ SUMSUB_BA m1, m5
+ SWAP 7, 6, 4, 5, 2, 3, 1, 0 ; 70315246 -> 01234567
+%endmacro
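
IDCT8_1D is the matching 8-point pass, split into even and odd halves; a
scalar sketch following FFmpeg's C reference (register pressure is why the
asm keeps two of the eight inputs in memory operands and reorders with SWAP):

#include <stdint.h>

static void idct8_1d(int16_t b[8])
{
    const int a0 =  b[0] + b[4];              /* even half */
    const int a2 =  b[0] - b[4];
    const int a4 = (b[2] >> 1) - b[6];
    const int a6 = (b[6] >> 1) + b[2];
    const int e0 = a0 + a6, e1 = a2 + a4, e2 = a2 - a4, e3 = a0 - a6;

    const int a1 = -b[3] + b[5] - b[7] - (b[7] >> 1);   /* odd half */
    const int a3 =  b[1] + b[7] - b[3] - (b[3] >> 1);
    const int a5 = -b[1] + b[7] + b[5] + (b[5] >> 1);
    const int a7 =  b[3] + b[5] + b[1] + (b[1] >> 1);
    const int o0 = a1 + (a7 >> 2);
    const int o1 = a3 + (a5 >> 2);
    const int o2 = (a3 >> 2) - a5;
    const int o3 = a7 - (a1 >> 2);

    b[0] = e0 + o3;  b[7] = e0 - o3;
    b[1] = e1 + o2;  b[6] = e1 - o2;
    b[2] = e2 + o1;  b[5] = e2 - o1;
    b[3] = e3 + o0;  b[4] = e3 - o0;
}

int main(void)
{
    int16_t v[8] = { 32, 0, 0, 0, 0, 0, 0, 0 };  /* DC-only input */
    idct8_1d(v);                                 /* -> eight 32s */
    return v[0] == 32 && v[7] == 32 ? 0 : 1;
}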
+
+%macro IDCT8_1D_FULL 1
+ mova m7, [%1+112]
+ mova m6, [%1+ 96]
+ mova m5, [%1+ 80]
+ mova m3, [%1+ 48]
+ mova m2, [%1+ 32]
+ mova m1, [%1+ 16]
+ IDCT8_1D [%1], [%1+ 64]
+%endmacro
+
+; %1=int16_t *block, %2=int16_t *dstblock
+%macro IDCT8_ADD_MMX_START 2
+ IDCT8_1D_FULL %1
+ mova [%1], m7
+ TRANSPOSE4x4W 0, 1, 2, 3, 7
+ mova m7, [%1]
+ mova [%2 ], m0
+ mova [%2+16], m1
+ mova [%2+32], m2
+ mova [%2+48], m3
+ TRANSPOSE4x4W 4, 5, 6, 7, 3
+ mova [%2+ 8], m4
+ mova [%2+24], m5
+ mova [%2+40], m6
+ mova [%2+56], m7
+%endmacro
+
+; %1=uint8_t *dst, %2=int16_t *block, %3=int stride
+%macro IDCT8_ADD_MMX_END 3
+ IDCT8_1D_FULL %2
+ mova [%2 ], m5
+ mova [%2+16], m6
+ mova [%2+32], m7
+
+ pxor m7, m7
+ STORE_DIFFx2 m0, m1, m5, m6, m7, 6, %1, %3
+ lea %1, [%1+%3*2]
+ STORE_DIFFx2 m2, m3, m5, m6, m7, 6, %1, %3
+ mova m0, [%2 ]
+ mova m1, [%2+16]
+ mova m2, [%2+32]
+ lea %1, [%1+%3*2]
+ STORE_DIFFx2 m4, m0, m5, m6, m7, 6, %1, %3
+ lea %1, [%1+%3*2]
+ STORE_DIFFx2 m1, m2, m5, m6, m7, 6, %1, %3
+%endmacro
+
+INIT_MMX
+; ff_h264_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride)
+cglobal h264_idct8_add_mmx, 3, 4, 0
+ %assign pad 128+4-(stack_offset&7)
+ SUB rsp, pad
+
+ add word [r1], 32
+ IDCT8_ADD_MMX_START r1 , rsp
+ IDCT8_ADD_MMX_START r1+8, rsp+64
+ lea r3, [r0+4]
+ IDCT8_ADD_MMX_END r0 , rsp, r2
+ IDCT8_ADD_MMX_END r3 , rsp+8, r2
+
+ ADD rsp, pad
+ RET
+
+; %1=uint8_t *dst, %2=int16_t *block, %3=int stride
+%macro IDCT8_ADD_SSE 4
+ IDCT8_1D_FULL %2
+%ifdef ARCH_X86_64
+ TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, 8
+%else
+ TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, [%2], [%2+16]
+%endif
+ paddw m0, [pw_32]
+
+%ifndef ARCH_X86_64
+ mova [%2 ], m0
+ mova [%2+16], m4
+ IDCT8_1D [%2], [%2+ 16]
+ mova [%2 ], m6
+ mova [%2+16], m7
+%else
+ SWAP 0, 8
+ SWAP 4, 9
+ IDCT8_1D m8, m9
+ SWAP 6, 8
+ SWAP 7, 9
+%endif
+
+ pxor m7, m7
+ lea %4, [%3*3]
+ STORE_DIFF m0, m6, m7, [%1 ]
+ STORE_DIFF m1, m6, m7, [%1+%3 ]
+ STORE_DIFF m2, m6, m7, [%1+%3*2]
+ STORE_DIFF m3, m6, m7, [%1+%4 ]
+%ifndef ARCH_X86_64
+ mova m0, [%2 ]
+ mova m1, [%2+16]
+%else
+ SWAP 0, 8
+ SWAP 1, 9
+%endif
+ lea %1, [%1+%3*4]
+ STORE_DIFF m4, m6, m7, [%1 ]
+ STORE_DIFF m5, m6, m7, [%1+%3 ]
+ STORE_DIFF m0, m6, m7, [%1+%3*2]
+ STORE_DIFF m1, m6, m7, [%1+%4 ]
+%endmacro
+
+INIT_XMM
+; ff_h264_idct8_add_sse2(uint8_t *dst, int16_t *block, int stride)
+cglobal h264_idct8_add_sse2, 3, 4, 10
+ IDCT8_ADD_SSE r0, r1, r2, r3
+ RET
+
+%macro DC_ADD_MMX2_INIT 2-3
+%if %0 == 2
+ movsx %1, word [%1]
+ add %1, 32
+ sar %1, 6
+ movd m0, %1
+ lea %1, [%2*3]
+%else
+ add %3, 32
+ sar %3, 6
+ movd m0, %3
+ lea %3, [%2*3]
+%endif
+ pshufw m0, m0, 0
+ pxor m1, m1
+ psubw m1, m0
+ packuswb m0, m0
+ packuswb m1, m1
+%endmacro
+
+%macro DC_ADD_MMX2_OP 3-4
+ %1 m2, [%2 ]
+ %1 m3, [%2+%3 ]
+ %1 m4, [%2+%3*2]
+ %1 m5, [%2+%4 ]
+ paddusb m2, m0
+ paddusb m3, m0
+ paddusb m4, m0
+ paddusb m5, m0
+ psubusb m2, m1
+ psubusb m3, m1
+ psubusb m4, m1
+ psubusb m5, m1
+ %1 [%2 ], m2
+ %1 [%2+%3 ], m3
+ %1 [%2+%3*2], m4
+ %1 [%2+%4 ], m5
+%endmacro
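
paddusb/psubusb have no signed-byte form, so DC_ADD_MMX2_INIT packs the
clamped +dc and -dc into separate operands and DC_ADD_MMX2_OP applies
both; since one of the two is always zero, the pair behaves like a
saturating signed add. A scalar sketch of the combined 4x4 effect:

#include <stdint.h>

static uint8_t clip_u8(int v) { return v < 0 ? 0 : v > 255 ? 255 : v; }

static void idct_dc_add4x4(uint8_t *dst, const int16_t *block, int stride)
{
    int dc = (block[0] + 32) >> 6;     /* same rounding as the full idct */
    for (int y = 0; y < 4; y++, dst += stride)
        for (int x = 0; x < 4; x++)
            dst[x] = clip_u8(dst[x] + dc);
}

int main(void)
{
    uint8_t pix[4 * 4] = { 0 };
    int16_t blk[16]    = { 640 };      /* dc = (640 + 32) >> 6 = 10 */
    idct_dc_add4x4(pix, blk, 4);
    return pix[0] == 10 ? 0 : 1;
}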
+
+INIT_MMX
+; ff_h264_idct_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride)
+cglobal h264_idct_dc_add_mmx2, 3, 3, 0
+ DC_ADD_MMX2_INIT r1, r2
+ DC_ADD_MMX2_OP movh, r0, r2, r1
+ RET
+
+; ff_h264_idct8_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride)
+cglobal h264_idct8_dc_add_mmx2, 3, 3, 0
+ DC_ADD_MMX2_INIT r1, r2
+ DC_ADD_MMX2_OP mova, r0, r2, r1
+ lea r0, [r0+r2*4]
+ DC_ADD_MMX2_OP mova, r0, r2, r1
+ RET
+
+; ff_h264_idct_add16_mmx(uint8_t *dst, const int *block_offset,
+; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
+cglobal h264_idct_add16_mmx, 5, 7, 0
+ xor r5, r5
+%ifdef PIC
+ lea r11, [scan8_mem]
+%endif
+.nextblock
+ movzx r6, byte [scan8+r5]
+ movzx r6, byte [r4+r6]
+ test r6, r6
+ jz .skipblock
+ mov r6d, dword [r1+r5*4]
+ lea r6, [r0+r6]
+ IDCT4_ADD r6, r2, r3
+.skipblock
+ inc r5
+ add r2, 32
+ cmp r5, 16
+ jl .nextblock
+ REP_RET
+
+; ff_h264_idct8_add4_mmx(uint8_t *dst, const int *block_offset,
+; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
+cglobal h264_idct8_add4_mmx, 5, 7, 0
+ %assign pad 128+4-(stack_offset&7)
+ SUB rsp, pad
+
+ xor r5, r5
+%ifdef PIC
+ lea r11, [scan8_mem]
+%endif
+.nextblock
+ movzx r6, byte [scan8+r5]
+ movzx r6, byte [r4+r6]
+ test r6, r6
+ jz .skipblock
+ mov r6d, dword [r1+r5*4]
+ lea r6, [r0+r6]
+ add word [r2], 32
+ IDCT8_ADD_MMX_START r2 , rsp
+ IDCT8_ADD_MMX_START r2+8, rsp+64
+ IDCT8_ADD_MMX_END r6 , rsp, r3
+ mov r6d, dword [r1+r5*4]
+ lea r6, [r0+r6+4]
+ IDCT8_ADD_MMX_END r6 , rsp+8, r3
+.skipblock
+ add r5, 4
+ add r2, 128
+ cmp r5, 16
+ jl .nextblock
+ ADD rsp, pad
+ RET
+
+; ff_h264_idct_add16_mmx2(uint8_t *dst, const int *block_offset,
+; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
+cglobal h264_idct_add16_mmx2, 5, 7, 0
+ xor r5, r5
+%ifdef PIC
+ lea r11, [scan8_mem]
+%endif
+.nextblock
+ movzx r6, byte [scan8+r5]
+ movzx r6, byte [r4+r6]
+ test r6, r6
+ jz .skipblock
+ cmp r6, 1
+ jnz .no_dc
+ movsx r6, word [r2]
+ test r6, r6
+ jz .no_dc
+ DC_ADD_MMX2_INIT r2, r3, r6
+%ifdef ARCH_X86_64
+%define dst_reg r10
+%define dst_regd r10d
+%else
+%define dst_reg r1
+%define dst_regd r1d
+%endif
+ mov dst_regd, dword [r1+r5*4]
+ lea dst_reg, [r0+dst_reg]
+ DC_ADD_MMX2_OP movh, dst_reg, r3, r6
+%ifndef ARCH_X86_64
+ mov r1, r1m
+%endif
+ inc r5
+ add r2, 32
+ cmp r5, 16
+ jl .nextblock
+ REP_RET
+.no_dc
+ mov r6d, dword [r1+r5*4]
+ lea r6, [r0+r6]
+ IDCT4_ADD r6, r2, r3
+.skipblock
+ inc r5
+ add r2, 32
+ cmp r5, 16
+ jl .nextblock
+ REP_RET
+
+; ff_h264_idct_add16intra_mmx(uint8_t *dst, const int *block_offset,
+; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
+cglobal h264_idct_add16intra_mmx, 5, 7, 0
+ xor r5, r5
+%ifdef PIC
+ lea r11, [scan8_mem]
+%endif
+.nextblock
+ movzx r6, byte [scan8+r5]
+ movzx r6, byte [r4+r6]
+ or r6w, word [r2]
+ test r6, r6
+ jz .skipblock
+ mov r6d, dword [r1+r5*4]
+ lea r6, [r0+r6]
+ IDCT4_ADD r6, r2, r3
+.skipblock
+ inc r5
+ add r2, 32
+ cmp r5, 16
+ jl .nextblock
+ REP_RET
+
+; ff_h264_idct_add16intra_mmx2(uint8_t *dst, const int *block_offset,
+; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
+cglobal h264_idct_add16intra_mmx2, 5, 7, 0
+ xor r5, r5
+%ifdef PIC
+ lea r11, [scan8_mem]
+%endif
+.nextblock
+ movzx r6, byte [scan8+r5]
+ movzx r6, byte [r4+r6]
+ test r6, r6
+ jz .try_dc
+ mov r6d, dword [r1+r5*4]
+ lea r6, [r0+r6]
+ IDCT4_ADD r6, r2, r3
+ inc r5
+ add r2, 32
+ cmp r5, 16
+ jl .nextblock
+ REP_RET
+.try_dc
+ movsx r6, word [r2]
+ test r6, r6
+ jz .skipblock
+ DC_ADD_MMX2_INIT r2, r3, r6
+%ifdef ARCH_X86_64
+%define dst_reg r10
+%define dst_regd r10d
+%else
+%define dst_reg r1
+%define dst_regd r1d
+%endif
+ mov dst_regd, dword [r1+r5*4]
+ lea dst_reg, [r0+dst_reg]
+ DC_ADD_MMX2_OP movh, dst_reg, r3, r6
+%ifndef ARCH_X86_64
+ mov r1, r1m
+%endif
+.skipblock
+ inc r5
+ add r2, 32
+ cmp r5, 16
+ jl .nextblock
+ REP_RET
+
+; ff_h264_idct8_add4_mmx2(uint8_t *dst, const int *block_offset,
+; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
+cglobal h264_idct8_add4_mmx2, 5, 7, 0
+ %assign pad 128+4-(stack_offset&7)
+ SUB rsp, pad
+
+ xor r5, r5
+%ifdef PIC
+ lea r11, [scan8_mem]
+%endif
+.nextblock
+ movzx r6, byte [scan8+r5]
+ movzx r6, byte [r4+r6]
+ test r6, r6
+ jz .skipblock
+ cmp r6, 1
+ jnz .no_dc
+ movsx r6, word [r2]
+ test r6, r6
+ jz .no_dc
+ DC_ADD_MMX2_INIT r2, r3, r6
+%ifdef ARCH_X86_64
+%define dst_reg r10
+%define dst_regd r10d
+%else
+%define dst_reg r1
+%define dst_regd r1d
+%endif
+ mov dst_regd, dword [r1+r5*4]
+ lea dst_reg, [r0+dst_reg]
+ DC_ADD_MMX2_OP mova, dst_reg, r3, r6
+ lea dst_reg, [dst_reg+r3*4]
+ DC_ADD_MMX2_OP mova, dst_reg, r3, r6
+%ifndef ARCH_X86_64
+ mov r1, r1m
+%endif
+ add r5, 4
+ add r2, 128
+ cmp r5, 16
+ jl .nextblock
+
+ ADD rsp, pad
+ RET
+.no_dc
+ mov r6d, dword [r1+r5*4]
+ lea r6, [r0+r6]
+ add word [r2], 32
+ IDCT8_ADD_MMX_START r2 , rsp
+ IDCT8_ADD_MMX_START r2+8, rsp+64
+ IDCT8_ADD_MMX_END r6 , rsp, r3
+ mov r6d, dword [r1+r5*4]
+ lea r6, [r0+r6+4]
+ IDCT8_ADD_MMX_END r6 , rsp+8, r3
+.skipblock
+ add r5, 4
+ add r2, 128
+ cmp r5, 16
+ jl .nextblock
+
+ ADD rsp, pad
+ RET
+
+INIT_XMM
+; ff_h264_idct8_add4_sse2(uint8_t *dst, const int *block_offset,
+; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
+cglobal h264_idct8_add4_sse2, 5, 7, 10
+ xor r5, r5
+%ifdef PIC
+ lea r11, [scan8_mem]
+%endif
+.nextblock
+ movzx r6, byte [scan8+r5]
+ movzx r6, byte [r4+r6]
+ test r6, r6
+ jz .skipblock
+ cmp r6, 1
+ jnz .no_dc
+ movsx r6, word [r2]
+ test r6, r6
+ jz .no_dc
+INIT_MMX
+ DC_ADD_MMX2_INIT r2, r3, r6
+%ifdef ARCH_X86_64
+%define dst_reg r10
+%define dst_regd r10d
+%else
+%define dst_reg r1
+%define dst_regd r1d
+%endif
+ mov dst_regd, dword [r1+r5*4]
+ lea dst_reg, [r0+dst_reg]
+ DC_ADD_MMX2_OP mova, dst_reg, r3, r6
+ lea dst_reg, [dst_reg+r3*4]
+ DC_ADD_MMX2_OP mova, dst_reg, r3, r6
+%ifndef ARCH_X86_64
+ mov r1, r1m
+%endif
+ add r5, 4
+ add r2, 128
+ cmp r5, 16
+ jl .nextblock
+ REP_RET
+.no_dc
+INIT_XMM
+ mov dst_regd, dword [r1+r5*4]
+ lea dst_reg, [r0+dst_reg]
+ IDCT8_ADD_SSE dst_reg, r2, r3, r6
+%ifndef ARCH_X86_64
+ mov r1, r1m
+%endif
+.skipblock
+ add r5, 4
+ add r2, 128
+ cmp r5, 16
+ jl .nextblock
+ REP_RET
+
+INIT_MMX
+h264_idct_add8_mmx_plane:
+.nextblock
+ movzx r6, byte [scan8+r5]
+ movzx r6, byte [r4+r6]
+ or r6w, word [r2]
+ test r6, r6
+ jz .skipblock
+%ifdef ARCH_X86_64
+ mov r0d, dword [r1+r5*4]
+ add r0, [r10]
+%else
+ mov r0, r1m ; XXX r1m here is actually r0m of the calling func
+ mov r0, [r0]
+ add r0, dword [r1+r5*4]
+%endif
+ IDCT4_ADD r0, r2, r3
+.skipblock
+ inc r5
+ add r2, 32
+ test r5, 3
+ jnz .nextblock
+ rep ret
+
+; ff_h264_idct_add8_mmx(uint8_t **dest, const int *block_offset,
+; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
+cglobal h264_idct_add8_mmx, 5, 7, 0
+ mov r5, 16
+ add r2, 512
+%ifdef PIC
+ lea r11, [scan8_mem]
+%endif
+%ifdef ARCH_X86_64
+ mov r10, r0
+%endif
+ call h264_idct_add8_mmx_plane
+%ifdef ARCH_X86_64
+ add r10, gprsize
+%else
+ add r0mp, gprsize
+%endif
+ call h264_idct_add8_mmx_plane
+ RET
+
+h264_idct_add8_mmx2_plane:
+.nextblock
+ movzx r6, byte [scan8+r5]
+ movzx r6, byte [r4+r6]
+ test r6, r6
+ jz .try_dc
+%ifdef ARCH_X86_64
+ mov r0d, dword [r1+r5*4]
+ add r0, [r10]
+%else
+ mov r0, r1m ; XXX r1m here is actually r0m of the calling func
+ mov r0, [r0]
+ add r0, dword [r1+r5*4]
+%endif
+ IDCT4_ADD r0, r2, r3
+ inc r5
+ add r2, 32
+ test r5, 3
+ jnz .nextblock
+ rep ret
+.try_dc
+ movsx r6, word [r2]
+ test r6, r6
+ jz .skipblock
+ DC_ADD_MMX2_INIT r2, r3, r6
+%ifdef ARCH_X86_64
+ mov r0d, dword [r1+r5*4]
+ add r0, [r10]
+%else
+ mov r0, r1m ; XXX r1m here is actually r0m of the calling func
+ mov r0, [r0]
+ add r0, dword [r1+r5*4]
+%endif
+ DC_ADD_MMX2_OP movh, r0, r3, r6
+.skipblock
+ inc r5
+ add r2, 32
+ test r5, 3
+ jnz .nextblock
+ rep ret
+
+; ff_h264_idct_add8_mmx2(uint8_t **dest, const int *block_offset,
+; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
+cglobal h264_idct_add8_mmx2, 5, 7, 0
+ mov r5, 16
+ add r2, 512
+%ifdef ARCH_X86_64
+ mov r10, r0
+%endif
+%ifdef PIC
+ lea r11, [scan8_mem]
+%endif
+ call h264_idct_add8_mmx2_plane
+%ifdef ARCH_X86_64
+ add r10, gprsize
+%else
+ add r0mp, gprsize
+%endif
+ call h264_idct_add8_mmx2_plane
+ RET
+
+INIT_MMX
+; r0 = uint8_t *dst, r2 = int16_t *block, r3 = int stride, r6=clobbered
+h264_idct_dc_add8_mmx2:
+ movd m0, [r2 ] ; 0 0 X D
+ punpcklwd m0, [r2+32] ; x X d D
+ paddsw m0, [pw_32]
+ psraw m0, 6
+ punpcklwd m0, m0 ; d d D D
+ pxor m1, m1 ; 0 0 0 0
+ psubw m1, m0 ; -d-d-D-D
+ packuswb m0, m1 ; -d-d-D-D d d D D
+ pshufw m1, m0, 0xFA ; -d-d-d-d-D-D-D-D
+ punpcklwd m0, m0 ; d d d d D D D D
+ lea r6, [r3*3]
+ DC_ADD_MMX2_OP movq, r0, r3, r6
+ ret
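
As the register comments above trace, this routine packs the DCs of two
horizontally adjacent 4x4 chroma blocks into one 8-byte operand, so a
single pass over four rows updates both. A scalar sketch of the effect:

#include <stdint.h>

static uint8_t clip_u8(int v) { return v < 0 ? 0 : v > 255 ? 255 : v; }

/* block[0] holds the left 4x4's DC, block[32] the right one's. */
static void idct_dc_add8(uint8_t *dst, const int16_t *block, int stride)
{
    int dcl = (block[0]  + 32) >> 6;
    int dcr = (block[32] + 32) >> 6;
    for (int y = 0; y < 4; y++, dst += stride)
        for (int x = 0; x < 8; x++)
            dst[x] = clip_u8(dst[x] + (x < 4 ? dcl : dcr));
}

int main(void)
{
    uint8_t row[8 * 4] = { 0 };
    int16_t blk[64]    = { 0 };
    blk[0] = 64; blk[32] = 128;        /* dcl = 1, dcr = 2 */
    idct_dc_add8(row, blk, 8);
    return row[0] == 1 && row[4] == 2 ? 0 : 1;
}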
+
+ALIGN 16
+INIT_XMM
+; r0 = uint8_t *dst (clobbered), r2 = int16_t *block, r3 = int stride
+x264_add8x4_idct_sse2:
+ movq m0, [r2+ 0]
+ movq m1, [r2+ 8]
+ movq m2, [r2+16]
+ movq m3, [r2+24]
+ movhps m0, [r2+32]
+ movhps m1, [r2+40]
+ movhps m2, [r2+48]
+ movhps m3, [r2+56]
+ IDCT4_1D 0,1,2,3,4,5
+ TRANSPOSE2x4x4W 0,1,2,3,4
+ paddw m0, [pw_32]
+ IDCT4_1D 0,1,2,3,4,5
+ pxor m7, m7
+ STORE_DIFFx2 m0, m1, m4, m5, m7, 6, r0, r3
+ lea r0, [r0+r3*2]
+ STORE_DIFFx2 m2, m3, m4, m5, m7, 6, r0, r3
+ ret
+
+%macro add16_sse2_cycle 2
+ movzx r0, word [r4+%2]
+ test r0, r0
+ jz .cycle%1end
+ mov r0d, dword [r1+%1*8]
+%ifdef ARCH_X86_64
+ add r0, r10
+%else
+ add r0, r0m
+%endif
+ call x264_add8x4_idct_sse2
+.cycle%1end
+%if %1 < 7
+ add r2, 64
+%endif
+%endmacro
+
+; ff_h264_idct_add16_sse2(uint8_t *dst, const int *block_offset,
+; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
+cglobal h264_idct_add16_sse2, 5, 5, 8
+%ifdef ARCH_X86_64
+ mov r10, r0
+%endif
+ ; unrolling of the loop leads to an average performance gain of
+ ; 20-25%
+ add16_sse2_cycle 0, 0xc
+ add16_sse2_cycle 1, 0x14
+ add16_sse2_cycle 2, 0xe
+ add16_sse2_cycle 3, 0x16
+ add16_sse2_cycle 4, 0x1c
+ add16_sse2_cycle 5, 0x24
+ add16_sse2_cycle 6, 0x1e
+ add16_sse2_cycle 7, 0x26
+ RET
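
The magic nnzc offsets fall straight out of scan8: cycle i handles blocks
2i and 2i+1, which sit at adjacent scan8 positions, so one 16-bit load at
nnzc[scan8[2*i]] tests both non-zero counts at once. A sketch that
regenerates the table above:

#include <stdio.h>

static const unsigned char scan8[16] = {
    4+1*8, 5+1*8, 4+2*8, 5+2*8, 6+1*8, 7+1*8, 6+2*8, 7+2*8,
    4+3*8, 5+3*8, 4+4*8, 5+4*8, 6+3*8, 7+3*8, 6+4*8, 7+4*8,
};

int main(void)
{
    for (int i = 0; i < 8; i++)   /* prints 0xc, 0x14, 0xe, 0x16, ... */
        printf("add16_sse2_cycle %d, 0x%x\n", i, scan8[2 * i]);
    return 0;
}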
+
+; ff_h264_idct_add16intra_sse2(uint8_t *dst, const int *block_offset,
+; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
+cglobal h264_idct_add16intra_sse2, 5, 7, 8
+ xor r5, r5
+%ifdef ARCH_X86_64
+ mov r10, r0
+%endif
+%ifdef PIC
+ lea r11, [scan8_mem]
+%endif
+.next2blocks
+ movzx r0, byte [scan8+r5]
+ movzx r0, word [r4+r0]
+ test r0, r0
+ jz .try_dc
+ mov r0d, dword [r1+r5*4]
+%ifdef ARCH_X86_64
+ add r0, r10
+%else
+ add r0, r0m
+%endif
+ call x264_add8x4_idct_sse2
+ add r5, 2
+ add r2, 64
+ cmp r5, 16
+ jl .next2blocks
+ REP_RET
+.try_dc
+ movsx r0, word [r2 ]
+ or r0w, word [r2+32]
+ jz .skip2blocks
+ mov r0d, dword [r1+r5*4]
+%ifdef ARCH_X86_64
+ add r0, r10
+%else
+ add r0, r0m
+%endif
+ call h264_idct_dc_add8_mmx2
+.skip2blocks
+ add r5, 2
+ add r2, 64
+ cmp r5, 16
+ jl .next2blocks
+ REP_RET
+
+h264_idct_add8_sse2_plane:
+.next2blocks
+ movzx r0, byte [scan8+r5]
+ movzx r0, word [r4+r0]
+ test r0, r0
+ jz .try_dc
+%ifdef ARCH_X86_64
+ mov r0d, dword [r1+r5*4]
+ add r0, [r10]
+%else
+ mov r0, r1m ; XXX r1m here is actually r0m of the calling func
+ mov r0, [r0]
+ add r0, dword [r1+r5*4]
+%endif
+ call x264_add8x4_idct_sse2
+ add r5, 2
+ add r2, 64
+ test r5, 3
+ jnz .next2blocks
+ rep ret
+.try_dc
+ movsx r0, word [r2 ]
+ or r0w, word [r2+32]
+ jz .skip2blocks
+%ifdef ARCH_X86_64
+ mov r0d, dword [r1+r5*4]
+ add r0, [r10]
+%else
+ mov r0, r1m ; XXX r1m here is actually r0m of the calling func
+ mov r0, [r0]
+ add r0, dword [r1+r5*4]
+%endif
+ call h264_idct_dc_add8_mmx2
+.skip2blocks
+ add r5, 2
+ add r2, 64
+ test r5, 3
+ jnz .next2blocks
+ rep ret
+
+; ff_h264_idct_add8_sse2(uint8_t **dest, const int *block_offset,
+; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
+cglobal h264_idct_add8_sse2, 5, 7, 8
+ mov r5, 16
+ add r2, 512
+%ifdef PIC
+ lea r11, [scan8_mem]
+%endif
+%ifdef ARCH_X86_64
+ mov r10, r0
+%endif
+ call h264_idct_add8_sse2_plane
+%ifdef ARCH_X86_64
+ add r10, gprsize
+%else
+ add r0mp, gprsize
+%endif
+ call h264_idct_add8_sse2_plane
+ RET
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264_idct_sse2.asm b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264_idct_sse2.asm
deleted file mode 100644
index 86c1e66c7..000000000
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264_idct_sse2.asm
+++ /dev/null
@@ -1,54 +0,0 @@
-;*****************************************************************************
-;* SSE2-optimized H.264 iDCT
-;*****************************************************************************
-;* Copyright (C) 2003-2008 x264 project
-;*
-;* Authors: Laurent Aimar <fenrir@via.ecp.fr>
-;* Loren Merritt <lorenm@u.washington.edu>
-;* Holger Lubitz <hal@duncan.ol.sub.de>
-;* Min Chen <chenm001.163.com>
-;*
-;* This program is free software; you can redistribute it and/or modify
-;* it under the terms of the GNU General Public License as published by
-;* the Free Software Foundation; either version 2 of the License, or
-;* (at your option) any later version.
-;*
-;* This program is distributed in the hope that it will be useful,
-;* but WITHOUT ANY WARRANTY; without even the implied warranty of
-;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-;* GNU General Public License for more details.
-;*
-;* You should have received a copy of the GNU General Public License
-;* along with this program; if not, write to the Free Software
-;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
-;*****************************************************************************
-
-%include "x86inc.asm"
-%include "x86util.asm"
-
-SECTION_RODATA
-pw_32: times 8 dw 32
-
-SECTION .text
-
-INIT_XMM
-cglobal x264_add8x4_idct_sse2, 3,3,8
- movq m0, [r1+ 0]
- movq m1, [r1+ 8]
- movq m2, [r1+16]
- movq m3, [r1+24]
- movhps m0, [r1+32]
- movhps m1, [r1+40]
- movhps m2, [r1+48]
- movhps m3, [r1+56]
- IDCT4_1D 0,1,2,3,4,5
- TRANSPOSE2x4x4W 0,1,2,3,4
- paddw m0, [pw_32]
- IDCT4_1D 0,1,2,3,4,5
- pxor m7, m7
- STORE_DIFF m0, m4, m7, [r0]
- STORE_DIFF m1, m4, m7, [r0+r2]
- lea r0, [r0+r2*2]
- STORE_DIFF m2, m4, m7, [r0]
- STORE_DIFF m3, m4, m7, [r0+r2]
- RET
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264_intrapred_init.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264_intrapred_init.c
new file mode 100644
index 000000000..e01a17bd6
--- /dev/null
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264_intrapred_init.c
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2010 Jason Garrett-Glaser
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/cpu.h"
+#include "libavcodec/h264pred.h"
+
+void ff_pred16x16_vertical_mmx (uint8_t *src, int stride);
+void ff_pred16x16_vertical_sse (uint8_t *src, int stride);
+void ff_pred16x16_horizontal_mmx (uint8_t *src, int stride);
+void ff_pred16x16_horizontal_mmxext(uint8_t *src, int stride);
+void ff_pred16x16_horizontal_ssse3 (uint8_t *src, int stride);
+void ff_pred16x16_dc_mmxext (uint8_t *src, int stride);
+void ff_pred16x16_dc_sse2 (uint8_t *src, int stride);
+void ff_pred16x16_dc_ssse3 (uint8_t *src, int stride);
+void ff_pred16x16_tm_vp8_mmx (uint8_t *src, int stride);
+void ff_pred16x16_tm_vp8_mmxext (uint8_t *src, int stride);
+void ff_pred16x16_tm_vp8_sse2 (uint8_t *src, int stride);
+void ff_pred8x8_dc_rv40_mmxext (uint8_t *src, int stride);
+void ff_pred8x8_vertical_mmx (uint8_t *src, int stride);
+void ff_pred8x8_horizontal_mmx (uint8_t *src, int stride);
+void ff_pred8x8_horizontal_mmxext (uint8_t *src, int stride);
+void ff_pred8x8_horizontal_ssse3 (uint8_t *src, int stride);
+void ff_pred8x8_tm_vp8_mmx (uint8_t *src, int stride);
+void ff_pred8x8_tm_vp8_mmxext (uint8_t *src, int stride);
+void ff_pred8x8_tm_vp8_sse2 (uint8_t *src, int stride);
+void ff_pred8x8_tm_vp8_ssse3 (uint8_t *src, int stride);
+void ff_pred4x4_dc_mmxext (uint8_t *src, const uint8_t *topright, int stride);
+void ff_pred4x4_tm_vp8_mmx (uint8_t *src, const uint8_t *topright, int stride);
+void ff_pred4x4_tm_vp8_mmxext (uint8_t *src, const uint8_t *topright, int stride);
+void ff_pred4x4_tm_vp8_ssse3 (uint8_t *src, const uint8_t *topright, int stride);
+void ff_pred4x4_vertical_vp8_mmxext(uint8_t *src, const uint8_t *topright, int stride);
+
+void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
+{
+ int mm_flags = av_get_cpu_flags();
+
+#if HAVE_YASM
+ if (mm_flags & AV_CPU_FLAG_MMX) {
+ h->pred16x16[VERT_PRED8x8] = ff_pred16x16_vertical_mmx;
+ h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_mmx;
+ h->pred8x8 [VERT_PRED8x8] = ff_pred8x8_vertical_mmx;
+ h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_mmx;
+ if (codec_id == CODEC_ID_VP8) {
+ h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_tm_vp8_mmx;
+ h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_tm_vp8_mmx;
+ h->pred4x4 [TM_VP8_PRED ] = ff_pred4x4_tm_vp8_mmx;
+ }
+ }
+
+ if (mm_flags & AV_CPU_FLAG_MMX2) {
+ h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_mmxext;
+ h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_mmxext;
+ h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_mmxext;
+ h->pred4x4 [DC_PRED ] = ff_pred4x4_dc_mmxext;
+ if (codec_id == CODEC_ID_VP8) {
+ h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_tm_vp8_mmxext;
+ h->pred8x8 [DC_PRED8x8 ] = ff_pred8x8_dc_rv40_mmxext;
+ h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_tm_vp8_mmxext;
+ h->pred4x4 [TM_VP8_PRED ] = ff_pred4x4_tm_vp8_mmxext;
+ h->pred4x4 [VERT_PRED ] = ff_pred4x4_vertical_vp8_mmxext;
+ }
+ }
+
+ if (mm_flags & AV_CPU_FLAG_SSE) {
+ h->pred16x16[VERT_PRED8x8] = ff_pred16x16_vertical_sse;
+ }
+
+ if (mm_flags & AV_CPU_FLAG_SSE2) {
+ h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_sse2;
+ if (codec_id == CODEC_ID_VP8) {
+ h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_tm_vp8_sse2;
+ h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_tm_vp8_sse2;
+ }
+ }
+
+ if (mm_flags & AV_CPU_FLAG_SSSE3) {
+ h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_ssse3;
+ h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_ssse3;
+ h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_ssse3;
+ if (codec_id == CODEC_ID_VP8) {
+ h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_tm_vp8_ssse3;
+ h->pred4x4 [TM_VP8_PRED ] = ff_pred4x4_tm_vp8_ssse3;
+ }
+ }
+#endif
+}
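
ff_h264_pred_init_x86 follows the usual FFmpeg dispatch pattern: the
generic init fills every slot with C implementations first, and each
CPU-flag block above overrides only the slots it accelerates. A
self-contained miniature of the pattern (toy names, not the FFmpeg API):

#include <stdint.h>
#include <stdio.h>

typedef void (*pred_fn)(uint8_t *src, int stride);
static void pred_dc_c(uint8_t *s, int st)    { (void)s; (void)st; puts("C"); }
static void pred_dc_sse2(uint8_t *s, int st) { (void)s; (void)st; puts("SSE2"); }

int main(void)
{
    pred_fn pred16x16[4] = { pred_dc_c, pred_dc_c, pred_dc_c, pred_dc_c };
    int have_sse2 = 1;                /* stand-in for av_get_cpu_flags() */
    if (have_sse2)
        pred16x16[0] = pred_dc_sse2;  /* override only the hot slot */
    uint8_t buf[16] = { 0 };
    pred16x16[0](buf, 16);            /* caller never sees which version */
    return 0;
}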
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264_qpel_mmx.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264_qpel_mmx.c
new file mode 100644
index 000000000..e94ed0935
--- /dev/null
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264_qpel_mmx.c
@@ -0,0 +1,1209 @@
+/*
+ * Copyright (c) 2004-2005 Michael Niedermayer, Loren Merritt
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "dsputil_mmx.h"
+
+/***********************************/
+/* motion compensation */
+
+#define QPEL_H264V_MM(A,B,C,D,E,F,OP,T,Z,d,q)\
+ "mov"#q" "#C", "#T" \n\t"\
+ "mov"#d" (%0), "#F" \n\t"\
+ "paddw "#D", "#T" \n\t"\
+ "psllw $2, "#T" \n\t"\
+ "psubw "#B", "#T" \n\t"\
+ "psubw "#E", "#T" \n\t"\
+ "punpcklbw "#Z", "#F" \n\t"\
+ "pmullw %4, "#T" \n\t"\
+ "paddw %5, "#A" \n\t"\
+ "add %2, %0 \n\t"\
+ "paddw "#F", "#A" \n\t"\
+ "paddw "#A", "#T" \n\t"\
+ "psraw $5, "#T" \n\t"\
+ "packuswb "#T", "#T" \n\t"\
+ OP(T, (%1), A, d)\
+ "add %3, %1 \n\t"
+
+#define QPEL_H264HV_MM(A,B,C,D,E,F,OF,T,Z,d,q)\
+ "mov"#q" "#C", "#T" \n\t"\
+ "mov"#d" (%0), "#F" \n\t"\
+ "paddw "#D", "#T" \n\t"\
+ "psllw $2, "#T" \n\t"\
+ "paddw %4, "#A" \n\t"\
+ "psubw "#B", "#T" \n\t"\
+ "psubw "#E", "#T" \n\t"\
+ "punpcklbw "#Z", "#F" \n\t"\
+ "pmullw %3, "#T" \n\t"\
+ "paddw "#F", "#A" \n\t"\
+ "add %2, %0 \n\t"\
+ "paddw "#A", "#T" \n\t"\
+ "mov"#q" "#T", "#OF"(%1) \n\t"
+
+#define QPEL_H264V(A,B,C,D,E,F,OP) QPEL_H264V_MM(A,B,C,D,E,F,OP,%%mm6,%%mm7,d,q)
+#define QPEL_H264HV(A,B,C,D,E,F,OF) QPEL_H264HV_MM(A,B,C,D,E,F,OF,%%mm6,%%mm7,d,q)
+#define QPEL_H264V_XMM(A,B,C,D,E,F,OP) QPEL_H264V_MM(A,B,C,D,E,F,OP,%%xmm6,%%xmm7,q,dqa)
+#define QPEL_H264HV_XMM(A,B,C,D,E,F,OF) QPEL_H264HV_MM(A,B,C,D,E,F,OF,%%xmm6,%%xmm7,q,dqa)
+
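
All of the qpel lowpass kernels below implement the H.264 six-tap half-pel
filter (1,-5,20,20,-5,1) with +16 rounding and a >>5 normalization; a
scalar sketch of the horizontal case that the MMX/SSE2 versions vectorize:

#include <stdint.h>

static uint8_t clip_u8(int v) { return v < 0 ? 0 : v > 255 ? 255 : v; }

static void qpel_h_lowpass(uint8_t *dst, const uint8_t *src,
                           int dstStride, int srcStride, int w, int h)
{
    for (int y = 0; y < h; y++, src += srcStride, dst += dstStride)
        for (int x = 0; x < w; x++) {
            int a = src[x - 2] + src[x + 3];   /* outer taps,  *1  */
            int b = src[x - 1] + src[x + 2];   /* middle taps, *-5 */
            int c = src[x]     + src[x + 1];   /* inner taps,  *20 */
            dst[x] = clip_u8((a - 5 * b + 20 * c + 16) >> 5);
        }
}

int main(void)
{
    uint8_t src[16] = { 0 }, dst[8];
    qpel_h_lowpass(dst, src + 2, 8, 16, 8, 1);  /* one 8-pixel row */
    return dst[0];
}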
+
+#define QPEL_H264(OPNAME, OP, MMX)\
+static av_noinline void OPNAME ## h264_qpel4_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+ int h=4;\
+\
+ __asm__ volatile(\
+ "pxor %%mm7, %%mm7 \n\t"\
+ "movq "MANGLE(ff_pw_5) ", %%mm4\n\t"\
+ "movq "MANGLE(ff_pw_16)", %%mm5\n\t"\
+ "1: \n\t"\
+ "movd -1(%0), %%mm1 \n\t"\
+ "movd (%0), %%mm2 \n\t"\
+ "movd 1(%0), %%mm3 \n\t"\
+ "movd 2(%0), %%mm0 \n\t"\
+ "punpcklbw %%mm7, %%mm1 \n\t"\
+ "punpcklbw %%mm7, %%mm2 \n\t"\
+ "punpcklbw %%mm7, %%mm3 \n\t"\
+ "punpcklbw %%mm7, %%mm0 \n\t"\
+ "paddw %%mm0, %%mm1 \n\t"\
+ "paddw %%mm3, %%mm2 \n\t"\
+ "movd -2(%0), %%mm0 \n\t"\
+ "movd 3(%0), %%mm3 \n\t"\
+ "punpcklbw %%mm7, %%mm0 \n\t"\
+ "punpcklbw %%mm7, %%mm3 \n\t"\
+ "paddw %%mm3, %%mm0 \n\t"\
+ "psllw $2, %%mm2 \n\t"\
+ "psubw %%mm1, %%mm2 \n\t"\
+ "pmullw %%mm4, %%mm2 \n\t"\
+ "paddw %%mm5, %%mm0 \n\t"\
+ "paddw %%mm2, %%mm0 \n\t"\
+ "psraw $5, %%mm0 \n\t"\
+ "packuswb %%mm0, %%mm0 \n\t"\
+ OP(%%mm0, (%1),%%mm6, d)\
+ "add %3, %0 \n\t"\
+ "add %4, %1 \n\t"\
+ "decl %2 \n\t"\
+ " jnz 1b \n\t"\
+ : "+a"(src), "+c"(dst), "+g"(h)\
+ : "d"((x86_reg)srcStride), "S"((x86_reg)dstStride)\
+ : "memory"\
+ );\
+}\
+static av_noinline void OPNAME ## h264_qpel4_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\
+ int h=4;\
+ __asm__ volatile(\
+ "pxor %%mm7, %%mm7 \n\t"\
+ "movq %0, %%mm4 \n\t"\
+ "movq %1, %%mm5 \n\t"\
+ :: "m"(ff_pw_5), "m"(ff_pw_16)\
+ );\
+ do{\
+ __asm__ volatile(\
+ "movd -1(%0), %%mm1 \n\t"\
+ "movd (%0), %%mm2 \n\t"\
+ "movd 1(%0), %%mm3 \n\t"\
+ "movd 2(%0), %%mm0 \n\t"\
+ "punpcklbw %%mm7, %%mm1 \n\t"\
+ "punpcklbw %%mm7, %%mm2 \n\t"\
+ "punpcklbw %%mm7, %%mm3 \n\t"\
+ "punpcklbw %%mm7, %%mm0 \n\t"\
+ "paddw %%mm0, %%mm1 \n\t"\
+ "paddw %%mm3, %%mm2 \n\t"\
+ "movd -2(%0), %%mm0 \n\t"\
+ "movd 3(%0), %%mm3 \n\t"\
+ "punpcklbw %%mm7, %%mm0 \n\t"\
+ "punpcklbw %%mm7, %%mm3 \n\t"\
+ "paddw %%mm3, %%mm0 \n\t"\
+ "psllw $2, %%mm2 \n\t"\
+ "psubw %%mm1, %%mm2 \n\t"\
+ "pmullw %%mm4, %%mm2 \n\t"\
+ "paddw %%mm5, %%mm0 \n\t"\
+ "paddw %%mm2, %%mm0 \n\t"\
+ "movd (%2), %%mm3 \n\t"\
+ "psraw $5, %%mm0 \n\t"\
+ "packuswb %%mm0, %%mm0 \n\t"\
+ PAVGB" %%mm3, %%mm0 \n\t"\
+ OP(%%mm0, (%1),%%mm6, d)\
+ "add %4, %0 \n\t"\
+ "add %4, %1 \n\t"\
+ "add %3, %2 \n\t"\
+ : "+a"(src), "+c"(dst), "+d"(src2)\
+ : "D"((x86_reg)src2Stride), "S"((x86_reg)dstStride)\
+ : "memory"\
+ );\
+ }while(--h);\
+}\
+static av_noinline void OPNAME ## h264_qpel4_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+ src -= 2*srcStride;\
+ __asm__ volatile(\
+ "pxor %%mm7, %%mm7 \n\t"\
+ "movd (%0), %%mm0 \n\t"\
+ "add %2, %0 \n\t"\
+ "movd (%0), %%mm1 \n\t"\
+ "add %2, %0 \n\t"\
+ "movd (%0), %%mm2 \n\t"\
+ "add %2, %0 \n\t"\
+ "movd (%0), %%mm3 \n\t"\
+ "add %2, %0 \n\t"\
+ "movd (%0), %%mm4 \n\t"\
+ "add %2, %0 \n\t"\
+ "punpcklbw %%mm7, %%mm0 \n\t"\
+ "punpcklbw %%mm7, %%mm1 \n\t"\
+ "punpcklbw %%mm7, %%mm2 \n\t"\
+ "punpcklbw %%mm7, %%mm3 \n\t"\
+ "punpcklbw %%mm7, %%mm4 \n\t"\
+ QPEL_H264V(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\
+ QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
+ QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
+ QPEL_H264V(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
+ \
+ : "+a"(src), "+c"(dst)\
+ : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
+ : "memory"\
+ );\
+}\
+static av_noinline void OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
+ int h=4;\
+ int w=3;\
+ src -= 2*srcStride+2;\
+ while(w--){\
+ __asm__ volatile(\
+ "pxor %%mm7, %%mm7 \n\t"\
+ "movd (%0), %%mm0 \n\t"\
+ "add %2, %0 \n\t"\
+ "movd (%0), %%mm1 \n\t"\
+ "add %2, %0 \n\t"\
+ "movd (%0), %%mm2 \n\t"\
+ "add %2, %0 \n\t"\
+ "movd (%0), %%mm3 \n\t"\
+ "add %2, %0 \n\t"\
+ "movd (%0), %%mm4 \n\t"\
+ "add %2, %0 \n\t"\
+ "punpcklbw %%mm7, %%mm0 \n\t"\
+ "punpcklbw %%mm7, %%mm1 \n\t"\
+ "punpcklbw %%mm7, %%mm2 \n\t"\
+ "punpcklbw %%mm7, %%mm3 \n\t"\
+ "punpcklbw %%mm7, %%mm4 \n\t"\
+ QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 0*8*3)\
+ QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 1*8*3)\
+ QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, 2*8*3)\
+ QPEL_H264HV(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, 3*8*3)\
+ \
+ : "+a"(src)\
+ : "c"(tmp), "S"((x86_reg)srcStride), "m"(ff_pw_5), "m"(ff_pw_16)\
+ : "memory"\
+ );\
+ tmp += 4;\
+ src += 4 - 9*srcStride;\
+ }\
+ tmp -= 3*4;\
+ __asm__ volatile(\
+ "1: \n\t"\
+ "movq (%0), %%mm0 \n\t"\
+ "paddw 10(%0), %%mm0 \n\t"\
+ "movq 2(%0), %%mm1 \n\t"\
+ "paddw 8(%0), %%mm1 \n\t"\
+ "movq 4(%0), %%mm2 \n\t"\
+ "paddw 6(%0), %%mm2 \n\t"\
+ "psubw %%mm1, %%mm0 \n\t"/*a-b (abccba)*/\
+ "psraw $2, %%mm0 \n\t"/*(a-b)/4 */\
+ "psubw %%mm1, %%mm0 \n\t"/*(a-b)/4-b */\
+ "paddsw %%mm2, %%mm0 \n\t"\
+ "psraw $2, %%mm0 \n\t"/*((a-b)/4-b+c)/4 */\
+ "paddw %%mm2, %%mm0 \n\t"/*(a-5*b+20*c)/16 */\
+ "psraw $6, %%mm0 \n\t"\
+ "packuswb %%mm0, %%mm0 \n\t"\
+ OP(%%mm0, (%1),%%mm7, d)\
+ "add $24, %0 \n\t"\
+ "add %3, %1 \n\t"\
+ "decl %2 \n\t"\
+ " jnz 1b \n\t"\
+ : "+a"(tmp), "+c"(dst), "+g"(h)\
+ : "S"((x86_reg)dstStride)\
+ : "memory"\
+ );\
+}\
+\
+static av_noinline void OPNAME ## h264_qpel8_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+ int h=8;\
+ __asm__ volatile(\
+ "pxor %%mm7, %%mm7 \n\t"\
+ "movq "MANGLE(ff_pw_5)", %%mm6\n\t"\
+ "1: \n\t"\
+ "movq (%0), %%mm0 \n\t"\
+ "movq 1(%0), %%mm2 \n\t"\
+ "movq %%mm0, %%mm1 \n\t"\
+ "movq %%mm2, %%mm3 \n\t"\
+ "punpcklbw %%mm7, %%mm0 \n\t"\
+ "punpckhbw %%mm7, %%mm1 \n\t"\
+ "punpcklbw %%mm7, %%mm2 \n\t"\
+ "punpckhbw %%mm7, %%mm3 \n\t"\
+ "paddw %%mm2, %%mm0 \n\t"\
+ "paddw %%mm3, %%mm1 \n\t"\
+ "psllw $2, %%mm0 \n\t"\
+ "psllw $2, %%mm1 \n\t"\
+ "movq -1(%0), %%mm2 \n\t"\
+ "movq 2(%0), %%mm4 \n\t"\
+ "movq %%mm2, %%mm3 \n\t"\
+ "movq %%mm4, %%mm5 \n\t"\
+ "punpcklbw %%mm7, %%mm2 \n\t"\
+ "punpckhbw %%mm7, %%mm3 \n\t"\
+ "punpcklbw %%mm7, %%mm4 \n\t"\
+ "punpckhbw %%mm7, %%mm5 \n\t"\
+ "paddw %%mm4, %%mm2 \n\t"\
+ "paddw %%mm3, %%mm5 \n\t"\
+ "psubw %%mm2, %%mm0 \n\t"\
+ "psubw %%mm5, %%mm1 \n\t"\
+ "pmullw %%mm6, %%mm0 \n\t"\
+ "pmullw %%mm6, %%mm1 \n\t"\
+ "movd -2(%0), %%mm2 \n\t"\
+ "movd 7(%0), %%mm5 \n\t"\
+ "punpcklbw %%mm7, %%mm2 \n\t"\
+ "punpcklbw %%mm7, %%mm5 \n\t"\
+ "paddw %%mm3, %%mm2 \n\t"\
+ "paddw %%mm5, %%mm4 \n\t"\
+ "movq "MANGLE(ff_pw_16)", %%mm5\n\t"\
+ "paddw %%mm5, %%mm2 \n\t"\
+ "paddw %%mm5, %%mm4 \n\t"\
+ "paddw %%mm2, %%mm0 \n\t"\
+ "paddw %%mm4, %%mm1 \n\t"\
+ "psraw $5, %%mm0 \n\t"\
+ "psraw $5, %%mm1 \n\t"\
+ "packuswb %%mm1, %%mm0 \n\t"\
+ OP(%%mm0, (%1),%%mm5, q)\
+ "add %3, %0 \n\t"\
+ "add %4, %1 \n\t"\
+ "decl %2 \n\t"\
+ " jnz 1b \n\t"\
+ : "+a"(src), "+c"(dst), "+g"(h)\
+ : "d"((x86_reg)srcStride), "S"((x86_reg)dstStride)\
+ : "memory"\
+ );\
+}\
+\
+static av_noinline void OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\
+ int h=8;\
+ __asm__ volatile(\
+ "pxor %%mm7, %%mm7 \n\t"\
+ "movq %0, %%mm6 \n\t"\
+ :: "m"(ff_pw_5)\
+ );\
+ do{\
+ __asm__ volatile(\
+ "movq (%0), %%mm0 \n\t"\
+ "movq 1(%0), %%mm2 \n\t"\
+ "movq %%mm0, %%mm1 \n\t"\
+ "movq %%mm2, %%mm3 \n\t"\
+ "punpcklbw %%mm7, %%mm0 \n\t"\
+ "punpckhbw %%mm7, %%mm1 \n\t"\
+ "punpcklbw %%mm7, %%mm2 \n\t"\
+ "punpckhbw %%mm7, %%mm3 \n\t"\
+ "paddw %%mm2, %%mm0 \n\t"\
+ "paddw %%mm3, %%mm1 \n\t"\
+ "psllw $2, %%mm0 \n\t"\
+ "psllw $2, %%mm1 \n\t"\
+ "movq -1(%0), %%mm2 \n\t"\
+ "movq 2(%0), %%mm4 \n\t"\
+ "movq %%mm2, %%mm3 \n\t"\
+ "movq %%mm4, %%mm5 \n\t"\
+ "punpcklbw %%mm7, %%mm2 \n\t"\
+ "punpckhbw %%mm7, %%mm3 \n\t"\
+ "punpcklbw %%mm7, %%mm4 \n\t"\
+ "punpckhbw %%mm7, %%mm5 \n\t"\
+ "paddw %%mm4, %%mm2 \n\t"\
+ "paddw %%mm3, %%mm5 \n\t"\
+ "psubw %%mm2, %%mm0 \n\t"\
+ "psubw %%mm5, %%mm1 \n\t"\
+ "pmullw %%mm6, %%mm0 \n\t"\
+ "pmullw %%mm6, %%mm1 \n\t"\
+ "movd -2(%0), %%mm2 \n\t"\
+ "movd 7(%0), %%mm5 \n\t"\
+ "punpcklbw %%mm7, %%mm2 \n\t"\
+ "punpcklbw %%mm7, %%mm5 \n\t"\
+ "paddw %%mm3, %%mm2 \n\t"\
+ "paddw %%mm5, %%mm4 \n\t"\
+ "movq %5, %%mm5 \n\t"\
+ "paddw %%mm5, %%mm2 \n\t"\
+ "paddw %%mm5, %%mm4 \n\t"\
+ "paddw %%mm2, %%mm0 \n\t"\
+ "paddw %%mm4, %%mm1 \n\t"\
+ "psraw $5, %%mm0 \n\t"\
+ "psraw $5, %%mm1 \n\t"\
+ "movq (%2), %%mm4 \n\t"\
+ "packuswb %%mm1, %%mm0 \n\t"\
+ PAVGB" %%mm4, %%mm0 \n\t"\
+ OP(%%mm0, (%1),%%mm5, q)\
+ "add %4, %0 \n\t"\
+ "add %4, %1 \n\t"\
+ "add %3, %2 \n\t"\
+ : "+a"(src), "+c"(dst), "+d"(src2)\
+ : "D"((x86_reg)src2Stride), "S"((x86_reg)dstStride),\
+ "m"(ff_pw_16)\
+ : "memory"\
+ );\
+ }while(--h);\
+}\
+\
+static av_noinline void OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
+ int w= 2;\
+ src -= 2*srcStride;\
+ \
+ while(w--){\
+ __asm__ volatile(\
+ "pxor %%mm7, %%mm7 \n\t"\
+ "movd (%0), %%mm0 \n\t"\
+ "add %2, %0 \n\t"\
+ "movd (%0), %%mm1 \n\t"\
+ "add %2, %0 \n\t"\
+ "movd (%0), %%mm2 \n\t"\
+ "add %2, %0 \n\t"\
+ "movd (%0), %%mm3 \n\t"\
+ "add %2, %0 \n\t"\
+ "movd (%0), %%mm4 \n\t"\
+ "add %2, %0 \n\t"\
+ "punpcklbw %%mm7, %%mm0 \n\t"\
+ "punpcklbw %%mm7, %%mm1 \n\t"\
+ "punpcklbw %%mm7, %%mm2 \n\t"\
+ "punpcklbw %%mm7, %%mm3 \n\t"\
+ "punpcklbw %%mm7, %%mm4 \n\t"\
+ QPEL_H264V(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\
+ QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
+ QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
+ QPEL_H264V(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
+ QPEL_H264V(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP)\
+ QPEL_H264V(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP)\
+ QPEL_H264V(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\
+ QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
+ \
+ : "+a"(src), "+c"(dst)\
+ : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
+ : "memory"\
+ );\
+ if(h==16){\
+ __asm__ volatile(\
+ QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
+ QPEL_H264V(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
+ QPEL_H264V(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP)\
+ QPEL_H264V(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP)\
+ QPEL_H264V(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\
+ QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
+ QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
+ QPEL_H264V(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
+ \
+ : "+a"(src), "+c"(dst)\
+ : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
+ : "memory"\
+ );\
+ }\
+ src += 4-(h+5)*srcStride;\
+ dst += 4-h*dstStride;\
+ }\
+}\
+static av_always_inline void OPNAME ## h264_qpel8or16_hv1_lowpass_ ## MMX(int16_t *tmp, uint8_t *src, int tmpStride, int srcStride, int size){\
+ int w = (size+8)>>2;\
+ src -= 2*srcStride+2;\
+ while(w--){\
+ __asm__ volatile(\
+ "pxor %%mm7, %%mm7 \n\t"\
+ "movd (%0), %%mm0 \n\t"\
+ "add %2, %0 \n\t"\
+ "movd (%0), %%mm1 \n\t"\
+ "add %2, %0 \n\t"\
+ "movd (%0), %%mm2 \n\t"\
+ "add %2, %0 \n\t"\
+ "movd (%0), %%mm3 \n\t"\
+ "add %2, %0 \n\t"\
+ "movd (%0), %%mm4 \n\t"\
+ "add %2, %0 \n\t"\
+ "punpcklbw %%mm7, %%mm0 \n\t"\
+ "punpcklbw %%mm7, %%mm1 \n\t"\
+ "punpcklbw %%mm7, %%mm2 \n\t"\
+ "punpcklbw %%mm7, %%mm3 \n\t"\
+ "punpcklbw %%mm7, %%mm4 \n\t"\
+ QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 0*48)\
+ QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 1*48)\
+ QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, 2*48)\
+ QPEL_H264HV(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, 3*48)\
+ QPEL_H264HV(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, 4*48)\
+ QPEL_H264HV(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, 5*48)\
+ QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 6*48)\
+ QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 7*48)\
+ : "+a"(src)\
+ : "c"(tmp), "S"((x86_reg)srcStride), "m"(ff_pw_5), "m"(ff_pw_16)\
+ : "memory"\
+ );\
+ if(size==16){\
+ __asm__ volatile(\
+ QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, 8*48)\
+ QPEL_H264HV(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, 9*48)\
+ QPEL_H264HV(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, 10*48)\
+ QPEL_H264HV(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, 11*48)\
+ QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 12*48)\
+ QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 13*48)\
+ QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, 14*48)\
+ QPEL_H264HV(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, 15*48)\
+ : "+a"(src)\
+ : "c"(tmp), "S"((x86_reg)srcStride), "m"(ff_pw_5), "m"(ff_pw_16)\
+ : "memory"\
+ );\
+ }\
+ tmp += 4;\
+ src += 4 - (size+5)*srcStride;\
+ }\
+}\
+static av_always_inline void OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, int dstStride, int tmpStride, int size){\
+ int w = size>>4;\
+ do{\
+ int h = size;\
+ __asm__ volatile(\
+ "1: \n\t"\
+ "movq (%0), %%mm0 \n\t"\
+ "movq 8(%0), %%mm3 \n\t"\
+ "movq 2(%0), %%mm1 \n\t"\
+ "movq 10(%0), %%mm4 \n\t"\
+ "paddw %%mm4, %%mm0 \n\t"\
+ "paddw %%mm3, %%mm1 \n\t"\
+ "paddw 18(%0), %%mm3 \n\t"\
+ "paddw 16(%0), %%mm4 \n\t"\
+ "movq 4(%0), %%mm2 \n\t"\
+ "movq 12(%0), %%mm5 \n\t"\
+ "paddw 6(%0), %%mm2 \n\t"\
+ "paddw 14(%0), %%mm5 \n\t"\
+ "psubw %%mm1, %%mm0 \n\t"\
+ "psubw %%mm4, %%mm3 \n\t"\
+ "psraw $2, %%mm0 \n\t"\
+ "psraw $2, %%mm3 \n\t"\
+ "psubw %%mm1, %%mm0 \n\t"\
+ "psubw %%mm4, %%mm3 \n\t"\
+ "paddsw %%mm2, %%mm0 \n\t"\
+ "paddsw %%mm5, %%mm3 \n\t"\
+ "psraw $2, %%mm0 \n\t"\
+ "psraw $2, %%mm3 \n\t"\
+ "paddw %%mm2, %%mm0 \n\t"\
+ "paddw %%mm5, %%mm3 \n\t"\
+ "psraw $6, %%mm0 \n\t"\
+ "psraw $6, %%mm3 \n\t"\
+ "packuswb %%mm3, %%mm0 \n\t"\
+ OP(%%mm0, (%1),%%mm7, q)\
+ "add $48, %0 \n\t"\
+ "add %3, %1 \n\t"\
+ "decl %2 \n\t"\
+ " jnz 1b \n\t"\
+ : "+a"(tmp), "+c"(dst), "+g"(h)\
+ : "S"((x86_reg)dstStride)\
+ : "memory"\
+ );\
+ tmp += 8 - size*24;\
+ dst += 8 - size*dstStride;\
+ }while(w--);\
+}\
+\
+static void OPNAME ## h264_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+ OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst , src , dstStride, srcStride, 8);\
+}\
+static av_noinline void OPNAME ## h264_qpel16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+ OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst , src , dstStride, srcStride, 16);\
+ OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
+}\
+\
+static void OPNAME ## h264_qpel16_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+ OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst , src , dstStride, srcStride);\
+ OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\
+ src += 8*srcStride;\
+ dst += 8*dstStride;\
+ OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst , src , dstStride, srcStride);\
+ OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\
+}\
+\
+static av_noinline void OPNAME ## h264_qpel16_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\
+ OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst , src , src2 , dstStride, src2Stride);\
+ OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride);\
+ src += 8*dstStride;\
+ dst += 8*dstStride;\
+ src2 += 8*src2Stride;\
+ OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst , src , src2 , dstStride, src2Stride);\
+ OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride);\
+}\
+\
+static av_noinline void OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride, int size){\
+ put_h264_qpel8or16_hv1_lowpass_ ## MMX(tmp, src, tmpStride, srcStride, size);\
+ OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX(dst, tmp, dstStride, tmpStride, size);\
+}\
+static void OPNAME ## h264_qpel8_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
+ OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst , tmp , src , dstStride, tmpStride, srcStride, 8);\
+}\
+\
+static void OPNAME ## h264_qpel16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
+ OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst , tmp , src , dstStride, tmpStride, srcStride, 16);\
+}\
+\
+static av_noinline void OPNAME ## pixels4_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, uint8_t *src8, int dstStride, int src8Stride, int h)\
+{\
+ __asm__ volatile(\
+ "movq (%1), %%mm0 \n\t"\
+ "movq 24(%1), %%mm1 \n\t"\
+ "psraw $5, %%mm0 \n\t"\
+ "psraw $5, %%mm1 \n\t"\
+ "packuswb %%mm0, %%mm0 \n\t"\
+ "packuswb %%mm1, %%mm1 \n\t"\
+ PAVGB" (%0), %%mm0 \n\t"\
+ PAVGB" (%0,%3), %%mm1 \n\t"\
+ OP(%%mm0, (%2), %%mm4, d)\
+ OP(%%mm1, (%2,%4), %%mm5, d)\
+ "lea (%0,%3,2), %0 \n\t"\
+ "lea (%2,%4,2), %2 \n\t"\
+ "movq 48(%1), %%mm0 \n\t"\
+ "movq 72(%1), %%mm1 \n\t"\
+ "psraw $5, %%mm0 \n\t"\
+ "psraw $5, %%mm1 \n\t"\
+ "packuswb %%mm0, %%mm0 \n\t"\
+ "packuswb %%mm1, %%mm1 \n\t"\
+ PAVGB" (%0), %%mm0 \n\t"\
+ PAVGB" (%0,%3), %%mm1 \n\t"\
+ OP(%%mm0, (%2), %%mm4, d)\
+ OP(%%mm1, (%2,%4), %%mm5, d)\
+ :"+a"(src8), "+c"(src16), "+d"(dst)\
+ :"S"((x86_reg)src8Stride), "D"((x86_reg)dstStride)\
+ :"memory");\
+}\
+static av_noinline void OPNAME ## pixels8_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, uint8_t *src8, int dstStride, int src8Stride, int h)\
+{\
+ do{\
+ __asm__ volatile(\
+ "movq (%1), %%mm0 \n\t"\
+ "movq 8(%1), %%mm1 \n\t"\
+ "movq 48(%1), %%mm2 \n\t"\
+ "movq 8+48(%1), %%mm3 \n\t"\
+ "psraw $5, %%mm0 \n\t"\
+ "psraw $5, %%mm1 \n\t"\
+ "psraw $5, %%mm2 \n\t"\
+ "psraw $5, %%mm3 \n\t"\
+ "packuswb %%mm1, %%mm0 \n\t"\
+ "packuswb %%mm3, %%mm2 \n\t"\
+ PAVGB" (%0), %%mm0 \n\t"\
+ PAVGB" (%0,%3), %%mm2 \n\t"\
+ OP(%%mm0, (%2), %%mm5, q)\
+ OP(%%mm2, (%2,%4), %%mm5, q)\
+ ::"a"(src8), "c"(src16), "d"(dst),\
+ "r"((x86_reg)src8Stride), "r"((x86_reg)dstStride)\
+ :"memory");\
+ src8 += 2L*src8Stride;\
+ src16 += 48;\
+ dst += 2L*dstStride;\
+ }while(h-=2);\
+}\
+static void OPNAME ## pixels16_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, uint8_t *src8, int dstStride, int src8Stride, int h)\
+{\
+ OPNAME ## pixels8_l2_shift5_ ## MMX(dst , src16 , src8 , dstStride, src8Stride, h);\
+ OPNAME ## pixels8_l2_shift5_ ## MMX(dst+8, src16+8, src8+8, dstStride, src8Stride, h);\
+}\
+
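
The hv2 pass avoids a second pmullw chain via the shift factorization
annotated inline above (/*(a-b)/4 ...*/); a quick check of the identity it
relies on, in exact arithmetic:

#include <assert.h>

int main(void)
{
    /* ((a-b)/4 - b + c)/4 + c == (a - 5b + 20c)/16; stepping by 16
     * keeps every division exact, so the algebra is checked directly. */
    for (int a = 0; a < 4096; a += 16)
        for (int b = 0; b < 4096; b += 16)
            for (int c = 0; c < 4096; c += 16)
                assert(((a - b) / 4 - b + c) / 4 + c ==
                       (a - 5 * b + 20 * c) / 16);
    return 0;
}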
+
+#if ARCH_X86_64
+#define QPEL_H264_H16_XMM(OPNAME, OP, MMX)\
+static av_noinline void OPNAME ## h264_qpel16_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\
+ int h=16;\
+ __asm__ volatile(\
+ "pxor %%xmm15, %%xmm15 \n\t"\
+ "movdqa %6, %%xmm14 \n\t"\
+ "movdqa %7, %%xmm13 \n\t"\
+ "1: \n\t"\
+ "lddqu 6(%0), %%xmm1 \n\t"\
+ "lddqu -2(%0), %%xmm7 \n\t"\
+ "movdqa %%xmm1, %%xmm0 \n\t"\
+ "punpckhbw %%xmm15, %%xmm1 \n\t"\
+ "punpcklbw %%xmm15, %%xmm0 \n\t"\
+ "punpcklbw %%xmm15, %%xmm7 \n\t"\
+ "movdqa %%xmm1, %%xmm2 \n\t"\
+ "movdqa %%xmm0, %%xmm6 \n\t"\
+ "movdqa %%xmm1, %%xmm3 \n\t"\
+ "movdqa %%xmm0, %%xmm8 \n\t"\
+ "movdqa %%xmm1, %%xmm4 \n\t"\
+ "movdqa %%xmm0, %%xmm9 \n\t"\
+ "movdqa %%xmm0, %%xmm12 \n\t"\
+ "movdqa %%xmm1, %%xmm11 \n\t"\
+ "palignr $10,%%xmm0, %%xmm11\n\t"\
+ "palignr $10,%%xmm7, %%xmm12\n\t"\
+ "palignr $2, %%xmm0, %%xmm4 \n\t"\
+ "palignr $2, %%xmm7, %%xmm9 \n\t"\
+ "palignr $4, %%xmm0, %%xmm3 \n\t"\
+ "palignr $4, %%xmm7, %%xmm8 \n\t"\
+ "palignr $6, %%xmm0, %%xmm2 \n\t"\
+ "palignr $6, %%xmm7, %%xmm6 \n\t"\
+ "paddw %%xmm0 ,%%xmm11 \n\t"\
+ "palignr $8, %%xmm0, %%xmm1 \n\t"\
+ "palignr $8, %%xmm7, %%xmm0 \n\t"\
+ "paddw %%xmm12,%%xmm7 \n\t"\
+ "paddw %%xmm3, %%xmm2 \n\t"\
+ "paddw %%xmm8, %%xmm6 \n\t"\
+ "paddw %%xmm4, %%xmm1 \n\t"\
+ "paddw %%xmm9, %%xmm0 \n\t"\
+ "psllw $2, %%xmm2 \n\t"\
+ "psllw $2, %%xmm6 \n\t"\
+ "psubw %%xmm1, %%xmm2 \n\t"\
+ "psubw %%xmm0, %%xmm6 \n\t"\
+ "paddw %%xmm13,%%xmm11 \n\t"\
+ "paddw %%xmm13,%%xmm7 \n\t"\
+ "pmullw %%xmm14,%%xmm2 \n\t"\
+ "pmullw %%xmm14,%%xmm6 \n\t"\
+ "lddqu (%2), %%xmm3 \n\t"\
+ "paddw %%xmm11,%%xmm2 \n\t"\
+ "paddw %%xmm7, %%xmm6 \n\t"\
+ "psraw $5, %%xmm2 \n\t"\
+ "psraw $5, %%xmm6 \n\t"\
+ "packuswb %%xmm2,%%xmm6 \n\t"\
+ "pavgb %%xmm3, %%xmm6 \n\t"\
+ OP(%%xmm6, (%1), %%xmm4, dqa)\
+ "add %5, %0 \n\t"\
+ "add %5, %1 \n\t"\
+ "add %4, %2 \n\t"\
+ "decl %3 \n\t"\
+ "jg 1b \n\t"\
+ : "+a"(src), "+c"(dst), "+d"(src2), "+g"(h)\
+ : "D"((x86_reg)src2Stride), "S"((x86_reg)dstStride),\
+ "m"(ff_pw_5), "m"(ff_pw_16)\
+ : "memory"\
+ );\
+}
+#else // ARCH_X86_64
+#define QPEL_H264_H16_XMM(OPNAME, OP, MMX)\
+static av_noinline void OPNAME ## h264_qpel16_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\
+ OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst , src , src2 , dstStride, src2Stride);\
+ OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride);\
+ src += 8*dstStride;\
+ dst += 8*dstStride;\
+ src2 += 8*src2Stride;\
+ OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst , src , src2 , dstStride, src2Stride);\
+ OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride);\
+}
+#endif // ARCH_X86_64
+
+#define QPEL_H264_H_XMM(OPNAME, OP, MMX)\
+static av_noinline void OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\
+ int h=8;\
+ __asm__ volatile(\
+ "pxor %%xmm7, %%xmm7 \n\t"\
+ "movdqa %0, %%xmm6 \n\t"\
+ :: "m"(ff_pw_5)\
+ );\
+ do{\
+ __asm__ volatile(\
+ "lddqu -2(%0), %%xmm1 \n\t"\
+ "movdqa %%xmm1, %%xmm0 \n\t"\
+ "punpckhbw %%xmm7, %%xmm1 \n\t"\
+ "punpcklbw %%xmm7, %%xmm0 \n\t"\
+ "movdqa %%xmm1, %%xmm2 \n\t"\
+ "movdqa %%xmm1, %%xmm3 \n\t"\
+ "movdqa %%xmm1, %%xmm4 \n\t"\
+ "movdqa %%xmm1, %%xmm5 \n\t"\
+ "palignr $2, %%xmm0, %%xmm4 \n\t"\
+ "palignr $4, %%xmm0, %%xmm3 \n\t"\
+ "palignr $6, %%xmm0, %%xmm2 \n\t"\
+ "palignr $8, %%xmm0, %%xmm1 \n\t"\
+ "palignr $10,%%xmm0, %%xmm5 \n\t"\
+ "paddw %%xmm5, %%xmm0 \n\t"\
+ "paddw %%xmm3, %%xmm2 \n\t"\
+ "paddw %%xmm4, %%xmm1 \n\t"\
+ "psllw $2, %%xmm2 \n\t"\
+ "movq (%2), %%xmm3 \n\t"\
+ "psubw %%xmm1, %%xmm2 \n\t"\
+ "paddw %5, %%xmm0 \n\t"\
+ "pmullw %%xmm6, %%xmm2 \n\t"\
+ "paddw %%xmm0, %%xmm2 \n\t"\
+ "psraw $5, %%xmm2 \n\t"\
+ "packuswb %%xmm2, %%xmm2 \n\t"\
+ "pavgb %%xmm3, %%xmm2 \n\t"\
+ OP(%%xmm2, (%1), %%xmm4, q)\
+ "add %4, %0 \n\t"\
+ "add %4, %1 \n\t"\
+ "add %3, %2 \n\t"\
+ : "+a"(src), "+c"(dst), "+d"(src2)\
+ : "D"((x86_reg)src2Stride), "S"((x86_reg)dstStride),\
+ "m"(ff_pw_16)\
+ : "memory"\
+ );\
+ }while(--h);\
+}\
+QPEL_H264_H16_XMM(OPNAME, OP, MMX)\
+\
+static av_noinline void OPNAME ## h264_qpel8_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+ int h=8;\
+ __asm__ volatile(\
+ "pxor %%xmm7, %%xmm7 \n\t"\
+ "movdqa "MANGLE(ff_pw_5)", %%xmm6\n\t"\
+ "1: \n\t"\
+ "lddqu -2(%0), %%xmm1 \n\t"\
+ "movdqa %%xmm1, %%xmm0 \n\t"\
+ "punpckhbw %%xmm7, %%xmm1 \n\t"\
+ "punpcklbw %%xmm7, %%xmm0 \n\t"\
+ "movdqa %%xmm1, %%xmm2 \n\t"\
+ "movdqa %%xmm1, %%xmm3 \n\t"\
+ "movdqa %%xmm1, %%xmm4 \n\t"\
+ "movdqa %%xmm1, %%xmm5 \n\t"\
+ "palignr $2, %%xmm0, %%xmm4 \n\t"\
+ "palignr $4, %%xmm0, %%xmm3 \n\t"\
+ "palignr $6, %%xmm0, %%xmm2 \n\t"\
+ "palignr $8, %%xmm0, %%xmm1 \n\t"\
+ "palignr $10,%%xmm0, %%xmm5 \n\t"\
+ "paddw %%xmm5, %%xmm0 \n\t"\
+ "paddw %%xmm3, %%xmm2 \n\t"\
+ "paddw %%xmm4, %%xmm1 \n\t"\
+ "psllw $2, %%xmm2 \n\t"\
+ "psubw %%xmm1, %%xmm2 \n\t"\
+ "paddw "MANGLE(ff_pw_16)", %%xmm0\n\t"\
+ "pmullw %%xmm6, %%xmm2 \n\t"\
+ "paddw %%xmm0, %%xmm2 \n\t"\
+ "psraw $5, %%xmm2 \n\t"\
+ "packuswb %%xmm2, %%xmm2 \n\t"\
+ OP(%%xmm2, (%1), %%xmm4, q)\
+ "add %3, %0 \n\t"\
+ "add %4, %1 \n\t"\
+ "decl %2 \n\t"\
+ " jnz 1b \n\t"\
+ : "+a"(src), "+c"(dst), "+g"(h)\
+ : "D"((x86_reg)srcStride), "S"((x86_reg)dstStride)\
+ : "memory"\
+ );\
+}\
+static void OPNAME ## h264_qpel16_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+ OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst , src , dstStride, srcStride);\
+ OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\
+ src += 8*srcStride;\
+ dst += 8*dstStride;\
+ OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst , src , dstStride, srcStride);\
+ OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\
+}\
+
+#define QPEL_H264_V_XMM(OPNAME, OP, MMX)\
+static av_noinline void OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
+ src -= 2*srcStride;\
+ \
+ __asm__ volatile(\
+ "pxor %%xmm7, %%xmm7 \n\t"\
+ "movq (%0), %%xmm0 \n\t"\
+ "add %2, %0 \n\t"\
+ "movq (%0), %%xmm1 \n\t"\
+ "add %2, %0 \n\t"\
+ "movq (%0), %%xmm2 \n\t"\
+ "add %2, %0 \n\t"\
+ "movq (%0), %%xmm3 \n\t"\
+ "add %2, %0 \n\t"\
+ "movq (%0), %%xmm4 \n\t"\
+ "add %2, %0 \n\t"\
+ "punpcklbw %%xmm7, %%xmm0 \n\t"\
+ "punpcklbw %%xmm7, %%xmm1 \n\t"\
+ "punpcklbw %%xmm7, %%xmm2 \n\t"\
+ "punpcklbw %%xmm7, %%xmm3 \n\t"\
+ "punpcklbw %%xmm7, %%xmm4 \n\t"\
+ QPEL_H264V_XMM(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, OP)\
+ QPEL_H264V_XMM(%%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, OP)\
+ QPEL_H264V_XMM(%%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, OP)\
+ QPEL_H264V_XMM(%%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, OP)\
+ QPEL_H264V_XMM(%%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, %%xmm3, OP)\
+ QPEL_H264V_XMM(%%xmm5, %%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, OP)\
+ QPEL_H264V_XMM(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, OP)\
+ QPEL_H264V_XMM(%%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, OP)\
+ \
+ : "+a"(src), "+c"(dst)\
+ : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
+ : "memory"\
+ );\
+ if(h==16){\
+ __asm__ volatile(\
+ QPEL_H264V_XMM(%%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, OP)\
+ QPEL_H264V_XMM(%%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, OP)\
+ QPEL_H264V_XMM(%%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, %%xmm3, OP)\
+ QPEL_H264V_XMM(%%xmm5, %%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, OP)\
+ QPEL_H264V_XMM(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, OP)\
+ QPEL_H264V_XMM(%%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, OP)\
+ QPEL_H264V_XMM(%%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, OP)\
+ QPEL_H264V_XMM(%%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, OP)\
+ \
+ : "+a"(src), "+c"(dst)\
+ : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
+ : "memory"\
+ );\
+ }\
+}\
+static void OPNAME ## h264_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+ OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst , src , dstStride, srcStride, 8);\
+}\
+static av_noinline void OPNAME ## h264_qpel16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+ OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst , src , dstStride, srcStride, 16);\
+ OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
+}
+
+static av_always_inline void put_h264_qpel8or16_hv1_lowpass_sse2(int16_t *tmp, uint8_t *src, int tmpStride, int srcStride, int size){
+ int w = (size+8)>>3;
+ src -= 2*srcStride+2;
+ while(w--){
+ __asm__ volatile(
+ "pxor %%xmm7, %%xmm7 \n\t"
+ "movq (%0), %%xmm0 \n\t"
+ "add %2, %0 \n\t"
+ "movq (%0), %%xmm1 \n\t"
+ "add %2, %0 \n\t"
+ "movq (%0), %%xmm2 \n\t"
+ "add %2, %0 \n\t"
+ "movq (%0), %%xmm3 \n\t"
+ "add %2, %0 \n\t"
+ "movq (%0), %%xmm4 \n\t"
+ "add %2, %0 \n\t"
+ "punpcklbw %%xmm7, %%xmm0 \n\t"
+ "punpcklbw %%xmm7, %%xmm1 \n\t"
+ "punpcklbw %%xmm7, %%xmm2 \n\t"
+ "punpcklbw %%xmm7, %%xmm3 \n\t"
+ "punpcklbw %%xmm7, %%xmm4 \n\t"
+ QPEL_H264HV_XMM(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, 0*48)
+ QPEL_H264HV_XMM(%%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, 1*48)
+ QPEL_H264HV_XMM(%%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, 2*48)
+ QPEL_H264HV_XMM(%%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, 3*48)
+ QPEL_H264HV_XMM(%%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, %%xmm3, 4*48)
+ QPEL_H264HV_XMM(%%xmm5, %%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, 5*48)
+ QPEL_H264HV_XMM(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, 6*48)
+ QPEL_H264HV_XMM(%%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, 7*48)
+ : "+a"(src)
+ : "c"(tmp), "S"((x86_reg)srcStride), "m"(ff_pw_5), "m"(ff_pw_16)
+ : "memory"
+ );
+ if(size==16){
+ __asm__ volatile(
+ QPEL_H264HV_XMM(%%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, 8*48)
+ QPEL_H264HV_XMM(%%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, 9*48)
+ QPEL_H264HV_XMM(%%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, %%xmm3, 10*48)
+ QPEL_H264HV_XMM(%%xmm5, %%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, 11*48)
+ QPEL_H264HV_XMM(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, 12*48)
+ QPEL_H264HV_XMM(%%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, 13*48)
+ QPEL_H264HV_XMM(%%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, 14*48)
+ QPEL_H264HV_XMM(%%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, 15*48)
+ : "+a"(src)
+ : "c"(tmp), "S"((x86_reg)srcStride), "m"(ff_pw_5), "m"(ff_pw_16)
+ : "memory"
+ );
+ }
+ tmp += 8;
+ src += 8 - (size+5)*srcStride;
+ }
+}
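+
+/* hv1 above is only the vertical 6-tap pass: each while-iteration filters
+ * an 8-pixel-wide strip into the 16-bit scratch buffer, with rows spaced
+ * 48 bytes (24 coefficients) apart -- matching the SIZE*24 int16_t scratch
+ * the 8/16 mc wrappers allocate and the "add $48" row step in the hv2 pass
+ * below, which then applies the horizontal taps and the final shift. */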
+
+#define QPEL_H264_HV2_XMM(OPNAME, OP, MMX)\
+static av_always_inline void OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, int dstStride, int tmpStride, int size){\
+ int h = size;\
+ if(size == 16){\
+ __asm__ volatile(\
+ "1: \n\t"\
+ "movdqa 32(%0), %%xmm4 \n\t"\
+ "movdqa 16(%0), %%xmm5 \n\t"\
+ "movdqa (%0), %%xmm7 \n\t"\
+ "movdqa %%xmm4, %%xmm3 \n\t"\
+ "movdqa %%xmm4, %%xmm2 \n\t"\
+ "movdqa %%xmm4, %%xmm1 \n\t"\
+ "movdqa %%xmm4, %%xmm0 \n\t"\
+ "palignr $10, %%xmm5, %%xmm0 \n\t"\
+ "palignr $8, %%xmm5, %%xmm1 \n\t"\
+ "palignr $6, %%xmm5, %%xmm2 \n\t"\
+ "palignr $4, %%xmm5, %%xmm3 \n\t"\
+ "palignr $2, %%xmm5, %%xmm4 \n\t"\
+ "paddw %%xmm5, %%xmm0 \n\t"\
+ "paddw %%xmm4, %%xmm1 \n\t"\
+ "paddw %%xmm3, %%xmm2 \n\t"\
+ "movdqa %%xmm5, %%xmm6 \n\t"\
+ "movdqa %%xmm5, %%xmm4 \n\t"\
+ "movdqa %%xmm5, %%xmm3 \n\t"\
+ "palignr $8, %%xmm7, %%xmm4 \n\t"\
+ "palignr $2, %%xmm7, %%xmm6 \n\t"\
+ "palignr $10, %%xmm7, %%xmm3 \n\t"\
+ "paddw %%xmm6, %%xmm4 \n\t"\
+ "movdqa %%xmm5, %%xmm6 \n\t"\
+ "palignr $6, %%xmm7, %%xmm5 \n\t"\
+ "palignr $4, %%xmm7, %%xmm6 \n\t"\
+ "paddw %%xmm7, %%xmm3 \n\t"\
+ "paddw %%xmm6, %%xmm5 \n\t"\
+ \
+ "psubw %%xmm1, %%xmm0 \n\t"\
+ "psubw %%xmm4, %%xmm3 \n\t"\
+ "psraw $2, %%xmm0 \n\t"\
+ "psraw $2, %%xmm3 \n\t"\
+ "psubw %%xmm1, %%xmm0 \n\t"\
+ "psubw %%xmm4, %%xmm3 \n\t"\
+ "paddw %%xmm2, %%xmm0 \n\t"\
+ "paddw %%xmm5, %%xmm3 \n\t"\
+ "psraw $2, %%xmm0 \n\t"\
+ "psraw $2, %%xmm3 \n\t"\
+ "paddw %%xmm2, %%xmm0 \n\t"\
+ "paddw %%xmm5, %%xmm3 \n\t"\
+ "psraw $6, %%xmm0 \n\t"\
+ "psraw $6, %%xmm3 \n\t"\
+ "packuswb %%xmm0, %%xmm3 \n\t"\
+ OP(%%xmm3, (%1), %%xmm7, dqa)\
+ "add $48, %0 \n\t"\
+ "add %3, %1 \n\t"\
+ "decl %2 \n\t"\
+ " jnz 1b \n\t"\
+ : "+a"(tmp), "+c"(dst), "+g"(h)\
+ : "S"((x86_reg)dstStride)\
+ : "memory"\
+ );\
+ }else{\
+ __asm__ volatile(\
+ "1: \n\t"\
+ "movdqa 16(%0), %%xmm1 \n\t"\
+ "movdqa (%0), %%xmm0 \n\t"\
+ "movdqa %%xmm1, %%xmm2 \n\t"\
+ "movdqa %%xmm1, %%xmm3 \n\t"\
+ "movdqa %%xmm1, %%xmm4 \n\t"\
+ "movdqa %%xmm1, %%xmm5 \n\t"\
+ "palignr $10, %%xmm0, %%xmm5 \n\t"\
+ "palignr $8, %%xmm0, %%xmm4 \n\t"\
+ "palignr $6, %%xmm0, %%xmm3 \n\t"\
+ "palignr $4, %%xmm0, %%xmm2 \n\t"\
+ "palignr $2, %%xmm0, %%xmm1 \n\t"\
+ "paddw %%xmm5, %%xmm0 \n\t"\
+ "paddw %%xmm4, %%xmm1 \n\t"\
+ "paddw %%xmm3, %%xmm2 \n\t"\
+ "psubw %%xmm1, %%xmm0 \n\t"\
+ "psraw $2, %%xmm0 \n\t"\
+ "psubw %%xmm1, %%xmm0 \n\t"\
+ "paddw %%xmm2, %%xmm0 \n\t"\
+ "psraw $2, %%xmm0 \n\t"\
+ "paddw %%xmm2, %%xmm0 \n\t"\
+ "psraw $6, %%xmm0 \n\t"\
+ "packuswb %%xmm0, %%xmm0 \n\t"\
+ OP(%%xmm0, (%1), %%xmm7, q)\
+ "add $48, %0 \n\t"\
+ "add %3, %1 \n\t"\
+ "decl %2 \n\t"\
+ " jnz 1b \n\t"\
+ : "+a"(tmp), "+c"(dst), "+g"(h)\
+ : "S"((x86_reg)dstStride)\
+ : "memory"\
+ );\
+ }\
+}
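+
+/* The shift sequence above evaluates the horizontal 6-tap sum without a
+ * multiply: with a the outer, b the inner and c the centre pair of the
+ * (1,-5,20,20,-5,1) window,
+ *
+ *   ((((a - b) >> 2) - b + c) >> 2) + c  ~=  (a - 5*b + 20*c) / 16
+ *
+ * and the final psraw $6 plus packuswb produce the clipped 8-bit result,
+ * the same trick as in the commented MMX qpel4 version. */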
+
+#define QPEL_H264_HV_XMM(OPNAME, OP, MMX)\
+static av_noinline void OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride, int size){\
+ put_h264_qpel8or16_hv1_lowpass_sse2(tmp, src, tmpStride, srcStride, size);\
+ OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX(dst, tmp, dstStride, tmpStride, size);\
+}\
+static void OPNAME ## h264_qpel8_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
+ OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst, tmp, src, dstStride, tmpStride, srcStride, 8);\
+}\
+static void OPNAME ## h264_qpel16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
+ OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst, tmp, src, dstStride, tmpStride, srcStride, 16);\
+}\
+
+#define put_pixels8_l2_sse2 put_pixels8_l2_mmx2
+#define avg_pixels8_l2_sse2 avg_pixels8_l2_mmx2
+#define put_pixels16_l2_sse2 put_pixels16_l2_mmx2
+#define avg_pixels16_l2_sse2 avg_pixels16_l2_mmx2
+#define put_pixels8_l2_ssse3 put_pixels8_l2_mmx2
+#define avg_pixels8_l2_ssse3 avg_pixels8_l2_mmx2
+#define put_pixels16_l2_ssse3 put_pixels16_l2_mmx2
+#define avg_pixels16_l2_ssse3 avg_pixels16_l2_mmx2
+
+#define put_pixels8_l2_shift5_sse2 put_pixels8_l2_shift5_mmx2
+#define avg_pixels8_l2_shift5_sse2 avg_pixels8_l2_shift5_mmx2
+#define put_pixels16_l2_shift5_sse2 put_pixels16_l2_shift5_mmx2
+#define avg_pixels16_l2_shift5_sse2 avg_pixels16_l2_shift5_mmx2
+#define put_pixels8_l2_shift5_ssse3 put_pixels8_l2_shift5_mmx2
+#define avg_pixels8_l2_shift5_ssse3 avg_pixels8_l2_shift5_mmx2
+#define put_pixels16_l2_shift5_ssse3 put_pixels16_l2_shift5_mmx2
+#define avg_pixels16_l2_shift5_ssse3 avg_pixels16_l2_shift5_mmx2
+
+#define put_h264_qpel8_h_lowpass_l2_sse2 put_h264_qpel8_h_lowpass_l2_mmx2
+#define avg_h264_qpel8_h_lowpass_l2_sse2 avg_h264_qpel8_h_lowpass_l2_mmx2
+#define put_h264_qpel16_h_lowpass_l2_sse2 put_h264_qpel16_h_lowpass_l2_mmx2
+#define avg_h264_qpel16_h_lowpass_l2_sse2 avg_h264_qpel16_h_lowpass_l2_mmx2
+
+#define put_h264_qpel8_v_lowpass_ssse3 put_h264_qpel8_v_lowpass_sse2
+#define avg_h264_qpel8_v_lowpass_ssse3 avg_h264_qpel8_v_lowpass_sse2
+#define put_h264_qpel16_v_lowpass_ssse3 put_h264_qpel16_v_lowpass_sse2
+#define avg_h264_qpel16_v_lowpass_ssse3 avg_h264_qpel16_v_lowpass_sse2
+
+#define put_h264_qpel8or16_hv2_lowpass_sse2 put_h264_qpel8or16_hv2_lowpass_mmx2
+#define avg_h264_qpel8or16_hv2_lowpass_sse2 avg_h264_qpel8or16_hv2_lowpass_mmx2
+
+#define H264_MC(OPNAME, SIZE, MMX, ALIGN) \
+H264_MC_C(OPNAME, SIZE, MMX, ALIGN)\
+H264_MC_V(OPNAME, SIZE, MMX, ALIGN)\
+H264_MC_H(OPNAME, SIZE, MMX, ALIGN)\
+H264_MC_HV(OPNAME, SIZE, MMX, ALIGN)\
+
+static void put_h264_qpel16_mc00_sse2 (uint8_t *dst, uint8_t *src, int stride){
+ put_pixels16_sse2(dst, src, stride, 16);
+}
+static void avg_h264_qpel16_mc00_sse2 (uint8_t *dst, uint8_t *src, int stride){
+ avg_pixels16_sse2(dst, src, stride, 16);
+}
+#define put_h264_qpel8_mc00_sse2 put_h264_qpel8_mc00_mmx2
+#define avg_h264_qpel8_mc00_sse2 avg_h264_qpel8_mc00_mmx2
+
+#define H264_MC_C(OPNAME, SIZE, MMX, ALIGN) \
+static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## MMX (uint8_t *dst, uint8_t *src, int stride){\
+ OPNAME ## pixels ## SIZE ## _ ## MMX(dst, src, stride, SIZE);\
+}\
+
+#define H264_MC_H(OPNAME, SIZE, MMX, ALIGN) \
+static void OPNAME ## h264_qpel ## SIZE ## _mc10_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, src, stride, stride);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc20_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ OPNAME ## h264_qpel ## SIZE ## _h_lowpass_ ## MMX(dst, src, stride, stride);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc30_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, src+1, stride, stride);\
+}\
+
+#define H264_MC_V(OPNAME, SIZE, MMX, ALIGN) \
+static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*SIZE];\
+ put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\
+ OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, src, temp, stride, stride, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc02_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ OPNAME ## h264_qpel ## SIZE ## _v_lowpass_ ## MMX(dst, src, stride, stride);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*SIZE];\
+ put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\
+ OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, src+stride, temp, stride, stride, SIZE);\
+}\
+
+#define H264_MC_HV(OPNAME, SIZE, MMX, ALIGN) \
+static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*SIZE];\
+ put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\
+ OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, temp, stride, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*SIZE];\
+ put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src+1, SIZE, stride);\
+ OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, temp, stride, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*SIZE];\
+ put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\
+ OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, temp, stride, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*SIZE];\
+ put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src+1, SIZE, stride);\
+ OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, temp, stride, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ DECLARE_ALIGNED(ALIGN, uint16_t, temp)[SIZE*(SIZE<8?12:24)];\
+ OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(dst, temp, src, stride, SIZE, stride);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE];\
+ uint8_t * const halfHV= temp;\
+ int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
+ assert(((int)temp & 7) == 0);\
+ put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\
+ OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, halfHV, stride, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE];\
+ uint8_t * const halfHV= temp;\
+ int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
+ assert(((int)temp & 7) == 0);\
+ put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\
+ OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, halfHV, stride, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE];\
+ uint8_t * const halfHV= temp;\
+ int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
+ assert(((int)temp & 7) == 0);\
+ put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\
+ OPNAME ## pixels ## SIZE ## _l2_shift5_ ## MMX(dst, halfV+2, halfHV, stride, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE];\
+ uint8_t * const halfHV= temp;\
+ int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
+ assert(((int)temp & 7) == 0);\
+ put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\
+ OPNAME ## pixels ## SIZE ## _l2_shift5_ ## MMX(dst, halfV+3, halfHV, stride, SIZE, SIZE);\
+}\
+
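+/* In the mcXY wrappers above, X is the horizontal and Y the vertical
+ * quarter-pel phase (0..3): mc20/mc02 are the pure half-pel column/row,
+ * mc10/mc30 (and mc01/mc03) average the half-pel result with the nearest
+ * full-pel pixels, and mc22 is the centre position that needs the
+ * two-pass hv filter with the scratch buffer. */
+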
+#define H264_MC_4816(MMX)\
+H264_MC(put_, 4, MMX, 8)\
+H264_MC(put_, 8, MMX, 8)\
+H264_MC(put_, 16,MMX, 8)\
+H264_MC(avg_, 4, MMX, 8)\
+H264_MC(avg_, 8, MMX, 8)\
+H264_MC(avg_, 16,MMX, 8)\
+
+#define H264_MC_816(QPEL, XMM)\
+QPEL(put_, 8, XMM, 16)\
+QPEL(put_, 16,XMM, 16)\
+QPEL(avg_, 8, XMM, 16)\
+QPEL(avg_, 16,XMM, 16)\
+
+
+#define AVG_3DNOW_OP(a,b,temp, size) \
+"mov" #size " " #b ", " #temp " \n\t"\
+"pavgusb " #temp ", " #a " \n\t"\
+"mov" #size " " #a ", " #b " \n\t"
+#define AVG_MMX2_OP(a,b,temp, size) \
+"mov" #size " " #b ", " #temp " \n\t"\
+"pavgb " #temp ", " #a " \n\t"\
+"mov" #size " " #a ", " #b " \n\t"
+
+#define PAVGB "pavgusb"
+QPEL_H264(put_, PUT_OP, 3dnow)
+QPEL_H264(avg_, AVG_3DNOW_OP, 3dnow)
+#undef PAVGB
+#define PAVGB "pavgb"
+QPEL_H264(put_, PUT_OP, mmx2)
+QPEL_H264(avg_, AVG_MMX2_OP, mmx2)
+QPEL_H264_V_XMM(put_, PUT_OP, sse2)
+QPEL_H264_V_XMM(avg_, AVG_MMX2_OP, sse2)
+QPEL_H264_HV_XMM(put_, PUT_OP, sse2)
+QPEL_H264_HV_XMM(avg_, AVG_MMX2_OP, sse2)
+#if HAVE_SSSE3
+QPEL_H264_H_XMM(put_, PUT_OP, ssse3)
+QPEL_H264_H_XMM(avg_, AVG_MMX2_OP, ssse3)
+QPEL_H264_HV2_XMM(put_, PUT_OP, ssse3)
+QPEL_H264_HV2_XMM(avg_, AVG_MMX2_OP, ssse3)
+QPEL_H264_HV_XMM(put_, PUT_OP, ssse3)
+QPEL_H264_HV_XMM(avg_, AVG_MMX2_OP, ssse3)
+#endif
+#undef PAVGB
+
+H264_MC_4816(3dnow)
+H264_MC_4816(mmx2)
+H264_MC_816(H264_MC_V, sse2)
+H264_MC_816(H264_MC_HV, sse2)
+#if HAVE_SSSE3
+H264_MC_816(H264_MC_H, ssse3)
+H264_MC_816(H264_MC_HV, ssse3)
+#endif
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264_weight.asm b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264_weight.asm
new file mode 100644
index 000000000..53aa21047
--- /dev/null
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264_weight.asm
@@ -0,0 +1,375 @@
+;*****************************************************************************
+;* SSE2-optimized weighted prediction code
+;*****************************************************************************
+;* Copyright (c) 2004-2005 Michael Niedermayer, Loren Merritt
+;* Copyright (C) 2010 Eli Friedman <eli.friedman@gmail.com>
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

+;******************************************************************************
+
+%include "x86inc.asm"
+
+SECTION .text
+
+;-----------------------------------------------------------------------------
+; biweight pred:
+;
+; void h264_biweight_16x16_sse2(uint8_t *dst, uint8_t *src, int stride,
+; int log2_denom, int weightd, int weights,
+; int offset);
+; and
+; void h264_weight_16x16_sse2(uint8_t *dst, int stride,
+; int log2_denom, int weight,
+; int offset);
+;-----------------------------------------------------------------------------
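+
+; Per pixel, WEIGHT_SETUP/WEIGHT_OP below compute (a sketch, log2_denom >= 1):
+;
+;   dst[x] = clip_uint8((dst[x]*weight + (((2*offset + 1) << log2_denom) >> 1))
+;                       >> log2_denom)
+;
+; folding the spec's "+ offset after the shift" and the rounding bias into
+; the single broadcast constant kept in m5.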
+
+%macro WEIGHT_SETUP 0
+ add r4, r4
+ inc r4
+ movd m3, r3d
+ movd m5, r4d
+ movd m6, r2d
+ pslld m5, m6
+ psrld m5, 1
+%if mmsize == 16
+ pshuflw m3, m3, 0
+ pshuflw m5, m5, 0
+ punpcklqdq m3, m3
+ punpcklqdq m5, m5
+%else
+ pshufw m3, m3, 0
+ pshufw m5, m5, 0
+%endif
+ pxor m7, m7
+%endmacro
+
+%macro WEIGHT_OP 2
+ movh m0, [r0+%1]
+ movh m1, [r0+%2]
+ punpcklbw m0, m7
+ punpcklbw m1, m7
+ pmullw m0, m3
+ pmullw m1, m3
+ paddsw m0, m5
+ paddsw m1, m5
+ psraw m0, m6
+ psraw m1, m6
+ packuswb m0, m1
+%endmacro
+
+%macro WEIGHT_FUNC_DBL_MM 1
+cglobal h264_weight_16x%1_mmx2, 5, 5, 0
+ WEIGHT_SETUP
+ mov r2, %1
+%if %1 == 16
+.nextrow
+ WEIGHT_OP 0, 4
+ mova [r0 ], m0
+ WEIGHT_OP 8, 12
+ mova [r0+8], m0
+ add r0, r1
+ dec r2
+ jnz .nextrow
+ REP_RET
+%else
+ jmp mangle(ff_h264_weight_16x16_mmx2.nextrow)
+%endif
+%endmacro
+
+INIT_MMX
+WEIGHT_FUNC_DBL_MM 16
+WEIGHT_FUNC_DBL_MM 8
+
+%macro WEIGHT_FUNC_MM 4
+cglobal h264_weight_%1x%2_%4, 7, 7, %3
+ WEIGHT_SETUP
+ mov r2, %2
+%if %2 == 16
+.nextrow
+ WEIGHT_OP 0, mmsize/2
+ mova [r0], m0
+ add r0, r1
+ dec r2
+ jnz .nextrow
+ REP_RET
+%else
+ jmp mangle(ff_h264_weight_%1x16_%4.nextrow)
+%endif
+%endmacro
+
+INIT_MMX
+WEIGHT_FUNC_MM 8, 16, 0, mmx2
+WEIGHT_FUNC_MM 8, 8, 0, mmx2
+WEIGHT_FUNC_MM 8, 4, 0, mmx2
+INIT_XMM
+WEIGHT_FUNC_MM 16, 16, 8, sse2
+WEIGHT_FUNC_MM 16, 8, 8, sse2
+
+%macro WEIGHT_FUNC_HALF_MM 5
+cglobal h264_weight_%1x%2_%5, 5, 5, %4
+ WEIGHT_SETUP
+ mov r2, %2/2
+ lea r3, [r1*2]
+%if %2 == mmsize
+.nextrow
+ WEIGHT_OP 0, r1
+ movh [r0], m0
+%if mmsize == 16
+ movhps [r0+r1], m0
+%else
+ psrlq m0, 32
+ movh [r0+r1], m0
+%endif
+ add r0, r3
+ dec r2
+ jnz .nextrow
+ REP_RET
+%else
+ jmp mangle(ff_h264_weight_%1x%3_%5.nextrow)
+%endif
+%endmacro
+
+INIT_MMX
+WEIGHT_FUNC_HALF_MM 4, 8, 8, 0, mmx2
+WEIGHT_FUNC_HALF_MM 4, 4, 8, 0, mmx2
+WEIGHT_FUNC_HALF_MM 4, 2, 8, 0, mmx2
+INIT_XMM
+WEIGHT_FUNC_HALF_MM 8, 16, 16, 8, sse2
+WEIGHT_FUNC_HALF_MM 8, 8, 16, 8, sse2
+WEIGHT_FUNC_HALF_MM 8, 4, 16, 8, sse2
+
+%macro BIWEIGHT_SETUP 0
+ add r6, 1
+ or r6, 1
+ add r3, 1
+ movd m3, r4d
+ movd m4, r5d
+ movd m5, r6d
+ movd m6, r3d
+ pslld m5, m6
+ psrld m5, 1
+%if mmsize == 16
+ pshuflw m3, m3, 0
+ pshuflw m4, m4, 0
+ pshuflw m5, m5, 0
+ punpcklqdq m3, m3
+ punpcklqdq m4, m4
+ punpcklqdq m5, m5
+%else
+ pshufw m3, m3, 0
+ pshufw m4, m4, 0
+ pshufw m5, m5, 0
+%endif
+ pxor m7, m7
+%endmacro
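+
+; BIWEIGHT_SETUP rounds the incoming offset (presumably offset0+offset1 from
+; the caller) up to an odd value, so the loop body reduces to (a sketch):
+;
+;   dst[x] = clip_uint8((dst[x]*weightd + src[x]*weights
+;                        + (((offset + 1) | 1) << log2_denom)) >> (log2_denom + 1))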
+
+%macro BIWEIGHT_STEPA 3
+ movh m%1, [r0+%3]
+ movh m%2, [r1+%3]
+ punpcklbw m%1, m7
+ punpcklbw m%2, m7
+ pmullw m%1, m3
+ pmullw m%2, m4
+ paddsw m%1, m%2
+%endmacro
+
+%macro BIWEIGHT_STEPB 0
+ paddsw m0, m5
+ paddsw m1, m5
+ psraw m0, m6
+ psraw m1, m6
+ packuswb m0, m1
+%endmacro
+
+%macro BIWEIGHT_FUNC_DBL_MM 1
+cglobal h264_biweight_16x%1_mmx2, 7, 7, 0
+ BIWEIGHT_SETUP
+ mov r3, %1
+%if %1 == 16
+.nextrow
+ BIWEIGHT_STEPA 0, 1, 0
+ BIWEIGHT_STEPA 1, 2, 4
+ BIWEIGHT_STEPB
+ mova [r0], m0
+ BIWEIGHT_STEPA 0, 1, 8
+ BIWEIGHT_STEPA 1, 2, 12
+ BIWEIGHT_STEPB
+ mova [r0+8], m0
+ add r0, r2
+ add r1, r2
+ dec r3
+ jnz .nextrow
+ REP_RET
+%else
+ jmp mangle(ff_h264_biweight_16x16_mmx2.nextrow)
+%endif
+%endmacro
+
+INIT_MMX
+BIWEIGHT_FUNC_DBL_MM 16
+BIWEIGHT_FUNC_DBL_MM 8
+
+%macro BIWEIGHT_FUNC_MM 4
+cglobal h264_biweight_%1x%2_%4, 7, 7, %3
+ BIWEIGHT_SETUP
+ mov r3, %2
+%if %2 == 16
+.nextrow
+ BIWEIGHT_STEPA 0, 1, 0
+ BIWEIGHT_STEPA 1, 2, mmsize/2
+ BIWEIGHT_STEPB
+ mova [r0], m0
+ add r0, r2
+ add r1, r2
+ dec r3
+ jnz .nextrow
+ REP_RET
+%else
+ jmp mangle(ff_h264_biweight_%1x16_%4.nextrow)
+%endif
+%endmacro
+
+INIT_MMX
+BIWEIGHT_FUNC_MM 8, 16, 0, mmx2
+BIWEIGHT_FUNC_MM 8, 8, 0, mmx2
+BIWEIGHT_FUNC_MM 8, 4, 0, mmx2
+INIT_XMM
+BIWEIGHT_FUNC_MM 16, 16, 8, sse2
+BIWEIGHT_FUNC_MM 16, 8, 8, sse2
+
+%macro BIWEIGHT_FUNC_HALF_MM 5
+cglobal h264_biweight_%1x%2_%5, 7, 7, %4
+ BIWEIGHT_SETUP
+ mov r3, %2/2
+ lea r4, [r2*2]
+%if %2 == mmsize
+.nextrow
+ BIWEIGHT_STEPA 0, 1, 0
+ BIWEIGHT_STEPA 1, 2, r2
+ BIWEIGHT_STEPB
+ movh [r0], m0
+%if mmsize == 16
+ movhps [r0+r2], m0
+%else
+ psrlq m0, 32
+ movh [r0+r2], m0
+%endif
+ add r0, r4
+ add r1, r4
+ dec r3
+ jnz .nextrow
+ REP_RET
+%else
+ jmp mangle(ff_h264_biweight_%1x%3_%5.nextrow)
+%endif
+%endmacro
+
+INIT_MMX
+BIWEIGHT_FUNC_HALF_MM 4, 8, 8, 0, mmx2
+BIWEIGHT_FUNC_HALF_MM 4, 4, 8, 0, mmx2
+BIWEIGHT_FUNC_HALF_MM 4, 2, 8, 0, mmx2
+INIT_XMM
+BIWEIGHT_FUNC_HALF_MM 8, 16, 16, 8, sse2
+BIWEIGHT_FUNC_HALF_MM 8, 8, 16, 8, sse2
+BIWEIGHT_FUNC_HALF_MM 8, 4, 16, 8, sse2
+
+%macro BIWEIGHT_SSSE3_SETUP 0
+ add r6, 1
+ or r6, 1
+ add r3, 1
+ movd m4, r4d
+ movd m0, r5d
+ movd m5, r6d
+ movd m6, r3d
+ pslld m5, m6
+ psrld m5, 1
+ punpcklbw m4, m0
+ pshuflw m4, m4, 0
+ pshuflw m5, m5, 0
+ punpcklqdq m4, m4
+ punpcklqdq m5, m5
+%endmacro
+
+%macro BIWEIGHT_SSSE3_OP 0
+ pmaddubsw m0, m4
+ pmaddubsw m2, m4
+ paddsw m0, m5
+ paddsw m2, m5
+ psraw m0, m6
+ psraw m2, m6
+ packuswb m0, m2
+%endmacro
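+
+; The SSSE3 path interleaves dst and src bytes (punpcklbw) and keeps the two
+; weights packed as alternating bytes in m4, so one pmaddubsw yields
+; dst[x]*weightd + src[x]*weights per 16-bit lane, replacing the unpack and
+; the two pmullw of the mmx2/sse2 STEPA above.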
+
+%macro BIWEIGHT_SSSE3_16 1
+cglobal h264_biweight_16x%1_ssse3, 7, 7, 8
+ BIWEIGHT_SSSE3_SETUP
+ mov r3, %1
+
+%if %1 == 16
+.nextrow
+ movh m0, [r0]
+ movh m2, [r0+8]
+ movh m3, [r1+8]
+ punpcklbw m0, [r1]
+ punpcklbw m2, m3
+ BIWEIGHT_SSSE3_OP
+ mova [r0], m0
+ add r0, r2
+ add r1, r2
+ dec r3
+ jnz .nextrow
+ REP_RET
+%else
+ jmp mangle(ff_h264_biweight_16x16_ssse3.nextrow)
+%endif
+%endmacro
+
+INIT_XMM
+BIWEIGHT_SSSE3_16 16
+BIWEIGHT_SSSE3_16 8
+
+%macro BIWEIGHT_SSSE3_8 1
+cglobal h264_biweight_8x%1_ssse3, 7, 7, 8
+ BIWEIGHT_SSSE3_SETUP
+ mov r3, %1/2
+ lea r4, [r2*2]
+
+%if %1 == 16
+.nextrow
+ movh m0, [r0]
+ movh m1, [r1]
+ movh m2, [r0+r2]
+ movh m3, [r1+r2]
+ punpcklbw m0, m1
+ punpcklbw m2, m3
+ BIWEIGHT_SSSE3_OP
+ movh [r0], m0
+ movhps [r0+r2], m0
+ add r0, r4
+ add r1, r4
+ dec r3
+ jnz .nextrow
+ REP_RET
+%else
+ jmp mangle(ff_h264_biweight_8x16_ssse3.nextrow)
+%endif
+%endmacro
+
+INIT_XMM
+BIWEIGHT_SSSE3_8 16
+BIWEIGHT_SSSE3_8 8
+BIWEIGHT_SSSE3_8 4
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264_weight_sse2.asm b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264_weight_sse2.asm
deleted file mode 100644
index 8667f0690..000000000
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264_weight_sse2.asm
+++ /dev/null
@@ -1,170 +0,0 @@
-;*****************************************************************************
-;* SSE2-optimized weighted prediction code
-;*****************************************************************************
-;* Copyright (c) 2004-2005 Michael Niedermayer, Loren Merritt
-;* Copyright (C) 2010 Eli Friedman <eli.friedman@gmail.com>
-;*
-;* This file is part of FFmpeg.
-;*
-;* FFmpeg is free software; you can redistribute it and/or
-;* modify it under the terms of the GNU Lesser General Public
-;* License as published by the Free Software Foundation; either
-;* version 2.1 of the License, or (at your option) any later version.
-;*
-;* FFmpeg is distributed in the hope that it will be useful,
-;* but WITHOUT ANY WARRANTY; without even the implied warranty of
-;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-;* Lesser General Public License for more details.
-;*
-;* You should have received a copy of the GNU Lesser General Public
-;* License along with FFmpeg; if not, write to the Free Software
-;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-;******************************************************************************
-
-%include "x86inc.asm"
-
-SECTION .text
-INIT_XMM
-
-;-----------------------------------------------------------------------------
-; biweight pred:
-;
-; void h264_biweight_16x16_sse2(uint8_t *dst, uint8_t *src, int stride,
-; int log2_denom, int weightd, int weights,
-; int offset);
-;-----------------------------------------------------------------------------
-
-%macro BIWEIGHT_SSE2_SETUP 0
- add r6, 1
- or r6, 1
- add r3, 1
- movd m3, r4
- movd m4, r5
- movd m5, r6
- movd m6, r3
- pslld m5, m6
- psrld m5, 1
- pshuflw m3, m3, 0
- pshuflw m4, m4, 0
- pshuflw m5, m5, 0
- punpcklqdq m3, m3
- punpcklqdq m4, m4
- punpcklqdq m5, m5
- pxor m7, m7
-%endmacro
-
-%macro BIWEIGHT_SSE2_STEPA 3
- movh m%1, [r0+%3]
- movh m%2, [r1+%3]
- punpcklbw m%1, m7
- punpcklbw m%2, m7
- pmullw m%1, m3
- pmullw m%2, m4
- paddsw m%1, m%2
-%endmacro
-
-%macro BIWEIGHT_SSE2_STEPB 0
- paddsw m0, m5
- paddsw m1, m5
- psraw m0, m6
- psraw m1, m6
- packuswb m0, m1
-%endmacro
-
-cglobal h264_biweight_16x16_sse2, 7, 7, 8
- BIWEIGHT_SSE2_SETUP
- mov r3, 16
-
-.nextrow
- BIWEIGHT_SSE2_STEPA 0, 1, 0
- BIWEIGHT_SSE2_STEPA 1, 2, 8
- BIWEIGHT_SSE2_STEPB
- mova [r0], m0
- add r0, r2
- add r1, r2
- dec r3
- jnz .nextrow
- REP_RET
-
-cglobal h264_biweight_8x8_sse2, 7, 7, 8
- BIWEIGHT_SSE2_SETUP
- mov r3, 4
- lea r4, [r2*2]
-
-.nextrow
- BIWEIGHT_SSE2_STEPA 0, 1, 0
- BIWEIGHT_SSE2_STEPA 1, 2, r2
- BIWEIGHT_SSE2_STEPB
- movh [r0], m0
- movhps [r0+r2], m0
- add r0, r4
- add r1, r4
- dec r3
- jnz .nextrow
- REP_RET
-
-%macro BIWEIGHT_SSSE3_SETUP 0
- add r6, 1
- or r6, 1
- add r3, 1
- movd m4, r4
- movd m0, r5
- movd m5, r6
- movd m6, r3
- pslld m5, m6
- psrld m5, 1
- punpcklbw m4, m0
- pshuflw m4, m4, 0
- pshuflw m5, m5, 0
- punpcklqdq m4, m4
- punpcklqdq m5, m5
-%endmacro
-
-%macro BIWEIGHT_SSSE3_OP 0
- pmaddubsw m0, m4
- pmaddubsw m2, m4
- paddsw m0, m5
- paddsw m2, m5
- psraw m0, m6
- psraw m2, m6
- packuswb m0, m2
-%endmacro
-
-cglobal h264_biweight_16x16_ssse3, 7, 7, 8
- BIWEIGHT_SSSE3_SETUP
- mov r3, 16
-
-.nextrow
- movh m0, [r0]
- movh m2, [r0+8]
- movh m3, [r1+8]
- punpcklbw m0, [r1]
- punpcklbw m2, m3
- BIWEIGHT_SSSE3_OP
- mova [r0], m0
- add r0, r2
- add r1, r2
- dec r3
- jnz .nextrow
- REP_RET
-
-cglobal h264_biweight_8x8_ssse3, 7, 7, 8
- BIWEIGHT_SSSE3_SETUP
- mov r3, 4
- lea r4, [r2*2]
-
-.nextrow
- movh m0, [r0]
- movh m1, [r1]
- movh m2, [r0+r2]
- movh m3, [r1+r2]
- punpcklbw m0, m1
- punpcklbw m2, m3
- BIWEIGHT_SSSE3_OP
- movh [r0], m0
- movhps [r0+r2], m0
- add r0, r4
- add r1, r4
- dec r3
- jnz .nextrow
- REP_RET
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264dsp_mmx.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264dsp_mmx.c
index 4b2e54603..efd8b78f1 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264dsp_mmx.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/h264dsp_mmx.c
@@ -18,8 +18,10 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
+#include "libavutil/cpu.h"
+#include "libavutil/x86_cpu.h"
+#include "libavcodec/h264dsp.h"
#include "dsputil_mmx.h"
-#include "libavcodec/h264pred.h"
DECLARE_ALIGNED(8, static const uint64_t, ff_pb_3_1 ) = 0x0103010301030103ULL;
DECLARE_ALIGNED(8, static const uint64_t, ff_pb_7_3 ) = 0x0307030703070307ULL;
@@ -27,772 +29,41 @@ DECLARE_ALIGNED(8, static const uint64_t, ff_pb_7_3 ) = 0x0307030703070307ULL;
/***********************************/
/* IDCT */
-#define SUMSUB_BADC( a, b, c, d ) \
- "paddw "#b", "#a" \n\t"\
- "paddw "#d", "#c" \n\t"\
- "paddw "#b", "#b" \n\t"\
- "paddw "#d", "#d" \n\t"\
- "psubw "#a", "#b" \n\t"\
- "psubw "#c", "#d" \n\t"
-
-#define SUMSUBD2_AB( a, b, t ) \
- "movq "#b", "#t" \n\t"\
- "psraw $1 , "#b" \n\t"\
- "paddw "#a", "#b" \n\t"\
- "psraw $1 , "#a" \n\t"\
- "psubw "#t", "#a" \n\t"
-
-#define IDCT4_1D( s02, s13, d02, d13, t ) \
- SUMSUB_BA ( s02, d02 )\
- SUMSUBD2_AB( s13, d13, t )\
- SUMSUB_BADC( d13, s02, s13, d02 )
-
-#define STORE_DIFF_4P( p, t, z ) \
- "psraw $6, "#p" \n\t"\
- "movd (%0), "#t" \n\t"\
- "punpcklbw "#z", "#t" \n\t"\
- "paddsw "#t", "#p" \n\t"\
- "packuswb "#z", "#p" \n\t"\
- "movd "#p", (%0) \n\t"
-
-static void ff_h264_idct_add_mmx(uint8_t *dst, int16_t *block, int stride)
-{
- /* Load dct coeffs */
- __asm__ volatile(
- "movq (%0), %%mm0 \n\t"
- "movq 8(%0), %%mm1 \n\t"
- "movq 16(%0), %%mm2 \n\t"
- "movq 24(%0), %%mm3 \n\t"
- :: "r"(block) );
-
- __asm__ volatile(
- /* mm1=s02+s13 mm2=s02-s13 mm4=d02+d13 mm0=d02-d13 */
- IDCT4_1D( %%mm2, %%mm1, %%mm0, %%mm3, %%mm4 )
-
- "movq %0, %%mm6 \n\t"
- /* in: 1,4,0,2 out: 1,2,3,0 */
- TRANSPOSE4( %%mm3, %%mm1, %%mm0, %%mm2, %%mm4 )
-
- "paddw %%mm6, %%mm3 \n\t"
-
- /* mm2=s02+s13 mm3=s02-s13 mm4=d02+d13 mm1=d02-d13 */
- IDCT4_1D( %%mm4, %%mm2, %%mm3, %%mm0, %%mm1 )
-
- "pxor %%mm7, %%mm7 \n\t"
- :: "m"(ff_pw_32));
-
- __asm__ volatile(
- STORE_DIFF_4P( %%mm0, %%mm1, %%mm7)
- "add %1, %0 \n\t"
- STORE_DIFF_4P( %%mm2, %%mm1, %%mm7)
- "add %1, %0 \n\t"
- STORE_DIFF_4P( %%mm3, %%mm1, %%mm7)
- "add %1, %0 \n\t"
- STORE_DIFF_4P( %%mm4, %%mm1, %%mm7)
- : "+r"(dst)
- : "r" ((x86_reg)stride)
- );
-}
-
-static inline void h264_idct8_1d(int16_t *block)
-{
- __asm__ volatile(
- "movq 112(%0), %%mm7 \n\t"
- "movq 80(%0), %%mm0 \n\t"
- "movq 48(%0), %%mm3 \n\t"
- "movq 16(%0), %%mm5 \n\t"
-
- "movq %%mm0, %%mm4 \n\t"
- "movq %%mm5, %%mm1 \n\t"
- "psraw $1, %%mm4 \n\t"
- "psraw $1, %%mm1 \n\t"
- "paddw %%mm0, %%mm4 \n\t"
- "paddw %%mm5, %%mm1 \n\t"
- "paddw %%mm7, %%mm4 \n\t"
- "paddw %%mm0, %%mm1 \n\t"
- "psubw %%mm5, %%mm4 \n\t"
- "paddw %%mm3, %%mm1 \n\t"
-
- "psubw %%mm3, %%mm5 \n\t"
- "psubw %%mm3, %%mm0 \n\t"
- "paddw %%mm7, %%mm5 \n\t"
- "psubw %%mm7, %%mm0 \n\t"
- "psraw $1, %%mm3 \n\t"
- "psraw $1, %%mm7 \n\t"
- "psubw %%mm3, %%mm5 \n\t"
- "psubw %%mm7, %%mm0 \n\t"
-
- "movq %%mm4, %%mm3 \n\t"
- "movq %%mm1, %%mm7 \n\t"
- "psraw $2, %%mm1 \n\t"
- "psraw $2, %%mm3 \n\t"
- "paddw %%mm5, %%mm3 \n\t"
- "psraw $2, %%mm5 \n\t"
- "paddw %%mm0, %%mm1 \n\t"
- "psraw $2, %%mm0 \n\t"
- "psubw %%mm4, %%mm5 \n\t"
- "psubw %%mm0, %%mm7 \n\t"
-
- "movq 32(%0), %%mm2 \n\t"
- "movq 96(%0), %%mm6 \n\t"
- "movq %%mm2, %%mm4 \n\t"
- "movq %%mm6, %%mm0 \n\t"
- "psraw $1, %%mm4 \n\t"
- "psraw $1, %%mm6 \n\t"
- "psubw %%mm0, %%mm4 \n\t"
- "paddw %%mm2, %%mm6 \n\t"
-
- "movq (%0), %%mm2 \n\t"
- "movq 64(%0), %%mm0 \n\t"
- SUMSUB_BA( %%mm0, %%mm2 )
- SUMSUB_BA( %%mm6, %%mm0 )
- SUMSUB_BA( %%mm4, %%mm2 )
- SUMSUB_BA( %%mm7, %%mm6 )
- SUMSUB_BA( %%mm5, %%mm4 )
- SUMSUB_BA( %%mm3, %%mm2 )
- SUMSUB_BA( %%mm1, %%mm0 )
- :: "r"(block)
- );
-}
-
-static void ff_h264_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride)
-{
- int i;
- DECLARE_ALIGNED(8, int16_t, b2)[64];
-
- block[0] += 32;
-
- for(i=0; i<2; i++){
- DECLARE_ALIGNED(8, uint64_t, tmp);
-
- h264_idct8_1d(block+4*i);
-
- __asm__ volatile(
- "movq %%mm7, %0 \n\t"
- TRANSPOSE4( %%mm0, %%mm2, %%mm4, %%mm6, %%mm7 )
- "movq %%mm0, 8(%1) \n\t"
- "movq %%mm6, 24(%1) \n\t"
- "movq %%mm7, 40(%1) \n\t"
- "movq %%mm4, 56(%1) \n\t"
- "movq %0, %%mm7 \n\t"
- TRANSPOSE4( %%mm7, %%mm5, %%mm3, %%mm1, %%mm0 )
- "movq %%mm7, (%1) \n\t"
- "movq %%mm1, 16(%1) \n\t"
- "movq %%mm0, 32(%1) \n\t"
- "movq %%mm3, 48(%1) \n\t"
- : "=m"(tmp)
- : "r"(b2+32*i)
- : "memory"
- );
- }
-
- for(i=0; i<2; i++){
- h264_idct8_1d(b2+4*i);
-
- __asm__ volatile(
- "psraw $6, %%mm7 \n\t"
- "psraw $6, %%mm6 \n\t"
- "psraw $6, %%mm5 \n\t"
- "psraw $6, %%mm4 \n\t"
- "psraw $6, %%mm3 \n\t"
- "psraw $6, %%mm2 \n\t"
- "psraw $6, %%mm1 \n\t"
- "psraw $6, %%mm0 \n\t"
-
- "movq %%mm7, (%0) \n\t"
- "movq %%mm5, 16(%0) \n\t"
- "movq %%mm3, 32(%0) \n\t"
- "movq %%mm1, 48(%0) \n\t"
- "movq %%mm0, 64(%0) \n\t"
- "movq %%mm2, 80(%0) \n\t"
- "movq %%mm4, 96(%0) \n\t"
- "movq %%mm6, 112(%0) \n\t"
- :: "r"(b2+4*i)
- : "memory"
- );
- }
-
- add_pixels_clamped_mmx(b2, dst, stride);
-}
-
-#define STORE_DIFF_8P( p, d, t, z )\
- "movq "#d", "#t" \n"\
- "psraw $6, "#p" \n"\
- "punpcklbw "#z", "#t" \n"\
- "paddsw "#t", "#p" \n"\
- "packuswb "#p", "#p" \n"\
- "movq "#p", "#d" \n"
-
-#define H264_IDCT8_1D_SSE2(a,b,c,d,e,f,g,h)\
- "movdqa "#c", "#a" \n"\
- "movdqa "#g", "#e" \n"\
- "psraw $1, "#c" \n"\
- "psraw $1, "#g" \n"\
- "psubw "#e", "#c" \n"\
- "paddw "#a", "#g" \n"\
- "movdqa "#b", "#e" \n"\
- "psraw $1, "#e" \n"\
- "paddw "#b", "#e" \n"\
- "paddw "#d", "#e" \n"\
- "paddw "#f", "#e" \n"\
- "movdqa "#f", "#a" \n"\
- "psraw $1, "#a" \n"\
- "paddw "#f", "#a" \n"\
- "paddw "#h", "#a" \n"\
- "psubw "#b", "#a" \n"\
- "psubw "#d", "#b" \n"\
- "psubw "#d", "#f" \n"\
- "paddw "#h", "#b" \n"\
- "psubw "#h", "#f" \n"\
- "psraw $1, "#d" \n"\
- "psraw $1, "#h" \n"\
- "psubw "#d", "#b" \n"\
- "psubw "#h", "#f" \n"\
- "movdqa "#e", "#d" \n"\
- "movdqa "#a", "#h" \n"\
- "psraw $2, "#d" \n"\
- "psraw $2, "#h" \n"\
- "paddw "#f", "#d" \n"\
- "paddw "#b", "#h" \n"\
- "psraw $2, "#f" \n"\
- "psraw $2, "#b" \n"\
- "psubw "#f", "#e" \n"\
- "psubw "#a", "#b" \n"\
- "movdqa 0x00(%1), "#a" \n"\
- "movdqa 0x40(%1), "#f" \n"\
- SUMSUB_BA(f, a)\
- SUMSUB_BA(g, f)\
- SUMSUB_BA(c, a)\
- SUMSUB_BA(e, g)\
- SUMSUB_BA(b, c)\
- SUMSUB_BA(h, a)\
- SUMSUB_BA(d, f)
-
-static void ff_h264_idct8_add_sse2(uint8_t *dst, int16_t *block, int stride)
-{
- __asm__ volatile(
- "movdqa 0x10(%1), %%xmm1 \n"
- "movdqa 0x20(%1), %%xmm2 \n"
- "movdqa 0x30(%1), %%xmm3 \n"
- "movdqa 0x50(%1), %%xmm5 \n"
- "movdqa 0x60(%1), %%xmm6 \n"
- "movdqa 0x70(%1), %%xmm7 \n"
- H264_IDCT8_1D_SSE2(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm6, %%xmm7)
- TRANSPOSE8(%%xmm4, %%xmm1, %%xmm7, %%xmm3, %%xmm5, %%xmm0, %%xmm2, %%xmm6, (%1))
- "paddw %4, %%xmm4 \n"
- "movdqa %%xmm4, 0x00(%1) \n"
- "movdqa %%xmm2, 0x40(%1) \n"
- H264_IDCT8_1D_SSE2(%%xmm4, %%xmm0, %%xmm6, %%xmm3, %%xmm2, %%xmm5, %%xmm7, %%xmm1)
- "movdqa %%xmm6, 0x60(%1) \n"
- "movdqa %%xmm7, 0x70(%1) \n"
- "pxor %%xmm7, %%xmm7 \n"
- STORE_DIFF_8P(%%xmm2, (%0), %%xmm6, %%xmm7)
- STORE_DIFF_8P(%%xmm0, (%0,%2), %%xmm6, %%xmm7)
- STORE_DIFF_8P(%%xmm1, (%0,%2,2), %%xmm6, %%xmm7)
- STORE_DIFF_8P(%%xmm3, (%0,%3), %%xmm6, %%xmm7)
- "lea (%0,%2,4), %0 \n"
- STORE_DIFF_8P(%%xmm5, (%0), %%xmm6, %%xmm7)
- STORE_DIFF_8P(%%xmm4, (%0,%2), %%xmm6, %%xmm7)
- "movdqa 0x60(%1), %%xmm0 \n"
- "movdqa 0x70(%1), %%xmm1 \n"
- STORE_DIFF_8P(%%xmm0, (%0,%2,2), %%xmm6, %%xmm7)
- STORE_DIFF_8P(%%xmm1, (%0,%3), %%xmm6, %%xmm7)
- :"+r"(dst)
- :"r"(block), "r"((x86_reg)stride), "r"((x86_reg)3L*stride), "m"(ff_pw_32)
- );
-}
-
-static void ff_h264_idct_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride)
-{
- int dc = (block[0] + 32) >> 6;
- __asm__ volatile(
- "movd %0, %%mm0 \n\t"
- "pshufw $0, %%mm0, %%mm0 \n\t"
- "pxor %%mm1, %%mm1 \n\t"
- "psubw %%mm0, %%mm1 \n\t"
- "packuswb %%mm0, %%mm0 \n\t"
- "packuswb %%mm1, %%mm1 \n\t"
- ::"r"(dc)
- );
- __asm__ volatile(
- "movd %0, %%mm2 \n\t"
- "movd %1, %%mm3 \n\t"
- "movd %2, %%mm4 \n\t"
- "movd %3, %%mm5 \n\t"
- "paddusb %%mm0, %%mm2 \n\t"
- "paddusb %%mm0, %%mm3 \n\t"
- "paddusb %%mm0, %%mm4 \n\t"
- "paddusb %%mm0, %%mm5 \n\t"
- "psubusb %%mm1, %%mm2 \n\t"
- "psubusb %%mm1, %%mm3 \n\t"
- "psubusb %%mm1, %%mm4 \n\t"
- "psubusb %%mm1, %%mm5 \n\t"
- "movd %%mm2, %0 \n\t"
- "movd %%mm3, %1 \n\t"
- "movd %%mm4, %2 \n\t"
- "movd %%mm5, %3 \n\t"
- :"+m"(*(uint32_t*)(dst+0*stride)),
- "+m"(*(uint32_t*)(dst+1*stride)),
- "+m"(*(uint32_t*)(dst+2*stride)),
- "+m"(*(uint32_t*)(dst+3*stride))
- );
-}
-
-static void ff_h264_idct8_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride)
-{
- int dc = (block[0] + 32) >> 6;
- int y;
- __asm__ volatile(
- "movd %0, %%mm0 \n\t"
- "pshufw $0, %%mm0, %%mm0 \n\t"
- "pxor %%mm1, %%mm1 \n\t"
- "psubw %%mm0, %%mm1 \n\t"
- "packuswb %%mm0, %%mm0 \n\t"
- "packuswb %%mm1, %%mm1 \n\t"
- ::"r"(dc)
- );
- for(y=2; y--; dst += 4*stride){
- __asm__ volatile(
- "movq %0, %%mm2 \n\t"
- "movq %1, %%mm3 \n\t"
- "movq %2, %%mm4 \n\t"
- "movq %3, %%mm5 \n\t"
- "paddusb %%mm0, %%mm2 \n\t"
- "paddusb %%mm0, %%mm3 \n\t"
- "paddusb %%mm0, %%mm4 \n\t"
- "paddusb %%mm0, %%mm5 \n\t"
- "psubusb %%mm1, %%mm2 \n\t"
- "psubusb %%mm1, %%mm3 \n\t"
- "psubusb %%mm1, %%mm4 \n\t"
- "psubusb %%mm1, %%mm5 \n\t"
- "movq %%mm2, %0 \n\t"
- "movq %%mm3, %1 \n\t"
- "movq %%mm4, %2 \n\t"
- "movq %%mm5, %3 \n\t"
- :"+m"(*(uint64_t*)(dst+0*stride)),
- "+m"(*(uint64_t*)(dst+1*stride)),
- "+m"(*(uint64_t*)(dst+2*stride)),
- "+m"(*(uint64_t*)(dst+3*stride))
- );
- }
-}
-
-//FIXME this table is a duplicate from h264data.h, and will be removed once the tables from, h264 have been split
-static const uint8_t scan8[16 + 2*4]={
- 4+1*8, 5+1*8, 4+2*8, 5+2*8,
- 6+1*8, 7+1*8, 6+2*8, 7+2*8,
- 4+3*8, 5+3*8, 4+4*8, 5+4*8,
- 6+3*8, 7+3*8, 6+4*8, 7+4*8,
- 1+1*8, 2+1*8,
- 1+2*8, 2+2*8,
- 1+4*8, 2+4*8,
- 1+5*8, 2+5*8,
-};
-
-static void ff_h264_idct_add16_mmx(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
- int i;
- for(i=0; i<16; i++){
- if(nnzc[ scan8[i] ])
- ff_h264_idct_add_mmx(dst + block_offset[i], block + i*16, stride);
- }
-}
-
-static void ff_h264_idct8_add4_mmx(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
- int i;
- for(i=0; i<16; i+=4){
- if(nnzc[ scan8[i] ])
- ff_h264_idct8_add_mmx(dst + block_offset[i], block + i*16, stride);
- }
-}
-
-
-static void ff_h264_idct_add16_mmx2(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
- int i;
- for(i=0; i<16; i++){
- int nnz = nnzc[ scan8[i] ];
- if(nnz){
- if(nnz==1 && block[i*16]) ff_h264_idct_dc_add_mmx2(dst + block_offset[i], block + i*16, stride);
- else ff_h264_idct_add_mmx (dst + block_offset[i], block + i*16, stride);
- }
- }
-}
-
-static void ff_h264_idct_add16intra_mmx(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
- int i;
- for(i=0; i<16; i++){
- if(nnzc[ scan8[i] ] || block[i*16])
- ff_h264_idct_add_mmx(dst + block_offset[i], block + i*16, stride);
- }
-}
-
-static void ff_h264_idct_add16intra_mmx2(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
- int i;
- for(i=0; i<16; i++){
- if(nnzc[ scan8[i] ]) ff_h264_idct_add_mmx (dst + block_offset[i], block + i*16, stride);
- else if(block[i*16]) ff_h264_idct_dc_add_mmx2(dst + block_offset[i], block + i*16, stride);
- }
-}
-
-static void ff_h264_idct8_add4_mmx2(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
- int i;
- for(i=0; i<16; i+=4){
- int nnz = nnzc[ scan8[i] ];
- if(nnz){
- if(nnz==1 && block[i*16]) ff_h264_idct8_dc_add_mmx2(dst + block_offset[i], block + i*16, stride);
- else ff_h264_idct8_add_mmx (dst + block_offset[i], block + i*16, stride);
- }
- }
-}
-
-static void ff_h264_idct8_add4_sse2(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
- int i;
- for(i=0; i<16; i+=4){
- int nnz = nnzc[ scan8[i] ];
- if(nnz){
- if(nnz==1 && block[i*16]) ff_h264_idct8_dc_add_mmx2(dst + block_offset[i], block + i*16, stride);
- else ff_h264_idct8_add_sse2 (dst + block_offset[i], block + i*16, stride);
- }
- }
-}
-
-static void ff_h264_idct_add8_mmx(uint8_t **dest, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
- int i;
- for(i=16; i<16+8; i++){
- if(nnzc[ scan8[i] ] || block[i*16])
- ff_h264_idct_add_mmx (dest[(i&4)>>2] + block_offset[i], block + i*16, stride);
- }
-}
-
-static void ff_h264_idct_add8_mmx2(uint8_t **dest, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
- int i;
- for(i=16; i<16+8; i++){
- if(nnzc[ scan8[i] ])
- ff_h264_idct_add_mmx (dest[(i&4)>>2] + block_offset[i], block + i*16, stride);
- else if(block[i*16])
- ff_h264_idct_dc_add_mmx2(dest[(i&4)>>2] + block_offset[i], block + i*16, stride);
- }
-}
-
-#if CONFIG_GPL && HAVE_YASM
-static void ff_h264_idct_dc_add8_mmx2(uint8_t *dst, int16_t *block, int stride)
-{
- __asm__ volatile(
- "movd %0, %%mm0 \n\t" // 0 0 X D
- "punpcklwd %1, %%mm0 \n\t" // x X d D
- "paddsw %2, %%mm0 \n\t"
- "psraw $6, %%mm0 \n\t"
- "punpcklwd %%mm0, %%mm0 \n\t" // d d D D
- "pxor %%mm1, %%mm1 \n\t" // 0 0 0 0
- "psubw %%mm0, %%mm1 \n\t" // -d-d-D-D
- "packuswb %%mm1, %%mm0 \n\t" // -d-d-D-D d d D D
- "pshufw $0xFA, %%mm0, %%mm1 \n\t" // -d-d-d-d-D-D-D-D
- "punpcklwd %%mm0, %%mm0 \n\t" // d d d d D D D D
- ::"m"(block[ 0]),
- "m"(block[16]),
- "m"(ff_pw_32)
- );
- __asm__ volatile(
- "movq %0, %%mm2 \n\t"
- "movq %1, %%mm3 \n\t"
- "movq %2, %%mm4 \n\t"
- "movq %3, %%mm5 \n\t"
- "paddusb %%mm0, %%mm2 \n\t"
- "paddusb %%mm0, %%mm3 \n\t"
- "paddusb %%mm0, %%mm4 \n\t"
- "paddusb %%mm0, %%mm5 \n\t"
- "psubusb %%mm1, %%mm2 \n\t"
- "psubusb %%mm1, %%mm3 \n\t"
- "psubusb %%mm1, %%mm4 \n\t"
- "psubusb %%mm1, %%mm5 \n\t"
- "movq %%mm2, %0 \n\t"
- "movq %%mm3, %1 \n\t"
- "movq %%mm4, %2 \n\t"
- "movq %%mm5, %3 \n\t"
- :"+m"(*(uint64_t*)(dst+0*stride)),
- "+m"(*(uint64_t*)(dst+1*stride)),
- "+m"(*(uint64_t*)(dst+2*stride)),
- "+m"(*(uint64_t*)(dst+3*stride))
- );
-}
-
-extern void ff_x264_add8x4_idct_sse2(uint8_t *dst, int16_t *block, int stride);
-
-static void ff_h264_idct_add16_sse2(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
- int i;
- for(i=0; i<16; i+=2)
- if(nnzc[ scan8[i+0] ]|nnzc[ scan8[i+1] ])
- ff_x264_add8x4_idct_sse2 (dst + block_offset[i], block + i*16, stride);
-}
-
-static void ff_h264_idct_add16intra_sse2(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
- int i;
- for(i=0; i<16; i+=2){
- if(nnzc[ scan8[i+0] ]|nnzc[ scan8[i+1] ])
- ff_x264_add8x4_idct_sse2 (dst + block_offset[i], block + i*16, stride);
- else if(block[i*16]|block[i*16+16])
- ff_h264_idct_dc_add8_mmx2(dst + block_offset[i], block + i*16, stride);
- }
-}
-
-static void ff_h264_idct_add8_sse2(uint8_t **dest, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
- int i;
- for(i=16; i<16+8; i+=2){
- if(nnzc[ scan8[i+0] ]|nnzc[ scan8[i+1] ])
- ff_x264_add8x4_idct_sse2 (dest[(i&4)>>2] + block_offset[i], block + i*16, stride);
- else if(block[i*16]|block[i*16+16])
- ff_h264_idct_dc_add8_mmx2(dest[(i&4)>>2] + block_offset[i], block + i*16, stride);
- }
-}
-#endif
+void ff_h264_idct_add_mmx (uint8_t *dst, int16_t *block, int stride);
+void ff_h264_idct8_add_mmx (uint8_t *dst, int16_t *block, int stride);
+void ff_h264_idct8_add_sse2 (uint8_t *dst, int16_t *block, int stride);
+void ff_h264_idct_dc_add_mmx2 (uint8_t *dst, int16_t *block, int stride);
+void ff_h264_idct8_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride);
+
+void ff_h264_idct_add16_mmx (uint8_t *dst, const int *block_offset,
+ DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
+void ff_h264_idct8_add4_mmx (uint8_t *dst, const int *block_offset,
+ DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
+void ff_h264_idct_add16_mmx2 (uint8_t *dst, const int *block_offset,
+ DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
+void ff_h264_idct_add16intra_mmx (uint8_t *dst, const int *block_offset,
+ DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
+void ff_h264_idct_add16intra_mmx2(uint8_t *dst, const int *block_offset,
+ DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
+void ff_h264_idct8_add4_mmx2 (uint8_t *dst, const int *block_offset,
+ DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
+void ff_h264_idct8_add4_sse2 (uint8_t *dst, const int *block_offset,
+ DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
+void ff_h264_idct_add8_mmx (uint8_t **dest, const int *block_offset,
+ DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
+void ff_h264_idct_add8_mmx2 (uint8_t **dest, const int *block_offset,
+ DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
+
+void ff_h264_idct_add16_sse2 (uint8_t *dst, const int *block_offset, DCTELEM *block,
+ int stride, const uint8_t nnzc[6*8]);
+void ff_h264_idct_add16intra_sse2(uint8_t *dst, const int *block_offset, DCTELEM *block,
+ int stride, const uint8_t nnzc[6*8]);
+void ff_h264_idct_add8_sse2 (uint8_t **dest, const int *block_offset, DCTELEM *block,
+ int stride, const uint8_t nnzc[6*8]);
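+
+/* The bodies behind these prototypes no longer live in this file; they are
+ * presumably provided by ported assembly elsewhere in this change, much
+ * like the new h264_weight.asm above. */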
/***********************************/
/* deblocking */
-// out: o = |x-y|>a
-// clobbers: t
-#define DIFF_GT_MMX(x,y,a,o,t)\
- "movq "#y", "#t" \n\t"\
- "movq "#x", "#o" \n\t"\
- "psubusb "#x", "#t" \n\t"\
- "psubusb "#y", "#o" \n\t"\
- "por "#t", "#o" \n\t"\
- "psubusb "#a", "#o" \n\t"
-
-// out: o = |x-y|>a
-// clobbers: t
-#define DIFF_GT2_MMX(x,y,a,o,t)\
- "movq "#y", "#t" \n\t"\
- "movq "#x", "#o" \n\t"\
- "psubusb "#x", "#t" \n\t"\
- "psubusb "#y", "#o" \n\t"\
- "psubusb "#a", "#t" \n\t"\
- "psubusb "#a", "#o" \n\t"\
- "pcmpeqb "#t", "#o" \n\t"\
-
-// in: mm0=p1 mm1=p0 mm2=q0 mm3=q1
-// out: mm5=beta-1, mm7=mask
-// clobbers: mm4,mm6
-#define H264_DEBLOCK_MASK(alpha1, beta1) \
- "pshufw $0, "#alpha1", %%mm4 \n\t"\
- "pshufw $0, "#beta1 ", %%mm5 \n\t"\
- "packuswb %%mm4, %%mm4 \n\t"\
- "packuswb %%mm5, %%mm5 \n\t"\
- DIFF_GT_MMX(%%mm1, %%mm2, %%mm4, %%mm7, %%mm6) /* |p0-q0| > alpha-1 */\
- DIFF_GT_MMX(%%mm0, %%mm1, %%mm5, %%mm4, %%mm6) /* |p1-p0| > beta-1 */\
- "por %%mm4, %%mm7 \n\t"\
- DIFF_GT_MMX(%%mm3, %%mm2, %%mm5, %%mm4, %%mm6) /* |q1-q0| > beta-1 */\
- "por %%mm4, %%mm7 \n\t"\
- "pxor %%mm6, %%mm6 \n\t"\
- "pcmpeqb %%mm6, %%mm7 \n\t"
-
-// in: mm0=p1 mm1=p0 mm2=q0 mm3=q1 mm7=(tc&mask)
-// out: mm1=p0' mm2=q0'
-// clobbers: mm0,3-6
-#define H264_DEBLOCK_P0_Q0(pb_01, pb_3f)\
- "movq %%mm1 , %%mm5 \n\t"\
- "pxor %%mm2 , %%mm5 \n\t" /* p0^q0*/\
- "pand "#pb_01" , %%mm5 \n\t" /* (p0^q0)&1*/\
- "pcmpeqb %%mm4 , %%mm4 \n\t"\
- "pxor %%mm4 , %%mm3 \n\t"\
- "pavgb %%mm0 , %%mm3 \n\t" /* (p1 - q1 + 256)>>1*/\
- "pavgb "MANGLE(ff_pb_3)" , %%mm3 \n\t" /*(((p1 - q1 + 256)>>1)+4)>>1 = 64+2+(p1-q1)>>2*/\
- "pxor %%mm1 , %%mm4 \n\t"\
- "pavgb %%mm2 , %%mm4 \n\t" /* (q0 - p0 + 256)>>1*/\
- "pavgb %%mm5 , %%mm3 \n\t"\
- "paddusb %%mm4 , %%mm3 \n\t" /* d+128+33*/\
- "movq "MANGLE(ff_pb_A1)" , %%mm6 \n\t"\
- "psubusb %%mm3 , %%mm6 \n\t"\
- "psubusb "MANGLE(ff_pb_A1)" , %%mm3 \n\t"\
- "pminub %%mm7 , %%mm6 \n\t"\
- "pminub %%mm7 , %%mm3 \n\t"\
- "psubusb %%mm6 , %%mm1 \n\t"\
- "psubusb %%mm3 , %%mm2 \n\t"\
- "paddusb %%mm3 , %%mm1 \n\t"\
- "paddusb %%mm6 , %%mm2 \n\t"
-
-// in: mm0=p1 mm1=p0 mm2=q0 mm3=q1 mm7=(tc&mask) %8=ff_bone
-// out: (q1addr) = av_clip( (q2+((p0+q0+1)>>1))>>1, q1-tc0, q1+tc0 )
-// clobbers: q2, tmp, tc0
-#define H264_DEBLOCK_Q1(p1, q2, q2addr, q1addr, tc0, tmp)\
- "movq %%mm1, "#tmp" \n\t"\
- "pavgb %%mm2, "#tmp" \n\t"\
- "pavgb "#tmp", "#q2" \n\t" /* avg(p2,avg(p0,q0)) */\
- "pxor "q2addr", "#tmp" \n\t"\
- "pand %9, "#tmp" \n\t" /* (p2^avg(p0,q0))&1 */\
- "psubusb "#tmp", "#q2" \n\t" /* (p2+((p0+q0+1)>>1))>>1 */\
- "movq "#p1", "#tmp" \n\t"\
- "psubusb "#tc0", "#tmp" \n\t"\
- "paddusb "#p1", "#tc0" \n\t"\
- "pmaxub "#tmp", "#q2" \n\t"\
- "pminub "#tc0", "#q2" \n\t"\
- "movq "#q2", "q1addr" \n\t"
-
-static inline void h264_loop_filter_luma_mmx2(uint8_t *pix, int stride, int alpha1, int beta1, int8_t *tc0)
-{
- DECLARE_ALIGNED(8, uint64_t, tmp0)[2];
-
- __asm__ volatile(
- "movq (%2,%4), %%mm0 \n\t" //p1
- "movq (%2,%4,2), %%mm1 \n\t" //p0
- "movq (%3), %%mm2 \n\t" //q0
- "movq (%3,%4), %%mm3 \n\t" //q1
- H264_DEBLOCK_MASK(%7, %8)
-
- "movd %6, %%mm4 \n\t"
- "punpcklbw %%mm4, %%mm4 \n\t"
- "punpcklwd %%mm4, %%mm4 \n\t"
- "pcmpeqb %%mm3, %%mm3 \n\t"
- "movq %%mm4, %%mm6 \n\t"
- "pcmpgtb %%mm3, %%mm4 \n\t"
- "movq %%mm6, %1 \n\t"
- "pand %%mm4, %%mm7 \n\t"
- "movq %%mm7, %0 \n\t"
-
- /* filter p1 */
- "movq (%2), %%mm3 \n\t" //p2
- DIFF_GT2_MMX(%%mm1, %%mm3, %%mm5, %%mm6, %%mm4) // |p2-p0|>beta-1
- "pand %%mm7, %%mm6 \n\t" // mask & |p2-p0|<beta
- "pand %1, %%mm7 \n\t" // mask & tc0
- "movq %%mm7, %%mm4 \n\t"
- "psubb %%mm6, %%mm7 \n\t"
- "pand %%mm4, %%mm6 \n\t" // mask & |p2-p0|<beta & tc0
- H264_DEBLOCK_Q1(%%mm0, %%mm3, "(%2)", "(%2,%4)", %%mm6, %%mm4)
-
- /* filter q1 */
- "movq (%3,%4,2), %%mm4 \n\t" //q2
- DIFF_GT2_MMX(%%mm2, %%mm4, %%mm5, %%mm6, %%mm3) // |q2-q0|>beta-1
- "pand %0, %%mm6 \n\t"
- "movq %1, %%mm5 \n\t" // can be merged with the and below but is slower then
- "pand %%mm6, %%mm5 \n\t"
- "psubb %%mm6, %%mm7 \n\t"
- "movq (%3,%4), %%mm3 \n\t"
- H264_DEBLOCK_Q1(%%mm3, %%mm4, "(%3,%4,2)", "(%3,%4)", %%mm5, %%mm6)
-
- /* filter p0, q0 */
- H264_DEBLOCK_P0_Q0(%9, unused)
- "movq %%mm1, (%2,%4,2) \n\t"
- "movq %%mm2, (%3) \n\t"
-
- : "=m"(tmp0[0]), "=m"(tmp0[1])
- : "r"(pix-3*stride), "r"(pix), "r"((x86_reg)stride),
- "m"(*tmp0/*unused*/), "m"(*(uint32_t*)tc0), "m"(alpha1), "m"(beta1),
- "m"(ff_bone)
- );
-}
-
-static void h264_v_loop_filter_luma_mmx2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
-{
- if((tc0[0] & tc0[1]) >= 0)
- h264_loop_filter_luma_mmx2(pix, stride, alpha-1, beta-1, tc0);
- if((tc0[2] & tc0[3]) >= 0)
- h264_loop_filter_luma_mmx2(pix+8, stride, alpha-1, beta-1, tc0+2);
-}
-static void h264_h_loop_filter_luma_mmx2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
-{
- //FIXME: could cut some load/stores by merging transpose with filter
- // also, it only needs to transpose 6x8
- DECLARE_ALIGNED(8, uint8_t, trans)[8*8];
- int i;
- for(i=0; i<2; i++, pix+=8*stride, tc0+=2) {
- if((tc0[0] & tc0[1]) < 0)
- continue;
- transpose4x4(trans, pix-4, 8, stride);
- transpose4x4(trans +4*8, pix, 8, stride);
- transpose4x4(trans+4, pix-4+4*stride, 8, stride);
- transpose4x4(trans+4+4*8, pix +4*stride, 8, stride);
- h264_loop_filter_luma_mmx2(trans+4*8, 8, alpha-1, beta-1, tc0);
- transpose4x4(pix-2, trans +2*8, stride, 8);
- transpose4x4(pix-2+4*stride, trans+4+2*8, stride, 8);
- }
-}
-
-static inline void h264_loop_filter_chroma_mmx2(uint8_t *pix, int stride, int alpha1, int beta1, int8_t *tc0)
-{
- __asm__ volatile(
- "movq (%0), %%mm0 \n\t" //p1
- "movq (%0,%2), %%mm1 \n\t" //p0
- "movq (%1), %%mm2 \n\t" //q0
- "movq (%1,%2), %%mm3 \n\t" //q1
- H264_DEBLOCK_MASK(%4, %5)
- "movd %3, %%mm6 \n\t"
- "punpcklbw %%mm6, %%mm6 \n\t"
- "pand %%mm6, %%mm7 \n\t" // mm7 = tc&mask
- H264_DEBLOCK_P0_Q0(%6, %7)
- "movq %%mm1, (%0,%2) \n\t"
- "movq %%mm2, (%1) \n\t"
-
- :: "r"(pix-2*stride), "r"(pix), "r"((x86_reg)stride),
- "r"(*(uint32_t*)tc0),
- "m"(alpha1), "m"(beta1), "m"(ff_bone), "m"(ff_pb_3F)
- );
-}
-
-static void h264_v_loop_filter_chroma_mmx2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
-{
- h264_loop_filter_chroma_mmx2(pix, stride, alpha-1, beta-1, tc0);
-}
-
-static void h264_h_loop_filter_chroma_mmx2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
-{
- //FIXME: could cut some load/stores by merging transpose with filter
- DECLARE_ALIGNED(8, uint8_t, trans)[8*4];
- transpose4x4(trans, pix-2, 8, stride);
- transpose4x4(trans+4, pix-2+4*stride, 8, stride);
- h264_loop_filter_chroma_mmx2(trans+2*8, 8, alpha-1, beta-1, tc0);
- transpose4x4(pix-2, trans, stride, 8);
- transpose4x4(pix-2+4*stride, trans+4, stride, 8);
-}
-
-// p0 = (p0 + q1 + 2*p1 + 2) >> 2
-#define H264_FILTER_CHROMA4(p0, p1, q1, one) \
- "movq "#p0", %%mm4 \n\t"\
- "pxor "#q1", %%mm4 \n\t"\
- "pand "#one", %%mm4 \n\t" /* mm4 = (p0^q1)&1 */\
- "pavgb "#q1", "#p0" \n\t"\
- "psubusb %%mm4, "#p0" \n\t"\
- "pavgb "#p1", "#p0" \n\t" /* dst = avg(p1, avg(p0,q1) - ((p0^q1)&1)) */\
-
-static inline void h264_loop_filter_chroma_intra_mmx2(uint8_t *pix, int stride, int alpha1, int beta1)
-{
- __asm__ volatile(
- "movq (%0), %%mm0 \n\t"
- "movq (%0,%2), %%mm1 \n\t"
- "movq (%1), %%mm2 \n\t"
- "movq (%1,%2), %%mm3 \n\t"
- H264_DEBLOCK_MASK(%3, %4)
- "movq %%mm1, %%mm5 \n\t"
- "movq %%mm2, %%mm6 \n\t"
- H264_FILTER_CHROMA4(%%mm1, %%mm0, %%mm3, %5) //p0'
- H264_FILTER_CHROMA4(%%mm2, %%mm3, %%mm0, %5) //q0'
- "psubb %%mm5, %%mm1 \n\t"
- "psubb %%mm6, %%mm2 \n\t"
- "pand %%mm7, %%mm1 \n\t"
- "pand %%mm7, %%mm2 \n\t"
- "paddb %%mm5, %%mm1 \n\t"
- "paddb %%mm6, %%mm2 \n\t"
- "movq %%mm1, (%0,%2) \n\t"
- "movq %%mm2, (%1) \n\t"
- :: "r"(pix-2*stride), "r"(pix), "r"((x86_reg)stride),
- "m"(alpha1), "m"(beta1), "m"(ff_bone)
- );
-}
-
-static void h264_v_loop_filter_chroma_intra_mmx2(uint8_t *pix, int stride, int alpha, int beta)
-{
- h264_loop_filter_chroma_intra_mmx2(pix, stride, alpha-1, beta-1);
-}
-
-static void h264_h_loop_filter_chroma_intra_mmx2(uint8_t *pix, int stride, int alpha, int beta)
-{
- //FIXME: could cut some load/stores by merging transpose with filter
- DECLARE_ALIGNED(8, uint8_t, trans)[8*4];
- transpose4x4(trans, pix-2, 8, stride);
- transpose4x4(trans+4, pix-2+4*stride, 8, stride);
- h264_loop_filter_chroma_intra_mmx2(trans+2*8, 8, alpha-1, beta-1);
- transpose4x4(pix-2, trans, stride, 8);
- transpose4x4(pix-2+4*stride, trans+4, stride, 8);
-}
-
static void h264_loop_filter_strength_mmx2( int16_t bS[2][4][4], uint8_t nnz[40], int8_t ref[2][40], int16_t mv[2][40][2],
int bidir, int edges, int step, int mask_mv0, int mask_mv1, int field ) {
int dir;
@@ -917,1507 +188,162 @@ static void h264_loop_filter_strength_mmx2( int16_t bS[2][4][4], uint8_t nnz[40]
);
}
-/***********************************/
-/* motion compensation */
-
-#define QPEL_H264V_MM(A,B,C,D,E,F,OP,T,Z,d,q)\
- "mov"#q" "#C", "#T" \n\t"\
- "mov"#d" (%0), "#F" \n\t"\
- "paddw "#D", "#T" \n\t"\
- "psllw $2, "#T" \n\t"\
- "psubw "#B", "#T" \n\t"\
- "psubw "#E", "#T" \n\t"\
- "punpcklbw "#Z", "#F" \n\t"\
- "pmullw %4, "#T" \n\t"\
- "paddw %5, "#A" \n\t"\
- "add %2, %0 \n\t"\
- "paddw "#F", "#A" \n\t"\
- "paddw "#A", "#T" \n\t"\
- "psraw $5, "#T" \n\t"\
- "packuswb "#T", "#T" \n\t"\
- OP(T, (%1), A, d)\
- "add %3, %1 \n\t"
-
-#define QPEL_H264HV_MM(A,B,C,D,E,F,OF,T,Z,d,q)\
- "mov"#q" "#C", "#T" \n\t"\
- "mov"#d" (%0), "#F" \n\t"\
- "paddw "#D", "#T" \n\t"\
- "psllw $2, "#T" \n\t"\
- "paddw %4, "#A" \n\t"\
- "psubw "#B", "#T" \n\t"\
- "psubw "#E", "#T" \n\t"\
- "punpcklbw "#Z", "#F" \n\t"\
- "pmullw %3, "#T" \n\t"\
- "paddw "#F", "#A" \n\t"\
- "add %2, %0 \n\t"\
- "paddw "#A", "#T" \n\t"\
- "mov"#q" "#T", "#OF"(%1) \n\t"
-
-#define QPEL_H264V(A,B,C,D,E,F,OP) QPEL_H264V_MM(A,B,C,D,E,F,OP,%%mm6,%%mm7,d,q)
-#define QPEL_H264HV(A,B,C,D,E,F,OF) QPEL_H264HV_MM(A,B,C,D,E,F,OF,%%mm6,%%mm7,d,q)
-#define QPEL_H264V_XMM(A,B,C,D,E,F,OP) QPEL_H264V_MM(A,B,C,D,E,F,OP,%%xmm6,%%xmm7,q,dqa)
-#define QPEL_H264HV_XMM(A,B,C,D,E,F,OF) QPEL_H264HV_MM(A,B,C,D,E,F,OF,%%xmm6,%%xmm7,q,dqa)
-
-
-#define QPEL_H264(OPNAME, OP, MMX)\
-static av_noinline void OPNAME ## h264_qpel4_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
- int h=4;\
-\
- __asm__ volatile(\
- "pxor %%mm7, %%mm7 \n\t"\
- "movq "MANGLE(ff_pw_5) ", %%mm4\n\t"\
- "movq "MANGLE(ff_pw_16)", %%mm5\n\t"\
- "1: \n\t"\
- "movd -1(%0), %%mm1 \n\t"\
- "movd (%0), %%mm2 \n\t"\
- "movd 1(%0), %%mm3 \n\t"\
- "movd 2(%0), %%mm0 \n\t"\
- "punpcklbw %%mm7, %%mm1 \n\t"\
- "punpcklbw %%mm7, %%mm2 \n\t"\
- "punpcklbw %%mm7, %%mm3 \n\t"\
- "punpcklbw %%mm7, %%mm0 \n\t"\
- "paddw %%mm0, %%mm1 \n\t"\
- "paddw %%mm3, %%mm2 \n\t"\
- "movd -2(%0), %%mm0 \n\t"\
- "movd 3(%0), %%mm3 \n\t"\
- "punpcklbw %%mm7, %%mm0 \n\t"\
- "punpcklbw %%mm7, %%mm3 \n\t"\
- "paddw %%mm3, %%mm0 \n\t"\
- "psllw $2, %%mm2 \n\t"\
- "psubw %%mm1, %%mm2 \n\t"\
- "pmullw %%mm4, %%mm2 \n\t"\
- "paddw %%mm5, %%mm0 \n\t"\
- "paddw %%mm2, %%mm0 \n\t"\
- "psraw $5, %%mm0 \n\t"\
- "packuswb %%mm0, %%mm0 \n\t"\
- OP(%%mm0, (%1),%%mm6, d)\
- "add %3, %0 \n\t"\
- "add %4, %1 \n\t"\
- "decl %2 \n\t"\
- " jnz 1b \n\t"\
- : "+a"(src), "+c"(dst), "+g"(h)\
- : "d"((x86_reg)srcStride), "S"((x86_reg)dstStride)\
- : "memory"\
- );\
-}\
-static av_noinline void OPNAME ## h264_qpel4_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\
- int h=4;\
- __asm__ volatile(\
- "pxor %%mm7, %%mm7 \n\t"\
- "movq %0, %%mm4 \n\t"\
- "movq %1, %%mm5 \n\t"\
- :: "m"(ff_pw_5), "m"(ff_pw_16)\
- );\
- do{\
- __asm__ volatile(\
- "movd -1(%0), %%mm1 \n\t"\
- "movd (%0), %%mm2 \n\t"\
- "movd 1(%0), %%mm3 \n\t"\
- "movd 2(%0), %%mm0 \n\t"\
- "punpcklbw %%mm7, %%mm1 \n\t"\
- "punpcklbw %%mm7, %%mm2 \n\t"\
- "punpcklbw %%mm7, %%mm3 \n\t"\
- "punpcklbw %%mm7, %%mm0 \n\t"\
- "paddw %%mm0, %%mm1 \n\t"\
- "paddw %%mm3, %%mm2 \n\t"\
- "movd -2(%0), %%mm0 \n\t"\
- "movd 3(%0), %%mm3 \n\t"\
- "punpcklbw %%mm7, %%mm0 \n\t"\
- "punpcklbw %%mm7, %%mm3 \n\t"\
- "paddw %%mm3, %%mm0 \n\t"\
- "psllw $2, %%mm2 \n\t"\
- "psubw %%mm1, %%mm2 \n\t"\
- "pmullw %%mm4, %%mm2 \n\t"\
- "paddw %%mm5, %%mm0 \n\t"\
- "paddw %%mm2, %%mm0 \n\t"\
- "movd (%2), %%mm3 \n\t"\
- "psraw $5, %%mm0 \n\t"\
- "packuswb %%mm0, %%mm0 \n\t"\
- PAVGB" %%mm3, %%mm0 \n\t"\
- OP(%%mm0, (%1),%%mm6, d)\
- "add %4, %0 \n\t"\
- "add %4, %1 \n\t"\
- "add %3, %2 \n\t"\
- : "+a"(src), "+c"(dst), "+d"(src2)\
- : "D"((x86_reg)src2Stride), "S"((x86_reg)dstStride)\
- : "memory"\
- );\
- }while(--h);\
-}\
-static av_noinline void OPNAME ## h264_qpel4_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
- src -= 2*srcStride;\
- __asm__ volatile(\
- "pxor %%mm7, %%mm7 \n\t"\
- "movd (%0), %%mm0 \n\t"\
- "add %2, %0 \n\t"\
- "movd (%0), %%mm1 \n\t"\
- "add %2, %0 \n\t"\
- "movd (%0), %%mm2 \n\t"\
- "add %2, %0 \n\t"\
- "movd (%0), %%mm3 \n\t"\
- "add %2, %0 \n\t"\
- "movd (%0), %%mm4 \n\t"\
- "add %2, %0 \n\t"\
- "punpcklbw %%mm7, %%mm0 \n\t"\
- "punpcklbw %%mm7, %%mm1 \n\t"\
- "punpcklbw %%mm7, %%mm2 \n\t"\
- "punpcklbw %%mm7, %%mm3 \n\t"\
- "punpcklbw %%mm7, %%mm4 \n\t"\
- QPEL_H264V(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\
- QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
- QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
- QPEL_H264V(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
- \
- : "+a"(src), "+c"(dst)\
- : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
- : "memory"\
- );\
-}\
-static av_noinline void OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
- int h=4;\
- int w=3;\
- src -= 2*srcStride+2;\
- while(w--){\
- __asm__ volatile(\
- "pxor %%mm7, %%mm7 \n\t"\
- "movd (%0), %%mm0 \n\t"\
- "add %2, %0 \n\t"\
- "movd (%0), %%mm1 \n\t"\
- "add %2, %0 \n\t"\
- "movd (%0), %%mm2 \n\t"\
- "add %2, %0 \n\t"\
- "movd (%0), %%mm3 \n\t"\
- "add %2, %0 \n\t"\
- "movd (%0), %%mm4 \n\t"\
- "add %2, %0 \n\t"\
- "punpcklbw %%mm7, %%mm0 \n\t"\
- "punpcklbw %%mm7, %%mm1 \n\t"\
- "punpcklbw %%mm7, %%mm2 \n\t"\
- "punpcklbw %%mm7, %%mm3 \n\t"\
- "punpcklbw %%mm7, %%mm4 \n\t"\
- QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 0*8*3)\
- QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 1*8*3)\
- QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, 2*8*3)\
- QPEL_H264HV(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, 3*8*3)\
- \
- : "+a"(src)\
- : "c"(tmp), "S"((x86_reg)srcStride), "m"(ff_pw_5), "m"(ff_pw_16)\
- : "memory"\
- );\
- tmp += 4;\
- src += 4 - 9*srcStride;\
- }\
- tmp -= 3*4;\
- __asm__ volatile(\
- "1: \n\t"\
- "movq (%0), %%mm0 \n\t"\
- "paddw 10(%0), %%mm0 \n\t"\
- "movq 2(%0), %%mm1 \n\t"\
- "paddw 8(%0), %%mm1 \n\t"\
- "movq 4(%0), %%mm2 \n\t"\
- "paddw 6(%0), %%mm2 \n\t"\
- "psubw %%mm1, %%mm0 \n\t"/*a-b (abccba)*/\
- "psraw $2, %%mm0 \n\t"/*(a-b)/4 */\
- "psubw %%mm1, %%mm0 \n\t"/*(a-b)/4-b */\
- "paddsw %%mm2, %%mm0 \n\t"\
- "psraw $2, %%mm0 \n\t"/*((a-b)/4-b+c)/4 */\
- "paddw %%mm2, %%mm0 \n\t"/*(a-5*b+20*c)/16 */\
- "psraw $6, %%mm0 \n\t"\
- "packuswb %%mm0, %%mm0 \n\t"\
- OP(%%mm0, (%1),%%mm7, d)\
- "add $24, %0 \n\t"\
- "add %3, %1 \n\t"\
- "decl %2 \n\t"\
- " jnz 1b \n\t"\
- : "+a"(tmp), "+c"(dst), "+g"(h)\
- : "S"((x86_reg)dstStride)\
- : "memory"\
- );\
-}\
-\
-static av_noinline void OPNAME ## h264_qpel8_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
- int h=8;\
- __asm__ volatile(\
- "pxor %%mm7, %%mm7 \n\t"\
- "movq "MANGLE(ff_pw_5)", %%mm6\n\t"\
- "1: \n\t"\
- "movq (%0), %%mm0 \n\t"\
- "movq 1(%0), %%mm2 \n\t"\
- "movq %%mm0, %%mm1 \n\t"\
- "movq %%mm2, %%mm3 \n\t"\
- "punpcklbw %%mm7, %%mm0 \n\t"\
- "punpckhbw %%mm7, %%mm1 \n\t"\
- "punpcklbw %%mm7, %%mm2 \n\t"\
- "punpckhbw %%mm7, %%mm3 \n\t"\
- "paddw %%mm2, %%mm0 \n\t"\
- "paddw %%mm3, %%mm1 \n\t"\
- "psllw $2, %%mm0 \n\t"\
- "psllw $2, %%mm1 \n\t"\
- "movq -1(%0), %%mm2 \n\t"\
- "movq 2(%0), %%mm4 \n\t"\
- "movq %%mm2, %%mm3 \n\t"\
- "movq %%mm4, %%mm5 \n\t"\
- "punpcklbw %%mm7, %%mm2 \n\t"\
- "punpckhbw %%mm7, %%mm3 \n\t"\
- "punpcklbw %%mm7, %%mm4 \n\t"\
- "punpckhbw %%mm7, %%mm5 \n\t"\
- "paddw %%mm4, %%mm2 \n\t"\
- "paddw %%mm3, %%mm5 \n\t"\
- "psubw %%mm2, %%mm0 \n\t"\
- "psubw %%mm5, %%mm1 \n\t"\
- "pmullw %%mm6, %%mm0 \n\t"\
- "pmullw %%mm6, %%mm1 \n\t"\
- "movd -2(%0), %%mm2 \n\t"\
- "movd 7(%0), %%mm5 \n\t"\
- "punpcklbw %%mm7, %%mm2 \n\t"\
- "punpcklbw %%mm7, %%mm5 \n\t"\
- "paddw %%mm3, %%mm2 \n\t"\
- "paddw %%mm5, %%mm4 \n\t"\
- "movq "MANGLE(ff_pw_16)", %%mm5\n\t"\
- "paddw %%mm5, %%mm2 \n\t"\
- "paddw %%mm5, %%mm4 \n\t"\
- "paddw %%mm2, %%mm0 \n\t"\
- "paddw %%mm4, %%mm1 \n\t"\
- "psraw $5, %%mm0 \n\t"\
- "psraw $5, %%mm1 \n\t"\
- "packuswb %%mm1, %%mm0 \n\t"\
- OP(%%mm0, (%1),%%mm5, q)\
- "add %3, %0 \n\t"\
- "add %4, %1 \n\t"\
- "decl %2 \n\t"\
- " jnz 1b \n\t"\
- : "+a"(src), "+c"(dst), "+g"(h)\
- : "d"((x86_reg)srcStride), "S"((x86_reg)dstStride)\
- : "memory"\
- );\
-}\
-\
-static av_noinline void OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\
- int h=8;\
- __asm__ volatile(\
- "pxor %%mm7, %%mm7 \n\t"\
- "movq %0, %%mm6 \n\t"\
- :: "m"(ff_pw_5)\
- );\
- do{\
- __asm__ volatile(\
- "movq (%0), %%mm0 \n\t"\
- "movq 1(%0), %%mm2 \n\t"\
- "movq %%mm0, %%mm1 \n\t"\
- "movq %%mm2, %%mm3 \n\t"\
- "punpcklbw %%mm7, %%mm0 \n\t"\
- "punpckhbw %%mm7, %%mm1 \n\t"\
- "punpcklbw %%mm7, %%mm2 \n\t"\
- "punpckhbw %%mm7, %%mm3 \n\t"\
- "paddw %%mm2, %%mm0 \n\t"\
- "paddw %%mm3, %%mm1 \n\t"\
- "psllw $2, %%mm0 \n\t"\
- "psllw $2, %%mm1 \n\t"\
- "movq -1(%0), %%mm2 \n\t"\
- "movq 2(%0), %%mm4 \n\t"\
- "movq %%mm2, %%mm3 \n\t"\
- "movq %%mm4, %%mm5 \n\t"\
- "punpcklbw %%mm7, %%mm2 \n\t"\
- "punpckhbw %%mm7, %%mm3 \n\t"\
- "punpcklbw %%mm7, %%mm4 \n\t"\
- "punpckhbw %%mm7, %%mm5 \n\t"\
- "paddw %%mm4, %%mm2 \n\t"\
- "paddw %%mm3, %%mm5 \n\t"\
- "psubw %%mm2, %%mm0 \n\t"\
- "psubw %%mm5, %%mm1 \n\t"\
- "pmullw %%mm6, %%mm0 \n\t"\
- "pmullw %%mm6, %%mm1 \n\t"\
- "movd -2(%0), %%mm2 \n\t"\
- "movd 7(%0), %%mm5 \n\t"\
- "punpcklbw %%mm7, %%mm2 \n\t"\
- "punpcklbw %%mm7, %%mm5 \n\t"\
- "paddw %%mm3, %%mm2 \n\t"\
- "paddw %%mm5, %%mm4 \n\t"\
- "movq %5, %%mm5 \n\t"\
- "paddw %%mm5, %%mm2 \n\t"\
- "paddw %%mm5, %%mm4 \n\t"\
- "paddw %%mm2, %%mm0 \n\t"\
- "paddw %%mm4, %%mm1 \n\t"\
- "psraw $5, %%mm0 \n\t"\
- "psraw $5, %%mm1 \n\t"\
- "movq (%2), %%mm4 \n\t"\
- "packuswb %%mm1, %%mm0 \n\t"\
- PAVGB" %%mm4, %%mm0 \n\t"\
- OP(%%mm0, (%1),%%mm5, q)\
- "add %4, %0 \n\t"\
- "add %4, %1 \n\t"\
- "add %3, %2 \n\t"\
- : "+a"(src), "+c"(dst), "+d"(src2)\
- : "D"((x86_reg)src2Stride), "S"((x86_reg)dstStride),\
- "m"(ff_pw_16)\
- : "memory"\
- );\
- }while(--h);\
-}\
-\
-static av_noinline void OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
- int w= 2;\
- src -= 2*srcStride;\
- \
- while(w--){\
- __asm__ volatile(\
- "pxor %%mm7, %%mm7 \n\t"\
- "movd (%0), %%mm0 \n\t"\
- "add %2, %0 \n\t"\
- "movd (%0), %%mm1 \n\t"\
- "add %2, %0 \n\t"\
- "movd (%0), %%mm2 \n\t"\
- "add %2, %0 \n\t"\
- "movd (%0), %%mm3 \n\t"\
- "add %2, %0 \n\t"\
- "movd (%0), %%mm4 \n\t"\
- "add %2, %0 \n\t"\
- "punpcklbw %%mm7, %%mm0 \n\t"\
- "punpcklbw %%mm7, %%mm1 \n\t"\
- "punpcklbw %%mm7, %%mm2 \n\t"\
- "punpcklbw %%mm7, %%mm3 \n\t"\
- "punpcklbw %%mm7, %%mm4 \n\t"\
- QPEL_H264V(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\
- QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
- QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
- QPEL_H264V(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
- QPEL_H264V(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP)\
- QPEL_H264V(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP)\
- QPEL_H264V(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\
- QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
- \
- : "+a"(src), "+c"(dst)\
- : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
- : "memory"\
- );\
- if(h==16){\
- __asm__ volatile(\
- QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
- QPEL_H264V(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
- QPEL_H264V(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP)\
- QPEL_H264V(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP)\
- QPEL_H264V(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\
- QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
- QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
- QPEL_H264V(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
- \
- : "+a"(src), "+c"(dst)\
- : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
- : "memory"\
- );\
- }\
- src += 4-(h+5)*srcStride;\
- dst += 4-h*dstStride;\
- }\
-}\
-static av_always_inline void OPNAME ## h264_qpel8or16_hv1_lowpass_ ## MMX(int16_t *tmp, uint8_t *src, int tmpStride, int srcStride, int size){\
- int w = (size+8)>>2;\
- src -= 2*srcStride+2;\
- while(w--){\
- __asm__ volatile(\
- "pxor %%mm7, %%mm7 \n\t"\
- "movd (%0), %%mm0 \n\t"\
- "add %2, %0 \n\t"\
- "movd (%0), %%mm1 \n\t"\
- "add %2, %0 \n\t"\
- "movd (%0), %%mm2 \n\t"\
- "add %2, %0 \n\t"\
- "movd (%0), %%mm3 \n\t"\
- "add %2, %0 \n\t"\
- "movd (%0), %%mm4 \n\t"\
- "add %2, %0 \n\t"\
- "punpcklbw %%mm7, %%mm0 \n\t"\
- "punpcklbw %%mm7, %%mm1 \n\t"\
- "punpcklbw %%mm7, %%mm2 \n\t"\
- "punpcklbw %%mm7, %%mm3 \n\t"\
- "punpcklbw %%mm7, %%mm4 \n\t"\
- QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 0*48)\
- QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 1*48)\
- QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, 2*48)\
- QPEL_H264HV(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, 3*48)\
- QPEL_H264HV(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, 4*48)\
- QPEL_H264HV(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, 5*48)\
- QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 6*48)\
- QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 7*48)\
- : "+a"(src)\
- : "c"(tmp), "S"((x86_reg)srcStride), "m"(ff_pw_5), "m"(ff_pw_16)\
- : "memory"\
- );\
- if(size==16){\
- __asm__ volatile(\
- QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, 8*48)\
- QPEL_H264HV(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, 9*48)\
- QPEL_H264HV(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, 10*48)\
- QPEL_H264HV(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, 11*48)\
- QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 12*48)\
- QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 13*48)\
- QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, 14*48)\
- QPEL_H264HV(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, 15*48)\
- : "+a"(src)\
- : "c"(tmp), "S"((x86_reg)srcStride), "m"(ff_pw_5), "m"(ff_pw_16)\
- : "memory"\
- );\
- }\
- tmp += 4;\
- src += 4 - (size+5)*srcStride;\
- }\
-}\
-static av_always_inline void OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, int dstStride, int tmpStride, int size){\
- int w = size>>4;\
- do{\
- int h = size;\
- __asm__ volatile(\
- "1: \n\t"\
- "movq (%0), %%mm0 \n\t"\
- "movq 8(%0), %%mm3 \n\t"\
- "movq 2(%0), %%mm1 \n\t"\
- "movq 10(%0), %%mm4 \n\t"\
- "paddw %%mm4, %%mm0 \n\t"\
- "paddw %%mm3, %%mm1 \n\t"\
- "paddw 18(%0), %%mm3 \n\t"\
- "paddw 16(%0), %%mm4 \n\t"\
- "movq 4(%0), %%mm2 \n\t"\
- "movq 12(%0), %%mm5 \n\t"\
- "paddw 6(%0), %%mm2 \n\t"\
- "paddw 14(%0), %%mm5 \n\t"\
- "psubw %%mm1, %%mm0 \n\t"\
- "psubw %%mm4, %%mm3 \n\t"\
- "psraw $2, %%mm0 \n\t"\
- "psraw $2, %%mm3 \n\t"\
- "psubw %%mm1, %%mm0 \n\t"\
- "psubw %%mm4, %%mm3 \n\t"\
- "paddsw %%mm2, %%mm0 \n\t"\
- "paddsw %%mm5, %%mm3 \n\t"\
- "psraw $2, %%mm0 \n\t"\
- "psraw $2, %%mm3 \n\t"\
- "paddw %%mm2, %%mm0 \n\t"\
- "paddw %%mm5, %%mm3 \n\t"\
- "psraw $6, %%mm0 \n\t"\
- "psraw $6, %%mm3 \n\t"\
- "packuswb %%mm3, %%mm0 \n\t"\
- OP(%%mm0, (%1),%%mm7, q)\
- "add $48, %0 \n\t"\
- "add %3, %1 \n\t"\
- "decl %2 \n\t"\
- " jnz 1b \n\t"\
- : "+a"(tmp), "+c"(dst), "+g"(h)\
- : "S"((x86_reg)dstStride)\
- : "memory"\
- );\
- tmp += 8 - size*24;\
- dst += 8 - size*dstStride;\
- }while(w--);\
-}\
-\
-static void OPNAME ## h264_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
- OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst , src , dstStride, srcStride, 8);\
-}\
-static av_noinline void OPNAME ## h264_qpel16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
- OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst , src , dstStride, srcStride, 16);\
- OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
-}\
-\
-static void OPNAME ## h264_qpel16_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
- OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst , src , dstStride, srcStride);\
- OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\
- src += 8*srcStride;\
- dst += 8*dstStride;\
- OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst , src , dstStride, srcStride);\
- OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\
-}\
-\
-static av_noinline void OPNAME ## h264_qpel16_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\
- OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst , src , src2 , dstStride, src2Stride);\
- OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride);\
- src += 8*dstStride;\
- dst += 8*dstStride;\
- src2 += 8*src2Stride;\
- OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst , src , src2 , dstStride, src2Stride);\
- OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride);\
-}\
-\
-static av_noinline void OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride, int size){\
- put_h264_qpel8or16_hv1_lowpass_ ## MMX(tmp, src, tmpStride, srcStride, size);\
- OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX(dst, tmp, dstStride, tmpStride, size);\
-}\
-static void OPNAME ## h264_qpel8_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
- OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst , tmp , src , dstStride, tmpStride, srcStride, 8);\
-}\
-\
-static void OPNAME ## h264_qpel16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
- OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst , tmp , src , dstStride, tmpStride, srcStride, 16);\
-}\
-\
-static av_noinline void OPNAME ## pixels4_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, uint8_t *src8, int dstStride, int src8Stride, int h)\
-{\
- __asm__ volatile(\
- "movq (%1), %%mm0 \n\t"\
- "movq 24(%1), %%mm1 \n\t"\
- "psraw $5, %%mm0 \n\t"\
- "psraw $5, %%mm1 \n\t"\
- "packuswb %%mm0, %%mm0 \n\t"\
- "packuswb %%mm1, %%mm1 \n\t"\
- PAVGB" (%0), %%mm0 \n\t"\
- PAVGB" (%0,%3), %%mm1 \n\t"\
- OP(%%mm0, (%2), %%mm4, d)\
- OP(%%mm1, (%2,%4), %%mm5, d)\
- "lea (%0,%3,2), %0 \n\t"\
- "lea (%2,%4,2), %2 \n\t"\
- "movq 48(%1), %%mm0 \n\t"\
- "movq 72(%1), %%mm1 \n\t"\
- "psraw $5, %%mm0 \n\t"\
- "psraw $5, %%mm1 \n\t"\
- "packuswb %%mm0, %%mm0 \n\t"\
- "packuswb %%mm1, %%mm1 \n\t"\
- PAVGB" (%0), %%mm0 \n\t"\
- PAVGB" (%0,%3), %%mm1 \n\t"\
- OP(%%mm0, (%2), %%mm4, d)\
- OP(%%mm1, (%2,%4), %%mm5, d)\
- :"+a"(src8), "+c"(src16), "+d"(dst)\
- :"S"((x86_reg)src8Stride), "D"((x86_reg)dstStride)\
- :"memory");\
-}\
-static av_noinline void OPNAME ## pixels8_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, uint8_t *src8, int dstStride, int src8Stride, int h)\
-{\
- do{\
- __asm__ volatile(\
- "movq (%1), %%mm0 \n\t"\
- "movq 8(%1), %%mm1 \n\t"\
- "movq 48(%1), %%mm2 \n\t"\
- "movq 8+48(%1), %%mm3 \n\t"\
- "psraw $5, %%mm0 \n\t"\
- "psraw $5, %%mm1 \n\t"\
- "psraw $5, %%mm2 \n\t"\
- "psraw $5, %%mm3 \n\t"\
- "packuswb %%mm1, %%mm0 \n\t"\
- "packuswb %%mm3, %%mm2 \n\t"\
- PAVGB" (%0), %%mm0 \n\t"\
- PAVGB" (%0,%3), %%mm2 \n\t"\
- OP(%%mm0, (%2), %%mm5, q)\
- OP(%%mm2, (%2,%4), %%mm5, q)\
- ::"a"(src8), "c"(src16), "d"(dst),\
- "r"((x86_reg)src8Stride), "r"((x86_reg)dstStride)\
- :"memory");\
- src8 += 2L*src8Stride;\
- src16 += 48;\
- dst += 2L*dstStride;\
- }while(h-=2);\
-}\
-static void OPNAME ## pixels16_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, uint8_t *src8, int dstStride, int src8Stride, int h)\
-{\
- OPNAME ## pixels8_l2_shift5_ ## MMX(dst , src16 , src8 , dstStride, src8Stride, h);\
- OPNAME ## pixels8_l2_shift5_ ## MMX(dst+8, src16+8, src8+8, dstStride, src8Stride, h);\
-}\
-
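[Reviewer note: the QPEL_H264 macro removed above implements the H.264 six-tap half-pel filter (1, -5, 20, 20, -5, 1). Each row pairs the symmetric taps and evaluates 20*(C+D) - 5*(B+E) as 5*(4*(C+D) - (B+E)), so a single pmullw by ff_pw_5 covers both weighted terms. A scalar sketch of the same arithmetic, for reference only, not part of the patch:]

#include <stdint.h>

/* out = clip8((A - 5*B + 20*C + 20*D - 5*E + F + 16) >> 5), evaluated
 * exactly as the MMX rows do: one shift, one subtract, one multiply by 5 */
static inline uint8_t qpel6tap_ref(const uint8_t *s)
{
    int cd = s[0]  + s[1];   /* 20-weighted centre pair  */
    int be = s[-1] + s[2];   /* -5-weighted inner pair   */
    int af = s[-2] + s[3];   /* unit-weighted outer pair */
    int v  = (5 * ((cd << 2) - be) + af + 16) >> 5;
    return v < 0 ? 0 : v > 255 ? 255 : v;
}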
-
-#if ARCH_X86_64
-#define QPEL_H264_H16_XMM(OPNAME, OP, MMX)\
-static av_noinline void OPNAME ## h264_qpel16_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\
- int h=16;\
- __asm__ volatile(\
- "pxor %%xmm15, %%xmm15 \n\t"\
- "movdqa %6, %%xmm14 \n\t"\
- "movdqa %7, %%xmm13 \n\t"\
- "1: \n\t"\
- "lddqu 6(%0), %%xmm1 \n\t"\
- "lddqu -2(%0), %%xmm7 \n\t"\
- "movdqa %%xmm1, %%xmm0 \n\t"\
- "punpckhbw %%xmm15, %%xmm1 \n\t"\
- "punpcklbw %%xmm15, %%xmm0 \n\t"\
- "punpcklbw %%xmm15, %%xmm7 \n\t"\
- "movdqa %%xmm1, %%xmm2 \n\t"\
- "movdqa %%xmm0, %%xmm6 \n\t"\
- "movdqa %%xmm1, %%xmm3 \n\t"\
- "movdqa %%xmm0, %%xmm8 \n\t"\
- "movdqa %%xmm1, %%xmm4 \n\t"\
- "movdqa %%xmm0, %%xmm9 \n\t"\
- "movdqa %%xmm0, %%xmm12 \n\t"\
- "movdqa %%xmm1, %%xmm11 \n\t"\
- "palignr $10,%%xmm0, %%xmm11\n\t"\
- "palignr $10,%%xmm7, %%xmm12\n\t"\
- "palignr $2, %%xmm0, %%xmm4 \n\t"\
- "palignr $2, %%xmm7, %%xmm9 \n\t"\
- "palignr $4, %%xmm0, %%xmm3 \n\t"\
- "palignr $4, %%xmm7, %%xmm8 \n\t"\
- "palignr $6, %%xmm0, %%xmm2 \n\t"\
- "palignr $6, %%xmm7, %%xmm6 \n\t"\
- "paddw %%xmm0 ,%%xmm11 \n\t"\
- "palignr $8, %%xmm0, %%xmm1 \n\t"\
- "palignr $8, %%xmm7, %%xmm0 \n\t"\
- "paddw %%xmm12,%%xmm7 \n\t"\
- "paddw %%xmm3, %%xmm2 \n\t"\
- "paddw %%xmm8, %%xmm6 \n\t"\
- "paddw %%xmm4, %%xmm1 \n\t"\
- "paddw %%xmm9, %%xmm0 \n\t"\
- "psllw $2, %%xmm2 \n\t"\
- "psllw $2, %%xmm6 \n\t"\
- "psubw %%xmm1, %%xmm2 \n\t"\
- "psubw %%xmm0, %%xmm6 \n\t"\
- "paddw %%xmm13,%%xmm11 \n\t"\
- "paddw %%xmm13,%%xmm7 \n\t"\
- "pmullw %%xmm14,%%xmm2 \n\t"\
- "pmullw %%xmm14,%%xmm6 \n\t"\
- "lddqu (%2), %%xmm3 \n\t"\
- "paddw %%xmm11,%%xmm2 \n\t"\
- "paddw %%xmm7, %%xmm6 \n\t"\
- "psraw $5, %%xmm2 \n\t"\
- "psraw $5, %%xmm6 \n\t"\
- "packuswb %%xmm2,%%xmm6 \n\t"\
- "pavgb %%xmm3, %%xmm6 \n\t"\
- OP(%%xmm6, (%1), %%xmm4, dqa)\
- "add %5, %0 \n\t"\
- "add %5, %1 \n\t"\
- "add %4, %2 \n\t"\
- "decl %3 \n\t"\
- "jg 1b \n\t"\
- : "+a"(src), "+c"(dst), "+d"(src2), "+g"(h)\
- : "D"((x86_reg)src2Stride), "S"((x86_reg)dstStride),\
- "m"(ff_pw_5), "m"(ff_pw_16)\
- : "memory"\
- );\
-}
-#else // ARCH_X86_64
-#define QPEL_H264_H16_XMM(OPNAME, OP, MMX)\
-static av_noinline void OPNAME ## h264_qpel16_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\
- OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst , src , src2 , dstStride, src2Stride);\
- OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride);\
- src += 8*dstStride;\
- dst += 8*dstStride;\
- src2 += 8*src2Stride;\
- OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst , src , src2 , dstStride, src2Stride);\
- OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride);\
-}
-#endif // ARCH_X86_64
-
-#define QPEL_H264_H_XMM(OPNAME, OP, MMX)\
-static av_noinline void OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\
- int h=8;\
- __asm__ volatile(\
- "pxor %%xmm7, %%xmm7 \n\t"\
- "movdqa %0, %%xmm6 \n\t"\
- :: "m"(ff_pw_5)\
- );\
- do{\
- __asm__ volatile(\
- "lddqu -2(%0), %%xmm1 \n\t"\
- "movdqa %%xmm1, %%xmm0 \n\t"\
- "punpckhbw %%xmm7, %%xmm1 \n\t"\
- "punpcklbw %%xmm7, %%xmm0 \n\t"\
- "movdqa %%xmm1, %%xmm2 \n\t"\
- "movdqa %%xmm1, %%xmm3 \n\t"\
- "movdqa %%xmm1, %%xmm4 \n\t"\
- "movdqa %%xmm1, %%xmm5 \n\t"\
- "palignr $2, %%xmm0, %%xmm4 \n\t"\
- "palignr $4, %%xmm0, %%xmm3 \n\t"\
- "palignr $6, %%xmm0, %%xmm2 \n\t"\
- "palignr $8, %%xmm0, %%xmm1 \n\t"\
- "palignr $10,%%xmm0, %%xmm5 \n\t"\
- "paddw %%xmm5, %%xmm0 \n\t"\
- "paddw %%xmm3, %%xmm2 \n\t"\
- "paddw %%xmm4, %%xmm1 \n\t"\
- "psllw $2, %%xmm2 \n\t"\
- "movq (%2), %%xmm3 \n\t"\
- "psubw %%xmm1, %%xmm2 \n\t"\
- "paddw %5, %%xmm0 \n\t"\
- "pmullw %%xmm6, %%xmm2 \n\t"\
- "paddw %%xmm0, %%xmm2 \n\t"\
- "psraw $5, %%xmm2 \n\t"\
- "packuswb %%xmm2, %%xmm2 \n\t"\
- "pavgb %%xmm3, %%xmm2 \n\t"\
- OP(%%xmm2, (%1), %%xmm4, q)\
- "add %4, %0 \n\t"\
- "add %4, %1 \n\t"\
- "add %3, %2 \n\t"\
- : "+a"(src), "+c"(dst), "+d"(src2)\
- : "D"((x86_reg)src2Stride), "S"((x86_reg)dstStride),\
- "m"(ff_pw_16)\
- : "memory"\
- );\
- }while(--h);\
-}\
-QPEL_H264_H16_XMM(OPNAME, OP, MMX)\
-\
-static av_noinline void OPNAME ## h264_qpel8_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
- int h=8;\
- __asm__ volatile(\
- "pxor %%xmm7, %%xmm7 \n\t"\
- "movdqa "MANGLE(ff_pw_5)", %%xmm6\n\t"\
- "1: \n\t"\
- "lddqu -2(%0), %%xmm1 \n\t"\
- "movdqa %%xmm1, %%xmm0 \n\t"\
- "punpckhbw %%xmm7, %%xmm1 \n\t"\
- "punpcklbw %%xmm7, %%xmm0 \n\t"\
- "movdqa %%xmm1, %%xmm2 \n\t"\
- "movdqa %%xmm1, %%xmm3 \n\t"\
- "movdqa %%xmm1, %%xmm4 \n\t"\
- "movdqa %%xmm1, %%xmm5 \n\t"\
- "palignr $2, %%xmm0, %%xmm4 \n\t"\
- "palignr $4, %%xmm0, %%xmm3 \n\t"\
- "palignr $6, %%xmm0, %%xmm2 \n\t"\
- "palignr $8, %%xmm0, %%xmm1 \n\t"\
- "palignr $10,%%xmm0, %%xmm5 \n\t"\
- "paddw %%xmm5, %%xmm0 \n\t"\
- "paddw %%xmm3, %%xmm2 \n\t"\
- "paddw %%xmm4, %%xmm1 \n\t"\
- "psllw $2, %%xmm2 \n\t"\
- "psubw %%xmm1, %%xmm2 \n\t"\
- "paddw "MANGLE(ff_pw_16)", %%xmm0\n\t"\
- "pmullw %%xmm6, %%xmm2 \n\t"\
- "paddw %%xmm0, %%xmm2 \n\t"\
- "psraw $5, %%xmm2 \n\t"\
- "packuswb %%xmm2, %%xmm2 \n\t"\
- OP(%%xmm2, (%1), %%xmm4, q)\
- "add %3, %0 \n\t"\
- "add %4, %1 \n\t"\
- "decl %2 \n\t"\
- " jnz 1b \n\t"\
- : "+a"(src), "+c"(dst), "+g"(h)\
- : "D"((x86_reg)srcStride), "S"((x86_reg)dstStride)\
- : "memory"\
- );\
-}\
-static void OPNAME ## h264_qpel16_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
- OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst , src , dstStride, srcStride);\
- OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\
- src += 8*srcStride;\
- dst += 8*dstStride;\
- OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst , src , dstStride, srcStride);\
- OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\
-}\
-
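[Reviewer note: the QPEL_H264_H_XMM path above leans on SSSE3 palignr: one lddqu fetches the row, and each shifted tap window is re-derived in registers (byte shifts of 2..10 on the unpacked 16-bit words) instead of issuing five more unaligned loads. A rough intrinsics rendering of the trick; illustrative only, since the real code shifts the unpacked words inside the asm:]

#include <stdint.h>
#include <tmmintrin.h>

/* w[n] holds bytes src[-2+n .. 13+n]: two loads, five palignr ops,
 * and no further memory traffic for the shifted windows */
static inline void qpel_h_windows(const uint8_t *src, __m128i w[6])
{
    __m128i lo = _mm_loadu_si128((const __m128i *)(src - 2));
    __m128i hi = _mm_loadu_si128((const __m128i *)(src + 14));
    w[0] = lo;
    w[1] = _mm_alignr_epi8(hi, lo, 1);
    w[2] = _mm_alignr_epi8(hi, lo, 2);
    w[3] = _mm_alignr_epi8(hi, lo, 3);
    w[4] = _mm_alignr_epi8(hi, lo, 4);
    w[5] = _mm_alignr_epi8(hi, lo, 5);
}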
-#define QPEL_H264_V_XMM(OPNAME, OP, MMX)\
-static av_noinline void OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
- src -= 2*srcStride;\
- \
- __asm__ volatile(\
- "pxor %%xmm7, %%xmm7 \n\t"\
- "movq (%0), %%xmm0 \n\t"\
- "add %2, %0 \n\t"\
- "movq (%0), %%xmm1 \n\t"\
- "add %2, %0 \n\t"\
- "movq (%0), %%xmm2 \n\t"\
- "add %2, %0 \n\t"\
- "movq (%0), %%xmm3 \n\t"\
- "add %2, %0 \n\t"\
- "movq (%0), %%xmm4 \n\t"\
- "add %2, %0 \n\t"\
- "punpcklbw %%xmm7, %%xmm0 \n\t"\
- "punpcklbw %%xmm7, %%xmm1 \n\t"\
- "punpcklbw %%xmm7, %%xmm2 \n\t"\
- "punpcklbw %%xmm7, %%xmm3 \n\t"\
- "punpcklbw %%xmm7, %%xmm4 \n\t"\
- QPEL_H264V_XMM(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, OP)\
- QPEL_H264V_XMM(%%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, OP)\
- QPEL_H264V_XMM(%%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, OP)\
- QPEL_H264V_XMM(%%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, OP)\
- QPEL_H264V_XMM(%%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, %%xmm3, OP)\
- QPEL_H264V_XMM(%%xmm5, %%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, OP)\
- QPEL_H264V_XMM(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, OP)\
- QPEL_H264V_XMM(%%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, OP)\
- \
- : "+a"(src), "+c"(dst)\
- : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
- : "memory"\
- );\
- if(h==16){\
- __asm__ volatile(\
- QPEL_H264V_XMM(%%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, OP)\
- QPEL_H264V_XMM(%%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, OP)\
- QPEL_H264V_XMM(%%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, %%xmm3, OP)\
- QPEL_H264V_XMM(%%xmm5, %%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, OP)\
- QPEL_H264V_XMM(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, OP)\
- QPEL_H264V_XMM(%%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, OP)\
- QPEL_H264V_XMM(%%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, OP)\
- QPEL_H264V_XMM(%%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, OP)\
- \
- : "+a"(src), "+c"(dst)\
- : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
- : "memory"\
- );\
- }\
-}\
-static void OPNAME ## h264_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
- OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst , src , dstStride, srcStride, 8);\
-}\
-static av_noinline void OPNAME ## h264_qpel16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
- OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst , src , dstStride, srcStride, 16);\
- OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
-}
-
-static av_always_inline void put_h264_qpel8or16_hv1_lowpass_sse2(int16_t *tmp, uint8_t *src, int tmpStride, int srcStride, int size){
- int w = (size+8)>>3;
- src -= 2*srcStride+2;
- while(w--){
- __asm__ volatile(
- "pxor %%xmm7, %%xmm7 \n\t"
- "movq (%0), %%xmm0 \n\t"
- "add %2, %0 \n\t"
- "movq (%0), %%xmm1 \n\t"
- "add %2, %0 \n\t"
- "movq (%0), %%xmm2 \n\t"
- "add %2, %0 \n\t"
- "movq (%0), %%xmm3 \n\t"
- "add %2, %0 \n\t"
- "movq (%0), %%xmm4 \n\t"
- "add %2, %0 \n\t"
- "punpcklbw %%xmm7, %%xmm0 \n\t"
- "punpcklbw %%xmm7, %%xmm1 \n\t"
- "punpcklbw %%xmm7, %%xmm2 \n\t"
- "punpcklbw %%xmm7, %%xmm3 \n\t"
- "punpcklbw %%xmm7, %%xmm4 \n\t"
- QPEL_H264HV_XMM(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, 0*48)
- QPEL_H264HV_XMM(%%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, 1*48)
- QPEL_H264HV_XMM(%%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, 2*48)
- QPEL_H264HV_XMM(%%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, 3*48)
- QPEL_H264HV_XMM(%%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, %%xmm3, 4*48)
- QPEL_H264HV_XMM(%%xmm5, %%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, 5*48)
- QPEL_H264HV_XMM(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, 6*48)
- QPEL_H264HV_XMM(%%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, 7*48)
- : "+a"(src)
- : "c"(tmp), "S"((x86_reg)srcStride), "m"(ff_pw_5), "m"(ff_pw_16)
- : "memory"
- );
- if(size==16){
- __asm__ volatile(
- QPEL_H264HV_XMM(%%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, 8*48)
- QPEL_H264HV_XMM(%%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, 9*48)
- QPEL_H264HV_XMM(%%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, %%xmm3, 10*48)
- QPEL_H264HV_XMM(%%xmm5, %%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, 11*48)
- QPEL_H264HV_XMM(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, 12*48)
- QPEL_H264HV_XMM(%%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, 13*48)
- QPEL_H264HV_XMM(%%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, 14*48)
- QPEL_H264HV_XMM(%%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, 15*48)
- : "+a"(src)
- : "c"(tmp), "S"((x86_reg)srcStride), "m"(ff_pw_5), "m"(ff_pw_16)
- : "memory"
- );
- }
- tmp += 8;
- src += 8 - (size+5)*srcStride;
- }
-}
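[Reviewer note: the HV case is split into two passes. hv1 runs the vertical six-tap and parks the unscaled 16-bit sums in tmp (rows of 48 bytes); hv2 then applies the horizontal six-tap to those words. To keep the larger intermediates inside 16 bits, hv2 evaluates (a - 5b + 20c)/16 in stages, which is the identity the inline comments in the MMX version earlier allude to. Exact over the rationals; the psraw truncation makes the asm a close approximation:]

/* ((a - b)/4 - b + c)/4 + c == (a - 5b + 20c)/16,
 * with a = t[0]+t[5], b = t[1]+t[4], c = t[2]+t[3] from the tmp row */
static inline int hv2_core_ref(int a, int b, int c)
{
    return ((((a - b) >> 2) - b + c) >> 2) + c;
}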
-
-#define QPEL_H264_HV2_XMM(OPNAME, OP, MMX)\
-static av_always_inline void OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, int dstStride, int tmpStride, int size){\
- int h = size;\
- if(size == 16){\
- __asm__ volatile(\
- "1: \n\t"\
- "movdqa 32(%0), %%xmm4 \n\t"\
- "movdqa 16(%0), %%xmm5 \n\t"\
- "movdqa (%0), %%xmm7 \n\t"\
- "movdqa %%xmm4, %%xmm3 \n\t"\
- "movdqa %%xmm4, %%xmm2 \n\t"\
- "movdqa %%xmm4, %%xmm1 \n\t"\
- "movdqa %%xmm4, %%xmm0 \n\t"\
- "palignr $10, %%xmm5, %%xmm0 \n\t"\
- "palignr $8, %%xmm5, %%xmm1 \n\t"\
- "palignr $6, %%xmm5, %%xmm2 \n\t"\
- "palignr $4, %%xmm5, %%xmm3 \n\t"\
- "palignr $2, %%xmm5, %%xmm4 \n\t"\
- "paddw %%xmm5, %%xmm0 \n\t"\
- "paddw %%xmm4, %%xmm1 \n\t"\
- "paddw %%xmm3, %%xmm2 \n\t"\
- "movdqa %%xmm5, %%xmm6 \n\t"\
- "movdqa %%xmm5, %%xmm4 \n\t"\
- "movdqa %%xmm5, %%xmm3 \n\t"\
- "palignr $8, %%xmm7, %%xmm4 \n\t"\
- "palignr $2, %%xmm7, %%xmm6 \n\t"\
- "palignr $10, %%xmm7, %%xmm3 \n\t"\
- "paddw %%xmm6, %%xmm4 \n\t"\
- "movdqa %%xmm5, %%xmm6 \n\t"\
- "palignr $6, %%xmm7, %%xmm5 \n\t"\
- "palignr $4, %%xmm7, %%xmm6 \n\t"\
- "paddw %%xmm7, %%xmm3 \n\t"\
- "paddw %%xmm6, %%xmm5 \n\t"\
- \
- "psubw %%xmm1, %%xmm0 \n\t"\
- "psubw %%xmm4, %%xmm3 \n\t"\
- "psraw $2, %%xmm0 \n\t"\
- "psraw $2, %%xmm3 \n\t"\
- "psubw %%xmm1, %%xmm0 \n\t"\
- "psubw %%xmm4, %%xmm3 \n\t"\
- "paddw %%xmm2, %%xmm0 \n\t"\
- "paddw %%xmm5, %%xmm3 \n\t"\
- "psraw $2, %%xmm0 \n\t"\
- "psraw $2, %%xmm3 \n\t"\
- "paddw %%xmm2, %%xmm0 \n\t"\
- "paddw %%xmm5, %%xmm3 \n\t"\
- "psraw $6, %%xmm0 \n\t"\
- "psraw $6, %%xmm3 \n\t"\
- "packuswb %%xmm0, %%xmm3 \n\t"\
- OP(%%xmm3, (%1), %%xmm7, dqa)\
- "add $48, %0 \n\t"\
- "add %3, %1 \n\t"\
- "decl %2 \n\t"\
- " jnz 1b \n\t"\
- : "+a"(tmp), "+c"(dst), "+g"(h)\
- : "S"((x86_reg)dstStride)\
- : "memory"\
- );\
- }else{\
- __asm__ volatile(\
- "1: \n\t"\
- "movdqa 16(%0), %%xmm1 \n\t"\
- "movdqa (%0), %%xmm0 \n\t"\
- "movdqa %%xmm1, %%xmm2 \n\t"\
- "movdqa %%xmm1, %%xmm3 \n\t"\
- "movdqa %%xmm1, %%xmm4 \n\t"\
- "movdqa %%xmm1, %%xmm5 \n\t"\
- "palignr $10, %%xmm0, %%xmm5 \n\t"\
- "palignr $8, %%xmm0, %%xmm4 \n\t"\
- "palignr $6, %%xmm0, %%xmm3 \n\t"\
- "palignr $4, %%xmm0, %%xmm2 \n\t"\
- "palignr $2, %%xmm0, %%xmm1 \n\t"\
- "paddw %%xmm5, %%xmm0 \n\t"\
- "paddw %%xmm4, %%xmm1 \n\t"\
- "paddw %%xmm3, %%xmm2 \n\t"\
- "psubw %%xmm1, %%xmm0 \n\t"\
- "psraw $2, %%xmm0 \n\t"\
- "psubw %%xmm1, %%xmm0 \n\t"\
- "paddw %%xmm2, %%xmm0 \n\t"\
- "psraw $2, %%xmm0 \n\t"\
- "paddw %%xmm2, %%xmm0 \n\t"\
- "psraw $6, %%xmm0 \n\t"\
- "packuswb %%xmm0, %%xmm0 \n\t"\
- OP(%%xmm0, (%1), %%xmm7, q)\
- "add $48, %0 \n\t"\
- "add %3, %1 \n\t"\
- "decl %2 \n\t"\
- " jnz 1b \n\t"\
- : "+a"(tmp), "+c"(dst), "+g"(h)\
- : "S"((x86_reg)dstStride)\
- : "memory"\
- );\
- }\
-}
-
-#define QPEL_H264_HV_XMM(OPNAME, OP, MMX)\
-static av_noinline void OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride, int size){\
- put_h264_qpel8or16_hv1_lowpass_sse2(tmp, src, tmpStride, srcStride, size);\
- OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX(dst, tmp, dstStride, tmpStride, size);\
-}\
-static void OPNAME ## h264_qpel8_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
- OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst, tmp, src, dstStride, tmpStride, srcStride, 8);\
-}\
-static void OPNAME ## h264_qpel16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
- OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst, tmp, src, dstStride, tmpStride, srcStride, 16);\
-}\
-
-#define put_pixels8_l2_sse2 put_pixels8_l2_mmx2
-#define avg_pixels8_l2_sse2 avg_pixels8_l2_mmx2
-#define put_pixels16_l2_sse2 put_pixels16_l2_mmx2
-#define avg_pixels16_l2_sse2 avg_pixels16_l2_mmx2
-#define put_pixels8_l2_ssse3 put_pixels8_l2_mmx2
-#define avg_pixels8_l2_ssse3 avg_pixels8_l2_mmx2
-#define put_pixels16_l2_ssse3 put_pixels16_l2_mmx2
-#define avg_pixels16_l2_ssse3 avg_pixels16_l2_mmx2
-
-#define put_pixels8_l2_shift5_sse2 put_pixels8_l2_shift5_mmx2
-#define avg_pixels8_l2_shift5_sse2 avg_pixels8_l2_shift5_mmx2
-#define put_pixels16_l2_shift5_sse2 put_pixels16_l2_shift5_mmx2
-#define avg_pixels16_l2_shift5_sse2 avg_pixels16_l2_shift5_mmx2
-#define put_pixels8_l2_shift5_ssse3 put_pixels8_l2_shift5_mmx2
-#define avg_pixels8_l2_shift5_ssse3 avg_pixels8_l2_shift5_mmx2
-#define put_pixels16_l2_shift5_ssse3 put_pixels16_l2_shift5_mmx2
-#define avg_pixels16_l2_shift5_ssse3 avg_pixels16_l2_shift5_mmx2
-
-#define put_h264_qpel8_h_lowpass_l2_sse2 put_h264_qpel8_h_lowpass_l2_mmx2
-#define avg_h264_qpel8_h_lowpass_l2_sse2 avg_h264_qpel8_h_lowpass_l2_mmx2
-#define put_h264_qpel16_h_lowpass_l2_sse2 put_h264_qpel16_h_lowpass_l2_mmx2
-#define avg_h264_qpel16_h_lowpass_l2_sse2 avg_h264_qpel16_h_lowpass_l2_mmx2
-
-#define put_h264_qpel8_v_lowpass_ssse3 put_h264_qpel8_v_lowpass_sse2
-#define avg_h264_qpel8_v_lowpass_ssse3 avg_h264_qpel8_v_lowpass_sse2
-#define put_h264_qpel16_v_lowpass_ssse3 put_h264_qpel16_v_lowpass_sse2
-#define avg_h264_qpel16_v_lowpass_ssse3 avg_h264_qpel16_v_lowpass_sse2
-
-#define put_h264_qpel8or16_hv2_lowpass_sse2 put_h264_qpel8or16_hv2_lowpass_mmx2
-#define avg_h264_qpel8or16_hv2_lowpass_sse2 avg_h264_qpel8or16_hv2_lowpass_mmx2
-
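[Reviewer note: the #define block above is how partial ISA coverage stays linkable. SSE2/SSSE3 only bring native bodies for the vertical and HV paths, so every other primitive a later H264_MC_816 instantiation references is aliased back to its mmx2 twin at the preprocessor level, e.g. put_pixels16_l2_ssse3 simply names put_pixels16_l2_mmx2.]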
-#define H264_MC(OPNAME, SIZE, MMX, ALIGN) \
-H264_MC_C(OPNAME, SIZE, MMX, ALIGN)\
-H264_MC_V(OPNAME, SIZE, MMX, ALIGN)\
-H264_MC_H(OPNAME, SIZE, MMX, ALIGN)\
-H264_MC_HV(OPNAME, SIZE, MMX, ALIGN)\
-
-static void put_h264_qpel16_mc00_sse2 (uint8_t *dst, uint8_t *src, int stride){
- put_pixels16_sse2(dst, src, stride, 16);
-}
-static void avg_h264_qpel16_mc00_sse2 (uint8_t *dst, uint8_t *src, int stride){
- avg_pixels16_sse2(dst, src, stride, 16);
-}
-#define put_h264_qpel8_mc00_sse2 put_h264_qpel8_mc00_mmx2
-#define avg_h264_qpel8_mc00_sse2 avg_h264_qpel8_mc00_mmx2
-
-#define H264_MC_C(OPNAME, SIZE, MMX, ALIGN) \
-static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## MMX (uint8_t *dst, uint8_t *src, int stride){\
- OPNAME ## pixels ## SIZE ## _ ## MMX(dst, src, stride, SIZE);\
-}\
-
-#define H264_MC_H(OPNAME, SIZE, MMX, ALIGN) \
-static void OPNAME ## h264_qpel ## SIZE ## _mc10_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
- OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, src, stride, stride);\
-}\
-\
-static void OPNAME ## h264_qpel ## SIZE ## _mc20_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
- OPNAME ## h264_qpel ## SIZE ## _h_lowpass_ ## MMX(dst, src, stride, stride);\
-}\
-\
-static void OPNAME ## h264_qpel ## SIZE ## _mc30_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
- OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, src+1, stride, stride);\
-}\
-
-#define H264_MC_V(OPNAME, SIZE, MMX, ALIGN) \
-static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
- DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*SIZE];\
- put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\
- OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, src, temp, stride, stride, SIZE);\
-}\
-\
-static void OPNAME ## h264_qpel ## SIZE ## _mc02_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
- OPNAME ## h264_qpel ## SIZE ## _v_lowpass_ ## MMX(dst, src, stride, stride);\
-}\
-\
-static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
- DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*SIZE];\
- put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\
- OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, src+stride, temp, stride, stride, SIZE);\
-}\
-
-#define H264_MC_HV(OPNAME, SIZE, MMX, ALIGN) \
-static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
- DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*SIZE];\
- put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\
- OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, temp, stride, SIZE);\
-}\
-\
-static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
- DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*SIZE];\
- put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src+1, SIZE, stride);\
- OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, temp, stride, SIZE);\
-}\
-\
-static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
- DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*SIZE];\
- put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\
- OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, temp, stride, SIZE);\
-}\
-\
-static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
- DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*SIZE];\
- put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src+1, SIZE, stride);\
- OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, temp, stride, SIZE);\
-}\
-\
-static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
- DECLARE_ALIGNED(ALIGN, uint16_t, temp)[SIZE*(SIZE<8?12:24)];\
- OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(dst, temp, src, stride, SIZE, stride);\
-}\
-\
-static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
- DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE];\
- uint8_t * const halfHV= temp;\
- int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
- assert(((int)temp & 7) == 0);\
- put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\
- OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, halfHV, stride, SIZE);\
-}\
-\
-static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
- DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE];\
- uint8_t * const halfHV= temp;\
- int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
- assert(((int)temp & 7) == 0);\
- put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\
- OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, halfHV, stride, SIZE);\
-}\
-\
-static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
- DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE];\
- uint8_t * const halfHV= temp;\
- int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
- assert(((int)temp & 7) == 0);\
- put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\
- OPNAME ## pixels ## SIZE ## _l2_shift5_ ## MMX(dst, halfV+2, halfHV, stride, SIZE, SIZE);\
-}\
-\
-static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
- DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE];\
- uint8_t * const halfHV= temp;\
- int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
- assert(((int)temp & 7) == 0);\
- put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\
- OPNAME ## pixels ## SIZE ## _l2_shift5_ ## MMX(dst, halfV+3, halfHV, stride, SIZE, SIZE);\
-}\
-
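[Reviewer note on the H264_MC_* naming above: _mcXY_ encodes the quarter-pel position, X horizontal and Y vertical, each 0..3. Even offsets come straight from the six-tap half-pel filter; odd offsets average a half-pel plane with the nearer full- or half-pel samples, which is why the l2 variants take a second source (src vs. src+1, src vs. src+stride) and why mc12/mc32 blend the HV plane with a shifted column of the vertical intermediate (halfV+2 / halfV+3).]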
-#define H264_MC_4816(MMX)\
-H264_MC(put_, 4, MMX, 8)\
-H264_MC(put_, 8, MMX, 8)\
-H264_MC(put_, 16,MMX, 8)\
-H264_MC(avg_, 4, MMX, 8)\
-H264_MC(avg_, 8, MMX, 8)\
-H264_MC(avg_, 16,MMX, 8)\
-
-#define H264_MC_816(QPEL, XMM)\
-QPEL(put_, 8, XMM, 16)\
-QPEL(put_, 16,XMM, 16)\
-QPEL(avg_, 8, XMM, 16)\
-QPEL(avg_, 16,XMM, 16)\
-
-
-#define AVG_3DNOW_OP(a,b,temp, size) \
-"mov" #size " " #b ", " #temp " \n\t"\
-"pavgusb " #temp ", " #a " \n\t"\
-"mov" #size " " #a ", " #b " \n\t"
-#define AVG_MMX2_OP(a,b,temp, size) \
-"mov" #size " " #b ", " #temp " \n\t"\
-"pavgb " #temp ", " #a " \n\t"\
-"mov" #size " " #a ", " #b " \n\t"
-
-#define PAVGB "pavgusb"
-QPEL_H264(put_, PUT_OP, 3dnow)
-QPEL_H264(avg_, AVG_3DNOW_OP, 3dnow)
-#undef PAVGB
-#define PAVGB "pavgb"
-QPEL_H264(put_, PUT_OP, mmx2)
-QPEL_H264(avg_, AVG_MMX2_OP, mmx2)
-QPEL_H264_V_XMM(put_, PUT_OP, sse2)
-QPEL_H264_V_XMM(avg_, AVG_MMX2_OP, sse2)
-QPEL_H264_HV_XMM(put_, PUT_OP, sse2)
-QPEL_H264_HV_XMM(avg_, AVG_MMX2_OP, sse2)
-#if HAVE_SSSE3
-QPEL_H264_H_XMM(put_, PUT_OP, ssse3)
-QPEL_H264_H_XMM(avg_, AVG_MMX2_OP, ssse3)
-QPEL_H264_HV2_XMM(put_, PUT_OP, ssse3)
-QPEL_H264_HV2_XMM(avg_, AVG_MMX2_OP, ssse3)
-QPEL_H264_HV_XMM(put_, PUT_OP, ssse3)
-QPEL_H264_HV_XMM(avg_, AVG_MMX2_OP, ssse3)
-#endif
-#undef PAVGB
-
-H264_MC_4816(3dnow)
-H264_MC_4816(mmx2)
-H264_MC_816(H264_MC_V, sse2)
-H264_MC_816(H264_MC_HV, sse2)
-#if HAVE_SSSE3
-H264_MC_816(H264_MC_H, ssse3)
-H264_MC_816(H264_MC_HV, ssse3)
-#endif
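[Reviewer note: PAVGB is a plain string macro spliced into the asm templates, so a single QPEL_H264 body expands to 3DNow! code with "pavgusb" and to MMXEXT code with "pavgb" depending on which definition is live at expansion time, mirrored by the AVG_3DNOW_OP/AVG_MMX2_OP pair above.]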
-
-/* rnd interleaved with rnd div 8, use p+1 to access rnd div 8 */
-DECLARE_ALIGNED(8, static const uint64_t, h264_rnd_reg)[4] = {
- 0x0020002000200020ULL, 0x0004000400040004ULL, 0x001C001C001C001CULL, 0x0003000300030003ULL
-};
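[Reviewer note: the table packs two rounding pairs, (32, 4) for standard H.264 chroma rounding and (28, 3) for the VC-1 "no rounding" mode that the *_nornd wrappers select by passing h264_rnd_reg+2. A scalar reference of the bilinear chroma MC the included template implements, with rnd standing in for the first constant of whichever pair is active; a sketch, not the template's actual interface:]

#include <stdint.h>

/* H.264/VC-1 chroma motion compensation, one sample; the weights
 * derive from the 1/8-pel fractional offsets x, y in 0..7 */
static inline uint8_t chroma_mc_ref(const uint8_t *s, int stride,
                                    int x, int y, int rnd)
{
    int A = (8 - x) * (8 - y), B = x * (8 - y);
    int C = (8 - x) * y,       D = x * y;
    return (A * s[0] + B * s[1] + C * s[stride] + D * s[stride + 1] + rnd) >> 6;
}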
-
-#define H264_CHROMA_OP(S,D)
-#define H264_CHROMA_OP4(S,D,T)
-#define H264_CHROMA_MC8_TMPL put_h264_chroma_generic_mc8_mmx
-#define H264_CHROMA_MC4_TMPL put_h264_chroma_generic_mc4_mmx
-#define H264_CHROMA_MC2_TMPL put_h264_chroma_mc2_mmx2
-#define H264_CHROMA_MC8_MV0 put_pixels8_mmx
-#include "dsputil_h264_template_mmx.c"
-
-static void put_h264_chroma_mc8_mmx_rnd(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
-{
- put_h264_chroma_generic_mc8_mmx(dst, src, stride, h, x, y, h264_rnd_reg);
-}
-static void put_vc1_chroma_mc8_mmx_nornd(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
-{
- put_h264_chroma_generic_mc8_mmx(dst, src, stride, h, x, y, h264_rnd_reg+2);
-}
-static void put_h264_chroma_mc4_mmx(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
-{
- put_h264_chroma_generic_mc4_mmx(dst, src, stride, h, x, y, h264_rnd_reg);
-}
-
-#undef H264_CHROMA_OP
-#undef H264_CHROMA_OP4
-#undef H264_CHROMA_MC8_TMPL
-#undef H264_CHROMA_MC4_TMPL
-#undef H264_CHROMA_MC2_TMPL
-#undef H264_CHROMA_MC8_MV0
+#define LF_FUNC(DIR, TYPE, OPT) \
+void ff_x264_deblock_ ## DIR ## _ ## TYPE ## _ ## OPT (uint8_t *pix, int stride, \
+ int alpha, int beta, int8_t *tc0);
+#define LF_IFUNC(DIR, TYPE, OPT) \
+void ff_x264_deblock_ ## DIR ## _ ## TYPE ## _ ## OPT (uint8_t *pix, int stride, \
+ int alpha, int beta);
-#define H264_CHROMA_OP(S,D) "pavgb " #S ", " #D " \n\t"
-#define H264_CHROMA_OP4(S,D,T) "movd " #S ", " #T " \n\t"\
- "pavgb " #T ", " #D " \n\t"
-#define H264_CHROMA_MC8_TMPL avg_h264_chroma_generic_mc8_mmx2
-#define H264_CHROMA_MC4_TMPL avg_h264_chroma_generic_mc4_mmx2
-#define H264_CHROMA_MC2_TMPL avg_h264_chroma_mc2_mmx2
-#define H264_CHROMA_MC8_MV0 avg_pixels8_mmx2
-#include "dsputil_h264_template_mmx.c"
-static void avg_h264_chroma_mc8_mmx2_rnd(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
-{
- avg_h264_chroma_generic_mc8_mmx2(dst, src, stride, h, x, y, h264_rnd_reg);
-}
-static void avg_vc1_chroma_mc8_mmx2_nornd(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
-{
- avg_h264_chroma_generic_mc8_mmx2(dst, src, stride, h, x, y, h264_rnd_reg+2);
-}
-static void avg_h264_chroma_mc4_mmx2(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
-{
- avg_h264_chroma_generic_mc4_mmx2(dst, src, stride, h, x, y, h264_rnd_reg);
-}
-#undef H264_CHROMA_OP
-#undef H264_CHROMA_OP4
-#undef H264_CHROMA_MC8_TMPL
-#undef H264_CHROMA_MC4_TMPL
-#undef H264_CHROMA_MC2_TMPL
-#undef H264_CHROMA_MC8_MV0
+LF_FUNC (h, chroma, mmxext)
+LF_IFUNC(h, chroma_intra, mmxext)
+LF_FUNC (v, chroma, mmxext)
+LF_IFUNC(v, chroma_intra, mmxext)
-#define H264_CHROMA_OP(S,D) "pavgusb " #S ", " #D " \n\t"
-#define H264_CHROMA_OP4(S,D,T) "movd " #S ", " #T " \n\t"\
- "pavgusb " #T ", " #D " \n\t"
-#define H264_CHROMA_MC8_TMPL avg_h264_chroma_generic_mc8_3dnow
-#define H264_CHROMA_MC4_TMPL avg_h264_chroma_generic_mc4_3dnow
-#define H264_CHROMA_MC8_MV0 avg_pixels8_3dnow
-#include "dsputil_h264_template_mmx.c"
-static void avg_h264_chroma_mc8_3dnow_rnd(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
-{
- avg_h264_chroma_generic_mc8_3dnow(dst, src, stride, h, x, y, h264_rnd_reg);
-}
-static void avg_h264_chroma_mc4_3dnow(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
+LF_FUNC (h, luma, mmxext)
+LF_IFUNC(h, luma_intra, mmxext)
+#if HAVE_YASM && ARCH_X86_32
+LF_FUNC (v8, luma, mmxext)
+static void ff_x264_deblock_v_luma_mmxext(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
- avg_h264_chroma_generic_mc4_3dnow(dst, src, stride, h, x, y, h264_rnd_reg);
-}
-#undef H264_CHROMA_OP
-#undef H264_CHROMA_OP4
-#undef H264_CHROMA_MC8_TMPL
-#undef H264_CHROMA_MC4_TMPL
-#undef H264_CHROMA_MC8_MV0
-
-#if HAVE_SSSE3
-#define AVG_OP(X)
-#undef H264_CHROMA_MC8_TMPL
-#undef H264_CHROMA_MC4_TMPL
-#define H264_CHROMA_MC8_TMPL put_h264_chroma_mc8_ssse3
-#define H264_CHROMA_MC4_TMPL put_h264_chroma_mc4_ssse3
-#define H264_CHROMA_MC8_MV0 put_pixels8_mmx
-#include "dsputil_h264_template_ssse3.c"
-static void put_h264_chroma_mc8_ssse3_rnd(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
-{
- put_h264_chroma_mc8_ssse3(dst, src, stride, h, x, y, 1);
-}
-static void put_vc1_chroma_mc8_ssse3_nornd(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
-{
- put_h264_chroma_mc8_ssse3(dst, src, stride, h, x, y, 0);
-}
-
-#undef AVG_OP
-#undef H264_CHROMA_MC8_TMPL
-#undef H264_CHROMA_MC4_TMPL
-#undef H264_CHROMA_MC8_MV0
-#define AVG_OP(X) X
-#define H264_CHROMA_MC8_TMPL avg_h264_chroma_mc8_ssse3
-#define H264_CHROMA_MC4_TMPL avg_h264_chroma_mc4_ssse3
-#define H264_CHROMA_MC8_MV0 avg_pixels8_mmx2
-#include "dsputil_h264_template_ssse3.c"
-static void avg_h264_chroma_mc8_ssse3_rnd(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
-{
- avg_h264_chroma_mc8_ssse3(dst, src, stride, h, x, y, 1);
+ if((tc0[0] & tc0[1]) >= 0)
+ ff_x264_deblock_v8_luma_mmxext(pix+0, stride, alpha, beta, tc0);
+ if((tc0[2] & tc0[3]) >= 0)
+ ff_x264_deblock_v8_luma_mmxext(pix+8, stride, alpha, beta, tc0+2);
}
-static void avg_vc1_chroma_mc8_ssse3_nornd(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
+LF_IFUNC(v8, luma_intra, mmxext)
+static void ff_x264_deblock_v_luma_intra_mmxext(uint8_t *pix, int stride, int alpha, int beta)
{
- avg_h264_chroma_mc8_ssse3(dst, src, stride, h, x, y, 0);
+ ff_x264_deblock_v8_luma_intra_mmxext(pix+0, stride, alpha, beta);
+ ff_x264_deblock_v8_luma_intra_mmxext(pix+8, stride, alpha, beta);
}
-#undef AVG_OP
-#undef H264_CHROMA_MC8_TMPL
-#undef H264_CHROMA_MC4_TMPL
-#undef H264_CHROMA_MC8_MV0
#endif
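[Reviewer note on the tc0 test in the 32-bit wrapper above: tc0 entries of -1 mark an edge segment as "no filtering", so (tc0[0] & tc0[1]) >= 0 — the bitwise AND propagates the sign bit only when both operands are negative — is false only when both segments of that 8-pixel half are disabled, letting the wrapper skip the corresponding ff_x264_deblock_v8_luma_mmxext call entirely.]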
+LF_FUNC (h, luma, sse2)
+LF_IFUNC(h, luma_intra, sse2)
+LF_FUNC (v, luma, sse2)
+LF_IFUNC(v, luma_intra, sse2)
+
/***********************************/
/* weighted prediction */
-static inline void ff_h264_weight_WxH_mmx2(uint8_t *dst, int stride, int log2_denom, int weight, int offset, int w, int h)
+#define H264_WEIGHT(W, H, OPT) \
+void ff_h264_weight_ ## W ## x ## H ## _ ## OPT(uint8_t *dst, \
+ int stride, int log2_denom, int weight, int offset);
+
+#define H264_BIWEIGHT(W, H, OPT) \
+void ff_h264_biweight_ ## W ## x ## H ## _ ## OPT(uint8_t *dst, \
+ uint8_t *src, int stride, int log2_denom, int weightd, \
+ int weights, int offset);
+
+#define H264_BIWEIGHT_MMX(W,H) \
+H264_WEIGHT (W, H, mmx2) \
+H264_BIWEIGHT(W, H, mmx2)
+
+#define H264_BIWEIGHT_MMX_SSE(W,H) \
+H264_BIWEIGHT_MMX(W, H) \
+H264_WEIGHT (W, H, sse2) \
+H264_BIWEIGHT (W, H, sse2) \
+H264_BIWEIGHT (W, H, ssse3)
+
+H264_BIWEIGHT_MMX_SSE(16, 16)
+H264_BIWEIGHT_MMX_SSE(16, 8)
+H264_BIWEIGHT_MMX_SSE( 8, 16)
+H264_BIWEIGHT_MMX_SSE( 8, 8)
+H264_BIWEIGHT_MMX_SSE( 8, 4)
+H264_BIWEIGHT_MMX ( 4, 8)
+H264_BIWEIGHT_MMX ( 4, 4)
+H264_BIWEIGHT_MMX ( 4, 2)
+
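[Reviewer note: the macros above only declare the yasm-implemented weight/biweight kernels; the per-sample math matches the C fallbacks this patch removes further down. A scalar restatement, for reference only:]

#include <stdint.h>

static inline uint8_t clip8(int v) { return v < 0 ? 0 : v > 255 ? 255 : v; }

/* unidirectional: ((px*weight) >> log2_denom) + offset, with rounding
 * folded into the offset exactly as the removed C version did */
static inline uint8_t weight_ref(uint8_t px, int log2_denom,
                                 int weight, int offset)
{
    int o = (offset << log2_denom) + ((1 << log2_denom) >> 1);
    return clip8((px * weight + o) >> log2_denom);
}

/* bidirectional: note the ((offset+1)|1) rounding bias and the extra
 * shift, again matching the removed ff_h264_biweight_WxH_mmx2 */
static inline uint8_t biweight_ref(uint8_t dst, uint8_t src, int log2_denom,
                                   int weightd, int weights, int offset)
{
    int o = ((offset + 1) | 1) << log2_denom;
    return clip8((dst * weightd + src * weights + o) >> (log2_denom + 1));
}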
+void ff_h264dsp_init_x86(H264DSPContext *c)
{
- int x, y;
- offset <<= log2_denom;
- offset += (1 << log2_denom) >> 1;
- __asm__ volatile(
- "movd %0, %%mm4 \n\t"
- "movd %1, %%mm5 \n\t"
- "movd %2, %%mm6 \n\t"
- "pshufw $0, %%mm4, %%mm4 \n\t"
- "pshufw $0, %%mm5, %%mm5 \n\t"
- "pxor %%mm7, %%mm7 \n\t"
- :: "g"(weight), "g"(offset), "g"(log2_denom)
- );
- for(y=0; y<h; y+=2){
- for(x=0; x<w; x+=4){
- __asm__ volatile(
- "movd %0, %%mm0 \n\t"
- "movd %1, %%mm1 \n\t"
- "punpcklbw %%mm7, %%mm0 \n\t"
- "punpcklbw %%mm7, %%mm1 \n\t"
- "pmullw %%mm4, %%mm0 \n\t"
- "pmullw %%mm4, %%mm1 \n\t"
- "paddsw %%mm5, %%mm0 \n\t"
- "paddsw %%mm5, %%mm1 \n\t"
- "psraw %%mm6, %%mm0 \n\t"
- "psraw %%mm6, %%mm1 \n\t"
- "packuswb %%mm7, %%mm0 \n\t"
- "packuswb %%mm7, %%mm1 \n\t"
- "movd %%mm0, %0 \n\t"
- "movd %%mm1, %1 \n\t"
- : "+m"(*(uint32_t*)(dst+x)),
- "+m"(*(uint32_t*)(dst+x+stride))
- );
- }
- dst += 2*stride;
- }
-}
+ int mm_flags = av_get_cpu_flags();
-static inline void ff_h264_biweight_WxH_mmx2(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset, int w, int h)
-{
- int x, y;
- offset = ((offset + 1) | 1) << log2_denom;
- __asm__ volatile(
- "movd %0, %%mm3 \n\t"
- "movd %1, %%mm4 \n\t"
- "movd %2, %%mm5 \n\t"
- "movd %3, %%mm6 \n\t"
- "pshufw $0, %%mm3, %%mm3 \n\t"
- "pshufw $0, %%mm4, %%mm4 \n\t"
- "pshufw $0, %%mm5, %%mm5 \n\t"
- "pxor %%mm7, %%mm7 \n\t"
- :: "g"(weightd), "g"(weights), "g"(offset), "g"(log2_denom+1)
- );
- for(y=0; y<h; y++){
- for(x=0; x<w; x+=4){
- __asm__ volatile(
- "movd %0, %%mm0 \n\t"
- "movd %1, %%mm1 \n\t"
- "punpcklbw %%mm7, %%mm0 \n\t"
- "punpcklbw %%mm7, %%mm1 \n\t"
- "pmullw %%mm3, %%mm0 \n\t"
- "pmullw %%mm4, %%mm1 \n\t"
- "paddsw %%mm1, %%mm0 \n\t"
- "paddsw %%mm5, %%mm0 \n\t"
- "psraw %%mm6, %%mm0 \n\t"
- "packuswb %%mm0, %%mm0 \n\t"
- "movd %%mm0, %0 \n\t"
- : "+m"(*(uint32_t*)(dst+x))
- : "m"(*(uint32_t*)(src+x))
- );
- }
- src += stride;
- dst += stride;
+ if (mm_flags & AV_CPU_FLAG_MMX2) {
+ c->h264_loop_filter_strength= h264_loop_filter_strength_mmx2;
}
-}
-
-#define H264_WEIGHT(W,H) \
-static void ff_h264_biweight_ ## W ## x ## H ## _mmx2(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset){ \
- ff_h264_biweight_WxH_mmx2(dst, src, stride, log2_denom, weightd, weights, offset, W, H); \
-} \
-static void ff_h264_weight_ ## W ## x ## H ## _mmx2(uint8_t *dst, int stride, int log2_denom, int weight, int offset){ \
- ff_h264_weight_WxH_mmx2(dst, stride, log2_denom, weight, offset, W, H); \
-}
-
-H264_WEIGHT(16,16)
-H264_WEIGHT(16, 8)
-H264_WEIGHT( 8,16)
-H264_WEIGHT( 8, 8)
-H264_WEIGHT( 8, 4)
-H264_WEIGHT( 4, 8)
-H264_WEIGHT( 4, 4)
-H264_WEIGHT( 4, 2)
-
-void ff_h264_biweight_8x8_sse2(uint8_t *dst, uint8_t *src, int stride,
- int log2_denom, int weightd, int weights,
- int offset);
-
-void ff_h264_biweight_16x16_sse2(uint8_t *dst, uint8_t *src, int stride,
- int log2_denom, int weightd, int weights,
- int offset);
-
-void ff_h264_biweight_8x8_ssse3(uint8_t *dst, uint8_t *src, int stride,
- int log2_denom, int weightd, int weights,
- int offset);
-
-void ff_h264_biweight_16x16_ssse3(uint8_t *dst, uint8_t *src, int stride,
- int log2_denom, int weightd, int weights,
- int offset);
-
-void ff_pred16x16_vertical_mmx (uint8_t *src, int stride);
-void ff_pred16x16_vertical_sse (uint8_t *src, int stride);
-void ff_pred16x16_horizontal_mmx (uint8_t *src, int stride);
-void ff_pred16x16_horizontal_mmxext(uint8_t *src, int stride);
-void ff_pred16x16_horizontal_ssse3 (uint8_t *src, int stride);
-void ff_pred16x16_dc_mmxext (uint8_t *src, int stride);
-void ff_pred16x16_dc_sse2 (uint8_t *src, int stride);
-void ff_pred16x16_dc_ssse3 (uint8_t *src, int stride);
-void ff_pred16x16_tm_vp8_mmx (uint8_t *src, int stride);
-void ff_pred16x16_tm_vp8_mmxext (uint8_t *src, int stride);
-void ff_pred16x16_tm_vp8_sse2 (uint8_t *src, int stride);
-void ff_pred8x8_dc_rv40_mmxext (uint8_t *src, int stride);
-void ff_pred8x8_vertical_mmx (uint8_t *src, int stride);
-void ff_pred8x8_horizontal_mmx (uint8_t *src, int stride);
-void ff_pred8x8_horizontal_mmxext (uint8_t *src, int stride);
-void ff_pred8x8_horizontal_ssse3 (uint8_t *src, int stride);
-void ff_pred8x8_tm_vp8_mmx (uint8_t *src, int stride);
-void ff_pred8x8_tm_vp8_mmxext (uint8_t *src, int stride);
-void ff_pred8x8_tm_vp8_sse2 (uint8_t *src, int stride);
-void ff_pred8x8_tm_vp8_ssse3 (uint8_t *src, int stride);
-void ff_pred4x4_dc_mmxext (uint8_t *src, const uint8_t *topright, int stride);
-void ff_pred4x4_tm_vp8_mmx (uint8_t *src, const uint8_t *topright, int stride);
-void ff_pred4x4_tm_vp8_mmxext (uint8_t *src, const uint8_t *topright, int stride);
-void ff_pred4x4_tm_vp8_ssse3 (uint8_t *src, const uint8_t *topright, int stride);
-void ff_pred4x4_vertical_vp8_mmxext(uint8_t *src, const uint8_t *topright, int stride);
-
-#if CONFIG_H264PRED
-void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
-{
- int mm_flags = mm_support();
-
#if HAVE_YASM
- if (mm_flags & FF_MM_MMX) {
- h->pred16x16[VERT_PRED8x8] = ff_pred16x16_vertical_mmx;
- h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_mmx;
- h->pred8x8 [VERT_PRED8x8] = ff_pred8x8_vertical_mmx;
- h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_mmx;
- if (codec_id == CODEC_ID_VP8) {
- h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_tm_vp8_mmx;
- h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_tm_vp8_mmx;
- h->pred4x4 [TM_VP8_PRED ] = ff_pred4x4_tm_vp8_mmx;
- }
- }
-
- if (mm_flags & FF_MM_MMX2) {
- h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_mmxext;
- h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_mmxext;
- h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_mmxext;
- h->pred4x4 [DC_PRED ] = ff_pred4x4_dc_mmxext;
- if (codec_id == CODEC_ID_VP8) {
- h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_tm_vp8_mmxext;
- h->pred8x8 [DC_PRED8x8 ] = ff_pred8x8_dc_rv40_mmxext;
- h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_tm_vp8_mmxext;
- h->pred4x4 [TM_VP8_PRED ] = ff_pred4x4_tm_vp8_mmxext;
- h->pred4x4 [VERT_PRED ] = ff_pred4x4_vertical_vp8_mmxext;
- }
- }
-
- if (mm_flags & FF_MM_SSE) {
- h->pred16x16[VERT_PRED8x8] = ff_pred16x16_vertical_sse;
- }
-
- if (mm_flags & FF_MM_SSE2) {
- h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_sse2;
- if (codec_id == CODEC_ID_VP8) {
- h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_tm_vp8_sse2;
- h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_tm_vp8_sse2;
- }
- }
-
- if (mm_flags & FF_MM_SSSE3) {
- h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_ssse3;
- h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_ssse3;
- h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_ssse3;
- if (codec_id == CODEC_ID_VP8) {
- h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_tm_vp8_ssse3;
- h->pred4x4 [TM_VP8_PRED ] = ff_pred4x4_tm_vp8_ssse3;
+ if (mm_flags & AV_CPU_FLAG_MMX) {
+ c->h264_idct_dc_add=
+ c->h264_idct_add= ff_h264_idct_add_mmx;
+ c->h264_idct8_dc_add=
+ c->h264_idct8_add= ff_h264_idct8_add_mmx;
+
+ c->h264_idct_add16 = ff_h264_idct_add16_mmx;
+ c->h264_idct8_add4 = ff_h264_idct8_add4_mmx;
+ c->h264_idct_add8 = ff_h264_idct_add8_mmx;
+ c->h264_idct_add16intra= ff_h264_idct_add16intra_mmx;
+
+ if (mm_flags & AV_CPU_FLAG_MMX2) {
+ c->h264_idct_dc_add= ff_h264_idct_dc_add_mmx2;
+ c->h264_idct8_dc_add= ff_h264_idct8_dc_add_mmx2;
+ c->h264_idct_add16 = ff_h264_idct_add16_mmx2;
+ c->h264_idct8_add4 = ff_h264_idct8_add4_mmx2;
+ c->h264_idct_add8 = ff_h264_idct_add8_mmx2;
+ c->h264_idct_add16intra= ff_h264_idct_add16intra_mmx2;
+
+ c->h264_v_loop_filter_chroma= ff_x264_deblock_v_chroma_mmxext;
+ c->h264_h_loop_filter_chroma= ff_x264_deblock_h_chroma_mmxext;
+ c->h264_v_loop_filter_chroma_intra= ff_x264_deblock_v_chroma_intra_mmxext;
+ c->h264_h_loop_filter_chroma_intra= ff_x264_deblock_h_chroma_intra_mmxext;
+#if ARCH_X86_32
+ c->h264_v_loop_filter_luma= ff_x264_deblock_v_luma_mmxext;
+ c->h264_h_loop_filter_luma= ff_x264_deblock_h_luma_mmxext;
+ c->h264_v_loop_filter_luma_intra = ff_x264_deblock_v_luma_intra_mmxext;
+ c->h264_h_loop_filter_luma_intra = ff_x264_deblock_h_luma_intra_mmxext;
+#endif
+ c->weight_h264_pixels_tab[0]= ff_h264_weight_16x16_mmx2;
+ c->weight_h264_pixels_tab[1]= ff_h264_weight_16x8_mmx2;
+ c->weight_h264_pixels_tab[2]= ff_h264_weight_8x16_mmx2;
+ c->weight_h264_pixels_tab[3]= ff_h264_weight_8x8_mmx2;
+ c->weight_h264_pixels_tab[4]= ff_h264_weight_8x4_mmx2;
+ c->weight_h264_pixels_tab[5]= ff_h264_weight_4x8_mmx2;
+ c->weight_h264_pixels_tab[6]= ff_h264_weight_4x4_mmx2;
+ c->weight_h264_pixels_tab[7]= ff_h264_weight_4x2_mmx2;
+
+ c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16x16_mmx2;
+ c->biweight_h264_pixels_tab[1]= ff_h264_biweight_16x8_mmx2;
+ c->biweight_h264_pixels_tab[2]= ff_h264_biweight_8x16_mmx2;
+ c->biweight_h264_pixels_tab[3]= ff_h264_biweight_8x8_mmx2;
+ c->biweight_h264_pixels_tab[4]= ff_h264_biweight_8x4_mmx2;
+ c->biweight_h264_pixels_tab[5]= ff_h264_biweight_4x8_mmx2;
+ c->biweight_h264_pixels_tab[6]= ff_h264_biweight_4x4_mmx2;
+ c->biweight_h264_pixels_tab[7]= ff_h264_biweight_4x2_mmx2;
+
+ if (mm_flags&AV_CPU_FLAG_SSE2) {
+ c->h264_idct8_add = ff_h264_idct8_add_sse2;
+ c->h264_idct8_add4= ff_h264_idct8_add4_sse2;
+
+ c->weight_h264_pixels_tab[0]= ff_h264_weight_16x16_sse2;
+ c->weight_h264_pixels_tab[1]= ff_h264_weight_16x8_sse2;
+ c->weight_h264_pixels_tab[2]= ff_h264_weight_8x16_sse2;
+ c->weight_h264_pixels_tab[3]= ff_h264_weight_8x8_sse2;
+ c->weight_h264_pixels_tab[4]= ff_h264_weight_8x4_sse2;
+
+ c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16x16_sse2;
+ c->biweight_h264_pixels_tab[1]= ff_h264_biweight_16x8_sse2;
+ c->biweight_h264_pixels_tab[2]= ff_h264_biweight_8x16_sse2;
+ c->biweight_h264_pixels_tab[3]= ff_h264_biweight_8x8_sse2;
+ c->biweight_h264_pixels_tab[4]= ff_h264_biweight_8x4_sse2;
+
+#if ARCH_X86_64 || !defined(__ICC) || __ICC > 1110
+ c->h264_v_loop_filter_luma = ff_x264_deblock_v_luma_sse2;
+ c->h264_h_loop_filter_luma = ff_x264_deblock_h_luma_sse2;
+ c->h264_v_loop_filter_luma_intra = ff_x264_deblock_v_luma_intra_sse2;
+ c->h264_h_loop_filter_luma_intra = ff_x264_deblock_h_luma_intra_sse2;
+#endif
+ c->h264_idct_add16 = ff_h264_idct_add16_sse2;
+ c->h264_idct_add8 = ff_h264_idct_add8_sse2;
+ c->h264_idct_add16intra = ff_h264_idct_add16intra_sse2;
+ }
+ if (mm_flags&AV_CPU_FLAG_SSSE3) {
+ c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16x16_ssse3;
+ c->biweight_h264_pixels_tab[1]= ff_h264_biweight_16x8_ssse3;
+ c->biweight_h264_pixels_tab[2]= ff_h264_biweight_8x16_ssse3;
+ c->biweight_h264_pixels_tab[3]= ff_h264_biweight_8x8_ssse3;
+ c->biweight_h264_pixels_tab[4]= ff_h264_biweight_8x4_ssse3;
+ }
}
}
#endif
}
-#endif
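[Reviewer note: ff_h264dsp_init_x86 above follows the usual ffmpeg dispatch shape — probe the CPU once with av_get_cpu_flags(), then fill the H264DSPContext function-pointer table in increasing ISA order so later blocks override earlier ones. Reduced to a skeleton, with names taken from the hunk above and the surrounding headers assumed:]

#include "libavutil/cpu.h"

static void init_sketch(H264DSPContext *c)
{
    int flags = av_get_cpu_flags();
    if (flags & AV_CPU_FLAG_MMX)
        c->h264_idct_add  = ff_h264_idct_add_mmx;     /* baseline      */
    if (flags & AV_CPU_FLAG_SSE2)
        c->h264_idct8_add = ff_h264_idct8_add_sse2;   /* overrides MMX */
}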
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/idct_sse2_xvid.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/idct_sse2_xvid.c
index fc670e25d..d8a534240 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/idct_sse2_xvid.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/idct_sse2_xvid.c
@@ -385,11 +385,11 @@ inline void ff_idct_xvid_sse2(short *block)
void ff_idct_xvid_sse2_put(uint8_t *dest, int line_size, short *block)
{
ff_idct_xvid_sse2(block);
- put_pixels_clamped_mmx(block, dest, line_size);
+ ff_put_pixels_clamped_mmx(block, dest, line_size);
}
void ff_idct_xvid_sse2_add(uint8_t *dest, int line_size, short *block)
{
ff_idct_xvid_sse2(block);
- add_pixels_clamped_mmx(block, dest, line_size);
+ ff_add_pixels_clamped_mmx(block, dest, line_size);
}
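[Reviewer note: this hunk and the simple_idct_mmx one below appear to track the same upstream dsputil rename — put_pixels_clamped_mmx/add_pixels_clamped_mmx gained the ff_ prefix ffmpeg reserves for symbols with external linkage, so each IDCT wrapper that stores or adds the transformed block switches to the prefixed names.]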
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/mpegvideo_mmx.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/mpegvideo_mmx.c
index 75ec4b2cf..f3d0eb336 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/mpegvideo_mmx.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/mpegvideo_mmx.c
@@ -22,6 +22,7 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
+#include "libavutil/cpu.h"
#include "libavutil/x86_cpu.h"
#include "libavcodec/avcodec.h"
#include "libavcodec/dsputil.h"
@@ -625,9 +626,9 @@ static void denoise_dct_sse2(MpegEncContext *s, DCTELEM *block){
void MPV_common_init_mmx(MpegEncContext *s)
{
- int mm_flags = mm_support();
+ int mm_flags = av_get_cpu_flags();
- if (mm_flags & FF_MM_MMX) {
+ if (mm_flags & AV_CPU_FLAG_MMX) {
const int dct_algo = s->avctx->dct_algo;
s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_mmx;
@@ -638,7 +639,7 @@ void MPV_common_init_mmx(MpegEncContext *s)
s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_mmx;
s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_mmx;
- if (mm_flags & FF_MM_SSE2) {
+ if (mm_flags & AV_CPU_FLAG_SSE2) {
s->denoise_dct= denoise_dct_sse2;
} else {
s->denoise_dct= denoise_dct_mmx;
@@ -646,13 +647,13 @@ void MPV_common_init_mmx(MpegEncContext *s)
if(dct_algo==FF_DCT_AUTO || dct_algo==FF_DCT_MMX){
#if HAVE_SSSE3
- if(mm_flags & FF_MM_SSSE3){
+ if(mm_flags & AV_CPU_FLAG_SSSE3){
s->dct_quantize= dct_quantize_SSSE3;
} else
#endif
- if(mm_flags & FF_MM_SSE2){
+ if(mm_flags & AV_CPU_FLAG_SSE2){
s->dct_quantize= dct_quantize_SSE2;
- } else if(mm_flags & FF_MM_MMX2){
+ } else if(mm_flags & AV_CPU_FLAG_MMX2){
s->dct_quantize= dct_quantize_MMX2;
} else {
s->dct_quantize= dct_quantize_MMX;
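
The changes to MPV_common_init_mmx show the whole migration in miniature: mm_support()/FF_MM_* become av_get_cpu_flags()/AV_CPU_FLAG_*, while the dispatch logic itself is untouched — query the flags once, then install the best available kernel in descending ISA order. A self-contained sketch of that shape (flag values and the signature are illustrative; the canonical definitions live in libavutil/cpu.h and mpegvideo.h):

    /* Illustrative flag bits standing in for AV_CPU_FLAG_*. */
    #define CPU_FLAG_MMX2  0x0002
    #define CPU_FLAG_SSE2  0x0010
    #define CPU_FLAG_SSSE3 0x0080

    typedef int (*quantize_fn)(void *ctx, short *block, int n, int qscale,
                               int *overflow);

    static quantize_fn pick_quantize(int flags, quantize_fn mmx,
                                     quantize_fn mmx2, quantize_fn sse2,
                                     quantize_fn ssse3)
    {
        if (flags & CPU_FLAG_SSSE3) return ssse3;   /* best first */
        if (flags & CPU_FLAG_SSE2)  return sse2;
        if (flags & CPU_FLAG_MMX2)  return mmx2;
        return mmx;
    }
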
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/simple_idct_mmx.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/simple_idct_mmx.c
index e32b8f0b4..8ad0d3192 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/simple_idct_mmx.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/simple_idct_mmx.c
@@ -1287,10 +1287,10 @@ void ff_simple_idct_mmx(int16_t *block)
void ff_simple_idct_put_mmx(uint8_t *dest, int line_size, DCTELEM *block)
{
idct(block);
- put_pixels_clamped_mmx(block, dest, line_size);
+ ff_put_pixels_clamped_mmx(block, dest, line_size);
}
void ff_simple_idct_add_mmx(uint8_t *dest, int line_size, DCTELEM *block)
{
idct(block);
- add_pixels_clamped_mmx(block, dest, line_size);
+ ff_add_pixels_clamped_mmx(block, dest, line_size);
}
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vc1dsp_mmx.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vc1dsp_mmx.c
index eb3ad2c32..8889bb36e 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vc1dsp_mmx.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vc1dsp_mmx.c
@@ -24,6 +24,7 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
+#include "libavutil/cpu.h"
#include "libavutil/x86_cpu.h"
#include "libavcodec/dsputil.h"
#include "dsputil_mmx.h"
@@ -714,7 +715,7 @@ static void vc1_h_loop_filter16_sse4(uint8_t *src, int stride, int pq)
#endif
void ff_vc1dsp_init_mmx(DSPContext* dsp, AVCodecContext *avctx) {
- int mm_flags = mm_support();
+ int mm_flags = av_get_cpu_flags();
dsp->put_vc1_mspel_pixels_tab[ 0] = ff_put_vc1_mspel_mc00_mmx;
dsp->put_vc1_mspel_pixels_tab[ 4] = put_vc1_mspel_mc01_mmx;
@@ -736,7 +737,7 @@ void ff_vc1dsp_init_mmx(DSPContext* dsp, AVCodecContext *avctx) {
dsp->put_vc1_mspel_pixels_tab[11] = put_vc1_mspel_mc32_mmx;
dsp->put_vc1_mspel_pixels_tab[15] = put_vc1_mspel_mc33_mmx;
- if (mm_flags & FF_MM_MMX2){
+ if (mm_flags & AV_CPU_FLAG_MMX2){
dsp->avg_vc1_mspel_pixels_tab[ 0] = ff_avg_vc1_mspel_mc00_mmx2;
dsp->avg_vc1_mspel_pixels_tab[ 4] = avg_vc1_mspel_mc01_mmx2;
dsp->avg_vc1_mspel_pixels_tab[ 8] = avg_vc1_mspel_mc02_mmx2;
@@ -772,23 +773,23 @@ void ff_vc1dsp_init_mmx(DSPContext* dsp, AVCodecContext *avctx) {
dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_ ## EXT
#if HAVE_YASM
- if (mm_flags & FF_MM_MMX) {
+ if (mm_flags & AV_CPU_FLAG_MMX) {
ASSIGN_LF(mmx);
}
return;
- if (mm_flags & FF_MM_MMX2) {
+ if (mm_flags & AV_CPU_FLAG_MMX2) {
ASSIGN_LF(mmx2);
}
- if (mm_flags & FF_MM_SSE2) {
+ if (mm_flags & AV_CPU_FLAG_SSE2) {
dsp->vc1_v_loop_filter8 = ff_vc1_v_loop_filter8_sse2;
dsp->vc1_h_loop_filter8 = ff_vc1_h_loop_filter8_sse2;
dsp->vc1_v_loop_filter16 = vc1_v_loop_filter16_sse2;
dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_sse2;
}
- if (mm_flags & FF_MM_SSSE3) {
+ if (mm_flags & AV_CPU_FLAG_SSSE3) {
ASSIGN_LF(ssse3);
}
- if (mm_flags & FF_MM_SSE4) {
+ if (mm_flags & AV_CPU_FLAG_SSE4) {
dsp->vc1_h_loop_filter8 = ff_vc1_h_loop_filter8_sse4;
dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_sse4;
}
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vc1dsp_yasm.asm b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vc1dsp_yasm.asm
index 660ff1169..3ea9d8db4 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vc1dsp_yasm.asm
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vc1dsp_yasm.asm
@@ -36,7 +36,7 @@ section .text
%endmacro
%macro STORE_4_WORDS_MMX 6
- movd %6, %5
+ movd %6d, %5
%if mmsize==16
psrldq %5, 4
%else
@@ -45,7 +45,7 @@ section .text
mov %1, %6w
shr %6, 16
mov %2, %6w
- movd %6, %5
+ movd %6d, %5
mov %3, %6w
shr %6, 16
mov %4, %6w
@@ -88,7 +88,7 @@ section .text
pxor m7, m3 ; d_sign ^= a0_sign
pxor m5, m5
- movd m3, r2
+ movd m3, r2d
%if %1 > 4
punpcklbw m3, m3
%endif
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp3dsp.asm b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp3dsp.asm
new file mode 100644
index 000000000..f2b0af326
--- /dev/null
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp3dsp.asm
@@ -0,0 +1,618 @@
+;******************************************************************************
+;* MMX/SSE2-optimized functions for the VP3 decoder
+;* Copyright (c) 2007 Aurelien Jacobs <aurel@gnuage.org>
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "x86inc.asm"
+%include "x86util.asm"
+
+; MMX-optimized functions cribbed from the original VP3 source code.
+
+SECTION_RODATA
+
+vp3_idct_data: times 8 dw 64277
+ times 8 dw 60547
+ times 8 dw 54491
+ times 8 dw 46341
+ times 8 dw 36410
+ times 8 dw 25080
+ times 8 dw 12785
+
+cextern pb_1
+cextern pb_3
+cextern pb_7
+cextern pb_1F
+cextern pb_81
+
+cextern pw_8
+
+cextern put_signed_pixels_clamped_mmx
+cextern add_pixels_clamped_mmx
+
+SECTION .text
+
+; this is off by one or two for some cases when filter_limit is greater than 63
+; in: p0 in mm6, p1 in mm4, p2 in mm2, p3 in mm1
+; out: p1 in mm4, p2 in mm3
+%macro VP3_LOOP_FILTER 0
+ movq m7, m6
+ pand m6, [pb_7] ; p0&7
+ psrlw m7, 3
+ pand m7, [pb_1F] ; p0>>3
+ movq m3, m2 ; p2
+ pxor m2, m4
+ pand m2, [pb_1] ; (p2^p1)&1
+ movq m5, m2
+ paddb m2, m2
+ paddb m2, m5 ; 3*(p2^p1)&1
+ paddb m2, m6 ; extra bits lost in shifts
+ pcmpeqb m0, m0
+ pxor m1, m0 ; 255 - p3
+ pavgb m1, m2 ; (256 - p3 + extrabits) >> 1
+ pxor m0, m4 ; 255 - p1
+ pavgb m0, m3 ; (256 + p2-p1) >> 1
+ paddb m1, [pb_3]
+ pavgb m1, m0 ; 128+2+( p2-p1 - p3) >> 2
+ pavgb m1, m0 ; 128+1+(3*(p2-p1) - p3) >> 3
+ paddusb m7, m1 ; d+128+1
+ movq m6, [pb_81]
+ psubusb m6, m7
+ psubusb m7, [pb_81]
+
+ movq m5, [r2+516] ; flim
+ pminub m6, m5
+ pminub m7, m5
+ movq m0, m6
+ movq m1, m7
+ paddb m6, m6
+ paddb m7, m7
+ pminub m6, m5
+ pminub m7, m5
+ psubb m6, m0
+ psubb m7, m1
+ paddusb m4, m7
+ psubusb m4, m6
+ psubusb m3, m7
+ paddusb m3, m6
+%endmacro
+
+%macro STORE_4_WORDS 1
+ movd r2d, %1
+ mov [r0 -1], r2w
+ psrlq %1, 32
+ shr r2, 16
+ mov [r0+r1 -1], r2w
+ movd r2d, %1
+ mov [r0+r1*2-1], r2w
+ shr r2, 16
+ mov [r0+r3 -1], r2w
+%endmacro
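
STORE_4_WORDS scatters the four 16-bit lanes of an MMX register into four stride-separated rows, one word per row, each store landing at byte offset -1 so it covers the pixel pair straddling the filtered edge. A C equivalent (sketch; the asm bounces through a GPR because x86 has no 16-bit scatter store):

    #include <stdint.h>
    #include <string.h>

    static void store_4_words(uint8_t *dst, int stride, uint64_t lanes)
    {
        for (int i = 0; i < 4; i++) {
            /* lane i -> row i; host byte order matches the asm's word
             * stores on x86 (little-endian) */
            uint16_t w = (uint16_t)(lanes >> (16 * i));
            memcpy(dst + i * stride - 1, &w, sizeof(w));
        }
    }
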
+
+INIT_MMX
+cglobal vp3_v_loop_filter_mmx2, 3, 4
+%ifdef ARCH_X86_64
+ movsxd r1, r1d
+%endif
+ mov r3, r1
+ neg r1
+ movq m6, [r0+r1*2]
+ movq m4, [r0+r1 ]
+ movq m2, [r0 ]
+ movq m1, [r0+r3 ]
+
+ VP3_LOOP_FILTER
+
+ movq [r0+r1], m4
+ movq [r0 ], m3
+ RET
+
+cglobal vp3_h_loop_filter_mmx2, 3, 4
+%ifdef ARCH_X86_64
+ movsxd r1, r1d
+%endif
+ lea r3, [r1*3]
+
+ movd m6, [r0 -2]
+ movd m4, [r0+r1 -2]
+ movd m2, [r0+r1*2-2]
+ movd m1, [r0+r3 -2]
+ lea r0, [r0+r1*4 ]
+ punpcklbw m6, [r0 -2]
+ punpcklbw m4, [r0+r1 -2]
+ punpcklbw m2, [r0+r1*2-2]
+ punpcklbw m1, [r0+r3 -2]
+ sub r0, r3
+ sub r0, r1
+
+ TRANSPOSE4x4B 6, 4, 2, 1, 0
+ VP3_LOOP_FILTER
+ SBUTTERFLY bw, 4, 3, 5
+
+ STORE_4_WORDS m4
+ lea r0, [r0+r1*4 ]
+ STORE_4_WORDS m3
+ RET
+
+; from original comments: The Macro does IDct on 4 1-D Dcts
+%macro BeginIDCT 0
+ movq m2, I(3)
+ movq m6, C(3)
+ movq m4, m2
+ movq m7, J(5)
+ pmulhw m4, m6 ; r4 = c3*i3 - i3
+ movq m1, C(5)
+ pmulhw m6, m7 ; r6 = c3*i5 - i5
+ movq m5, m1
+ pmulhw m1, m2 ; r1 = c5*i3 - i3
+ movq m3, I(1)
+ pmulhw m5, m7 ; r5 = c5*i5 - i5
+ movq m0, C(1)
+ paddw m4, m2 ; r4 = c3*i3
+ paddw m6, m7 ; r6 = c3*i5
+ paddw m2, m1 ; r2 = c5*i3
+ movq m1, J(7)
+ paddw m7, m5 ; r7 = c5*i5
+ movq m5, m0 ; r5 = c1
+ pmulhw m0, m3 ; r0 = c1*i1 - i1
+ paddsw m4, m7 ; r4 = C = c3*i3 + c5*i5
+ pmulhw m5, m1 ; r5 = c1*i7 - i7
+ movq m7, C(7)
+ psubsw m6, m2 ; r6 = D = c3*i5 - c5*i3
+ paddw m0, m3 ; r0 = c1*i1
+ pmulhw m3, m7 ; r3 = c7*i1
+ movq m2, I(2)
+ pmulhw m7, m1 ; r7 = c7*i7
+ paddw m5, m1 ; r5 = c1*i7
+ movq m1, m2 ; r1 = i2
+ pmulhw m2, C(2) ; r2 = c2*i2 - i2
+ psubsw m3, m5 ; r3 = B = c7*i1 - c1*i7
+ movq m5, J(6)
+ paddsw m0, m7 ; r0 = A = c1*i1 + c7*i7
+ movq m7, m5 ; r7 = i6
+ psubsw m0, m4 ; r0 = A - C
+ pmulhw m5, C(2) ; r5 = c2*i6 - i6
+ paddw m2, m1 ; r2 = c2*i2
+ pmulhw m1, C(6) ; r1 = c6*i2
+ paddsw m4, m4 ; r4 = C + C
+ paddsw m4, m0 ; r4 = C. = A + C
+ psubsw m3, m6 ; r3 = B - D
+ paddw m5, m7 ; r5 = c2*i6
+ paddsw m6, m6 ; r6 = D + D
+ pmulhw m7, C(6) ; r7 = c6*i6
+ paddsw m6, m3 ; r6 = D. = B + D
+ movq I(1), m4 ; save C. at I(1)
+ psubsw m1, m5 ; r1 = H = c6*i2 - c2*i6
+ movq m4, C(4)
+ movq m5, m3 ; r5 = B - D
+ pmulhw m3, m4 ; r3 = (c4 - 1) * (B - D)
+ paddsw m7, m2 ; r7 = G = c2*i2 + c6*i6
+ movq I(2), m6 ; save D. at I(2)
+ movq m2, m0 ; r2 = A - C
+ movq m6, I(0)
+ pmulhw m0, m4 ; r0 = (c4 - 1) * (A - C)
+ paddw m5, m3 ; r5 = B. = c4 * (B - D)
+ movq m3, J(4)
+ psubsw m5, m1 ; r5 = B.. = B. - H
+ paddw m2, m0 ; r0 = A. = c4 * (A - C)
+ psubsw m6, m3 ; r6 = i0 - i4
+ movq m0, m6
+ pmulhw m6, m4 ; r6 = (c4 - 1) * (i0 - i4)
+ paddsw m3, m3 ; r3 = i4 + i4
+ paddsw m1, m1 ; r1 = H + H
+ paddsw m3, m0 ; r3 = i0 + i4
+ paddsw m1, m5 ; r1 = H. = B + H
+ pmulhw m4, m3 ; r4 = (c4 - 1) * (i0 + i4)
+ paddsw m6, m0 ; r6 = F = c4 * (i0 - i4)
+ psubsw m6, m2 ; r6 = F. = F - A.
+ paddsw m2, m2 ; r2 = A. + A.
+ movq m0, I(1) ; r0 = C.
+ paddsw m2, m6 ; r2 = A.. = F + A.
+ paddw m4, m3 ; r4 = E = c4 * (i0 + i4)
+ psubsw m2, m1 ; r2 = R2 = A.. - H.
+%endmacro
+
+; RowIDCT gets ready to transpose
+%macro RowIDCT 0
+ BeginIDCT
+ movq m3, I(2) ; r3 = D.
+ psubsw m4, m7 ; r4 = E. = E - G
+ paddsw m1, m1 ; r1 = H. + H.
+ paddsw m7, m7 ; r7 = G + G
+ paddsw m1, m2 ; r1 = R1 = A.. + H.
+ paddsw m7, m4 ; r7 = G. = E + G
+ psubsw m4, m3 ; r4 = R4 = E. - D.
+ paddsw m3, m3
+ psubsw m6, m5 ; r6 = R6 = F. - B..
+ paddsw m5, m5
+ paddsw m3, m4 ; r3 = R3 = E. + D.
+ paddsw m5, m6 ; r5 = R5 = F. + B..
+ psubsw m7, m0 ; r7 = R7 = G. - C.
+ paddsw m0, m0
+ movq I(1), m1 ; save R1
+ paddsw m0, m7 ; r0 = R0 = G. + C.
+%endmacro
+
+; Column IDCT normalizes and stores final results
+%macro ColumnIDCT 0
+ BeginIDCT
+ paddsw m2, OC_8 ; adjust R2 (and R1) for shift
+ paddsw m1, m1 ; r1 = H. + H.
+ paddsw m1, m2 ; r1 = R1 = A.. + H.
+ psraw m2, 4 ; r2 = NR2
+ psubsw m4, m7 ; r4 = E. = E - G
+ psraw m1, 4 ; r1 = NR1
+ movq m3, I(2) ; r3 = D.
+ paddsw m7, m7 ; r7 = G + G
+ movq I(2), m2 ; store NR2 at I2
+ paddsw m7, m4 ; r7 = G. = E + G
+ movq I(1), m1 ; store NR1 at I1
+ psubsw m4, m3 ; r4 = R4 = E. - D.
+ paddsw m4, OC_8 ; adjust R4 (and R3) for shift
+ paddsw m3, m3 ; r3 = D. + D.
+ paddsw m3, m4 ; r3 = R3 = E. + D.
+ psraw m4, 4 ; r4 = NR4
+ psubsw m6, m5 ; r6 = R6 = F. - B..
+ psraw m3, 4 ; r3 = NR3
+ paddsw m6, OC_8 ; adjust R6 (and R5) for shift
+ paddsw m5, m5 ; r5 = B.. + B..
+ paddsw m5, m6 ; r5 = R5 = F. + B..
+ psraw m6, 4 ; r6 = NR6
+ movq J(4), m4 ; store NR4 at J4
+ psraw m5, 4 ; r5 = NR5
+ movq I(3), m3 ; store NR3 at I3
+ psubsw m7, m0 ; r7 = R7 = G. - C.
+ paddsw m7, OC_8 ; adjust R7 (and R0) for shift
+ paddsw m0, m0 ; r0 = C. + C.
+ paddsw m0, m7 ; r0 = R0 = G. + C.
+ psraw m7, 4 ; r7 = NR7
+ movq J(6), m6 ; store NR6 at J6
+ psraw m0, 4 ; r0 = NR0
+ movq J(5), m5 ; store NR5 at J5
+ movq J(7), m7 ; store NR7 at J7
+ movq I(0), m0 ; store NR0 at I0
+%endmacro
+
+; Following macro does two 4x4 transposes in place.
+;
+; At entry (we assume):
+;
+; r0 = a3 a2 a1 a0
+; I(1) = b3 b2 b1 b0
+; r2 = c3 c2 c1 c0
+; r3 = d3 d2 d1 d0
+;
+; r4 = e3 e2 e1 e0
+; r5 = f3 f2 f1 f0
+; r6 = g3 g2 g1 g0
+; r7 = h3 h2 h1 h0
+;
+; At exit, we have:
+;
+; I(0) = d0 c0 b0 a0
+; I(1) = d1 c1 b1 a1
+; I(2) = d2 c2 b2 a2
+; I(3) = d3 c3 b3 a3
+;
+; J(4) = h0 g0 f0 e0
+; J(5) = h1 g1 f1 e1
+; J(6) = h2 g2 f2 e2
+; J(7) = h3 g3 f3 e3
+;
+; I(0) I(1) I(2) I(3) is the transpose of r0 I(1) r2 r3.
+; J(4) J(5) J(6) J(7) is the transpose of r4 r5 r6 r7.
+;
+; Since r1 is free at entry, we calculate the Js first.
+%macro Transpose 0
+ movq m1, m4 ; r1 = e3 e2 e1 e0
+ punpcklwd m4, m5 ; r4 = f1 e1 f0 e0
+ movq I(0), m0 ; save a3 a2 a1 a0
+ punpckhwd m1, m5 ; r1 = f3 e3 f2 e2
+ movq m0, m6 ; r0 = g3 g2 g1 g0
+ punpcklwd m6, m7 ; r6 = h1 g1 h0 g0
+ movq m5, m4 ; r5 = f1 e1 f0 e0
+ punpckldq m4, m6 ; r4 = h0 g0 f0 e0 = R4
+ punpckhdq m5, m6 ; r5 = h1 g1 f1 e1 = R5
+ movq m6, m1 ; r6 = f3 e3 f2 e2
+ movq J(4), m4
+ punpckhwd m0, m7 ; r0 = h3 g3 h2 g2
+ movq J(5), m5
+ punpckhdq m6, m0 ; r6 = h3 g3 f3 e3 = R7
+ movq m4, I(0) ; r4 = a3 a2 a1 a0
+ punpckldq m1, m0 ; r1 = h2 g2 f2 e2 = R6
+ movq m5, I(1) ; r5 = b3 b2 b1 b0
+ movq m0, m4 ; r0 = a3 a2 a1 a0
+ movq J(7), m6
+ punpcklwd m0, m5 ; r0 = b1 a1 b0 a0
+ movq J(6), m1
+ punpckhwd m4, m5 ; r4 = b3 a3 b2 a2
+ movq m5, m2 ; r5 = c3 c2 c1 c0
+ punpcklwd m2, m3 ; r2 = d1 c1 d0 c0
+ movq m1, m0 ; r1 = b1 a1 b0 a0
+ punpckldq m0, m2 ; r0 = d0 c0 b0 a0 = R0
+ punpckhdq m1, m2 ; r1 = d1 c1 b1 a1 = R1
+ movq m2, m4 ; r2 = b3 a3 b2 a2
+ movq I(0), m0
+ punpckhwd m5, m3 ; r5 = d3 c3 d2 c2
+ movq I(1), m1
+ punpckhdq m4, m5 ; r4 = d3 c3 b3 a3 = R3
+ punpckldq m2, m5 ; r2 = d2 c2 b2 a2 = R2
+ movq I(3), m4
+ movq I(2), m2
+%endmacro
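
Despite the punpck gymnastics, Transpose computes exactly what the diagram above says: two independent in-place 4x4 transposes of 16-bit elements (a..d into I(0..3), e..h into J(4..7)). Each half is, in plain C:

    #include <stdint.h>

    static void transpose_4x4_w(int16_t m[4][4])
    {
        for (int i = 0; i < 4; i++)
            for (int j = i + 1; j < 4; j++) {   /* swap across diagonal */
                int16_t t = m[i][j];
                m[i][j]   = m[j][i];
                m[j][i]   = t;
            }
    }
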
+
+%macro VP3_IDCT_mmx 1
+ ; eax = quantized input
+ ; ebx = dequantizer matrix
+ ; ecx = IDCT constants
+ ; M(I) = ecx + MaskOffset(0) + I * 8
+ ; C(I) = ecx + CosineOffset(32) + (I-1) * 8
+ ; edx = output
+ ; r0..r7 = mm0..mm7
+%define OC_8 [pw_8]
+%define C(x) [vp3_idct_data+16*(x-1)]
+
+ ; at this point, function has completed dequantization + dezigzag +
+ ; partial transposition; now do the idct itself
+%define I(x) [%1+16* x ]
+%define J(x) [%1+16*(x-4)+8]
+ RowIDCT
+ Transpose
+
+%define I(x) [%1+16* x +64]
+%define J(x) [%1+16*(x-4)+72]
+ RowIDCT
+ Transpose
+
+%define I(x) [%1+16*x]
+%define J(x) [%1+16*x]
+ ColumnIDCT
+
+%define I(x) [%1+16*x+8]
+%define J(x) [%1+16*x+8]
+ ColumnIDCT
+%endmacro
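
Reading the %define blocks: I(x)/J(x) step in 16-byte rows, so the two RowIDCT+Transpose passes cover the top half (byte offset 0) and bottom half (offset 64) of the 8x8 block, and the two ColumnIDCT passes then cover the left (+0) and right (+8 bytes) column halves. The overall shape as a C sketch, with stub names standing in for the macros:

    #include <stdint.h>

    /* Stubs that only mark where each pass reads and writes; the
     * arithmetic is in RowIDCT/ColumnIDCT/Transpose above. */
    static void row_idct_4rows(int16_t *p)    { (void)p; }
    static void transpose_two_4x4(int16_t *p) { (void)p; }
    static void col_idct_4cols(int16_t *p)    { (void)p; }

    static void vp3_idct_shape(int16_t blk[64])
    {
        row_idct_4rows(blk +  0); transpose_two_4x4(blk +  0); /* rows 0-3 */
        row_idct_4rows(blk + 32); transpose_two_4x4(blk + 32); /* rows 4-7 */
        col_idct_4cols(blk + 0);                               /* cols 0-3 */
        col_idct_4cols(blk + 4);                               /* cols 4-7 */
    }
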
+
+%macro VP3_1D_IDCT_SSE2 0
+ movdqa m2, I(3) ; xmm2 = i3
+ movdqa m6, C(3) ; xmm6 = c3
+ movdqa m4, m2 ; xmm4 = i3
+ movdqa m7, I(5) ; xmm7 = i5
+ pmulhw m4, m6 ; xmm4 = c3 * i3 - i3
+ movdqa m1, C(5) ; xmm1 = c5
+ pmulhw m6, m7 ; xmm6 = c3 * i5 - i5
+ movdqa m5, m1 ; xmm5 = c5
+ pmulhw m1, m2 ; xmm1 = c5 * i3 - i3
+ movdqa m3, I(1) ; xmm3 = i1
+ pmulhw m5, m7 ; xmm5 = c5 * i5 - i5
+ movdqa m0, C(1) ; xmm0 = c1
+ paddw m4, m2 ; xmm4 = c3 * i3
+ paddw m6, m7 ; xmm6 = c3 * i5
+ paddw m2, m1 ; xmm2 = c5 * i3
+ movdqa m1, I(7) ; xmm1 = i7
+ paddw m7, m5 ; xmm7 = c5 * i5
+ movdqa m5, m0 ; xmm5 = c1
+ pmulhw m0, m3 ; xmm0 = c1 * i1 - i1
+ paddsw m4, m7 ; xmm4 = c3 * i3 + c5 * i5 = C
+ pmulhw m5, m1 ; xmm5 = c1 * i7 - i7
+ movdqa m7, C(7) ; xmm7 = c7
+ psubsw m6, m2 ; xmm6 = c3 * i5 - c5 * i3 = D
+ paddw m0, m3 ; xmm0 = c1 * i1
+ pmulhw m3, m7 ; xmm3 = c7 * i1
+ movdqa m2, I(2) ; xmm2 = i2
+ pmulhw m7, m1 ; xmm7 = c7 * i7
+ paddw m5, m1 ; xmm5 = c1 * i7
+ movdqa m1, m2 ; xmm1 = i2
+ pmulhw m2, C(2) ; xmm2 = i2 * c2 -i2
+ psubsw m3, m5 ; xmm3 = c7 * i1 - c1 * i7 = B
+ movdqa m5, I(6) ; xmm5 = i6
+ paddsw m0, m7 ; xmm0 = c1 * i1 + c7 * i7 = A
+ movdqa m7, m5 ; xmm7 = i6
+ psubsw m0, m4 ; xmm0 = A - C
+ pmulhw m5, C(2) ; xmm5 = c2 * i6 - i6
+ paddw m2, m1 ; xmm2 = i2 * c2
+ pmulhw m1, C(6) ; xmm1 = c6 * i2
+ paddsw m4, m4 ; xmm4 = C + C
+ paddsw m4, m0 ; xmm4 = A + C = C.
+ psubsw m3, m6 ; xmm3 = B - D
+ paddw m5, m7 ; xmm5 = c2 * i6
+ paddsw m6, m6 ; xmm6 = D + D
+ pmulhw m7, C(6) ; xmm7 = c6 * i6
+ paddsw m6, m3 ; xmm6 = B + D = D.
+ movdqa I(1), m4 ; Save C. at I(1)
+ psubsw m1, m5 ; xmm1 = c6 * i2 - c2 * i6 = H
+ movdqa m4, C(4) ; xmm4 = C4
+ movdqa m5, m3 ; xmm5 = B - D
+ pmulhw m3, m4 ; xmm3 = ( c4 -1 ) * ( B - D )
+ paddsw m7, m2 ; xmm7 = c2 * i2 + c6 * i6 = G
+ movdqa I(2), m6 ; save D. at I(2)
+ movdqa m2, m0 ; xmm2 = A - C
+ movdqa m6, I(0) ; xmm6 = i0
+ pmulhw m0, m4 ; xmm0 = ( c4 - 1 ) * ( A - C ) = A.
+ paddw m5, m3 ; xmm5 = c4 * ( B - D ) = B.
+ movdqa m3, I(4) ; xmm3 = i4
+ psubsw m5, m1 ; xmm5 = B. - H = B..
+ paddw m2, m0 ; xmm2 = c4 * ( A - C) = A.
+ psubsw m6, m3 ; xmm6 = i0 - i4
+ movdqa m0, m6 ; xmm0 = i0 - i4
+ pmulhw m6, m4 ; xmm6 = (c4 - 1) * (i0 - i4) = F
+ paddsw m3, m3 ; xmm3 = i4 + i4
+ paddsw m1, m1 ; xmm1 = H + H
+ paddsw m3, m0 ; xmm3 = i0 + i4
+ paddsw m1, m5 ; xmm1 = B. + H = H.
+ pmulhw m4, m3 ; xmm4 = ( c4 - 1 ) * ( i0 + i4 )
+ paddw m6, m0 ; xmm6 = c4 * ( i0 - i4 )
+ psubsw m6, m2 ; xmm6 = F - A. = F.
+ paddsw m2, m2 ; xmm2 = A. + A.
+ movdqa m0, I(1) ; Load C. from I(1)
+ paddsw m2, m6 ; xmm2 = F + A. = A..
+ paddw m4, m3 ; xmm4 = c4 * ( i0 + i4 ) = E
+ psubsw m2, m1 ; xmm2 = A.. - H. = R2
+ ADD(m2) ; Adjust R2 and R1 before shifting
+ paddsw m1, m1 ; xmm1 = H. + H.
+ paddsw m1, m2 ; xmm1 = A.. + H. = R1
+ SHIFT(m2) ; xmm2 = op2
+ psubsw m4, m7 ; xmm4 = E - G = E.
+ SHIFT(m1) ; xmm1 = op1
+ movdqa m3, I(2) ; Load D. from I(2)
+ paddsw m7, m7 ; xmm7 = G + G
+ paddsw m7, m4 ; xmm7 = E + G = G.
+ psubsw m4, m3 ; xmm4 = E. - D. = R4
+ ADD(m4) ; Adjust R4 and R3 before shifting
+ paddsw m3, m3 ; xmm3 = D. + D.
+ paddsw m3, m4 ; xmm3 = E. + D. = R3
+ SHIFT(m4) ; xmm4 = op4
+ psubsw m6, m5 ; xmm6 = F. - B..= R6
+ SHIFT(m3) ; xmm3 = op3
+ ADD(m6) ; Adjust R6 and R5 before shifting
+ paddsw m5, m5 ; xmm5 = B.. + B..
+ paddsw m5, m6 ; xmm5 = F. + B.. = R5
+ SHIFT(m6) ; xmm6 = op6
+ SHIFT(m5) ; xmm5 = op5
+ psubsw m7, m0 ; xmm7 = G. - C. = R7
+ ADD(m7) ; Adjust R7 and R0 before shifting
+ paddsw m0, m0 ; xmm0 = C. + C.
+ paddsw m0, m7 ; xmm0 = G. + C.
+ SHIFT(m7) ; xmm7 = op7
+ SHIFT(m0) ; xmm0 = op0
+%endmacro
+
+%macro PUT_BLOCK 8
+ movdqa O(0), m%1
+ movdqa O(1), m%2
+ movdqa O(2), m%3
+ movdqa O(3), m%4
+ movdqa O(4), m%5
+ movdqa O(5), m%6
+ movdqa O(6), m%7
+ movdqa O(7), m%8
+%endmacro
+
+%macro VP3_IDCT_sse2 1
+%define I(x) [%1+16*x]
+%define O(x) [%1+16*x]
+%define C(x) [vp3_idct_data+16*(x-1)]
+%define SHIFT(x)
+%define ADD(x)
+ VP3_1D_IDCT_SSE2
+%ifdef ARCH_X86_64
+ TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, 8
+%else
+ TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, [%1], [%1+16]
+%endif
+ PUT_BLOCK 0, 1, 2, 3, 4, 5, 6, 7
+
+%define SHIFT(x) psraw x, 4
+%define ADD(x) paddsw x, [pw_8]
+ VP3_1D_IDCT_SSE2
+ PUT_BLOCK 0, 1, 2, 3, 4, 5, 6, 7
+%endmacro
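
Note how VP3_IDCT_sse2 instantiates the same 1-D macro twice: the first (row) pass runs with ADD/SHIFT defined away, and only the second (column) pass rounds and scales, with ADD = paddsw pw_8 and SHIFT = psraw 4. Per output coefficient that final step is just a round-to-nearest division by 16:

    /* normalization applied once, by the second pass only */
    static inline short vp3_idct_round(int v)
    {
        return (short)((v + 8) >> 4);
    }
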
+
+%macro vp3_idct_funcs 3
+cglobal vp3_idct_%1, 1, 1, %2
+ VP3_IDCT_%1 r0
+ RET
+
+cglobal vp3_idct_put_%1, 3, %3, %2
+ VP3_IDCT_%1 r2
+%ifdef ARCH_X86_64
+ mov r3, r2
+ mov r2, r1
+ mov r1, r0
+ mov r0, r3
+%else
+ mov r0m, r2
+ mov r1m, r0
+ mov r2m, r1
+%endif
+%ifdef WIN64
+ call put_signed_pixels_clamped_mmx
+ RET
+%else
+ jmp put_signed_pixels_clamped_mmx
+%endif
+
+cglobal vp3_idct_add_%1, 3, %3, %2
+ VP3_IDCT_%1 r2
+%ifdef ARCH_X86_64
+ mov r3, r2
+ mov r2, r1
+ mov r1, r0
+ mov r0, r3
+%else
+ mov r0m, r2
+ mov r1m, r0
+ mov r2m, r1
+%endif
+%ifdef WIN64
+ call add_pixels_clamped_mmx
+ RET
+%else
+ jmp add_pixels_clamped_mmx
+%endif
+%endmacro
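
The mov shuffle followed by jmp in vp3_idct_put_/vp3_idct_add_ is a tail call: the incoming (dest, line_size, block) arguments are rotated into the (block, dest, line_size) order that put/add_pixels_clamped expects, and control jumps straight into that routine, whose ret returns to our caller. On WIN64 a call+RET pair is used instead, presumably so this function's own epilogue (restoring callee-saved XMM registers) still runs. The composition being implemented is the same one visible in the deleted C wrappers further down:

    #include <stdint.h>

    /* Prototypes stand in for the real mmx/sse2 kernels. */
    void vp3_idct(int16_t *block);
    void put_signed_pixels_clamped(const int16_t *block, uint8_t *dest,
                                   int line_size);

    void vp3_idct_put(uint8_t *dest, int line_size, int16_t *block)
    {
        vp3_idct(block);                                    /* in place  */
        put_signed_pixels_clamped(block, dest, line_size);  /* tail call */
    }
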
+
+%ifdef ARCH_X86_64
+%define REGS 4
+%else
+%define REGS 3
+%endif
+INIT_MMX
+vp3_idct_funcs mmx, 0, REGS
+INIT_XMM
+vp3_idct_funcs sse2, 9, REGS
+%undef REGS
+
+%macro DC_ADD 0
+ movq m2, [r0 ]
+ movq m3, [r0+r1 ]
+ paddusb m2, m0
+ movq m4, [r0+r1*2]
+ paddusb m3, m0
+ movq m5, [r0+r3 ]
+ paddusb m4, m0
+ paddusb m5, m0
+ psubusb m2, m1
+ psubusb m3, m1
+ movq [r0 ], m2
+ psubusb m4, m1
+ movq [r0+r1 ], m3
+ psubusb m5, m1
+ movq [r0+r1*2], m4
+ movq [r0+r3 ], m5
+%endmacro
+
+INIT_MMX
+cglobal vp3_idct_dc_add_mmx2, 3, 4
+%ifdef ARCH_X86_64
+ movsxd r1, r1d
+%endif
+ lea r3, [r1*3]
+ movsx r2, word [r2]
+ add r2, 15
+ sar r2, 5
+ movd m0, r2d
+ pshufw m0, m0, 0x0
+ pxor m1, m1
+ psubw m1, m0
+ packuswb m0, m0
+ packuswb m1, m1
+ DC_ADD
+ lea r0, [r0+r1*4]
+ DC_ADD
+ RET
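
vp3_idct_dc_add_mmx2 handles blocks whose only nonzero coefficient is DC: it rounds dc = (block[0] + 15) >> 5, then splats max(dc, 0) into m0 and max(-dc, 0) into m1 (packuswb clamps whichever value is negative to zero), so the paddusb/psubusb pair amounts to a saturating signed add on every pixel. The scalar meaning, mirroring the C reference (the asm covers the 8 rows as two DC_ADD groups of 4):

    #include <stdint.h>

    static void vp3_idct_dc_add_c(uint8_t *dest, int linesize,
                                  const int16_t *block)
    {
        int dc = (block[0] + 15) >> 5;          /* rounded DC term */
        for (int y = 0; y < 8; y++) {
            for (int x = 0; x < 8; x++) {
                int v = dest[x] + dc;
                dest[x] = v < 0 ? 0 : v > 255 ? 255 : (uint8_t)v;
            }
            dest += linesize;
        }
    }
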
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp3dsp_mmx.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp3dsp_mmx.c
deleted file mode 100644
index 92985921e..000000000
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp3dsp_mmx.c
+++ /dev/null
@@ -1,436 +0,0 @@
-/*
- * Copyright (C) 2004 the ffmpeg project
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-/**
- * @file
- * MMX-optimized functions cribbed from the original VP3 source code.
- */
-
-#include "libavutil/x86_cpu.h"
-#include "libavcodec/dsputil.h"
-#include "dsputil_mmx.h"
-#include "vp3dsp_mmx.h"
-
-extern const uint16_t ff_vp3_idct_data[];
-
-// this is off by one or two for some cases when filter_limit is greater than 63
-// in: p0 in mm6, p1 in mm4, p2 in mm2, p3 in mm1
-// out: p1 in mm4, p2 in mm3
-#define VP3_LOOP_FILTER(flim) \
- "movq %%mm6, %%mm7 \n\t" \
- "pand "MANGLE(ff_pb_7 )", %%mm6 \n\t" /* p0&7 */ \
- "psrlw $3, %%mm7 \n\t" \
- "pand "MANGLE(ff_pb_1F)", %%mm7 \n\t" /* p0>>3 */ \
- "movq %%mm2, %%mm3 \n\t" /* mm3 = p2 */ \
- "pxor %%mm4, %%mm2 \n\t" \
- "pand "MANGLE(ff_pb_1 )", %%mm2 \n\t" /* (p2^p1)&1 */ \
- "movq %%mm2, %%mm5 \n\t" \
- "paddb %%mm2, %%mm2 \n\t" \
- "paddb %%mm5, %%mm2 \n\t" /* 3*(p2^p1)&1 */ \
- "paddb %%mm6, %%mm2 \n\t" /* extra bits lost in shifts */ \
- "pcmpeqb %%mm0, %%mm0 \n\t" \
- "pxor %%mm0, %%mm1 \n\t" /* 255 - p3 */ \
- "pavgb %%mm2, %%mm1 \n\t" /* (256 - p3 + extrabits) >> 1 */ \
- "pxor %%mm4, %%mm0 \n\t" /* 255 - p1 */ \
- "pavgb %%mm3, %%mm0 \n\t" /* (256 + p2-p1) >> 1 */ \
- "paddb "MANGLE(ff_pb_3 )", %%mm1 \n\t" \
- "pavgb %%mm0, %%mm1 \n\t" /* 128+2+( p2-p1 - p3) >> 2 */ \
- "pavgb %%mm0, %%mm1 \n\t" /* 128+1+(3*(p2-p1) - p3) >> 3 */ \
- "paddusb %%mm1, %%mm7 \n\t" /* d+128+1 */ \
- "movq "MANGLE(ff_pb_81)", %%mm6 \n\t" \
- "psubusb %%mm7, %%mm6 \n\t" \
- "psubusb "MANGLE(ff_pb_81)", %%mm7 \n\t" \
-\
- "movq "#flim", %%mm5 \n\t" \
- "pminub %%mm5, %%mm6 \n\t" \
- "pminub %%mm5, %%mm7 \n\t" \
- "movq %%mm6, %%mm0 \n\t" \
- "movq %%mm7, %%mm1 \n\t" \
- "paddb %%mm6, %%mm6 \n\t" \
- "paddb %%mm7, %%mm7 \n\t" \
- "pminub %%mm5, %%mm6 \n\t" \
- "pminub %%mm5, %%mm7 \n\t" \
- "psubb %%mm0, %%mm6 \n\t" \
- "psubb %%mm1, %%mm7 \n\t" \
- "paddusb %%mm7, %%mm4 \n\t" \
- "psubusb %%mm6, %%mm4 \n\t" \
- "psubusb %%mm7, %%mm3 \n\t" \
- "paddusb %%mm6, %%mm3 \n\t"
-
-#define STORE_4_WORDS(dst0, dst1, dst2, dst3, mm) \
- "movd "#mm", %0 \n\t" \
- "movw %w0, -1"#dst0" \n\t" \
- "psrlq $32, "#mm" \n\t" \
- "shr $16, %0 \n\t" \
- "movw %w0, -1"#dst1" \n\t" \
- "movd "#mm", %0 \n\t" \
- "movw %w0, -1"#dst2" \n\t" \
- "shr $16, %0 \n\t" \
- "movw %w0, -1"#dst3" \n\t"
-
-void ff_vp3_v_loop_filter_mmx2(uint8_t *src, int stride, int *bounding_values)
-{
- __asm__ volatile(
- "movq %0, %%mm6 \n\t"
- "movq %1, %%mm4 \n\t"
- "movq %2, %%mm2 \n\t"
- "movq %3, %%mm1 \n\t"
-
- VP3_LOOP_FILTER(%4)
-
- "movq %%mm4, %1 \n\t"
- "movq %%mm3, %2 \n\t"
-
- : "+m" (*(uint64_t*)(src - 2*stride)),
- "+m" (*(uint64_t*)(src - 1*stride)),
- "+m" (*(uint64_t*)(src + 0*stride)),
- "+m" (*(uint64_t*)(src + 1*stride))
- : "m"(*(uint64_t*)(bounding_values+129))
- );
-}
-
-void ff_vp3_h_loop_filter_mmx2(uint8_t *src, int stride, int *bounding_values)
-{
- x86_reg tmp;
-
- __asm__ volatile(
- "movd -2(%1), %%mm6 \n\t"
- "movd -2(%1,%3), %%mm0 \n\t"
- "movd -2(%1,%3,2), %%mm1 \n\t"
- "movd -2(%1,%4), %%mm4 \n\t"
-
- TRANSPOSE8x4(%%mm6, %%mm0, %%mm1, %%mm4, -2(%2), -2(%2,%3), -2(%2,%3,2), -2(%2,%4), %%mm2)
- VP3_LOOP_FILTER(%5)
- SBUTTERFLY(%%mm4, %%mm3, %%mm5, bw, q)
-
- STORE_4_WORDS((%1), (%1,%3), (%1,%3,2), (%1,%4), %%mm4)
- STORE_4_WORDS((%2), (%2,%3), (%2,%3,2), (%2,%4), %%mm5)
-
- : "=&r"(tmp)
- : "r"(src), "r"(src+4*stride), "r"((x86_reg)stride), "r"((x86_reg)3*stride),
- "m"(*(uint64_t*)(bounding_values+129))
- : "memory"
- );
-}
-
-/* from original comments: The Macro does IDct on 4 1-D Dcts */
-#define BeginIDCT() \
- "movq "I(3)", %%mm2 \n\t" \
- "movq "C(3)", %%mm6 \n\t" \
- "movq %%mm2, %%mm4 \n\t" \
- "movq "J(5)", %%mm7 \n\t" \
- "pmulhw %%mm6, %%mm4 \n\t" /* r4 = c3*i3 - i3 */ \
- "movq "C(5)", %%mm1 \n\t" \
- "pmulhw %%mm7, %%mm6 \n\t" /* r6 = c3*i5 - i5 */ \
- "movq %%mm1, %%mm5 \n\t" \
- "pmulhw %%mm2, %%mm1 \n\t" /* r1 = c5*i3 - i3 */ \
- "movq "I(1)", %%mm3 \n\t" \
- "pmulhw %%mm7, %%mm5 \n\t" /* r5 = c5*i5 - i5 */ \
- "movq "C(1)", %%mm0 \n\t" \
- "paddw %%mm2, %%mm4 \n\t" /* r4 = c3*i3 */ \
- "paddw %%mm7, %%mm6 \n\t" /* r6 = c3*i5 */ \
- "paddw %%mm1, %%mm2 \n\t" /* r2 = c5*i3 */ \
- "movq "J(7)", %%mm1 \n\t" \
- "paddw %%mm5, %%mm7 \n\t" /* r7 = c5*i5 */ \
- "movq %%mm0, %%mm5 \n\t" /* r5 = c1 */ \
- "pmulhw %%mm3, %%mm0 \n\t" /* r0 = c1*i1 - i1 */ \
- "paddsw %%mm7, %%mm4 \n\t" /* r4 = C = c3*i3 + c5*i5 */ \
- "pmulhw %%mm1, %%mm5 \n\t" /* r5 = c1*i7 - i7 */ \
- "movq "C(7)", %%mm7 \n\t" \
- "psubsw %%mm2, %%mm6 \n\t" /* r6 = D = c3*i5 - c5*i3 */ \
- "paddw %%mm3, %%mm0 \n\t" /* r0 = c1*i1 */ \
- "pmulhw %%mm7, %%mm3 \n\t" /* r3 = c7*i1 */ \
- "movq "I(2)", %%mm2 \n\t" \
- "pmulhw %%mm1, %%mm7 \n\t" /* r7 = c7*i7 */ \
- "paddw %%mm1, %%mm5 \n\t" /* r5 = c1*i7 */ \
- "movq %%mm2, %%mm1 \n\t" /* r1 = i2 */ \
- "pmulhw "C(2)", %%mm2 \n\t" /* r2 = c2*i2 - i2 */ \
- "psubsw %%mm5, %%mm3 \n\t" /* r3 = B = c7*i1 - c1*i7 */ \
- "movq "J(6)", %%mm5 \n\t" \
- "paddsw %%mm7, %%mm0 \n\t" /* r0 = A = c1*i1 + c7*i7 */ \
- "movq %%mm5, %%mm7 \n\t" /* r7 = i6 */ \
- "psubsw %%mm4, %%mm0 \n\t" /* r0 = A - C */ \
- "pmulhw "C(2)", %%mm5 \n\t" /* r5 = c2*i6 - i6 */ \
- "paddw %%mm1, %%mm2 \n\t" /* r2 = c2*i2 */ \
- "pmulhw "C(6)", %%mm1 \n\t" /* r1 = c6*i2 */ \
- "paddsw %%mm4, %%mm4 \n\t" /* r4 = C + C */ \
- "paddsw %%mm0, %%mm4 \n\t" /* r4 = C. = A + C */ \
- "psubsw %%mm6, %%mm3 \n\t" /* r3 = B - D */ \
- "paddw %%mm7, %%mm5 \n\t" /* r5 = c2*i6 */ \
- "paddsw %%mm6, %%mm6 \n\t" /* r6 = D + D */ \
- "pmulhw "C(6)", %%mm7 \n\t" /* r7 = c6*i6 */ \
- "paddsw %%mm3, %%mm6 \n\t" /* r6 = D. = B + D */ \
- "movq %%mm4, "I(1)"\n\t" /* save C. at I(1) */ \
- "psubsw %%mm5, %%mm1 \n\t" /* r1 = H = c6*i2 - c2*i6 */ \
- "movq "C(4)", %%mm4 \n\t" \
- "movq %%mm3, %%mm5 \n\t" /* r5 = B - D */ \
- "pmulhw %%mm4, %%mm3 \n\t" /* r3 = (c4 - 1) * (B - D) */ \
- "paddsw %%mm2, %%mm7 \n\t" /* r3 = (c4 - 1) * (B - D) */ \
- "movq %%mm6, "I(2)"\n\t" /* save D. at I(2) */ \
- "movq %%mm0, %%mm2 \n\t" /* r2 = A - C */ \
- "movq "I(0)", %%mm6 \n\t" \
- "pmulhw %%mm4, %%mm0 \n\t" /* r0 = (c4 - 1) * (A - C) */ \
- "paddw %%mm3, %%mm5 \n\t" /* r5 = B. = c4 * (B - D) */ \
- "movq "J(4)", %%mm3 \n\t" \
- "psubsw %%mm1, %%mm5 \n\t" /* r5 = B.. = B. - H */ \
- "paddw %%mm0, %%mm2 \n\t" /* r0 = A. = c4 * (A - C) */ \
- "psubsw %%mm3, %%mm6 \n\t" /* r6 = i0 - i4 */ \
- "movq %%mm6, %%mm0 \n\t" \
- "pmulhw %%mm4, %%mm6 \n\t" /* r6 = (c4 - 1) * (i0 - i4) */ \
- "paddsw %%mm3, %%mm3 \n\t" /* r3 = i4 + i4 */ \
- "paddsw %%mm1, %%mm1 \n\t" /* r1 = H + H */ \
- "paddsw %%mm0, %%mm3 \n\t" /* r3 = i0 + i4 */ \
- "paddsw %%mm5, %%mm1 \n\t" /* r1 = H. = B + H */ \
- "pmulhw %%mm3, %%mm4 \n\t" /* r4 = (c4 - 1) * (i0 + i4) */ \
- "paddsw %%mm0, %%mm6 \n\t" /* r6 = F = c4 * (i0 - i4) */ \
- "psubsw %%mm2, %%mm6 \n\t" /* r6 = F. = F - A. */ \
- "paddsw %%mm2, %%mm2 \n\t" /* r2 = A. + A. */ \
- "movq "I(1)", %%mm0 \n\t" /* r0 = C. */ \
- "paddsw %%mm6, %%mm2 \n\t" /* r2 = A.. = F + A. */ \
- "paddw %%mm3, %%mm4 \n\t" /* r4 = E = c4 * (i0 + i4) */ \
- "psubsw %%mm1, %%mm2 \n\t" /* r2 = R2 = A.. - H. */
-
-/* RowIDCT gets ready to transpose */
-#define RowIDCT() \
- BeginIDCT() \
- "movq "I(2)", %%mm3 \n\t" /* r3 = D. */ \
- "psubsw %%mm7, %%mm4 \n\t" /* r4 = E. = E - G */ \
- "paddsw %%mm1, %%mm1 \n\t" /* r1 = H. + H. */ \
- "paddsw %%mm7, %%mm7 \n\t" /* r7 = G + G */ \
- "paddsw %%mm2, %%mm1 \n\t" /* r1 = R1 = A.. + H. */ \
- "paddsw %%mm4, %%mm7 \n\t" /* r1 = R1 = A.. + H. */ \
- "psubsw %%mm3, %%mm4 \n\t" /* r4 = R4 = E. - D. */ \
- "paddsw %%mm3, %%mm3 \n\t" \
- "psubsw %%mm5, %%mm6 \n\t" /* r6 = R6 = F. - B.. */ \
- "paddsw %%mm5, %%mm5 \n\t" \
- "paddsw %%mm4, %%mm3 \n\t" /* r3 = R3 = E. + D. */ \
- "paddsw %%mm6, %%mm5 \n\t" /* r5 = R5 = F. + B.. */ \
- "psubsw %%mm0, %%mm7 \n\t" /* r7 = R7 = G. - C. */ \
- "paddsw %%mm0, %%mm0 \n\t" \
- "movq %%mm1, "I(1)"\n\t" /* save R1 */ \
- "paddsw %%mm7, %%mm0 \n\t" /* r0 = R0 = G. + C. */
-
-/* Column IDCT normalizes and stores final results */
-#define ColumnIDCT() \
- BeginIDCT() \
- "paddsw "OC_8", %%mm2 \n\t" /* adjust R2 (and R1) for shift */ \
- "paddsw %%mm1, %%mm1 \n\t" /* r1 = H. + H. */ \
- "paddsw %%mm2, %%mm1 \n\t" /* r1 = R1 = A.. + H. */ \
- "psraw $4, %%mm2 \n\t" /* r2 = NR2 */ \
- "psubsw %%mm7, %%mm4 \n\t" /* r4 = E. = E - G */ \
- "psraw $4, %%mm1 \n\t" /* r1 = NR1 */ \
- "movq "I(2)", %%mm3 \n\t" /* r3 = D. */ \
- "paddsw %%mm7, %%mm7 \n\t" /* r7 = G + G */ \
- "movq %%mm2, "I(2)"\n\t" /* store NR2 at I2 */ \
- "paddsw %%mm4, %%mm7 \n\t" /* r7 = G. = E + G */ \
- "movq %%mm1, "I(1)"\n\t" /* store NR1 at I1 */ \
- "psubsw %%mm3, %%mm4 \n\t" /* r4 = R4 = E. - D. */ \
- "paddsw "OC_8", %%mm4 \n\t" /* adjust R4 (and R3) for shift */ \
- "paddsw %%mm3, %%mm3 \n\t" /* r3 = D. + D. */ \
- "paddsw %%mm4, %%mm3 \n\t" /* r3 = R3 = E. + D. */ \
- "psraw $4, %%mm4 \n\t" /* r4 = NR4 */ \
- "psubsw %%mm5, %%mm6 \n\t" /* r6 = R6 = F. - B.. */ \
- "psraw $4, %%mm3 \n\t" /* r3 = NR3 */ \
- "paddsw "OC_8", %%mm6 \n\t" /* adjust R6 (and R5) for shift */ \
- "paddsw %%mm5, %%mm5 \n\t" /* r5 = B.. + B.. */ \
- "paddsw %%mm6, %%mm5 \n\t" /* r5 = R5 = F. + B.. */ \
- "psraw $4, %%mm6 \n\t" /* r6 = NR6 */ \
- "movq %%mm4, "J(4)"\n\t" /* store NR4 at J4 */ \
- "psraw $4, %%mm5 \n\t" /* r5 = NR5 */ \
- "movq %%mm3, "I(3)"\n\t" /* store NR3 at I3 */ \
- "psubsw %%mm0, %%mm7 \n\t" /* r7 = R7 = G. - C. */ \
- "paddsw "OC_8", %%mm7 \n\t" /* adjust R7 (and R0) for shift */ \
- "paddsw %%mm0, %%mm0 \n\t" /* r0 = C. + C. */ \
- "paddsw %%mm7, %%mm0 \n\t" /* r0 = R0 = G. + C. */ \
- "psraw $4, %%mm7 \n\t" /* r7 = NR7 */ \
- "movq %%mm6, "J(6)"\n\t" /* store NR6 at J6 */ \
- "psraw $4, %%mm0 \n\t" /* r0 = NR0 */ \
- "movq %%mm5, "J(5)"\n\t" /* store NR5 at J5 */ \
- "movq %%mm7, "J(7)"\n\t" /* store NR7 at J7 */ \
- "movq %%mm0, "I(0)"\n\t" /* store NR0 at I0 */
-
-/* Following macro does two 4x4 transposes in place.
-
- At entry (we assume):
-
- r0 = a3 a2 a1 a0
- I(1) = b3 b2 b1 b0
- r2 = c3 c2 c1 c0
- r3 = d3 d2 d1 d0
-
- r4 = e3 e2 e1 e0
- r5 = f3 f2 f1 f0
- r6 = g3 g2 g1 g0
- r7 = h3 h2 h1 h0
-
- At exit, we have:
-
- I(0) = d0 c0 b0 a0
- I(1) = d1 c1 b1 a1
- I(2) = d2 c2 b2 a2
- I(3) = d3 c3 b3 a3
-
- J(4) = h0 g0 f0 e0
- J(5) = h1 g1 f1 e1
- J(6) = h2 g2 f2 e2
- J(7) = h3 g3 f3 e3
-
- I(0) I(1) I(2) I(3) is the transpose of r0 I(1) r2 r3.
- J(4) J(5) J(6) J(7) is the transpose of r4 r5 r6 r7.
-
- Since r1 is free at entry, we calculate the Js first. */
-#define Transpose() \
- "movq %%mm4, %%mm1 \n\t" /* r1 = e3 e2 e1 e0 */ \
- "punpcklwd %%mm5, %%mm4 \n\t" /* r4 = f1 e1 f0 e0 */ \
- "movq %%mm0, "I(0)"\n\t" /* save a3 a2 a1 a0 */ \
- "punpckhwd %%mm5, %%mm1 \n\t" /* r1 = f3 e3 f2 e2 */ \
- "movq %%mm6, %%mm0 \n\t" /* r0 = g3 g2 g1 g0 */ \
- "punpcklwd %%mm7, %%mm6 \n\t" /* r6 = h1 g1 h0 g0 */ \
- "movq %%mm4, %%mm5 \n\t" /* r5 = f1 e1 f0 e0 */ \
- "punpckldq %%mm6, %%mm4 \n\t" /* r4 = h0 g0 f0 e0 = R4 */ \
- "punpckhdq %%mm6, %%mm5 \n\t" /* r5 = h1 g1 f1 e1 = R5 */ \
- "movq %%mm1, %%mm6 \n\t" /* r6 = f3 e3 f2 e2 */ \
- "movq %%mm4, "J(4)"\n\t" \
- "punpckhwd %%mm7, %%mm0 \n\t" /* r0 = h3 g3 h2 g2 */ \
- "movq %%mm5, "J(5)"\n\t" \
- "punpckhdq %%mm0, %%mm6 \n\t" /* r6 = h3 g3 f3 e3 = R7 */ \
- "movq "I(0)", %%mm4 \n\t" /* r4 = a3 a2 a1 a0 */ \
- "punpckldq %%mm0, %%mm1 \n\t" /* r1 = h2 g2 f2 e2 = R6 */ \
- "movq "I(1)", %%mm5 \n\t" /* r5 = b3 b2 b1 b0 */ \
- "movq %%mm4, %%mm0 \n\t" /* r0 = a3 a2 a1 a0 */ \
- "movq %%mm6, "J(7)"\n\t" \
- "punpcklwd %%mm5, %%mm0 \n\t" /* r0 = b1 a1 b0 a0 */ \
- "movq %%mm1, "J(6)"\n\t" \
- "punpckhwd %%mm5, %%mm4 \n\t" /* r4 = b3 a3 b2 a2 */ \
- "movq %%mm2, %%mm5 \n\t" /* r5 = c3 c2 c1 c0 */ \
- "punpcklwd %%mm3, %%mm2 \n\t" /* r2 = d1 c1 d0 c0 */ \
- "movq %%mm0, %%mm1 \n\t" /* r1 = b1 a1 b0 a0 */ \
- "punpckldq %%mm2, %%mm0 \n\t" /* r0 = d0 c0 b0 a0 = R0 */ \
- "punpckhdq %%mm2, %%mm1 \n\t" /* r1 = d1 c1 b1 a1 = R1 */ \
- "movq %%mm4, %%mm2 \n\t" /* r2 = b3 a3 b2 a2 */ \
- "movq %%mm0, "I(0)"\n\t" \
- "punpckhwd %%mm3, %%mm5 \n\t" /* r5 = d3 c3 d2 c2 */ \
- "movq %%mm1, "I(1)"\n\t" \
- "punpckhdq %%mm5, %%mm4 \n\t" /* r4 = d3 c3 b3 a3 = R3 */ \
- "punpckldq %%mm5, %%mm2 \n\t" /* r2 = d2 c2 b2 a2 = R2 */ \
- "movq %%mm4, "I(3)"\n\t" \
- "movq %%mm2, "I(2)"\n\t"
-
-void ff_vp3_idct_mmx(int16_t *output_data)
-{
- /* eax = quantized input
- * ebx = dequantizer matrix
- * ecx = IDCT constants
- * M(I) = ecx + MaskOffset(0) + I * 8
- * C(I) = ecx + CosineOffset(32) + (I-1) * 8
- * edx = output
- * r0..r7 = mm0..mm7
- */
-
-#define C(x) AV_STRINGIFY(16*(x-1))"(%1)"
-#define OC_8 "%2"
-
- /* at this point, function has completed dequantization + dezigzag +
- * partial transposition; now do the idct itself */
-#define I(x) AV_STRINGIFY(16* x )"(%0)"
-#define J(x) AV_STRINGIFY(16*(x-4) + 8)"(%0)"
-
- __asm__ volatile (
- RowIDCT()
- Transpose()
-
-#undef I
-#undef J
-#define I(x) AV_STRINGIFY(16* x + 64)"(%0)"
-#define J(x) AV_STRINGIFY(16*(x-4) + 72)"(%0)"
-
- RowIDCT()
- Transpose()
-
-#undef I
-#undef J
-#define I(x) AV_STRINGIFY(16*x)"(%0)"
-#define J(x) AV_STRINGIFY(16*x)"(%0)"
-
- ColumnIDCT()
-
-#undef I
-#undef J
-#define I(x) AV_STRINGIFY(16*x + 8)"(%0)"
-#define J(x) AV_STRINGIFY(16*x + 8)"(%0)"
-
- ColumnIDCT()
- :: "r"(output_data), "r"(ff_vp3_idct_data), "m"(ff_pw_8)
- );
-#undef I
-#undef J
-
-}
-
-void ff_vp3_idct_put_mmx(uint8_t *dest, int line_size, DCTELEM *block)
-{
- ff_vp3_idct_mmx(block);
- put_signed_pixels_clamped_mmx(block, dest, line_size);
-}
-
-void ff_vp3_idct_add_mmx(uint8_t *dest, int line_size, DCTELEM *block)
-{
- ff_vp3_idct_mmx(block);
- add_pixels_clamped_mmx(block, dest, line_size);
-}
-
-void ff_vp3_idct_dc_add_mmx2(uint8_t *dest, int linesize, const DCTELEM *block)
-{
- int dc = (block[0] + 15) >> 5;
-
- __asm__ volatile(
- "movd %3, %%mm0 \n\t"
- "pshufw $0, %%mm0, %%mm0 \n\t"
- "pxor %%mm1, %%mm1 \n\t"
- "psubw %%mm0, %%mm1 \n\t"
- "packuswb %%mm0, %%mm0 \n\t"
- "packuswb %%mm1, %%mm1 \n\t"
-
-#define DC_ADD \
- "movq (%0), %%mm2 \n\t" \
- "movq (%0,%1), %%mm3 \n\t" \
- "paddusb %%mm0, %%mm2 \n\t" \
- "movq (%0,%1,2), %%mm4 \n\t" \
- "paddusb %%mm0, %%mm3 \n\t" \
- "movq (%0,%2), %%mm5 \n\t" \
- "paddusb %%mm0, %%mm4 \n\t" \
- "paddusb %%mm0, %%mm5 \n\t" \
- "psubusb %%mm1, %%mm2 \n\t" \
- "psubusb %%mm1, %%mm3 \n\t" \
- "movq %%mm2, (%0) \n\t" \
- "psubusb %%mm1, %%mm4 \n\t" \
- "movq %%mm3, (%0,%1) \n\t" \
- "psubusb %%mm1, %%mm5 \n\t" \
- "movq %%mm4, (%0,%1,2) \n\t" \
- "movq %%mm5, (%0,%2) \n\t"
-
- DC_ADD
- "lea (%0,%1,4), %0 \n\t"
- DC_ADD
-
- : "+r"(dest)
- : "r"((x86_reg)linesize), "r"((x86_reg)3*linesize), "r"(dc)
- );
-}
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp3dsp_mmx.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp3dsp_mmx.h
deleted file mode 100644
index e0ebf0b0f..000000000
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp3dsp_mmx.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * vp3dsp MMX function declarations
- * Copyright (c) 2007 Aurelien Jacobs <aurel@gnuage.org>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#ifndef AVCODEC_X86_VP3DSP_MMX_H
-#define AVCODEC_X86_VP3DSP_MMX_H
-
-#include <stdint.h>
-#include "libavcodec/dsputil.h"
-
-void ff_vp3_idct_mmx(int16_t *data);
-void ff_vp3_idct_put_mmx(uint8_t *dest, int line_size, DCTELEM *block);
-void ff_vp3_idct_add_mmx(uint8_t *dest, int line_size, DCTELEM *block);
-void ff_vp3_idct_dc_add_mmx2(uint8_t *dest, int line_size, const DCTELEM *block);
-
-void ff_vp3_v_loop_filter_mmx2(uint8_t *src, int stride, int *bounding_values);
-void ff_vp3_h_loop_filter_mmx2(uint8_t *src, int stride, int *bounding_values);
-
-#endif /* AVCODEC_X86_VP3DSP_MMX_H */
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp3dsp_sse2.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp3dsp_sse2.c
deleted file mode 100644
index b54ffa39e..000000000
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp3dsp_sse2.c
+++ /dev/null
@@ -1,187 +0,0 @@
-/*
- * Copyright (C) 2004 the ffmpeg project
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-/**
- * @file
- * SSE2-optimized functions cribbed from the original VP3 source code.
- */
-
-#include "libavcodec/dsputil.h"
-#include "dsputil_mmx.h"
-#include "vp3dsp_sse2.h"
-
-DECLARE_ALIGNED(16, const uint16_t, ff_vp3_idct_data)[7 * 8] =
-{
- 64277,64277,64277,64277,64277,64277,64277,64277,
- 60547,60547,60547,60547,60547,60547,60547,60547,
- 54491,54491,54491,54491,54491,54491,54491,54491,
- 46341,46341,46341,46341,46341,46341,46341,46341,
- 36410,36410,36410,36410,36410,36410,36410,36410,
- 25080,25080,25080,25080,25080,25080,25080,25080,
- 12785,12785,12785,12785,12785,12785,12785,12785
-};
-
-
-#define VP3_1D_IDCT_SSE2(ADD, SHIFT) \
- "movdqa "I(3)", %%xmm2 \n\t" /* xmm2 = i3 */ \
- "movdqa "C(3)", %%xmm6 \n\t" /* xmm6 = c3 */ \
- "movdqa %%xmm2, %%xmm4 \n\t" /* xmm4 = i3 */ \
- "movdqa "I(5)", %%xmm7 \n\t" /* xmm7 = i5 */ \
- "pmulhw %%xmm6, %%xmm4 \n\t" /* xmm4 = c3 * i3 - i3 */ \
- "movdqa "C(5)", %%xmm1 \n\t" /* xmm1 = c5 */ \
- "pmulhw %%xmm7, %%xmm6 \n\t" /* xmm6 = c3 * i5 - i5 */ \
- "movdqa %%xmm1, %%xmm5 \n\t" /* xmm5 = c5 */ \
- "pmulhw %%xmm2, %%xmm1 \n\t" /* xmm1 = c5 * i3 - i3 */ \
- "movdqa "I(1)", %%xmm3 \n\t" /* xmm3 = i1 */ \
- "pmulhw %%xmm7, %%xmm5 \n\t" /* xmm5 = c5 * i5 - i5 */ \
- "movdqa "C(1)", %%xmm0 \n\t" /* xmm0 = c1 */ \
- "paddw %%xmm2, %%xmm4 \n\t" /* xmm4 = c3 * i3 */ \
- "paddw %%xmm7, %%xmm6 \n\t" /* xmm6 = c3 * i5 */ \
- "paddw %%xmm1, %%xmm2 \n\t" /* xmm2 = c5 * i3 */ \
- "movdqa "I(7)", %%xmm1 \n\t" /* xmm1 = i7 */ \
- "paddw %%xmm5, %%xmm7 \n\t" /* xmm7 = c5 * i5 */ \
- "movdqa %%xmm0, %%xmm5 \n\t" /* xmm5 = c1 */ \
- "pmulhw %%xmm3, %%xmm0 \n\t" /* xmm0 = c1 * i1 - i1 */ \
- "paddsw %%xmm7, %%xmm4 \n\t" /* xmm4 = c3 * i3 + c5 * i5 = C */ \
- "pmulhw %%xmm1, %%xmm5 \n\t" /* xmm5 = c1 * i7 - i7 */ \
- "movdqa "C(7)", %%xmm7 \n\t" /* xmm7 = c7 */ \
- "psubsw %%xmm2, %%xmm6 \n\t" /* xmm6 = c3 * i5 - c5 * i3 = D */ \
- "paddw %%xmm3, %%xmm0 \n\t" /* xmm0 = c1 * i1 */ \
- "pmulhw %%xmm7, %%xmm3 \n\t" /* xmm3 = c7 * i1 */ \
- "movdqa "I(2)", %%xmm2 \n\t" /* xmm2 = i2 */ \
- "pmulhw %%xmm1, %%xmm7 \n\t" /* xmm7 = c7 * i7 */ \
- "paddw %%xmm1, %%xmm5 \n\t" /* xmm5 = c1 * i7 */ \
- "movdqa %%xmm2, %%xmm1 \n\t" /* xmm1 = i2 */ \
- "pmulhw "C(2)", %%xmm2 \n\t" /* xmm2 = i2 * c2 -i2 */ \
- "psubsw %%xmm5, %%xmm3 \n\t" /* xmm3 = c7 * i1 - c1 * i7 = B */ \
- "movdqa "I(6)", %%xmm5 \n\t" /* xmm5 = i6 */ \
- "paddsw %%xmm7, %%xmm0 \n\t" /* xmm0 = c1 * i1 + c7 * i7 = A */ \
- "movdqa %%xmm5, %%xmm7 \n\t" /* xmm7 = i6 */ \
- "psubsw %%xmm4, %%xmm0 \n\t" /* xmm0 = A - C */ \
- "pmulhw "C(2)", %%xmm5 \n\t" /* xmm5 = c2 * i6 - i6 */ \
- "paddw %%xmm1, %%xmm2 \n\t" /* xmm2 = i2 * c2 */ \
- "pmulhw "C(6)", %%xmm1 \n\t" /* xmm1 = c6 * i2 */ \
- "paddsw %%xmm4, %%xmm4 \n\t" /* xmm4 = C + C */ \
- "paddsw %%xmm0, %%xmm4 \n\t" /* xmm4 = A + C = C. */ \
- "psubsw %%xmm6, %%xmm3 \n\t" /* xmm3 = B - D */ \
- "paddw %%xmm7, %%xmm5 \n\t" /* xmm5 = c2 * i6 */ \
- "paddsw %%xmm6, %%xmm6 \n\t" /* xmm6 = D + D */ \
- "pmulhw "C(6)", %%xmm7 \n\t" /* xmm7 = c6 * i6 */ \
- "paddsw %%xmm3, %%xmm6 \n\t" /* xmm6 = B + D = D. */ \
- "movdqa %%xmm4, "I(1)" \n\t" /* Save C. at I(1) */ \
- "psubsw %%xmm5, %%xmm1 \n\t" /* xmm1 = c6 * i2 - c2 * i6 = H */ \
- "movdqa "C(4)", %%xmm4 \n\t" /* xmm4 = c4 */ \
- "movdqa %%xmm3, %%xmm5 \n\t" /* xmm5 = B - D */ \
- "pmulhw %%xmm4, %%xmm3 \n\t" /* xmm3 = ( c4 -1 ) * ( B - D ) */ \
- "paddsw %%xmm2, %%xmm7 \n\t" /* xmm7 = c2 * i2 + c6 * i6 = G */ \
- "movdqa %%xmm6, "I(2)" \n\t" /* Save D. at I(2) */ \
- "movdqa %%xmm0, %%xmm2 \n\t" /* xmm2 = A - C */ \
- "movdqa "I(0)", %%xmm6 \n\t" /* xmm6 = i0 */ \
- "pmulhw %%xmm4, %%xmm0 \n\t" /* xmm0 = ( c4 - 1 ) * ( A - C ) = A. */ \
- "paddw %%xmm3, %%xmm5 \n\t" /* xmm5 = c4 * ( B - D ) = B. */ \
- "movdqa "I(4)", %%xmm3 \n\t" /* xmm3 = i4 */ \
- "psubsw %%xmm1, %%xmm5 \n\t" /* xmm5 = B. - H = B.. */ \
- "paddw %%xmm0, %%xmm2 \n\t" /* xmm2 = c4 * ( A - C) = A. */ \
- "psubsw %%xmm3, %%xmm6 \n\t" /* xmm6 = i0 - i4 */ \
- "movdqa %%xmm6, %%xmm0 \n\t" /* xmm0 = i0 - i4 */ \
- "pmulhw %%xmm4, %%xmm6 \n\t" /* xmm6 = (c4 - 1) * (i0 - i4) = F */ \
- "paddsw %%xmm3, %%xmm3 \n\t" /* xmm3 = i4 + i4 */ \
- "paddsw %%xmm1, %%xmm1 \n\t" /* xmm1 = H + H */ \
- "paddsw %%xmm0, %%xmm3 \n\t" /* xmm3 = i0 + i4 */ \
- "paddsw %%xmm5, %%xmm1 \n\t" /* xmm1 = B. + H = H. */ \
- "pmulhw %%xmm3, %%xmm4 \n\t" /* xmm4 = ( c4 - 1 ) * ( i0 + i4 ) */ \
- "paddw %%xmm0, %%xmm6 \n\t" /* xmm6 = c4 * ( i0 - i4 ) */ \
- "psubsw %%xmm2, %%xmm6 \n\t" /* xmm6 = F - A. = F. */ \
- "paddsw %%xmm2, %%xmm2 \n\t" /* xmm2 = A. + A. */ \
- "movdqa "I(1)", %%xmm0 \n\t" /* Load C. from I(1) */ \
- "paddsw %%xmm6, %%xmm2 \n\t" /* xmm2 = F + A. = A.. */ \
- "paddw %%xmm3, %%xmm4 \n\t" /* xmm4 = c4 * ( i0 + i4 ) = 3 */ \
- "psubsw %%xmm1, %%xmm2 \n\t" /* xmm2 = A.. - H. = R2 */ \
- ADD(%%xmm2) /* Adjust R2 and R1 before shifting */ \
- "paddsw %%xmm1, %%xmm1 \n\t" /* xmm1 = H. + H. */ \
- "paddsw %%xmm2, %%xmm1 \n\t" /* xmm1 = A.. + H. = R1 */ \
- SHIFT(%%xmm2) /* xmm2 = op2 */ \
- "psubsw %%xmm7, %%xmm4 \n\t" /* xmm4 = E - G = E. */ \
- SHIFT(%%xmm1) /* xmm1 = op1 */ \
- "movdqa "I(2)", %%xmm3 \n\t" /* Load D. from I(2) */ \
- "paddsw %%xmm7, %%xmm7 \n\t" /* xmm7 = G + G */ \
- "paddsw %%xmm4, %%xmm7 \n\t" /* xmm7 = E + G = G. */ \
- "psubsw %%xmm3, %%xmm4 \n\t" /* xmm4 = E. - D. = R4 */ \
- ADD(%%xmm4) /* Adjust R4 and R3 before shifting */ \
- "paddsw %%xmm3, %%xmm3 \n\t" /* xmm3 = D. + D. */ \
- "paddsw %%xmm4, %%xmm3 \n\t" /* xmm3 = E. + D. = R3 */ \
- SHIFT(%%xmm4) /* xmm4 = op4 */ \
- "psubsw %%xmm5, %%xmm6 \n\t" /* xmm6 = F. - B..= R6 */ \
- SHIFT(%%xmm3) /* xmm3 = op3 */ \
- ADD(%%xmm6) /* Adjust R6 and R5 before shifting */ \
- "paddsw %%xmm5, %%xmm5 \n\t" /* xmm5 = B.. + B.. */ \
- "paddsw %%xmm6, %%xmm5 \n\t" /* xmm5 = F. + B.. = R5 */ \
- SHIFT(%%xmm6) /* xmm6 = op6 */ \
- SHIFT(%%xmm5) /* xmm5 = op5 */ \
- "psubsw %%xmm0, %%xmm7 \n\t" /* xmm7 = G. - C. = R7 */ \
- ADD(%%xmm7) /* Adjust R7 and R0 before shifting */ \
- "paddsw %%xmm0, %%xmm0 \n\t" /* xmm0 = C. + C. */ \
- "paddsw %%xmm7, %%xmm0 \n\t" /* xmm0 = G. + C. */ \
- SHIFT(%%xmm7) /* xmm7 = op7 */ \
- SHIFT(%%xmm0) /* xmm0 = op0 */
-
-#define PUT_BLOCK(r0, r1, r2, r3, r4, r5, r6, r7) \
- "movdqa " #r0 ", " O(0) "\n\t" \
- "movdqa " #r1 ", " O(1) "\n\t" \
- "movdqa " #r2 ", " O(2) "\n\t" \
- "movdqa " #r3 ", " O(3) "\n\t" \
- "movdqa " #r4 ", " O(4) "\n\t" \
- "movdqa " #r5 ", " O(5) "\n\t" \
- "movdqa " #r6 ", " O(6) "\n\t" \
- "movdqa " #r7 ", " O(7) "\n\t"
-
-#define NOP(xmm)
-#define SHIFT4(xmm) "psraw $4, "#xmm"\n\t"
-#define ADD8(xmm) "paddsw %2, "#xmm"\n\t"
-
-void ff_vp3_idct_sse2(int16_t *input_data)
-{
-#define I(x) AV_STRINGIFY(16*x)"(%0)"
-#define O(x) I(x)
-#define C(x) AV_STRINGIFY(16*(x-1))"(%1)"
-
- __asm__ volatile (
- VP3_1D_IDCT_SSE2(NOP, NOP)
-
- TRANSPOSE8(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm6, %%xmm7, (%0))
- PUT_BLOCK(%%xmm0, %%xmm5, %%xmm7, %%xmm3, %%xmm6, %%xmm4, %%xmm2, %%xmm1)
-
- VP3_1D_IDCT_SSE2(ADD8, SHIFT4)
- PUT_BLOCK(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm6, %%xmm7)
- :: "r"(input_data), "r"(ff_vp3_idct_data), "m"(ff_pw_8)
- );
-}
-
-void ff_vp3_idct_put_sse2(uint8_t *dest, int line_size, DCTELEM *block)
-{
- ff_vp3_idct_sse2(block);
- put_signed_pixels_clamped_mmx(block, dest, line_size);
-}
-
-void ff_vp3_idct_add_sse2(uint8_t *dest, int line_size, DCTELEM *block)
-{
- ff_vp3_idct_sse2(block);
- add_pixels_clamped_mmx(block, dest, line_size);
-}
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp3dsp_sse2.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp3dsp_sse2.h
deleted file mode 100644
index 9094620eb..000000000
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp3dsp_sse2.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * vp3dsp SSE2 function declarations
- * Copyright (c) 2007 Aurelien Jacobs <aurel@gnuage.org>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#ifndef AVCODEC_X86_VP3DSP_SSE2_H
-#define AVCODEC_X86_VP3DSP_SSE2_H
-
-#include "libavcodec/dsputil.h"
-
-void ff_vp3_idct_sse2(int16_t *input_data);
-void ff_vp3_idct_put_sse2(uint8_t *dest, int line_size, DCTELEM *block);
-void ff_vp3_idct_add_sse2(uint8_t *dest, int line_size, DCTELEM *block);
-
-#endif /* AVCODEC_X86_VP3DSP_SSE2_H */
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp56dsp.asm b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp56dsp.asm
index 1b3165e54..0543ba00c 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp56dsp.asm
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp56dsp.asm
@@ -48,8 +48,8 @@ SECTION .text
movq m5, m2
punpcklbw m1, m7
punpcklbw m2, m7
- punpcklbw m4, m7
- punpcklbw m5, m7
+ punpckhbw m4, m7
+ punpckhbw m5, m7
pmullw m1, [rsp+8*13] ; src[x+8 ] * biweight [2]
pmullw m2, [rsp+8*14] ; src[x+16] * biweight [3]
pmullw m4, [rsp+8*13] ; src[x+8 ] * biweight [2]
@@ -95,13 +95,13 @@ SECTION .text
punpckldq m3, m3
punpckhdq m4, m4
punpckhwd m5, m5
- movq m6, m5
- punpckhdq m6, m6
+ movq m2, m5
+ punpckhdq m2, m2
punpckldq m5, m5
movq [rsp+8*11], m3
movq [rsp+8*12], m4
movq [rsp+8*13], m5
- movq [rsp+8*14], m6
+ movq [rsp+8*14], m2
%endmacro
%macro SPLAT4REGS_SSE2 0
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp56dsp_init.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp56dsp_init.c
index 5120ed231..87fc93531 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp56dsp_init.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp56dsp_init.c
@@ -20,6 +20,7 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
+#include "libavutil/cpu.h"
#include "libavutil/x86_cpu.h"
#include "libavcodec/dsputil.h"
#include "libavcodec/vp56dsp.h"
@@ -32,14 +33,14 @@ void ff_vp6_filter_diag4_sse2(uint8_t *dst, uint8_t *src, int stride,
av_cold void ff_vp56dsp_init_x86(VP56DSPContext* c, enum CodecID codec)
{
#if HAVE_YASM
- int mm_flags = mm_support();
+ int mm_flags = av_get_cpu_flags();
if (CONFIG_VP6_DECODER && codec == CODEC_ID_VP6) {
- if (mm_flags & FF_MM_MMX) {
+ if (mm_flags & AV_CPU_FLAG_MMX) {
c->vp6_filter_diag4 = ff_vp6_filter_diag4_mmx;
}
- if (mm_flags & FF_MM_SSE2) {
+ if (mm_flags & AV_CPU_FLAG_SSE2) {
c->vp6_filter_diag4 = ff_vp6_filter_diag4_sse2;
}
}
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp8dsp-init.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp8dsp-init.c
index ed5cf4602..201b34e24 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp8dsp-init.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp8dsp-init.c
@@ -20,6 +20,7 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
+#include "libavutil/cpu.h"
#include "libavutil/x86_cpu.h"
#include "libavcodec/vp8dsp.h"
@@ -282,10 +283,10 @@ DECLARE_LOOP_FILTER(sse4)
av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
{
- int mm_flags = mm_support();
+ int mm_flags = av_get_cpu_flags();
#if HAVE_YASM
- if (mm_flags & FF_MM_MMX) {
+ if (mm_flags & AV_CPU_FLAG_MMX) {
c->vp8_idct_dc_add = ff_vp8_idct_dc_add_mmx;
c->vp8_idct_dc_add4y = ff_vp8_idct_dc_add4y_mmx;
c->vp8_idct_dc_add4uv = ff_vp8_idct_dc_add4uv_mmx;
@@ -312,7 +313,7 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
/* note that 4-tap width=16 functions are missing because w=16
* is only used for luma, and luma is always a copy or sixtap. */
- if (mm_flags & FF_MM_MMX2) {
+ if (mm_flags & AV_CPU_FLAG_MMX2) {
VP8_LUMA_MC_FUNC(0, 16, mmxext);
VP8_MC_FUNC(1, 8, mmxext);
VP8_MC_FUNC(2, 4, mmxext);
@@ -334,14 +335,14 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_mmxext;
}
- if (mm_flags & FF_MM_SSE) {
+ if (mm_flags & AV_CPU_FLAG_SSE) {
c->vp8_idct_add = ff_vp8_idct_add_sse;
c->vp8_luma_dc_wht = ff_vp8_luma_dc_wht_sse;
c->put_vp8_epel_pixels_tab[0][0][0] =
c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_sse;
}
- if (mm_flags & (FF_MM_SSE2|FF_MM_SSE2SLOW)) {
+ if (mm_flags & (AV_CPU_FLAG_SSE2|AV_CPU_FLAG_SSE2SLOW)) {
VP8_LUMA_MC_FUNC(0, 16, sse2);
VP8_MC_FUNC(1, 8, sse2);
VP8_BILINEAR_MC_FUNC(0, 16, sse2);
@@ -356,7 +357,7 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_sse2;
}
- if (mm_flags & FF_MM_SSE2) {
+ if (mm_flags & AV_CPU_FLAG_SSE2) {
c->vp8_idct_dc_add4y = ff_vp8_idct_dc_add4y_sse2;
c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_sse2;
@@ -368,7 +369,7 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_sse2;
}
- if (mm_flags & FF_MM_SSSE3) {
+ if (mm_flags & AV_CPU_FLAG_SSSE3) {
VP8_LUMA_MC_FUNC(0, 16, ssse3);
VP8_MC_FUNC(1, 8, ssse3);
VP8_MC_FUNC(2, 4, ssse3);
@@ -390,7 +391,7 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_ssse3;
}
- if (mm_flags & FF_MM_SSE4) {
+ if (mm_flags & AV_CPU_FLAG_SSE4) {
c->vp8_idct_dc_add = ff_vp8_idct_dc_add_sse4;
c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_sse4;
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp8dsp.asm b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp8dsp.asm
index 8cdbb3c7a..bc5ccc8e3 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp8dsp.asm
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp8dsp.asm
@@ -1342,7 +1342,7 @@ VP8_DC_WHT sse
psrldq m%2, 4
%if %10 == 8
movd [%5+%8*2], m%1
- movd %5, m%3
+ movd %5d, m%3
%endif
psrldq m%3, 4
psrldq m%4, 4
@@ -1379,26 +1379,26 @@ VP8_DC_WHT sse
; 4 is a pointer to the destination's 4th line
; 5/6 is -stride and +stride
%macro WRITE_2x4W 6
- movd %3, %1
+ movd %3d, %1
punpckhdq %1, %1
mov [%4+%5*4], %3w
shr %3, 16
add %4, %6
mov [%4+%5*4], %3w
- movd %3, %1
+ movd %3d, %1
add %4, %5
mov [%4+%5*2], %3w
shr %3, 16
mov [%4+%5 ], %3w
- movd %3, %2
+ movd %3d, %2
punpckhdq %2, %2
mov [%4 ], %3w
shr %3, 16
mov [%4+%6 ], %3w
- movd %3, %2
+ movd %3d, %2
add %4, %6
mov [%4+%6 ], %3w
shr %3, 16
@@ -1407,27 +1407,27 @@ VP8_DC_WHT sse
%endmacro
%macro WRITE_8W_SSE2 5
- movd %2, %1
+ movd %2d, %1
psrldq %1, 4
mov [%3+%4*4], %2w
shr %2, 16
add %3, %5
mov [%3+%4*4], %2w
- movd %2, %1
+ movd %2d, %1
psrldq %1, 4
add %3, %4
mov [%3+%4*2], %2w
shr %2, 16
mov [%3+%4 ], %2w
- movd %2, %1
+ movd %2d, %1
psrldq %1, 4
mov [%3 ], %2w
shr %2, 16
mov [%3+%5 ], %2w
- movd %2, %1
+ movd %2d, %1
add %3, %5
mov [%3+%5 ], %2w
shr %2, 16
@@ -1446,27 +1446,27 @@ VP8_DC_WHT sse
%endmacro
%macro SPLATB_REG_MMX 2-3
- movd %1, %2
+ movd %1, %2d
punpcklbw %1, %1
punpcklwd %1, %1
punpckldq %1, %1
%endmacro
%macro SPLATB_REG_MMXEXT 2-3
- movd %1, %2
+ movd %1, %2d
punpcklbw %1, %1
pshufw %1, %1, 0x0
%endmacro
%macro SPLATB_REG_SSE2 2-3
- movd %1, %2
+ movd %1, %2d
punpcklbw %1, %1
pshuflw %1, %1, 0x0
punpcklqdq %1, %1
%endmacro
%macro SPLATB_REG_SSSE3 3
- movd %1, %2
+ movd %1, %2d
pshufb %1, %3
%endmacro
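
The four SPLATB_REG variants all broadcast one byte from a general-purpose register across the whole SIMD register, each using the cheapest shuffle its ISA level offers: a punpcklbw/punpcklwd/punpckldq chain on MMX, punpcklbw+pshufw on MMXEXT, pshuflw+punpcklqdq on SSE2, and a single pshufb against an all-zero mask (the third macro argument) on SSSE3. An intrinsics sketch of the last two (illustrative, not the codec's own code):

    #include <emmintrin.h>   /* SSE2  */
    #include <tmmintrin.h>   /* SSSE3 */

    static __m128i splatb_sse2(int b)
    {
        __m128i v = _mm_cvtsi32_si128(b);    /* movd        */
        v = _mm_unpacklo_epi8(v, v);         /* punpcklbw   */
        v = _mm_shufflelo_epi16(v, 0x00);    /* pshuflw 0x0 */
        return _mm_unpacklo_epi64(v, v);     /* punpcklqdq  */
    }

    static __m128i splatb_ssse3(int b)
    {
        /* zero shuffle mask selects byte 0 into every lane */
        return _mm_shuffle_epi8(_mm_cvtsi32_si128(b), _mm_setzero_si128());
    }
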
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcore/avcore.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcore/avcore.h
index 5ddb6167a..d73572e27 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcore/avcore.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcore/avcore.h
@@ -24,10 +24,10 @@
* shared media utilities for the libav* libraries
*/
-#include <libavutil/avutil.h>
+#include "libavutil/avutil.h"
#define LIBAVCORE_VERSION_MAJOR 0
-#define LIBAVCORE_VERSION_MINOR 2
+#define LIBAVCORE_VERSION_MINOR 9
#define LIBAVCORE_VERSION_MICRO 0
#define LIBAVCORE_VERSION_INT AV_VERSION_INT(LIBAVCORE_VERSION_MAJOR, \
@@ -55,4 +55,12 @@ const char *avcore_configuration(void);
*/
const char *avcore_license(void);
+/**
+ * These FF_API_* defines are not part of the public API.
+ * They may change, break or disappear at any time.
+ */
+#ifndef FF_API_OLD_IMAGE_NAMES
+#define FF_API_OLD_IMAGE_NAMES (LIBAVCORE_VERSION_MAJOR < 1)
+#endif
+
#endif /* AVCORE_AVCORE_H */
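
FF_API_OLD_IMAGE_NAMES is the usual libav* deprecation gate: it stays true while the major version is below the removal point, letting headers keep the old av_*_image_* names visible until the next major bump. Illustrative use (the actual wrapper declarations live in imgutils.h, not shown in this diff):

    #if FF_API_OLD_IMAGE_NAMES
    /* old name kept as a deprecated alias while LIBAVCORE_VERSION_MAJOR < 1 */
    attribute_deprecated
    int av_get_image_linesize(enum PixelFormat pix_fmt, int width, int plane);
    #endif
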
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcore/imgutils.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcore/imgutils.c
index ebaeff16c..0a21f6de2 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcore/imgutils.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcore/imgutils.c
@@ -24,7 +24,25 @@
#include "imgutils.h"
#include "libavutil/pixdesc.h"
-int av_get_image_linesize(enum PixelFormat pix_fmt, int width, int plane)
+void av_image_fill_max_pixsteps(int max_pixsteps[4], int max_pixstep_comps[4],
+ const AVPixFmtDescriptor *pixdesc)
+{
+ int i;
+ memset(max_pixsteps, 0, 4*sizeof(max_pixsteps[0]));
+ if (max_pixstep_comps)
+ memset(max_pixstep_comps, 0, 4*sizeof(max_pixstep_comps[0]));
+
+ for (i = 0; i < 4; i++) {
+ const AVComponentDescriptor *comp = &(pixdesc->comp[i]);
+ if ((comp->step_minus1+1) > max_pixsteps[comp->plane]) {
+ max_pixsteps[comp->plane] = comp->step_minus1+1;
+ if (max_pixstep_comps)
+ max_pixstep_comps[comp->plane] = i;
+ }
+ }
+}
+
+int av_image_get_linesize(enum PixelFormat pix_fmt, int width, int plane)
{
const AVPixFmtDescriptor *desc = &av_pix_fmt_descriptors[pix_fmt];
int max_step [4]; /* max pixel step for each plane */
@@ -34,12 +52,12 @@ int av_get_image_linesize(enum PixelFormat pix_fmt, int width, int plane)
if (desc->flags & PIX_FMT_BITSTREAM)
return (width * (desc->comp[0].step_minus1+1) + 7) >> 3;
- av_fill_image_max_pixsteps(max_step, max_step_comp, desc);
+ av_image_fill_max_pixsteps(max_step, max_step_comp, desc);
s = (max_step_comp[plane] == 1 || max_step_comp[plane] == 2) ? desc->log2_chroma_w : 0;
return max_step[plane] * (((width + (1 << s) - 1)) >> s);
}
-int av_fill_image_linesizes(int linesizes[4], enum PixelFormat pix_fmt, int width)
+int av_image_fill_linesizes(int linesizes[4], enum PixelFormat pix_fmt, int width)
{
int i;
const AVPixFmtDescriptor *desc = &av_pix_fmt_descriptors[pix_fmt];
@@ -48,7 +66,7 @@ int av_fill_image_linesizes(int linesizes[4], enum PixelFormat pix_fmt, int widt
memset(linesizes, 0, 4*sizeof(linesizes[0]));
- if (desc->flags & PIX_FMT_HWACCEL)
+ if ((unsigned)pix_fmt >= PIX_FMT_NB || desc->flags & PIX_FMT_HWACCEL)
return AVERROR(EINVAL);
if (desc->flags & PIX_FMT_BITSTREAM) {
@@ -56,7 +74,7 @@ int av_fill_image_linesizes(int linesizes[4], enum PixelFormat pix_fmt, int widt
return 0;
}
- av_fill_image_max_pixsteps(max_step, max_step_comp, desc);
+ av_image_fill_max_pixsteps(max_step, max_step_comp, desc);
for (i = 0; i < 4; i++) {
int s = (max_step_comp[i] == 1 || max_step_comp[i] == 2) ? desc->log2_chroma_w : 0;
linesizes[i] = max_step[i] * (((width + (1 << s) - 1)) >> s);
@@ -65,7 +83,7 @@ int av_fill_image_linesizes(int linesizes[4], enum PixelFormat pix_fmt, int widt
return 0;
}
-int av_fill_image_pointers(uint8_t *data[4], enum PixelFormat pix_fmt, int height,
+int av_image_fill_pointers(uint8_t *data[4], enum PixelFormat pix_fmt, int height,
uint8_t *ptr, const int linesizes[4])
{
int i, total_size, size[4], has_plane[4];
@@ -75,7 +93,7 @@ int av_fill_image_pointers(uint8_t *data[4], enum PixelFormat pix_fmt, int heigh
memset(size , 0, sizeof(size));
memset(has_plane, 0, sizeof(has_plane));
- if (desc->flags & PIX_FMT_HWACCEL)
+ if ((unsigned)pix_fmt >= PIX_FMT_NB || desc->flags & PIX_FMT_HWACCEL)
return AVERROR(EINVAL);
data[0] = ptr;
@@ -110,7 +128,7 @@ typedef struct ImgUtils {
static const AVClass imgutils_class = { "IMGUTILS", av_default_item_name, NULL, LIBAVUTIL_VERSION_INT, offsetof(ImgUtils, log_offset), offsetof(ImgUtils, log_ctx) };
-int av_check_image_size(unsigned int w, unsigned int h, int log_offset, void *log_ctx)
+int av_image_check_size(unsigned int w, unsigned int h, int log_offset, void *log_ctx)
{
ImgUtils imgutils = { &imgutils_class, log_offset, log_ctx };
@@ -120,3 +138,79 @@ int av_check_image_size(unsigned int w, unsigned int h, int log_offset, void *lo
av_log(&imgutils, AV_LOG_ERROR, "Picture size %ux%u is invalid\n", w, h);
return AVERROR(EINVAL);
}
+
+void av_image_copy_plane(uint8_t *dst, int dst_linesize,
+ const uint8_t *src, int src_linesize,
+ int bytewidth, int height)
+{
+ if (!dst || !src)
+ return;
+ for (;height > 0; height--) {
+ memcpy(dst, src, bytewidth);
+ dst += dst_linesize;
+ src += src_linesize;
+ }
+}
+
+void av_image_copy(uint8_t *dst_data[4], int dst_linesizes[4],
+ const uint8_t *src_data[4], const int src_linesizes[4],
+ enum PixelFormat pix_fmt, int width, int height)
+{
+ const AVPixFmtDescriptor *desc = &av_pix_fmt_descriptors[pix_fmt];
+
+ if (desc->flags & PIX_FMT_HWACCEL)
+ return;
+
+ if (desc->flags & PIX_FMT_PAL) {
+ av_image_copy_plane(dst_data[0], dst_linesizes[0],
+ src_data[0], src_linesizes[0],
+ width, height);
+ /* copy the palette */
+ memcpy(dst_data[1], src_data[1], 4*256);
+ } else {
+ int i, planes_nb = 0;
+
+ for (i = 0; i < desc->nb_components; i++)
+ planes_nb = FFMAX(planes_nb, desc->comp[i].plane + 1);
+
+ for (i = 0; i < planes_nb; i++) {
+ int h = height;
+ int bwidth = av_image_get_linesize(pix_fmt, width, i);
+ if (i == 1 || i == 2) {
+ h= -((-height)>>desc->log2_chroma_h);
+ }
+ av_image_copy_plane(dst_data[i], dst_linesizes[i],
+ src_data[i], src_linesizes[i],
+ bwidth, h);
+ }
+ }
+}
+
+#if FF_API_OLD_IMAGE_NAMES
+void av_fill_image_max_pixsteps(int max_pixsteps[4], int max_pixstep_comps[4],
+ const AVPixFmtDescriptor *pixdesc)
+{
+ av_image_fill_max_pixsteps(max_pixsteps, max_pixstep_comps, pixdesc);
+}
+
+int av_get_image_linesize(enum PixelFormat pix_fmt, int width, int plane)
+{
+ return av_image_get_linesize(pix_fmt, width, plane);
+}
+
+int av_fill_image_linesizes(int linesizes[4], enum PixelFormat pix_fmt, int width)
+{
+ return av_image_fill_linesizes(linesizes, pix_fmt, width);
+}
+
+int av_fill_image_pointers(uint8_t *data[4], enum PixelFormat pix_fmt, int height,
+ uint8_t *ptr, const int linesizes[4])
+{
+ return av_image_fill_pointers(data, pix_fmt, height, ptr, linesizes);
+}
+
+int av_check_image_size(unsigned int w, unsigned int h, int log_offset, void *log_ctx)
+{
+ return av_image_check_size(w, h, log_offset, log_ctx);
+}
+#endif
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcore/imgutils.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcore/imgutils.h
index 8e08d4738..8458fc6bb 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcore/imgutils.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcore/imgutils.h
@@ -25,7 +25,6 @@
*/
#include "libavutil/pixdesc.h"
-#include "libavutil/pixfmt.h"
#include "avcore.h"
/**
@@ -44,23 +43,8 @@
* @param max_pixstep_comps an array which is filled with the component
* for each plane which has the max pixel step. May be NULL.
*/
-static inline void av_fill_image_max_pixsteps(int max_pixsteps[4], int max_pixstep_comps[4],
- const AVPixFmtDescriptor *pixdesc)
-{
- int i;
- memset(max_pixsteps, 0, 4*sizeof(max_pixsteps[0]));
- if (max_pixstep_comps)
- memset(max_pixstep_comps, 0, 4*sizeof(max_pixstep_comps[0]));
-
- for (i = 0; i < 4; i++) {
- const AVComponentDescriptor *comp = &(pixdesc->comp[i]);
- if ((comp->step_minus1+1) > max_pixsteps[comp->plane]) {
- max_pixsteps[comp->plane] = comp->step_minus1+1;
- if (max_pixstep_comps)
- max_pixstep_comps[comp->plane] = i;
- }
- }
-}
+void av_image_fill_max_pixsteps(int max_pixsteps[4], int max_pixstep_comps[4],
+ const AVPixFmtDescriptor *pixdesc);
/**
* Compute the size of an image line with format pix_fmt and width
@@ -68,7 +52,7 @@ static inline void av_fill_image_max_pixsteps(int max_pixsteps[4], int max_pixst
*
* @return the computed size in bytes
*/
-int av_get_image_linesize(enum PixelFormat pix_fmt, int width, int plane);
+int av_image_get_linesize(enum PixelFormat pix_fmt, int width, int plane);
/**
* Fill plane linesizes for an image with pixel format pix_fmt and
@@ -77,7 +61,7 @@ int av_get_image_linesize(enum PixelFormat pix_fmt, int width, int plane);
* @param linesizes array to be filled with the linesize for each plane
* @return >= 0 in case of success, a negative error code otherwise
*/
-int av_fill_image_linesizes(int linesizes[4], enum PixelFormat pix_fmt, int width);
+int av_image_fill_linesizes(int linesizes[4], enum PixelFormat pix_fmt, int width);
/**
* Fill plane data pointers for an image with pixel format pix_fmt and
@@ -86,14 +70,37 @@ int av_fill_image_linesizes(int linesizes[4], enum PixelFormat pix_fmt, int widt
* @param data pointers array to be filled with the pointer for each image plane
* @param ptr the pointer to a buffer which will contain the image
* @param linesizes[4] the array containing the linesize for each
- * plane, should be filled by av_fill_image_linesizes()
+ * plane, should be filled by av_image_fill_linesizes()
* @return the size in bytes required for the image buffer, a negative
* error code in case of failure
*/
-int av_fill_image_pointers(uint8_t *data[4], enum PixelFormat pix_fmt, int height,
+int av_image_fill_pointers(uint8_t *data[4], enum PixelFormat pix_fmt, int height,
uint8_t *ptr, const int linesizes[4]);
/**
+ * Copy image plane from src to dst.
+ * That is, copy "height" number of lines of "bytewidth" bytes each.
+ * The first byte of each successive line is separated by *_linesize
+ * bytes.
+ *
+ * @param dst_linesize linesize for the image plane in dst
+ * @param src_linesize linesize for the image plane in src
+ */
+void av_image_copy_plane(uint8_t *dst, int dst_linesize,
+ const uint8_t *src, int src_linesize,
+ int bytewidth, int height);
+
+/**
+ * Copy image in src_data to dst_data.
+ *
+ * @param dst_linesize linesizes for the image in dst_data
+ * @param src_linesize linesizes for the image in src_data
+ */
+void av_image_copy(uint8_t *dst_data[4], int dst_linesizes[4],
+ const uint8_t *src_data[4], const int src_linesizes[4],
+ enum PixelFormat pix_fmt, int width, int height);
+
+/**
* Check if the given dimension of an image is valid, meaning that all
* bytes of the image can be addressed with a signed int.
*
@@ -103,6 +110,25 @@ int av_fill_image_pointers(uint8_t *data[4], enum PixelFormat pix_fmt, int heigh
* @param log_ctx the parent logging context, it may be NULL
* @return >= 0 if valid, a negative error code otherwise
*/
+int av_image_check_size(unsigned int w, unsigned int h, int log_offset, void *log_ctx);
+
+#if FF_API_OLD_IMAGE_NAMES
+attribute_deprecated
+void av_fill_image_max_pixsteps(int max_pixsteps[4], int max_pixstep_comps[4],
+ const AVPixFmtDescriptor *pixdesc);
+
+attribute_deprecated
+int av_get_image_linesize(enum PixelFormat pix_fmt, int width, int plane);
+
+attribute_deprecated
+int av_fill_image_linesizes(int linesizes[4], enum PixelFormat pix_fmt, int width);
+
+attribute_deprecated
+int av_fill_image_pointers(uint8_t *data[4], enum PixelFormat pix_fmt, int height,
+ uint8_t *ptr, const int linesizes[4]);
+
+attribute_deprecated
int av_check_image_size(unsigned int w, unsigned int h, int log_offset, void *log_ctx);
+#endif
#endif /* AVCORE_IMGUTILS_H */
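
[editor's note] The renamed av_image_* helpers above compose in the usual allocate-and-describe pattern. A minimal sketch of a caller migrating to the new names (illustrative only; alloc_picture and its error handling are assumptions, not part of this patch):

    #include "libavcore/imgutils.h"
    #include "libavutil/mem.h"

    /* Allocate a picture buffer for pix_fmt/w/h and fill data[]/linesizes[].
     * Returns the buffer, or NULL on error. */
    static uint8_t *alloc_picture(enum PixelFormat pix_fmt, int w, int h,
                                  uint8_t *data[4], int linesizes[4])
    {
        uint8_t *buf;
        int size;

        if (av_image_check_size(w, h, 0, NULL) < 0 ||
            av_image_fill_linesizes(linesizes, pix_fmt, w) < 0)
            return NULL;

        /* with a NULL base pointer, av_image_fill_pointers() only reports
         * the total buffer size required for the image */
        size = av_image_fill_pointers(data, pix_fmt, h, NULL, linesizes);
        if (size < 0 || !(buf = av_malloc(size)))
            return NULL;

        av_image_fill_pointers(data, pix_fmt, h, buf, linesizes);
        return buf;
    }
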
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/avstring.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/avstring.c
new file mode 100644
index 000000000..1ce680ac4
--- /dev/null
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/avstring.c
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2000, 2001, 2002 Fabrice Bellard
+ * Copyright (c) 2007 Mans Rullgard
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include "avstring.h"
+#include "mem.h"
+
+#define vsnprintf _vsnprintf
+#define snprintf _snprintf
+
+int av_strstart(const char *str, const char *pfx, const char **ptr)
+{
+ while (*pfx && *pfx == *str) {
+ pfx++;
+ str++;
+ }
+ if (!*pfx && ptr)
+ *ptr = str;
+ return !*pfx;
+}
+
+int av_stristart(const char *str, const char *pfx, const char **ptr)
+{
+ while (*pfx && toupper((unsigned)*pfx) == toupper((unsigned)*str)) {
+ pfx++;
+ str++;
+ }
+ if (!*pfx && ptr)
+ *ptr = str;
+ return !*pfx;
+}
+
+char *av_stristr(const char *s1, const char *s2)
+{
+ if (!*s2)
+ return (char *)s1;
+
+ do {
+ if (av_stristart(s1, s2, NULL))
+ return (char *)s1;
+ } while (*s1++);
+
+ return NULL;
+}
+
+size_t av_strlcpy(char *dst, const char *src, size_t size)
+{
+ size_t len = 0;
+ while (++len < size && *src)
+ *dst++ = *src++;
+ if (len <= size)
+ *dst = 0;
+ return len + strlen(src) - 1;
+}
+
+size_t av_strlcat(char *dst, const char *src, size_t size)
+{
+ size_t len = strlen(dst);
+ if (size <= len + 1)
+ return len + strlen(src);
+ return len + av_strlcpy(dst + len, src, size - len);
+}
+
+size_t av_strlcatf(char *dst, size_t size, const char *fmt, ...)
+{
+ int len = strlen(dst);
+ va_list vl;
+
+ va_start(vl, fmt);
+ len += vsnprintf(dst + len, size > len ? size - len : 0, fmt, vl);
+ va_end(vl);
+
+ return len;
+}
+
+char *av_d2str(double d)
+{
+ char *str= av_malloc(16);
+ if(str) snprintf(str, 16, "%f", d);
+ return str;
+}
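
[editor's note] Both av_strlcpy() and av_strlcat() always NUL-terminate (for size > 0) and return the length the untruncated result would have had, so truncation is detected by comparing the return value against the buffer size. A small usage sketch (illustrative only, not part of the patch):

    #include <stdio.h>
    #include "libavutil/avstring.h"

    static void strlcpy_demo(void)
    {
        char buf[8];
        size_t n;

        n = av_strlcpy(buf, "mpc-hc", sizeof(buf));   /* n == 6, fits */
        n = av_strlcat(buf, "-ffmpeg", sizeof(buf));  /* n == 13, does not fit */
        if (n >= sizeof(buf))
            printf("truncated to '%s'\n", buf);       /* prints "mpc-hc-" */
    }
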
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/avutil.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/avutil.h
index 2c6d2e5b6..a2b8f0130 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/avutil.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/avutil.h
@@ -40,7 +40,7 @@
#define AV_VERSION(a, b, c) AV_VERSION_DOT(a, b, c)
#define LIBAVUTIL_VERSION_MAJOR 50
-#define LIBAVUTIL_VERSION_MINOR 22
+#define LIBAVUTIL_VERSION_MINOR 27
#define LIBAVUTIL_VERSION_MICRO 0
#define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/common.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/common.h
index 5d5e0f2c4..670d3b934 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/common.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/common.h
@@ -198,6 +198,20 @@ static inline av_const int av_ceil_log2_c(int x)
return av_log2((x - 1) << 1);
}
+/**
+ * Count the number of bits set to one in x
+ * @param x value to count bits of
+ * @return the number of bits set to one in x
+ */
+static inline av_const int av_popcount_c(uint32_t x)
+{
+ x -= (x >> 1) & 0x55555555;
+ x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
+ x = (x + (x >> 4)) & 0x0F0F0F0F;
+ x += x >> 8;
+ return (x + (x >> 16)) & 0x3F;
+}
+
#define MKTAG(a,b,c,d) ((a) | ((b) << 8) | ((c) << 16) | ((d) << 24))
#define MKBETAG(a,b,c,d) ((d) | ((c) << 8) | ((b) << 16) | ((a) << 24))
@@ -360,5 +374,8 @@ static inline av_const int av_ceil_log2_c(int x)
#ifndef av_clipf
# define av_clipf av_clipf_c
#endif
+#ifndef av_popcount
+# define av_popcount av_popcount_c
+#endif
#endif /* HAVE_AV_CONFIG_H */
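
[editor's note] av_popcount_c() is the classic SWAR (SIMD-within-a-register) bit count: the first line reduces each bit pair to a 2-bit count, the next two lines fold those into 4-bit and then 8-bit sums, and the final shifts accumulate the byte sums into the low 6 bits — no lookup table or POPCNT instruction needed. A sanity-check sketch (illustrative only; it calls the _c function directly since the av_popcount alias is only defined for internal HAVE_AV_CONFIG_H builds):

    #include <assert.h>
    #include "libavutil/common.h"

    static void popcount_demo(void)
    {
        assert(av_popcount_c(0x00000000) ==  0);
        assert(av_popcount_c(0xF0F0F0F0) == 16);  /* four bits per byte, 4 bytes */
        assert(av_popcount_c(0xFFFFFFFF) == 32);
        /* e.g. counting channels in a channel-layout mask: 5.1 sets 6 bits */
        assert(av_popcount_c(0x3F) == 6);
    }
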
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/cpu.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/cpu.c
new file mode 100644
index 000000000..1548530b8
--- /dev/null
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/cpu.c
@@ -0,0 +1,68 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "cpu.h"
+#include "config.h"
+
+int av_get_cpu_flags(void)
+{
+ static int flags, checked;
+
+ if (checked)
+ return flags;
+
+ if (ARCH_ARM) flags = ff_get_cpu_flags_arm();
+ if (ARCH_PPC) flags = ff_get_cpu_flags_ppc();
+ if (ARCH_X86) flags = ff_get_cpu_flags_x86();
+
+ checked = 1;
+ return flags;
+}
+
+#ifdef TEST
+
+#undef printf
+
+int main(void)
+{
+ int cpu_flags = av_get_cpu_flags();
+
+ printf("cpu_flags = 0x%08X\n", cpu_flags);
+ printf("cpu_flags = %s%s%s%s%s%s%s%s%s%s%s%s\n",
+#if ARCH_ARM
+ cpu_flags & AV_CPU_FLAG_IWMMXT ? "IWMMXT " : "",
+#elif ARCH_PPC
+ cpu_flags & AV_CPU_FLAG_ALTIVEC ? "ALTIVEC " : "",
+#elif ARCH_X86
+ cpu_flags & AV_CPU_FLAG_MMX ? "MMX " : "",
+ cpu_flags & AV_CPU_FLAG_MMX2 ? "MMX2 " : "",
+ cpu_flags & AV_CPU_FLAG_SSE ? "SSE " : "",
+ cpu_flags & AV_CPU_FLAG_SSE2 ? "SSE2 " : "",
+ cpu_flags & AV_CPU_FLAG_SSE2SLOW ? "SSE2(slow) " : "",
+ cpu_flags & AV_CPU_FLAG_SSE3 ? "SSE3 " : "",
+ cpu_flags & AV_CPU_FLAG_SSE3SLOW ? "SSE3(slow) " : "",
+ cpu_flags & AV_CPU_FLAG_SSSE3 ? "SSSE3 " : "",
+ cpu_flags & AV_CPU_FLAG_SSE4 ? "SSE4.1 " : "",
+ cpu_flags & AV_CPU_FLAG_SSE42 ? "SSE4.2 " : "",
+ cpu_flags & AV_CPU_FLAG_3DNOW ? "3DNow " : "",
+ cpu_flags & AV_CPU_FLAG_3DNOWEXT ? "3DNowExt " : "");
+#endif
+ return 0;
+}
+
+#endif
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/cpu.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/cpu.h
new file mode 100644
index 000000000..71cc26529
--- /dev/null
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/cpu.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2000, 2001, 2002 Fabrice Bellard
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_CPU_H
+#define AVUTIL_CPU_H
+
+#define AV_CPU_FLAG_FORCE 0x80000000 /* force usage of selected flags (OR) */
+
+ /* lower 16 bits - CPU features */
+#define AV_CPU_FLAG_MMX 0x0001 ///< standard MMX
+#define AV_CPU_FLAG_MMX2 0x0002 ///< SSE integer functions or AMD MMX ext
+#define AV_CPU_FLAG_3DNOW 0x0004 ///< AMD 3DNOW
+#define AV_CPU_FLAG_SSE 0x0008 ///< SSE functions
+#define AV_CPU_FLAG_SSE2 0x0010 ///< PIV SSE2 functions
+#define AV_CPU_FLAG_SSE2SLOW 0x40000000 ///< SSE2 supported, but usually not faster
+#define AV_CPU_FLAG_3DNOWEXT 0x0020 ///< AMD 3DNowExt
+#define AV_CPU_FLAG_SSE3 0x0040 ///< Prescott SSE3 functions
+#define AV_CPU_FLAG_SSE3SLOW 0x20000000 ///< SSE3 supported, but usually not faster
+#define AV_CPU_FLAG_SSSE3 0x0080 ///< Conroe SSSE3 functions
+#define AV_CPU_FLAG_SSE4 0x0100 ///< Penryn SSE4.1 functions
+#define AV_CPU_FLAG_SSE42 0x0200 ///< Nehalem SSE4.2 functions
+#define AV_CPU_FLAG_IWMMXT 0x0100 ///< XScale IWMMXT
+#define AV_CPU_FLAG_ALTIVEC 0x0001 ///< standard
+
+/**
+ * Return the flags which specify extensions supported by the CPU.
+ */
+int av_get_cpu_flags(void);
+
+/* The following CPU-specific functions shall not be called directly. */
+int ff_get_cpu_flags_arm(void);
+int ff_get_cpu_flags_ppc(void);
+int ff_get_cpu_flags_x86(void);
+
+#endif /* AVUTIL_CPU_H */
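
[editor's note] Callers are expected to move from the old mm_support()/FF_MM_* pair to av_get_cpu_flags()/AV_CPU_FLAG_*. A sketch of gating optimized code paths on the new API (illustrative; init_dsp is an assumption). Note that on the Intel CPUs special-cased in cpu.c below, AV_CPU_FLAG_SSE2 is cleared and AV_CPU_FLAG_SSE2SLOW set instead, so a plain SSE2 test already excludes them:

    #include "libavutil/cpu.h"

    static void init_dsp(void)
    {
        int flags = av_get_cpu_flags();

        if (flags & AV_CPU_FLAG_MMX2) {
            /* install MMX2 function pointers */
        }
        if (flags & AV_CPU_FLAG_SSE2) {
            /* install SSE2 function pointers; CPUs where SSE2 is slow carry
             * AV_CPU_FLAG_SSE2SLOW instead and are skipped here */
        }
    }
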
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/eval.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/eval.c
new file mode 100644
index 000000000..db6e63a5e
--- /dev/null
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/eval.c
@@ -0,0 +1,523 @@
+/*
+ * Copyright (c) 2002-2006 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (c) 2006 Oded Shimon <ods15@ods15.dyndns.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * simple arithmetic expression evaluator.
+ *
+ * see http://joe.hotchkiss.com/programming/eval/eval.html
+ */
+
+#include "libavutil/avutil.h"
+#include "eval.h"
+
+typedef struct Parser {
+ const AVClass *class;
+ int stack_index;
+ char *s;
+ const double *const_values;
+ const char * const *const_names; // NULL terminated
+ double (* const *funcs1)(void *, double a); // NULL terminated
+ const char * const *func1_names; // NULL terminated
+ double (* const *funcs2)(void *, double a, double b); // NULL terminated
+ const char * const *func2_names; // NULL terminated
+ void *opaque;
+ int log_offset;
+ void *log_ctx;
+#define VARS 10
+ double var[VARS];
+} Parser;
+
+static const AVClass class = { "Eval", av_default_item_name, NULL, LIBAVUTIL_VERSION_INT, offsetof(Parser,log_offset), offsetof(Parser,log_ctx) };
+
+#ifdef __GNUC__
+static const int8_t si_prefixes['z' - 'E' + 1]={
+ ['y'-'E']= -24,
+ ['z'-'E']= -21,
+ ['a'-'E']= -18,
+ ['f'-'E']= -15,
+ ['p'-'E']= -12,
+ ['n'-'E']= - 9,
+ ['u'-'E']= - 6,
+ ['m'-'E']= - 3,
+ ['c'-'E']= - 2,
+ ['d'-'E']= - 1,
+ ['h'-'E']= 2,
+ ['k'-'E']= 3,
+ ['K'-'E']= 3,
+ ['M'-'E']= 6,
+ ['G'-'E']= 9,
+ ['T'-'E']= 12,
+ ['P'-'E']= 15,
+ ['E'-'E']= 18,
+ ['Z'-'E']= 21,
+ ['Y'-'E']= 24,
+};
+#else
+static const int8_t si_prefixes['z' - 'E' + 1];
+#endif
+
+double av_strtod(const char *numstr, char **tail)
+{
+ double d;
+ char *next;
+ d = strtod(numstr, &next);
+ /* if parsing succeeded, check for and interpret postfixes */
+ if (next!=numstr) {
+ if (*next >= 'E' && *next <= 'z') {
+ int e= si_prefixes[*next - 'E'];
+ if (e) {
+ if (next[1] == 'i') {
+ d*= pow( 2, e/0.3);
+ next+=2;
+ } else {
+ d*= pow(10, e);
+ next++;
+ }
+ }
+ }
+
+ if (*next=='B') {
+ d*=8;
+ next++;
+ }
+ }
+ /* if requested, fill in tail with the position after the last parsed
+ character */
+ if (tail)
+ *tail = next;
+ return d;
+}
+
+static int strmatch(const char *s, const char *prefix)
+{
+ int i;
+ for (i=0; prefix[i]; i++) {
+ if (prefix[i] != s[i]) return 0;
+ }
+ return 1;
+}
+
+struct AVExpr {
+ enum {
+ e_value, e_const, e_func0, e_func1, e_func2,
+ e_squish, e_gauss, e_ld,
+ e_mod, e_max, e_min, e_eq, e_gt, e_gte,
+ e_pow, e_mul, e_div, e_add,
+ e_last, e_st, e_while,
+ } type;
+ double value; // is sign in other types
+ union {
+ int const_index;
+ double (*func0)(double);
+ double (*func1)(void *, double);
+ double (*func2)(void *, double, double);
+ } a;
+ struct AVExpr *param[2];
+};
+
+static double eval_expr(Parser *p, AVExpr *e)
+{
+ switch (e->type) {
+ case e_value: return e->value;
+ case e_const: return e->value * p->const_values[e->a.const_index];
+ case e_func0: return e->value * e->a.func0(eval_expr(p, e->param[0]));
+ case e_func1: return e->value * e->a.func1(p->opaque, eval_expr(p, e->param[0]));
+ case e_func2: return e->value * e->a.func2(p->opaque, eval_expr(p, e->param[0]), eval_expr(p, e->param[1]));
+ case e_squish: return 1/(1+exp(4*eval_expr(p, e->param[0])));
+ case e_gauss: { double d = eval_expr(p, e->param[0]); return exp(-d*d/2)/sqrt(2*M_PI); }
+ case e_ld: return e->value * p->var[av_clip(eval_expr(p, e->param[0]), 0, VARS-1)];
+ case e_while: {
+ double d = NAN;
+ while (eval_expr(p, e->param[0]))
+ d=eval_expr(p, e->param[1]);
+ return d;
+ }
+ default: {
+ double d = eval_expr(p, e->param[0]);
+ double d2 = eval_expr(p, e->param[1]);
+ switch (e->type) {
+ case e_mod: return e->value * (d - floor(d/d2)*d2);
+ case e_max: return e->value * (d > d2 ? d : d2);
+ case e_min: return e->value * (d < d2 ? d : d2);
+ case e_eq: return e->value * (d == d2 ? 1.0 : 0.0);
+ case e_gt: return e->value * (d > d2 ? 1.0 : 0.0);
+ case e_gte: return e->value * (d >= d2 ? 1.0 : 0.0);
+ case e_pow: return e->value * pow(d, d2);
+ case e_mul: return e->value * (d * d2);
+ case e_div: return e->value * (d / d2);
+ case e_add: return e->value * (d + d2);
+ case e_last:return e->value * d2;
+ case e_st : return e->value * (p->var[av_clip(d, 0, VARS-1)]= d2);
+ }
+ }
+ }
+ return NAN;
+}
+
+static int parse_expr(AVExpr **e, Parser *p);
+
+void av_free_expr(AVExpr *e)
+{
+ if (!e) return;
+ av_free_expr(e->param[0]);
+ av_free_expr(e->param[1]);
+ av_freep(&e);
+}
+
+static int parse_primary(AVExpr **e, Parser *p)
+{
+ AVExpr *d = av_mallocz(sizeof(AVExpr));
+ char *next = p->s, *s0 = p->s;
+ int ret, i;
+
+ if (!d)
+ return AVERROR(ENOMEM);
+
+ /* number */
+ d->value = av_strtod(p->s, &next);
+ if (next != p->s) {
+ d->type = e_value;
+ p->s= next;
+ *e = d;
+ return 0;
+ }
+ d->value = 1;
+
+ /* named constants */
+ for (i=0; p->const_names && p->const_names[i]; i++) {
+ if (strmatch(p->s, p->const_names[i])) {
+ p->s+= strlen(p->const_names[i]);
+ d->type = e_const;
+ d->a.const_index = i;
+ *e = d;
+ return 0;
+ }
+ }
+
+ p->s= strchr(p->s, '(');
+ if (p->s==NULL) {
+ av_log(p, AV_LOG_ERROR, "Undefined constant or missing '(' in '%s'\n", s0);
+ p->s= next;
+ av_free_expr(d);
+ return AVERROR(EINVAL);
+ }
+ p->s++; // "("
+ if (*next == '(') { // special case do-nothing
+ av_freep(&d);
+ if ((ret = parse_expr(&d, p)) < 0)
+ return ret;
+ if (p->s[0] != ')') {
+ av_log(p, AV_LOG_ERROR, "Missing ')' in '%s'\n", s0);
+ av_free_expr(d);
+ return AVERROR(EINVAL);
+ }
+ p->s++; // ")"
+ *e = d;
+ return 0;
+ }
+ if ((ret = parse_expr(&(d->param[0]), p)) < 0) {
+ av_free_expr(d);
+ return ret;
+ }
+ if (p->s[0]== ',') {
+ p->s++; // ","
+ parse_expr(&d->param[1], p);
+ }
+ if (p->s[0] != ')') {
+ av_log(p, AV_LOG_ERROR, "Missing ')' or too many args in '%s'\n", s0);
+ av_free_expr(d);
+ return AVERROR(EINVAL);
+ }
+ p->s++; // ")"
+
+ d->type = e_func0;
+ if (strmatch(next, "sinh" )) d->a.func0 = sinh;
+ else if (strmatch(next, "cosh" )) d->a.func0 = cosh;
+ else if (strmatch(next, "tanh" )) d->a.func0 = tanh;
+ else if (strmatch(next, "sin" )) d->a.func0 = sin;
+ else if (strmatch(next, "cos" )) d->a.func0 = cos;
+ else if (strmatch(next, "tan" )) d->a.func0 = tan;
+ else if (strmatch(next, "atan" )) d->a.func0 = atan;
+ else if (strmatch(next, "asin" )) d->a.func0 = asin;
+ else if (strmatch(next, "acos" )) d->a.func0 = acos;
+ else if (strmatch(next, "exp" )) d->a.func0 = exp;
+ else if (strmatch(next, "log" )) d->a.func0 = log;
+ else if (strmatch(next, "abs" )) d->a.func0 = fabs;
+ else if (strmatch(next, "squish")) d->type = e_squish;
+ else if (strmatch(next, "gauss" )) d->type = e_gauss;
+ else if (strmatch(next, "mod" )) d->type = e_mod;
+ else if (strmatch(next, "max" )) d->type = e_max;
+ else if (strmatch(next, "min" )) d->type = e_min;
+ else if (strmatch(next, "eq" )) d->type = e_eq;
+ else if (strmatch(next, "gte" )) d->type = e_gte;
+ else if (strmatch(next, "gt" )) d->type = e_gt;
+ else if (strmatch(next, "lte" )) { AVExpr *tmp = d->param[1]; d->param[1] = d->param[0]; d->param[0] = tmp; d->type = e_gt; }
+ else if (strmatch(next, "lt" )) { AVExpr *tmp = d->param[1]; d->param[1] = d->param[0]; d->param[0] = tmp; d->type = e_gte; }
+ else if (strmatch(next, "ld" )) d->type = e_ld;
+ else if (strmatch(next, "st" )) d->type = e_st;
+ else if (strmatch(next, "while" )) d->type = e_while;
+ else {
+ for (i=0; p->func1_names && p->func1_names[i]; i++) {
+ if (strmatch(next, p->func1_names[i])) {
+ d->a.func1 = p->funcs1[i];
+ d->type = e_func1;
+ *e = d;
+ return 0;
+ }
+ }
+
+ for (i=0; p->func2_names && p->func2_names[i]; i++) {
+ if (strmatch(next, p->func2_names[i])) {
+ d->a.func2 = p->funcs2[i];
+ d->type = e_func2;
+ *e = d;
+ return 0;
+ }
+ }
+
+ av_log(p, AV_LOG_ERROR, "Unknown function in '%s'\n", s0);
+ av_free_expr(d);
+ return AVERROR(EINVAL);
+ }
+
+ *e = d;
+ return 0;
+}
+
+static AVExpr *new_eval_expr(int type, int value, AVExpr *p0, AVExpr *p1)
+{
+ AVExpr *e = av_mallocz(sizeof(AVExpr));
+ if (!e)
+ return NULL;
+ e->type =type ;
+ e->value =value ;
+ e->param[0] =p0 ;
+ e->param[1] =p1 ;
+ return e;
+}
+
+static int parse_pow(AVExpr **e, Parser *p, int *sign)
+{
+ *sign= (*p->s == '+') - (*p->s == '-');
+ p->s += *sign&1;
+ return parse_primary(e, p);
+}
+
+static int parse_factor(AVExpr **e, Parser *p)
+{
+ int sign, sign2, ret;
+ AVExpr *e0, *e1, *e2;
+ if ((ret = parse_pow(&e0, p, &sign)) < 0)
+ return ret;
+ while(p->s[0]=='^'){
+ e1 = e0;
+ p->s++;
+ if ((ret = parse_pow(&e2, p, &sign2)) < 0) {
+ av_free_expr(e1);
+ return ret;
+ }
+ e0 = new_eval_expr(e_pow, 1, e1, e2);
+ if (!e0) {
+ av_free_expr(e1);
+ av_free_expr(e2);
+ return AVERROR(ENOMEM);
+ }
+ if (e0->param[1]) e0->param[1]->value *= (sign2|1);
+ }
+ if (e0) e0->value *= (sign|1);
+
+ *e = e0;
+ return 0;
+}
+
+static int parse_term(AVExpr **e, Parser *p)
+{
+ int ret;
+ AVExpr *e0, *e1, *e2;
+ if ((ret = parse_factor(&e0, p)) < 0)
+ return ret;
+ while (p->s[0]=='*' || p->s[0]=='/') {
+ int c= *p->s++;
+ e1 = e0;
+ if ((ret = parse_factor(&e2, p)) < 0) {
+ av_free_expr(e1);
+ return ret;
+ }
+ e0 = new_eval_expr(c == '*' ? e_mul : e_div, 1, e1, e2);
+ if (!e0) {
+ av_free_expr(e1);
+ av_free_expr(e2);
+ return AVERROR(ENOMEM);
+ }
+ }
+ *e = e0;
+ return 0;
+}
+
+static int parse_subexpr(AVExpr **e, Parser *p)
+{
+ int ret;
+ AVExpr *e0, *e1, *e2;
+ if ((ret = parse_term(&e0, p)) < 0)
+ return ret;
+ while (*p->s == '+' || *p->s == '-') {
+ e1 = e0;
+ if ((ret = parse_term(&e2, p)) < 0) {
+ av_free_expr(e1);
+ return ret;
+ }
+ e0 = new_eval_expr(e_add, 1, e1, e2);
+ if (!e0) {
+ av_free_expr(e1);
+ av_free_expr(e2);
+ return AVERROR(ENOMEM);
+ }
+ };
+
+ *e = e0;
+ return 0;
+}
+
+static int parse_expr(AVExpr **e, Parser *p)
+{
+ int ret;
+ AVExpr *e0, *e1, *e2;
+ if (p->stack_index <= 0) //protect against stack overflows
+ return AVERROR(EINVAL);
+ p->stack_index--;
+
+ if ((ret = parse_subexpr(&e0, p)) < 0)
+ return ret;
+ while (*p->s == ';') {
+ e1 = e0;
+ if ((ret = parse_subexpr(&e2, p)) < 0) {
+ av_free_expr(e1);
+ return ret;
+ }
+ p->s++;
+ e0 = new_eval_expr(e_last, 1, e1, e2);
+ if (!e0) {
+ av_free_expr(e1);
+ av_free_expr(e2);
+ return AVERROR(ENOMEM);
+ }
+ };
+
+ p->stack_index++;
+ *e = e0;
+ return 0;
+}
+
+static int verify_expr(AVExpr *e)
+{
+ if (!e) return 0;
+ switch (e->type) {
+ case e_value:
+ case e_const: return 1;
+ case e_func0:
+ case e_func1:
+ case e_squish:
+ case e_ld:
+ case e_gauss: return verify_expr(e->param[0]);
+ default: return verify_expr(e->param[0]) && verify_expr(e->param[1]);
+ }
+}
+
+int av_parse_expr(AVExpr **expr, const char *s,
+ const char * const *const_names,
+ const char * const *func1_names, double (* const *funcs1)(void *, double),
+ const char * const *func2_names, double (* const *funcs2)(void *, double, double),
+ int log_offset, void *log_ctx)
+{
+ Parser p;
+ AVExpr *e = NULL;
+ char *w = av_malloc(strlen(s) + 1);
+ char *wp = w;
+ const char *s0 = s;
+ int ret = 0;
+
+ if (!w)
+ return AVERROR(ENOMEM);
+
+ while (*s)
+ if (!isspace((unsigned char)*s++)) *wp++ = s[-1];
+ *wp++ = 0;
+
+ p.class = &class;
+ p.stack_index=100;
+ p.s= w;
+ p.const_names = const_names;
+ p.funcs1 = funcs1;
+ p.func1_names = func1_names;
+ p.funcs2 = funcs2;
+ p.func2_names = func2_names;
+ p.log_offset = log_offset;
+ p.log_ctx = log_ctx;
+
+ if ((ret = parse_expr(&e, &p)) < 0)
+ goto end;
+ if (*p.s) {
+ av_log(&p, AV_LOG_ERROR, "Invalid chars '%s' at the end of expression '%s'\n", p.s, s0);
+ ret = AVERROR(EINVAL);
+ goto end;
+ }
+ if (!verify_expr(e)) {
+ av_free_expr(e);
+ ret = AVERROR(EINVAL);
+ goto end;
+ }
+ *expr = e;
+end:
+ av_free(w);
+ return ret;
+}
+
+double av_eval_expr(AVExpr *e, const double *const_values, void *opaque)
+{
+ Parser p;
+
+ p.const_values = const_values;
+ p.opaque = opaque;
+ return eval_expr(&p, e);
+}
+
+int av_parse_and_eval_expr(double *d, const char *s,
+ const char * const *const_names, const double *const_values,
+ const char * const *func1_names, double (* const *funcs1)(void *, double),
+ const char * const *func2_names, double (* const *funcs2)(void *, double, double),
+ void *opaque, int log_offset, void *log_ctx)
+{
+ AVExpr *e = NULL;
+ int ret = av_parse_expr(&e, s, const_names, func1_names, funcs1, func2_names, funcs2, log_offset, log_ctx);
+
+ if (ret < 0) {
+ *d = NAN;
+ return ret;
+ }
+ *d = av_eval_expr(e, const_values, opaque);
+ av_free_expr(e);
+#ifdef __GNUC__
+ return isnan(*d) ? AVERROR(EINVAL) : 0;
+#else
+ return 0;
+#endif
+}
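
[editor's note] A usage sketch for the evaluator (illustrative only; it assumes the matching eval.h header added elsewhere in this patch set). One named constant is supplied and no custom functions. Note that in this MSVC-oriented tree the si_prefixes table is zeroed outside __GNUC__, so SI postfixes such as "1K" in av_strtod() only take effect in GCC builds:

    #include <stdio.h>
    #include <math.h>
    #include "libavutil/eval.h"

    static void eval_demo(void)
    {
        static const char * const names[] = { "PI", NULL };
        const double values[] = { M_PI };
        double d;

        /* gt(a,b) evaluates to 1.0 if a > b, else 0.0 (see parse_primary above) */
        if (av_parse_and_eval_expr(&d, "gt(3*PI, 9)",
                                   names, values,
                                   NULL, NULL,   /* no 1-argument functions */
                                   NULL, NULL,   /* no 2-argument functions */
                                   NULL, 0, NULL) >= 0)
            printf("result: %f\n", d);           /* prints 1.000000 */
    }
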
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/internal.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/internal.h
index 41f14ba89..1ebf77b83 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/internal.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/internal.h
@@ -106,7 +106,7 @@
#endif
/* MPC custom code start */
-#if defined(_DEBUG)
+#if defined (_DEBUG)
# define snprintf _snprintf
#endif
/* MPC custom code end */
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/intfloat_readwrite.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/intfloat_readwrite.c
index 1e60fc1db..036a2d130 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/intfloat_readwrite.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/intfloat_readwrite.c
@@ -21,7 +21,7 @@
*/
/**
- * @file libavutil/intfloat_readwrite.c
+ * @file
* portable IEEE float/double read/write functions
*/
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/lls.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/lls.c
index 047f976c3..385579276 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/lls.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/lls.c
@@ -21,7 +21,7 @@
*/
/**
- * @file libavutil/lls.c
+ * @file
* linear least squares model
*/
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/log.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/log.c
index b9f4e902a..4274c3ea7 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/log.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/log.c
@@ -43,6 +43,7 @@ void av_log_default_callback(void* ptr, int level, const char* fmt, va_list vl)
static int print_prefix=1;
static int count;
static char line[1024], prev[1024];
+ static int detect_repeats;
AVClass* avc= ptr ? *(AVClass**)ptr : NULL;
if(level>av_log_level)
return;
@@ -61,7 +62,12 @@ void av_log_default_callback(void* ptr, int level, const char* fmt, va_list vl)
vsnprintf(line + strlen(line), sizeof(line) - strlen(line), fmt, vl);
print_prefix= line[strlen(line)-1] == '\n';
- if(print_prefix && !strcmp(line, prev)){
+
+#if HAVE_ISATTY
+ if(!detect_repeats) detect_repeats= isatty(2) ? 1 : -1;
+#endif
+
+ if(print_prefix && detect_repeats==1 && !strcmp(line, prev)){
count++;
fprintf(stderr, " Last message repeated %d times\r", count);
return;
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/pixdesc.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/pixdesc.c
index 08daaa307..9015e4fa8 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/pixdesc.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/pixdesc.c
@@ -54,11 +54,14 @@ void av_read_image_line(uint16_t *dst, const uint8_t *data[4], const int linesiz
}
} else {
const uint8_t *p = data[plane]+ y*linesize[plane] + x*step + comp.offset_plus1-1;
+ int is_8bit = shift + depth <= 8;
+
+ if (is_8bit)
+ p += !!(flags & PIX_FMT_BE);
while(w--){
- int val;
- if(flags & PIX_FMT_BE) val= AV_RB16(p);
- else val= AV_RL16(p);
+ int val = is_8bit ? *p :
+ flags & PIX_FMT_BE ? AV_RB16(p) : AV_RL16(p);
val = (val>>shift) & mask;
if(read_pal_component)
val= data[1][4*val + c];
@@ -92,15 +95,23 @@ void av_write_image_line(const uint16_t *src, uint8_t *data[4], const int linesi
int shift = comp.shift;
uint8_t *p = data[plane]+ y*linesize[plane] + x*step + comp.offset_plus1-1;
- while (w--) {
- if (flags & PIX_FMT_BE) {
- uint16_t val = AV_RB16(p) | (*src++<<shift);
- AV_WB16(p, val);
- } else {
- uint16_t val = AV_RL16(p) | (*src++<<shift);
- AV_WL16(p, val);
+ if (shift + depth <= 8) {
+ p += !!(flags & PIX_FMT_BE);
+ while (w--) {
+ *p |= (*src++<<shift);
+ p += step;
+ }
+ } else {
+ while (w--) {
+ if (flags & PIX_FMT_BE) {
+ uint16_t val = AV_RB16(p) | (*src++<<shift);
+ AV_WB16(p, val);
+ } else {
+ uint16_t val = AV_RL16(p) | (*src++<<shift);
+ AV_WL16(p, val);
+ }
+ p+= step;
}
- p+= step;
}
}
}
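
[editor's note] The new branch gives components that fit in one byte (shift + depth <= 8) a direct byte path instead of 16-bit loads and read-modify-write stores; for big-endian layouts the pointer is advanced by one so the same byte is addressed. A read-side usage sketch (illustrative only; read_red_line is an assumption):

    #include "libavutil/pixdesc.h"

    /* Read one horizontal line of the R component of an RGB24 image into dst.
     * RGB24 components have depth 8, so this hits the new single-byte path. */
    static void read_red_line(const uint8_t *data[4], const int linesize[4],
                              int y, int width, uint16_t *dst)
    {
        const AVPixFmtDescriptor *desc = &av_pix_fmt_descriptors[PIX_FMT_RGB24];
        av_read_image_line(dst, data, linesize, desc,
                           0, y, 0, width, 0); /* x=0, component 0 (R), no palette */
    }
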
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/x86/bswap.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/x86/bswap.h
index 462309f41..b6ceb76d3 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/x86/bswap.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/x86/bswap.h
@@ -17,7 +17,7 @@
*/
/**
- * @file libavutil/x86/bswap.h
+ * @file
* byte swapping routines
*/
@@ -28,15 +28,15 @@
#include "config.h"
#include "libavutil/attributes.h"
-#define bswap_16 bswap_16
-static av_always_inline av_const uint16_t bswap_16(uint16_t x)
+#define av_bswap16 av_bswap16
+static av_always_inline av_const uint16_t av_bswap16(uint16_t x)
{
__asm__("rorw $8, %0" : "+r"(x));
return x;
}
-#define bswap_32 bswap_32
-static av_always_inline av_const uint32_t bswap_32(uint32_t x)
+#define av_bswap32 av_bswap32
+static av_always_inline av_const uint32_t av_bswap32(uint32_t x)
{
#if HAVE_BSWAP
__asm__("bswap %0" : "+r" (x));
@@ -50,8 +50,8 @@ static av_always_inline av_const uint32_t bswap_32(uint32_t x)
}
#if ARCH_X86_64
-#define bswap_64 bswap_64
-static inline uint64_t av_const bswap_64(uint64_t x)
+#define av_bswap64 av_bswap64
+static inline uint64_t av_const av_bswap64(uint64_t x)
{
__asm__("bswap %0": "=r" (x) : "0" (x));
return x;
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/cpuid.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/x86/cpu.c
index e96e3a93c..7f66ab7c3 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/cpuid.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/x86/cpu.c
@@ -21,10 +21,9 @@
*/
#include <stdlib.h>
+#include <string.h>
#include "libavutil/x86_cpu.h"
-#include "libavcodec/dsputil.h"
-
-#undef printf
+#include "libavutil/cpu.h"
/* ebx saving is necessary for PIC. gcc seems unable to see it alone */
#define cpuid(index,eax,ebx,ecx,edx)\
@@ -37,7 +36,7 @@
: "0" (index));
/* Function to test if multimedia instructions are supported... */
-int mm_support(void)
+int ff_get_cpu_flags_x86(void)
{
int rval = 0;
int eax, ebx, ecx, edx;
@@ -79,21 +78,21 @@ int mm_support(void)
family = ((eax>>8)&0xf) + ((eax>>20)&0xff);
model = ((eax>>4)&0xf) + ((eax>>12)&0xf0);
if (std_caps & (1<<23))
- rval |= FF_MM_MMX;
+ rval |= AV_CPU_FLAG_MMX;
if (std_caps & (1<<25))
- rval |= FF_MM_MMX2
+ rval |= AV_CPU_FLAG_MMX2
#if HAVE_SSE
- | FF_MM_SSE;
+ | AV_CPU_FLAG_SSE;
if (std_caps & (1<<26))
- rval |= FF_MM_SSE2;
+ rval |= AV_CPU_FLAG_SSE2;
if (ecx & 1)
- rval |= FF_MM_SSE3;
+ rval |= AV_CPU_FLAG_SSE3;
if (ecx & 0x00000200 )
- rval |= FF_MM_SSSE3;
+ rval |= AV_CPU_FLAG_SSSE3;
if (ecx & 0x00080000 )
- rval |= FF_MM_SSE4;
+ rval |= AV_CPU_FLAG_SSE4;
if (ecx & 0x00100000 )
- rval |= FF_MM_SSE42;
+ rval |= AV_CPU_FLAG_SSE42;
#endif
;
}
@@ -103,13 +102,13 @@ int mm_support(void)
if(max_ext_level >= 0x80000001){
cpuid(0x80000001, eax, ebx, ecx, ext_caps);
if (ext_caps & (1<<31))
- rval |= FF_MM_3DNOW;
+ rval |= AV_CPU_FLAG_3DNOW;
if (ext_caps & (1<<30))
- rval |= FF_MM_3DNOWEXT;
+ rval |= AV_CPU_FLAG_3DNOWEXT;
if (ext_caps & (1<<23))
- rval |= FF_MM_MMX;
+ rval |= AV_CPU_FLAG_MMX;
if (ext_caps & (1<<22))
- rval |= FF_MM_MMX2;
+ rval |= AV_CPU_FLAG_MMX2;
}
if (!strncmp(vendor.c, "GenuineIntel", 12) &&
@@ -117,26 +116,9 @@ int mm_support(void)
/* 6/9 (pentium-m "banias"), 6/13 (pentium-m "dothan"), and 6/14 (core1 "yonah")
* theoretically support sse2, but it's usually slower than mmx,
* so let's just pretend they don't. */
- if (rval & FF_MM_SSE2) rval ^= FF_MM_SSE2SLOW|FF_MM_SSE2;
- if (rval & FF_MM_SSE3) rval ^= FF_MM_SSE3SLOW|FF_MM_SSE3;
+ if (rval & AV_CPU_FLAG_SSE2) rval ^= AV_CPU_FLAG_SSE2SLOW|AV_CPU_FLAG_SSE2;
+ if (rval & AV_CPU_FLAG_SSE3) rval ^= AV_CPU_FLAG_SSE3SLOW|AV_CPU_FLAG_SSE3;
}
-#if 0
- av_log(NULL, AV_LOG_DEBUG, "%s%s%s%s%s%s%s%s%s%s%s%s\n",
- (rval&FF_MM_MMX) ? "MMX ":"",
- (rval&FF_MM_MMX2) ? "MMX2 ":"",
- (rval&FF_MM_SSE) ? "SSE ":"",
- (rval&FF_MM_SSE2) ? "SSE2 ":"",
- (rval&FF_MM_SSE2SLOW) ? "SSE2(slow) ":"",
- (rval&FF_MM_SSE3) ? "SSE3 ":"",
- (rval&FF_MM_SSE3SLOW) ? "SSE3(slow) ":"",
- (rval&FF_MM_SSSE3) ? "SSSE3 ":"",
- (rval&FF_MM_SSE4) ? "SSE4.1 ":"",
- (rval&FF_MM_SSE42) ? "SSE4.2 ":"",
- (rval&FF_MM_3DNOW) ? "3DNow ":"",
- (rval&FF_MM_3DNOWEXT) ? "3DNowExt ":"");
-#endif
return rval;
-
- /* TODO: allow overriding with ffdshow settings for disabling extensions */
}
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/makefile_c.inc b/src/filters/transform/MPCVideoDec/ffmpeg/makefile_c.inc
index 7217f1b83..a8bf2d476 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/makefile_c.inc
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/makefile_c.inc
@@ -21,7 +21,7 @@ ifeq ($(64BIT),yes)
else
TARGET_OS=i686-pc-mingw32
CFLAGS+=-DWIN32 -D_WIN32
- OPTFLAGS+=-O3 -march=i686 -mmmx
+ OPTFLAGS+=-O2 -march=i686 -mmmx -msse -mfpmath=sse
endif
CFLAGS+=-mno-cygwin -mdll -mthreads -pipe