Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mpc-hc/mpc-hc.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author XhmikosR <xhmikosr@users.sourceforge.net> 2010-08-02 12:54:35 +0400
committer XhmikosR <xhmikosr@users.sourceforge.net> 2010-08-02 12:54:35 +0400
commit c5d184664fec4340b57082cd1cc31220d9f1220b (patch)
tree 34c6103e95b6538a8e71d1ba87897536b7b51015 /src/filters/transform/MPCVideoDec/ffmpeg
parent aef6f86e8400fa5ee1e9d9ddc6190412ef09e519 (diff)
updated ffmpeg (thanks to Aleksoid for finding the conflict in avcore\utils.c for Debug VS2010 builds)
git-svn-id: https://mpc-hc.svn.sourceforge.net/svnroot/mpc-hc/trunk@2180 10f7b99b-c216-0410-bff0-8a66a9350fd8
Diffstat (limited to 'src/filters/transform/MPCVideoDec/ffmpeg')
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/Makefile 9
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/custom_code.txt 1
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/libavcodec.vcproj 20
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/libavcodec.vcxproj 4
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/libavcodec.vcxproj.filters 15
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/imgconvert.c 136
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/imgconvert.h 4
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mpeg12.c 2
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/utils.c 5
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp5.c 8
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp56.c 8
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp56.h 84
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp6.c 14
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp8.c 213
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp8data.h 30
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/idct_mmx.c 445
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/mmx.h 267
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp56_arith.h 54
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp8dsp-init.c 4
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp8dsp.asm 575
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/libavcore/avcore.h 58
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/libavcore/avcore_utils.c 43
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/libavcore/imgutils.c 97
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/libavcore/imgutils.h 53
-rw-r--r-- src/filters/transform/MPCVideoDec/ffmpeg/libavutil/log.c 1
25 files changed, 1147 insertions, 1003 deletions
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/Makefile b/src/filters/transform/MPCVideoDec/ffmpeg/Makefile
index 1497e5ab7..f5b7c3b0b 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/Makefile
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/Makefile
@@ -1,4 +1,5 @@
LAVC_DIR = libavcodec
+LAVCORE_DIR=libavcore
LAVU_DIR = libavutil
LSWS_DIR = libswscale
PNG_DIR = ../../../../thirdparty/libpng
@@ -17,6 +18,7 @@ OUT_DIRS = ../../../../../bin/obj/Release_x64/libavcodec_gcc/ \
../../../../../bin/obj/Release_x64/libavcodec_gcc/libavcodec \
../../../../../bin/obj/Release_x64/libavcodec_gcc/libavcodec/amr_float \
../../../../../bin/obj/Release_x64/libavcodec_gcc/libavcodec/x86 \
+ ../../../../../bin/obj/Release_x64/libavcodec_gcc/libavcore \
../../../../../bin/obj/Release_x64/libavcodec_gcc/libavutil \
../../../../../bin/obj/Release_x64/libavcodec_gcc/libswscale \
$(SLIB_DIR)
@@ -25,12 +27,13 @@ OUT_DIRS = ../../../../../bin/obj/Release_Win32/libavcodec_gcc/ \
../../../../../bin/obj/Release_Win32/libavcodec_gcc/libavcodec \
../../../../../bin/obj/Release_Win32/libavcodec_gcc/libavcodec/amr_float \
../../../../../bin/obj/Release_Win32/libavcodec_gcc/libavcodec/x86 \
+ ../../../../../bin/obj/Release_Win32/libavcodec_gcc/libavcore \
../../../../../bin/obj/Release_Win32/libavcodec_gcc/libavutil \
../../../../../bin/obj/Release_Win32/libavcodec_gcc/libswscale \
$(SLIB_DIR)
endif
-CFLAGS+= -I. -I.. -I$(LAVC_DIR) -I$(LAVU_DIR) -I$(LSWS_DIR) -I$(ZLIB_DIR) -I$(PNG_DIR) \
+CFLAGS+= -I. -I.. -I$(LAVC_DIR) -I$(LAVCORE_DIR) -I$(LAVU_DIR) -I$(LSWS_DIR) -I$(ZLIB_DIR) -I$(PNG_DIR) \
-DHAVE_AV_CONFIG_H -D_ISOC99_SOURCE -D_POSIX_C_SOURCE=200112 -std=gnu99
SRCS_C=\
@@ -156,6 +159,9 @@ SRCS_C=\
$(LAVC_DIR)/x86/vp6dsp_sse2.c \
$(LAVC_DIR)/x86/vp8dsp-init.c \
\
+ $(LAVCORE_DIR)/avcore_utils.c \
+ $(LAVCORE_DIR)/imgutils.c \
+\
$(LAVU_DIR)/crc.c \
$(LAVU_DIR)/intfloat_readwrite.c \
$(LAVU_DIR)/inverse.c \
@@ -213,6 +219,7 @@ clean:
$(OUT_DIR)$(LAVC_DIR)/*.o $(OUT_DIR)$(LAVC_DIR)/*.d \
$(OUT_DIR)$(LAVC_DIR)/x86/*.o $(OUT_DIR)$(LAVC_DIR)/x86/*.d \
$(OUT_DIR)$(LAVC_DIR)/amr_float/*.o $(OUT_DIR)$(LAVC_DIR)/amr_float/*.d \
+ $(OUT_DIR)$(LAVCORE_DIR)/*.o $(OUT_DIR)$(LAVCORE_DIR)/*.d \
$(OUT_DIR)$(LAVU_DIR)/*.o $(OUT_DIR)$(LAVU_DIR)/*.d \
$(OUT_DIR)$(LSWS_DIR)/*.o $(OUT_DIR)$(LSWS_DIR)/*.d \
$(ZLIB_DIR)/*.o $(ZLIB_DIR)/*.d $(PNG_DIR)/*.o $(SLIB)
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/custom_code.txt b/src/filters/transform/MPCVideoDec/ffmpeg/custom_code.txt
index 3ad7a140e..74108e99b 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/custom_code.txt
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/custom_code.txt
@@ -13,5 +13,6 @@ The following files have MPC-specific custom code (compared to ffdshow):
* libavcodec/mpc_helper.c
* libavcodec/mpeg12.c
* libavcodec/vp3.c
+* libavcore/avcore_utils.c (renamed from utils.c to avoid conflicts in MSVC2010)
* libavutil/internal.h
* libavutil/log.h
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec.vcproj b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec.vcproj
index 0d276b5ba..110c08cba 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec.vcproj
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec.vcproj
@@ -1591,6 +1591,26 @@
</FileConfiguration>
</File>
</Filter>
+ <Filter
+ Name="libavcore"
+ >
+ <File
+ RelativePath=".\libavcore\avcore.h"
+ >
+ </File>
+ <File
+ RelativePath=".\libavcore\avcore_utils.c"
+ >
+ </File>
+ <File
+ RelativePath=".\libavcore\imgutils.c"
+ >
+ </File>
+ <File
+ RelativePath=".\libavcore\imgutils.h"
+ >
+ </File>
+ </Filter>
<File
RelativePath=".\array_allocator.h"
>
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec.vcxproj b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec.vcxproj
index fe19a7491..141c87d5c 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec.vcxproj
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec.vcxproj
@@ -126,6 +126,8 @@
<ClInclude Include="libavcodec\vp8dsp.h" />
<ClInclude Include="libavcodec\wmv2.h" />
<ClInclude Include="libavcodec\xiph.h" />
+ <ClInclude Include="libavcore\avcore.h" />
+ <ClInclude Include="libavcore\imgutils.h" />
<ClInclude Include="libavutil\attributes.h" />
<ClInclude Include="libavutil\avconfig.h" />
<ClInclude Include="libavutil\avstring.h" />
@@ -276,6 +278,8 @@
<ClCompile Include="libavcodec\wmv2.c" />
<ClCompile Include="libavcodec\wmv2dec.c" />
<ClCompile Include="libavcodec\xiph.c" />
+ <ClCompile Include="libavcore\imgutils.c" />
+ <ClCompile Include="libavcore\avcore_utils.c" />
<ClCompile Include="libavutil\crc.c" />
<ClCompile Include="libavutil\intfloat_readwrite.c">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec.vcxproj.filters b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec.vcxproj.filters
index 9cf688ad0..67bf63abb 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec.vcxproj.filters
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec.vcxproj.filters
@@ -13,6 +13,9 @@
<Filter Include="libavcodec\amr_float">
<UniqueIdentifier>{dcef6bb8-4262-415c-935a-c8dd0f056c4c}</UniqueIdentifier>
</Filter>
+ <Filter Include="libavcore">
+ <UniqueIdentifier>{bb994511-43fc-42df-8ba2-b6186af844b2}</UniqueIdentifier>
+ </Filter>
</ItemGroup>
<ItemGroup>
<ClInclude Include="libswscale\asmalign.h">
@@ -427,6 +430,12 @@
<ClInclude Include="libavcodec\AVPaletteControl.h">
<Filter>libavcodec</Filter>
</ClInclude>
+ <ClInclude Include="libavcore\avcore.h">
+ <Filter>libavcore</Filter>
+ </ClInclude>
+ <ClInclude Include="libavcore\imgutils.h">
+ <Filter>libavcore</Filter>
+ </ClInclude>
</ItemGroup>
<ItemGroup>
<ClCompile Include="libswscale\isP4HT.c">
@@ -813,5 +822,11 @@
<ClCompile Include="libavcodec\avpacket.c">
<Filter>libavcodec</Filter>
</ClCompile>
+ <ClCompile Include="libavcore\imgutils.c">
+ <Filter>libavcore</Filter>
+ </ClCompile>
+ <ClCompile Include="libavcore\avcore_utils.c">
+ <Filter>libavcore</Filter>
+ </ClCompile>
</ItemGroup>
</Project>
\ No newline at end of file
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/imgconvert.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/imgconvert.c
index 3db5fb47d..293fa3c8a 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/imgconvert.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/imgconvert.c
@@ -35,9 +35,9 @@
#include "internal.h"
#include "imgconvert.h"
#include "libavutil/pixdesc.h"
+#include "libavcore/imgutils.h"
#if HAVE_MMX
-#include "x86/mmx.h"
#include "x86/dsputil_mmx.h"
#endif
@@ -748,144 +748,18 @@ int ff_set_systematic_pal(uint32_t pal[256], enum PixelFormat pix_fmt){
return 0;
}
+#if LIBAVCODEC_VERSION_MAJOR < 53
int ff_fill_linesize(AVPicture *picture, enum PixelFormat pix_fmt, int width)
{
- int i;
- const AVPixFmtDescriptor *desc = &av_pix_fmt_descriptors[pix_fmt];
- int max_plane_step [4];
- int max_plane_step_comp[4];
-
- memset(picture->linesize, 0, sizeof(picture->linesize));
-
- if (desc->flags & PIX_FMT_HWACCEL)
- return -1;
-
- if (desc->flags & PIX_FMT_BITSTREAM) {
- picture->linesize[0] = (width * (desc->comp[0].step_minus1+1) + 7) >> 3;
- return 0;
- }
-
- memset(max_plane_step , 0, sizeof(max_plane_step ));
- memset(max_plane_step_comp, 0, sizeof(max_plane_step_comp));
- for (i = 0; i < 4; i++) {
- const AVComponentDescriptor *comp = &(desc->comp[i]);
- if ((comp->step_minus1+1) > max_plane_step[comp->plane]) {
- max_plane_step [comp->plane] = comp->step_minus1+1;
- max_plane_step_comp[comp->plane] = i;
- }
- }
-
- for (i = 0; i < 4; i++) {
- int s = (max_plane_step_comp[i] == 1 || max_plane_step_comp[i] == 2) ? desc->log2_chroma_w : 0;
- picture->linesize[i] = max_plane_step[i] * (((width + (1 << s) - 1)) >> s);
- }
-
- return 0;
+ return av_fill_image_linesizes(picture->linesize, pix_fmt, width);
}
int ff_fill_pointer(AVPicture *picture, uint8_t *ptr, enum PixelFormat pix_fmt,
int height)
{
- int size, h2, size2;
- const AVPixFmtDescriptor *desc = &av_pix_fmt_descriptors[pix_fmt];
-
- size = picture->linesize[0] * height;
- switch(pix_fmt) {
- case PIX_FMT_YUV420P:
- case PIX_FMT_YUV422P:
- case PIX_FMT_YUV444P:
- case PIX_FMT_YUV410P:
- case PIX_FMT_YUV411P:
- case PIX_FMT_YUV440P:
- case PIX_FMT_YUVJ420P:
- case PIX_FMT_YUVJ422P:
- case PIX_FMT_YUVJ444P:
- case PIX_FMT_YUVJ440P:
- case PIX_FMT_YUV420P16LE:
- case PIX_FMT_YUV422P16LE:
- case PIX_FMT_YUV444P16LE:
- case PIX_FMT_YUV420P16BE:
- case PIX_FMT_YUV422P16BE:
- case PIX_FMT_YUV444P16BE:
- h2 = (height + (1 << desc->log2_chroma_h) - 1) >> desc->log2_chroma_h;
- size2 = picture->linesize[1] * h2;
- picture->data[0] = ptr;
- picture->data[1] = picture->data[0] + size;
- picture->data[2] = picture->data[1] + size2;
- picture->data[3] = NULL;
- return size + 2 * size2;
- case PIX_FMT_YUVA420P:
- h2 = (height + (1 << desc->log2_chroma_h) - 1) >> desc->log2_chroma_h;
- size2 = picture->linesize[1] * h2;
- picture->data[0] = ptr;
- picture->data[1] = picture->data[0] + size;
- picture->data[2] = picture->data[1] + size2;
- picture->data[3] = picture->data[1] + size2 + size2;
- return 2 * size + 2 * size2;
- case PIX_FMT_NV12:
- case PIX_FMT_NV21:
- h2 = (height + (1 << desc->log2_chroma_h) - 1) >> desc->log2_chroma_h;
- size2 = picture->linesize[1] * h2;
- picture->data[0] = ptr;
- picture->data[1] = picture->data[0] + size;
- picture->data[2] = NULL;
- picture->data[3] = NULL;
- return size + size2;
- case PIX_FMT_RGB24:
- case PIX_FMT_BGR24:
- case PIX_FMT_ARGB:
- case PIX_FMT_ABGR:
- case PIX_FMT_RGBA:
- case PIX_FMT_BGRA:
- case PIX_FMT_RGB48BE:
- case PIX_FMT_RGB48LE:
- case PIX_FMT_GRAY16BE:
- case PIX_FMT_GRAY16LE:
- case PIX_FMT_BGR444BE:
- case PIX_FMT_BGR444LE:
- case PIX_FMT_BGR555BE:
- case PIX_FMT_BGR555LE:
- case PIX_FMT_BGR565BE:
- case PIX_FMT_BGR565LE:
- case PIX_FMT_RGB444BE:
- case PIX_FMT_RGB444LE:
- case PIX_FMT_RGB555BE:
- case PIX_FMT_RGB555LE:
- case PIX_FMT_RGB565BE:
- case PIX_FMT_RGB565LE:
- case PIX_FMT_YUYV422:
- case PIX_FMT_UYVY422:
- case PIX_FMT_UYYVYY411:
- case PIX_FMT_RGB4:
- case PIX_FMT_BGR4:
- case PIX_FMT_MONOWHITE:
- case PIX_FMT_MONOBLACK:
- case PIX_FMT_Y400A:
- picture->data[0] = ptr;
- picture->data[1] = NULL;
- picture->data[2] = NULL;
- picture->data[3] = NULL;
- return size;
- case PIX_FMT_PAL8:
- case PIX_FMT_RGB8:
- case PIX_FMT_BGR8:
- case PIX_FMT_RGB4_BYTE:
- case PIX_FMT_BGR4_BYTE:
- case PIX_FMT_GRAY8:
- size2 = (size + 3) & ~3;
- picture->data[0] = ptr;
- picture->data[1] = ptr + size2; /* palette is stored here as 256 32 bit words */
- picture->data[2] = NULL;
- picture->data[3] = NULL;
- return size2 + 256 * 4;
- default:
- picture->data[0] = NULL;
- picture->data[1] = NULL;
- picture->data[2] = NULL;
- picture->data[3] = NULL;
- return -1;
- }
+ return av_fill_image_pointers(picture->data, pix_fmt, height, ptr, picture->linesize);
}
+#endif
void ff_img_copy_plane(uint8_t *dst, int dst_wrap,
const uint8_t *src, int src_wrap,
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/imgconvert.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/imgconvert.h
index 48e2f1271..f09fcbfc0 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/imgconvert.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/imgconvert.h
@@ -27,9 +27,13 @@
#include <stdint.h>
#include "avcodec.h"
+#if LIBAVCODEC_VERSION_MAJOR < 53
+attribute_deprecated
int ff_fill_linesize(AVPicture *picture, enum PixelFormat pix_fmt, int width);
+attribute_deprecated
int ff_fill_pointer(AVPicture *picture, uint8_t *ptr, enum PixelFormat pix_fmt, int height);
+#endif
int ff_get_plane_bytewidth(enum PixelFormat pix_fmt, int width, int plane);
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mpeg12.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mpeg12.c
index 99ff3fa62..93268052a 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mpeg12.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/mpeg12.c
@@ -1151,7 +1151,7 @@ typedef struct Mpeg1Context {
MpegEncContext mpeg_enc_ctx;
int mpeg_enc_ctx_allocated; /* true if decoding context allocated */
int repeat_field; /* true if we must repeat the field */
- AVPanScan pan_scan; /** some temporary storage for the panscan */
+ AVPanScan pan_scan; /**< some temporary storage for the panscan */
int slice_count;
int swap_uv;//indicate VCR2
int save_aspect_info;
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/utils.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/utils.c
index 1391625c0..78925d915 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/utils.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/utils.c
@@ -28,6 +28,7 @@
#include "libavutil/avstring.h"
#include "libavutil/crc.h"
#include "libavutil/pixdesc.h"
+#include "libavcore/imgutils.h"
#include "avcodec.h"
#include "dsputil.h"
#include "imgconvert.h"
@@ -281,7 +282,7 @@ int avcodec_default_get_buffer(AVCodecContext *s, AVFrame *pic){
do {
// NOTE: do not align linesizes individually, this breaks e.g. assumptions
// that linesize[0] == 2*linesize[1] in the MPEG-encoder for 4:2:2
- ff_fill_linesize(&picture, s->pix_fmt, w);
+ av_fill_image_linesizes(picture.linesize, s->pix_fmt, w);
// increase alignment of w for next try (rhs gives the lowest bit set in w)
w += w & ~(w-1);
@@ -291,7 +292,7 @@ int avcodec_default_get_buffer(AVCodecContext *s, AVFrame *pic){
}
} while (unaligned);
- tmpsize = ff_fill_pointer(&picture, NULL, s->pix_fmt, h);
+ tmpsize = av_fill_image_pointers(picture.data, s->pix_fmt, h, NULL, picture.linesize);
if (tmpsize < 0)
return -1;
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp5.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp5.c
index 3ebff017a..bb9b62e07 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp5.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp5.c
@@ -42,7 +42,7 @@ static int vp5_parse_header(VP56Context *s, const uint8_t *buf, int buf_size,
vp56_init_range_decoder(&s->c, buf, buf_size);
s->framep[VP56_FRAME_CURRENT]->key_frame = !vp56_rac_get(c);
vp56_rac_get(c);
- vp56_init_dequant(s, vp56_rac_gets(c, 6));
+ ff_vp56_init_dequant(s, vp56_rac_gets(c, 6));
if (s->framep[VP56_FRAME_CURRENT]->key_frame)
{
vp56_rac_gets(c, 8);
@@ -254,7 +254,7 @@ static av_cold int vp5_decode_init(AVCodecContext *avctx)
{
VP56Context *s = avctx->priv_data;
- vp56_init(avctx, 1, 0);
+ ff_vp56_init(avctx, 1, 0);
s->vp56_coord_div = vp5_coord_div;
s->parse_vector_adjustment = vp5_parse_vector_adjustment;
s->parse_coeff = vp5_parse_coeff;
@@ -273,8 +273,8 @@ AVCodec vp5_decoder = {
sizeof(VP56Context),
vp5_decode_init,
NULL,
- vp56_free,
- vp56_decode_frame,
+ ff_vp56_free,
+ ff_vp56_decode_frame,
/*.capabilities = */CODEC_CAP_DR1,
/*.next = */NULL,
/*.flush = */NULL,
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp56.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp56.c
index 188bfcfbf..3f5569eb8 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp56.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp56.c
@@ -28,7 +28,7 @@
#include "vp56data.h"
-void vp56_init_dequant(VP56Context *s, int quantizer)
+void ff_vp56_init_dequant(VP56Context *s, int quantizer)
{
s->quantizer = quantizer;
s->dequant_dc = vp56_dc_dequant[quantizer] << 2;
@@ -481,7 +481,7 @@ static int vp56_size_changed(AVCodecContext *avctx)
return 0;
}
-int vp56_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
+int ff_vp56_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
const uint8_t *buf, int buf_size)
{
VP56Context *s = avctx->priv_data;
@@ -638,7 +638,7 @@ int vp56_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
return buf_size;
}
-av_cold void vp56_init(AVCodecContext *avctx, int flip, int has_alpha)
+av_cold void ff_vp56_init(AVCodecContext *avctx, int flip, int has_alpha)
{
VP56Context *s = avctx->priv_data;
int i;
@@ -677,7 +677,7 @@ av_cold void vp56_init(AVCodecContext *avctx, int flip, int has_alpha)
}
}
-av_cold int vp56_free(AVCodecContext *avctx)
+av_cold int ff_vp56_free(AVCodecContext *avctx)
{
VP56Context *s = avctx->priv_data;
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp56.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp56.h
index a4ef49ede..69518fa73 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp56.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp56.h
@@ -56,7 +56,7 @@ typedef struct {
bits left) in order to eliminate a negate in cache refilling */
const uint8_t *buffer;
const uint8_t *end;
- unsigned long code_word;
+ unsigned int code_word;
} VP56RangeCoder;
typedef struct {
@@ -170,10 +170,10 @@ struct vp56_context {
};
-void vp56_init(AVCodecContext *avctx, int flip, int has_alpha);
-int vp56_free(AVCodecContext *avctx);
-void vp56_init_dequant(VP56Context *s, int quantizer);
-int vp56_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
+void ff_vp56_init(AVCodecContext *avctx, int flip, int has_alpha);
+int ff_vp56_free(AVCodecContext *avctx);
+void ff_vp56_init_dequant(VP56Context *s, int quantizer);
+int ff_vp56_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
const uint8_t *buf, int buf_size);
@@ -191,25 +191,12 @@ static inline void vp56_init_range_decoder(VP56RangeCoder *c,
c->code_word = bytestream_get_be16(&c->buffer);
}
-static inline int vp56_rac_get_prob(VP56RangeCoder *c, uint8_t prob)
+static av_always_inline unsigned int vp56_rac_renorm(VP56RangeCoder *c)
{
- /* Don't put c->high in a local variable; if we do that, gcc gets
- * the stupids and turns the code below into a branch again. */
+ int shift = ff_h264_norm_shift[c->high] - 1;
int bits = c->bits;
- unsigned long code_word = c->code_word;
- unsigned int low = 1 + (((c->high - 1) * prob) >> 8);
- unsigned int low_shift = low << 8;
- int bit = code_word >= low_shift;
- int shift;
+ unsigned int code_word = c->code_word;
- /* Incantation to convince GCC to turn these into conditional moves
- * instead of branches -- faster, as this branch is basically
- * unpredictable. */
- c->high = bit ? c->high - low : low;
- code_word = bit ? code_word - low_shift : code_word;
-
- /* normalize */
- shift = ff_h264_norm_shift[c->high] - 1;
c->high <<= shift;
code_word <<= shift;
bits += shift;
@@ -218,29 +205,62 @@ static inline int vp56_rac_get_prob(VP56RangeCoder *c, uint8_t prob)
bits -= 8;
}
c->bits = bits;
- c->code_word = code_word;
+ return code_word;
+}
+
+#if ARCH_X86
+#include "x86/vp56_arith.h"
+#endif
+
+#ifndef vp56_rac_get_prob
+#define vp56_rac_get_prob vp56_rac_get_prob
+static inline int vp56_rac_get_prob(VP56RangeCoder *c, uint8_t prob)
+{
+ unsigned int code_word = vp56_rac_renorm(c);
+ unsigned int low = 1 + (((c->high - 1) * prob) >> 8);
+ unsigned int low_shift = low << 8;
+ int bit = code_word >= low_shift;
+
+ c->high = bit ? c->high - low : low;
+ c->code_word = bit ? code_word - low_shift : code_word;
+
return bit;
}
+#endif
+
+// branchy variant, to be used where there's a branch based on the bit decoded
+static av_always_inline int vp56_rac_get_prob_branchy(VP56RangeCoder *c, int prob)
+{
+ unsigned long code_word = vp56_rac_renorm(c);
+ unsigned low = 1 + (((c->high - 1) * prob) >> 8);
+ unsigned low_shift = low << 8;
+
+ if (code_word >= low_shift) {
+ c->high -= low;
+ c->code_word = code_word - low_shift;
+ return 1;
+ }
+
+ c->high = low;
+ c->code_word = code_word;
+ return 0;
+}
static inline int vp56_rac_get(VP56RangeCoder *c)
{
+ unsigned int code_word = vp56_rac_renorm(c);
/* equiprobable */
int low = (c->high + 1) >> 1;
unsigned int low_shift = low << 8;
- int bit = c->code_word >= low_shift;
+ int bit = code_word >= low_shift;
if (bit) {
- c->high = (c->high - low) << 1;
- c->code_word -= low_shift;
+ c->high -= low;
+ code_word -= low_shift;
} else {
- c->high = low << 1;
+ c->high = low;
}
- /* normalize */
- c->code_word <<= 1;
- if (++c->bits == 0 && c->buffer < c->end) {
- c->bits = -8;
- c->code_word |= *c->buffer++;
- }
+ c->code_word = code_word;
return bit;
}
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp6.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp6.c
index ce0ec642a..4c2aec67a 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp6.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp6.c
@@ -54,7 +54,7 @@ static int vp6_parse_header(VP56Context *s, const uint8_t *buf, int buf_size,
int separated_coeff = buf[0] & 1;
s->framep[VP56_FRAME_CURRENT]->key_frame = !(buf[0] & 0x80);
- vp56_init_dequant(s, (buf[0] >> 1) & 0x3F);
+ ff_vp56_init_dequant(s, (buf[0] >> 1) & 0x3F);
if (s->framep[VP56_FRAME_CURRENT]->key_frame) {
sub_version = buf[1] >> 3;
@@ -576,8 +576,8 @@ static av_cold int vp6_decode_init(AVCodecContext *avctx)
{
VP56Context *s = avctx->priv_data;
- vp56_init(avctx, avctx->codec->id == CODEC_ID_VP6,
- avctx->codec->id == CODEC_ID_VP6A);
+ ff_vp56_init(avctx, avctx->codec->id == CODEC_ID_VP6,
+ avctx->codec->id == CODEC_ID_VP6A);
s->vp56_coord_div = vp6_coord_div;
s->parse_vector_adjustment = vp6_parse_vector_adjustment;
s->filter = vp6_filter;
@@ -594,7 +594,7 @@ static av_cold int vp6_decode_free(AVCodecContext *avctx)
VP56Context *s = avctx->priv_data;
int pt, ct, cg;
- vp56_free(avctx);
+ ff_vp56_free(avctx);
for (pt=0; pt<2; pt++) {
free_vlc(&s->dccv_vlc[pt]);
@@ -614,7 +614,7 @@ AVCodec vp6_decoder = {
vp6_decode_init,
NULL,
vp6_decode_free,
- vp56_decode_frame,
+ ff_vp56_decode_frame,
/*.capabilities = */CODEC_CAP_DR1,
/*.next = */NULL,
/*.flush = */NULL,
@@ -632,7 +632,7 @@ AVCodec vp6f_decoder = {
vp6_decode_init,
NULL,
vp6_decode_free,
- vp56_decode_frame,
+ ff_vp56_decode_frame,
/*.capabilities = */CODEC_CAP_DR1,
/*.next = */NULL,
/*.flush = */NULL,
@@ -650,7 +650,7 @@ AVCodec vp6a_decoder = {
vp6_decode_init,
NULL,
vp6_decode_free,
- vp56_decode_frame,
+ ff_vp56_decode_frame,
/*.capabilities = */CODEC_CAP_DR1,
/*.next = */NULL,
/*.flush = */NULL,
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp8.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp8.c
index 2dd086b70..6524a7141 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp8.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp8.c
@@ -3,6 +3,7 @@
*
* Copyright (C) 2010 David Conrad
* Copyright (C) 2010 Ronald S. Bultje
+ * Copyright (C) 2010 Jason Garrett-Glaser
*
* This file is part of FFmpeg.
*
@@ -198,8 +199,6 @@ typedef struct {
} prob[2];
} VP8Context;
-#define RL24(p) (AV_RL16(p) + ((p)[2] << 16))
-
static void vp8_decode_flush(AVCodecContext *avctx)
{
VP8Context *s = avctx->priv_data;
@@ -211,6 +210,7 @@ static void vp8_decode_flush(AVCodecContext *avctx)
memset(s->framep, 0, sizeof(s->framep));
av_freep(&s->macroblocks_base);
+ av_freep(&s->filter_strength);
av_freep(&s->intra4x4_pred_mode_base);
av_freep(&s->top_nnz);
av_freep(&s->edge_emu_buffer);
@@ -307,7 +307,7 @@ static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
return -1;
for (i = 0; i < s->num_coeff_partitions-1; i++) {
- int size = RL24(sizes + 3*i);
+ int size = AV_RL24(sizes + 3*i);
if (buf_size - size < 0)
return -1;
@@ -402,7 +402,7 @@ static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
s->keyframe = !(buf[0] & 1);
s->profile = (buf[0]>>1) & 7;
s->invisible = !(buf[0] & 0x10);
- header_size = RL24(buf) >> 5;
+ header_size = AV_RL24(buf) >> 5;
buf += 3;
buf_size -= 3;
@@ -420,8 +420,8 @@ static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
}
if (s->keyframe) {
- if (RL24(buf) != 0x2a019d) {
- av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", RL24(buf));
+ if (AV_RL24(buf) != 0x2a019d) {
+ av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf));
return AVERROR_INVALIDDATA;
}
width = AV_RL16(buf+3) & 0x3fff;
@@ -495,7 +495,7 @@ static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
for (j = 0; j < 8; j++)
for (k = 0; k < 3; k++)
for (l = 0; l < NUM_DCT_TOKENS-1; l++)
- if (vp56_rac_get_prob(c, vp8_token_update_probs[i][j][k][l]))
+ if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l]))
s->prob->token[i][j][k][l] = vp8_rac_get_uint(c, 8);
if ((s->mbskip_enabled = vp8_rac_get(c)))
@@ -516,15 +516,15 @@ static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
// 17.2 MV probability update
for (i = 0; i < 2; i++)
for (j = 0; j < 19; j++)
- if (vp56_rac_get_prob(c, vp8_mv_update_prob[i][j]))
+ if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
s->prob->mvc[i][j] = vp8_rac_get_nn(c);
}
return 0;
}
-static inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src,
- int mb_x, int mb_y)
+static av_always_inline
+void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src, int mb_x, int mb_y)
{
#define MARGIN (16 << 2)
dst->x = av_clip(src->x, -((mb_x << 6) + MARGIN),
@@ -533,8 +533,9 @@ static inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src,
((s->mb_height - 1 - mb_y) << 6) + MARGIN);
}
-static void find_near_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
- VP56mv near[2], VP56mv *best, uint8_t cnt[4])
+static av_always_inline
+void find_near_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
+ VP56mv near[2], VP56mv *best, uint8_t cnt[4])
{
VP8Macroblock *mb_edge[3] = { mb + 2 /* top */,
mb - 1 /* left */,
@@ -589,7 +590,7 @@ static void find_near_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
if (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])
best_idx = CNT_NEAREST;
- clamp_mv(s, best, &near_mv[best_idx], mb_x, mb_y);
+ mb->mv = near_mv[best_idx];
near[0] = near_mv[CNT_NEAREST];
near[1] = near_mv[CNT_NEAR];
}
@@ -599,9 +600,9 @@ static void find_near_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
*/
static int read_mv_component(VP56RangeCoder *c, const uint8_t *p)
{
- int x = 0;
+ int bit, x = 0;
- if (vp56_rac_get_prob(c, p[0])) {
+ if (vp56_rac_get_prob_branchy(c, p[0])) {
int i;
for (i = 0; i < 3; i++)
@@ -610,13 +611,23 @@ static int read_mv_component(VP56RangeCoder *c, const uint8_t *p)
x += vp56_rac_get_prob(c, p[9 + i]) << i;
if (!(x & 0xFFF0) || vp56_rac_get_prob(c, p[12]))
x += 8;
- } else
- x = vp8_rac_get_tree(c, vp8_small_mvtree, &p[2]);
+ } else {
+ // small_mvtree
+ const uint8_t *ps = p+2;
+ bit = vp56_rac_get_prob(c, *ps);
+ ps += 1 + 3*bit;
+ x += 4*bit;
+ bit = vp56_rac_get_prob(c, *ps);
+ ps += 1 + bit;
+ x += 2*bit;
+ x += vp56_rac_get_prob(c, *ps);
+ }
return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
}
-static const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
+static av_always_inline
+const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
{
if (left == top)
return vp8_submv_prob[4-!!left];
@@ -629,8 +640,8 @@ static const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
* Split motion vector prediction, 16.4.
* @returns the number of motion vectors parsed (2, 4 or 16)
*/
-static int decode_splitmvs(VP8Context *s, VP56RangeCoder *c,
- VP8Macroblock *mb, VP56mv *base_mv)
+static av_always_inline
+int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb)
{
int part_idx = mb->partitioning =
vp8_rac_get_tree(c, vp8_mbsplit_tree, vp8_mbsplit_prob);
@@ -663,11 +674,11 @@ static int decode_splitmvs(VP8Context *s, VP56RangeCoder *c,
switch (vp8_rac_get_tree(c, vp8_submv_ref_tree, submv_prob)) {
case VP8_SUBMVMODE_NEW4X4:
- mb->bmv[n].y = base_mv->y + read_mv_component(c, s->prob->mvc[0]);
- mb->bmv[n].x = base_mv->x + read_mv_component(c, s->prob->mvc[1]);
+ mb->bmv[n].y = mb->mv.y + read_mv_component(c, s->prob->mvc[0]);
+ mb->bmv[n].x = mb->mv.x + read_mv_component(c, s->prob->mvc[1]);
break;
case VP8_SUBMVMODE_ZERO4X4:
- AV_WN32A(&mb->bmv[n], 0);
+ AV_ZERO32(&mb->bmv[n]);
break;
case VP8_SUBMVMODE_LEFT4X4:
AV_WN32A(&mb->bmv[n], left);
@@ -681,8 +692,9 @@ static int decode_splitmvs(VP8Context *s, VP56RangeCoder *c,
return num;
}
-static inline void decode_intra4x4_modes(VP56RangeCoder *c, uint8_t *intra4x4,
- int stride, int keyframe)
+static av_always_inline
+void decode_intra4x4_modes(VP56RangeCoder *c, uint8_t *intra4x4,
+ int stride, int keyframe)
{
int x, y, t, l, i;
@@ -703,8 +715,9 @@ static inline void decode_intra4x4_modes(VP56RangeCoder *c, uint8_t *intra4x4,
}
}
-static void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
- uint8_t *intra4x4, uint8_t *segment)
+static av_always_inline
+void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
+ uint8_t *intra4x4, uint8_t *segment)
{
VP56RangeCoder *c = &s->c;
@@ -724,13 +737,13 @@ static void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra);
mb->ref_frame = VP56_FRAME_CURRENT;
- } else if (vp56_rac_get_prob(c, s->prob->intra)) {
+ } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
VP56mv near[2], best;
uint8_t cnt[4] = { 0 };
uint8_t p[4];
// inter MB, 16.2
- if (vp56_rac_get_prob(c, s->prob->last))
+ if (vp56_rac_get_prob_branchy(c, s->prob->last))
mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ?
VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN;
else
@@ -746,10 +759,11 @@ static void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_mvinter, p);
switch (mb->mode) {
case VP8_MVMODE_SPLIT:
- mb->mv = mb->bmv[decode_splitmvs(s, c, mb, &best) - 1];
+ clamp_mv(s, &mb->mv, &mb->mv, mb_x, mb_y);
+ mb->mv = mb->bmv[decode_splitmvs(s, c, mb) - 1];
break;
case VP8_MVMODE_ZERO:
- AV_WN32A(&mb->mv, 0);
+ AV_ZERO32(&mb->mv);
break;
case VP8_MVMODE_NEAREST:
clamp_mv(s, &mb->mv, &near[0], mb_x, mb_y);
@@ -758,8 +772,9 @@ static void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
clamp_mv(s, &mb->mv, &near[1], mb_x, mb_y);
break;
case VP8_MVMODE_NEW:
- mb->mv.y = best.y + read_mv_component(c, s->prob->mvc[0]);
- mb->mv.x = best.x + read_mv_component(c, s->prob->mvc[1]);
+ clamp_mv(s, &mb->mv, &mb->mv, mb_x, mb_y);
+ mb->mv.y += + read_mv_component(c, s->prob->mvc[0]);
+ mb->mv.x += + read_mv_component(c, s->prob->mvc[1]);
break;
}
if (mb->mode != VP8_MVMODE_SPLIT) {
@@ -776,7 +791,7 @@ static void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c);
mb->ref_frame = VP56_FRAME_CURRENT;
mb->partitioning = VP8_SPLITMVMODE_NONE;
- AV_WN32A(&mb->bmv[0], 0);
+ AV_ZERO32(&mb->bmv[0]);
}
}
@@ -795,41 +810,67 @@ static int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16],
uint8_t probs[8][3][NUM_DCT_TOKENS-1],
int i, int zero_nhood, int16_t qmul[2])
{
- int token, nonzero = 0;
- int offset = 0;
+ uint8_t *token_prob;
+ int nonzero = 0;
+ int coeff;
- for (; i < 16; i++) {
- token = vp8_rac_get_tree_with_offset(c, vp8_coeff_tree, probs[vp8_coeff_band[i]][zero_nhood], offset);
+ do {
+ token_prob = probs[vp8_coeff_band[i]][zero_nhood];
- if (token == DCT_EOB)
- break;
- else if (token >= DCT_CAT1) {
- int cat = token-DCT_CAT1;
- token = vp8_rac_get_coeff(c, vp8_dct_cat_prob[cat]);
- token += 3 + (2<<cat);
- }
+ if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB
+ return nonzero;
- // after the first token, the non-zero prediction context becomes
- // based on the last decoded coeff
- if (!token) {
+skip_eob:
+ if (!vp56_rac_get_prob_branchy(c, token_prob[1])) { // DCT_0
zero_nhood = 0;
- offset = 1;
- continue;
- } else if (token == 1)
+ token_prob = probs[vp8_coeff_band[++i]][0];
+ if (i < 16)
+ goto skip_eob;
+ return nonzero; // invalid input; blocks should end with EOB
+ }
+
+ if (!vp56_rac_get_prob_branchy(c, token_prob[2])) { // DCT_1
+ coeff = 1;
zero_nhood = 1;
- else
+ } else {
zero_nhood = 2;
+ if (!vp56_rac_get_prob_branchy(c, token_prob[3])) { // DCT 2,3,4
+ coeff = vp56_rac_get_prob(c, token_prob[4]);
+ if (coeff)
+ coeff += vp56_rac_get_prob(c, token_prob[5]);
+ coeff += 2;
+ } else {
+ // DCT_CAT*
+ if (!vp56_rac_get_prob_branchy(c, token_prob[6])) {
+ if (!vp56_rac_get_prob_branchy(c, token_prob[7])) { // DCT_CAT1
+ coeff = 5 + vp56_rac_get_prob(c, vp8_dct_cat1_prob[0]);
+ } else { // DCT_CAT2
+ coeff = 7;
+ coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[0]) << 1;
+ coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[1]);
+ }
+ } else { // DCT_CAT3 and up
+ int a = vp56_rac_get_prob(c, token_prob[8]);
+ int b = vp56_rac_get_prob(c, token_prob[9+a]);
+ int cat = (a<<1) + b;
+ coeff = 3 + (8<<cat);
+ coeff += vp8_rac_get_coeff(c, vp8_dct_cat_prob[cat]);
+ }
+ }
+ }
+
// todo: full [16] qmat? load into register?
- block[zigzag_scan[i]] = (vp8_rac_get(c) ? -token : token) * qmul[!!i];
- nonzero = i+1;
- offset = 0;
- }
+ block[zigzag_scan[i]] = (vp8_rac_get(c) ? -coeff : coeff) * qmul[!!i];
+ nonzero = ++i;
+ } while (i < 16);
+
return nonzero;
}
-static void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
- uint8_t t_nnz[9], uint8_t l_nnz[9])
+static av_always_inline
+void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
+ uint8_t t_nnz[9], uint8_t l_nnz[9])
{
LOCAL_ALIGNED_16(DCTELEM, dc,[16]);
int i, x, y, luma_start = 0, luma_ctx = 3;
@@ -926,21 +967,22 @@ void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_
}
}
-static int check_intra_pred_mode(int mode, int mb_x, int mb_y)
+static av_always_inline
+int check_intra_pred_mode(int mode, int mb_x, int mb_y)
{
if (mode == DC_PRED8x8) {
- if (!(mb_x|mb_y))
- mode = DC_128_PRED8x8;
- else if (!mb_y)
+ if (!mb_x) {
+ mode = mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
+ } else if (!mb_y) {
mode = LEFT_DC_PRED8x8;
- else if (!mb_x)
- mode = TOP_DC_PRED8x8;
+ }
}
return mode;
}
-static void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
- uint8_t *intra4x4, int mb_x, int mb_y)
+static av_always_inline
+void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
+ uint8_t *intra4x4, int mb_x, int mb_y)
{
int x, y, mode, nnz, tr;
@@ -1022,11 +1064,12 @@ static void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
* @param linesize size of a single line of plane data, including padding
* @param mc_func motion compensation function pointers (bilinear or sixtap MC)
*/
-static inline void vp8_mc(VP8Context *s, int luma,
- uint8_t *dst, uint8_t *src, const VP56mv *mv,
- int x_off, int y_off, int block_w, int block_h,
- int width, int height, int linesize,
- vp8_mc_func mc_func[3][3])
+static av_always_inline
+void vp8_mc(VP8Context *s, int luma,
+ uint8_t *dst, uint8_t *src, const VP56mv *mv,
+ int x_off, int y_off, int block_w, int block_h,
+ int width, int height, int linesize,
+ vp8_mc_func mc_func[3][3])
{
if (AV_RN32A(mv)) {
static const uint8_t idx[8] = { 0, 1, 2, 1, 2, 1, 2, 1 };
@@ -1050,11 +1093,12 @@ static inline void vp8_mc(VP8Context *s, int luma,
mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0);
}
-static inline void vp8_mc_part(VP8Context *s, uint8_t *dst[3],
- AVFrame *ref_frame, int x_off, int y_off,
- int bx_off, int by_off,
- int block_w, int block_h,
- int width, int height, VP56mv *mv)
+static av_always_inline
+void vp8_mc_part(VP8Context *s, uint8_t *dst[3],
+ AVFrame *ref_frame, int x_off, int y_off,
+ int bx_off, int by_off,
+ int block_w, int block_h,
+ int width, int height, VP56mv *mv)
{
VP56mv uvmv = *mv;
@@ -1085,7 +1129,7 @@ static inline void vp8_mc_part(VP8Context *s, uint8_t *dst[3],
/* Fetch pixels for estimated mv 4 macroblocks ahead.
* Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
-static inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref)
+static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref)
{
/* Don't prefetch refs that haven't been used very often this frame. */
if (s->ref_count[ref-1] > (mb_xy >> 5)) {
@@ -1103,8 +1147,9 @@ static inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, i
/**
* Apply motion vectors to prediction buffer, chapter 18.
*/
-static void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
- int mb_x, int mb_y)
+static av_always_inline
+void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
+ int mb_x, int mb_y)
{
int x_off = mb_x << 4, y_off = mb_y << 4;
int width = 16*s->mb_width, height = 16*s->mb_height;
@@ -1187,7 +1232,7 @@ static void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
}
}
-static void idct_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb)
+static av_always_inline void idct_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb)
{
int x, y, ch;
@@ -1238,7 +1283,7 @@ static void idct_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb)
}
}
-static void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f )
+static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f )
{
int interior_limit, filter_level;
@@ -1278,7 +1323,7 @@ static void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStren
f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT;
}
-static void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y)
+static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y)
{
int mbedge_lim, bedge_lim, hev_thresh;
int filter_level = f->filter_level;
@@ -1347,7 +1392,7 @@ static void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int
}
}
-static void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
+static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
{
int mbedge_lim, bedge_lim;
int filter_level = f->filter_level;
@@ -1416,7 +1461,7 @@ static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
VP8Context *s = avctx->priv_data;
int ret, mb_x, mb_y, i, y, referenced;
enum AVDiscard skip_thresh;
- AVFrame *curframe = NULL;
+ AVFrame *av_uninit(curframe);
if ((ret = decode_frame_header(s, buf, buf_size)) < 0)
return ret;
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp8data.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp8data.h
index 9f56ab63b..1bdac16be 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp8data.h
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/vp8data.h
@@ -118,16 +118,6 @@ static const int8_t vp8_pred16x16_tree_mvinter[4][2] = {
{ -VP8_MVMODE_NEW, -VP8_MVMODE_SPLIT } // '1110', '1111'
};
-static const int8_t vp8_small_mvtree[7][2] = {
- { 1, 4 },
- { 2, 3 },
- { -0, -1 }, // '000', '001'
- { -2, -3 }, // '010', '011'
- { 5, 6 },
- { -4, -5 }, // '100', '101'
- { -6, -7 } // '110', '111'
-};
-
static const uint8_t vp8_mbsplits[5][16] = {
{ 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 1, 1, 1, 1 },
@@ -337,21 +327,6 @@ static const uint8_t vp8_coeff_band[16] =
0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7
};
-static const int8_t vp8_coeff_tree[NUM_DCT_TOKENS-1][2] =
-{
- { -DCT_EOB, 1 }, // '0'
- { -DCT_0, 2 }, // '10'
- { -DCT_1, 3 }, // '110'
- { 4, 6 },
- { -DCT_2, 5 }, // '11100'
- { -DCT_3, -DCT_4 }, // '111010', '111011'
- { 7, 8 },
- { -DCT_CAT1, -DCT_CAT2 }, // '111100', '111101'
- { 9, 10 },
- { -DCT_CAT3, -DCT_CAT4 }, // '1111100', '1111101'
- { -DCT_CAT5, -DCT_CAT6 }, // '1111110', '1111111'
-};
-
static const uint8_t vp8_dct_cat1_prob[] = { 159, 0 };
static const uint8_t vp8_dct_cat2_prob[] = { 165, 145, 0 };
static const uint8_t vp8_dct_cat3_prob[] = { 173, 148, 140, 0 };
@@ -359,10 +334,9 @@ static const uint8_t vp8_dct_cat4_prob[] = { 176, 155, 140, 135, 0 };
static const uint8_t vp8_dct_cat5_prob[] = { 180, 157, 141, 134, 130, 0 };
static const uint8_t vp8_dct_cat6_prob[] = { 254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129, 0 };
-static const uint8_t * const vp8_dct_cat_prob[6] =
+// only used for cat3 and above; cat 1 and 2 are referenced directly
+static const uint8_t * const vp8_dct_cat_prob[] =
{
- vp8_dct_cat1_prob,
- vp8_dct_cat2_prob,
vp8_dct_cat3_prob,
vp8_dct_cat4_prob,
vp8_dct_cat5_prob,
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/idct_mmx.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/idct_mmx.c
index d1859080c..b158c71d2 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/idct_mmx.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/idct_mmx.c
@@ -23,8 +23,8 @@
#include "libavutil/common.h"
#include "libavcodec/dsputil.h"
+#include "libavutil/x86_cpu.h"
#include "dsputil_mmx.h"
-#include "mmx.h"
#define ROW_SHIFT 11
#define COL_SHIFT 6
@@ -87,104 +87,115 @@ static inline void idct_row (int16_t * row, int offset,
static inline void mmxext_row_head (int16_t * const row, const int offset,
const int16_t * const table)
{
- movq_m2r (*(row+offset), mm2); /* mm2 = x6 x4 x2 x0 */
+ __asm__ volatile(
+ "movq (%0), %%mm2 \n\t" /* mm2 = x6 x4 x2 x0 */
- movq_m2r (*(row+offset+4), mm5); /* mm5 = x7 x5 x3 x1 */
- movq_r2r (mm2, mm0); /* mm0 = x6 x4 x2 x0 */
+ "movq 8(%0), %%mm5 \n\t" /* mm5 = x7 x5 x3 x1 */
+ "movq %%mm2, %%mm0 \n\t" /* mm0 = x6 x4 x2 x0 */
- movq_m2r (*table, mm3); /* mm3 = -C2 -C4 C2 C4 */
- movq_r2r (mm5, mm6); /* mm6 = x7 x5 x3 x1 */
+ "movq (%1), %%mm3 \n\t" /* mm3 = -C2 -C4 C2 C4 */
+ "movq %%mm5, %%mm6 \n\t" /* mm6 = x7 x5 x3 x1 */
- movq_m2r (*(table+4), mm4); /* mm4 = C6 C4 C6 C4 */
- pmaddwd_r2r (mm0, mm3); /* mm3 = -C4*x4-C2*x6 C4*x0+C2*x2 */
+ "movq 8(%1), %%mm4 \n\t" /* mm4 = C6 C4 C6 C4 */
+ "pmaddwd %%mm0, %%mm3 \n\t" /* mm3 = -C4*x4-C2*x6 C4*x0+C2*x2 */
- pshufw_r2r (mm2, mm2, 0x4e); /* mm2 = x2 x0 x6 x4 */
+ "pshufw $0x4e, %%mm2, %%mm2 \n\t" /* mm2 = x2 x0 x6 x4 */
+ :: "r" ((row+offset)), "r" (table)
+ );
}
static inline void mmxext_row (const int16_t * const table,
const int32_t * const rounder)
{
- movq_m2r (*(table+8), mm1); /* mm1 = -C5 -C1 C3 C1 */
- pmaddwd_r2r (mm2, mm4); /* mm4 = C4*x0+C6*x2 C4*x4+C6*x6 */
+ __asm__ volatile (
+ "movq 16(%0), %%mm1 \n\t" /* mm1 = -C5 -C1 C3 C1 */
+ "pmaddwd %%mm2, %%mm4 \n\t" /* mm4 = C4*x0+C6*x2 C4*x4+C6*x6 */
- pmaddwd_m2r (*(table+16), mm0); /* mm0 = C4*x4-C6*x6 C4*x0-C6*x2 */
- pshufw_r2r (mm6, mm6, 0x4e); /* mm6 = x3 x1 x7 x5 */
+ "pmaddwd 32(%0), %%mm0 \n\t" /* mm0 = C4*x4-C6*x6 C4*x0-C6*x2 */
+ "pshufw $0x4e, %%mm6, %%mm6 \n\t" /* mm6 = x3 x1 x7 x5 */
- movq_m2r (*(table+12), mm7); /* mm7 = -C7 C3 C7 C5 */
- pmaddwd_r2r (mm5, mm1); /* mm1 = -C1*x5-C5*x7 C1*x1+C3*x3 */
+ "movq 24(%0), %%mm7 \n\t" /* mm7 = -C7 C3 C7 C5 */
+ "pmaddwd %%mm5, %%mm1 \n\t" /* mm1= -C1*x5-C5*x7 C1*x1+C3*x3 */
- paddd_m2r (*rounder, mm3); /* mm3 += rounder */
- pmaddwd_r2r (mm6, mm7); /* mm7 = C3*x1-C7*x3 C5*x5+C7*x7 */
+ "paddd (%1), %%mm3 \n\t" /* mm3 += rounder */
+ "pmaddwd %%mm6, %%mm7 \n\t" /* mm7 = C3*x1-C7*x3 C5*x5+C7*x7 */
- pmaddwd_m2r (*(table+20), mm2); /* mm2 = C4*x0-C2*x2 -C4*x4+C2*x6 */
- paddd_r2r (mm4, mm3); /* mm3 = a1 a0 + rounder */
+ "pmaddwd 40(%0), %%mm2 \n\t" /* mm2= C4*x0-C2*x2 -C4*x4+C2*x6 */
+ "paddd %%mm4, %%mm3 \n\t" /* mm3 = a1 a0 + rounder */
- pmaddwd_m2r (*(table+24), mm5); /* mm5 = C3*x5-C1*x7 C5*x1-C1*x3 */
- movq_r2r (mm3, mm4); /* mm4 = a1 a0 + rounder */
+ "pmaddwd 48(%0), %%mm5 \n\t" /* mm5 = C3*x5-C1*x7 C5*x1-C1*x3 */
+ "movq %%mm3, %%mm4 \n\t" /* mm4 = a1 a0 + rounder */
- pmaddwd_m2r (*(table+28), mm6); /* mm6 = C7*x1-C5*x3 C7*x5+C3*x7 */
- paddd_r2r (mm7, mm1); /* mm1 = b1 b0 */
+ "pmaddwd 56(%0), %%mm6 \n\t" /* mm6 = C7*x1-C5*x3 C7*x5+C3*x7 */
+ "paddd %%mm7, %%mm1 \n\t" /* mm1 = b1 b0 */
- paddd_m2r (*rounder, mm0); /* mm0 += rounder */
- psubd_r2r (mm1, mm3); /* mm3 = a1-b1 a0-b0 + rounder */
+ "paddd (%1), %%mm0 \n\t" /* mm0 += rounder */
+ "psubd %%mm1, %%mm3 \n\t" /* mm3 = a1-b1 a0-b0 + rounder */
- psrad_i2r (ROW_SHIFT, mm3); /* mm3 = y6 y7 */
- paddd_r2r (mm4, mm1); /* mm1 = a1+b1 a0+b0 + rounder */
+ "psrad $" AV_STRINGIFY(ROW_SHIFT) ", %%mm3 \n\t" /* mm3 = y6 y7 */
+ "paddd %%mm4, %%mm1 \n\t" /* mm1 = a1+b1 a0+b0 + rounder */
- paddd_r2r (mm2, mm0); /* mm0 = a3 a2 + rounder */
- psrad_i2r (ROW_SHIFT, mm1); /* mm1 = y1 y0 */
+ "paddd %%mm2, %%mm0 \n\t" /* mm0 = a3 a2 + rounder */
+ "psrad $" AV_STRINGIFY(ROW_SHIFT) ", %%mm1 \n\t" /* mm1 = y1 y0 */
- paddd_r2r (mm6, mm5); /* mm5 = b3 b2 */
- movq_r2r (mm0, mm4); /* mm4 = a3 a2 + rounder */
+ "paddd %%mm6, %%mm5 \n\t" /* mm5 = b3 b2 */
+ "movq %%mm0, %%mm4 \n\t" /* mm4 = a3 a2 + rounder */
- paddd_r2r (mm5, mm0); /* mm0 = a3+b3 a2+b2 + rounder */
- psubd_r2r (mm5, mm4); /* mm4 = a3-b3 a2-b2 + rounder */
+ "paddd %%mm5, %%mm0 \n\t" /* mm0 = a3+b3 a2+b2 + rounder */
+ "psubd %%mm5, %%mm4 \n\t" /* mm4 = a3-b3 a2-b2 + rounder */
+ : : "r" (table), "r" (rounder));
}
static inline void mmxext_row_tail (int16_t * const row, const int store)
{
- psrad_i2r (ROW_SHIFT, mm0); /* mm0 = y3 y2 */
+ __asm__ volatile (
+ "psrad $" AV_STRINGIFY(ROW_SHIFT) ", %%mm0 \n\t" /* mm0 = y3 y2 */
- psrad_i2r (ROW_SHIFT, mm4); /* mm4 = y4 y5 */
+ "psrad $" AV_STRINGIFY(ROW_SHIFT) ", %%mm4 \n\t" /* mm4 = y4 y5 */
- packssdw_r2r (mm0, mm1); /* mm1 = y3 y2 y1 y0 */
+ "packssdw %%mm0, %%mm1 \n\t" /* mm1 = y3 y2 y1 y0 */
- packssdw_r2r (mm3, mm4); /* mm4 = y6 y7 y4 y5 */
+ "packssdw %%mm3, %%mm4 \n\t" /* mm4 = y6 y7 y4 y5 */
- movq_r2m (mm1, *(row+store)); /* save y3 y2 y1 y0 */
- pshufw_r2r (mm4, mm4, 0xb1); /* mm4 = y7 y6 y5 y4 */
+ "movq %%mm1, (%0) \n\t" /* save y3 y2 y1 y0 */
+ "pshufw $0xb1, %%mm4, %%mm4 \n\t" /* mm4 = y7 y6 y5 y4 */
- /* slot */
+ /* slot */
- movq_r2m (mm4, *(row+store+4)); /* save y7 y6 y5 y4 */
+ "movq %%mm4, 8(%0) \n\t" /* save y7 y6 y5 y4 */
+ :: "r" (row+store)
+ );
}
static inline void mmxext_row_mid (int16_t * const row, const int store,
const int offset,
const int16_t * const table)
{
- movq_m2r (*(row+offset), mm2); /* mm2 = x6 x4 x2 x0 */
- psrad_i2r (ROW_SHIFT, mm0); /* mm0 = y3 y2 */
+ __asm__ volatile (
+ "movq (%0,%1), %%mm2 \n\t" /* mm2 = x6 x4 x2 x0 */
+ "psrad $" AV_STRINGIFY(ROW_SHIFT) ", %%mm0 \n\t" /* mm0 = y3 y2 */
- movq_m2r (*(row+offset+4), mm5); /* mm5 = x7 x5 x3 x1 */
- psrad_i2r (ROW_SHIFT, mm4); /* mm4 = y4 y5 */
+ "movq 8(%0,%1), %%mm5 \n\t" /* mm5 = x7 x5 x3 x1 */
+ "psrad $" AV_STRINGIFY(ROW_SHIFT) ", %%mm4 \n\t" /* mm4 = y4 y5 */
- packssdw_r2r (mm0, mm1); /* mm1 = y3 y2 y1 y0 */
- movq_r2r (mm5, mm6); /* mm6 = x7 x5 x3 x1 */
+ "packssdw %%mm0, %%mm1 \n\t" /* mm1 = y3 y2 y1 y0 */
+ "movq %%mm5, %%mm6 \n\t" /* mm6 = x7 x5 x3 x1 */
- packssdw_r2r (mm3, mm4); /* mm4 = y6 y7 y4 y5 */
- movq_r2r (mm2, mm0); /* mm0 = x6 x4 x2 x0 */
+ "packssdw %%mm3, %%mm4 \n\t" /* mm4 = y6 y7 y4 y5 */
+ "movq %%mm2, %%mm0 \n\t" /* mm0 = x6 x4 x2 x0 */
- movq_r2m (mm1, *(row+store)); /* save y3 y2 y1 y0 */
- pshufw_r2r (mm4, mm4, 0xb1); /* mm4 = y7 y6 y5 y4 */
+ "movq %%mm1, (%0,%2) \n\t" /* save y3 y2 y1 y0 */
+ "pshufw $0xb1, %%mm4, %%mm4\n\t" /* mm4 = y7 y6 y5 y4 */
- movq_m2r (*table, mm3); /* mm3 = -C2 -C4 C2 C4 */
- movq_r2m (mm4, *(row+store+4)); /* save y7 y6 y5 y4 */
+ "movq (%3), %%mm3 \n\t" /* mm3 = -C2 -C4 C2 C4 */
+ "movq %%mm4, 8(%0,%2) \n\t" /* save y7 y6 y5 y4 */
- pmaddwd_r2r (mm0, mm3); /* mm3 = -C4*x4-C2*x6 C4*x0+C2*x2 */
+ "pmaddwd %%mm0, %%mm3 \n\t" /* mm3= -C4*x4-C2*x6 C4*x0+C2*x2 */
- movq_m2r (*(table+4), mm4); /* mm4 = C6 C4 C6 C4 */
- pshufw_r2r (mm2, mm2, 0x4e); /* mm2 = x2 x0 x6 x4 */
+ "movq 8(%3), %%mm4 \n\t" /* mm4 = C6 C4 C6 C4 */
+ "pshufw $0x4e, %%mm2, %%mm2\n\t" /* mm2 = x2 x0 x6 x4 */
+ :: "r" (row), "r" ((x86_reg) (2*offset)), "r" ((x86_reg) (2*store)), "r" (table)
+ );
}
@@ -202,119 +213,132 @@ static inline void mmxext_row_mid (int16_t * const row, const int store,
static inline void mmx_row_head (int16_t * const row, const int offset,
const int16_t * const table)
{
- movq_m2r (*(row+offset), mm2); /* mm2 = x6 x4 x2 x0 */
+ __asm__ volatile (
+ "movq (%0), %%mm2 \n\t" /* mm2 = x6 x4 x2 x0 */
- movq_m2r (*(row+offset+4), mm5); /* mm5 = x7 x5 x3 x1 */
- movq_r2r (mm2, mm0); /* mm0 = x6 x4 x2 x0 */
+ "movq 8(%0), %%mm5 \n\t" /* mm5 = x7 x5 x3 x1 */
+ "movq %%mm2, %%mm0 \n\t" /* mm0 = x6 x4 x2 x0 */
- movq_m2r (*table, mm3); /* mm3 = C6 C4 C2 C4 */
- movq_r2r (mm5, mm6); /* mm6 = x7 x5 x3 x1 */
+ "movq (%1), %%mm3 \n\t" /* mm3 = C6 C4 C2 C4 */
+ "movq %%mm5, %%mm6 \n\t" /* mm6 = x7 x5 x3 x1 */
- punpckldq_r2r (mm0, mm0); /* mm0 = x2 x0 x2 x0 */
+ "punpckldq %%mm0, %%mm0 \n\t" /* mm0 = x2 x0 x2 x0 */
- movq_m2r (*(table+4), mm4); /* mm4 = -C2 -C4 C6 C4 */
- pmaddwd_r2r (mm0, mm3); /* mm3 = C4*x0+C6*x2 C4*x0+C2*x2 */
+ "movq 8(%1), %%mm4 \n\t" /* mm4 = -C2 -C4 C6 C4 */
+ "pmaddwd %%mm0, %%mm3 \n\t" /* mm3 = C4*x0+C6*x2 C4*x0+C2*x2 */
- movq_m2r (*(table+8), mm1); /* mm1 = -C7 C3 C3 C1 */
- punpckhdq_r2r (mm2, mm2); /* mm2 = x6 x4 x6 x4 */
+ "movq 16(%1), %%mm1 \n\t" /* mm1 = -C7 C3 C3 C1 */
+ "punpckhdq %%mm2, %%mm2 \n\t" /* mm2 = x6 x4 x6 x4 */
+ :: "r" ((row+offset)), "r" (table)
+ );
}
static inline void mmx_row (const int16_t * const table,
const int32_t * const rounder)
{
- pmaddwd_r2r (mm2, mm4); /* mm4 = -C4*x4-C2*x6 C4*x4+C6*x6 */
- punpckldq_r2r (mm5, mm5); /* mm5 = x3 x1 x3 x1 */
+ __asm__ volatile (
+ "pmaddwd %%mm2, %%mm4 \n\t" /* mm4 = -C4*x4-C2*x6 C4*x4+C6*x6 */
+ "punpckldq %%mm5, %%mm5 \n\t" /* mm5 = x3 x1 x3 x1 */
- pmaddwd_m2r (*(table+16), mm0); /* mm0 = C4*x0-C2*x2 C4*x0-C6*x2 */
- punpckhdq_r2r (mm6, mm6); /* mm6 = x7 x5 x7 x5 */
+ "pmaddwd 32(%0), %%mm0 \n\t" /* mm0 = C4*x0-C2*x2 C4*x0-C6*x2 */
+ "punpckhdq %%mm6, %%mm6 \n\t" /* mm6 = x7 x5 x7 x5 */
- movq_m2r (*(table+12), mm7); /* mm7 = -C5 -C1 C7 C5 */
- pmaddwd_r2r (mm5, mm1); /* mm1 = C3*x1-C7*x3 C1*x1+C3*x3 */
+ "movq 24(%0), %%mm7 \n\t" /* mm7 = -C5 -C1 C7 C5 */
+ "pmaddwd %%mm5, %%mm1 \n\t" /* mm1 = C3*x1-C7*x3 C1*x1+C3*x3 */
- paddd_m2r (*rounder, mm3); /* mm3 += rounder */
- pmaddwd_r2r (mm6, mm7); /* mm7 = -C1*x5-C5*x7 C5*x5+C7*x7 */
+ "paddd (%1), %%mm3 \n\t" /* mm3 += rounder */
+ "pmaddwd %%mm6, %%mm7 \n\t" /* mm7 = -C1*x5-C5*x7 C5*x5+C7*x7 */
- pmaddwd_m2r (*(table+20), mm2); /* mm2 = C4*x4-C6*x6 -C4*x4+C2*x6 */
- paddd_r2r (mm4, mm3); /* mm3 = a1 a0 + rounder */
+ "pmaddwd 40(%0), %%mm2 \n\t" /* mm2 = C4*x4-C6*x6 -C4*x4+C2*x6 */
+ "paddd %%mm4, %%mm3 \n\t" /* mm3 = a1 a0 + rounder */
- pmaddwd_m2r (*(table+24), mm5); /* mm5 = C7*x1-C5*x3 C5*x1-C1*x3 */
- movq_r2r (mm3, mm4); /* mm4 = a1 a0 + rounder */
+ "pmaddwd 48(%0), %%mm5 \n\t" /* mm5 = C7*x1-C5*x3 C5*x1-C1*x3 */
+ "movq %%mm3, %%mm4 \n\t" /* mm4 = a1 a0 + rounder */
- pmaddwd_m2r (*(table+28), mm6); /* mm6 = C3*x5-C1*x7 C7*x5+C3*x7 */
- paddd_r2r (mm7, mm1); /* mm1 = b1 b0 */
+ "pmaddwd 56(%0), %%mm6 \n\t" /* mm6 = C3*x5-C1*x7 C7*x5+C3*x7 */
+ "paddd %%mm7, %%mm1 \n\t" /* mm1 = b1 b0 */
- paddd_m2r (*rounder, mm0); /* mm0 += rounder */
- psubd_r2r (mm1, mm3); /* mm3 = a1-b1 a0-b0 + rounder */
+ "paddd (%1), %%mm0 \n\t" /* mm0 += rounder */
+ "psubd %%mm1, %%mm3 \n\t" /* mm3 = a1-b1 a0-b0 + rounder */
- psrad_i2r (ROW_SHIFT, mm3); /* mm3 = y6 y7 */
- paddd_r2r (mm4, mm1); /* mm1 = a1+b1 a0+b0 + rounder */
+ "psrad $" AV_STRINGIFY(ROW_SHIFT) ", %%mm3 \n\t" /* mm3 = y6 y7 */
+ "paddd %%mm4, %%mm1 \n\t" /* mm1 = a1+b1 a0+b0 + rounder */
- paddd_r2r (mm2, mm0); /* mm0 = a3 a2 + rounder */
- psrad_i2r (ROW_SHIFT, mm1); /* mm1 = y1 y0 */
+ "paddd %%mm2, %%mm0 \n\t" /* mm0 = a3 a2 + rounder */
+ "psrad $" AV_STRINGIFY(ROW_SHIFT) ", %%mm1 \n\t" /* mm1 = y1 y0 */
- paddd_r2r (mm6, mm5); /* mm5 = b3 b2 */
- movq_r2r (mm0, mm7); /* mm7 = a3 a2 + rounder */
+ "paddd %%mm6, %%mm5 \n\t" /* mm5 = b3 b2 */
+ "movq %%mm0, %%mm7 \n\t" /* mm7 = a3 a2 + rounder */
- paddd_r2r (mm5, mm0); /* mm0 = a3+b3 a2+b2 + rounder */
- psubd_r2r (mm5, mm7); /* mm7 = a3-b3 a2-b2 + rounder */
+ "paddd %%mm5, %%mm0 \n\t" /* mm0 = a3+b3 a2+b2 + rounder */
+ "psubd %%mm5, %%mm7 \n\t" /* mm7 = a3-b3 a2-b2 + rounder */
+ :: "r" (table), "r" (rounder)
+ );
}
static inline void mmx_row_tail (int16_t * const row, const int store)
{
- psrad_i2r (ROW_SHIFT, mm0); /* mm0 = y3 y2 */
+ __asm__ volatile (
+ "psrad $" AV_STRINGIFY(ROW_SHIFT) ", %%mm0 \n\t" /* mm0 = y3 y2 */
- psrad_i2r (ROW_SHIFT, mm7); /* mm7 = y4 y5 */
+ "psrad $" AV_STRINGIFY(ROW_SHIFT) ", %%mm7 \n\t" /* mm7 = y4 y5 */
- packssdw_r2r (mm0, mm1); /* mm1 = y3 y2 y1 y0 */
+ "packssdw %%mm0, %%mm1 \n\t" /* mm1 = y3 y2 y1 y0 */
- packssdw_r2r (mm3, mm7); /* mm7 = y6 y7 y4 y5 */
+ "packssdw %%mm3, %%mm7 \n\t" /* mm7 = y6 y7 y4 y5 */
- movq_r2m (mm1, *(row+store)); /* save y3 y2 y1 y0 */
- movq_r2r (mm7, mm4); /* mm4 = y6 y7 y4 y5 */
+ "movq %%mm1, (%0) \n\t" /* save y3 y2 y1 y0 */
+ "movq %%mm7, %%mm4 \n\t" /* mm4 = y6 y7 y4 y5 */
- pslld_i2r (16, mm7); /* mm7 = y7 0 y5 0 */
+ "pslld $16, %%mm7 \n\t" /* mm7 = y7 0 y5 0 */
- psrld_i2r (16, mm4); /* mm4 = 0 y6 0 y4 */
+ "psrld $16, %%mm4 \n\t" /* mm4 = 0 y6 0 y4 */
- por_r2r (mm4, mm7); /* mm7 = y7 y6 y5 y4 */
+ "por %%mm4, %%mm7 \n\t" /* mm7 = y7 y6 y5 y4 */
- /* slot */
+ /* slot */
- movq_r2m (mm7, *(row+store+4)); /* save y7 y6 y5 y4 */
+ "movq %%mm7, 8(%0) \n\t" /* save y7 y6 y5 y4 */
+ :: "r" (row+store)
+ );
}
static inline void mmx_row_mid (int16_t * const row, const int store,
const int offset, const int16_t * const table)
{
- movq_m2r (*(row+offset), mm2); /* mm2 = x6 x4 x2 x0 */
- psrad_i2r (ROW_SHIFT, mm0); /* mm0 = y3 y2 */
- movq_m2r (*(row+offset+4), mm5); /* mm5 = x7 x5 x3 x1 */
- psrad_i2r (ROW_SHIFT, mm7); /* mm7 = y4 y5 */
+ __asm__ volatile (
+ "movq (%0,%1), %%mm2 \n\t" /* mm2 = x6 x4 x2 x0 */
+ "psrad $" AV_STRINGIFY(ROW_SHIFT) ", %%mm0 \n\t" /* mm0 = y3 y2 */
- packssdw_r2r (mm0, mm1); /* mm1 = y3 y2 y1 y0 */
- movq_r2r (mm5, mm6); /* mm6 = x7 x5 x3 x1 */
+ "movq 8(%0,%1), %%mm5 \n\t" /* mm5 = x7 x5 x3 x1 */
+ "psrad $" AV_STRINGIFY(ROW_SHIFT) ", %%mm7 \n\t" /* mm7 = y4 y5 */
- packssdw_r2r (mm3, mm7); /* mm7 = y6 y7 y4 y5 */
- movq_r2r (mm2, mm0); /* mm0 = x6 x4 x2 x0 */
+ "packssdw %%mm0, %%mm1 \n\t" /* mm1 = y3 y2 y1 y0 */
+ "movq %%mm5, %%mm6 \n\t" /* mm6 = x7 x5 x3 x1 */
- movq_r2m (mm1, *(row+store)); /* save y3 y2 y1 y0 */
- movq_r2r (mm7, mm1); /* mm1 = y6 y7 y4 y5 */
+ "packssdw %%mm3, %%mm7 \n\t" /* mm7 = y6 y7 y4 y5 */
+ "movq %%mm2, %%mm0 \n\t" /* mm0 = x6 x4 x2 x0 */
- punpckldq_r2r (mm0, mm0); /* mm0 = x2 x0 x2 x0 */
- psrld_i2r (16, mm7); /* mm7 = 0 y6 0 y4 */
+ "movq %%mm1, (%0,%2) \n\t" /* save y3 y2 y1 y0 */
+ "movq %%mm7, %%mm1 \n\t" /* mm1 = y6 y7 y4 y5 */
- movq_m2r (*table, mm3); /* mm3 = C6 C4 C2 C4 */
- pslld_i2r (16, mm1); /* mm1 = y7 0 y5 0 */
+ "punpckldq %%mm0, %%mm0 \n\t" /* mm0 = x2 x0 x2 x0 */
+ "psrld $16, %%mm7 \n\t" /* mm7 = 0 y6 0 y4 */
- movq_m2r (*(table+4), mm4); /* mm4 = -C2 -C4 C6 C4 */
- por_r2r (mm1, mm7); /* mm7 = y7 y6 y5 y4 */
+ "movq (%3), %%mm3 \n\t" /* mm3 = C6 C4 C2 C4 */
+ "pslld $16, %%mm1 \n\t" /* mm1 = y7 0 y5 0 */
- movq_m2r (*(table+8), mm1); /* mm1 = -C7 C3 C3 C1 */
- punpckhdq_r2r (mm2, mm2); /* mm2 = x6 x4 x6 x4 */
+ "movq 8(%3), %%mm4 \n\t" /* mm4 = -C2 -C4 C6 C4 */
+ "por %%mm1, %%mm7 \n\t" /* mm7 = y7 y6 y5 y4 */
- movq_r2m (mm7, *(row+store+4)); /* save y7 y6 y5 y4 */
- pmaddwd_r2r (mm0, mm3); /* mm3 = C4*x0+C6*x2 C4*x0+C2*x2 */
+ "movq 16(%3), %%mm1 \n\t" /* mm1 = -C7 C3 C3 C1 */
+ "punpckhdq %%mm2, %%mm2 \n\t" /* mm2 = x6 x4 x6 x4 */
+
+ "movq %%mm7, 8(%0,%2) \n\t" /* save y7 y6 y5 y4 */
+ "pmaddwd %%mm0, %%mm3 \n\t" /* mm3 = C4*x0+C6*x2 C4*x0+C2*x2 */
+ : : "r" (row), "r" ((x86_reg) (2*offset)), "r" ((x86_reg) (2*store)), "r" (table)
+ );
}
@@ -398,140 +422,145 @@ static inline void idct_col (int16_t * const col, const int offset)
#define T3 43790
#define C4 23170
- DECLARE_ALIGNED(8, static const short, t1_vector)[] = {T1,T1,T1,T1};
- DECLARE_ALIGNED(8, static const short, t2_vector)[] = {T2,T2,T2,T2};
- DECLARE_ALIGNED(8, static const short, t3_vector)[] = {T3,T3,T3,T3};
- DECLARE_ALIGNED(8, static const short, c4_vector)[] = {C4,C4,C4,C4};
+ DECLARE_ALIGNED(8, static const short, t1_vector)[] = {
+ T1,T1,T1,T1,
+ T2,T2,T2,T2,
+ T3,T3,T3,T3,
+ C4,C4,C4,C4
+ };
/* column code adapted from Peter Gubanov */
/* http://www.elecard.com/peter/idct.shtml */
- movq_m2r (*t1_vector, mm0); /* mm0 = T1 */
+ __asm__ volatile (
+ "movq (%0), %%mm0 \n\t" /* mm0 = T1 */
- movq_m2r (*(col+offset+1*8), mm1); /* mm1 = x1 */
- movq_r2r (mm0, mm2); /* mm2 = T1 */
+ "movq 2*8(%1), %%mm1 \n\t" /* mm1 = x1 */
+ "movq %%mm0, %%mm2 \n\t" /* mm2 = T1 */
- movq_m2r (*(col+offset+7*8), mm4); /* mm4 = x7 */
- pmulhw_r2r (mm1, mm0); /* mm0 = T1*x1 */
+ "movq 7*2*8(%1), %%mm4 \n\t" /* mm4 = x7 */
+ "pmulhw %%mm1, %%mm0 \n\t" /* mm0 = T1*x1 */
- movq_m2r (*t3_vector, mm5); /* mm5 = T3 */
- pmulhw_r2r (mm4, mm2); /* mm2 = T1*x7 */
+ "movq 16(%0), %%mm5 \n\t" /* mm5 = T3 */
+ "pmulhw %%mm4, %%mm2 \n\t" /* mm2 = T1*x7 */
- movq_m2r (*(col+offset+5*8), mm6); /* mm6 = x5 */
- movq_r2r (mm5, mm7); /* mm7 = T3-1 */
+ "movq 2*5*8(%1), %%mm6 \n\t" /* mm6 = x5 */
+ "movq %%mm5, %%mm7 \n\t" /* mm7 = T3-1 */
- movq_m2r (*(col+offset+3*8), mm3); /* mm3 = x3 */
- psubsw_r2r (mm4, mm0); /* mm0 = v17 */
+ "movq 3*8*2(%1), %%mm3 \n\t" /* mm3 = x3 */
+ "psubsw %%mm4, %%mm0 \n\t" /* mm0 = v17 */
- movq_m2r (*t2_vector, mm4); /* mm4 = T2 */
- pmulhw_r2r (mm3, mm5); /* mm5 = (T3-1)*x3 */
+ "movq 8(%0), %%mm4 \n\t" /* mm4 = T2 */
+ "pmulhw %%mm3, %%mm5 \n\t" /* mm5 = (T3-1)*x3 */
- paddsw_r2r (mm2, mm1); /* mm1 = u17 */
- pmulhw_r2r (mm6, mm7); /* mm7 = (T3-1)*x5 */
+ "paddsw %%mm2, %%mm1 \n\t" /* mm1 = u17 */
+ "pmulhw %%mm6, %%mm7 \n\t" /* mm7 = (T3-1)*x5 */
- /* slot */
+ /* slot */
- movq_r2r (mm4, mm2); /* mm2 = T2 */
- paddsw_r2r (mm3, mm5); /* mm5 = T3*x3 */
+ "movq %%mm4, %%mm2 \n\t" /* mm2 = T2 */
+ "paddsw %%mm3, %%mm5 \n\t" /* mm5 = T3*x3 */
- pmulhw_m2r (*(col+offset+2*8), mm4);/* mm4 = T2*x2 */
- paddsw_r2r (mm6, mm7); /* mm7 = T3*x5 */
+ "pmulhw 2*8*2(%1), %%mm4 \n\t" /* mm4 = T2*x2 */
+ "paddsw %%mm6, %%mm7 \n\t" /* mm7 = T3*x5 */
- psubsw_r2r (mm6, mm5); /* mm5 = v35 */
- paddsw_r2r (mm3, mm7); /* mm7 = u35 */
+ "psubsw %%mm6, %%mm5 \n\t" /* mm5 = v35 */
+ "paddsw %%mm3, %%mm7 \n\t" /* mm7 = u35 */
- movq_m2r (*(col+offset+6*8), mm3); /* mm3 = x6 */
- movq_r2r (mm0, mm6); /* mm6 = v17 */
+ "movq 6*8*2(%1), %%mm3 \n\t" /* mm3 = x6 */
+ "movq %%mm0, %%mm6 \n\t" /* mm6 = v17 */
- pmulhw_r2r (mm3, mm2); /* mm2 = T2*x6 */
- psubsw_r2r (mm5, mm0); /* mm0 = b3 */
+ "pmulhw %%mm3, %%mm2 \n\t" /* mm2 = T2*x6 */
+ "psubsw %%mm5, %%mm0 \n\t" /* mm0 = b3 */
- psubsw_r2r (mm3, mm4); /* mm4 = v26 */
- paddsw_r2r (mm6, mm5); /* mm5 = v12 */
+ "psubsw %%mm3, %%mm4 \n\t" /* mm4 = v26 */
+ "paddsw %%mm6, %%mm5 \n\t" /* mm5 = v12 */
- movq_r2m (mm0, *(col+offset+3*8)); /* save b3 in scratch0 */
- movq_r2r (mm1, mm6); /* mm6 = u17 */
+ "movq %%mm0, 3*8*2(%1)\n\t" /* save b3 in scratch0 */
+ "movq %%mm1, %%mm6 \n\t" /* mm6 = u17 */
- paddsw_m2r (*(col+offset+2*8), mm2);/* mm2 = u26 */
- paddsw_r2r (mm7, mm6); /* mm6 = b0 */
+ "paddsw 2*8*2(%1), %%mm2 \n\t" /* mm2 = u26 */
+ "paddsw %%mm7, %%mm6 \n\t" /* mm6 = b0 */
- psubsw_r2r (mm7, mm1); /* mm1 = u12 */
- movq_r2r (mm1, mm7); /* mm7 = u12 */
+ "psubsw %%mm7, %%mm1 \n\t" /* mm1 = u12 */
+ "movq %%mm1, %%mm7 \n\t" /* mm7 = u12 */
- movq_m2r (*(col+offset+0*8), mm3); /* mm3 = x0 */
- paddsw_r2r (mm5, mm1); /* mm1 = u12+v12 */
+ "movq 0*8(%1), %%mm3 \n\t" /* mm3 = x0 */
+ "paddsw %%mm5, %%mm1 \n\t" /* mm1 = u12+v12 */
- movq_m2r (*c4_vector, mm0); /* mm0 = C4/2 */
- psubsw_r2r (mm5, mm7); /* mm7 = u12-v12 */
+ "movq 24(%0), %%mm0 \n\t" /* mm0 = C4/2 */
+ "psubsw %%mm5, %%mm7 \n\t" /* mm7 = u12-v12 */
- movq_r2m (mm6, *(col+offset+5*8)); /* save b0 in scratch1 */
- pmulhw_r2r (mm0, mm1); /* mm1 = b1/2 */
+ "movq %%mm6, 5*8*2(%1)\n\t" /* save b0 in scratch1 */
+ "pmulhw %%mm0, %%mm1 \n\t" /* mm1 = b1/2 */
- movq_r2r (mm4, mm6); /* mm6 = v26 */
- pmulhw_r2r (mm0, mm7); /* mm7 = b2/2 */
+ "movq %%mm4, %%mm6 \n\t" /* mm6 = v26 */
+ "pmulhw %%mm0, %%mm7 \n\t" /* mm7 = b2/2 */
- movq_m2r (*(col+offset+4*8), mm5); /* mm5 = x4 */
- movq_r2r (mm3, mm0); /* mm0 = x0 */
+ "movq 4*8*2(%1), %%mm5 \n\t" /* mm5 = x4 */
+ "movq %%mm3, %%mm0 \n\t" /* mm0 = x0 */
- psubsw_r2r (mm5, mm3); /* mm3 = v04 */
- paddsw_r2r (mm5, mm0); /* mm0 = u04 */
+ "psubsw %%mm5, %%mm3 \n\t" /* mm3 = v04 */
+ "paddsw %%mm5, %%mm0 \n\t" /* mm0 = u04 */
- paddsw_r2r (mm3, mm4); /* mm4 = a1 */
- movq_r2r (mm0, mm5); /* mm5 = u04 */
+ "paddsw %%mm3, %%mm4 \n\t" /* mm4 = a1 */
+ "movq %%mm0, %%mm5 \n\t" /* mm5 = u04 */
- psubsw_r2r (mm6, mm3); /* mm3 = a2 */
- paddsw_r2r (mm2, mm5); /* mm5 = a0 */
+ "psubsw %%mm6, %%mm3 \n\t" /* mm3 = a2 */
+ "paddsw %%mm2, %%mm5 \n\t" /* mm5 = a0 */
- paddsw_r2r (mm1, mm1); /* mm1 = b1 */
- psubsw_r2r (mm2, mm0); /* mm0 = a3 */
+ "paddsw %%mm1, %%mm1 \n\t" /* mm1 = b1 */
+ "psubsw %%mm2, %%mm0 \n\t" /* mm0 = a3 */
- paddsw_r2r (mm7, mm7); /* mm7 = b2 */
- movq_r2r (mm3, mm2); /* mm2 = a2 */
+ "paddsw %%mm7, %%mm7 \n\t" /* mm7 = b2 */
+ "movq %%mm3, %%mm2 \n\t" /* mm2 = a2 */
- movq_r2r (mm4, mm6); /* mm6 = a1 */
- paddsw_r2r (mm7, mm3); /* mm3 = a2+b2 */
+ "movq %%mm4, %%mm6 \n\t" /* mm6 = a1 */
+ "paddsw %%mm7, %%mm3 \n\t" /* mm3 = a2+b2 */
- psraw_i2r (COL_SHIFT, mm3); /* mm3 = y2 */
- paddsw_r2r (mm1, mm4); /* mm4 = a1+b1 */
+ "psraw $" AV_STRINGIFY(COL_SHIFT) ", %%mm3\n\t" /* mm3 = y2 */
+ "paddsw %%mm1, %%mm4\n\t" /* mm4 = a1+b1 */
- psraw_i2r (COL_SHIFT, mm4); /* mm4 = y1 */
- psubsw_r2r (mm1, mm6); /* mm6 = a1-b1 */
+ "psraw $" AV_STRINGIFY(COL_SHIFT) ", %%mm4\n\t" /* mm4 = y1 */
+ "psubsw %%mm1, %%mm6 \n\t" /* mm6 = a1-b1 */
- movq_m2r (*(col+offset+5*8), mm1); /* mm1 = b0 */
- psubsw_r2r (mm7, mm2); /* mm2 = a2-b2 */
+ "movq 5*8*2(%1), %%mm1 \n\t" /* mm1 = b0 */
+ "psubsw %%mm7, %%mm2 \n\t" /* mm2 = a2-b2 */
- psraw_i2r (COL_SHIFT, mm6); /* mm6 = y6 */
- movq_r2r (mm5, mm7); /* mm7 = a0 */
+ "psraw $" AV_STRINGIFY(COL_SHIFT) ", %%mm6\n\t" /* mm6 = y6 */
+ "movq %%mm5, %%mm7 \n\t" /* mm7 = a0 */
- movq_r2m (mm4, *(col+offset+1*8)); /* save y1 */
- psraw_i2r (COL_SHIFT, mm2); /* mm2 = y5 */
+ "movq %%mm4, 1*8*2(%1)\n\t" /* save y1 */
+ "psraw $" AV_STRINGIFY(COL_SHIFT) ", %%mm2\n\t" /* mm2 = y5 */
- movq_r2m (mm3, *(col+offset+2*8)); /* save y2 */
- paddsw_r2r (mm1, mm5); /* mm5 = a0+b0 */
+ "movq %%mm3, 2*8*2(%1)\n\t" /* save y2 */
+ "paddsw %%mm1, %%mm5 \n\t" /* mm5 = a0+b0 */
- movq_m2r (*(col+offset+3*8), mm4); /* mm4 = b3 */
- psubsw_r2r (mm1, mm7); /* mm7 = a0-b0 */
+ "movq 3*8*2(%1), %%mm4 \n\t" /* mm4 = b3 */
+ "psubsw %%mm1, %%mm7 \n\t" /* mm7 = a0-b0 */
- psraw_i2r (COL_SHIFT, mm5); /* mm5 = y0 */
- movq_r2r (mm0, mm3); /* mm3 = a3 */
+ "psraw $" AV_STRINGIFY(COL_SHIFT) ", %%mm5\n\t" /* mm5 = y0 */
+ "movq %%mm0, %%mm3 \n\t" /* mm3 = a3 */
- movq_r2m (mm2, *(col+offset+5*8)); /* save y5 */
- psubsw_r2r (mm4, mm3); /* mm3 = a3-b3 */
+ "movq %%mm2, 5*8*2(%1)\n\t" /* save y5 */
+ "psubsw %%mm4, %%mm3 \n\t" /* mm3 = a3-b3 */
- psraw_i2r (COL_SHIFT, mm7); /* mm7 = y7 */
- paddsw_r2r (mm0, mm4); /* mm4 = a3+b3 */
+ "psraw $" AV_STRINGIFY(COL_SHIFT) ", %%mm7\n\t" /* mm7 = y7 */
+ "paddsw %%mm0, %%mm4 \n\t" /* mm4 = a3+b3 */
- movq_r2m (mm5, *(col+offset+0*8)); /* save y0 */
- psraw_i2r (COL_SHIFT, mm3); /* mm3 = y4 */
+ "movq %%mm5, 0*8*2(%1)\n\t" /* save y0 */
+ "psraw $" AV_STRINGIFY(COL_SHIFT) ", %%mm3\n\t" /* mm3 = y4 */
- movq_r2m (mm6, *(col+offset+6*8)); /* save y6 */
- psraw_i2r (COL_SHIFT, mm4); /* mm4 = y3 */
+ "movq %%mm6, 6*8*2(%1)\n\t" /* save y6 */
+ "psraw $" AV_STRINGIFY(COL_SHIFT) ", %%mm4\n\t" /* mm4 = y3 */
- movq_r2m (mm7, *(col+offset+7*8)); /* save y7 */
+ "movq %%mm7, 7*8*2(%1)\n\t" /* save y7 */
- movq_r2m (mm3, *(col+offset+4*8)); /* save y4 */
+ "movq %%mm3, 4*8*2(%1)\n\t" /* save y4 */
- movq_r2m (mm4, *(col+offset+3*8)); /* save y3 */
+ "movq %%mm4, 3*8*2(%1)\n\t" /* save y3 */
+ :: "r" (t1_vector), "r" (col+offset)
+ );
#undef T1
#undef T2
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/mmx.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/mmx.h
deleted file mode 100644
index ef064e3e3..000000000
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/mmx.h
+++ /dev/null
@@ -1,267 +0,0 @@
-/*
- * mmx.h
- * Copyright (C) 1997-2001 H. Dietz and R. Fisher
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef AVCODEC_X86_MMX_H
-#define AVCODEC_X86_MMX_H
-
-#warning Everything in this header is deprecated, use plain __asm__()! New code using this header will be rejected.
-
-
-#define mmx_i2r(op,imm,reg) \
- __asm__ volatile (#op " %0, %%" #reg \
- : /* nothing */ \
- : "i" (imm) )
-
-#define mmx_m2r(op,mem,reg) \
- __asm__ volatile (#op " %0, %%" #reg \
- : /* nothing */ \
- : "m" (mem))
-
-#define mmx_r2m(op,reg,mem) \
- __asm__ volatile (#op " %%" #reg ", %0" \
- : "=m" (mem) \
- : /* nothing */ )
-
-#define mmx_r2r(op,regs,regd) \
- __asm__ volatile (#op " %" #regs ", %" #regd)
-
-
-#define emms() __asm__ volatile ("emms")
-
-#define movd_m2r(var,reg) mmx_m2r (movd, var, reg)
-#define movd_r2m(reg,var) mmx_r2m (movd, reg, var)
-#define movd_r2r(regs,regd) mmx_r2r (movd, regs, regd)
-
-#define movq_m2r(var,reg) mmx_m2r (movq, var, reg)
-#define movq_r2m(reg,var) mmx_r2m (movq, reg, var)
-#define movq_r2r(regs,regd) mmx_r2r (movq, regs, regd)
-
-#define packssdw_m2r(var,reg) mmx_m2r (packssdw, var, reg)
-#define packssdw_r2r(regs,regd) mmx_r2r (packssdw, regs, regd)
-#define packsswb_m2r(var,reg) mmx_m2r (packsswb, var, reg)
-#define packsswb_r2r(regs,regd) mmx_r2r (packsswb, regs, regd)
-
-#define packuswb_m2r(var,reg) mmx_m2r (packuswb, var, reg)
-#define packuswb_r2r(regs,regd) mmx_r2r (packuswb, regs, regd)
-
-#define paddb_m2r(var,reg) mmx_m2r (paddb, var, reg)
-#define paddb_r2r(regs,regd) mmx_r2r (paddb, regs, regd)
-#define paddd_m2r(var,reg) mmx_m2r (paddd, var, reg)
-#define paddd_r2r(regs,regd) mmx_r2r (paddd, regs, regd)
-#define paddw_m2r(var,reg) mmx_m2r (paddw, var, reg)
-#define paddw_r2r(regs,regd) mmx_r2r (paddw, regs, regd)
-
-#define paddsb_m2r(var,reg) mmx_m2r (paddsb, var, reg)
-#define paddsb_r2r(regs,regd) mmx_r2r (paddsb, regs, regd)
-#define paddsw_m2r(var,reg) mmx_m2r (paddsw, var, reg)
-#define paddsw_r2r(regs,regd) mmx_r2r (paddsw, regs, regd)
-
-#define paddusb_m2r(var,reg) mmx_m2r (paddusb, var, reg)
-#define paddusb_r2r(regs,regd) mmx_r2r (paddusb, regs, regd)
-#define paddusw_m2r(var,reg) mmx_m2r (paddusw, var, reg)
-#define paddusw_r2r(regs,regd) mmx_r2r (paddusw, regs, regd)
-
-#define pand_m2r(var,reg) mmx_m2r (pand, var, reg)
-#define pand_r2r(regs,regd) mmx_r2r (pand, regs, regd)
-
-#define pandn_m2r(var,reg) mmx_m2r (pandn, var, reg)
-#define pandn_r2r(regs,regd) mmx_r2r (pandn, regs, regd)
-
-#define pcmpeqb_m2r(var,reg) mmx_m2r (pcmpeqb, var, reg)
-#define pcmpeqb_r2r(regs,regd) mmx_r2r (pcmpeqb, regs, regd)
-#define pcmpeqd_m2r(var,reg) mmx_m2r (pcmpeqd, var, reg)
-#define pcmpeqd_r2r(regs,regd) mmx_r2r (pcmpeqd, regs, regd)
-#define pcmpeqw_m2r(var,reg) mmx_m2r (pcmpeqw, var, reg)
-#define pcmpeqw_r2r(regs,regd) mmx_r2r (pcmpeqw, regs, regd)
-
-#define pcmpgtb_m2r(var,reg) mmx_m2r (pcmpgtb, var, reg)
-#define pcmpgtb_r2r(regs,regd) mmx_r2r (pcmpgtb, regs, regd)
-#define pcmpgtd_m2r(var,reg) mmx_m2r (pcmpgtd, var, reg)
-#define pcmpgtd_r2r(regs,regd) mmx_r2r (pcmpgtd, regs, regd)
-#define pcmpgtw_m2r(var,reg) mmx_m2r (pcmpgtw, var, reg)
-#define pcmpgtw_r2r(regs,regd) mmx_r2r (pcmpgtw, regs, regd)
-
-#define pmaddwd_m2r(var,reg) mmx_m2r (pmaddwd, var, reg)
-#define pmaddwd_r2r(regs,regd) mmx_r2r (pmaddwd, regs, regd)
-
-#define pmulhw_m2r(var,reg) mmx_m2r (pmulhw, var, reg)
-#define pmulhw_r2r(regs,regd) mmx_r2r (pmulhw, regs, regd)
-
-#define pmullw_m2r(var,reg) mmx_m2r (pmullw, var, reg)
-#define pmullw_r2r(regs,regd) mmx_r2r (pmullw, regs, regd)
-
-#define por_m2r(var,reg) mmx_m2r (por, var, reg)
-#define por_r2r(regs,regd) mmx_r2r (por, regs, regd)
-
-#define pslld_i2r(imm,reg) mmx_i2r (pslld, imm, reg)
-#define pslld_m2r(var,reg) mmx_m2r (pslld, var, reg)
-#define pslld_r2r(regs,regd) mmx_r2r (pslld, regs, regd)
-#define psllq_i2r(imm,reg) mmx_i2r (psllq, imm, reg)
-#define psllq_m2r(var,reg) mmx_m2r (psllq, var, reg)
-#define psllq_r2r(regs,regd) mmx_r2r (psllq, regs, regd)
-#define psllw_i2r(imm,reg) mmx_i2r (psllw, imm, reg)
-#define psllw_m2r(var,reg) mmx_m2r (psllw, var, reg)
-#define psllw_r2r(regs,regd) mmx_r2r (psllw, regs, regd)
-
-#define psrad_i2r(imm,reg) mmx_i2r (psrad, imm, reg)
-#define psrad_m2r(var,reg) mmx_m2r (psrad, var, reg)
-#define psrad_r2r(regs,regd) mmx_r2r (psrad, regs, regd)
-#define psraw_i2r(imm,reg) mmx_i2r (psraw, imm, reg)
-#define psraw_m2r(var,reg) mmx_m2r (psraw, var, reg)
-#define psraw_r2r(regs,regd) mmx_r2r (psraw, regs, regd)
-
-#define psrld_i2r(imm,reg) mmx_i2r (psrld, imm, reg)
-#define psrld_m2r(var,reg) mmx_m2r (psrld, var, reg)
-#define psrld_r2r(regs,regd) mmx_r2r (psrld, regs, regd)
-#define psrlq_i2r(imm,reg) mmx_i2r (psrlq, imm, reg)
-#define psrlq_m2r(var,reg) mmx_m2r (psrlq, var, reg)
-#define psrlq_r2r(regs,regd) mmx_r2r (psrlq, regs, regd)
-#define psrlw_i2r(imm,reg) mmx_i2r (psrlw, imm, reg)
-#define psrlw_m2r(var,reg) mmx_m2r (psrlw, var, reg)
-#define psrlw_r2r(regs,regd) mmx_r2r (psrlw, regs, regd)
-
-#define psubb_m2r(var,reg) mmx_m2r (psubb, var, reg)
-#define psubb_r2r(regs,regd) mmx_r2r (psubb, regs, regd)
-#define psubd_m2r(var,reg) mmx_m2r (psubd, var, reg)
-#define psubd_r2r(regs,regd) mmx_r2r (psubd, regs, regd)
-#define psubw_m2r(var,reg) mmx_m2r (psubw, var, reg)
-#define psubw_r2r(regs,regd) mmx_r2r (psubw, regs, regd)
-
-#define psubsb_m2r(var,reg) mmx_m2r (psubsb, var, reg)
-#define psubsb_r2r(regs,regd) mmx_r2r (psubsb, regs, regd)
-#define psubsw_m2r(var,reg) mmx_m2r (psubsw, var, reg)
-#define psubsw_r2r(regs,regd) mmx_r2r (psubsw, regs, regd)
-
-#define psubusb_m2r(var,reg) mmx_m2r (psubusb, var, reg)
-#define psubusb_r2r(regs,regd) mmx_r2r (psubusb, regs, regd)
-#define psubusw_m2r(var,reg) mmx_m2r (psubusw, var, reg)
-#define psubusw_r2r(regs,regd) mmx_r2r (psubusw, regs, regd)
-
-#define punpckhbw_m2r(var,reg) mmx_m2r (punpckhbw, var, reg)
-#define punpckhbw_r2r(regs,regd) mmx_r2r (punpckhbw, regs, regd)
-#define punpckhdq_m2r(var,reg) mmx_m2r (punpckhdq, var, reg)
-#define punpckhdq_r2r(regs,regd) mmx_r2r (punpckhdq, regs, regd)
-#define punpckhwd_m2r(var,reg) mmx_m2r (punpckhwd, var, reg)
-#define punpckhwd_r2r(regs,regd) mmx_r2r (punpckhwd, regs, regd)
-
-#define punpcklbw_m2r(var,reg) mmx_m2r (punpcklbw, var, reg)
-#define punpcklbw_r2r(regs,regd) mmx_r2r (punpcklbw, regs, regd)
-#define punpckldq_m2r(var,reg) mmx_m2r (punpckldq, var, reg)
-#define punpckldq_r2r(regs,regd) mmx_r2r (punpckldq, regs, regd)
-#define punpcklwd_m2r(var,reg) mmx_m2r (punpcklwd, var, reg)
-#define punpcklwd_r2r(regs,regd) mmx_r2r (punpcklwd, regs, regd)
-
-#define pxor_m2r(var,reg) mmx_m2r (pxor, var, reg)
-#define pxor_r2r(regs,regd) mmx_r2r (pxor, regs, regd)
-
-
-/* 3DNOW extensions */
-
-#define pavgusb_m2r(var,reg) mmx_m2r (pavgusb, var, reg)
-#define pavgusb_r2r(regs,regd) mmx_r2r (pavgusb, regs, regd)
-
-
-/* AMD MMX extensions - also available in intel SSE */
-
-
-#define mmx_m2ri(op,mem,reg,imm) \
- __asm__ volatile (#op " %1, %0, %%" #reg \
- : /* nothing */ \
- : "m" (mem), "i" (imm))
-#define mmx_r2ri(op,regs,regd,imm) \
- __asm__ volatile (#op " %0, %%" #regs ", %%" #regd \
- : /* nothing */ \
- : "i" (imm) )
-
-#define mmx_fetch(mem,hint) \
- __asm__ volatile ("prefetch" #hint " %0" \
- : /* nothing */ \
- : "m" (mem))
-
-
-#define maskmovq(regs,maskreg) mmx_r2ri (maskmovq, regs, maskreg)
-
-#define movntq_r2m(mmreg,var) mmx_r2m (movntq, mmreg, var)
-
-#define pavgb_m2r(var,reg) mmx_m2r (pavgb, var, reg)
-#define pavgb_r2r(regs,regd) mmx_r2r (pavgb, regs, regd)
-#define pavgw_m2r(var,reg) mmx_m2r (pavgw, var, reg)
-#define pavgw_r2r(regs,regd) mmx_r2r (pavgw, regs, regd)
-
-#define pextrw_r2r(mmreg,reg,imm) mmx_r2ri (pextrw, mmreg, reg, imm)
-
-#define pinsrw_r2r(reg,mmreg,imm) mmx_r2ri (pinsrw, reg, mmreg, imm)
-
-#define pmaxsw_m2r(var,reg) mmx_m2r (pmaxsw, var, reg)
-#define pmaxsw_r2r(regs,regd) mmx_r2r (pmaxsw, regs, regd)
-
-#define pmaxub_m2r(var,reg) mmx_m2r (pmaxub, var, reg)
-#define pmaxub_r2r(regs,regd) mmx_r2r (pmaxub, regs, regd)
-
-#define pminsw_m2r(var,reg) mmx_m2r (pminsw, var, reg)
-#define pminsw_r2r(regs,regd) mmx_r2r (pminsw, regs, regd)
-
-#define pminub_m2r(var,reg) mmx_m2r (pminub, var, reg)
-#define pminub_r2r(regs,regd) mmx_r2r (pminub, regs, regd)
-
-#define pmovmskb(mmreg,reg) \
- __asm__ volatile ("movmskps %" #mmreg ", %" #reg)
-
-#define pmulhuw_m2r(var,reg) mmx_m2r (pmulhuw, var, reg)
-#define pmulhuw_r2r(regs,regd) mmx_r2r (pmulhuw, regs, regd)
-
-#define prefetcht0(mem) mmx_fetch (mem, t0)
-#define prefetcht1(mem) mmx_fetch (mem, t1)
-#define prefetcht2(mem) mmx_fetch (mem, t2)
-#define prefetchnta(mem) mmx_fetch (mem, nta)
-
-#define psadbw_m2r(var,reg) mmx_m2r (psadbw, var, reg)
-#define psadbw_r2r(regs,regd) mmx_r2r (psadbw, regs, regd)
-
-#define pshufw_m2r(var,reg,imm) mmx_m2ri(pshufw, var, reg, imm)
-#define pshufw_r2r(regs,regd,imm) mmx_r2ri(pshufw, regs, regd, imm)
-
-#define sfence() __asm__ volatile ("sfence\n\t")
-
-/* SSE2 */
-#define pshufhw_m2r(var,reg,imm) mmx_m2ri(pshufhw, var, reg, imm)
-#define pshufhw_r2r(regs,regd,imm) mmx_r2ri(pshufhw, regs, regd, imm)
-#define pshuflw_m2r(var,reg,imm) mmx_m2ri(pshuflw, var, reg, imm)
-#define pshuflw_r2r(regs,regd,imm) mmx_r2ri(pshuflw, regs, regd, imm)
-
-#define pshufd_r2r(regs,regd,imm) mmx_r2ri(pshufd, regs, regd, imm)
-
-#define movdqa_m2r(var,reg) mmx_m2r (movdqa, var, reg)
-#define movdqa_r2m(reg,var) mmx_r2m (movdqa, reg, var)
-#define movdqa_r2r(regs,regd) mmx_r2r (movdqa, regs, regd)
-#define movdqu_m2r(var,reg) mmx_m2r (movdqu, var, reg)
-#define movdqu_r2m(reg,var) mmx_r2m (movdqu, reg, var)
-#define movdqu_r2r(regs,regd) mmx_r2r (movdqu, regs, regd)
-
-#define pmullw_r2m(reg,var) mmx_r2m (pmullw, reg, var)
-
-#define pslldq_i2r(imm,reg) mmx_i2r (pslldq, imm, reg)
-#define psrldq_i2r(imm,reg) mmx_i2r (psrldq, imm, reg)
-
-#define punpcklqdq_r2r(regs,regd) mmx_r2r (punpcklqdq, regs, regd)
-#define punpckhqdq_r2r(regs,regd) mmx_r2r (punpckhqdq, regs, regd)
-
-
-#endif /* AVCODEC_X86_MMX_H */
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp56_arith.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp56_arith.h
new file mode 100644
index 000000000..95f96e365
--- /dev/null
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp56_arith.h
@@ -0,0 +1,54 @@
+/**
+ * VP5 and VP6 compatible video decoder (arith decoder)
+ *
+ * Copyright (C) 2006 Aurelien Jacobs <aurel@gnuage.org>
+ * Copyright (C) 2010 Eli Friedman
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_X86_VP56_ARITH_H
+#define AVCODEC_X86_VP56_ARITH_H
+
+#if HAVE_FAST_CMOV
+#define vp56_rac_get_prob vp56_rac_get_prob
+static inline int vp56_rac_get_prob(VP56RangeCoder *c, uint8_t prob) /* x86 cmov variant: decodes one bit from range coder c using probability prob; returns 0 or 1 */
+{
+ unsigned int code_word = vp56_rac_renorm(c); /* helper defined elsewhere; presumably renormalises c and returns the current code word -- confirm */
+ unsigned int high = c->high;
+ unsigned int low = 1 + (((high - 1) * prob) >> 8); /* split point: (high - 1) scaled by prob/256, plus 1 */
+ unsigned int low_shift = low << 8; /* split point shifted left 8 bits; this is what code_word is tested against */
+ int bit = 0; /* must start at 0: setae below writes only the low byte */
+
+ __asm__(
+ "subl %4, %1 \n\t" /* high -= low */
+ "subl %3, %2 \n\t" /* code_word -= low_shift; carry is set if code_word < low_shift */
+ "leal (%2, %3), %3 \n\t" /* low_shift = code_word + low_shift, i.e. the pre-subtraction code_word; lea leaves the flags intact */
+ "setae %b0 \n\t" /* bit = 1 if the subtraction did not borrow */
+ "cmovb %4, %1 \n\t" /* on borrow (bit stays 0): high = low */
+ "cmovb %3, %2 \n\t" /* on borrow: code_word = its saved pre-subtraction value */
+ : "+q"(bit), "+r"(high), "+r"(code_word), "+r"(low_shift) /* "q" so that %b0 names a byte-addressable register */
+ : "r"(low)
+ );
+
+ c->high = high; /* write the updated range back to the coder state */
+ c->code_word = code_word;
+ return bit;
+}
+#endif
+
+#endif /* AVCODEC_X86_VP56_ARITH_H */
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp8dsp-init.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp8dsp-init.c
index e06da5e42..dd7dc696e 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp8dsp-init.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp8dsp-init.c
@@ -350,7 +350,6 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
#endif
c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_sse2;
- c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_sse2;
c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_sse2;
c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_sse2;
@@ -362,6 +361,8 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
if (mm_flags & FF_MM_SSE2) {
c->vp8_idct_dc_add4y = ff_vp8_idct_dc_add4y_sse2;
+ c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_sse2;
+
c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_sse2;
c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_sse2;
@@ -396,6 +397,7 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
if (mm_flags & FF_MM_SSE4) {
c->vp8_idct_dc_add = ff_vp8_idct_dc_add_sse4;
+ c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_sse4;
c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_sse4;
c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_sse4;
}
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp8dsp.asm b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp8dsp.asm
index 4aa901e27..4f430d80c 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp8dsp.asm
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcodec/x86/vp8dsp.asm
@@ -145,6 +145,10 @@ filter_h6_shuf3: db 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11
pw_20091: times 4 dw 20091
pw_17734: times 4 dw 17734
+pb_27_63: times 8 db 27, 63
+pb_18_63: times 8 db 18, 63
+pb_9_63: times 8 db 9, 63
+
cextern pb_1
cextern pw_3
cextern pb_3
@@ -438,48 +442,43 @@ cglobal put_vp8_epel4_h6_mmxext, 6, 6
jg .nextrow
REP_RET
-; 4x4 block, H-only 4-tap filter
INIT_XMM
-cglobal put_vp8_epel8_h4_sse2, 6, 6, 8
- shl r5d, 4
+cglobal put_vp8_epel8_h4_sse2, 6, 6, 10
+ shl r5d, 5
%ifdef PIC
- lea r11, [fourtap_filter_hw_m]
+ lea r11, [fourtap_filter_v_m]
%endif
- mova m5, [fourtap_filter_hw+r5-16] ; set up 4tap filter in words
- mova m6, [fourtap_filter_hw+r5]
+ lea r5, [fourtap_filter_v+r5-32]
pxor m7, m7
-
+ mova m4, [pw_64]
+ mova m5, [r5+ 0]
+ mova m6, [r5+16]
+%ifdef m8
+ mova m8, [r5+32]
+ mova m9, [r5+48]
+%endif
.nextrow
- movh m0, [r2-1]
- punpcklbw m0, m7 ; ABCDEFGH
- mova m1, m0
- mova m2, m0
- mova m3, m0
- psrldq m1, 2 ; BCDEFGH
- psrldq m2, 4 ; CDEFGH
- psrldq m3, 6 ; DEFGH
- punpcklwd m0, m1 ; ABBCCDDE
- punpcklwd m2, m3 ; CDDEEFFG
- pmaddwd m0, m5
- pmaddwd m2, m6
- paddd m0, m2
-
- movh m1, [r2+3]
- punpcklbw m1, m7 ; ABCDEFGH
- mova m2, m1
- mova m3, m1
- mova m4, m1
- psrldq m2, 2 ; BCDEFGH
- psrldq m3, 4 ; CDEFGH
- psrldq m4, 6 ; DEFGH
- punpcklwd m1, m2 ; ABBCCDDE
- punpcklwd m3, m4 ; CDDEEFFG
- pmaddwd m1, m5
- pmaddwd m3, m6
- paddd m1, m3
-
- packssdw m0, m1
- paddsw m0, [pw_64]
+ movq m0, [r2-1]
+ movq m1, [r2-0]
+ movq m2, [r2+1]
+ movq m3, [r2+2]
+ punpcklbw m0, m7
+ punpcklbw m1, m7
+ punpcklbw m2, m7
+ punpcklbw m3, m7
+ pmullw m0, m5
+ pmullw m1, m6
+%ifdef m8
+ pmullw m2, m8
+ pmullw m3, m9
+%else
+ pmullw m2, [r5+32]
+ pmullw m3, [r5+48]
+%endif
+ paddsw m0, m1
+ paddsw m2, m3
+ paddsw m0, m2
+ paddsw m0, m4
psraw m0, 7
packuswb m0, m7
movh [r0], m0 ; store
@@ -491,62 +490,57 @@ cglobal put_vp8_epel8_h4_sse2, 6, 6, 8
jg .nextrow
REP_RET
-cglobal put_vp8_epel8_h6_sse2, 6, 6, 8
+cglobal put_vp8_epel8_h6_sse2, 6, 6, 14
lea r5d, [r5*3]
+ shl r5d, 4
%ifdef PIC
- lea r11, [sixtap_filter_hw_m]
+ lea r11, [sixtap_filter_v_m]
%endif
- lea r5, [sixtap_filter_hw+r5*8]
+ lea r5, [sixtap_filter_v+r5-96]
pxor m7, m7
-
+ mova m6, [pw_64]
+%ifdef m8
+ mova m8, [r5+ 0]
+ mova m9, [r5+16]
+ mova m10, [r5+32]
+ mova m11, [r5+48]
+ mova m12, [r5+64]
+ mova m13, [r5+80]
+%endif
.nextrow
- movu m0, [r2-2]
- mova m6, m0
- mova m4, m0
- punpcklbw m0, m7 ; ABCDEFGHI
- mova m1, m0
- mova m2, m0
- mova m3, m0
- psrldq m1, 2 ; BCDEFGH
- psrldq m2, 4 ; CDEFGH
- psrldq m3, 6 ; DEFGH
- psrldq m4, 4
- punpcklbw m4, m7 ; EFGH
- mova m5, m4
- psrldq m5, 2 ; FGH
- punpcklwd m0, m1 ; ABBCCDDE
- punpcklwd m2, m3 ; CDDEEFFG
- punpcklwd m4, m5 ; EFFGGHHI
- pmaddwd m0, [r5-48]
- pmaddwd m2, [r5-32]
- pmaddwd m4, [r5-16]
- paddd m0, m2
- paddd m0, m4
-
- psrldq m6, 4
- mova m4, m6
- punpcklbw m6, m7 ; ABCDEFGHI
- mova m1, m6
- mova m2, m6
- mova m3, m6
- psrldq m1, 2 ; BCDEFGH
- psrldq m2, 4 ; CDEFGH
- psrldq m3, 6 ; DEFGH
- psrldq m4, 4
- punpcklbw m4, m7 ; EFGH
- mova m5, m4
- psrldq m5, 2 ; FGH
- punpcklwd m6, m1 ; ABBCCDDE
- punpcklwd m2, m3 ; CDDEEFFG
- punpcklwd m4, m5 ; EFFGGHHI
- pmaddwd m6, [r5-48]
- pmaddwd m2, [r5-32]
- pmaddwd m4, [r5-16]
- paddd m6, m2
- paddd m6, m4
-
- packssdw m0, m6
- paddsw m0, [pw_64]
+ movq m0, [r2-2]
+ movq m1, [r2-1]
+ movq m2, [r2-0]
+ movq m3, [r2+1]
+ movq m4, [r2+2]
+ movq m5, [r2+3]
+ punpcklbw m0, m7
+ punpcklbw m1, m7
+ punpcklbw m2, m7
+ punpcklbw m3, m7
+ punpcklbw m4, m7
+ punpcklbw m5, m7
+%ifdef m8
+ pmullw m0, m8
+ pmullw m1, m9
+ pmullw m2, m10
+ pmullw m3, m11
+ pmullw m4, m12
+ pmullw m5, m13
+%else
+ pmullw m0, [r5+ 0]
+ pmullw m1, [r5+16]
+ pmullw m2, [r5+32]
+ pmullw m3, [r5+48]
+ pmullw m4, [r5+64]
+ pmullw m5, [r5+80]
+%endif
+ paddsw m1, m4
+ paddsw m0, m5
+ paddsw m1, m2
+ paddsw m0, m3
+ paddsw m0, m1
+ paddsw m0, m6
psraw m0, 7
packuswb m0, m7
movh [r0], m0 ; store
@@ -1360,44 +1354,123 @@ cglobal vp8_luma_dc_wht_mmx, 2,3
movd [%7+%9*2], m%4
%endmacro
-%macro SPLATB_REG 3-4
+; write 4 or 8 words in the mmx/xmm registers as 8 lines
+; 1 and 2 are the registers to write, this can be the same (for SSE2)
+; for pre-SSE4:
+; 3 is a general-purpose register that we will clobber
+; for SSE4:
+; 3 is a pointer to the destination's 5th line
+; 4 is a pointer to the destination's 4th line
+; 5/6 is -stride and +stride
+%macro WRITE_2x4W 6 ; %1/%2 = source regs, %3 = scratch GPR, %4 = dst pointer, %5/%6 = -stride/+stride; row numbers below are relative to %4 on entry and assume %5 == -%6
+ movd %3, %1 ; low words of %1 -> scratch
+ punpckhdq %1, %1 ; bring the high dword of %1 into its low half
+ mov [%4+%5*4], %3w ; word 0 of %1 -> row -4
+ shr %3, 16 ; expose the next word
+ add %4, %6 ; dst += stride
+ mov [%4+%5*4], %3w ; word 1 of %1 -> row -3
+
+ movd %3, %1 ; words 2-3 of the original %1
+ add %4, %5 ; dst back to its entry value
+ mov [%4+%5*2], %3w ; word 2 of %1 -> row -2
+ shr %3, 16
+ mov [%4+%5 ], %3w ; word 3 of %1 -> row -1
+
+ movd %3, %2 ; low words of %2 -> scratch
+ punpckhdq %2, %2 ; bring the high dword of %2 into its low half
+ mov [%4 ], %3w ; word 0 of %2 -> row 0
+ shr %3, 16
+ mov [%4+%6 ], %3w ; word 1 of %2 -> row +1
+
+ movd %3, %2 ; words 2-3 of the original %2
+ add %4, %6 ; dst += stride
+ mov [%4+%6 ], %3w ; word 2 of %2 -> row +2
+ shr %3, 16
+ mov [%4+%6*2], %3w ; word 3 of %2 -> row +3
+ add %4, %5 ; restore dst to its entry value
+%endmacro
+
+%macro WRITE_8W_SSE2 5 ; %1 = xmm source (shifted away), %2 = scratch GPR, %3 = dst pointer, %4/%5 = -stride/+stride; row numbers are relative to %3 on entry and assume %4 == -%5
+ movd %2, %1 ; low words of %1 -> scratch
+ psrldq %1, 4 ; shift the next dword down
+ mov [%3+%4*4], %2w ; word 0 -> row -4
+ shr %2, 16 ; expose the next word
+ add %3, %5 ; dst += stride
+ mov [%3+%4*4], %2w ; word 1 -> row -3
+
+ movd %2, %1
+ psrldq %1, 4
+ add %3, %4 ; dst back to its entry value
+ mov [%3+%4*2], %2w ; word 2 -> row -2
+ shr %2, 16
+ mov [%3+%4 ], %2w ; word 3 -> row -1
+
+ movd %2, %1
+ psrldq %1, 4
+ mov [%3 ], %2w ; word 4 -> row 0
+ shr %2, 16
+ mov [%3+%5 ], %2w ; word 5 -> row +1
+
+ movd %2, %1
+ add %3, %5 ; dst += stride; not undone, so %3 leaves the macro one row further on
+ mov [%3+%5 ], %2w ; word 6 -> row +2
+ shr %2, 16
+ mov [%3+%5*2], %2w ; word 7 -> row +3
+%endmacro
+
+%macro WRITE_8W_SSE4 5 ; %1 = xmm source, %2/%3 = two dst pointers, %4/%5 = -stride/+stride; no register is modified. Rows are consecutive if %2 == %3 + %5 -- presumably what callers set up, confirm
+ pextrw [%3+%4*4], %1, 0 ; word 0 -> %3 - 4 rows
+ pextrw [%2+%4*4], %1, 1 ; word 1 -> %2 - 4 rows
+ pextrw [%3+%4*2], %1, 2 ; word 2 -> %3 - 2 rows
+ pextrw [%3+%4 ], %1, 3 ; word 3 -> %3 - 1 row
+ pextrw [%3 ], %1, 4 ; word 4 -> %3
+ pextrw [%2 ], %1, 5 ; word 5 -> %2
+ pextrw [%2+%5 ], %1, 6 ; word 6 -> %2 + 1 row
+ pextrw [%2+%5*2], %1, 7 ; word 7 -> %2 + 2 rows
+%endmacro
+
+%macro SPLATB_REG_MMX 2-3
movd %1, %2
-%ifidn %3, ssse3
- pshufb %1, %4
-%else
punpcklbw %1, %1
-%if mmsize == 16 ; sse2
- pshuflw %1, %1, 0x0
- punpcklqdq %1, %1
-%elifidn %3, mmx
punpcklwd %1, %1
punpckldq %1, %1
-%else ; mmxext
+%endmacro
+
+%macro SPLATB_REG_MMXEXT 2-3
+ movd %1, %2
+ punpcklbw %1, %1
pshufw %1, %1, 0x0
-%endif
-%endif
+%endmacro
+
+%macro SPLATB_REG_SSE2 2-3 ; broadcast the low byte of GPR %2 to all 16 bytes of xmm %1; the optional %3 is accepted but never used
+ movd %1, %2 ; GPR -> low dword of %1
+ punpcklbw %1, %1 ; duplicate each low byte: word 0 now holds the byte twice
+ pshuflw %1, %1, 0x0 ; copy word 0 into all four low words
+ punpcklqdq %1, %1 ; copy the low qword into the high qword
+%endmacro
+
+%macro SPLATB_REG_SSSE3 3 ; broadcast the low byte of GPR %2 across %1; %3 is the pshufb mask (the visible callers zero it with pxor first)
+ movd %1, %2 ; GPR -> low dword of %1
+ pshufb %1, %3 ; with an all-zero mask every byte of %1 becomes byte 0
+%endmacro
%macro SIMPLE_LOOPFILTER 3
cglobal vp8_%2_loop_filter_simple_%1, 3, %3
-%ifidn %2, h
- mov r5, rsp ; backup stack pointer
- and rsp, ~(mmsize-1) ; align stack
-%endif
%if mmsize == 8 ; mmx/mmxext
mov r3, 2
%endif
-%ifidn %1, ssse3
+%ifnidn %1, sse2
+%if mmsize == 16
pxor m0, m0
%endif
- SPLATB_REG m7, r2, %1, m0 ; splat "flim" into register
+%endif
+ SPLATB_REG m7, r2, m0 ; splat "flim" into register
; set up indexes to address 4 rows
mov r2, r1
neg r1
%ifidn %2, h
lea r0, [r0+4*r2-2]
- sub rsp, mmsize*2 ; (aligned) storage space for saving p1/q1
%endif
%if mmsize == 8 ; mmx / mmxext
@@ -1418,9 +1491,6 @@ cglobal vp8_%2_loop_filter_simple_%1, 3, %3
READ_16x4_INTERLEAVED 0, 1, 2, 3, 4, 5, 6, r0, r4, r1, r2, r3
%endif
TRANSPOSE4x4W 0, 1, 2, 3, 4
-
- mova [rsp], m0 ; store p1
- mova [rsp+mmsize], m3 ; store q1
%endif
; simple_limit
@@ -1491,17 +1561,21 @@ cglobal vp8_%2_loop_filter_simple_%1, 3, %3
mova [r0], m4
mova [r0+r1], m6
%else ; h
- mova m0, [rsp] ; p1
- SWAP 2, 4 ; p0
- SWAP 1, 6 ; q0
- mova m3, [rsp+mmsize] ; q1
+ inc r0
+ SBUTTERFLY bw, 6, 4, 0
- TRANSPOSE4x4B 0, 1, 2, 3, 4
%if mmsize == 16 ; sse2
- add r3, r1 ; change from r4*8*stride to r0+8*stride
- WRITE_4x4D 0, 1, 2, 3, r0, r4, r3, r1, r2, 16
+%ifidn %1, sse4
+ inc r4
+%endif
+ WRITE_8W m6, r4, r0, r1, r2
+ lea r4, [r3+r1+1]
+%ifidn %1, sse4
+ inc r3
+%endif
+ WRITE_8W m4, r3, r4, r1, r2
%else ; mmx/mmxext
- WRITE_4x2D 0, 1, 2, 3, r0, r4, r1, r2
+ WRITE_2x4W m6, m4, r4, r0, r1, r2
%endif
%endif
@@ -1510,34 +1584,33 @@ cglobal vp8_%2_loop_filter_simple_%1, 3, %3
%ifidn %2, v
add r0, 8 ; advance 8 cols = pixels
%else ; h
- lea r0, [r0+r2*8] ; advance 8 rows = lines
+ lea r0, [r0+r2*8-1] ; advance 8 rows = lines
%endif
dec r3
jg .next8px
-%ifidn %2, v
REP_RET
-%else ; h
- mov rsp, r5 ; restore stack pointer
- RET
-%endif
%else ; sse2
-%ifidn %2, h
- mov rsp, r5 ; restore stack pointer
-%endif
RET
%endif
%endmacro
INIT_MMX
+%define SPLATB_REG SPLATB_REG_MMX
SIMPLE_LOOPFILTER mmx, v, 4
-SIMPLE_LOOPFILTER mmx, h, 6
+SIMPLE_LOOPFILTER mmx, h, 5
+%define SPLATB_REG SPLATB_REG_MMXEXT
SIMPLE_LOOPFILTER mmxext, v, 4
-SIMPLE_LOOPFILTER mmxext, h, 6
+SIMPLE_LOOPFILTER mmxext, h, 5
INIT_XMM
+%define SPLATB_REG SPLATB_REG_SSE2
+%define WRITE_8W WRITE_8W_SSE2
SIMPLE_LOOPFILTER sse2, v, 3
-SIMPLE_LOOPFILTER sse2, h, 6
+SIMPLE_LOOPFILTER sse2, h, 5
+%define SPLATB_REG SPLATB_REG_SSSE3
SIMPLE_LOOPFILTER ssse3, v, 3
-SIMPLE_LOOPFILTER ssse3, h, 6
+SIMPLE_LOOPFILTER ssse3, h, 5
+%define WRITE_8W WRITE_8W_SSE4
+SIMPLE_LOOPFILTER sse4, h, 5
;-----------------------------------------------------------------------------
; void vp8_h/v_loop_filter<size>_inner_<opt>(uint8_t *dst, [uint8_t *v,] int stride,
@@ -1573,15 +1646,17 @@ cglobal vp8_%2_loop_filter16y_inner_%1, 5, %3, %5
%define stack_reg hev_thr_reg
%endif
-%ifidn %1, ssse3
+%ifnidn %1, sse2
+%if mmsize == 16
pxor m7, m7
%endif
+%endif
%ifndef m8 ; mmx/mmxext or sse2 on x86-32
; splat function arguments
- SPLATB_REG m0, E_reg, %1, m7 ; E
- SPLATB_REG m1, I_reg, %1, m7 ; I
- SPLATB_REG m2, hev_thr_reg, %1, m7 ; hev_thresh
+ SPLATB_REG m0, E_reg, m7 ; E
+ SPLATB_REG m1, I_reg, m7 ; I
+ SPLATB_REG m2, hev_thr_reg, m7 ; hev_thresh
; align stack
mov stack_reg, rsp ; backup stack pointer
@@ -1614,9 +1689,9 @@ cglobal vp8_%2_loop_filter16y_inner_%1, 5, %3, %5
%define q0backup m8
; splat function arguments
- SPLATB_REG flim_E, E_reg, %1, m7 ; E
- SPLATB_REG flim_I, I_reg, %1, m7 ; I
- SPLATB_REG hev_thr, hev_thr_reg, %1, m7 ; hev_thresh
+ SPLATB_REG flim_E, E_reg, m7 ; E
+ SPLATB_REG flim_I, I_reg, m7 ; I
+ SPLATB_REG hev_thr, hev_thr_reg, m7 ; hev_thresh
%endif
%if mmsize == 8 && %4 == 16 ; mmx/mmxext
@@ -2028,17 +2103,20 @@ cglobal vp8_%2_loop_filter16y_inner_%1, 5, %3, %5
%endmacro
INIT_MMX
+%define SPLATB_REG SPLATB_REG_MMX
INNER_LOOPFILTER mmx, v, 6, 16, 0
INNER_LOOPFILTER mmx, h, 6, 16, 0
-INNER_LOOPFILTER mmxext, v, 6, 16, 0
-INNER_LOOPFILTER mmxext, h, 6, 16, 0
-
INNER_LOOPFILTER mmx, v, 6, 8, 0
INNER_LOOPFILTER mmx, h, 6, 8, 0
+
+%define SPLATB_REG SPLATB_REG_MMXEXT
+INNER_LOOPFILTER mmxext, v, 6, 16, 0
+INNER_LOOPFILTER mmxext, h, 6, 16, 0
INNER_LOOPFILTER mmxext, v, 6, 8, 0
INNER_LOOPFILTER mmxext, h, 6, 8, 0
INIT_XMM
+%define SPLATB_REG SPLATB_REG_SSE2
INNER_LOOPFILTER sse2, v, 5, 16, 13
%ifdef m8
INNER_LOOPFILTER sse2, h, 5, 16, 13
@@ -2048,6 +2126,7 @@ INNER_LOOPFILTER sse2, h, 6, 16, 13
INNER_LOOPFILTER sse2, v, 6, 8, 13
INNER_LOOPFILTER sse2, h, 6, 8, 13
+%define SPLATB_REG SPLATB_REG_SSSE3
INNER_LOOPFILTER ssse3, v, 5, 16, 13
%ifdef m8
INNER_LOOPFILTER ssse3, h, 5, 16, 13
@@ -2062,67 +2141,6 @@ INNER_LOOPFILTER ssse3, h, 6, 8, 13
; int flimE, int flimI, int hev_thr);
;-----------------------------------------------------------------------------
-; write 4 or 8 words in the mmx/xmm registers as 8 lines
-; 1 and 2 are the registers to write, this can be the same (for SSE2)
-; for pre-SSE4:
-; 3 is a general-purpose register that we will clobber
-; for SSE4:
-; 3 is a pointer to the destination's 5th line
-; 4 is a pointer to the destination's 4th line
-; 5/6 is -stride and +stride
-; 7 is optimization string
-%macro WRITE_8W 7
-%ifidn %7, sse4
- pextrw [%4+%5*4], %1, 0
- pextrw [%3+%5*4], %1, 1
- pextrw [%4+%5*2], %1, 2
- pextrw [%4+%5 ], %1, 3
- pextrw [%4 ], %1, 4
- pextrw [%3 ], %1, 5
- pextrw [%3+%6 ], %1, 6
- pextrw [%3+%6*2], %1, 7
-%else
- movd %3, %1
-%if mmsize == 8
- punpckhdq %1, %1
-%else
- psrldq %1, 4
-%endif
- mov [%4+%5*4], %3w
- shr %3, 16
- add %4, %6
- mov [%4+%5*4], %3w
-
- movd %3, %1
-%if mmsize == 16
- psrldq %1, 4
-%endif
- add %4, %5
- mov [%4+%5*2], %3w
- shr %3, 16
- mov [%4+%5 ], %3w
-
- movd %3, %2
-%if mmsize == 8
- punpckhdq %2, %2
-%else
- psrldq %2, 4
-%endif
- mov [%4 ], %3w
- shr %3, 16
- mov [%4+%6 ], %3w
-
- movd %3, %2
- add %4, %6
- mov [%4+%6 ], %3w
- shr %3, 16
- mov [%4+%6*2], %3w
-%if mmsize == 8
- add %4, %5
-%endif
-%endif
-%endmacro
-
%macro MBEDGE_LOOPFILTER 5
%if %4 == 8 ; chroma
cglobal vp8_%2_loop_filter8uv_mbedge_%1, 6, %3, %5
@@ -2152,24 +2170,35 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5
%define stack_reg hev_thr_reg
%endif
-%ifidn %1, ssse3
+%define ssse3_or_higher 0
+%ifnidn %1, sse2
+%if mmsize == 16
+%define ssse3_or_higher 1
+%endif
+%endif
+
+%if ssse3_or_higher
pxor m7, m7
%endif
%ifndef m8 ; mmx/mmxext or sse2 on x86-32
; splat function arguments
- SPLATB_REG m0, E_reg, %1, m7 ; E
- SPLATB_REG m1, I_reg, %1, m7 ; I
- SPLATB_REG m2, hev_thr_reg, %1, m7 ; hev_thresh
+ SPLATB_REG m0, E_reg, m7 ; E
+ SPLATB_REG m1, I_reg, m7 ; I
+ SPLATB_REG m2, hev_thr_reg, m7 ; hev_thresh
; align stack
mov stack_reg, rsp ; backup stack pointer
and rsp, ~(mmsize-1) ; align stack
+%if mmsize == 16
+ sub rsp, mmsize * 7
+%else
sub rsp, mmsize * 8 ; stack layout: [0]=E, [1]=I, [2]=hev_thr
; [3]=hev() result
; [4]=filter tmp result
; [5]/[6] = p2/q2 backup
; [7]=lim_res sign result
+%endif
%define flim_E [rsp]
%define flim_I [rsp+mmsize]
@@ -2180,7 +2209,11 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5
%define q0backup [rsp+mmsize*4]
%define p2backup [rsp+mmsize*5]
%define q2backup [rsp+mmsize*6]
+%if mmsize == 16
+%define lim_sign [rsp]
+%else
%define lim_sign [rsp+mmsize*7]
+%endif
mova flim_E, m0
mova flim_I, m1
@@ -2197,12 +2230,12 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5
%define q0backup m8
%define p2backup m13
%define q2backup m14
-%define lim_sign m15
+%define lim_sign m9
; splat function arguments
- SPLATB_REG flim_E, E_reg, %1, m7 ; E
- SPLATB_REG flim_I, I_reg, %1, m7 ; I
- SPLATB_REG hev_thr, hev_thr_reg, %1, m7 ; hev_thresh
+ SPLATB_REG flim_E, E_reg, m7 ; E
+ SPLATB_REG flim_I, I_reg, m7 ; I
+ SPLATB_REG hev_thr, hev_thr_reg, m7 ; hev_thresh
%endif
%if mmsize == 8 && %4 == 16 ; mmx/mmxext
@@ -2543,7 +2576,11 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5
paddusb m4, m1 ; q0-f1
; filter_mbedge (m2-m5 = p1-q1; lim_res carries w)
+%if ssse3_or_higher
+ mova m7, [pb_1]
+%else
mova m7, [pw_63]
+%endif
%ifdef m8
SWAP 1, 8
%else
@@ -2552,15 +2589,40 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5
pxor m0, m0
mova m6, m1
pcmpgtb m0, m1 ; which are negative
+%if ssse3_or_higher
+ punpcklbw m6, m7 ; interleave with "1" for rounding
+ punpckhbw m1, m7
+%else
punpcklbw m6, m0 ; signed byte->word
punpckhbw m1, m0
+%endif
mova lim_sign, m0
+%if ssse3_or_higher
+ mova m7, [pb_27_63]
+%ifndef m8
+ mova lim_res, m1
+%endif
+%ifdef m10
+ SWAP 0, 10 ; don't lose lim_sign copy
+%endif
+ mova m0, m7
+ pmaddubsw m7, m6
+ SWAP 6, 7
+ pmaddubsw m0, m1
+ SWAP 1, 0
+%ifdef m10
+ SWAP 0, 10
+%else
+ mova m0, lim_sign
+%endif
+%else
mova mask_res, m6 ; backup for later in filter
mova lim_res, m1
pmullw m6, [pw_27]
pmullw m1, [pw_27]
paddw m6, m7
paddw m1, m7
+%endif
psraw m6, 7
psraw m1, 7
packsswb m6, m1 ; a0
@@ -2568,18 +2630,39 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5
psubb m1, m6
pand m1, m0 ; -a0
pandn m0, m6 ; +a0
+%if ssse3_or_higher
+ mova m6, [pb_18_63] ; pipelining
+%endif
psubusb m3, m1
paddusb m4, m1
paddusb m3, m0 ; p0+a0
psubusb m4, m0 ; q0-a0
- mova m6, mask_res
+%if ssse3_or_higher
+ SWAP 6, 7
+%ifdef m10
+ SWAP 1, 10
+%else
mova m1, lim_res
+%endif
+ mova m0, m7
+ pmaddubsw m7, m6
+ SWAP 6, 7
+ pmaddubsw m0, m1
+ SWAP 1, 0
+%ifdef m10
+ SWAP 0, 10
+%endif
mova m0, lim_sign
+%else
+ mova m6, mask_res
+ mova m1, lim_res
pmullw m6, [pw_18]
pmullw m1, [pw_18]
paddw m6, m7
paddw m1, m7
+%endif
+ mova m0, lim_sign
psraw m6, 7
psraw m1, 7
packsswb m6, m1 ; a1
@@ -2587,11 +2670,27 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5
psubb m1, m6
pand m1, m0 ; -a1
pandn m0, m6 ; +a1
+%if ssse3_or_higher
+ mova m6, [pb_9_63]
+%endif
psubusb m2, m1
paddusb m5, m1
paddusb m2, m0 ; p1+a1
psubusb m5, m0 ; q1-a1
+%if ssse3_or_higher
+ SWAP 6, 7
+%ifdef m10
+ SWAP 1, 10
+%else
+ mova m1, lim_res
+%endif
+ mova m0, m7
+ pmaddubsw m7, m6
+ SWAP 6, 7
+ pmaddubsw m0, m1
+ SWAP 1, 0
+%else
%ifdef m8
SWAP 6, 12
SWAP 1, 8
@@ -2603,8 +2702,9 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5
pmullw m1, [pw_9]
paddw m6, m7
paddw m1, m7
-%ifdef m15
- SWAP 7, 15
+%endif
+%ifdef m9
+ SWAP 7, 9
%else
mova m7, lim_sign
%endif
@@ -2656,17 +2756,20 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5
%if mmsize == 8 ; mmx/mmxext (h)
WRITE_4x2D 1, 2, 3, 4, dst_reg, dst2_reg, mstride_reg, stride_reg
add dst_reg, 4
- WRITE_8W m5, m6, dst2_reg, dst_reg, mstride_reg, stride_reg, %4
+ WRITE_2x4W m5, m6, dst2_reg, dst_reg, mstride_reg, stride_reg
%else ; sse2 (h)
lea dst8_reg, [dst8_reg+mstride_reg+1]
WRITE_4x4D 1, 2, 3, 4, dst_reg, dst2_reg, dst8_reg, mstride_reg, stride_reg, %4
lea dst_reg, [dst2_reg+mstride_reg+4]
lea dst8_reg, [dst8_reg+mstride_reg+4]
- WRITE_8W m5, m5, dst2_reg, dst_reg, mstride_reg, stride_reg, %2
-%ifidn %2, sse4
- lea dst_reg, [dst8_reg+ stride_reg]
+%ifidn %1, sse4
+ add dst2_reg, 4
+%endif
+ WRITE_8W m5, dst2_reg, dst_reg, mstride_reg, stride_reg
+%ifidn %1, sse4
+ lea dst2_reg, [dst8_reg+ stride_reg]
%endif
- WRITE_8W m6, m6, dst2_reg, dst8_reg, mstride_reg, stride_reg, %2
+ WRITE_8W m6, dst2_reg, dst8_reg, mstride_reg, stride_reg
%endif
%endif
@@ -2696,38 +2799,44 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5
%endmacro
INIT_MMX
+%define SPLATB_REG SPLATB_REG_MMX
MBEDGE_LOOPFILTER mmx, v, 6, 16, 0
MBEDGE_LOOPFILTER mmx, h, 6, 16, 0
-MBEDGE_LOOPFILTER mmxext, v, 6, 16, 0
-MBEDGE_LOOPFILTER mmxext, h, 6, 16, 0
-
MBEDGE_LOOPFILTER mmx, v, 6, 8, 0
MBEDGE_LOOPFILTER mmx, h, 6, 8, 0
+
+%define SPLATB_REG SPLATB_REG_MMXEXT
+MBEDGE_LOOPFILTER mmxext, v, 6, 16, 0
+MBEDGE_LOOPFILTER mmxext, h, 6, 16, 0
MBEDGE_LOOPFILTER mmxext, v, 6, 8, 0
MBEDGE_LOOPFILTER mmxext, h, 6, 8, 0
INIT_XMM
-MBEDGE_LOOPFILTER sse2, v, 5, 16, 16
+%define SPLATB_REG SPLATB_REG_SSE2
+%define WRITE_8W WRITE_8W_SSE2
+MBEDGE_LOOPFILTER sse2, v, 5, 16, 15
%ifdef m8
-MBEDGE_LOOPFILTER sse2, h, 5, 16, 16
+MBEDGE_LOOPFILTER sse2, h, 5, 16, 15
%else
-MBEDGE_LOOPFILTER sse2, h, 6, 16, 16
+MBEDGE_LOOPFILTER sse2, h, 6, 16, 15
%endif
-MBEDGE_LOOPFILTER sse2, v, 6, 8, 16
-MBEDGE_LOOPFILTER sse2, h, 6, 8, 16
+MBEDGE_LOOPFILTER sse2, v, 6, 8, 15
+MBEDGE_LOOPFILTER sse2, h, 6, 8, 15
-MBEDGE_LOOPFILTER ssse3, v, 5, 16, 16
+%define SPLATB_REG SPLATB_REG_SSSE3
+MBEDGE_LOOPFILTER ssse3, v, 5, 16, 15
%ifdef m8
-MBEDGE_LOOPFILTER ssse3, h, 5, 16, 16
+MBEDGE_LOOPFILTER ssse3, h, 5, 16, 15
%else
-MBEDGE_LOOPFILTER ssse3, h, 6, 16, 16
+MBEDGE_LOOPFILTER ssse3, h, 6, 16, 15
%endif
-MBEDGE_LOOPFILTER ssse3, v, 6, 8, 16
-MBEDGE_LOOPFILTER ssse3, h, 6, 8, 16
+MBEDGE_LOOPFILTER ssse3, v, 6, 8, 15
+MBEDGE_LOOPFILTER ssse3, h, 6, 8, 15
+%define WRITE_8W WRITE_8W_SSE4
%ifdef m8
-MBEDGE_LOOPFILTER sse4, h, 5, 16, 16
+MBEDGE_LOOPFILTER sse4, h, 5, 16, 15
%else
-MBEDGE_LOOPFILTER sse4, h, 6, 16, 16
+MBEDGE_LOOPFILTER sse4, h, 6, 16, 15
%endif
-MBEDGE_LOOPFILTER sse4, h, 6, 8, 16
+MBEDGE_LOOPFILTER sse4, h, 6, 8, 15
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcore/avcore.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcore/avcore.h
new file mode 100644
index 000000000..5a2a7c73f
--- /dev/null
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcore/avcore.h
@@ -0,0 +1,58 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCORE_AVCORE_H
+#define AVCORE_AVCORE_H
+
+/**
+ * @file
+ * shared media utilities for the libav* libraries
+ */
+
+#include <libavutil/avutil.h>
+
+#define LIBAVCORE_VERSION_MAJOR 0
+#define LIBAVCORE_VERSION_MINOR 2
+#define LIBAVCORE_VERSION_MICRO 0
+
+#define LIBAVCORE_VERSION_INT AV_VERSION_INT(LIBAVCORE_VERSION_MAJOR, \
+ LIBAVCORE_VERSION_MINOR, \
+ LIBAVCORE_VERSION_MICRO)
+#define LIBAVCORE_VERSION AV_VERSION(LIBAVCORE_VERSION_MAJOR, \
+ LIBAVCORE_VERSION_MINOR, \
+ LIBAVCORE_VERSION_MICRO)
+#define LIBAVCORE_BUILD LIBAVCORE_VERSION_INT
+
+#define LIBAVCORE_IDENT "Lavcore" AV_STRINGIFY(LIBAVCORE_VERSION)
+
+/**
+ * Return the LIBAVCORE_VERSION_INT constant.
+ */
+unsigned avcore_version(void);
+
+/**
+ * Return the libavcore build-time configuration.
+ */
+const char *avcore_configuration(void);
+
+/**
+ * Return the libavcore license.
+ */
+const char *avcore_license(void);
+
+#endif /* AVCORE_AVCORE_H */
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcore/avcore_utils.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcore/avcore_utils.c
new file mode 100644
index 000000000..badb9ee36
--- /dev/null
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcore/avcore_utils.c
@@ -0,0 +1,43 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "avcore.h"
+
+/**
+ * @file
+ * various utility functions
+ */
+
+unsigned avcore_version(void)
+{
+ return LIBAVCORE_VERSION_INT;
+}
+
+#if 0
+const char *avcore_configuration(void)
+{
+ return FFMPEG_CONFIGURATION;
+}
+
+const char *avcore_license(void)
+{
+#define LICENSE_PREFIX "libavcore license: "
+ return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1;
+}
+#endif
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcore/imgutils.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavcore/imgutils.c
new file mode 100644
index 000000000..b14dd32a8
--- /dev/null
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcore/imgutils.c
@@ -0,0 +1,97 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * misc image utilities
+ */
+
+#include "imgutils.h"
+#include "libavutil/pixdesc.h"
+
+int av_fill_image_linesizes(int linesizes[4], enum PixelFormat pix_fmt, int width)
+{
+ int i;
+ const AVPixFmtDescriptor *desc = &av_pix_fmt_descriptors[pix_fmt]; /* NOTE(review): pix_fmt indexes the table unchecked here - confirm callers validate it */
+ int max_step [4]; /* max pixel step for each plane */
+ int max_step_comp[4]; /* the component for each plane which has the max pixel step */
+ /* Fill linesizes[0..3] for pix_fmt at the given width; returns 0, or AVERROR(EINVAL) for hwaccel formats. */
+ memset(linesizes, 0, 4*sizeof(linesizes[0])); /* zero all four entries up front, so the early returns leave the rest at 0 */
+
+ if (desc->flags & PIX_FMT_HWACCEL)
+ return AVERROR(EINVAL);
+ /* bitstream formats: only plane 0 is filled, as ceil(width * step / 8) */
+ if (desc->flags & PIX_FMT_BITSTREAM) {
+ linesizes[0] = (width * (desc->comp[0].step_minus1+1) + 7) >> 3;
+ return 0;
+ }
+ /* per plane, remember the largest component step and which component has it */
+ memset(max_step , 0, sizeof(max_step ));
+ memset(max_step_comp, 0, sizeof(max_step_comp));
+ for (i = 0; i < 4; i++) {
+ const AVComponentDescriptor *comp = &(desc->comp[i]);
+ if ((comp->step_minus1+1) > max_step[comp->plane]) {
+ max_step [comp->plane] = comp->step_minus1+1;
+ max_step_comp[comp->plane] = i;
+ }
+ }
+ /* planes whose widest component is 1 or 2 use the width shifted by log2_chroma_w, rounded up */
+ for (i = 0; i < 4; i++) {
+ int s = (max_step_comp[i] == 1 || max_step_comp[i] == 2) ? desc->log2_chroma_w : 0;
+ linesizes[i] = max_step[i] * (((width + (1 << s) - 1)) >> s); /* step * ceil(width / 2^s); 0 for planes with no component */
+ }
+
+ return 0;
+}
+
+int av_fill_image_pointers(uint8_t *data[4], enum PixelFormat pix_fmt, int height,
+ uint8_t *ptr, const int linesizes[4])
+{
+ int i, total_size, size[4], has_plane[4];
+ /* Lay the planes out back to back starting at ptr; returns the total byte size, or AVERROR(EINVAL) for hwaccel formats. */
+ const AVPixFmtDescriptor *desc = &av_pix_fmt_descriptors[pix_fmt];
+ memset(data , 0, sizeof(data[0])*4);
+ memset(size , 0, sizeof(size));
+ memset(has_plane, 0, sizeof(has_plane));
+
+ if (desc->flags & PIX_FMT_HWACCEL)
+ return AVERROR(EINVAL);
+
+ data[0] = ptr;
+ size[0] = linesizes[0] * height;
+ /* paletted formats: plane 0 is padded to a multiple of 4 bytes and the palette follows it */
+ if (desc->flags & PIX_FMT_PAL) {
+ size[0] = (size[0] + 3) & ~3;
+ data[1] = ptr + size[0]; /* palette is stored here as 256 32 bits words */
+ return size[0] + 256 * 4;
+ }
+ /* mark every plane that at least one component refers to */
+ for (i = 0; i < 4; i++)
+ has_plane[desc->comp[i].plane] = 1;
+ /* test the bound before indexing: has_plane has only 4 entries, so has_plane[4] must never be read */
+ total_size = size[0];
+ for (i = 1; i < 4 && has_plane[i]; i++) {
+ int h, s = (i == 1 || i == 2) ? desc->log2_chroma_h : 0;
+ data[i] = data[i-1] + size[i-1];
+ h = (height + (1 << s) - 1) >> s; /* ceil(height / 2^s): planes 1 and 2 use the chroma height */
+ size[i] = h * linesizes[i];
+ total_size += size[i];
+ }
+
+ return total_size;
+}
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavcore/imgutils.h b/src/filters/transform/MPCVideoDec/ffmpeg/libavcore/imgutils.h
new file mode 100644
index 000000000..b8024de8e
--- /dev/null
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavcore/imgutils.h
@@ -0,0 +1,53 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCORE_IMGUTILS_H
+#define AVCORE_IMGUTILS_H
+
+/**
+ * @file
+ * misc image utilities
+ */
+
+#include "libavutil/pixfmt.h"
+#include "avcore.h"
+
+/**
+ * Fill plane linesizes for an image with pixel format pix_fmt and
+ * width width.
+ *
+ * @param linesizes array to be filled with the linesize for each plane
+ * @return >= 0 in case of success, a negative error code otherwise
+ */
+int av_fill_image_linesizes(int linesizes[4], enum PixelFormat pix_fmt, int width);
+
+/**
+ * Fill plane data pointers for an image with pixel format pix_fmt and
+ * height height.
+ *
+ * @param data pointers array to be filled with the pointer for each image plane
+ * @param ptr the pointer to a buffer which will contain the image
+ * @param linesizes the array containing the linesize for each
+ * plane, should be filled by av_fill_image_linesizes()
+ * @return the size in bytes required for the image buffer, a negative
+ * error code in case of failure
+ */
+int av_fill_image_pointers(uint8_t *data[4], enum PixelFormat pix_fmt, int height,
+ uint8_t *ptr, const int linesizes[4]);
+
+#endif /* AVCORE_IMGUTILS_H */
diff --git a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/log.c b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/log.c
index b40d0e591..b9f4e902a 100644
--- a/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/log.c
+++ b/src/filters/transform/MPCVideoDec/ffmpeg/libavutil/log.c
@@ -63,6 +63,7 @@ void av_log_default_callback(void* ptr, int level, const char* fmt, va_list vl)
print_prefix= line[strlen(line)-1] == '\n';
if(print_prefix && !strcmp(line, prev)){
count++;
+ fprintf(stderr, " Last message repeated %d times\r", count);
return;
}
if(count>0){