From 29cfdd37674e3444557c385eaffef06c1b325414 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Sat, 11 Aug 2012 22:45:53 +0200
Subject: x86: avcodec: Appropriately name files containing only init functions

---
 libavcodec/x86/Makefile          |   8 +-
 libavcodec/x86/ac3dsp_init.c     |  93 ++++++++++
 libavcodec/x86/ac3dsp_mmx.c      |  93 ----------
 libavcodec/x86/fft.c             |  72 --------
 libavcodec/x86/fft_init.c        |  72 ++++++++
 libavcodec/x86/fmtconvert_init.c | 147 +++++++++++++++
 libavcodec/x86/fmtconvert_mmx.c  | 147 ---------------
 libavcodec/x86/h264dsp_init.c    | 385 +++++++++++++++++++++++++++++++++++++++
 libavcodec/x86/h264dsp_mmx.c     | 385 ---------------------------------------
 9 files changed, 701 insertions(+), 701 deletions(-)
 create mode 100644 libavcodec/x86/ac3dsp_init.c
 delete mode 100644 libavcodec/x86/ac3dsp_mmx.c
 delete mode 100644 libavcodec/x86/fft.c
 create mode 100644 libavcodec/x86/fft_init.c
 create mode 100644 libavcodec/x86/fmtconvert_init.c
 delete mode 100644 libavcodec/x86/fmtconvert_mmx.c
 create mode 100644 libavcodec/x86/h264dsp_init.c
 delete mode 100644 libavcodec/x86/h264dsp_mmx.c

diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index 57e73d8b2f..4d06685975 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -5,7 +5,7 @@ OBJS-$(CONFIG_XMM_CLOBBER_TEST)        += x86/w64xmmtest.o
 
 MMX-OBJS                               += x86/dsputil_mmx.o             \
                                           x86/fdct_mmx.o                \
-                                          x86/fmtconvert_mmx.o          \
+                                          x86/fmtconvert_init.o         \
                                           x86/idct_mmx_xvid.o           \
                                           x86/idct_sse2_xvid.o          \
                                           x86/motion_est_mmx.o          \
@@ -13,13 +13,13 @@ MMX-OBJS                               += x86/dsputil_mmx.o             \
                                           x86/simple_idct_mmx.o         \
 
 MMX-OBJS-$(CONFIG_AAC_DECODER)         += x86/sbrdsp_init.o
-MMX-OBJS-$(CONFIG_AC3DSP)              += x86/ac3dsp_mmx.o
+MMX-OBJS-$(CONFIG_AC3DSP)              += x86/ac3dsp_init.o
 MMX-OBJS-$(CONFIG_CAVS_DECODER)        += x86/cavsdsp_mmx.o
 MMX-OBJS-$(CONFIG_DNXHD_ENCODER)       += x86/dnxhd_mmx.o
 MMX-OBJS-$(CONFIG_DWT)                 += x86/snowdsp_mmx.o
 MMX-OBJS-$(CONFIG_ENCODERS)            += x86/dsputilenc_mmx.o
-MMX-OBJS-$(CONFIG_FFT)                 += x86/fft.o
-MMX-OBJS-$(CONFIG_H264DSP)             += x86/h264dsp_mmx.o
+MMX-OBJS-$(CONFIG_FFT)                 += x86/fft_init.o
+MMX-OBJS-$(CONFIG_H264DSP)             += x86/h264dsp_init.o
 MMX-OBJS-$(CONFIG_H264PRED)            += x86/h264_intrapred_init.o
 MMX-OBJS-$(CONFIG_LPC)                 += x86/lpc_mmx.o
 MMX-OBJS-$(CONFIG_MPEGAUDIODSP)        += x86/mpegaudiodec_mmx.o
diff --git a/libavcodec/x86/ac3dsp_init.c b/libavcodec/x86/ac3dsp_init.c
new file mode 100644
index 0000000000..f3db67a84f
--- /dev/null
+++ b/libavcodec/x86/ac3dsp_init.c
@@ -0,0 +1,93 @@
+/*
+ * x86-optimized AC-3 DSP utils
+ * Copyright (c) 2011 Justin Ruggles
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/x86/asm.h"
+#include "dsputil_mmx.h"
+#include "libavcodec/ac3dsp.h"
+
+extern void ff_ac3_exponent_min_mmx   (uint8_t *exp, int num_reuse_blocks, int nb_coefs);
+extern void ff_ac3_exponent_min_mmxext(uint8_t *exp, int num_reuse_blocks, int nb_coefs);
+extern void ff_ac3_exponent_min_sse2  (uint8_t *exp, int num_reuse_blocks, int nb_coefs);
+
+extern int ff_ac3_max_msb_abs_int16_mmx  (const int16_t *src, int len);
+extern int ff_ac3_max_msb_abs_int16_mmx2 (const int16_t *src, int len);
+extern int ff_ac3_max_msb_abs_int16_sse2 (const int16_t *src, int len);
+extern int ff_ac3_max_msb_abs_int16_ssse3(const int16_t *src, int len);
+
+extern void ff_ac3_lshift_int16_mmx (int16_t *src, unsigned int len, unsigned int shift);
+extern void ff_ac3_lshift_int16_sse2(int16_t *src, unsigned int len, unsigned int shift);
+
+extern void ff_ac3_rshift_int32_mmx (int32_t *src, unsigned int len, unsigned int shift);
+extern void ff_ac3_rshift_int32_sse2(int32_t *src, unsigned int len, unsigned int shift);
+
+extern void ff_float_to_fixed24_3dnow(int32_t *dst, const float *src, unsigned int len);
+extern void ff_float_to_fixed24_sse  (int32_t *dst, const float *src, unsigned int len);
+extern void ff_float_to_fixed24_sse2 (int32_t *dst, const float *src, unsigned int len);
+
+extern int ff_ac3_compute_mantissa_size_sse2(uint16_t mant_cnt[6][16]);
+
+extern void ff_ac3_extract_exponents_3dnow(uint8_t *exp, int32_t *coef, int nb_coefs);
+extern void ff_ac3_extract_exponents_sse2 (uint8_t *exp, int32_t *coef, int nb_coefs);
+extern void ff_ac3_extract_exponents_ssse3(uint8_t *exp, int32_t *coef, int nb_coefs);
+
+av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
+{ /* Point c's function pointers at x86 routines chosen from av_get_cpu_flags(). */
+#if HAVE_YASM /* when HAVE_YASM is 0 the body is empty and c is left untouched */
+    int mm_flags = av_get_cpu_flags();
+
+    if (mm_flags & AV_CPU_FLAG_MMX) { /* checks run in sequence; later blocks overwrite earlier picks */
+        c->ac3_exponent_min = ff_ac3_exponent_min_mmx;
+        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmx;
+        c->ac3_lshift_int16 = ff_ac3_lshift_int16_mmx;
+        c->ac3_rshift_int32 = ff_ac3_rshift_int32_mmx;
+    }
+    if (mm_flags & AV_CPU_FLAG_3DNOW && HAVE_AMD3DNOW) {
+        c->extract_exponents = ff_ac3_extract_exponents_3dnow;
+        if (!bit_exact) { /* the 3DNow! float_to_fixed24 is only installed when bit_exact is 0 */
+            c->float_to_fixed24 = ff_float_to_fixed24_3dnow;
+        }
+    }
+    if (mm_flags & AV_CPU_FLAG_MMXEXT && HAVE_MMXEXT) {
+        c->ac3_exponent_min = ff_ac3_exponent_min_mmxext;
+        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmx2;
+    }
+    if (mm_flags & AV_CPU_FLAG_SSE && HAVE_SSE) {
+        c->float_to_fixed24 = ff_float_to_fixed24_sse;
+    }
+    if (mm_flags & AV_CPU_FLAG_SSE2 && HAVE_SSE) { /* NOTE(review): SSE2 block is gated on HAVE_SSE -- confirm intended */
+        c->ac3_exponent_min = ff_ac3_exponent_min_sse2;
+        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_sse2;
+        c->float_to_fixed24 = ff_float_to_fixed24_sse2;
+        c->compute_mantissa_size = ff_ac3_compute_mantissa_size_sse2;
+        c->extract_exponents = ff_ac3_extract_exponents_sse2;
+        if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW)) { /* the sse2 shifts are skipped when SSE2SLOW is flagged */
+            c->ac3_lshift_int16 = ff_ac3_lshift_int16_sse2;
+            c->ac3_rshift_int32 = ff_ac3_rshift_int32_sse2;
+        }
+    }
+    if (mm_flags & AV_CPU_FLAG_SSSE3 && HAVE_SSSE3) {
+        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_ssse3;
+        if (!(mm_flags & AV_CPU_FLAG_ATOM)) { /* the ssse3 extract_exponents is skipped when ATOM is flagged */
+            c->extract_exponents = ff_ac3_extract_exponents_ssse3;
+        }
+    }
+#endif /* HAVE_YASM */
+}
diff --git a/libavcodec/x86/ac3dsp_mmx.c b/libavcodec/x86/ac3dsp_mmx.c
deleted file mode 100644
index f3db67a84f..0000000000
--- a/libavcodec/x86/ac3dsp_mmx.c
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- * x86-optimized AC-3 DSP utils
- * Copyright (c) 2011 Justin Ruggles
- *
- * This file is part of Libav.
- *
- * Libav is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * Libav is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "libavutil/x86/asm.h"
-#include "dsputil_mmx.h"
-#include "libavcodec/ac3dsp.h"
-
-extern void ff_ac3_exponent_min_mmx   (uint8_t *exp, int num_reuse_blocks, int nb_coefs);
-extern void ff_ac3_exponent_min_mmxext(uint8_t *exp, int num_reuse_blocks, int nb_coefs);
-extern void ff_ac3_exponent_min_sse2  (uint8_t *exp, int num_reuse_blocks, int nb_coefs);
-
-extern int ff_ac3_max_msb_abs_int16_mmx  (const int16_t *src, int len);
-extern int ff_ac3_max_msb_abs_int16_mmx2 (const int16_t *src, int len);
-extern int ff_ac3_max_msb_abs_int16_sse2 (const int16_t *src, int len);
-extern int ff_ac3_max_msb_abs_int16_ssse3(const int16_t *src, int len);
-
-extern void ff_ac3_lshift_int16_mmx (int16_t *src, unsigned int len, unsigned int shift);
-extern void ff_ac3_lshift_int16_sse2(int16_t *src, unsigned int len, unsigned int shift);
-
-extern void ff_ac3_rshift_int32_mmx (int32_t *src, unsigned int len, unsigned int shift);
-extern void ff_ac3_rshift_int32_sse2(int32_t *src, unsigned int len, unsigned int shift);
-
-extern void ff_float_to_fixed24_3dnow(int32_t *dst, const float *src, unsigned int len);
-extern void ff_float_to_fixed24_sse  (int32_t *dst, const float *src, unsigned int len);
-extern void ff_float_to_fixed24_sse2 (int32_t *dst, const float *src, unsigned int len);
-
-extern int ff_ac3_compute_mantissa_size_sse2(uint16_t mant_cnt[6][16]);
-
-extern void ff_ac3_extract_exponents_3dnow(uint8_t *exp, int32_t *coef, int nb_coefs);
-extern void ff_ac3_extract_exponents_sse2 (uint8_t *exp, int32_t *coef, int nb_coefs);
-extern void ff_ac3_extract_exponents_ssse3(uint8_t *exp, int32_t *coef, int nb_coefs);
-
-av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
-{
-#if HAVE_YASM
-    int mm_flags = av_get_cpu_flags();
-
-    if (mm_flags & AV_CPU_FLAG_MMX) {
-        c->ac3_exponent_min = ff_ac3_exponent_min_mmx;
-        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmx;
-        c->ac3_lshift_int16 = ff_ac3_lshift_int16_mmx;
-        c->ac3_rshift_int32 = ff_ac3_rshift_int32_mmx;
-    }
-    if (mm_flags & AV_CPU_FLAG_3DNOW && HAVE_AMD3DNOW) {
-        c->extract_exponents = ff_ac3_extract_exponents_3dnow;
-        if (!bit_exact) {
-            c->float_to_fixed24 = ff_float_to_fixed24_3dnow;
-        }
-    }
-    if (mm_flags & AV_CPU_FLAG_MMXEXT && HAVE_MMXEXT) {
-        c->ac3_exponent_min = ff_ac3_exponent_min_mmxext;
-        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmx2;
-    }
-    if (mm_flags & AV_CPU_FLAG_SSE && HAVE_SSE) {
-        c->float_to_fixed24 = ff_float_to_fixed24_sse;
-    }
-    if (mm_flags & AV_CPU_FLAG_SSE2 && HAVE_SSE) {
-        c->ac3_exponent_min = ff_ac3_exponent_min_sse2;
-        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_sse2;
-        c->float_to_fixed24 = ff_float_to_fixed24_sse2;
-        c->compute_mantissa_size = ff_ac3_compute_mantissa_size_sse2;
-        c->extract_exponents = ff_ac3_extract_exponents_sse2;
-        if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW)) {
-            c->ac3_lshift_int16 = ff_ac3_lshift_int16_sse2;
-            c->ac3_rshift_int32 = ff_ac3_rshift_int32_sse2;
-        }
-    }
-    if (mm_flags & AV_CPU_FLAG_SSSE3 && HAVE_SSSE3) {
-        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_ssse3;
-        if (!(mm_flags & AV_CPU_FLAG_ATOM)) {
-            c->extract_exponents = ff_ac3_extract_exponents_ssse3;
-        }
-    }
-#endif
-}
diff --git a/libavcodec/x86/fft.c b/libavcodec/x86/fft.c
deleted file mode 100644
index fcde3fa797..0000000000
--- a/libavcodec/x86/fft.c
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * This file is part of Libav.
- *
- * Libav is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * Libav is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "libavutil/cpu.h"
-#include "libavcodec/dsputil.h"
-#include "libavcodec/dct.h"
-#include "fft.h"
-
-av_cold void ff_fft_init_mmx(FFTContext *s)
-{
-#if HAVE_YASM
-    int has_vectors = av_get_cpu_flags();
-#if ARCH_X86_32
-    if (has_vectors & AV_CPU_FLAG_3DNOW && HAVE_AMD3DNOW) {
-        /* 3DNow! for K6-2/3 */
-        s->imdct_calc = ff_imdct_calc_3dnow;
-        s->imdct_half = ff_imdct_half_3dnow;
-        s->fft_calc   = ff_fft_calc_3dnow;
-    }
-    if (has_vectors & AV_CPU_FLAG_3DNOWEXT && HAVE_AMD3DNOWEXT) {
-        /* 3DNowEx for K7 */
-        s->imdct_calc = ff_imdct_calc_3dnowext;
-        s->imdct_half = ff_imdct_half_3dnowext;
-        s->fft_calc   = ff_fft_calc_3dnowext;
-    }
-#endif
-    if (has_vectors & AV_CPU_FLAG_SSE && HAVE_SSE) {
-        /* SSE for P3/P4/K8 */
-        s->imdct_calc  = ff_imdct_calc_sse;
-        s->imdct_half  = ff_imdct_half_sse;
-        s->fft_permute = ff_fft_permute_sse;
-        s->fft_calc    = ff_fft_calc_sse;
-        s->fft_permutation = FF_FFT_PERM_SWAP_LSBS;
-    }
-    if (has_vectors & AV_CPU_FLAG_AVX && HAVE_AVX && s->nbits >= 5) {
-        /* AVX for SB */
-        s->imdct_half      = ff_imdct_half_avx;
-        s->fft_calc        = ff_fft_calc_avx;
-        s->fft_permutation = FF_FFT_PERM_AVX;
-    }
-#endif
-}
-
-#if CONFIG_DCT
-av_cold void ff_dct_init_mmx(DCTContext *s)
-{
-#if HAVE_YASM
-    int has_vectors = av_get_cpu_flags();
-    if (has_vectors & AV_CPU_FLAG_SSE && HAVE_SSE)
-        s->dct32 = ff_dct32_float_sse;
-    if (has_vectors & AV_CPU_FLAG_SSE2 && HAVE_SSE)
-        s->dct32 = ff_dct32_float_sse2;
-    if (has_vectors & AV_CPU_FLAG_AVX && HAVE_AVX)
-        s->dct32 = ff_dct32_float_avx;
-#endif
-}
-#endif
diff --git a/libavcodec/x86/fft_init.c b/libavcodec/x86/fft_init.c
new file mode 100644
index 0000000000..fcde3fa797
--- /dev/null
+++ b/libavcodec/x86/fft_init.c
@@ -0,0 +1,72 @@
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/cpu.h"
+#include "libavcodec/dsputil.h"
+#include "libavcodec/dct.h"
+#include "fft.h"
+
+av_cold void ff_fft_init_mmx(FFTContext *s)
+{ /* Replace s's FFT/IMDCT function pointers with x86 versions per av_get_cpu_flags(). */
+#if HAVE_YASM /* when HAVE_YASM is 0 the body is empty and s is left untouched */
+    int has_vectors = av_get_cpu_flags();
+#if ARCH_X86_32 /* the two 3DNow! blocks are only compiled when ARCH_X86_32 is nonzero */
+    if (has_vectors & AV_CPU_FLAG_3DNOW && HAVE_AMD3DNOW) {
+        /* 3DNow! for K6-2/3: sets imdct_calc, imdct_half and fft_calc */
+        s->imdct_calc = ff_imdct_calc_3dnow;
+        s->imdct_half = ff_imdct_half_3dnow;
+        s->fft_calc   = ff_fft_calc_3dnow;
+    }
+    if (has_vectors & AV_CPU_FLAG_3DNOWEXT && HAVE_AMD3DNOWEXT) {
+        /* 3DNowExt for K7: overrides the same three pointers */
+        s->imdct_calc = ff_imdct_calc_3dnowext;
+        s->imdct_half = ff_imdct_half_3dnowext;
+        s->fft_calc   = ff_fft_calc_3dnowext;
+    }
+#endif /* ARCH_X86_32 */
+    if (has_vectors & AV_CPU_FLAG_SSE && HAVE_SSE) {
+        /* SSE for P3/P4/K8: also sets fft_permute and the permutation mode */
+        s->imdct_calc  = ff_imdct_calc_sse;
+        s->imdct_half  = ff_imdct_half_sse;
+        s->fft_permute = ff_fft_permute_sse;
+        s->fft_calc    = ff_fft_calc_sse;
+        s->fft_permutation = FF_FFT_PERM_SWAP_LSBS;
+    }
+    if (has_vectors & AV_CPU_FLAG_AVX && HAVE_AVX && s->nbits >= 5) {
+        /* AVX for SB, only when s->nbits >= 5; imdct_calc and fft_permute are not touched here */
+        s->imdct_half      = ff_imdct_half_avx;
+        s->fft_calc        = ff_fft_calc_avx;
+        s->fft_permutation = FF_FFT_PERM_AVX;
+    }
+#endif /* HAVE_YASM */
+}
+
+#if CONFIG_DCT /* ff_dct_init_mmx is only compiled when CONFIG_DCT is nonzero */
+av_cold void ff_dct_init_mmx(DCTContext *s)
+{ /* Pick s->dct32 from the CPU flags; the last matching check below wins. */
+#if HAVE_YASM /* when HAVE_YASM is 0 the body is empty and s->dct32 is left untouched */
+    int has_vectors = av_get_cpu_flags();
+    if (has_vectors & AV_CPU_FLAG_SSE && HAVE_SSE)
+        s->dct32 = ff_dct32_float_sse;
+    if (has_vectors & AV_CPU_FLAG_SSE2 && HAVE_SSE) /* NOTE(review): SSE2 check is gated on HAVE_SSE -- confirm intended */
+        s->dct32 = ff_dct32_float_sse2;
+    if (has_vectors & AV_CPU_FLAG_AVX && HAVE_AVX)
+        s->dct32 = ff_dct32_float_avx;
+#endif /* HAVE_YASM */
+}
+#endif /* CONFIG_DCT */
diff --git a/libavcodec/x86/fmtconvert_init.c b/libavcodec/x86/fmtconvert_init.c
new file mode 100644
index 0000000000..6f3d14aedc
--- /dev/null
+++ b/libavcodec/x86/fmtconvert_init.c
@@ -0,0 +1,147 @@
+/*
+ * Format Conversion Utils
+ * Copyright (c) 2000, 2001 Fabrice Bellard
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * MMX optimization by Nick Kurshev <nickols_k@mail.ru>
+ */
+
+#include "libavutil/cpu.h"
+#include "libavutil/x86/asm.h"
+#include "libavcodec/fmtconvert.h"
+#include "libavcodec/dsputil.h"
+
+#if HAVE_YASM
+
+void ff_int32_to_float_fmul_scalar_sse (float *dst, const int *src, float mul, int len);
+void ff_int32_to_float_fmul_scalar_sse2(float *dst, const int *src, float mul, int len);
+
+void ff_float_to_int16_3dnow(int16_t *dst, const float *src, long len);
+void ff_float_to_int16_sse  (int16_t *dst, const float *src, long len);
+void ff_float_to_int16_sse2 (int16_t *dst, const float *src, long len);
+
+void ff_float_to_int16_step_3dnow(int16_t *dst, const float *src, long len, long step);
+void ff_float_to_int16_step_sse  (int16_t *dst, const float *src, long len, long step);
+void ff_float_to_int16_step_sse2 (int16_t *dst, const float *src, long len, long step);
+
+void ff_float_to_int16_interleave2_3dnow(int16_t *dst, const float **src, long len);
+void ff_float_to_int16_interleave2_sse  (int16_t *dst, const float **src, long len);
+void ff_float_to_int16_interleave2_sse2 (int16_t *dst, const float **src, long len);
+
+void ff_float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len);
+void ff_float_to_int16_interleave6_3dnow(int16_t *dst, const float **src, int len);
+void ff_float_to_int16_interleave6_3dnowext(int16_t *dst, const float **src, int len);
+
+#define ff_float_to_int16_interleave6_sse2 ff_float_to_int16_interleave6_sse
+
+#define FLOAT_TO_INT16_INTERLEAVE(cpu) \
+/* gcc pessimizes register allocation if this is in the same function as float_to_int16_interleave_sse2 */\
+static av_noinline void float_to_int16_interleave_misc_##cpu(int16_t *dst, const float **src, long len, int channels){\
+    int c;\
+    for(c=0; c<channels; c++){\
+        ff_float_to_int16_step_##cpu(dst+c, src[c], len, channels);\
+    }\
+}\
+\
+static void float_to_int16_interleave_##cpu(int16_t *dst, const float **src, long len, int channels){\
+    if(channels==1)\
+        ff_float_to_int16_##cpu(dst, src[0], len);\
+    else if(channels==2){\
+        ff_float_to_int16_interleave2_##cpu(dst, src, len);\
+    }else if(channels==6){\
+        ff_float_to_int16_interleave6_##cpu(dst, src, len);\
+    }else\
+        float_to_int16_interleave_misc_##cpu(dst, src, len, channels);\
+}
+
+FLOAT_TO_INT16_INTERLEAVE(3dnow) /* defines float_to_int16_interleave_3dnow and its _misc_ helper */
+FLOAT_TO_INT16_INTERLEAVE(sse)   /* defines float_to_int16_interleave_sse and its _misc_ helper */
+FLOAT_TO_INT16_INTERLEAVE(sse2)  /* 6-channel case resolves to ..._interleave6_sse through the #define above */
+
+static void float_to_int16_interleave_3dnowext(int16_t *dst, const float **src,
+                                               long len, int channels)
+{ /* Only the 6-channel case uses a 3dnowext routine; every other count goes to the 3dnow wrapper. */
+    if(channels==6)
+        ff_float_to_int16_interleave6_3dnowext(dst, src, len); /* prototype takes int len, so the long is converted */
+    else
+        float_to_int16_interleave_3dnow(dst, src, len, channels);
+}
+
+void ff_float_interleave2_mmx(float *dst, const float **src, unsigned int len);
+void ff_float_interleave2_sse(float *dst, const float **src, unsigned int len);
+
+void ff_float_interleave6_mmx(float *dst, const float **src, unsigned int len);
+void ff_float_interleave6_sse(float *dst, const float **src, unsigned int len);
+
+static void float_interleave_mmx(float *dst, const float **src,
+                                 unsigned int len, int channels)
+{ /* Dispatch on channel count: 2 and 6 have mmx routines, anything else uses ff_float_interleave_c. */
+    if (channels == 2) {
+        ff_float_interleave2_mmx(dst, src, len);
+    } else if (channels == 6)
+        ff_float_interleave6_mmx(dst, src, len);
+    else
+        ff_float_interleave_c(dst, src, len, channels); /* the only callee that receives channels */
+}
+
+static void float_interleave_sse(float *dst, const float **src,
+                                 unsigned int len, int channels)
+{ /* Dispatch on channel count: 2 and 6 have sse routines, anything else uses ff_float_interleave_c. */
+    if (channels == 2) {
+        ff_float_interleave2_sse(dst, src, len);
+    } else if (channels == 6)
+        ff_float_interleave6_sse(dst, src, len);
+    else
+        ff_float_interleave_c(dst, src, len, channels); /* the only callee that receives channels */
+}
+#endif
+
+void ff_fmt_convert_init_x86(FmtConvertContext *c, AVCodecContext *avctx)
+{ /* Point c's conversion function pointers at x86 versions chosen from av_get_cpu_flags(). */
+#if HAVE_YASM /* when HAVE_YASM is 0 the body is empty and c is left untouched */
+    int mm_flags = av_get_cpu_flags();
+
+    if (mm_flags & AV_CPU_FLAG_MMX) { /* every other check is nested inside this MMX test */
+        c->float_interleave = float_interleave_mmx;
+
+        if (HAVE_AMD3DNOW && mm_flags & AV_CPU_FLAG_3DNOW) {
+            if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ /* 3dnow converters are skipped when CODEC_FLAG_BITEXACT is set */
+                c->float_to_int16 = ff_float_to_int16_3dnow;
+                c->float_to_int16_interleave = float_to_int16_interleave_3dnow;
+            }
+        }
+        if (HAVE_AMD3DNOWEXT && mm_flags & AV_CPU_FLAG_3DNOWEXT) {
+            if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ /* same CODEC_FLAG_BITEXACT gate as the 3dnow block */
+                c->float_to_int16_interleave = float_to_int16_interleave_3dnowext;
+            }
+        }
+        if (HAVE_SSE && mm_flags & AV_CPU_FLAG_SSE) { /* no CODEC_FLAG_BITEXACT test on the SSE/SSE2 blocks */
+            c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_sse;
+            c->float_to_int16 = ff_float_to_int16_sse;
+            c->float_to_int16_interleave = float_to_int16_interleave_sse;
+            c->float_interleave = float_interleave_sse;
+        }
+        if (HAVE_SSE && mm_flags & AV_CPU_FLAG_SSE2) { /* NOTE(review): SSE2 block is gated on HAVE_SSE -- confirm intended */
+            c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_sse2;
+            c->float_to_int16 = ff_float_to_int16_sse2;
+            c->float_to_int16_interleave = float_to_int16_interleave_sse2; /* float_interleave is not reassigned here */
+        }
+    }
+#endif /* HAVE_YASM */
+}
diff --git a/libavcodec/x86/fmtconvert_mmx.c b/libavcodec/x86/fmtconvert_mmx.c
deleted file mode 100644
index 6f3d14aedc..0000000000
--- a/libavcodec/x86/fmtconvert_mmx.c
+++ /dev/null
@@ -1,147 +0,0 @@
-/*
- * Format Conversion Utils
- * Copyright (c) 2000, 2001 Fabrice Bellard
- * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
- *
- * This file is part of Libav.
- *
- * Libav is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * Libav is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- *
- * MMX optimization by Nick Kurshev <nickols_k@mail.ru>
- */
-
-#include "libavutil/cpu.h"
-#include "libavutil/x86/asm.h"
-#include "libavcodec/fmtconvert.h"
-#include "libavcodec/dsputil.h"
-
-#if HAVE_YASM
-
-void ff_int32_to_float_fmul_scalar_sse (float *dst, const int *src, float mul, int len);
-void ff_int32_to_float_fmul_scalar_sse2(float *dst, const int *src, float mul, int len);
-
-void ff_float_to_int16_3dnow(int16_t *dst, const float *src, long len);
-void ff_float_to_int16_sse  (int16_t *dst, const float *src, long len);
-void ff_float_to_int16_sse2 (int16_t *dst, const float *src, long len);
-
-void ff_float_to_int16_step_3dnow(int16_t *dst, const float *src, long len, long step);
-void ff_float_to_int16_step_sse  (int16_t *dst, const float *src, long len, long step);
-void ff_float_to_int16_step_sse2 (int16_t *dst, const float *src, long len, long step);
-
-void ff_float_to_int16_interleave2_3dnow(int16_t *dst, const float **src, long len);
-void ff_float_to_int16_interleave2_sse  (int16_t *dst, const float **src, long len);
-void ff_float_to_int16_interleave2_sse2 (int16_t *dst, const float **src, long len);
-
-void ff_float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len);
-void ff_float_to_int16_interleave6_3dnow(int16_t *dst, const float **src, int len);
-void ff_float_to_int16_interleave6_3dnowext(int16_t *dst, const float **src, int len);
-
-#define ff_float_to_int16_interleave6_sse2 ff_float_to_int16_interleave6_sse
-
-#define FLOAT_TO_INT16_INTERLEAVE(cpu) \
-/* gcc pessimizes register allocation if this is in the same function as float_to_int16_interleave_sse2*/\
-static av_noinline void float_to_int16_interleave_misc_##cpu(int16_t *dst, const float **src, long len, int channels){\
-    int c;\
-    for(c=0; c<channels; c++){\
-        ff_float_to_int16_step_##cpu(dst+c, src[c], len, channels);\
-    }\
-}\
-\
-static void float_to_int16_interleave_##cpu(int16_t *dst, const float **src, long len, int channels){\
-    if(channels==1)\
-        ff_float_to_int16_##cpu(dst, src[0], len);\
-    else if(channels==2){\
-        ff_float_to_int16_interleave2_##cpu(dst, src, len);\
-    }else if(channels==6){\
-        ff_float_to_int16_interleave6_##cpu(dst, src, len);\
-    }else\
-        float_to_int16_interleave_misc_##cpu(dst, src, len, channels);\
-}
-
-FLOAT_TO_INT16_INTERLEAVE(3dnow)
-FLOAT_TO_INT16_INTERLEAVE(sse)
-FLOAT_TO_INT16_INTERLEAVE(sse2)
-
-static void float_to_int16_interleave_3dnowext(int16_t *dst, const float **src,
-                                               long len, int channels)
-{
-    if(channels==6)
-        ff_float_to_int16_interleave6_3dnowext(dst, src, len);
-    else
-        float_to_int16_interleave_3dnow(dst, src, len, channels);
-}
-
-void ff_float_interleave2_mmx(float *dst, const float **src, unsigned int len);
-void ff_float_interleave2_sse(float *dst, const float **src, unsigned int len);
-
-void ff_float_interleave6_mmx(float *dst, const float **src, unsigned int len);
-void ff_float_interleave6_sse(float *dst, const float **src, unsigned int len);
-
-static void float_interleave_mmx(float *dst, const float **src,
-                                 unsigned int len, int channels)
-{
-    if (channels == 2) {
-        ff_float_interleave2_mmx(dst, src, len);
-    } else if (channels == 6)
-        ff_float_interleave6_mmx(dst, src, len);
-    else
-        ff_float_interleave_c(dst, src, len, channels);
-}
-
-static void float_interleave_sse(float *dst, const float **src,
-                                 unsigned int len, int channels)
-{
-    if (channels == 2) {
-        ff_float_interleave2_sse(dst, src, len);
-    } else if (channels == 6)
-        ff_float_interleave6_sse(dst, src, len);
-    else
-        ff_float_interleave_c(dst, src, len, channels);
-}
-#endif
-
-void ff_fmt_convert_init_x86(FmtConvertContext *c, AVCodecContext *avctx)
-{
-#if HAVE_YASM
-    int mm_flags = av_get_cpu_flags();
-
-    if (mm_flags & AV_CPU_FLAG_MMX) {
-        c->float_interleave = float_interleave_mmx;
-
-        if (HAVE_AMD3DNOW && mm_flags & AV_CPU_FLAG_3DNOW) {
-            if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
-                c->float_to_int16 = ff_float_to_int16_3dnow;
-                c->float_to_int16_interleave = float_to_int16_interleave_3dnow;
-            }
-        }
-        if (HAVE_AMD3DNOWEXT && mm_flags & AV_CPU_FLAG_3DNOWEXT) {
-            if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
-                c->float_to_int16_interleave = float_to_int16_interleave_3dnowext;
-            }
-        }
-        if (HAVE_SSE && mm_flags & AV_CPU_FLAG_SSE) {
-            c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_sse;
-            c->float_to_int16 = ff_float_to_int16_sse;
-            c->float_to_int16_interleave = float_to_int16_interleave_sse;
-            c->float_interleave = float_interleave_sse;
-        }
-        if (HAVE_SSE && mm_flags & AV_CPU_FLAG_SSE2) {
-            c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_sse2;
-            c->float_to_int16 = ff_float_to_int16_sse2;
-            c->float_to_int16_interleave = float_to_int16_interleave_sse2;
-        }
-    }
-#endif
-}
diff --git a/libavcodec/x86/h264dsp_init.c b/libavcodec/x86/h264dsp_init.c
new file mode 100644
index 0000000000..f24f751fb3
--- /dev/null
+++ b/libavcodec/x86/h264dsp_init.c
@@ -0,0 +1,385 @@
+/*
+ * Copyright (c) 2004-2005 Michael Niedermayer, Loren Merritt
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/cpu.h"
+#include "libavutil/x86/asm.h"
+#include "libavcodec/h264dsp.h"
+#include "dsputil_mmx.h"
+
+/***********************************/
+/* IDCT */
+#define IDCT_ADD_FUNC(NUM, DEPTH, OPT)                                  \
+void ff_h264_idct ## NUM ## _add_ ## DEPTH ## _ ## OPT(uint8_t *dst,    \
+                                                       int16_t *block,  \
+                                                       int stride);
+
+IDCT_ADD_FUNC(, 8, mmx)
+IDCT_ADD_FUNC(, 10, sse2)
+IDCT_ADD_FUNC(_dc, 8, mmx2)
+IDCT_ADD_FUNC(_dc, 10, mmx2)
+IDCT_ADD_FUNC(8_dc, 8, mmx2)
+IDCT_ADD_FUNC(8_dc, 10, sse2)
+IDCT_ADD_FUNC(8, 8, mmx)
+IDCT_ADD_FUNC(8, 8, sse2)
+IDCT_ADD_FUNC(8, 10, sse2)
+#if HAVE_AVX
+IDCT_ADD_FUNC(, 10, avx)
+IDCT_ADD_FUNC(8_dc, 10, avx)
+IDCT_ADD_FUNC(8, 10, avx)
+#endif
+
+
+#define IDCT_ADD_REP_FUNC(NUM, REP, DEPTH, OPT)                         \
+void ff_h264_idct ## NUM ## _add ## REP ## _ ## DEPTH ## _ ## OPT       \
+    (uint8_t *dst, const int *block_offset,                             \
+     DCTELEM *block, int stride, const uint8_t nnzc[6 * 8]);
+
+IDCT_ADD_REP_FUNC(8, 4, 8, mmx)
+IDCT_ADD_REP_FUNC(8, 4, 8, mmx2)
+IDCT_ADD_REP_FUNC(8, 4, 8, sse2)
+IDCT_ADD_REP_FUNC(8, 4, 10, sse2)
+IDCT_ADD_REP_FUNC(8, 4, 10, avx)
+IDCT_ADD_REP_FUNC(, 16, 8, mmx)
+IDCT_ADD_REP_FUNC(, 16, 8, mmx2)
+IDCT_ADD_REP_FUNC(, 16, 8, sse2)
+IDCT_ADD_REP_FUNC(, 16, 10, sse2)
+IDCT_ADD_REP_FUNC(, 16intra, 8, mmx)
+IDCT_ADD_REP_FUNC(, 16intra, 8, mmx2)
+IDCT_ADD_REP_FUNC(, 16intra, 8, sse2)
+IDCT_ADD_REP_FUNC(, 16intra, 10, sse2)
+#if HAVE_AVX
+IDCT_ADD_REP_FUNC(, 16, 10, avx)
+IDCT_ADD_REP_FUNC(, 16intra, 10, avx)
+#endif
+
+
+#define IDCT_ADD_REP_FUNC2(NUM, REP, DEPTH, OPT)                      \
+void ff_h264_idct ## NUM ## _add ## REP ## _ ## DEPTH ## _ ## OPT     \
+    (uint8_t **dst, const int *block_offset,                          \
+     DCTELEM *block, int stride, const uint8_t nnzc[6 * 8]);
+
+IDCT_ADD_REP_FUNC2(, 8, 8, mmx)
+IDCT_ADD_REP_FUNC2(, 8, 8, mmx2)
+IDCT_ADD_REP_FUNC2(, 8, 8, sse2)
+IDCT_ADD_REP_FUNC2(, 8, 10, sse2)
+#if HAVE_AVX
+IDCT_ADD_REP_FUNC2(, 8, 10, avx)
+#endif
+
+void ff_h264_luma_dc_dequant_idct_mmx(DCTELEM *output, DCTELEM *input, int qmul);
+void ff_h264_luma_dc_dequant_idct_sse2(DCTELEM *output, DCTELEM *input, int qmul);
+
+/***********************************/
+/* deblocking */
+
+void ff_h264_loop_filter_strength_mmx2(int16_t bS[2][4][4], uint8_t nnz[40],
+                                       int8_t ref[2][40], int16_t mv[2][40][2],
+                                       int bidir, int edges, int step,
+                                       int mask_mv0, int mask_mv1, int field);
+
+#define LF_FUNC(DIR, TYPE, DEPTH, OPT)                                        \
+void ff_deblock_ ## DIR ## _ ## TYPE ## _ ## DEPTH ## _ ## OPT(uint8_t *pix,  \
+                                                               int stride,    \
+                                                               int alpha,     \
+                                                               int beta,      \
+                                                               int8_t *tc0);
+#define LF_IFUNC(DIR, TYPE, DEPTH, OPT) \
+void ff_deblock_ ## DIR ## _ ## TYPE ## _ ## DEPTH ## _ ## OPT(uint8_t *pix,  \
+                                                               int stride,    \
+                                                               int alpha,     \
+                                                               int beta);
+
+#define LF_FUNCS(type, depth)                   \
+LF_FUNC(h,  chroma,       depth, mmx2)          \
+LF_IFUNC(h, chroma_intra, depth, mmx2)          \
+LF_FUNC(v,  chroma,       depth, mmx2)          \
+LF_IFUNC(v, chroma_intra, depth, mmx2)          \
+LF_FUNC(h,  luma,         depth, mmx2)          \
+LF_IFUNC(h, luma_intra,   depth, mmx2)          \
+LF_FUNC(h,  luma,         depth, sse2)          \
+LF_IFUNC(h, luma_intra,   depth, sse2)          \
+LF_FUNC(v,  luma,         depth, sse2)          \
+LF_IFUNC(v, luma_intra,   depth, sse2)          \
+LF_FUNC(h,  chroma,       depth, sse2)          \
+LF_IFUNC(h, chroma_intra, depth, sse2)          \
+LF_FUNC(v,  chroma,       depth, sse2)          \
+LF_IFUNC(v, chroma_intra, depth, sse2)          \
+LF_FUNC(h,  luma,         depth, avx)           \
+LF_IFUNC(h, luma_intra,   depth, avx)           \
+LF_FUNC(v,  luma,         depth, avx)           \
+LF_IFUNC(v, luma_intra,   depth, avx)           \
+LF_FUNC(h,  chroma,       depth, avx)           \
+LF_IFUNC(h, chroma_intra, depth, avx)           \
+LF_FUNC(v,  chroma,       depth, avx)           \
+LF_IFUNC(v, chroma_intra, depth, avx)
+
+LF_FUNCS(uint8_t,   8)
+LF_FUNCS(uint16_t, 10)
+
+#if ARCH_X86_32
+LF_FUNC(v8, luma, 8, mmx2)
+static void ff_deblock_v_luma_8_mmx2(uint8_t *pix, int stride, int alpha,
+                                     int beta, int8_t *tc0)
+{
+    if ((tc0[0] & tc0[1]) >= 0)
+        ff_deblock_v8_luma_8_mmx2(pix + 0, stride, alpha, beta, tc0);
+    if ((tc0[2] & tc0[3]) >= 0)
+        ff_deblock_v8_luma_8_mmx2(pix + 8, stride, alpha, beta, tc0 + 2);
+}
+
+LF_IFUNC(v8, luma_intra, 8, mmx2)
+static void ff_deblock_v_luma_intra_8_mmx2(uint8_t *pix, int stride,
+                                           int alpha, int beta)
+{
+    ff_deblock_v8_luma_intra_8_mmx2(pix + 0, stride, alpha, beta);
+    ff_deblock_v8_luma_intra_8_mmx2(pix + 8, stride, alpha, beta);
+}
+#endif /* ARCH_X86_32 */
+
+LF_FUNC(v,  luma,       10, mmx2)
+LF_IFUNC(v, luma_intra, 10, mmx2)
+
+/***********************************/
+/* weighted prediction */
+
+#define H264_WEIGHT(W, OPT)                                             \
+void ff_h264_weight_ ## W ## _ ## OPT(uint8_t *dst, int stride,         \
+                                      int height, int log2_denom,       \
+                                      int weight, int offset);
+
+#define H264_BIWEIGHT(W, OPT)                                           \
+void ff_h264_biweight_ ## W ## _ ## OPT(uint8_t *dst, uint8_t *src,     \
+                                        int stride, int height,         \
+                                        int log2_denom, int weightd,    \
+                                        int weights, int offset);
+
+#define H264_BIWEIGHT_MMX(W)                    \
+    H264_WEIGHT(W, mmx2)                        \
+    H264_BIWEIGHT(W, mmx2)
+
+#define H264_BIWEIGHT_MMX_SSE(W)                \
+    H264_BIWEIGHT_MMX(W)                        \
+    H264_WEIGHT(W, sse2)                        \
+    H264_BIWEIGHT(W, sse2)                      \
+    H264_BIWEIGHT(W, ssse3)
+
+H264_BIWEIGHT_MMX_SSE(16)
+H264_BIWEIGHT_MMX_SSE(8)
+H264_BIWEIGHT_MMX(4)
+
+#define H264_WEIGHT_10(W, DEPTH, OPT)                                   \
+void ff_h264_weight_ ## W ## _ ## DEPTH ## _ ## OPT(uint8_t *dst,       \
+                                                    int stride,         \
+                                                    int height,         \
+                                                    int log2_denom,     \
+                                                    int weight,         \
+                                                    int offset);
+
+#define H264_BIWEIGHT_10(W, DEPTH, OPT)                                 \
+void ff_h264_biweight_ ## W ## _ ## DEPTH ## _ ## OPT(uint8_t *dst,     \
+                                                      uint8_t *src,     \
+                                                      int stride,       \
+                                                      int height,       \
+                                                      int log2_denom,   \
+                                                      int weightd,      \
+                                                      int weights,      \
+                                                      int offset);
+
+#define H264_BIWEIGHT_10_SSE(W, DEPTH)          \
+    H264_WEIGHT_10(W, DEPTH, sse2)              \
+    H264_WEIGHT_10(W, DEPTH, sse4)              \
+    H264_BIWEIGHT_10(W, DEPTH, sse2)            \
+    H264_BIWEIGHT_10(W, DEPTH, sse4)
+
+H264_BIWEIGHT_10_SSE(16, 10)
+H264_BIWEIGHT_10_SSE(8,  10)
+H264_BIWEIGHT_10_SSE(4,  10)
+
+void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
+                         const int chroma_format_idc)
+{
+#if HAVE_YASM
+    int mm_flags = av_get_cpu_flags();
+
+    if (chroma_format_idc == 1 && mm_flags & AV_CPU_FLAG_MMXEXT)
+        c->h264_loop_filter_strength = ff_h264_loop_filter_strength_mmx2;
+
+    if (bit_depth == 8) {
+        if (mm_flags & AV_CPU_FLAG_MMX) {
+            c->h264_idct_dc_add   =
+            c->h264_idct_add      = ff_h264_idct_add_8_mmx;
+            c->h264_idct8_dc_add  =
+            c->h264_idct8_add     = ff_h264_idct8_add_8_mmx;
+
+            c->h264_idct_add16 = ff_h264_idct_add16_8_mmx;
+            c->h264_idct8_add4 = ff_h264_idct8_add4_8_mmx;
+            if (chroma_format_idc == 1)
+                c->h264_idct_add8 = ff_h264_idct_add8_8_mmx;
+            c->h264_idct_add16intra = ff_h264_idct_add16intra_8_mmx;
+            if (mm_flags & AV_CPU_FLAG_CMOV)
+                c->h264_luma_dc_dequant_idct = ff_h264_luma_dc_dequant_idct_mmx;
+
+            if (mm_flags & AV_CPU_FLAG_MMXEXT) {
+                c->h264_idct_dc_add  = ff_h264_idct_dc_add_8_mmx2;
+                c->h264_idct8_dc_add = ff_h264_idct8_dc_add_8_mmx2;
+                c->h264_idct_add16   = ff_h264_idct_add16_8_mmx2;
+                c->h264_idct8_add4   = ff_h264_idct8_add4_8_mmx2;
+                if (chroma_format_idc == 1)
+                    c->h264_idct_add8 = ff_h264_idct_add8_8_mmx2;
+                c->h264_idct_add16intra = ff_h264_idct_add16intra_8_mmx2;
+
+                c->h264_v_loop_filter_chroma       = ff_deblock_v_chroma_8_mmx2;
+                c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_8_mmx2;
+                if (chroma_format_idc == 1) {
+                    c->h264_h_loop_filter_chroma       = ff_deblock_h_chroma_8_mmx2;
+                    c->h264_h_loop_filter_chroma_intra = ff_deblock_h_chroma_intra_8_mmx2;
+                }
+#if ARCH_X86_32
+                c->h264_v_loop_filter_luma       = ff_deblock_v_luma_8_mmx2;
+                c->h264_h_loop_filter_luma       = ff_deblock_h_luma_8_mmx2;
+                c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_mmx2;
+                c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_mmx2;
+#endif /* ARCH_X86_32 */
+                c->weight_h264_pixels_tab[0] = ff_h264_weight_16_mmx2;
+                c->weight_h264_pixels_tab[1] = ff_h264_weight_8_mmx2;
+                c->weight_h264_pixels_tab[2] = ff_h264_weight_4_mmx2;
+
+                c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_mmx2;
+                c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_mmx2;
+                c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_mmx2;
+
+                if (mm_flags & AV_CPU_FLAG_SSE2) {
+                    c->h264_idct8_add  = ff_h264_idct8_add_8_sse2;
+
+                    c->h264_idct_add16 = ff_h264_idct_add16_8_sse2;
+                    c->h264_idct8_add4 = ff_h264_idct8_add4_8_sse2;
+                    if (chroma_format_idc == 1)
+                        c->h264_idct_add8 = ff_h264_idct_add8_8_sse2;
+                    c->h264_idct_add16intra      = ff_h264_idct_add16intra_8_sse2;
+                    c->h264_luma_dc_dequant_idct = ff_h264_luma_dc_dequant_idct_sse2;
+
+                    c->weight_h264_pixels_tab[0] = ff_h264_weight_16_sse2;
+                    c->weight_h264_pixels_tab[1] = ff_h264_weight_8_sse2;
+
+                    c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_sse2;
+                    c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_sse2;
+
+#if HAVE_ALIGNED_STACK
+                    c->h264_v_loop_filter_luma       = ff_deblock_v_luma_8_sse2;
+                    c->h264_h_loop_filter_luma       = ff_deblock_h_luma_8_sse2;
+                    c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_sse2;
+                    c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_sse2;
+#endif /* HAVE_ALIGNED_STACK */
+                }
+                if (mm_flags & AV_CPU_FLAG_SSSE3) {
+                    c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_ssse3;
+                    c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_ssse3;
+                }
+                if (mm_flags & AV_CPU_FLAG_AVX) {
+#if HAVE_ALIGNED_STACK
+                    c->h264_v_loop_filter_luma       = ff_deblock_v_luma_8_avx;
+                    c->h264_h_loop_filter_luma       = ff_deblock_h_luma_8_avx;
+                    c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_avx;
+                    c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_avx;
+#endif /* HAVE_ALIGNED_STACK */
+                }
+            }
+        }
+    } else if (bit_depth == 10) {
+        if (mm_flags & AV_CPU_FLAG_MMX) {
+            if (mm_flags & AV_CPU_FLAG_MMXEXT) {
+#if ARCH_X86_32
+                c->h264_v_loop_filter_chroma       = ff_deblock_v_chroma_10_mmx2;
+                c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_mmx2;
+                c->h264_v_loop_filter_luma         = ff_deblock_v_luma_10_mmx2;
+                c->h264_h_loop_filter_luma         = ff_deblock_h_luma_10_mmx2;
+                c->h264_v_loop_filter_luma_intra   = ff_deblock_v_luma_intra_10_mmx2;
+                c->h264_h_loop_filter_luma_intra   = ff_deblock_h_luma_intra_10_mmx2;
+#endif /* ARCH_X86_32 */
+                c->h264_idct_dc_add = ff_h264_idct_dc_add_10_mmx2;
+                if (mm_flags & AV_CPU_FLAG_SSE2) {
+                    c->h264_idct_add     = ff_h264_idct_add_10_sse2;
+                    c->h264_idct8_dc_add = ff_h264_idct8_dc_add_10_sse2;
+
+                    c->h264_idct_add16 = ff_h264_idct_add16_10_sse2;
+                    if (chroma_format_idc == 1)
+                        c->h264_idct_add8 = ff_h264_idct_add8_10_sse2;
+                    c->h264_idct_add16intra = ff_h264_idct_add16intra_10_sse2;
+#if HAVE_ALIGNED_STACK
+                    c->h264_idct8_add  = ff_h264_idct8_add_10_sse2;
+                    c->h264_idct8_add4 = ff_h264_idct8_add4_10_sse2;
+#endif /* HAVE_ALIGNED_STACK */
+
+                    c->weight_h264_pixels_tab[0] = ff_h264_weight_16_10_sse2;
+                    c->weight_h264_pixels_tab[1] = ff_h264_weight_8_10_sse2;
+                    c->weight_h264_pixels_tab[2] = ff_h264_weight_4_10_sse2;
+
+                    c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_10_sse2;
+                    c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_10_sse2;
+                    c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_10_sse2;
+
+                    c->h264_v_loop_filter_chroma       = ff_deblock_v_chroma_10_sse2;
+                    c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_sse2;
+#if HAVE_ALIGNED_STACK
+                    c->h264_v_loop_filter_luma       = ff_deblock_v_luma_10_sse2;
+                    c->h264_h_loop_filter_luma       = ff_deblock_h_luma_10_sse2;
+                    c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_10_sse2;
+                    c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_sse2;
+#endif /* HAVE_ALIGNED_STACK */
+                }
+                if (mm_flags & AV_CPU_FLAG_SSE4) {
+                    c->weight_h264_pixels_tab[0] = ff_h264_weight_16_10_sse4;
+                    c->weight_h264_pixels_tab[1] = ff_h264_weight_8_10_sse4;
+                    c->weight_h264_pixels_tab[2] = ff_h264_weight_4_10_sse4;
+
+                    c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_10_sse4;
+                    c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_10_sse4;
+                    c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_10_sse4;
+                }
+#if HAVE_AVX
+                if (mm_flags & AV_CPU_FLAG_AVX) {
+                    c->h264_idct_dc_add  =
+                    c->h264_idct_add     = ff_h264_idct_add_10_avx;
+                    c->h264_idct8_dc_add = ff_h264_idct8_dc_add_10_avx;
+
+                    c->h264_idct_add16 = ff_h264_idct_add16_10_avx;
+                    if (chroma_format_idc == 1)
+                        c->h264_idct_add8 = ff_h264_idct_add8_10_avx;
+                    c->h264_idct_add16intra = ff_h264_idct_add16intra_10_avx;
+#if HAVE_ALIGNED_STACK
+                    c->h264_idct8_add  = ff_h264_idct8_add_10_avx;
+                    c->h264_idct8_add4 = ff_h264_idct8_add4_10_avx;
+#endif /* HAVE_ALIGNED_STACK */
+
+                    c->h264_v_loop_filter_chroma       = ff_deblock_v_chroma_10_avx;
+                    c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_avx;
+#if HAVE_ALIGNED_STACK
+                    c->h264_v_loop_filter_luma         = ff_deblock_v_luma_10_avx;
+                    c->h264_h_loop_filter_luma         = ff_deblock_h_luma_10_avx;
+                    c->h264_v_loop_filter_luma_intra   = ff_deblock_v_luma_intra_10_avx;
+                    c->h264_h_loop_filter_luma_intra   = ff_deblock_h_luma_intra_10_avx;
+#endif /* HAVE_ALIGNED_STACK */
+                }
+#endif /* HAVE_AVX */
+            }
+        }
+    }
+#endif /* HAVE_YASM */
+}
diff --git a/libavcodec/x86/h264dsp_mmx.c b/libavcodec/x86/h264dsp_mmx.c
deleted file mode 100644
index f24f751fb3..0000000000
--- a/libavcodec/x86/h264dsp_mmx.c
+++ /dev/null
@@ -1,385 +0,0 @@
-/*
- * Copyright (c) 2004-2005 Michael Niedermayer, Loren Merritt
- *
- * This file is part of Libav.
- *
- * Libav is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * Libav is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "libavutil/cpu.h"
-#include "libavutil/x86/asm.h"
-#include "libavcodec/h264dsp.h"
-#include "dsputil_mmx.h"
-
-/***********************************/
-/* IDCT */
-#define IDCT_ADD_FUNC(NUM, DEPTH, OPT)                                  \
-void ff_h264_idct ## NUM ## _add_ ## DEPTH ## _ ## OPT(uint8_t *dst,    \
-                                                       int16_t *block,  \
-                                                       int stride);
-
-IDCT_ADD_FUNC(, 8, mmx)
-IDCT_ADD_FUNC(, 10, sse2)
-IDCT_ADD_FUNC(_dc, 8, mmx2)
-IDCT_ADD_FUNC(_dc, 10, mmx2)
-IDCT_ADD_FUNC(8_dc, 8, mmx2)
-IDCT_ADD_FUNC(8_dc, 10, sse2)
-IDCT_ADD_FUNC(8, 8, mmx)
-IDCT_ADD_FUNC(8, 8, sse2)
-IDCT_ADD_FUNC(8, 10, sse2)
-#if HAVE_AVX
-IDCT_ADD_FUNC(, 10, avx)
-IDCT_ADD_FUNC(8_dc, 10, avx)
-IDCT_ADD_FUNC(8, 10, avx)
-#endif
-
-
-#define IDCT_ADD_REP_FUNC(NUM, REP, DEPTH, OPT)                         \
-void ff_h264_idct ## NUM ## _add ## REP ## _ ## DEPTH ## _ ## OPT       \
-    (uint8_t *dst, const int *block_offset,                             \
-     DCTELEM *block, int stride, const uint8_t nnzc[6 * 8]);
-
-IDCT_ADD_REP_FUNC(8, 4, 8, mmx)
-IDCT_ADD_REP_FUNC(8, 4, 8, mmx2)
-IDCT_ADD_REP_FUNC(8, 4, 8, sse2)
-IDCT_ADD_REP_FUNC(8, 4, 10, sse2)
-IDCT_ADD_REP_FUNC(8, 4, 10, avx)
-IDCT_ADD_REP_FUNC(, 16, 8, mmx)
-IDCT_ADD_REP_FUNC(, 16, 8, mmx2)
-IDCT_ADD_REP_FUNC(, 16, 8, sse2)
-IDCT_ADD_REP_FUNC(, 16, 10, sse2)
-IDCT_ADD_REP_FUNC(, 16intra, 8, mmx)
-IDCT_ADD_REP_FUNC(, 16intra, 8, mmx2)
-IDCT_ADD_REP_FUNC(, 16intra, 8, sse2)
-IDCT_ADD_REP_FUNC(, 16intra, 10, sse2)
-#if HAVE_AVX
-IDCT_ADD_REP_FUNC(, 16, 10, avx)
-IDCT_ADD_REP_FUNC(, 16intra, 10, avx)
-#endif
-
-
-#define IDCT_ADD_REP_FUNC2(NUM, REP, DEPTH, OPT)                      \
-void ff_h264_idct ## NUM ## _add ## REP ## _ ## DEPTH ## _ ## OPT     \
-    (uint8_t **dst, const int *block_offset,                          \
-     DCTELEM *block, int stride, const uint8_t nnzc[6 * 8]);
-
-IDCT_ADD_REP_FUNC2(, 8, 8, mmx)
-IDCT_ADD_REP_FUNC2(, 8, 8, mmx2)
-IDCT_ADD_REP_FUNC2(, 8, 8, sse2)
-IDCT_ADD_REP_FUNC2(, 8, 10, sse2)
-#if HAVE_AVX
-IDCT_ADD_REP_FUNC2(, 8, 10, avx)
-#endif
-
-void ff_h264_luma_dc_dequant_idct_mmx(DCTELEM *output, DCTELEM *input, int qmul);
-void ff_h264_luma_dc_dequant_idct_sse2(DCTELEM *output, DCTELEM *input, int qmul);
-
-/***********************************/
-/* deblocking */
-
-void ff_h264_loop_filter_strength_mmx2(int16_t bS[2][4][4], uint8_t nnz[40],
-                                       int8_t ref[2][40], int16_t mv[2][40][2],
-                                       int bidir, int edges, int step,
-                                       int mask_mv0, int mask_mv1, int field);
-
-#define LF_FUNC(DIR, TYPE, DEPTH, OPT)                                        \
-void ff_deblock_ ## DIR ## _ ## TYPE ## _ ## DEPTH ## _ ## OPT(uint8_t *pix,  \
-                                                               int stride,    \
-                                                               int alpha,     \
-                                                               int beta,      \
-                                                               int8_t *tc0);
-#define LF_IFUNC(DIR, TYPE, DEPTH, OPT) \
-void ff_deblock_ ## DIR ## _ ## TYPE ## _ ## DEPTH ## _ ## OPT(uint8_t *pix,  \
-                                                               int stride,    \
-                                                               int alpha,     \
-                                                               int beta);
-
-#define LF_FUNCS(type, depth)                   \
-LF_FUNC(h,  chroma,       depth, mmx2)          \
-LF_IFUNC(h, chroma_intra, depth, mmx2)          \
-LF_FUNC(v,  chroma,       depth, mmx2)          \
-LF_IFUNC(v, chroma_intra, depth, mmx2)          \
-LF_FUNC(h,  luma,         depth, mmx2)          \
-LF_IFUNC(h, luma_intra,   depth, mmx2)          \
-LF_FUNC(h,  luma,         depth, sse2)          \
-LF_IFUNC(h, luma_intra,   depth, sse2)          \
-LF_FUNC(v,  luma,         depth, sse2)          \
-LF_IFUNC(v, luma_intra,   depth, sse2)          \
-LF_FUNC(h,  chroma,       depth, sse2)          \
-LF_IFUNC(h, chroma_intra, depth, sse2)          \
-LF_FUNC(v,  chroma,       depth, sse2)          \
-LF_IFUNC(v, chroma_intra, depth, sse2)          \
-LF_FUNC(h,  luma,         depth, avx)           \
-LF_IFUNC(h, luma_intra,   depth, avx)           \
-LF_FUNC(v,  luma,         depth, avx)           \
-LF_IFUNC(v, luma_intra,   depth, avx)           \
-LF_FUNC(h,  chroma,       depth, avx)           \
-LF_IFUNC(h, chroma_intra, depth, avx)           \
-LF_FUNC(v,  chroma,       depth, avx)           \
-LF_IFUNC(v, chroma_intra, depth, avx)
-
-LF_FUNCS(uint8_t,   8)
-LF_FUNCS(uint16_t, 10)
-
-#if ARCH_X86_32
-LF_FUNC(v8, luma, 8, mmx2)
-static void ff_deblock_v_luma_8_mmx2(uint8_t *pix, int stride, int alpha,
-                                     int beta, int8_t *tc0)
-{
-    if ((tc0[0] & tc0[1]) >= 0)
-        ff_deblock_v8_luma_8_mmx2(pix + 0, stride, alpha, beta, tc0);
-    if ((tc0[2] & tc0[3]) >= 0)
-        ff_deblock_v8_luma_8_mmx2(pix + 8, stride, alpha, beta, tc0 + 2);
-}
-
-LF_IFUNC(v8, luma_intra, 8, mmx2)
-static void ff_deblock_v_luma_intra_8_mmx2(uint8_t *pix, int stride,
-                                           int alpha, int beta)
-{
-    ff_deblock_v8_luma_intra_8_mmx2(pix + 0, stride, alpha, beta);
-    ff_deblock_v8_luma_intra_8_mmx2(pix + 8, stride, alpha, beta);
-}
-#endif /* ARCH_X86_32 */
-
-LF_FUNC(v,  luma,       10, mmx2)
-LF_IFUNC(v, luma_intra, 10, mmx2)
-
-/***********************************/
-/* weighted prediction */
-
-#define H264_WEIGHT(W, OPT)                                             \
-void ff_h264_weight_ ## W ## _ ## OPT(uint8_t *dst, int stride,         \
-                                      int height, int log2_denom,       \
-                                      int weight, int offset);
-
-#define H264_BIWEIGHT(W, OPT)                                           \
-void ff_h264_biweight_ ## W ## _ ## OPT(uint8_t *dst, uint8_t *src,     \
-                                        int stride, int height,         \
-                                        int log2_denom, int weightd,    \
-                                        int weights, int offset);
-
-#define H264_BIWEIGHT_MMX(W)                    \
-    H264_WEIGHT(W, mmx2)                        \
-    H264_BIWEIGHT(W, mmx2)
-
-#define H264_BIWEIGHT_MMX_SSE(W)                \
-    H264_BIWEIGHT_MMX(W)                        \
-    H264_WEIGHT(W, sse2)                        \
-    H264_BIWEIGHT(W, sse2)                      \
-    H264_BIWEIGHT(W, ssse3)
-
-H264_BIWEIGHT_MMX_SSE(16)
-H264_BIWEIGHT_MMX_SSE(8)
-H264_BIWEIGHT_MMX(4)
-
-#define H264_WEIGHT_10(W, DEPTH, OPT)                                   \
-void ff_h264_weight_ ## W ## _ ## DEPTH ## _ ## OPT(uint8_t *dst,       \
-                                                    int stride,         \
-                                                    int height,         \
-                                                    int log2_denom,     \
-                                                    int weight,         \
-                                                    int offset);
-
-#define H264_BIWEIGHT_10(W, DEPTH, OPT)                                 \
-void ff_h264_biweight_ ## W ## _ ## DEPTH ## _ ## OPT(uint8_t *dst,     \
-                                                      uint8_t *src,     \
-                                                      int stride,       \
-                                                      int height,       \
-                                                      int log2_denom,   \
-                                                      int weightd,      \
-                                                      int weights,      \
-                                                      int offset);
-
-#define H264_BIWEIGHT_10_SSE(W, DEPTH)          \
-    H264_WEIGHT_10(W, DEPTH, sse2)              \
-    H264_WEIGHT_10(W, DEPTH, sse4)              \
-    H264_BIWEIGHT_10(W, DEPTH, sse2)            \
-    H264_BIWEIGHT_10(W, DEPTH, sse4)
-
-H264_BIWEIGHT_10_SSE(16, 10)
-H264_BIWEIGHT_10_SSE(8,  10)
-H264_BIWEIGHT_10_SSE(4,  10)
-
-void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
-                         const int chroma_format_idc)
-{
-#if HAVE_YASM
-    int mm_flags = av_get_cpu_flags();
-
-    if (chroma_format_idc == 1 && mm_flags & AV_CPU_FLAG_MMXEXT)
-        c->h264_loop_filter_strength = ff_h264_loop_filter_strength_mmx2;
-
-    if (bit_depth == 8) {
-        if (mm_flags & AV_CPU_FLAG_MMX) {
-            c->h264_idct_dc_add   =
-            c->h264_idct_add      = ff_h264_idct_add_8_mmx;
-            c->h264_idct8_dc_add  =
-            c->h264_idct8_add     = ff_h264_idct8_add_8_mmx;
-
-            c->h264_idct_add16 = ff_h264_idct_add16_8_mmx;
-            c->h264_idct8_add4 = ff_h264_idct8_add4_8_mmx;
-            if (chroma_format_idc == 1)
-                c->h264_idct_add8 = ff_h264_idct_add8_8_mmx;
-            c->h264_idct_add16intra = ff_h264_idct_add16intra_8_mmx;
-            if (mm_flags & AV_CPU_FLAG_CMOV)
-                c->h264_luma_dc_dequant_idct = ff_h264_luma_dc_dequant_idct_mmx;
-
-            if (mm_flags & AV_CPU_FLAG_MMXEXT) {
-                c->h264_idct_dc_add  = ff_h264_idct_dc_add_8_mmx2;
-                c->h264_idct8_dc_add = ff_h264_idct8_dc_add_8_mmx2;
-                c->h264_idct_add16   = ff_h264_idct_add16_8_mmx2;
-                c->h264_idct8_add4   = ff_h264_idct8_add4_8_mmx2;
-                if (chroma_format_idc == 1)
-                    c->h264_idct_add8 = ff_h264_idct_add8_8_mmx2;
-                c->h264_idct_add16intra = ff_h264_idct_add16intra_8_mmx2;
-
-                c->h264_v_loop_filter_chroma       = ff_deblock_v_chroma_8_mmx2;
-                c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_8_mmx2;
-                if (chroma_format_idc == 1) {
-                    c->h264_h_loop_filter_chroma       = ff_deblock_h_chroma_8_mmx2;
-                    c->h264_h_loop_filter_chroma_intra = ff_deblock_h_chroma_intra_8_mmx2;
-                }
-#if ARCH_X86_32
-                c->h264_v_loop_filter_luma       = ff_deblock_v_luma_8_mmx2;
-                c->h264_h_loop_filter_luma       = ff_deblock_h_luma_8_mmx2;
-                c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_mmx2;
-                c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_mmx2;
-#endif /* ARCH_X86_32 */
-                c->weight_h264_pixels_tab[0] = ff_h264_weight_16_mmx2;
-                c->weight_h264_pixels_tab[1] = ff_h264_weight_8_mmx2;
-                c->weight_h264_pixels_tab[2] = ff_h264_weight_4_mmx2;
-
-                c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_mmx2;
-                c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_mmx2;
-                c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_mmx2;
-
-                if (mm_flags & AV_CPU_FLAG_SSE2) {
-                    c->h264_idct8_add  = ff_h264_idct8_add_8_sse2;
-
-                    c->h264_idct_add16 = ff_h264_idct_add16_8_sse2;
-                    c->h264_idct8_add4 = ff_h264_idct8_add4_8_sse2;
-                    if (chroma_format_idc == 1)
-                        c->h264_idct_add8 = ff_h264_idct_add8_8_sse2;
-                    c->h264_idct_add16intra      = ff_h264_idct_add16intra_8_sse2;
-                    c->h264_luma_dc_dequant_idct = ff_h264_luma_dc_dequant_idct_sse2;
-
-                    c->weight_h264_pixels_tab[0] = ff_h264_weight_16_sse2;
-                    c->weight_h264_pixels_tab[1] = ff_h264_weight_8_sse2;
-
-                    c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_sse2;
-                    c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_sse2;
-
-#if HAVE_ALIGNED_STACK
-                    c->h264_v_loop_filter_luma       = ff_deblock_v_luma_8_sse2;
-                    c->h264_h_loop_filter_luma       = ff_deblock_h_luma_8_sse2;
-                    c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_sse2;
-                    c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_sse2;
-#endif /* HAVE_ALIGNED_STACK */
-                }
-                if (mm_flags & AV_CPU_FLAG_SSSE3) {
-                    c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_ssse3;
-                    c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_ssse3;
-                }
-                if (mm_flags & AV_CPU_FLAG_AVX) {
-#if HAVE_ALIGNED_STACK
-                    c->h264_v_loop_filter_luma       = ff_deblock_v_luma_8_avx;
-                    c->h264_h_loop_filter_luma       = ff_deblock_h_luma_8_avx;
-                    c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_avx;
-                    c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_avx;
-#endif /* HAVE_ALIGNED_STACK */
-                }
-            }
-        }
-    } else if (bit_depth == 10) {
-        if (mm_flags & AV_CPU_FLAG_MMX) {
-            if (mm_flags & AV_CPU_FLAG_MMXEXT) {
-#if ARCH_X86_32
-                c->h264_v_loop_filter_chroma       = ff_deblock_v_chroma_10_mmx2;
-                c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_mmx2;
-                c->h264_v_loop_filter_luma         = ff_deblock_v_luma_10_mmx2;
-                c->h264_h_loop_filter_luma         = ff_deblock_h_luma_10_mmx2;
-                c->h264_v_loop_filter_luma_intra   = ff_deblock_v_luma_intra_10_mmx2;
-                c->h264_h_loop_filter_luma_intra   = ff_deblock_h_luma_intra_10_mmx2;
-#endif /* ARCH_X86_32 */
-                c->h264_idct_dc_add = ff_h264_idct_dc_add_10_mmx2;
-                if (mm_flags & AV_CPU_FLAG_SSE2) {
-                    c->h264_idct_add     = ff_h264_idct_add_10_sse2;
-                    c->h264_idct8_dc_add = ff_h264_idct8_dc_add_10_sse2;
-
-                    c->h264_idct_add16 = ff_h264_idct_add16_10_sse2;
-                    if (chroma_format_idc == 1)
-                        c->h264_idct_add8 = ff_h264_idct_add8_10_sse2;
-                    c->h264_idct_add16intra = ff_h264_idct_add16intra_10_sse2;
-#if HAVE_ALIGNED_STACK
-                    c->h264_idct8_add  = ff_h264_idct8_add_10_sse2;
-                    c->h264_idct8_add4 = ff_h264_idct8_add4_10_sse2;
-#endif /* HAVE_ALIGNED_STACK */
-
-                    c->weight_h264_pixels_tab[0] = ff_h264_weight_16_10_sse2;
-                    c->weight_h264_pixels_tab[1] = ff_h264_weight_8_10_sse2;
-                    c->weight_h264_pixels_tab[2] = ff_h264_weight_4_10_sse2;
-
-                    c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_10_sse2;
-                    c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_10_sse2;
-                    c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_10_sse2;
-
-                    c->h264_v_loop_filter_chroma       = ff_deblock_v_chroma_10_sse2;
-                    c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_sse2;
-#if HAVE_ALIGNED_STACK
-                    c->h264_v_loop_filter_luma       = ff_deblock_v_luma_10_sse2;
-                    c->h264_h_loop_filter_luma       = ff_deblock_h_luma_10_sse2;
-                    c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_10_sse2;
-                    c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_sse2;
-#endif /* HAVE_ALIGNED_STACK */
-                }
-                if (mm_flags & AV_CPU_FLAG_SSE4) {
-                    c->weight_h264_pixels_tab[0] = ff_h264_weight_16_10_sse4;
-                    c->weight_h264_pixels_tab[1] = ff_h264_weight_8_10_sse4;
-                    c->weight_h264_pixels_tab[2] = ff_h264_weight_4_10_sse4;
-
-                    c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_10_sse4;
-                    c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_10_sse4;
-                    c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_10_sse4;
-                }
-#if HAVE_AVX
-                if (mm_flags & AV_CPU_FLAG_AVX) {
-                    c->h264_idct_dc_add  =
-                    c->h264_idct_add     = ff_h264_idct_add_10_avx;
-                    c->h264_idct8_dc_add = ff_h264_idct8_dc_add_10_avx;
-
-                    c->h264_idct_add16 = ff_h264_idct_add16_10_avx;
-                    if (chroma_format_idc == 1)
-                        c->h264_idct_add8 = ff_h264_idct_add8_10_avx;
-                    c->h264_idct_add16intra = ff_h264_idct_add16intra_10_avx;
-#if HAVE_ALIGNED_STACK
-                    c->h264_idct8_add  = ff_h264_idct8_add_10_avx;
-                    c->h264_idct8_add4 = ff_h264_idct8_add4_10_avx;
-#endif /* HAVE_ALIGNED_STACK */
-
-                    c->h264_v_loop_filter_chroma       = ff_deblock_v_chroma_10_avx;
-                    c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_avx;
-#if HAVE_ALIGNED_STACK
-                    c->h264_v_loop_filter_luma         = ff_deblock_v_luma_10_avx;
-                    c->h264_h_loop_filter_luma         = ff_deblock_h_luma_10_avx;
-                    c->h264_v_loop_filter_luma_intra   = ff_deblock_v_luma_intra_10_avx;
-                    c->h264_h_loop_filter_luma_intra   = ff_deblock_h_luma_intra_10_avx;
-#endif /* HAVE_ALIGNED_STACK */
-                }
-#endif /* HAVE_AVX */
-            }
-        }
-    }
-#endif /* HAVE_YASM */
-}
-- 
cgit v1.2.3