From 6054cd25b4d7dce97c4fa3cc6e4757ba1e59ab86 Mon Sep 17 00:00:00 2001 From: Justin Ruggles Date: Thu, 2 Jun 2011 14:00:50 -0400 Subject: ac3enc: add int32_t array clipping function to DSPUtil, including x86 versions. --- libavcodec/dsputil.c | 17 ++++++ libavcodec/dsputil.h | 16 ++++++ libavcodec/x86/dsputil_mmx.c | 23 ++++++++ libavcodec/x86/dsputil_yasm.asm | 115 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 171 insertions(+) (limited to 'libavcodec') diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c index 4389289d82..4f17b435d1 100644 --- a/libavcodec/dsputil.c +++ b/libavcodec/dsputil.c @@ -2676,6 +2676,22 @@ static void apply_window_int16_c(int16_t *output, const int16_t *input, } } +static void vector_clip_int32_c(int32_t *dst, const int32_t *src, int32_t min, + int32_t max, unsigned int len) +{ + do { + *dst++ = av_clip(*src++, min, max); + *dst++ = av_clip(*src++, min, max); + *dst++ = av_clip(*src++, min, max); + *dst++ = av_clip(*src++, min, max); + *dst++ = av_clip(*src++, min, max); + *dst++ = av_clip(*src++, min, max); + *dst++ = av_clip(*src++, min, max); + *dst++ = av_clip(*src++, min, max); + len -= 8; + } while (len > 0); +} + #define W0 2048 #define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */ #define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */ @@ -3122,6 +3138,7 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx) c->scalarproduct_int16 = scalarproduct_int16_c; c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c; c->apply_window_int16 = apply_window_int16_c; + c->vector_clip_int32 = vector_clip_int32_c; c->scalarproduct_float = scalarproduct_float_c; c->butterflies_float = butterflies_float_c; c->vector_fmul_scalar = vector_fmul_scalar_c; diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h index ea135ca1ba..ef2956eecb 100644 --- a/libavcodec/dsputil.h +++ b/libavcodec/dsputil.h @@ -555,6 +555,22 @@ typedef struct DSPContext { void (*apply_window_int16)(int16_t *output, const int16_t *input, const 
int16_t *window, unsigned int len); + /** + * Clip each element in an array of int32_t to a given minimum and maximum value. + * @param dst destination array + * constraints: 16-byte aligned + * @param src source array + * constraints: 16-byte aligned + * @param min minimum value + * constraints: must be in the range [-(1<<24), 1<<24] + * @param max maximum value + * constraints: must be in the range [-(1<<24), 1<<24] + * @param len number of elements in the array + * constraints: multiple of 32 greater than zero + */ + void (*vector_clip_int32)(int32_t *dst, const int32_t *src, int32_t min, + int32_t max, unsigned int len); + /* rv30 functions */ qpel_mc_func put_rv30_tpel_pixels_tab[4][16]; qpel_mc_func avg_rv30_tpel_pixels_tab[4][16]; diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c index 4d4bbc5b6f..03c094533f 100644 --- a/libavcodec/x86/dsputil_mmx.c +++ b/libavcodec/x86/dsputil_mmx.c @@ -2429,6 +2429,15 @@ int ff_add_hfyu_left_prediction_sse4(uint8_t *dst, const uint8_t *src, int w, i float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order); +void ff_vector_clip_int32_mmx (int32_t *dst, const int32_t *src, int32_t min, + int32_t max, unsigned int len); +void ff_vector_clip_int32_sse2 (int32_t *dst, const int32_t *src, int32_t min, + int32_t max, unsigned int len); +void ff_vector_clip_int32_sse2_int(int32_t *dst, const int32_t *src, int32_t min, + int32_t max, unsigned int len); +void ff_vector_clip_int32_sse41 (int32_t *dst, const int32_t *src, int32_t min, + int32_t max, unsigned int len); + void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) { int mm_flags = av_get_cpu_flags(); @@ -2570,6 +2579,8 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) c->put_rv40_chroma_pixels_tab[0]= ff_put_rv40_chroma_mc8_mmx; c->put_rv40_chroma_pixels_tab[1]= ff_put_rv40_chroma_mc4_mmx; + + c->vector_clip_int32 = ff_vector_clip_int32_mmx; #endif if (mm_flags & AV_CPU_FLAG_MMX2) { @@ -2855,6 +2866,11 @@ void 
dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) #if HAVE_YASM c->scalarproduct_int16 = ff_scalarproduct_int16_sse2; c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_sse2; + if (mm_flags & AV_CPU_FLAG_ATOM) { + c->vector_clip_int32 = ff_vector_clip_int32_sse2_int; + } else { + c->vector_clip_int32 = ff_vector_clip_int32_sse2; + } if (avctx->flags & CODEC_FLAG_BITEXACT) { c->apply_window_int16 = ff_apply_window_int16_sse2_ba; } else { @@ -2880,6 +2896,13 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) } #endif } + + if (mm_flags & AV_CPU_FLAG_SSE4 && HAVE_SSE) { +#if HAVE_YASM + c->vector_clip_int32 = ff_vector_clip_int32_sse41; +#endif + } + #if HAVE_AVX && HAVE_YASM if (mm_flags & AV_CPU_FLAG_AVX) { if (bit_depth == 10) { diff --git a/libavcodec/x86/dsputil_yasm.asm b/libavcodec/x86/dsputil_yasm.asm index 8b19cc1441..4e1ec24a7a 100644 --- a/libavcodec/x86/dsputil_yasm.asm +++ b/libavcodec/x86/dsputil_yasm.asm @@ -1048,3 +1048,118 @@ emu_edge sse %ifdef ARCH_X86_32 emu_edge mmx %endif + +;----------------------------------------------------------------------------- +; void ff_vector_clip_int32(int32_t *dst, const int32_t *src, int32_t min, +; int32_t max, unsigned int len) +;----------------------------------------------------------------------------- + +%macro PMINSD_MMX 3 ; dst, src, tmp + mova %3, %2 + pcmpgtd %3, %1 + pxor %1, %2 + pand %1, %3 + pxor %1, %2 +%endmacro + +%macro PMAXSD_MMX 3 ; dst, src, tmp + mova %3, %1 + pcmpgtd %3, %2 + pand %1, %3 + pandn %3, %2 + por %1, %3 +%endmacro + +%macro CLIPD_MMX 3-4 ; src/dst, min, max, tmp + PMINSD_MMX %1, %3, %4 + PMAXSD_MMX %1, %2, %4 +%endmacro + +%macro CLIPD_SSE2 3-4 ; src/dst, min (float), max (float), unused + cvtdq2ps %1, %1 + minps %1, %3 + maxps %1, %2 + cvtps2dq %1, %1 +%endmacro + +%macro CLIPD_SSE41 3-4 ; src/dst, min, max, unused + pminsd %1, %3 + pmaxsd %1, %2 +%endmacro + +%macro SPLATD_MMX 1 + punpckldq %1, %1 +%endmacro + +%macro SPLATD_SSE2 1 + pshufd %1, 
%1, 0 +%endmacro + +%macro VECTOR_CLIP_INT32 4 +cglobal vector_clip_int32_%1, 5,5,%2, dst, src, min, max, len +%ifidn %1, sse2 + cvtsi2ss m4, minm + cvtsi2ss m5, maxm +%else + movd m4, minm + movd m5, maxm +%endif + SPLATD m4 + SPLATD m5 +.loop: +%assign %%i 0 ; index of the first mmsize block handled by this unrolled copy +%rep %3 + mova m0, [srcq+mmsize*(0+%%i)] + mova m1, [srcq+mmsize*(1+%%i)] + mova m2, [srcq+mmsize*(2+%%i)] + mova m3, [srcq+mmsize*(3+%%i)] +%if %4 + mova m7, [srcq+mmsize*(4+%%i)] + mova m8, [srcq+mmsize*(5+%%i)] + mova m9, [srcq+mmsize*(6+%%i)] + mova m10, [srcq+mmsize*(7+%%i)] +%endif + CLIPD m0, m4, m5, m6 + CLIPD m1, m4, m5, m6 + CLIPD m2, m4, m5, m6 + CLIPD m3, m4, m5, m6 +%if %4 + CLIPD m7, m4, m5, m6 + CLIPD m8, m4, m5, m6 + CLIPD m9, m4, m5, m6 + CLIPD m10, m4, m5, m6 +%endif + mova [dstq+mmsize*(0+%%i)], m0 + mova [dstq+mmsize*(1+%%i)], m1 + mova [dstq+mmsize*(2+%%i)], m2 + mova [dstq+mmsize*(3+%%i)], m3 +%if %4 + mova [dstq+mmsize*(4+%%i)], m7 + mova [dstq+mmsize*(5+%%i)], m8 + mova [dstq+mmsize*(6+%%i)], m9 + mova [dstq+mmsize*(7+%%i)], m10 +%endif +%assign %%i %%i+4*(%4+1) +%endrep + add srcq, mmsize*4*(%3+%4) + add dstq, mmsize*4*(%3+%4) + sub lend, mmsize*(%3+%4) + jg .loop + REP_RET +%endmacro + +INIT_MMX +%define SPLATD SPLATD_MMX +%define CLIPD CLIPD_MMX +VECTOR_CLIP_INT32 mmx, 0, 1, 0 +INIT_XMM +%define SPLATD SPLATD_SSE2 +VECTOR_CLIP_INT32 sse2_int, 6, 1, 0 +%define CLIPD CLIPD_SSE2 +VECTOR_CLIP_INT32 sse2, 6, 2, 0 +%define CLIPD CLIPD_SSE41 +%ifdef m8 +VECTOR_CLIP_INT32 sse41, 11, 1, 1 +%else +VECTOR_CLIP_INT32 sse41, 6, 1, 0 +%endif -- cgit v1.2.3 From 523b7eba19590652b7ba19c5bdd85dd257bfe4f7 Mon Sep 17 00:00:00 2001 From: Justin Ruggles Date: Mon, 27 Jun 2011 14:29:33 -0400 Subject: ac3enc: clip coefficients after MDCT. This ensures that any processing between the MDCT and exponent extraction will be using clipped coefficients. 
--- libavcodec/ac3enc.h | 4 ++++ libavcodec/ac3enc_fixed.c | 9 +++++++++ libavcodec/ac3enc_float.c | 9 +++++++++ libavcodec/ac3enc_template.c | 16 +++++++++++++--- 4 files changed, 35 insertions(+), 3 deletions(-) (limited to 'libavcodec') diff --git a/libavcodec/ac3enc.h b/libavcodec/ac3enc.h index be62656650..54f427a523 100644 --- a/libavcodec/ac3enc.h +++ b/libavcodec/ac3enc.h @@ -50,12 +50,16 @@ #if CONFIG_AC3ENC_FLOAT #define AC3_NAME(x) ff_ac3_float_ ## x #define MAC_COEF(d,a,b) ((d)+=(a)*(b)) +#define COEF_MIN (-16777215.0/16777216.0) +#define COEF_MAX ( 16777215.0/16777216.0) typedef float SampleType; typedef float CoefType; typedef float CoefSumType; #else #define AC3_NAME(x) ff_ac3_fixed_ ## x #define MAC_COEF(d,a,b) MAC64(d,a,b) +#define COEF_MIN -16777215 +#define COEF_MAX 16777215 typedef int16_t SampleType; typedef int32_t CoefType; typedef int64_t CoefSumType; diff --git a/libavcodec/ac3enc_fixed.c b/libavcodec/ac3enc_fixed.c index d55720eb80..ea3a46cdfa 100644 --- a/libavcodec/ac3enc_fixed.c +++ b/libavcodec/ac3enc_fixed.c @@ -104,6 +104,15 @@ static void scale_coefficients(AC3EncodeContext *s) } +/** + * Clip MDCT coefficients to allowable range. + */ +static void clip_coefficients(DSPContext *dsp, int32_t *coef, unsigned int len) +{ + dsp->vector_clip_int32(coef, coef, COEF_MIN, COEF_MAX, len); +} + + static av_cold int ac3_fixed_encode_init(AVCodecContext *avctx) { AC3EncodeContext *s = avctx->priv_data; diff --git a/libavcodec/ac3enc_float.c b/libavcodec/ac3enc_float.c index 12d6b19241..718cc1f2b2 100644 --- a/libavcodec/ac3enc_float.c +++ b/libavcodec/ac3enc_float.c @@ -111,6 +111,15 @@ static void scale_coefficients(AC3EncodeContext *s) } +/** + * Clip MDCT coefficients to allowable range. 
+ */ +static void clip_coefficients(DSPContext *dsp, float *coef, unsigned int len) +{ + dsp->vector_clipf(coef, coef, COEF_MIN, COEF_MAX, len); +} + + #if CONFIG_AC3_ENCODER AVCodec ff_ac3_encoder = { "ac3", diff --git a/libavcodec/ac3enc_template.c b/libavcodec/ac3enc_template.c index 85eea54a4a..c7243c7644 100644 --- a/libavcodec/ac3enc_template.c +++ b/libavcodec/ac3enc_template.c @@ -41,6 +41,8 @@ static void apply_window(DSPContext *dsp, SampleType *output, static int normalize_samples(AC3EncodeContext *s); +static void clip_coefficients(DSPContext *dsp, CoefType *coef, unsigned int len); + int AC3_NAME(allocate_sample_buffers)(AC3EncodeContext *s) { @@ -171,8 +173,8 @@ static void apply_channel_coupling(AC3EncodeContext *s) cpl_coef[i] += ch_coef[i]; } - /* coefficients must be clipped to +/- 1.0 in order to be encoded */ - s->dsp.vector_clipf(cpl_coef, cpl_coef, -1.0f, 1.0f, num_cpl_coefs); + /* coefficients must be clipped in order to be encoded */ + clip_coefficients(&s->dsp, cpl_coef, num_cpl_coefs); /* scale coupling coefficients from float to 24-bit fixed-point */ s->ac3dsp.float_to_fixed24(&block->fixed_coef[CPL_CH][cpl_start], @@ -300,6 +302,7 @@ static void apply_channel_coupling(AC3EncodeContext *s) if (!block->cpl_in_use || !block->new_cpl_coords) continue; + clip_coefficients(&s->dsp, cpl_coords[blk][1], s->fbw_channels * 16); s->ac3dsp.float_to_fixed24(fixed_cpl_coords[blk][1], cpl_coords[blk][1], s->fbw_channels * 16); @@ -433,7 +436,11 @@ int AC3_NAME(encode_frame)(AVCodecContext *avctx, unsigned char *frame, apply_mdct(s); - scale_coefficients(s); + if (s->fixed_point) + scale_coefficients(s); + + clip_coefficients(&s->dsp, s->blocks[0].mdct_coef[1], + AC3_MAX_COEFS * AC3_MAX_BLOCKS * s->channels); s->cpl_on = s->cpl_enabled; ff_ac3_compute_coupling_strategy(s); @@ -443,6 +450,9 @@ int AC3_NAME(encode_frame)(AVCodecContext *avctx, unsigned char *frame, compute_rematrixing_strategy(s); + if (!s->fixed_point) + scale_coefficients(s); + 
ff_ac3_apply_rematrixing(s); ff_ac3_process_exponents(s); -- cgit v1.2.3 From 8b7b2d6aaee8cef0051beb3a4cf3e1c5a87cf40f Mon Sep 17 00:00:00 2001 From: Justin Ruggles Date: Thu, 16 Jun 2011 16:55:33 -0400 Subject: ac3dsp: simplify extract_exponents() now that it does not need to do clipping. --- libavcodec/ac3dsp.c | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) (limited to 'libavcodec') diff --git a/libavcodec/ac3dsp.c b/libavcodec/ac3dsp.c index 8ce5f8d2c5..98c73573cb 100644 --- a/libavcodec/ac3dsp.c +++ b/libavcodec/ac3dsp.c @@ -164,21 +164,8 @@ static void ac3_extract_exponents_c(uint8_t *exp, int32_t *coef, int nb_coefs) int i; for (i = 0; i < nb_coefs; i++) { - int e; int v = abs(coef[i]); - if (v == 0) - e = 24; - else { - e = 23 - av_log2(v); - if (e >= 24) { - e = 24; - coef[i] = 0; - } else if (e < 0) { - e = 0; - coef[i] = av_clip(coef[i], -16777215, 16777215); - } - } - exp[i] = e; + exp[i] = v ? 23 - av_log2(v) : 24; } } -- cgit v1.2.3 From f99a5ef92e5aba87a2d861822274147c994041d5 Mon Sep 17 00:00:00 2001 From: Justin Ruggles Date: Thu, 30 Jun 2011 17:48:44 -0400 Subject: ac3dsp: add x86-optimized versions of ac3dsp.extract_exponents(). 
--- libavcodec/x86/ac3dsp.asm | 102 ++++++++++++++++++++++++++++++++++++++++++++ libavcodec/x86/ac3dsp_mmx.c | 9 ++++ 2 files changed, 111 insertions(+) (limited to 'libavcodec') diff --git a/libavcodec/x86/ac3dsp.asm b/libavcodec/x86/ac3dsp.asm index 6892ec2765..c1b0906a85 100644 --- a/libavcodec/x86/ac3dsp.asm +++ b/libavcodec/x86/ac3dsp.asm @@ -32,6 +32,11 @@ cextern ac3_bap_bits pw_bap_mul1: dw 21846, 21846, 0, 32768, 21846, 21846, 0, 32768 pw_bap_mul2: dw 5, 7, 0, 7, 5, 7, 0, 7 +; used in ff_ac3_extract_exponents() +pd_1: times 4 dd 1 +pd_151: times 4 dd 151 +pb_shuf_4dwb: db 0, 4, 8, 12 + SECTION .text ;----------------------------------------------------------------------------- @@ -346,3 +351,100 @@ cglobal ac3_compute_mantissa_size_sse2, 1,2,4, mant_cnt, sum movd eax, m0 add eax, sumd RET + +;------------------------------------------------------------------------------ +; void ff_ac3_extract_exponents(uint8_t *exp, int32_t *coef, int nb_coefs) +;------------------------------------------------------------------------------ + +%macro PABSD_MMX 2 ; src/dst, tmp + pxor %2, %2 + pcmpgtd %2, %1 + pxor %1, %2 + psubd %1, %2 +%endmacro + +%macro PABSD_SSSE3 1-2 ; src/dst, unused + pabsd %1, %1 +%endmacro + +%ifdef HAVE_AMD3DNOW +INIT_MMX +cglobal ac3_extract_exponents_3dnow, 3,3,0, exp, coef, len + add expq, lenq + lea coefq, [coefq+4*lenq] + neg lenq + movq m3, [pd_1] + movq m4, [pd_151] +.loop: + movq m0, [coefq+4*lenq ] + movq m1, [coefq+4*lenq+8] + PABSD_MMX m0, m2 + PABSD_MMX m1, m2 + pslld m0, 1 + por m0, m3 + pi2fd m2, m0 + psrld m2, 23 + movq m0, m4 + psubd m0, m2 + pslld m1, 1 + por m1, m3 + pi2fd m2, m1 + psrld m2, 23 + movq m1, m4 + psubd m1, m2 + packssdw m0, m0 + packuswb m0, m0 + packssdw m1, m1 + packuswb m1, m1 + punpcklwd m0, m1 + movd [expq+lenq], m0 + add lenq, 4 + jl .loop + REP_RET +%endif + +%macro AC3_EXTRACT_EXPONENTS 1 +cglobal ac3_extract_exponents_%1, 3,3,5, exp, coef, len + add expq, lenq + lea coefq, [coefq+4*lenq] + neg lenq + mova 
m2, [pd_1] + mova m3, [pd_151] +%ifidn %1, ssse3 ; + movd m4, [pb_shuf_4dwb] +%endif +.loop: + ; move 4 32-bit coefs to xmm0 + mova m0, [coefq+4*lenq] + ; absolute value + PABSD m0, m1 + ; convert to float and extract exponents + pslld m0, 1 + por m0, m2 + cvtdq2ps m1, m0 + psrld m1, 23 + mova m0, m3 + psubd m0, m1 + ; move the lowest byte in each of 4 dwords to the low dword +%ifidn %1, ssse3 + pshufb m0, m4 +%else + packssdw m0, m0 + packuswb m0, m0 +%endif + movd [expq+lenq], m0 + + add lenq, 4 + jl .loop + REP_RET +%endmacro + +%ifdef HAVE_SSE +INIT_XMM +%define PABSD PABSD_MMX +AC3_EXTRACT_EXPONENTS sse2 +%ifdef HAVE_SSSE3 +%define PABSD PABSD_SSSE3 +AC3_EXTRACT_EXPONENTS ssse3 +%endif +%endif diff --git a/libavcodec/x86/ac3dsp_mmx.c b/libavcodec/x86/ac3dsp_mmx.c index 2664736bb6..692d240d4c 100644 --- a/libavcodec/x86/ac3dsp_mmx.c +++ b/libavcodec/x86/ac3dsp_mmx.c @@ -44,6 +44,10 @@ extern void ff_float_to_fixed24_sse2 (int32_t *dst, const float *src, unsigned i extern int ff_ac3_compute_mantissa_size_sse2(uint16_t mant_cnt[6][16]); +extern void ff_ac3_extract_exponents_3dnow(uint8_t *exp, int32_t *coef, int nb_coefs); +extern void ff_ac3_extract_exponents_sse2 (uint8_t *exp, int32_t *coef, int nb_coefs); +extern void ff_ac3_extract_exponents_ssse3(uint8_t *exp, int32_t *coef, int nb_coefs); + av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact) { int mm_flags = av_get_cpu_flags(); @@ -56,6 +60,7 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact) c->ac3_rshift_int32 = ff_ac3_rshift_int32_mmx; } if (mm_flags & AV_CPU_FLAG_3DNOW && HAVE_AMD3DNOW) { + c->extract_exponents = ff_ac3_extract_exponents_3dnow; if (!bit_exact) { c->float_to_fixed24 = ff_float_to_fixed24_3dnow; } @@ -72,6 +77,7 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact) c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_sse2; c->float_to_fixed24 = ff_float_to_fixed24_sse2; c->compute_mantissa_size = ff_ac3_compute_mantissa_size_sse2; + 
c->extract_exponents = ff_ac3_extract_exponents_sse2; if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW)) { c->ac3_lshift_int16 = ff_ac3_lshift_int16_sse2; c->ac3_rshift_int32 = ff_ac3_rshift_int32_sse2; @@ -79,6 +85,9 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact) } if (mm_flags & AV_CPU_FLAG_SSSE3 && HAVE_SSSE3) { c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_ssse3; + if (!(mm_flags & AV_CPU_FLAG_ATOM)) { + c->extract_exponents = ff_ac3_extract_exponents_ssse3; + } } #endif } -- cgit v1.2.3 From 23ce6e72123a40895baaeefeb27c7c18748bd67e Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Fri, 1 Jul 2011 12:47:36 +0100 Subject: get_bits: remove x86 inline asm in A32 bitstream reader x86 does not use this variant so having inline asm there is pointless. Signed-off-by: Mans Rullgard --- libavcodec/get_bits.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'libavcodec') diff --git a/libavcodec/get_bits.h b/libavcodec/get_bits.h index b592b9a9a5..d2ae345315 100644 --- a/libavcodec/get_bits.h +++ b/libavcodec/get_bits.h @@ -201,19 +201,11 @@ static inline void skip_bits_long(GetBitContext *s, int n){ } \ } while (0) -#if ARCH_X86 -# define SKIP_CACHE(name, gb, num) \ - __asm__("shldl %2, %1, %0 \n\t" \ - "shll %2, %1 \n\t" \ - : "+r" (name##_cache0), "+r" (name##_cache1) \ - : "Ic" ((uint8_t)(num))) -#else # define SKIP_CACHE(name, gb, num) do { \ name##_cache0 <<= (num); \ name##_cache0 |= NEG_USR32(name##_cache1,num); \ name##_cache1 <<= (num); \ } while (0) -#endif # define SKIP_COUNTER(name, gb, num) name##_bit_count += (num) -- cgit v1.2.3