From a75529e81ea14c6d7c449e205aef97a0acb2645f Mon Sep 17 00:00:00 2001 From: Alexander Strange Date: Mon, 28 Mar 2011 21:44:38 -0700 Subject: mimic: implement multithreading. --- libavcodec/mimic.c | 60 ++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 49 insertions(+), 11 deletions(-) (limited to 'libavcodec') diff --git a/libavcodec/mimic.c b/libavcodec/mimic.c index a12f6383dd..133d26ffd7 100644 --- a/libavcodec/mimic.c +++ b/libavcodec/mimic.c @@ -27,6 +27,7 @@ #include "get_bits.h" #include "bytestream.h" #include "dsputil.h" +#include "thread.h" #define MIMIC_HEADER_SIZE 20 @@ -51,6 +52,10 @@ typedef struct { ScanTable scantable; DSPContext dsp; VLC vlc; + + /* Kept in the context so multithreading can have a constant to read from */ + int next_cur_index; + int next_prev_index; } MimicContext; static const uint32_t huffcodes[] = { @@ -121,6 +126,23 @@ static av_cold int mimic_decode_init(AVCodecContext *avctx) return 0; } +static int mimic_decode_update_thread_context(AVCodecContext *avctx, const AVCodecContext *avctx_from) +{ + MimicContext *dst = avctx->priv_data, *src = avctx_from->priv_data; + + if (avctx == avctx_from) return 0; + + dst->cur_index = src->next_cur_index; + dst->prev_index = src->next_prev_index; + + memcpy(dst->buf_ptrs, src->buf_ptrs, sizeof(src->buf_ptrs)); + memcpy(dst->flipped_ptrs, src->flipped_ptrs, sizeof(src->flipped_ptrs)); + + memset(&dst->buf_ptrs[dst->cur_index], 0, sizeof(AVFrame)); + + return 0; +} + static const int8_t vlcdec_lookup[9][64] = { { 0, }, { -1, 1, }, @@ -205,7 +227,7 @@ static int vlc_decode_block(MimicContext *ctx, int num_coeffs, int qscale) static int decode(MimicContext *ctx, int quality, int num_coeffs, int is_iframe) { - int y, x, plane; + int y, x, plane, cur_row = 0; for(plane = 0; plane < 3; plane++) { const int is_chroma = !!plane; @@ -236,6 +258,7 @@ static int decode(MimicContext *ctx, int quality, int num_coeffs, int index = (ctx->cur_index+backref)&15; uint8_t *p = ctx->flipped_ptrs[index].data[0]; + ff_thread_await_progress(&ctx->buf_ptrs[index], cur_row, 0); if(p) { p += src - ctx->flipped_ptrs[ctx->prev_index].data[plane]; @@ -246,6 +269,7 @@ static int decode(MimicContext *ctx, int quality, int num_coeffs, } } } else { + ff_thread_await_progress(&ctx->buf_ptrs[ctx->prev_index], cur_row, 0); ctx->dsp.put_pixels_tab[1][0](dst, src, stride, 8); } src += 8; @@ -253,6 +277,8 @@ static int decode(MimicContext *ctx, int quality, int num_coeffs, } src += (stride - ctx->num_hblocks[plane])<<3; dst += (stride - ctx->num_hblocks[plane])<<3; + + ff_thread_report_progress(&ctx->buf_ptrs[ctx->cur_index], cur_row++, 0); } } @@ -326,14 +352,20 @@ static int mimic_decode_frame(AVCodecContext *avctx, void *data, } ctx->buf_ptrs[ctx->cur_index].reference = 1; - if(avctx->get_buffer(avctx, &ctx->buf_ptrs[ctx->cur_index])) { + ctx->buf_ptrs[ctx->cur_index].pict_type = is_pframe ? FF_P_TYPE:FF_I_TYPE; + if(ff_thread_get_buffer(avctx, &ctx->buf_ptrs[ctx->cur_index])) { av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n"); return -1; } + ctx->next_prev_index = ctx->cur_index; + ctx->next_cur_index = (ctx->cur_index - 1) & 15; + prepare_avpic(ctx, &ctx->flipped_ptrs[ctx->cur_index], (AVPicture*) &ctx->buf_ptrs[ctx->cur_index]); + ff_thread_finish_setup(avctx); + av_fast_malloc(&ctx->swap_buf, &ctx->swap_buf_size, swap_buf_size + FF_INPUT_BUFFER_PADDING_SIZE); if(!ctx->swap_buf) @@ -345,21 +377,23 @@ static int mimic_decode_frame(AVCodecContext *avctx, void *data, init_get_bits(&ctx->gb, ctx->swap_buf, swap_buf_size << 3); if(!decode(ctx, quality, num_coeffs, !is_pframe)) { - avctx->release_buffer(avctx, &ctx->buf_ptrs[ctx->cur_index]); - return -1; + if (avctx->active_thread_type&FF_THREAD_FRAME) + ff_thread_report_progress(&ctx->buf_ptrs[ctx->cur_index], INT_MAX, 0); + else { + ff_thread_release_buffer(avctx, &ctx->buf_ptrs[ctx->cur_index]); + return -1; + } } - ctx->buf_ptrs[ctx->cur_index].pict_type = is_pframe ? FF_P_TYPE:FF_I_TYPE; *(AVFrame*)data = ctx->buf_ptrs[ctx->cur_index]; *data_size = sizeof(AVFrame); - ctx->prev_index = ctx->cur_index; - ctx->cur_index--; - ctx->cur_index &= 15; + ctx->prev_index = ctx->next_prev_index; + ctx->cur_index = ctx->next_cur_index; /* Only release frames that aren't used for backreferences anymore */ if(ctx->buf_ptrs[ctx->cur_index].data[0]) - avctx->release_buffer(avctx, &ctx->buf_ptrs[ctx->cur_index]); + ff_thread_release_buffer(avctx, &ctx->buf_ptrs[ctx->cur_index]); return buf_size; } @@ -370,9 +404,12 @@ static av_cold int mimic_decode_end(AVCodecContext *avctx) int i; av_free(ctx->swap_buf); + + if(avctx->is_copy) return 0; + for(i = 0; i < 16; i++) if(ctx->buf_ptrs[i].data[0]) - avctx->release_buffer(avctx, &ctx->buf_ptrs[i]); + ff_thread_release_buffer(avctx, &ctx->buf_ptrs[i]); free_vlc(&ctx->vlc); return 0; @@ -387,6 +424,7 @@ AVCodec ff_mimic_decoder = { NULL, mimic_decode_end, mimic_decode_frame, - CODEC_CAP_DR1, + CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS, .long_name = NULL_IF_CONFIG_SMALL("Mimic"), + .update_thread_context = ONLY_IF_THREADS_ENABLED(mimic_decode_update_thread_context) }; -- cgit v1.2.3 From d38345878cbb89e4d8d33bd79f47836d4e9cd637 Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Tue, 29 Mar 2011 07:14:44 -0700 Subject: dfa: protect pointer range checks against overflows. --- libavcodec/dfa.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'libavcodec') diff --git a/libavcodec/dfa.c b/libavcodec/dfa.c index 1023197c38..b149791136 100644 --- a/libavcodec/dfa.c +++ b/libavcodec/dfa.c @@ -81,7 +81,7 @@ static int decode_tsw1(uint8_t *frame, int width, int height, v = bytestream_get_le16(&src); offset = (v & 0x1FFF) << 1; count = ((v >> 13) + 2) << 1; - if (frame - offset < frame_start || frame_end - frame < count) + if (frame - frame_start < offset || frame_end - frame < count) return -1; av_memcpy_backptr(frame, offset, count); frame += count; @@ -117,7 +117,7 @@ static int decode_dsw1(uint8_t *frame, int width, int height, v = bytestream_get_le16(&src); offset = (v & 0x1FFF) << 1; count = ((v >> 13) + 2) << 1; - if (frame - offset < frame_start || frame_end - frame < count) + if (frame - frame_start < offset || frame_end - frame < count) return -1; // can't use av_memcpy_backptr() since it can overwrite following pixels for (v = 0; v < count; v++) @@ -157,7 +157,7 @@ static int decode_dds1(uint8_t *frame, int width, int height, v = bytestream_get_le16(&src); offset = (v & 0x1FFF) << 2; count = ((v >> 13) + 2) << 1; - if (frame - offset < frame_start || frame_end - frame < count*2 + width) + if (frame - frame_start < offset || frame_end - frame < count*2 + width) return -1; for (i = 0; i < count; i++) { frame[0] = frame[1] = -- cgit v1.2.3 From 6d9f52b2cd760eacf6cc6b7d694b0b00d991f1de Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Fri, 11 Mar 2011 02:49:55 +0000 Subject: ac3: move ff_ac3_bit_alloc_calc_bap to ac3dsp Signed-off-by: Mans Rullgard --- libavcodec/ac3.c | 46 +++++++++++----------------------------------- libavcodec/ac3.h | 19 ------------------- libavcodec/ac3dec.c | 3 ++- libavcodec/ac3dec.h | 2 ++ libavcodec/ac3dsp.c | 27 +++++++++++++++++++++++++++ libavcodec/ac3dsp.h | 19 +++++++++++++++++++ libavcodec/ac3enc.c | 2 +- libavcodec/ac3tab.h | 8 ++++++++ 8 files changed, 70 insertions(+), 56 deletions(-) (limited to 'libavcodec') diff --git a/libavcodec/ac3.c b/libavcodec/ac3.c index c8659fe6f5..704c6e03a6 100644 --- a/libavcodec/ac3.c +++ b/libavcodec/ac3.c @@ -31,7 +31,7 @@ /** * Starting frequency coefficient bin for each critical band. */ -static const uint8_t band_start_tab[AC3_CRITICAL_BANDS+1] = { +const uint8_t ff_ac3_band_start_tab[AC3_CRITICAL_BANDS+1] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 31, @@ -44,7 +44,7 @@ static const uint8_t band_start_tab[AC3_CRITICAL_BANDS+1] = { /** * Map each frequency coefficient bin to the critical band that contains it. */ -static const uint8_t bin_to_band_tab[253] = { +const uint8_t ff_ac3_bin_to_band_tab[253] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, @@ -70,7 +70,7 @@ static const uint8_t bin_to_band_tab[253] = { }; #else /* CONFIG_HARDCODED_TABLES */ -static uint8_t bin_to_band_tab[253]; +uint8_t ff_ac3_bin_to_band_tab[253]; #endif static inline int calc_lowcomp1(int a, int b0, int b1, int c) @@ -106,10 +106,10 @@ void ff_ac3_bit_alloc_calc_psd(int8_t *exp, int start, int end, int16_t *psd, /* PSD integration */ bin = start; - band = bin_to_band_tab[start]; + band = ff_ac3_bin_to_band_tab[start]; do { int v = psd[bin++]; - int band_end = FFMIN(band_start_tab[band+1], end); + int band_end = FFMIN(ff_ac3_band_start_tab[band+1], end); for (; bin < band_end; bin++) { int max = FFMAX(v, psd[bin]); /* logadd */ @@ -117,7 +117,7 @@ void ff_ac3_bit_alloc_calc_psd(int8_t *exp, int start, int end, int16_t *psd, v = max + ff_ac3_log_add_tab[adr]; } band_psd[band++] = v; - } while (end > band_start_tab[band]); + } while (end > ff_ac3_band_start_tab[band]); } int ff_ac3_bit_alloc_calc_mask(AC3BitAllocParameters *s, int16_t *band_psd, @@ -132,8 +132,8 @@ int ff_ac3_bit_alloc_calc_mask(AC3BitAllocParameters *s, int16_t *band_psd, int lowcomp, fastleak, slowleak; /* excitation function */ - band_start = bin_to_band_tab[start]; - band_end = bin_to_band_tab[end-1] + 1; + band_start = ff_ac3_bin_to_band_tab[start]; + band_end = ff_ac3_bin_to_band_tab[end-1] + 1; if (band_start == 0) { lowcomp = 0; @@ -212,30 +212,6 @@ int ff_ac3_bit_alloc_calc_mask(AC3BitAllocParameters *s, int16_t *band_psd, return 0; } -void ff_ac3_bit_alloc_calc_bap(int16_t *mask, int16_t *psd, int start, int end, - int snr_offset, int floor, - const uint8_t *bap_tab, uint8_t *bap) -{ - int bin, band; - - /* special case, if snr offset is -960, set all bap's to zero */ - if (snr_offset == -960) { - memset(bap, 0, AC3_MAX_COEFS); - return; - } - - bin = start; - band = bin_to_band_tab[start]; - do { - int m = (FFMAX(mask[band] - snr_offset - floor, 0) & 0x1FE0) + floor; - int band_end = FFMIN(band_start_tab[band+1], end); - for (; bin < band_end; bin++) { - int address = av_clip((psd[bin] - m) >> 5, 0, 63); - bap[bin] = bap_tab[address]; - } - } while (end > band_start_tab[band++]); -} - /** * Initialize some tables. * note: This function must remain thread safe because it is called by the @@ -244,12 +220,12 @@ void ff_ac3_bit_alloc_calc_bap(int16_t *mask, int16_t *psd, int start, int end, av_cold void ff_ac3_common_init(void) { #if !CONFIG_HARDCODED_TABLES - /* compute bin_to_band_tab from band_start_tab */ + /* compute ff_ac3_bin_to_band_tab from ff_ac3_band_start_tab */ int bin = 0, band; for (band = 0; band < AC3_CRITICAL_BANDS; band++) { - int band_end = band_start_tab[band+1]; + int band_end = ff_ac3_band_start_tab[band+1]; while (bin < band_end) - bin_to_band_tab[bin++] = band; + ff_ac3_bin_to_band_tab[bin++] = band; } #endif /* !CONFIG_HARDCODED_TABLES */ } diff --git a/libavcodec/ac3.h b/libavcodec/ac3.h index 133c5b9a78..4ed8c2523b 100644 --- a/libavcodec/ac3.h +++ b/libavcodec/ac3.h @@ -175,23 +175,4 @@ int ff_ac3_bit_alloc_calc_mask(AC3BitAllocParameters *s, int16_t *band_psd, uint8_t *dba_lengths, uint8_t *dba_values, int16_t *mask); -/** - * Calculate bit allocation pointers. - * The SNR is the difference between the masking curve and the signal. AC-3 - * uses this value for each frequency bin to allocate bits. The snroffset - * parameter is a global adjustment to the SNR for all bins. - * - * @param[in] mask masking curve - * @param[in] psd signal power for each frequency bin - * @param[in] start starting bin location - * @param[in] end ending bin location - * @param[in] snr_offset SNR adjustment - * @param[in] floor noise floor - * @param[in] bap_tab look-up table for bit allocation pointers - * @param[out] bap bit allocation pointers - */ -void ff_ac3_bit_alloc_calc_bap(int16_t *mask, int16_t *psd, int start, int end, - int snr_offset, int floor, - const uint8_t *bap_tab, uint8_t *bap); - #endif /* AVCODEC_AC3_H */ diff --git a/libavcodec/ac3dec.c b/libavcodec/ac3dec.c index e08efa104c..396df87fed 100644 --- a/libavcodec/ac3dec.c +++ b/libavcodec/ac3dec.c @@ -184,6 +184,7 @@ static av_cold int ac3_decode_init(AVCodecContext *avctx) ff_mdct_init(&s->imdct_512, 9, 1, 1.0); ff_kbd_window_init(s->window, 5.0, 256); dsputil_init(&s->dsp, avctx); + ff_ac3dsp_init(&s->ac3dsp, avctx->flags & CODEC_FLAG_BITEXACT); ff_fmt_convert_init(&s->fmt_conv, avctx); av_lfg_init(&s->dith_state, 0); @@ -1213,7 +1214,7 @@ static int decode_audio_block(AC3DecodeContext *s, int blk) /* Compute bit allocation */ const uint8_t *bap_tab = s->channel_uses_aht[ch] ? ff_eac3_hebap_tab : ff_ac3_bap_tab; - ff_ac3_bit_alloc_calc_bap(s->mask[ch], s->psd[ch], + s->ac3dsp.bit_alloc_calc_bap(s->mask[ch], s->psd[ch], s->start_freq[ch], s->end_freq[ch], s->snr_offset[ch], s->bit_alloc_params.floor, diff --git a/libavcodec/ac3dec.h b/libavcodec/ac3dec.h index 9d0ffc313e..3459441bbd 100644 --- a/libavcodec/ac3dec.h +++ b/libavcodec/ac3dec.h @@ -52,6 +52,7 @@ #include "libavutil/lfg.h" #include "ac3.h" +#include "ac3dsp.h" #include "get_bits.h" #include "dsputil.h" #include "fft.h" @@ -192,6 +193,7 @@ typedef struct { ///@defgroup opt optimization DSPContext dsp; ///< for optimization + AC3DSPContext ac3dsp; FmtConvertContext fmt_conv; ///< optimized conversion functions float mul_bias; ///< scaling for float_to_int16 conversion ///@} diff --git a/libavcodec/ac3dsp.c b/libavcodec/ac3dsp.c index 42b44f7db8..77250a351b 100644 --- a/libavcodec/ac3dsp.c +++ b/libavcodec/ac3dsp.c @@ -20,6 +20,7 @@ */ #include "avcodec.h" +#include "ac3.h" #include "ac3dsp.h" static void ac3_exponent_min_c(uint8_t *exp, int num_reuse_blocks, int nb_coefs) @@ -101,6 +102,31 @@ static void float_to_fixed24_c(int32_t *dst, const float *src, unsigned int len) } while (len > 0); } +static void ac3_bit_alloc_calc_bap_c(int16_t *mask, int16_t *psd, + int start, int end, + int snr_offset, int floor, + const uint8_t *bap_tab, uint8_t *bap) +{ + int bin, band; + + /* special case, if snr offset is -960, set all bap's to zero */ + if (snr_offset == -960) { + memset(bap, 0, AC3_MAX_COEFS); + return; + } + + bin = start; + band = ff_ac3_bin_to_band_tab[start]; + do { + int m = (FFMAX(mask[band] - snr_offset - floor, 0) & 0x1FE0) + floor; + int band_end = FFMIN(ff_ac3_band_start_tab[band+1], end); + for (; bin < band_end; bin++) { + int address = av_clip((psd[bin] - m) >> 5, 0, 63); + bap[bin] = bap_tab[address]; + } + } while (end > ff_ac3_band_start_tab[band++]); +} + av_cold void ff_ac3dsp_init(AC3DSPContext *c, int bit_exact) { c->ac3_exponent_min = ac3_exponent_min_c; @@ -108,6 +134,7 @@ av_cold void ff_ac3dsp_init(AC3DSPContext *c, int bit_exact) c->ac3_lshift_int16 = ac3_lshift_int16_c; c->ac3_rshift_int32 = ac3_rshift_int32_c; c->float_to_fixed24 = float_to_fixed24_c; + c->bit_alloc_calc_bap = ac3_bit_alloc_calc_bap_c; if (ARCH_ARM) ff_ac3dsp_init_arm(c, bit_exact); diff --git a/libavcodec/ac3dsp.h b/libavcodec/ac3dsp.h index eeaa56cbbf..bf4fc144d7 100644 --- a/libavcodec/ac3dsp.h +++ b/libavcodec/ac3dsp.h @@ -81,6 +81,25 @@ typedef struct AC3DSPContext { * constraints: multiple of 32 greater than zero */ void (*float_to_fixed24)(int32_t *dst, const float *src, unsigned int len); + + /** + * Calculate bit allocation pointers. + * The SNR is the difference between the masking curve and the signal. AC-3 + * uses this value for each frequency bin to allocate bits. The snroffset + * parameter is a global adjustment to the SNR for all bins. + * + * @param[in] mask masking curve + * @param[in] psd signal power for each frequency bin + * @param[in] start starting bin location + * @param[in] end ending bin location + * @param[in] snr_offset SNR adjustment + * @param[in] floor noise floor + * @param[in] bap_tab look-up table for bit allocation pointers + * @param[out] bap bit allocation pointers + */ + void (*bit_alloc_calc_bap)(int16_t *mask, int16_t *psd, int start, int end, + int snr_offset, int floor, + const uint8_t *bap_tab, uint8_t *bap); } AC3DSPContext; void ff_ac3dsp_init (AC3DSPContext *c, int bit_exact); diff --git a/libavcodec/ac3enc.c b/libavcodec/ac3enc.c index c641400a2d..e62ded62c8 100644 --- a/libavcodec/ac3enc.c +++ b/libavcodec/ac3enc.c @@ -1047,7 +1047,7 @@ static int bit_alloc(AC3EncodeContext *s, int snr_offset) whenever we reuse exponents. */ block = s->blocks[blk].exp_ref_block[ch]; if (s->exp_strategy[ch][blk] != EXP_REUSE) { - ff_ac3_bit_alloc_calc_bap(block->mask[ch], block->psd[ch], 0, + s->ac3dsp.bit_alloc_calc_bap(block->mask[ch], block->psd[ch], 0, s->nb_coefs[ch], snr_offset, s->bit_alloc.floor, ff_ac3_bap_tab, block->bap[ch]); diff --git a/libavcodec/ac3tab.h b/libavcodec/ac3tab.h index e3fda9715a..292ce0d32f 100644 --- a/libavcodec/ac3tab.h +++ b/libavcodec/ac3tab.h @@ -25,6 +25,12 @@ #include "libavutil/common.h" #include "ac3.h" +#if CONFIG_HARDCODED_TABLES +# define HCONST const +#else +# define HCONST +#endif + extern const uint16_t ff_ac3_frame_size_tab[38][3]; extern const uint8_t ff_ac3_channels_tab[8]; extern const uint16_t ff_ac3_channel_layout_tab[8]; @@ -44,6 +50,8 @@ extern const uint16_t ff_ac3_db_per_bit_tab[4]; extern const int16_t ff_ac3_floor_tab[8]; extern const uint16_t ff_ac3_fast_gain_tab[8]; extern const uint16_t ff_eac3_default_chmap[8]; +extern const uint8_t ff_ac3_band_start_tab[AC3_CRITICAL_BANDS+1]; +extern HCONST uint8_t ff_ac3_bin_to_band_tab[253]; /** Custom channel map locations bitmask * Other channels described in documentation: -- cgit v1.2.3 From d782bca4156cfd91cd3d9a7d4eda6ff580915170 Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Tue, 29 Mar 2011 02:30:13 +0100 Subject: ac3enc: NEON optimised float_to_fixed24 Signed-off-by: Mans Rullgard --- libavcodec/arm/ac3dsp_init_arm.c | 2 ++ libavcodec/arm/ac3dsp_neon.S | 14 ++++++++++++++ 2 files changed, 16 insertions(+) (limited to 'libavcodec') diff --git a/libavcodec/arm/ac3dsp_init_arm.c b/libavcodec/arm/ac3dsp_init_arm.c index 6874546c7c..03200e64cd 100644 --- a/libavcodec/arm/ac3dsp_init_arm.c +++ b/libavcodec/arm/ac3dsp_init_arm.c @@ -27,6 +27,7 @@ void ff_ac3_exponent_min_neon(uint8_t *exp, int num_reuse_blocks, int nb_coefs); int ff_ac3_max_msb_abs_int16_neon(const int16_t *src, int len); void ff_ac3_lshift_int16_neon(int16_t *src, unsigned len, unsigned shift); void ff_ac3_rshift_int32_neon(int32_t *src, unsigned len, unsigned shift); +void ff_float_to_fixed24_neon(int32_t *dst, const float *src, unsigned int len); av_cold void ff_ac3dsp_init_arm(AC3DSPContext *c, int bit_exact) { @@ -35,5 +36,6 @@ av_cold void ff_ac3dsp_init_arm(AC3DSPContext *c, int bit_exact) c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_neon; c->ac3_lshift_int16 = ff_ac3_lshift_int16_neon; c->ac3_rshift_int32 = ff_ac3_rshift_int32_neon; + c->float_to_fixed24 = ff_float_to_fixed24_neon; } } diff --git a/libavcodec/arm/ac3dsp_neon.S b/libavcodec/arm/ac3dsp_neon.S index 43d4e612af..d33d978d7c 100644 --- a/libavcodec/arm/ac3dsp_neon.S +++ b/libavcodec/arm/ac3dsp_neon.S @@ -78,3 +78,17 @@ function ff_ac3_rshift_int32_neon, export=1 bgt 1b bx lr endfunc + +function ff_float_to_fixed24_neon, export=1 +1: vld1.32 {q0-q1}, [r1,:128]! + vcvt.s32.f32 q0, q0, #24 + vld1.32 {q2-q3}, [r1,:128]! + vcvt.s32.f32 q1, q1, #24 + vcvt.s32.f32 q2, q2, #24 + vst1.32 {q0-q1}, [r0,:128]! + vcvt.s32.f32 q3, q3, #24 + vst1.32 {q2-q3}, [r0,:128]! + subs r2, r2, #16 + bgt 1b + bx lr +endfunc -- cgit v1.2.3 From 6f718471eaae9d25c4fd2dd8d8defa412e2d041c Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Sun, 13 Mar 2011 16:22:07 +0000 Subject: ac3enc: move mant*_cnt and qmant*_ptr out of AC3EncodeContext These fields are only used in quantize_mantissas() and reset on each call, no need to store them in the main context. Signed-off-by: Mans Rullgard --- libavcodec/ac3enc.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'libavcodec') diff --git a/libavcodec/ac3enc.c b/libavcodec/ac3enc.c index e62ded62c8..fc591d9716 100644 --- a/libavcodec/ac3enc.c +++ b/libavcodec/ac3enc.c @@ -178,10 +178,6 @@ typedef struct AC3EncodeContext { int frame_bits; ///< all frame bits except exponents and mantissas int exponent_bits; ///< number of bits used for exponents - /* mantissa encoding */ - int mant1_cnt, mant2_cnt, mant4_cnt; ///< mantissa counts for bap=1,2,4 - uint16_t *qmant1_ptr, *qmant2_ptr, *qmant4_ptr; ///< mantissa pointers for bap=1,2,4 - SampleType **planar_samples; uint8_t *bap_buffer; uint8_t *bap1_buffer; @@ -199,6 +195,10 @@ typedef struct AC3EncodeContext { DECLARE_ALIGNED(16, SampleType, windowed_samples)[AC3_WINDOW_SIZE]; } AC3EncodeContext; +typedef struct AC3Mant { + uint16_t *qmant1_ptr, *qmant2_ptr, *qmant4_ptr; ///< mantissa pointers for bap=1,2,4 + int mant1_cnt, mant2_cnt, mant4_cnt; ///< mantissa counts for bap=1,2,4 +} AC3Mant; #define CMIXLEV_NUM_OPTIONS 3 static const float cmixlev_options[CMIXLEV_NUM_OPTIONS] = { @@ -1248,7 +1248,7 @@ static inline int asym_quant(int c, int e, int qbits) /** * Quantize a set of mantissas for a single channel in a single block. */ -static void quantize_mantissas_blk_ch(AC3EncodeContext *s, int32_t *fixed_coef, +static void quantize_mantissas_blk_ch(AC3Mant *s, int32_t *fixed_coef, uint8_t *exp, uint8_t *bap, uint16_t *qmant, int n) { @@ -1350,12 +1350,11 @@ static void quantize_mantissas(AC3EncodeContext *s) for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) { AC3Block *block = &s->blocks[blk]; AC3Block *ref_block; - s->mant1_cnt = s->mant2_cnt = s->mant4_cnt = 0; - s->qmant1_ptr = s->qmant2_ptr = s->qmant4_ptr = NULL; + AC3Mant m = { 0 }; for (ch = 0; ch < s->channels; ch++) { ref_block = block->exp_ref_block[ch]; - quantize_mantissas_blk_ch(s, block->fixed_coef[ch], + quantize_mantissas_blk_ch(&m, block->fixed_coef[ch], ref_block->exp[ch], ref_block->bap[ch], block->qmant[ch], s->nb_coefs[ch]); } -- cgit v1.2.3 From 52fd16a264d1eb14b1a84b7b38041da1756fb216 Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Fri, 11 Mar 2011 17:16:27 +0000 Subject: ac3enc: move compute_mantissa_size() to ac3dsp Signed-off-by: Mans Rullgard --- libavcodec/ac3dsp.c | 23 +++++++++++++++++++++++ libavcodec/ac3dsp.h | 5 +++++ libavcodec/ac3enc.c | 27 +-------------------------- 3 files changed, 29 insertions(+), 26 deletions(-) (limited to 'libavcodec') diff --git a/libavcodec/ac3dsp.c b/libavcodec/ac3dsp.c index 77250a351b..cd4d30b8f2 100644 --- a/libavcodec/ac3dsp.c +++ b/libavcodec/ac3dsp.c @@ -127,6 +127,28 @@ static void ac3_bit_alloc_calc_bap_c(int16_t *mask, int16_t *psd, } while (end > ff_ac3_band_start_tab[band++]); } +static int ac3_compute_mantissa_size_c(int mant_cnt[5], uint8_t *bap, + int nb_coefs) +{ + int bits, b, i; + + bits = 0; + for (i = 0; i < nb_coefs; i++) { + b = bap[i]; + if (b <= 4) { + // bap=1 to bap=4 will be counted in compute_mantissa_size_final + mant_cnt[b]++; + } else if (b <= 13) { + // bap=5 to bap=13 use (bap-1) bits + bits += b - 1; + } else { + // bap=14 uses 14 bits and bap=15 uses 16 bits + bits += (b == 14) ? 14 : 16; + } + } + return bits; +} + av_cold void ff_ac3dsp_init(AC3DSPContext *c, int bit_exact) { c->ac3_exponent_min = ac3_exponent_min_c; @@ -135,6 +157,7 @@ av_cold void ff_ac3dsp_init(AC3DSPContext *c, int bit_exact) c->ac3_rshift_int32 = ac3_rshift_int32_c; c->float_to_fixed24 = float_to_fixed24_c; c->bit_alloc_calc_bap = ac3_bit_alloc_calc_bap_c; + c->compute_mantissa_size = ac3_compute_mantissa_size_c; if (ARCH_ARM) ff_ac3dsp_init_arm(c, bit_exact); diff --git a/libavcodec/ac3dsp.h b/libavcodec/ac3dsp.h index bf4fc144d7..aa605a8b29 100644 --- a/libavcodec/ac3dsp.h +++ b/libavcodec/ac3dsp.h @@ -100,6 +100,11 @@ typedef struct AC3DSPContext { void (*bit_alloc_calc_bap)(int16_t *mask, int16_t *psd, int start, int end, int snr_offset, int floor, const uint8_t *bap_tab, uint8_t *bap); + + /** + * Calculate the number of bits needed to encode a set of mantissas. + */ + int (*compute_mantissa_size)(int mant_cnt[5], uint8_t *bap, int nb_coefs); } AC3DSPContext; void ff_ac3dsp_init (AC3DSPContext *c, int bit_exact); diff --git a/libavcodec/ac3enc.c b/libavcodec/ac3enc.c index fc591d9716..18e4dae26a 100644 --- a/libavcodec/ac3enc.c +++ b/libavcodec/ac3enc.c @@ -927,31 +927,6 @@ static void count_frame_bits(AC3EncodeContext *s) } -/** - * Calculate the number of bits needed to encode a set of mantissas. - */ -static int compute_mantissa_size(int mant_cnt[5], uint8_t *bap, int nb_coefs) -{ - int bits, b, i; - - bits = 0; - for (i = 0; i < nb_coefs; i++) { - b = bap[i]; - if (b <= 4) { - // bap=1 to bap=4 will be counted in compute_mantissa_size_final - mant_cnt[b]++; - } else if (b <= 13) { - // bap=5 to bap=13 use (bap-1) bits - bits += b - 1; - } else { - // bap=14 uses 14 bits and bap=15 uses 16 bits - bits += (b == 14) ? 14 : 16; - } - } - return bits; -} - - /** * Finalize the mantissa bit count by adding in the grouped mantissas. */ @@ -1052,7 +1027,7 @@ static int bit_alloc(AC3EncodeContext *s, int snr_offset) s->bit_alloc.floor, ff_ac3_bap_tab, block->bap[ch]); } - mantissa_bits += compute_mantissa_size(mant_cnt, block->bap[ch], s->nb_coefs[ch]); + mantissa_bits += s->ac3dsp.compute_mantissa_size(mant_cnt, block->bap[ch], s->nb_coefs[ch]); } mantissa_bits += compute_mantissa_size_final(mant_cnt); } -- cgit v1.2.3