From 92fb52d9060a146f31da6f07ea9ce7867294e153 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje"
Date: Thu, 6 Oct 2011 08:03:38 -0700
Subject: prores: extract idct into its own dspcontext and merge with put_pixels.

Move the ProRes dequantization, inverse transform and pixel output out of
proresdec.c into a new ProresDSPContext (proresdsp.[ch]) whose single
idct_put hook handles one 8x8 block per call.

 - ff_init_scantable_permutation() is split out of dsputil_init() so the
   IDCT permutation table can be filled without a full DSPContext.
 - ff_prores_idct() multiplies the coefficients by qmat, runs
   idctRowCondDC_10() on each row, shifts every coefficient right by 2
   and then runs idctSparseCol_10() on each column.
 - decode_slice_plane() calls idct_put once per block (two or four blocks
   per macroblock) instead of transforming the whole slice and then
   calling the slice-wide put_pixels().

---
Review notes (this section is ignored by git am):
 - The line structure was restored from a whitespace-collapsed copy; the
   indentation and column alignment of context and removed lines is
   reconstructed and should be verified against the tree (or the patch
   applied with --ignore-whitespace).
 - Relative to the upstream commit, CLIP_MAX in proresdsp.c is fully
   parenthesized and the comments on put_pixels() and ff_prores_idct()
   were corrected, so the blob ids in the "index" lines of those files no
   longer describe the post-image exactly.

 libavcodec/Makefile      |  2 +-
 libavcodec/dsputil.c     | 65 +++++++++++++++++++++----------------
 libavcodec/dsputil.h     |  2 ++
 libavcodec/proresdec.c   | 84 +++++++++++-------------------------------------
 libavcodec/proresdsp.c   | 61 +++++++++++++++++++++++++++++++++++
 libavcodec/proresdsp.h   | 38 ++++++++++++++++++++++
 libavcodec/simple_idct.c | 17 ++++++++++
 libavcodec/simple_idct.h |  6 ++++
 8 files changed, 180 insertions(+), 95 deletions(-)
 create mode 100644 libavcodec/proresdsp.c
 create mode 100644 libavcodec/proresdsp.h

diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 3c4e2f84b9..b7b512478c 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -295,7 +295,7 @@ OBJS-$(CONFIG_PNG_DECODER)             += png.o pngdec.o
 OBJS-$(CONFIG_PNG_ENCODER)             += png.o pngenc.o
 OBJS-$(CONFIG_PPM_DECODER)             += pnmdec.o pnm.o
 OBJS-$(CONFIG_PPM_ENCODER)             += pnmenc.o pnm.o
-OBJS-$(CONFIG_PRORES_DECODER)          += proresdec.o
+OBJS-$(CONFIG_PRORES_DECODER)          += proresdec.o proresdsp.o
 OBJS-$(CONFIG_PTX_DECODER)             += ptx.o
 OBJS-$(CONFIG_QCELP_DECODER)           += qcelpdec.o celp_math.o         \
                                           celp_filters.o acelp_vectors.o \
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index 967406eedf..182063ca2b 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -145,6 +145,41 @@ void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_s
     }
 }
 
+void ff_init_scantable_permutation(uint8_t *idct_permutation,
+                                   int idct_permutation_type)
+{
+    int i;
+
+    switch(idct_permutation_type){
+    case FF_NO_IDCT_PERM:
+        for(i=0; i<64; i++)
+            idct_permutation[i]= i;
+        break;
+    case FF_LIBMPEG2_IDCT_PERM:
+        for(i=0; i<64; i++)
+            idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
+        break;
+    case FF_SIMPLE_IDCT_PERM:
+        for(i=0; i<64; i++)
+            idct_permutation[i]= simple_mmx_permutation[i];
+        break;
+    case FF_TRANSPOSE_IDCT_PERM:
+        for(i=0; i<64; i++)
+            idct_permutation[i]= ((i&7)<<3) | (i>>3);
+        break;
+    case FF_PARTTRANS_IDCT_PERM:
+        for(i=0; i<64; i++)
+            idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
+        break;
+    case FF_SSE2_IDCT_PERM:
+        for(i=0; i<64; i++)
+            idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];
+        break;
+    default:
+        av_log(NULL, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
+    }
+}
+
 static int pix_sum_c(uint8_t * pix, int line_size)
 {
     int s, i, j;
@@ -3123,32 +3158,6 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
         c->avg_2tap_qpel_pixels_tab[0][i]= c->avg_h264_qpel_pixels_tab[0][i];
     }
 
-    switch(c->idct_permutation_type){
-    case FF_NO_IDCT_PERM:
-        for(i=0; i<64; i++)
-            c->idct_permutation[i]= i;
-        break;
-    case FF_LIBMPEG2_IDCT_PERM:
-        for(i=0; i<64; i++)
-            c->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
-        break;
-    case FF_SIMPLE_IDCT_PERM:
-        for(i=0; i<64; i++)
-            c->idct_permutation[i]= simple_mmx_permutation[i];
-        break;
-    case FF_TRANSPOSE_IDCT_PERM:
-        for(i=0; i<64; i++)
-            c->idct_permutation[i]= ((i&7)<<3) | (i>>3);
-        break;
-    case FF_PARTTRANS_IDCT_PERM:
-        for(i=0; i<64; i++)
-            c->idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
-        break;
-    case FF_SSE2_IDCT_PERM:
-        for(i=0; i<64; i++)
-            c->idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];
-        break;
-    default:
-        av_log(avctx, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
-    }
+    ff_init_scantable_permutation(c->idct_permutation,
+                                  c->idct_permutation_type);
 }
diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
index 73830f8190..bef2cdd4e8 100644
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -202,6 +202,8 @@ typedef struct ScanTable{
 } ScanTable;
 
 void ff_init_scantable(uint8_t *, ScanTable *st, const uint8_t *src_scantable);
+void ff_init_scantable_permutation(uint8_t *idct_permutation,
+                                   int idct_permutation_type);
 
 #define EMULATED_EDGE(depth) \
 void ff_emulated_edge_mc_ ## depth (uint8_t *buf, const uint8_t *src, int linesize,\
diff --git a/libavcodec/proresdec.c b/libavcodec/proresdec.c
index c70d145345..cbbd068a10 100644
--- a/libavcodec/proresdec.c
+++ b/libavcodec/proresdec.c
@@ -34,17 +34,11 @@
 
 #include "libavutil/intmath.h"
 #include "avcodec.h"
-#include "dsputil.h"
+#include "proresdsp.h"
 #include "get_bits.h"
 
-#define BITS_PER_SAMPLE 10                              ///< output precision of that decoder
-#define BIAS     (1 << (BITS_PER_SAMPLE - 1))           ///< bias value for converting signed pixels into unsigned ones
-#define CLIP_MIN (1 << (BITS_PER_SAMPLE - 8))           ///< minimum value for clipping resulting pixels
-#define CLIP_MAX (1 << BITS_PER_SAMPLE) - CLIP_MIN - 1  ///< maximum value for clipping resulting pixels
-
-
 typedef struct {
-    DSPContext dsp;
+    ProresDSPContext dsp;
     AVFrame picture;
     ScanTable scantable;
     int scantable_type;           ///< -1 = uninitialized, 0 = progressive, 1/2 = interlaced
@@ -104,8 +98,8 @@ static av_cold int decode_init(AVCodecContext *avctx)
 
     avctx->pix_fmt = PIX_FMT_YUV422P10; // set default pixel format
 
-    avctx->bits_per_raw_sample = BITS_PER_SAMPLE;
-    dsputil_init(&ctx->dsp, avctx);
+    avctx->bits_per_raw_sample = PRORES_BITS_PER_SAMPLE;
+    ff_proresdsp_init(&ctx->dsp);
 
     avctx->coded_frame = &ctx->picture;
     avcodec_get_frame_defaults(&ctx->picture);
@@ -449,48 +443,6 @@ static inline void decode_ac_coeffs(GetBitContext *gb, DCTELEM *out,
 }
 
 
-#define CLIP_AND_BIAS(x) (av_clip((x) + BIAS, CLIP_MIN, CLIP_MAX))
-
-/**
- * Add bias value, clamp and output pixels of a slice
- */
-static void put_pixels(const DCTELEM *in, uint16_t *out, int stride,
-                       int mbs_per_slice, int blocks_per_mb)
-{
-    int mb, x, y, src_offset, dst_offset;
-    const DCTELEM *src1, *src2;
-    uint16_t *dst1, *dst2;
-
-    src1 = in;
-    src2 = in + (blocks_per_mb << 5);
-    dst1 = out;
-    dst2 = out + (stride << 3);
-
-    for (mb = 0; mb < mbs_per_slice; mb++) {
-        for (y = 0, dst_offset = 0; y < 8; y++, dst_offset += stride) {
-            for (x = 0; x < 8; x++) {
-                src_offset = (y << 3) + x;
-
-                dst1[dst_offset + x] = CLIP_AND_BIAS(src1[src_offset]);
-                dst2[dst_offset + x] = CLIP_AND_BIAS(src2[src_offset]);
-
-                if (blocks_per_mb > 2) {
-                    dst1[dst_offset + x + 8] =
-                        CLIP_AND_BIAS(src1[src_offset + 64]);
-                    dst2[dst_offset + x + 8] =
-                        CLIP_AND_BIAS(src2[src_offset + 64]);
-                }
-            }
-        }
-
-        src1 += blocks_per_mb << 6;
-        src2 += blocks_per_mb << 6;
-        dst1 += blocks_per_mb << 2;
-        dst2 += blocks_per_mb << 2;
-    }
-}
-
-
 /**
  * Decode a slice plane (luma or chroma).
  */
@@ -502,7 +454,7 @@ static void decode_slice_plane(ProresContext *ctx, const uint8_t *buf,
 {
     GetBitContext gb;
     DCTELEM *block_ptr;
-    int i, blk_num, blocks_per_slice;
+    int mb_num, blocks_per_slice;
 
     blocks_per_slice = mbs_per_slice * blocks_per_mb;
 
@@ -518,20 +470,20 @@ static void decode_slice_plane(ProresContext *ctx, const uint8_t *buf,
     /* inverse quantization, inverse transform and output */
     block_ptr = ctx->blocks;
 
-    for (blk_num = 0; blk_num < blocks_per_slice; blk_num++, block_ptr += 64) {
-        /* TODO: the correct solution shoud be (block_ptr[i] * qmat[i]) >> 1
-         * and the input of the inverse transform should be scaled by 2
-         * in order to avoid rounding errors.
-         * Due to the fact the existing Libav transforms are incompatible with
-         * that input I temporally introduced the coarse solution below... */
-        for (i = 0; i < 64; i++)
-            block_ptr[i] = (block_ptr[i] * qmat[i]) >> 2;
-
-        ctx->dsp.idct(block_ptr);
+    for (mb_num = 0; mb_num < mbs_per_slice; mb_num++, out_ptr += blocks_per_mb * 4) {
+        ctx->dsp.idct_put(out_ptr,                    linesize, block_ptr, qmat);
+        block_ptr += 64;
+        if (blocks_per_mb > 2) {
+            ctx->dsp.idct_put(out_ptr + 8,            linesize, block_ptr, qmat);
+            block_ptr += 64;
+        }
+        ctx->dsp.idct_put(out_ptr + linesize * 4,     linesize, block_ptr, qmat);
+        block_ptr += 64;
+        if (blocks_per_mb > 2) {
+            ctx->dsp.idct_put(out_ptr + linesize * 4 + 8, linesize, block_ptr, qmat);
+            block_ptr += 64;
+        }
     }
-
-    put_pixels(ctx->blocks, out_ptr, linesize >> 1, mbs_per_slice,
-               blocks_per_mb);
 }
 
 
diff --git a/libavcodec/proresdsp.c b/libavcodec/proresdsp.c
new file mode 100644
index 0000000000..3038176003
--- /dev/null
+++ b/libavcodec/proresdsp.c
@@ -0,0 +1,61 @@
+/*
+ * Apple ProRes compatible decoder
+ *
+ * Copyright (c) 2010-2011 Maxim Poliakovski
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "proresdsp.h"
+#include "simple_idct.h"
+
+#define BIAS     (1 << (PRORES_BITS_PER_SAMPLE - 1))             ///< bias value for converting signed pixels into unsigned ones
+#define CLIP_MIN (1 << (PRORES_BITS_PER_SAMPLE - 8))             ///< minimum value for clipping resulting pixels
+#define CLIP_MAX ((1 << PRORES_BITS_PER_SAMPLE) - CLIP_MIN - 1)  ///< maximum value for clipping resulting pixels
+
+#define CLIP_AND_BIAS(x) (av_clip((x) + BIAS, CLIP_MIN, CLIP_MAX))
+
+/**
+ * Add bias value, clamp and output pixels of one 8x8 block
+ */
+static void put_pixels(uint16_t *dst, int stride, const DCTELEM *in)
+{
+    int x, y, src_offset, dst_offset;
+
+    for (y = 0, dst_offset = 0; y < 8; y++, dst_offset += stride) {
+        for (x = 0; x < 8; x++) {
+            src_offset = (y << 3) + x;
+
+            dst[dst_offset + x] = CLIP_AND_BIAS(in[src_offset]);
+        }
+    }
+}
+
+static void prores_idct_put_c(uint16_t *out, int linesize, DCTELEM *block, const int16_t *qmat)
+{
+    ff_prores_idct(block, qmat);
+    put_pixels(out, linesize >> 1, block);
+}
+
+void ff_proresdsp_init(ProresDSPContext *dsp)
+{
+    dsp->idct_put = prores_idct_put_c;
+    dsp->idct_permutation_type = FF_NO_IDCT_PERM;
+
+    ff_init_scantable_permutation(dsp->idct_permutation,
+                                  dsp->idct_permutation_type);
+}
diff --git a/libavcodec/proresdsp.h b/libavcodec/proresdsp.h
new file mode 100644
index 0000000000..18d6bf5830
--- /dev/null
+++ b/libavcodec/proresdsp.h
@@ -0,0 +1,38 @@
+/*
+ * Apple ProRes compatible decoder
+ *
+ * Copyright (c) 2010-2011 Maxim Poliakovski
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_PRORESDSP_H
+#define AVCODEC_PRORESDSP_H
+
+#include "dsputil.h"
+
+#define PRORES_BITS_PER_SAMPLE 10 ///< output precision of prores decoder
+
+typedef struct {
+    int idct_permutation_type;
+    uint8_t idct_permutation[64];
+    void (* idct_put) (uint16_t *out, int linesize, DCTELEM *block, const int16_t *qmat);
+} ProresDSPContext;
+
+void ff_proresdsp_init(ProresDSPContext *dsp);
+
+#endif /* AVCODEC_PRORESDSP_H */
diff --git a/libavcodec/simple_idct.c b/libavcodec/simple_idct.c
index b62658bdb3..c6cd9080df 100644
--- a/libavcodec/simple_idct.c
+++ b/libavcodec/simple_idct.c
@@ -221,3 +221,20 @@ void ff_simple_idct44_add(uint8_t *dest, int line_size, DCTELEM *block)
         idct4col_add(dest + i, line_size, block + i);
     }
 }
+
+void ff_prores_idct(DCTELEM *block, const int16_t *qmat)
+{
+    int i;
+
+    for (i = 0; i < 64; i++)
+        block[i] *= qmat[i];
+
+    for (i = 0; i < 8; i++)
+        idctRowCondDC_10(block + i*8);
+
+    for (i = 0; i < 64; i++)
+        block[i] >>= 2;
+
+    for (i = 0; i < 8; i++)
+        idctSparseCol_10(block + i);
+}
diff --git a/libavcodec/simple_idct.h b/libavcodec/simple_idct.h
index a33eb964ce..6e22158b0b 100644
--- a/libavcodec/simple_idct.h
+++ b/libavcodec/simple_idct.h
@@ -38,6 +38,12 @@ void ff_simple_idct_8(DCTELEM *block);
 void ff_simple_idct_put_10(uint8_t *dest, int line_size, DCTELEM *block);
 void ff_simple_idct_add_10(uint8_t *dest, int line_size, DCTELEM *block);
 void ff_simple_idct_10(DCTELEM *block);
+/**
+ * Special version of ff_simple_idct_10() which also dequantizes the
+ * coefficients with qmat and shifts them right by 2 between the row and
+ * column passes to account for larger scale of input coefficients.
+ */
+void ff_prores_idct(DCTELEM *block, const int16_t *qmat);
 
 void ff_simple_idct_mmx(int16_t *block);
 void ff_simple_idct_add_mmx(uint8_t *dest, int line_size, int16_t *block);
-- 
cgit v1.2.3