Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/FFmpeg/FFmpeg.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristophe Gisquet <christophe.gisquet@gmail.com>2014-04-11 23:02:08 +0400
committerMichael Niedermayer <michaelni@gmx.at>2014-04-20 20:25:36 +0400
commit319235c67c59d6abaa78d5af57121ab9816f937d (patch)
tree4025d69a59c3e70b8cad858d486983b747671236 /libavcodec/vc1dec.c
parentde9cd5884822375d492ff4dcc98e55317a66c196 (diff)
vc1dsp: introduce cases for 8x8 and 16x16
This allows further unrolling the DSP implementation where possible. x86 and ARM DSP modified by simply moving the multiple calls from vc1dec to the DSP code. Decoding improvements should only occurs because of the compiler actually able to unroll more. Decoding time: ~8.80s -> 8.64s (ie around 2%) Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/vc1dec.c')
-rw-r--r--libavcodec/vc1dec.c16
1 files changed, 4 insertions, 12 deletions
diff --git a/libavcodec/vc1dec.c b/libavcodec/vc1dec.c
index 1a45e40d88..d5e8a37d78 100644
--- a/libavcodec/vc1dec.c
+++ b/libavcodec/vc1dec.c
@@ -510,11 +510,7 @@ static void vc1_mc_1mv(VC1Context *v, int dir)
if (s->mspel) {
dxy = ((my & 3) << 2) | (mx & 3);
- v->vc1dsp.put_vc1_mspel_pixels_tab[dxy](s->dest[0] , srcY , s->linesize, v->rnd);
- v->vc1dsp.put_vc1_mspel_pixels_tab[dxy](s->dest[0] + 8, srcY + 8, s->linesize, v->rnd);
- srcY += s->linesize * 8;
- v->vc1dsp.put_vc1_mspel_pixels_tab[dxy](s->dest[0] + 8 * s->linesize , srcY , s->linesize, v->rnd);
- v->vc1dsp.put_vc1_mspel_pixels_tab[dxy](s->dest[0] + 8 * s->linesize + 8, srcY + 8, s->linesize, v->rnd);
+ v->vc1dsp.put_vc1_mspel_pixels_tab[0][dxy](s->dest[0] , srcY , s->linesize, v->rnd);
} else { // hpel mc - always used for luma
dxy = (my & 2) | ((mx & 2) >> 1);
if (!v->rnd)
@@ -728,9 +724,9 @@ static void vc1_mc_4mv_luma(VC1Context *v, int n, int dir, int avg)
if (s->mspel) {
dxy = ((my & 3) << 2) | (mx & 3);
if (avg)
- v->vc1dsp.avg_vc1_mspel_pixels_tab[dxy](s->dest[0] + off, srcY, s->linesize << fieldmv, v->rnd);
+ v->vc1dsp.avg_vc1_mspel_pixels_tab[1][dxy](s->dest[0] + off, srcY, s->linesize << fieldmv, v->rnd);
else
- v->vc1dsp.put_vc1_mspel_pixels_tab[dxy](s->dest[0] + off, srcY, s->linesize << fieldmv, v->rnd);
+ v->vc1dsp.put_vc1_mspel_pixels_tab[1][dxy](s->dest[0] + off, srcY, s->linesize << fieldmv, v->rnd);
} else { // hpel mc - always used for luma
dxy = (my & 2) | ((mx & 2) >> 1);
if (!v->rnd)
@@ -2039,11 +2035,7 @@ static void vc1_interp_mc(VC1Context *v)
if (s->mspel) {
dxy = ((my & 3) << 2) | (mx & 3);
- v->vc1dsp.avg_vc1_mspel_pixels_tab[dxy](s->dest[0] + off , srcY , s->linesize, v->rnd);
- v->vc1dsp.avg_vc1_mspel_pixels_tab[dxy](s->dest[0] + off + 8, srcY + 8, s->linesize, v->rnd);
- srcY += s->linesize * 8;
- v->vc1dsp.avg_vc1_mspel_pixels_tab[dxy](s->dest[0] + off + 8 * s->linesize , srcY , s->linesize, v->rnd);
- v->vc1dsp.avg_vc1_mspel_pixels_tab[dxy](s->dest[0] + off + 8 * s->linesize + 8, srcY + 8, s->linesize, v->rnd);
+ v->vc1dsp.avg_vc1_mspel_pixels_tab[0][dxy](s->dest[0] + off , srcY , s->linesize, v->rnd);
} else { // hpel mc
dxy = (my & 2) | ((mx & 2) >> 1);