From e470ef7641a41b6acd0765b7031ad03c041ec186 Mon Sep 17 00:00:00 2001 From: Michael Niedermayer Date: Wed, 27 Jan 2010 11:14:29 +0000 Subject: Use a table to speed up access to non_zero_count in MBAFF with differing interlacing. ~4 CPU cycles speedup. Originally committed as revision 21474 to svn://svn.ffmpeg.org/ffmpeg/trunk --- libavcodec/h264_loopfilter.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'libavcodec/h264_loopfilter.c') diff --git a/libavcodec/h264_loopfilter.c b/libavcodec/h264_loopfilter.c index 28557531e4..5d2f62f2c5 100644 --- a/libavcodec/h264_loopfilter.c +++ b/libavcodec/h264_loopfilter.c @@ -632,6 +632,16 @@ void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint *(uint64_t*)&bS[0]= *(uint64_t*)&bS[4]= 0x0004000400040004ULL; else { + static const uint8_t offset[2][2][8]={ + { + {7+8*0, 7+8*0, 7+8*0, 7+8*0, 7+8*1, 7+8*1, 7+8*1, 7+8*1}, + {7+8*2, 7+8*2, 7+8*2, 7+8*2, 7+8*3, 7+8*3, 7+8*3, 7+8*3}, + },{ + {7+8*0, 7+8*1, 7+8*2, 7+8*3, 7+8*0, 7+8*1, 7+8*2, 7+8*3}, + {7+8*0, 7+8*1, 7+8*2, 7+8*3, 7+8*0, 7+8*1, 7+8*2, 7+8*3}, + } + }; + const uint8_t *off= offset[MB_FIELD][mb_y&1]; for( i = 0; i < 8; i++ ) { int j= MB_FIELD ? i>>2 : i&1; int mbn_xy = h->left_mb_xy[j]; @@ -644,7 +654,7 @@ void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint ((!h->pps.cabac && IS_8x8DCT(mbn_type)) ? (h->cbp_table[mbn_xy] & ((MB_FIELD ? (i&2) : (mb_y&1)) ? 8 : 2)) : - h->non_zero_count[mbn_xy][7+(MB_FIELD ? (i&3) : (i>>2)+(mb_y&1)*2)*8])); + h->non_zero_count[mbn_xy][ off[i] ])); } } } -- cgit v1.2.3