From 4320a309ce10a7eec93aef239a0776a33b1a5a34 Mon Sep 17 00:00:00 2001 From: Jason Garrett-Glaser Date: Mon, 27 Jun 2011 13:07:26 -0700 Subject: H.264: make filter_mb_fast support the case of unavailable top mb Significantly faster deblocking in streams with lots of slices. --- libavcodec/h264_loopfilter.c | 42 ++++++++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 14 deletions(-) (limited to 'libavcodec/h264_loopfilter.c') diff --git a/libavcodec/h264_loopfilter.c b/libavcodec/h264_loopfilter.c index bdfbce79f0..1575b1b3d9 100644 --- a/libavcodec/h264_loopfilter.c +++ b/libavcodec/h264_loopfilter.c @@ -215,19 +215,20 @@ static void av_always_inline filter_mb_edgech( uint8_t *pix, int stride, int16_t void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) { MpegEncContext * const s = &h->s; int mb_xy; - int mb_type, left_type; + int mb_type, left_type, top_type; int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh; int chroma = !(CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY)); int chroma444 = CHROMA444; mb_xy = h->mb_xy; - if(!h->top_type || !h->h264dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff) { + if(!h->h264dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff) { ff_h264_filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize); return; } assert(!FRAME_MBAFF); left_type= h->left_type[0]; + top_type= h->top_type; mb_type = s->current_picture.mb_type[mb_xy]; qp = s->current_picture.qscale_table[mb_xy]; @@ -253,13 +254,17 @@ void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, filter_mb_edgev( &img_y[4*0], linesize, bS4, qp0, h); if( IS_8x8DCT(mb_type) ) { filter_mb_edgev( &img_y[4*2], linesize, bS3, qp, h); - filter_mb_edgeh( &img_y[4*0*linesize], linesize, bSH, qp1, h); + if(top_type){ + filter_mb_edgeh( &img_y[4*0*linesize], linesize, bSH, qp1, h); + } filter_mb_edgeh( &img_y[4*2*linesize], linesize, bS3, qp, h); } else { filter_mb_edgev( &img_y[4*1], linesize, bS3, qp, h); filter_mb_edgev( &img_y[4*2], linesize, bS3, qp, h); filter_mb_edgev( &img_y[4*3], linesize, bS3, qp, h); - filter_mb_edgeh( &img_y[4*0*linesize], linesize, bSH, qp1, h); + if(top_type){ + filter_mb_edgeh( &img_y[4*0*linesize], linesize, bSH, qp1, h); + } filter_mb_edgeh( &img_y[4*1*linesize], linesize, bS3, qp, h); filter_mb_edgeh( &img_y[4*2*linesize], linesize, bS3, qp, h); filter_mb_edgeh( &img_y[4*3*linesize], linesize, bS3, qp, h); @@ -273,8 +278,10 @@ void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, if( IS_8x8DCT(mb_type) ) { filter_mb_edgev( &img_cb[4*2], linesize, bS3, qpc, h); filter_mb_edgev( &img_cr[4*2], linesize, bS3, qpc, h); - filter_mb_edgeh( &img_cb[4*0*linesize], linesize, bSH, qpc1, h); - filter_mb_edgeh( &img_cr[4*0*linesize], linesize, bSH, qpc1, h); + if(top_type){ + filter_mb_edgeh( &img_cb[4*0*linesize], linesize, bSH, qpc1, h); + filter_mb_edgeh( &img_cr[4*0*linesize], linesize, bSH, qpc1, h); + } filter_mb_edgeh( &img_cb[4*2*linesize], linesize, bS3, qpc, h); filter_mb_edgeh( &img_cr[4*2*linesize], linesize, bS3, qpc, h); } else { @@ -284,8 +291,10 @@ void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, filter_mb_edgev( &img_cr[4*2], linesize, bS3, qpc, h); filter_mb_edgev( &img_cb[4*3], linesize, bS3, qpc, h); filter_mb_edgev( &img_cr[4*3], linesize, bS3, qpc, h); - filter_mb_edgeh( &img_cb[4*0*linesize], linesize, bSH, qpc1, h); - filter_mb_edgeh( &img_cr[4*0*linesize], linesize, bSH, qpc1, h); + if(top_type){ + filter_mb_edgeh( &img_cb[4*0*linesize], linesize, bSH, qpc1, h); + filter_mb_edgeh( &img_cr[4*0*linesize], linesize, bSH, qpc1, h); + } filter_mb_edgeh( &img_cb[4*1*linesize], linesize, bS3, qpc, h); filter_mb_edgeh( &img_cr[4*1*linesize], linesize, bS3, qpc, h); filter_mb_edgeh( &img_cb[4*2*linesize], linesize, bS3, qpc, h); @@ -300,9 +309,11 @@ void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, } filter_mb_edgecv( &img_cb[2*2], uvlinesize, bS3, qpc, h); filter_mb_edgecv( &img_cr[2*2], uvlinesize, bS3, qpc, h); - filter_mb_edgech( &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1, h); + if(top_type){ + filter_mb_edgech( &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1, h); + filter_mb_edgech( &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1, h); + } filter_mb_edgech( &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc, h); - filter_mb_edgech( &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1, h); filter_mb_edgech( &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc, h); } } @@ -326,7 +337,7 @@ void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, } if( IS_INTRA(left_type) ) AV_WN64A(bS[0][0], 0x0004000400040004ULL); - if( IS_INTRA(h->top_type) ) + if( IS_INTRA(top_type) ) AV_WN64A(bS[1][0], FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL); #define FILTER(hv,dir,edge)\ @@ -345,16 +356,19 @@ void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, if(left_type) FILTER(v,0,0); if( edges == 1 ) { - FILTER(h,1,0); + if(top_type) + FILTER(h,1,0); } else if( IS_8x8DCT(mb_type) ) { FILTER(v,0,2); - FILTER(h,1,0); + if(top_type) + FILTER(h,1,0); FILTER(h,1,2); } else { FILTER(v,0,1); FILTER(v,0,2); FILTER(v,0,3); - FILTER(h,1,0); + if(top_type) + FILTER(h,1,0); FILTER(h,1,1); FILTER(h,1,2); FILTER(h,1,3); -- cgit v1.2.3 From 556f8a066cb33241bf29e85d7e24c9acf7ea9043 Mon Sep 17 00:00:00 2001 From: Jason Garrett-Glaser Date: Wed, 29 Jun 2011 15:38:39 -0700 Subject: H.264: template left MB handling Faster H.264 decoding with ALLOW_INTERLACE off. --- libavcodec/h264_loopfilter.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'libavcodec/h264_loopfilter.c') diff --git a/libavcodec/h264_loopfilter.c b/libavcodec/h264_loopfilter.c index 1575b1b3d9..e6b6141f13 100644 --- a/libavcodec/h264_loopfilter.c +++ b/libavcodec/h264_loopfilter.c @@ -227,7 +227,7 @@ void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, return; } assert(!FRAME_MBAFF); - left_type= h->left_type[0]; + left_type= h->left_type[LTOP]; top_type= h->top_type; mb_type = s->current_picture.mb_type[mb_xy]; @@ -329,7 +329,7 @@ void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, AV_WN64A(bS[1][2], 0x0002000200020002ULL); } else { int mask_edge1 = (3*(((5*mb_type)>>5)&1)) | (mb_type>>4); //(mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 : (mb_type & MB_TYPE_16x8) ? 1 : 0; - int mask_edge0 = 3*((mask_edge1>>1) & ((5*left_type)>>5)&1); // (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) && (h->left_type[0] & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 : 0; + int mask_edge0 = 3*((mask_edge1>>1) & ((5*left_type)>>5)&1); // (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) && (h->left_type[LTOP] & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 : 0; int step = 1+(mb_type>>24); //IS_8x8DCT(mb_type) ? 2 : 1; edges = 4 - 3*((mb_type>>3) & !(h->cbp & 15)); //(mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4; h->h264dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache, @@ -411,7 +411,7 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u int edge; int chroma_qp_avg[2]; const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy; - const int mbm_type = dir == 0 ? h->left_type[0] : h->top_type; + const int mbm_type = dir == 0 ? h->left_type[LTOP] : h->top_type; // how often to recheck mv-based bS when iterating between edges static const uint8_t mask_edge_tab[2][8]={{0,3,3,3,1,1,1,1}, @@ -647,9 +647,9 @@ void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint if (FRAME_MBAFF // and current and left pair do not have the same interlaced type - && IS_INTERLACED(mb_type^h->left_type[0]) + && IS_INTERLACED(mb_type^h->left_type[LTOP]) // and left mb is in available to us - && h->left_type[0]) { + && h->left_type[LTOP]) { /* First vertical edge is different in MBAFF frames * There are 8 different bS to compute and 2 different Qp */ @@ -677,8 +677,8 @@ void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint const uint8_t *off= offset[MB_FIELD][mb_y&1]; for( i = 0; i < 8; i++ ) { int j= MB_FIELD ? i>>2 : i&1; - int mbn_xy = h->left_mb_xy[j]; - int mbn_type= h->left_type[j]; + int mbn_xy = h->left_mb_xy[LEFT(j)]; + int mbn_type= h->left_type[LEFT(j)]; if( IS_INTRA( mbn_type ) ) bS[i] = 4; -- cgit v1.2.3