diff options
Diffstat (limited to 'libavcodec/h264_mvpred.h')
-rw-r--r-- | libavcodec/h264_mvpred.h | 123 |
1 files changed, 109 insertions, 14 deletions
diff --git a/libavcodec/h264_mvpred.h b/libavcodec/h264_mvpred.h
index a0886d5d47..dc146a4803 100644
--- a/libavcodec/h264_mvpred.h
+++ b/libavcodec/h264_mvpred.h
@@ -35,7 +35,7 @@
 //#undef NDEBUG
 #include <assert.h>
 
-static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
+static av_always_inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
     const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
     MpegEncContext *s = &h->s;
 
@@ -92,7 +92,7 @@ static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, in
  * @param mx the x component of the predicted motion vector
  * @param my the y component of the predicted motion vector
  */
-static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
+static av_always_inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
     const int index8= scan8[n];
     const int top_ref= h->ref_cache[list][ index8 - 8 ];
     const int left_ref= h->ref_cache[list][ index8 - 1 ];
@@ -147,7 +147,7 @@ static inline void pred_motion(H264Context * const h, int n, int part_width, int
  * @param mx the x component of the predicted motion vector
  * @param my the y component of the predicted motion vector
  */
-static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
+static av_always_inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
     if(n==0){
         const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
         const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
@@ -182,7 +182,7 @@ static inline void pred_16x8_motion(H264Context * const h, int n, int list, int
  * @param mx the x component of the predicted motion vector
  * @param my the y component of the predicted motion vector
  */
-static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
+static av_always_inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
     if(n==0){
         const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
         const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
@@ -213,22 +213,117 @@ static inline void pred_8x16_motion(H264Context * const h, int n, int list, int
     pred_motion(h, n, 2, list, ref, mx, my);
 }
 
-static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
-    const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
-    const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
+#define FIX_MV_MBAFF(type, refn, mvn, idx)\
+    if(FRAME_MBAFF){\
+        if(MB_FIELD){\
+            if(!IS_INTERLACED(type)){\
+                refn <<= 1;\
+                AV_COPY32(mvbuf[idx], mvn);\
+                mvbuf[idx][1] /= 2;\
+                mvn = mvbuf[idx];\
+            }\
+        }else{\
+            if(IS_INTERLACED(type)){\
+                refn >>= 1;\
+                AV_COPY32(mvbuf[idx], mvn);\
+                mvbuf[idx][1] <<= 1;\
+                mvn = mvbuf[idx];\
+            }\
+        }\
+    }
 
-    tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
+static av_always_inline void pred_pskip_motion(H264Context * const h){
+    DECLARE_ALIGNED(4, static const int16_t, zeromv)[2] = {0};
+    DECLARE_ALIGNED(4, int16_t, mvbuf)[3][2];
+    MpegEncContext * const s = &h->s;
+    int8_t *ref = s->current_picture.ref_index[0];
+    int16_t (*mv)[2] = s->current_picture.motion_val[0];
+    int top_ref, left_ref, diagonal_ref, match_count, mx, my;
+    const int16_t *A, *B, *C;
+    int b_stride = h->b_stride;
+
+    fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
+
+    /* To avoid doing an entire fill_decode_caches, we inline the relevant parts here.
+     * FIXME: this is a partial duplicate of the logic in fill_decode_caches, but it's
+     * faster this way. Is there a way to avoid this duplication?
+     */
+    if(USES_LIST(h->left_type[LTOP], 0)){
+        left_ref = ref[4*h->left_mb_xy[LTOP] + 1 + (h->left_block[0]&~1)];
+        A = mv[h->mb2b_xy[h->left_mb_xy[LTOP]] + 3 + b_stride*h->left_block[0]];
+        FIX_MV_MBAFF(h->left_type[LTOP], left_ref, A, 0);
+        if(!(left_ref | AV_RN32A(A))){
+            goto zeromv;
+        }
+    }else if(h->left_type[LTOP]){
+        left_ref = LIST_NOT_USED;
+        A = zeromv;
+    }else{
+        goto zeromv;
+    }
 
-    if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
-       || !( top_ref | AV_RN32A(h->mv_cache[0][ scan8[0] - 8 ]))
-       || !(left_ref | AV_RN32A(h->mv_cache[0][ scan8[0] - 1 ]))){
+    if(USES_LIST(h->top_type, 0)){
+        top_ref = ref[4*h->top_mb_xy + 2];
+        B = mv[h->mb2b_xy[h->top_mb_xy] + 3*b_stride];
+        FIX_MV_MBAFF(h->top_type, top_ref, B, 1);
+        if(!(top_ref | AV_RN32A(B))){
+            goto zeromv;
+        }
+    }else if(h->top_type){
+        top_ref = LIST_NOT_USED;
+        B = zeromv;
+    }else{
+        goto zeromv;
+    }
+
+    tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
 
-        *mx = *my = 0;
-        return;
+    if(USES_LIST(h->topright_type, 0)){
+        diagonal_ref = ref[4*h->topright_mb_xy + 2];
+        C = mv[h->mb2b_xy[h->topright_mb_xy] + 3*b_stride];
+        FIX_MV_MBAFF(h->topright_type, diagonal_ref, C, 2);
+    }else if(h->topright_type){
+        diagonal_ref = LIST_NOT_USED;
+        C = zeromv;
+    }else{
+        if(USES_LIST(h->topleft_type, 0)){
+            diagonal_ref = ref[4*h->topleft_mb_xy + 1 + (h->topleft_partition & 2)];
+            C = mv[h->mb2b_xy[h->topleft_mb_xy] + 3 + b_stride + (h->topleft_partition & 2*b_stride)];
+            FIX_MV_MBAFF(h->topleft_type, diagonal_ref, C, 2);
+        }else if(h->topleft_type){
+            diagonal_ref = LIST_NOT_USED;
+            C = zeromv;
+        }else{
+            diagonal_ref = PART_NOT_AVAILABLE;
+            C = zeromv;
+        }
     }
 
-    pred_motion(h, 0, 4, 0, 0, mx, my);
+    match_count= !diagonal_ref + !top_ref + !left_ref;
+    tprintf(h->s.avctx, "pred_pskip_motion match_count=%d\n", match_count);
+    if(match_count > 1){
+        mx = mid_pred(A[0], B[0], C[0]);
+        my = mid_pred(A[1], B[1], C[1]);
+    }else if(match_count==1){
+        if(!left_ref){
+            mx = A[0];
+            my = A[1];
+        }else if(!top_ref){
+            mx = B[0];
+            my = B[1];
+        }else{
+            mx = C[0];
+            my = C[1];
+        }
+    }else{
+        mx = mid_pred(A[0], B[0], C[0]);
+        my = mid_pred(A[1], B[1], C[1]);
+    }
 
+    fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
+    return;
+zeromv:
+    fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
     return;
 }
 