1 files changed, 109 insertions, 14 deletions
diff --git a/libavcodec/h264_mvpred.h b/libavcodec/h264_mvpred.h
index a0886d5d47..dc146a4803 100644
--- a/libavcodec/h264_mvpred.h
+++ b/libavcodec/h264_mvpred.h
@@ -35,7 +35,7 @@
 //#undef NDEBUG
 #include <assert.h>
 
-static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
+static av_always_inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
     const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
     MpegEncContext *s = &h->s;
 
@@ -92,7 +92,7 @@ static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, in
  * @param mx the x component of the predicted motion vector
  * @param my the y component of the predicted motion vector
  */
-static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
+static av_always_inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
     const int index8= scan8[n];
     const int top_ref=      h->ref_cache[list][ index8 - 8 ];
     const int left_ref=     h->ref_cache[list][ index8 - 1 ];
@@ -147,7 +147,7 @@ static inline void pred_motion(H264Context * const h, int n, int part_width, int
  * @param mx the x component of the predicted motion vector
  * @param my the y component of the predicted motion vector
  */
-static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
+static av_always_inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
     if(n==0){
         const int top_ref=      h->ref_cache[list][ scan8[0] - 8 ];
         const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
@@ -182,7 +182,7 @@ static inline void pred_16x8_motion(H264Context * const h, int n, int list, int
  * @param mx the x component of the predicted motion vector
  * @param my the y component of the predicted motion vector
  */
-static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
+static av_always_inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
     if(n==0){
         const int left_ref=      h->ref_cache[list][ scan8[0] - 1 ];
         const int16_t * const A=  h->mv_cache[list][ scan8[0] - 1 ];
@@ -213,22 +213,117 @@ static inline void pred_8x16_motion(H264Context * const h, int n, int list, int
     pred_motion(h, n, 2, list, ref, mx, my);
 }
 
-static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
-    const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
-    const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
+#define FIX_MV_MBAFF(type, refn, mvn, idx)\
+    if(FRAME_MBAFF){\
+        if(MB_FIELD){\
+            if(!IS_INTERLACED(type)){\
+                refn <<= 1;\
+                AV_COPY32(mvbuf[idx], mvn);\
+                mvbuf[idx][1] /= 2;\
+                mvn = mvbuf[idx];\
+            }\
+        }else{\
+            if(IS_INTERLACED(type)){\
+                refn >>= 1;\
+                AV_COPY32(mvbuf[idx], mvn);\
+                mvbuf[idx][1] <<= 1;\
+                mvn = mvbuf[idx];\
+            }\
+        }\
+    }
 
-    tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
+static av_always_inline void pred_pskip_motion(H264Context * const h){
+    DECLARE_ALIGNED(4, static const int16_t, zeromv)[2] = {0};
+    DECLARE_ALIGNED(4, int16_t, mvbuf)[3][2];
+    MpegEncContext * const s = &h->s;
+    int8_t *ref = s->current_picture.ref_index[0];
+    int16_t (*mv)[2] = s->current_picture.motion_val[0];
+    int top_ref, left_ref, diagonal_ref, match_count, mx, my;
+    const int16_t *A, *B, *C;
+    int b_stride = h->b_stride;
+
+    fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
+
+    /* To avoid doing an entire fill_decode_caches, we inline the relevant parts here.
+     * FIXME: this is a partial duplicate of the logic in fill_decode_caches, but it's
+     * faster this way.  Is there a way to avoid this duplication?
+     */
+    if(USES_LIST(h->left_type[LTOP], 0)){
+        left_ref = ref[4*h->left_mb_xy[LTOP] + 1 + (h->left_block[0]&~1)];
+        A = mv[h->mb2b_xy[h->left_mb_xy[LTOP]] + 3 + b_stride*h->left_block[0]];
+        FIX_MV_MBAFF(h->left_type[LTOP], left_ref, A, 0);
+        if(!(left_ref | AV_RN32A(A))){
+            goto zeromv;
+        }
+    }else if(h->left_type[LTOP]){
+        left_ref = LIST_NOT_USED;
+        A = zeromv;
+    }else{
+        goto zeromv;
+    }
 
-    if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
-       || !( top_ref | AV_RN32A(h->mv_cache[0][ scan8[0] - 8 ]))
-       || !(left_ref | AV_RN32A(h->mv_cache[0][ scan8[0] - 1 ]))){
+    if(USES_LIST(h->top_type, 0)){
+        top_ref = ref[4*h->top_mb_xy + 2];
+        B = mv[h->mb2b_xy[h->top_mb_xy] + 3*b_stride];
+        FIX_MV_MBAFF(h->top_type, top_ref, B, 1);
+        if(!(top_ref | AV_RN32A(B))){
+            goto zeromv;
+        }
+    }else if(h->top_type){
+        top_ref = LIST_NOT_USED;
+        B = zeromv;
+    }else{
+        goto zeromv;
+    }
+
+    tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
 
-        *mx = *my = 0;
-        return;
+    if(USES_LIST(h->topright_type, 0)){
+        diagonal_ref = ref[4*h->topright_mb_xy + 2];
+        C = mv[h->mb2b_xy[h->topright_mb_xy] + 3*b_stride];
+        FIX_MV_MBAFF(h->topright_type, diagonal_ref, C, 2);
+    }else if(h->topright_type){
+        diagonal_ref = LIST_NOT_USED;
+        C = zeromv;
+    }else{
+        if(USES_LIST(h->topleft_type, 0)){
+            diagonal_ref = ref[4*h->topleft_mb_xy + 1 + (h->topleft_partition & 2)];
+            C = mv[h->mb2b_xy[h->topleft_mb_xy] + 3 + b_stride + (h->topleft_partition & 2*b_stride)];
+            FIX_MV_MBAFF(h->topleft_type, diagonal_ref, C, 2);
+        }else if(h->topleft_type){
+            diagonal_ref = LIST_NOT_USED;
+            C = zeromv;
+        }else{
+            diagonal_ref = PART_NOT_AVAILABLE;
+            C = zeromv;
+        }
     }
 
-    pred_motion(h, 0, 4, 0, 0, mx, my);
+    match_count= !diagonal_ref + !top_ref + !left_ref;
+    tprintf(h->s.avctx, "pred_pskip_motion match_count=%d\n", match_count);
+    if(match_count > 1){
+        mx = mid_pred(A[0], B[0], C[0]);
+        my = mid_pred(A[1], B[1], C[1]);
+    }else if(match_count==1){
+        if(!left_ref){
+            mx = A[0];
+            my = A[1];
+        }else if(!top_ref){
+            mx = B[0];
+            my = B[1];
+        }else{
+            mx = C[0];
+            my = C[1];
+        }
+    }else{
+        mx = mid_pred(A[0], B[0], C[0]);
+        my = mid_pred(A[1], B[1], C[1]);
+    }
 
+    fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
+    return;
+zeromv:
+    fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
     return;
 }