diff options
author | Shiyou Yin <yinshiyou-hf@loongson.cn> | 2019-07-17 12:35:00 +0300 |
---|---|---|
committer | Michael Niedermayer <michael@niedermayer.cc> | 2019-07-19 02:23:23 +0300 |
commit | 153c60752558369b98dce0b7a0ca7acc687fa630 (patch) | |
tree | b1dc51fc656ac92b1ac3f377ee75c6808bec0369 /libavcodec/mips/hevcpred_msa.c | |
parent | 00ed04d6149691a9abf486b2f88172fd6341d801 (diff) |
avutil/mips: refactor msa load and store macros.
Replace STnxm_UB and LDnxm_SH with new macros ST_{H/W/D}{1/2/4/8}.
The old macros are difficult to use because they don't follow the same parameter passing rules.
Changing details as following:
1. remove LD4x4_SH.
2. replace ST2x4_UB with ST_H4.
3. replace ST4x2_UB with ST_W2.
4. replace ST4x4_UB with ST_W4.
5. replace ST4x8_UB with ST_W8.
6. replace ST6x4_UB with ST_W2 and ST_H2.
7. replace ST8x1_UB with ST_D1.
8. replace ST8x2_UB with ST_D2.
9. replace ST8x4_UB with ST_D4.
10. replace ST8x8_UB with ST_D8.
11. replace ST12x4_UB with ST_D4 and ST_W4.
Example of a new macro: ST_H4(in, idx0, idx1, idx2, idx3, pdst, stride)
ST_H4 stores four half-word elements from vector 'in' to pdst with stride.
About the macro names:
1) 'ST' means a store operation.
2) 'H/W/D' means the vector element type is 'half-word/word/double-word'.
3) The number '1/2/4/8' means how many elements will be stored.
About the macro parameters:
1) 'in0, in1...' are 128-bit vectors.
2) 'idx0, idx1...' are element indexes.
3) 'pdst' is the destination pointer to store to.
4) 'stride' is the stride of each store operation.
Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
Diffstat (limited to 'libavcodec/mips/hevcpred_msa.c')
-rw-r--r-- | libavcodec/mips/hevcpred_msa.c | 45 |
1 file changed, 18 insertions, 27 deletions
diff --git a/libavcodec/mips/hevcpred_msa.c b/libavcodec/mips/hevcpred_msa.c index 963c64c861..b8df089e0c 100644 --- a/libavcodec/mips/hevcpred_msa.c +++ b/libavcodec/mips/hevcpred_msa.c @@ -589,7 +589,7 @@ static void hevc_intra_pred_plane_4x4_msa(const uint8_t *src_top, PCKEV_D2_SH(res1, res0, res3, res2, res0, res1); SRARI_H2_SH(res0, res1, 3); src_vec0 = __msa_pckev_b((v16i8) res1, (v16i8) res0); - ST4x4_UB(src_vec0, src_vec0, 0, 1, 2, 3, dst, stride); + ST_W4(src_vec0, 0, 1, 2, 3, dst, stride); } static void hevc_intra_pred_plane_8x8_msa(const uint8_t *src_top, @@ -656,7 +656,8 @@ static void hevc_intra_pred_plane_8x8_msa(const uint8_t *src_top, PCKEV_B4_SB(res1, res0, res3, res2, res5, res4, res7, res6, src_vec0, src_vec1, src_vec2, src_vec3); - ST8x8_UB(src_vec0, src_vec1, src_vec2, src_vec3, dst, stride); + ST_D8(src_vec0, src_vec1, src_vec2, src_vec3, 0, 1, 0, 1, + 0, 1, 0, 1, dst, stride); } static void hevc_intra_pred_plane_16x16_msa(const uint8_t *src_top, @@ -1007,7 +1008,7 @@ static void hevc_intra_pred_angular_upper_4width_msa(const uint8_t *src_top, SRARI_H2_SH(diff1, diff3, 5); dst_val0 = __msa_pckev_b((v16i8) diff3, (v16i8) diff1); - ST4x4_UB(dst_val0, dst_val0, 0, 1, 2, 3, dst, stride); + ST_W4(dst_val0, 0, 1, 2, 3, dst, stride); } static void hevc_intra_pred_angular_upper_8width_msa(const uint8_t *src_top, @@ -1104,7 +1105,7 @@ static void hevc_intra_pred_angular_upper_8width_msa(const uint8_t *src_top, SRARI_H4_SH(diff1, diff3, diff5, diff7, 5); PCKEV_B2_UB(diff3, diff1, diff7, diff5, dst_val0, dst_val1); - ST8x4_UB(dst_val0, dst_val1, dst, stride); + ST_D4(dst_val0, dst_val1, 0, 1, 0, 1, dst, stride); dst += (4 * stride); } } @@ -1425,9 +1426,8 @@ static void hevc_intra_pred_angular_lower_4width_msa(const uint8_t *src_top, dst_val0 = __msa_pckev_b((v16i8) diff2, (v16i8) diff2); dst_val1 = __msa_pckod_b((v16i8) diff2, (v16i8) diff2); - ST4x2_UB(dst_val0, dst, stride); - dst += (2 * stride); - ST4x2_UB(dst_val1, dst, stride); + ST_W2(dst_val0, 
0, 1, dst, stride); + ST_W2(dst_val1, 0, 1, dst + 2 * stride, stride); } static void hevc_intra_pred_angular_lower_8width_msa(const uint8_t *src_top, @@ -1526,7 +1526,7 @@ static void hevc_intra_pred_angular_lower_8width_msa(const uint8_t *src_top, dst_val0, dst_val1, dst_val2, dst_val3); ILVR_B2_SH(dst_val1, dst_val0, dst_val3, dst_val2, diff0, diff1); ILVRL_H2_SH(diff1, diff0, diff3, diff4); - ST4x8_UB(diff3, diff4, dst_org, stride); + ST_W8(diff3, diff4, 0, 1, 2, 3, 0, 1, 2, 3, dst_org, stride); dst += 4; } } @@ -1640,9 +1640,9 @@ static void hevc_intra_pred_angular_lower_16width_msa(const uint8_t *src_top, ILVL_B2_SH(dst_val1, dst_val0, dst_val3, dst_val2, diff2, diff3); ILVRL_H2_SH(diff1, diff0, diff4, diff5); ILVRL_H2_SH(diff3, diff2, diff6, diff7); - ST4x8_UB(diff4, diff5, dst_org, stride); + ST_W8(diff4, diff5, 0, 1, 2, 3, 0, 1, 2, 3, dst_org, stride); dst_org += (8 * stride); - ST4x8_UB(diff6, diff7, dst_org, stride); + ST_W8(diff6, diff7, 0, 1, 2, 3, 0, 1, 2, 3, dst_org, stride); dst += 4; } } @@ -1746,23 +1746,14 @@ static void hevc_intra_pred_angular_lower_32width_msa(const uint8_t *src_top, ILVRL_B2_SH(dst_val2, dst_val0, diff0, diff1); ILVRL_B2_SH(dst_val3, dst_val1, diff2, diff3); - ST2x4_UB(diff0, 0, dst_org, stride); - dst_org += (4 * stride); - ST2x4_UB(diff0, 4, dst_org, stride); - dst_org += (4 * stride); - ST2x4_UB(diff1, 0, dst_org, stride); - dst_org += (4 * stride); - ST2x4_UB(diff1, 4, dst_org, stride); - dst_org += (4 * stride); - - ST2x4_UB(diff2, 0, dst_org, stride); - dst_org += (4 * stride); - ST2x4_UB(diff2, 4, dst_org, stride); - dst_org += (4 * stride); - ST2x4_UB(diff3, 0, dst_org, stride); - dst_org += (4 * stride); - ST2x4_UB(diff3, 4, dst_org, stride); - dst_org += (4 * stride); + ST_H8(diff0, 0, 1, 2, 3, 4, 5, 6, 7, dst_org, stride) + dst_org += (8 * stride); + ST_H8(diff1, 0, 1, 2, 3, 4, 5, 6, 7, dst_org, stride) + dst_org += (8 * stride); + ST_H8(diff2, 0, 1, 2, 3, 4, 5, 6, 7, dst_org, stride) + dst_org += (8 * stride); + 
ST_H8(diff3, 0, 1, 2, 3, 4, 5, 6, 7, dst_org, stride) + dst_org += (8 * stride); dst += 2; } |