diff options
author | gxw <guxiwei-hf@loongson.cn> | 2018-09-05 13:31:06 +0300 |
---|---|---|
committer | Michael Niedermayer <michael@niedermayer.cc> | 2018-09-09 13:01:07 +0300 |
commit | 090647da84f975c7ffb163436040cc8aecf46a9c (patch) | |
tree | 42c921c977bf61c7081050cd0b615e2a33387570 /libavutil/mips | |
parent | 8ef7fb86d62c9d44697c8eef0ddc424be4a3612b (diff) |
avcodec/mips: [loongson] optimize vp8 decoding in vp8dsp.
Optimize vp8 loop filter with mmi, four functions optimized:
1. ff_vp8_h_loop_filter8uv_mmi.
2. ff_vp8_v_loop_filter8uv_mmi.
3. ff_vp8_h_loop_filter16_mmi.
4. ff_vp8_v_loop_filter16_mmi.
VP8 decoding speed improved by about 50% (from 73 fps to 110 fps, tested on Loongson 3A3000).
Signed-off-by: Shiyou Yin <yinshiyou-hf@loongson.cn>
Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
Diffstat (limited to 'libavutil/mips')
-rw-r--r-- | libavutil/mips/mmiutils.h | 28 |
1 files changed, 28 insertions, 0 deletions
diff --git a/libavutil/mips/mmiutils.h b/libavutil/mips/mmiutils.h index 2b1a52105e..b16edc4ba1 100644 --- a/libavutil/mips/mmiutils.h +++ b/libavutil/mips/mmiutils.h @@ -275,6 +275,34 @@ "punpcklwd "#m3", "#t2", "#t4" \n\t" \ "punpckhwd "#m4", "#t2", "#t4" \n\t" +/** + * brief: Parallel arithmetic right shift (SRA) of 8 packed bytes. + * fr_i0: src + * fr_i1: halfword shift count passed to psrah (desired per-byte shift + 8) + * fr_t0, fr_t1: temporary registers (overwritten) + * fr_d0: dst + */ +#define PSRAB_MMI(fr_i0, fr_i1, fr_t0, fr_t1, fr_d0) \ + "punpcklbh "#fr_t0", "#fr_t0", "#fr_i0" \n\t" \ + "punpckhbh "#fr_t1", "#fr_t1", "#fr_i0" \n\t" \ + "psrah "#fr_t0", "#fr_t0", "#fr_i1" \n\t" \ + "psrah "#fr_t1", "#fr_t1", "#fr_i1" \n\t" \ + "packsshb "#fr_d0", "#fr_t0", "#fr_t1" \n\t" + +/** + * brief: Parallel logical right shift (SRL) of 8 packed bytes. + * fr_i0: src + * fr_i1: halfword shift count passed to psrlh (desired per-byte shift + 8) + * fr_t0, fr_t1: temporary registers (overwritten) + * fr_d0: dst + */ +#define PSRLB_MMI(fr_i0, fr_i1, fr_t0, fr_t1, fr_d0) \ + "punpcklbh "#fr_t0", "#fr_t0", "#fr_i0" \n\t" \ + "punpckhbh "#fr_t1", "#fr_t1", "#fr_i0" \n\t" \ + "psrlh "#fr_t0", "#fr_t0", "#fr_i1" \n\t" \ + "psrlh "#fr_t1", "#fr_t1", "#fr_i1" \n\t" \ + "packsshb "#fr_d0", "#fr_t0", "#fr_t1" \n\t" + #define PSRAH_4_MMI(fp1, fp2, fp3, fp4, shift) \ "psrah "#fp1", "#fp1", "#shift" \n\t" \ |