Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/FFmpeg/FFmpeg.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRémi Denis-Courmont <remi@remlab.net>2022-09-26 17:52:23 +0300
committerLynne <dev@lynne.ee>2022-09-27 14:19:52 +0300
commit04d092e7d5204f1aebb7e61f92bb263873e0f735 (patch)
treed1047a4697e6b3d7ec746d0fbf16dac161eb441c /libavcodec/riscv
parent746f1ff36ac0d232687820fbde4e4efc79093af7 (diff)
lavc/audiodsp: RISC-V F vector_clipf
RV64G supports MIN & MAX instructions natively only on floating point registers, not general purpose ones. The latter would require the Zbb extension. Due to that, it is actually faster to perform the clipping "properly" in the FPU. Benchmarks on SiFive U74-MC (courtesy of Shanghai StarFive Tech): audiodsp.vector_clipf_c: 29551.5; audiodsp.vector_clipf_rvf: 17871.0. Also tried unrolling with 2 or 8 elements but it gets worse either way.
Diffstat (limited to 'libavcodec/riscv')
-rw-r--r--libavcodec/riscv/Makefile2
-rw-r--r--libavcodec/riscv/audiodsp_init.c33
-rw-r--r--libavcodec/riscv/audiodsp_rvf.S49
3 files changed, 84 insertions, 0 deletions
diff --git a/libavcodec/riscv/Makefile b/libavcodec/riscv/Makefile
new file mode 100644
index 0000000000..414a9e9bd8
--- /dev/null
+++ b/libavcodec/riscv/Makefile
@@ -0,0 +1,2 @@
+OBJS-$(CONFIG_AUDIODSP) += riscv/audiodsp_init.o \
+ riscv/audiodsp_rvf.o
diff --git a/libavcodec/riscv/audiodsp_init.c b/libavcodec/riscv/audiodsp_init.c
new file mode 100644
index 0000000000..c5842815d6
--- /dev/null
+++ b/libavcodec/riscv/audiodsp_init.c
@@ -0,0 +1,33 @@
+/*
+ * Copyright © 2022 Rémi Denis-Courmont.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavcodec/audiodsp.h"
+
+void ff_vector_clipf_rvf(float *dst, const float *src, int len, float min, float max); /* assembly routine from audiodsp_rvf.S */
+
+av_cold void ff_audiodsp_init_riscv(AudioDSPContext *c) /* Install RISC-V specific routines into the DSP context c. */
+{
+ int flags = av_get_cpu_flags(); /* CPU feature flags as reported by libavutil */
+
+ if (flags & AV_CPU_FLAG_RVF) /* presumably set when the single-precision F extension is usable - confirm in libavutil/cpu.h */
+ c->vector_clipf = ff_vector_clipf_rvf; /* override; when the flag is clear c->vector_clipf is left untouched */
+}
diff --git a/libavcodec/riscv/audiodsp_rvf.S b/libavcodec/riscv/audiodsp_rvf.S
new file mode 100644
index 0000000000..2ec8a11691
--- /dev/null
+++ b/libavcodec/riscv/audiodsp_rvf.S
@@ -0,0 +1,49 @@
+/*
+ * Copyright © 2022 Rémi Denis-Courmont.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/riscv/asm.S"
+
+func ff_vector_clipf_rvf, f /* Clip floats: dst[i] = fmin(fmax(src[i], min), max); a0 = dst, a1 = src, a2 = len as used below. */
+NOHWF fmv.w.x fa0, a3 /* lower bound: copy raw bits a3 -> fa0; NOHWF presumably restricts this to non-hard-float ABIs - confirm in asm.S */
+NOHWF fmv.w.x fa1, a4 /* upper bound: copy raw bits a4 -> fa1 (same NOHWF caveat) */
+1: /* loop body clips 4 consecutive floats; loads, fmax, fmin and stores are interleaved, presumably for scheduling - benchmark before reordering */
+ flw ft0, (a1) /* ft0 = src[0] */
+ flw ft1, 4(a1) /* ft1 = src[1] */
+ fmax.s ft0, ft0, fa0 /* ft0 = max(ft0, lower bound) */
+ flw ft2, 8(a1) /* ft2 = src[2] */
+ fmax.s ft1, ft1, fa0 /* ft1 = max(ft1, lower bound) */
+ flw ft3, 12(a1) /* ft3 = src[3] */
+ fmax.s ft2, ft2, fa0 /* ft2 = max(ft2, lower bound) */
+ addi a2, a2, -4 /* four fewer elements remaining */
+ fmax.s ft3, ft3, fa0 /* ft3 = max(ft3, lower bound) */
+ addi a1, a1, 16 /* advance src by 4 floats (16 bytes) */
+ fmin.s ft0, ft0, fa1 /* ft0 = min(ft0, upper bound) */
+ fmin.s ft1, ft1, fa1 /* ft1 = min(ft1, upper bound) */
+ fsw ft0, (a0) /* dst[0] = ft0 */
+ fmin.s ft2, ft2, fa1 /* ft2 = min(ft2, upper bound) */
+ fsw ft1, 4(a0) /* dst[1] = ft1 */
+ fmin.s ft3, ft3, fa1 /* ft3 = min(ft3, upper bound) */
+ fsw ft2, 8(a0) /* dst[2] = ft2 */
+ fsw ft3, 12(a0) /* dst[3] = ft3 */
+ addi a0, a0, 16 /* advance dst by 4 floats (16 bytes) */
+ bnez a2, 1b /* loop until a2 == 0 exactly; NOTE(review): a len that is not a positive multiple of 4 never reaches 0 here - confirm callers guarantee it */
+
+ ret
+endfunc