Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.xiph.org/xiph/opus.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorViswanath Puttagunta <viswanath.puttagunta@linaro.org>2015-05-15 20:42:19 +0300
committerJean-Marc Valin <jmvalin@jmvalin.ca>2015-10-08 01:09:20 +0300
commitf48abe8308ba7a67e443ad0911e06d62fd47ba91 (patch)
treea231446a0bc212ae891524881580f9b5ffd90798 /celt/dump_modes
parent0fe514352568530d4bd18a6686e6878417e6cf41 (diff)
armv7(float): Optimize encode usecase using NE10 library
Optimize opus encode (float only) use case using the ARM NE10 library. Mainly affects opus_fft and clt_mdct_forward and related functions. This optimization can be used for ARM CPUs that have a NEON VFP unit. This patch only enables optimizations for ARMv7. Official ARM NE10 library page available at http://projectne10.github.io/Ne10/ To enable this optimization, use --enable-intrinsics --with-NE10=<install_prefix> or --enable-intrinsics --with-NE10-libraries=<NE10_lib_dir> --with-NE10-includes=<NE10_includes_dir>. Compile time checks are made during the configure process to make sure the optimization option is available only when the compiler supports NEON intrinsics. Runtime checks are made to make sure optimized functions are only called on appropriate hardware. Signed-off-by: Timothy B. Terriberry <tterribe@xiph.org>
Diffstat (limited to 'celt/dump_modes')
-rw-r--r--celt/dump_modes/Makefile24
-rw-r--r--celt/dump_modes/dump_modes.c21
-rw-r--r--celt/dump_modes/dump_modes_arch.h41
-rw-r--r--celt/dump_modes/dump_modes_arm_ne10.c131
4 files changed, 216 insertions, 1 deletions
diff --git a/celt/dump_modes/Makefile b/celt/dump_modes/Makefile
index 763cb303..93f599fb 100644
--- a/celt/dump_modes/Makefile
+++ b/celt/dump_modes/Makefile
@@ -1,10 +1,32 @@
+
CFLAGS=-O2 -Wall -Wextra -DHAVE_CONFIG_H
INCLUDES=-I. -I../ -I../.. -I../../include
+SOURCES = dump_modes.c \
+ ../modes.c \
+ ../cwrs.c \
+ ../rate.c \
+ ../entcode.c \
+ ../entenc.c \
+ ../entdec.c \
+ ../mathops.c \
+ ../mdct.c \
+ ../kiss_fft.c
+
+ifdef HAVE_ARM_NE10
+CC = gcc
+CFLAGS += -mfpu=neon
+INCLUDES += -I$(NE10_INCDIR) -DHAVE_ARM_NE10 -DOPUS_ARM_PRESUME_NEON_INTR
+LIBS = -L$(NE10_LIBDIR) -lNE10
+SOURCES += ../arm/celt_ne10_fft.c \
+ dump_modes_arm_ne10.c \
+ ../arm/armcpu.c
+endif
+
all: dump_modes
dump_modes:
- $(CC) $(CFLAGS) $(INCLUDES) -DCUSTOM_MODES_ONLY -DCUSTOM_MODES dump_modes.c ../modes.c ../cwrs.c ../rate.c ../entcode.c ../entenc.c ../entdec.c ../mathops.c ../mdct.c ../kiss_fft.c -o dump_modes -lm
+ $(PREFIX)$(CC) $(CFLAGS) $(INCLUDES) -DCUSTOM_MODES_ONLY -DCUSTOM_MODES $(SOURCES) -o $@ $(LIBS) -lm
clean:
rm -f dump_modes
diff --git a/celt/dump_modes/dump_modes.c b/celt/dump_modes/dump_modes.c
index ae6a8c15..9105a534 100644
--- a/celt/dump_modes/dump_modes.c
+++ b/celt/dump_modes/dump_modes.c
@@ -35,6 +35,7 @@
#include "modes.h"
#include "celt.h"
#include "rate.h"
+#include "dump_modes_arch.h"
#define INT16 "%d"
#define INT32 "%d"
@@ -62,6 +63,10 @@ void dump_modes(FILE *file, CELTMode **modes, int nb_modes)
fprintf(file, "\n It contains static definitions for some pre-defined modes. */\n");
fprintf(file, "#include \"modes.h\"\n");
fprintf(file, "#include \"rate.h\"\n");
+ fprintf(file, "\n#ifdef HAVE_ARM_NE10\n");
+ fprintf(file, "#define OVERRIDE_FFT 1\n");
+ fprintf(file, "#include \"%s\"\n", ARM_NE10_ARCH_FILE_NAME);
+ fprintf(file, "#endif\n");
fprintf(file, "\n");
@@ -149,6 +154,9 @@ void dump_modes(FILE *file, CELTMode **modes, int nb_modes)
fprintf (file, "{" WORD16 ", " WORD16 "},%c", mode->mdct.kfft[0]->twiddles[j].r, mode->mdct.kfft[0]->twiddles[j].i,(j+3)%2==0?'\n':' ');
fprintf (file, "};\n");
+#ifdef OVERRIDE_FFT
+ dump_mode_arch(mode);
+#endif
/* FFT Bitrev tables */
for (k=0;k<=mode->mdct.maxshift;k++)
{
@@ -183,6 +191,13 @@ void dump_modes(FILE *file, CELTMode **modes, int nb_modes)
fprintf (file, "}, /* factors */\n");
fprintf (file, "fft_bitrev%d, /* bitrev */\n", mode->mdct.kfft[k]->nfft);
fprintf (file, "fft_twiddles%d_%d, /* bitrev */\n", mode->Fs, mdctSize);
+
+ fprintf (file, "#ifdef OVERRIDE_FFT\n");
+ fprintf (file, "(arch_fft_state *)&cfg_arch_%d,\n", mode->mdct.kfft[k]->nfft);
+ fprintf (file, "#else\n");
+ fprintf (file, "NULL,\n");
+ fprintf(file, "#endif\n");
+
fprintf (file, "};\n");
fprintf(file, "#endif\n");
@@ -323,8 +338,14 @@ int main(int argc, char **argv)
}
}
file = fopen(BASENAME ".h", "w");
+#ifdef OVERRIDE_FFT
+ dump_modes_arch_init(m, nb);
+#endif
dump_modes(file, m, nb);
fclose(file);
+#ifdef OVERRIDE_FFT
+ dump_modes_arch_finalize();
+#endif
for (i=0;i<nb;i++)
opus_custom_mode_destroy(m[i]);
free(m);
diff --git a/celt/dump_modes/dump_modes_arch.h b/celt/dump_modes/dump_modes_arch.h
new file mode 100644
index 00000000..1436926e
--- /dev/null
+++ b/celt/dump_modes/dump_modes_arch.h
@@ -0,0 +1,41 @@
+/* Copyright (c) 2015 Xiph.Org Foundation
+ Written by Viswanath Puttagunta */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef DUMP_MODE_ARCH_H
+#define DUMP_MODE_ARCH_H
+
+/* This header refers to CELTMode but does not include "modes.h" itself;
+   include "modes.h" before including this file. */
+
+/* Opens the architecture-specific output file (ARM_NE10_ARCH_FILE_NAME) and
+   writes its leading comment, which lists the Fs / MDCT size of each of the
+   nb_modes entries in modes. Must be called before dump_mode_arch(). */
+void dump_modes_arch_init(CELTMode **modes, int nb_modes);
+
+/* Writes the architecture-specific FFT tables for a single mode to the file
+   opened by dump_modes_arch_init(). */
+void dump_mode_arch(CELTMode *mode);
+
+/* Closes the file opened by dump_modes_arch_init(). */
+void dump_modes_arch_finalize(void);
+
+/* Name of the generated header that receives the NE10 tables. */
+#define ARM_NE10_ARCH_FILE_NAME "static_modes_float_arm_ne10.h"
+
+/* The architecture-specific dump is only compiled in for NE10 builds. */
+#if defined(HAVE_ARM_NE10)
+#define OVERRIDE_FFT (1)
+#endif
+
+#endif
diff --git a/celt/dump_modes/dump_modes_arm_ne10.c b/celt/dump_modes/dump_modes_arm_ne10.c
new file mode 100644
index 00000000..d37e7ada
--- /dev/null
+++ b/celt/dump_modes/dump_modes_arm_ne10.c
@@ -0,0 +1,131 @@
+/* Copyright (c) 2015 Xiph.Org Foundation
+ Written by Viswanath Puttagunta */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#if defined(HAVE_CONFIG_H)
+# include "config.h"
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "modes.h"
+#include "dump_modes_arch.h"
+#include <NE10_dsp.h>
+
+static FILE *file;
+
+/* Opens ARM_NE10_ARCH_FILE_NAME for writing and emits its leading comment
+   (one "Fs mdctSize" pair per entry of modes) followed by the NE10 include.
+   modes:    array of nb_modes mode pointers to list in the comment.
+   nb_modes: number of entries in modes.
+   The stream is kept in the file-scope variable `file` for later calls to
+   dump_mode_arch(). If the file cannot be created the program is terminated,
+   because every subsequent write would go through a NULL stream. */
+void dump_modes_arch_init(CELTMode **modes, int nb_modes)
+{
+   int i;
+
+   file = fopen(ARM_NE10_ARCH_FILE_NAME, "w");
+   if (file == NULL)
+   {
+      perror(ARM_NE10_ARCH_FILE_NAME);
+      exit(EXIT_FAILURE);
+   }
+   fprintf(file, "/* The contents of this file was automatically generated by\n");
+   fprintf(file, " * dump_mode_arm_ne10.c with arguments:");
+   for (i=0;i<nb_modes;i++)
+   {
+      CELTMode *mode = modes[i];
+      fprintf(file, " %d %d",mode->Fs,mode->shortMdctSize*mode->nbShortMdcts);
+   }
+   fprintf(file, "\n * It contains static definitions for some pre-defined modes. */\n");
+   fprintf(file, "#include <NE10_init.h>\n\n");
+}
+
+/* Closes the stream held in the file-scope variable `file`.
+   Does nothing if no stream is open. A failing fclose() (for example when
+   the final flush cannot be written) is reported on stderr, since it means
+   the generated header may be incomplete. */
+void dump_modes_arch_finalize(void)
+{
+   if (file == NULL)
+      return;
+   if (fclose(file) != 0)
+      perror(ARM_NE10_ARCH_FILE_NAME);
+   /* Guard against a second close or a write after close. */
+   file = NULL;
+}
+
+/* Writes the NE10 FFT tables for one mode to the stream in `file`.
+   All output is wrapped in an "#ifndef NE10_FFT_PARAMS<Fs>_<mdctSize>" guard.
+   Three passes are made over the FFT shifts 0..mode->mdct.maxshift, in this
+   order, so that every table is emitted before the state that refers to it:
+     1. the factors array of each shift that has an NE10 configuration,
+     2. the twiddles array of each such shift,
+     3. an ne10_fft_state_float32_t plus an arch_fft_state per shift; shifts
+        without an NE10 configuration get an arch_fft_state holding 0/NULL. */
+void dump_mode_arch(CELTMode *mode)
+{
+   int shift, idx;
+   int mdctSize;
+   const int factor_count = NE10_MAXFACTORS * 2;
+
+   mdctSize = mode->shortMdctSize*mode->nbShortMdcts;
+
+   fprintf(file, "#ifndef NE10_FFT_PARAMS%d_%d\n", mode->Fs, mdctSize);
+   fprintf(file, "#define NE10_FFT_PARAMS%d_%d\n", mode->Fs, mdctSize);
+
+   /* Pass 1: cfg->factors */
+   for (shift=0; shift<=mode->mdct.maxshift; shift++)
+   {
+      ne10_fft_cfg_float32_t cfg =
+         (ne10_fft_cfg_float32_t)mode->mdct.kfft[shift]->arch_fft->priv;
+      if (cfg)
+      {
+         const int nfft = mode->mdct.kfft[shift]->nfft;
+         fprintf(file, "static const ne10_int32_t ne10_factors_%d[%d] = {\n",
+                 nfft, factor_count);
+         for (idx=0; idx<factor_count; idx++)
+         {
+            char sep = ' ';
+            if ((idx+16)%15==0)
+               sep = '\n';
+            fprintf(file, "%d,%c", cfg->factors[idx], sep);
+         }
+         fprintf (file, "};\n");
+      }
+   }
+
+   /* Pass 2: cfg->twiddles */
+   for (shift=0; shift<=mode->mdct.maxshift; shift++)
+   {
+      ne10_fft_cfg_float32_t cfg =
+         (ne10_fft_cfg_float32_t)mode->mdct.kfft[shift]->arch_fft->priv;
+      if (cfg)
+      {
+         const int nfft = mode->mdct.kfft[shift]->nfft;
+         fprintf(file, "static const ne10_fft_cpx_float32_t ne10_twiddles_%d[%d] = {\n",
+                 nfft, nfft);
+         for (idx=0; idx<nfft; idx++)
+         {
+            char sep = ' ';
+            if ((idx+4)%3==0)
+               sep = '\n';
+            fprintf(file, "{%#0.8gf,%#0.8gf},%c",
+                    cfg->twiddles[idx].r, cfg->twiddles[idx].i, sep);
+         }
+         fprintf (file, "};\n");
+      }
+   }
+
+   /* Pass 3: the per-shift state structures */
+   for (shift=0; shift<=mode->mdct.maxshift; shift++)
+   {
+      const int nfft = mode->mdct.kfft[shift]->nfft;
+      ne10_fft_cfg_float32_t cfg =
+         (ne10_fft_cfg_float32_t)mode->mdct.kfft[shift]->arch_fft->priv;
+
+      if (cfg)
+      {
+         fprintf(file, "static const ne10_fft_state_float32_t ne10_fft_state_float32_%d = {\n",
+                 nfft);
+         fprintf(file, "%d,\n", cfg->nfft);
+         fprintf(file, "(ne10_int32_t *)ne10_factors_%d,\n", nfft);
+         fprintf(file, "(ne10_fft_cpx_float32_t *)ne10_twiddles_%d,\n", nfft);
+         fprintf(file, "NULL,\n"); /* buffer */
+         fprintf(file, "(ne10_fft_cpx_float32_t *)&ne10_twiddles_%d[%d],\n",
+                 nfft, cfg->nfft);
+         fprintf(file, "/* is_forward_scaled = true */\n");
+         fprintf(file, "(ne10_int32_t) 1,\n");
+         fprintf(file, "/* is_backward_scaled = false */\n");
+         fprintf(file, "(ne10_int32_t) 0,\n");
+         fprintf(file, "};\n");
+
+         fprintf(file, "static const arch_fft_state cfg_arch_%d = {\n", nfft);
+         fprintf(file, "1,\n");
+         fprintf(file, "(void *)&ne10_fft_state_float32_%d,\n", nfft);
+         fprintf(file, "};\n\n");
+      }
+      else
+      {
+         /* No NE10 configuration for this length: emit an empty state. */
+         fprintf(file, "/* Ne10 does not support scaled FFT for length = %d */\n",
+                 nfft);
+         fprintf(file, "static const arch_fft_state cfg_arch_%d = {\n", nfft);
+         fprintf(file, "0,\n");
+         fprintf(file, "NULL\n");
+         fprintf(file, "};\n");
+      }
+   }
+   fprintf(file, "#endif /* end NE10_FFT_PARAMS%d_%d */\n", mode->Fs, mdctSize);
+}