Avoid sin()/cos() calls for pitch reference

author: Jean-Marc Valin <jmvalin@amazon.com> 2023-08-02 06:48:34 +0300
committer: Jean-Marc Valin <jmvalin@amazon.com> 2023-08-02 06:48:34 +0300
commit: 3eac8c12e476f68eaa7608cb078379e980d3a7ab (patch)
tree: 77b17a1d3d5907ccba2f9b412d0df6ead4beeefb
parent: 83e95a5ce6619720131e13fa3f39bb8ab3119ecf (diff)
2 files changed, 28 insertions, 8 deletions
diff --git a/dnn/fwgan.c b/dnn/fwgan.c
index 4c22b34c..ae09d13a 100644
--- a/dnn/fwgan.c
+++ b/dnn/fwgan.c
@@ -48,14 +48,33 @@
 
 #define FWGAN_FEATURES (NB_FEATURES-1)
 
-static void pitch_embeddings(float *pembed, double *phase, double w0) {
+static void pitch_embeddings(float *pembed, float *phase, double w0) {
   int i;
-  /* FIXME: This could be speeded up by making phase a unit-norm complex value, rotating it
-     by exp(-i*w0) each sample, and renormalizing once in while.  */
+  float wreal, wimag;
+#if 1
+  /* This Taylor expansion should be good enough since w0 is always small. */
+  float w2 = w0*w0;
+  wreal = 1 - .5*w2*(1.f - 0.083333333f*w2);
+  wimag = w0*(1 - 0.166666667f*w2*(1.f - 0.05f*w2));
+#else
+  wreal = cos(w0);
+  wimag = sin(w0);
+#endif
+  /* Speed up the phase reference by making phase a unit-norm complex value and rotating it
+     by exp(i*w0) each sample.  */
   for (i=0;i<SUBFRAME_SIZE;i++) {
-    *phase += w0;
-    pembed[i] = sin(*phase);
-    pembed[SUBFRAME_SIZE+i] = cos(*phase);
+    float tmp;
+    tmp = phase[0]*wreal - phase[1]*wimag;
+    phase[1] = phase[0]*wimag + phase[1]*wreal;
+    phase[0] = tmp;
+    pembed[i] = phase[1];
+    pembed[SUBFRAME_SIZE+i] = phase[0];
+  }
+  /* Renormalize once per sub-frame, though we could probably do it even less frequently. */
+  {
+    float r = 1.f/sqrt(phase[0]*phase[0] + phase[1]*phase[1]);
+    phase[0] *= r;
+    phase[1] *= r;
   }
 }
 
@@ -76,6 +95,7 @@ void fwgan_cont(FWGANState *st, const float *pcm0, const float *features0)
   float tmp1[MAX_CONT_SIZE];
   float tmp2[MAX_CONT_SIZE];
   FWGAN *model;
+  st->embed_phase[0] = 1;
   model = &st->model;
   norm2 = celt_inner_prod(pcm0, pcm0, CONT_PCM_INPUTS, st->arch);
   norm_1 = 1.f/sqrt(1e-8f + norm2);
@@ -158,7 +178,7 @@ static void run_fwgan_subframe(FWGANState *st, float *pcm, const float *cond, do
   FWGAN *model;
   model = &st->model;
 
-  pitch_embeddings(pembed, &st->embed_phase, w0);
+  pitch_embeddings(pembed, st->embed_phase, w0);
   /* Interleave bfcc_cond and pembed for each subframe in feat_in. */
   OPUS_COPY(&feat_in[BFCC_WITH_CORR_UPSAMPLER_FC_OUT_SIZE/4], &cond[0], BFCC_WITH_CORR_UPSAMPLER_FC_OUT_SIZE/4);
   OPUS_COPY(&feat_in[0], &pembed[0], FWGAN_FRAME_SIZE/2);
diff --git a/dnn/fwgan.h b/dnn/fwgan.h
index 3af76cda..fe7e111d 100644
--- a/dnn/fwgan.h
+++ b/dnn/fwgan.h
@@ -45,7 +45,7 @@ typedef struct {
   FWGAN model;
   int arch;
   int cont_initialized;
-  double embed_phase;
+  float embed_phase[2];
   float last_gain;
   float last_lpc[LPC_ORDER];
   float syn_mem[LPC_ORDER];
author	Jean-Marc Valin <jmvalin@amazon.com>	2023-08-02 06:48:34 +0300
committer	Jean-Marc Valin <jmvalin@amazon.com>	2023-08-02 06:48:34 +0300
commit	3eac8c12e476f68eaa7608cb078379e980d3a7ab (patch)
tree	77b17a1d3d5907ccba2f9b412d0df6ead4beeefb
parent	83e95a5ce6619720131e13fa3f39bb8ab3119ecf (diff)