Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.xiph.org/xiph/opus.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jean-Marc Valin <jmvalin@amazon.com>, 2023-06-22 23:03:54 +0300
committer: Jean-Marc Valin <jmvalin@amazon.com>, 2023-06-23 01:07:25 +0300
commit: abe817c3fc2661f25c01ce3ebec8a6e14e344670 (patch)
tree: 9439d75070ef26fafbbf43d98915bea3c5ce0b88
parent: f36685fc974394aa0d0f4db1bb601afc4780e3ed (diff)
Remove pcount that's no longer useful
We're back to processing 10 ms at a time and have no need for 40-ms "superframes".
-rw-r--r-- dnn/dump_data.c 27
-rw-r--r-- dnn/lpcnet_enc.c 44
-rw-r--r-- dnn/lpcnet_plc.c 10
-rw-r--r-- dnn/lpcnet_private.h 7
4 files changed, 38 insertions, 50 deletions
diff --git a/dnn/dump_data.c b/dnn/dump_data.c
index 0515352b..053ca774 100644
--- a/dnn/dump_data.c
+++ b/dnn/dump_data.c
@@ -81,22 +81,21 @@ static short float2short(float x)
}
-void write_audio(LPCNetEncState *st, const short *pcm, const int *noise, FILE *file, int nframes) {
- int i, k;
- for (k=0;k<nframes;k++) {
+void write_audio(LPCNetEncState *st, const short *pcm, const int *noise, FILE *file) {
+ int i;
short data[2*FRAME_SIZE];
for (i=0;i<FRAME_SIZE;i++) {
float p=0;
float e;
int j;
- for (j=0;j<LPC_ORDER;j++) p -= st->features[k][NB_BANDS+2+j]*st->sig_mem[j];
- e = lin2ulaw(pcm[k*FRAME_SIZE+i] - p);
+ for (j=0;j<LPC_ORDER;j++) p -= st->features[NB_BANDS+2+j]*st->sig_mem[j];
+ e = lin2ulaw(pcm[i] - p);
/* Signal in. */
data[2*i] = float2short(st->sig_mem[0]);
/* Signal out. */
- data[2*i+1] = pcm[k*FRAME_SIZE+i];
+ data[2*i+1] = pcm[i];
/* Simulate error on excitation. */
- e += noise[k*FRAME_SIZE+i];
+ e += noise[i];
e = IMIN(255, IMAX(0, e));
RNN_MOVE(&st->sig_mem[1], &st->sig_mem[0], LPC_ORDER-1);
@@ -104,7 +103,6 @@ void write_audio(LPCNetEncState *st, const short *pcm, const int *noise, FILE *f
st->exc_mem = e;
}
fwrite(data, 4*FRAME_SIZE, 1, file);
- }
}
int main(int argc, char **argv) {
@@ -124,8 +122,7 @@ int main(int argc, char **argv) {
FILE *ffeat;
FILE *fpcm=NULL;
short pcm[FRAME_SIZE]={0};
- short pcmbuf[FRAME_SIZE*4]={0};
- int noisebuf[FRAME_SIZE*4]={0};
+ int noisebuf[FRAME_SIZE]={0};
short tmp[FRAME_SIZE] = {0};
float savedX[FRAME_SIZE] = {0};
float speech_gain=1;
@@ -237,18 +234,12 @@ int main(int argc, char **argv) {
for (i=0;i<FRAME_SIZE-TRAINING_OFFSET;i++) pcm[i+TRAINING_OFFSET] = float2short(x[i]);
compute_frame_features(st, x);
- RNN_COPY(&pcmbuf[st->pcount*FRAME_SIZE], pcm, FRAME_SIZE);
if (fpcm) {
- compute_noise(&noisebuf[st->pcount*FRAME_SIZE], noise_std);
+ compute_noise(noisebuf, noise_std);
}
process_single_frame(st, ffeat);
- if (fpcm) write_audio(st, pcm, &noisebuf[st->pcount*FRAME_SIZE], fpcm, 1);
- st->pcount++;
- /* Running on groups of 4 frames. */
- if (st->pcount == 4) {
- st->pcount = 0;
- }
+ if (fpcm) write_audio(st, pcm, noisebuf, fpcm);
/*if (fpcm) fwrite(pcm, sizeof(short), FRAME_SIZE, fpcm);*/
for (i=0;i<TRAINING_OFFSET;i++) pcm[i] = float2short(x[i+FRAME_SIZE-TRAINING_OFFSET]);
old_speech_gain = speech_gain;
diff --git a/dnn/lpcnet_enc.c b/dnn/lpcnet_enc.c
index 76f9f776..f5e33689 100644
--- a/dnn/lpcnet_enc.c
+++ b/dnn/lpcnet_enc.c
@@ -99,10 +99,10 @@ void compute_frame_features(LPCNetEncState *st, const float *in) {
follow = MAX16(follow-2.5f, Ly[i]);
E += Ex[i];
}
- dct(st->features[st->pcount], Ly);
- st->features[st->pcount][0] -= 4;
- lpc_from_cepstrum(st->lpc, st->features[st->pcount]);
- for (i=0;i<LPC_ORDER;i++) st->features[st->pcount][NB_BANDS+2+i] = st->lpc[i];
+ dct(st->features, Ly);
+ st->features[0] -= 4;
+ lpc_from_cepstrum(st->lpc, st->features);
+ for (i=0;i<LPC_ORDER;i++) st->features[NB_BANDS+2+i] = st->lpc[i];
RNN_MOVE(st->exc_buf, &st->exc_buf[FRAME_SIZE], PITCH_MAX_PERIOD);
RNN_COPY(&aligned_in[TRAINING_OFFSET], in, FRAME_SIZE-TRAINING_OFFSET);
for (i=0;i<FRAME_SIZE;i++) {
@@ -123,12 +123,12 @@ void compute_frame_features(LPCNetEncState *st, const float *in) {
celt_pitch_xcorr(&st->exc_buf[PITCH_MAX_PERIOD+off], st->exc_buf+off, xcorr, FRAME_SIZE/2, PITCH_MAX_PERIOD, st->arch);
ener0 = celt_inner_prod_c(&st->exc_buf[PITCH_MAX_PERIOD+off], &st->exc_buf[PITCH_MAX_PERIOD+off], FRAME_SIZE/2);
ener1 = celt_inner_prod_c(&st->exc_buf[off], &st->exc_buf[off], FRAME_SIZE/2-1);
- st->frame_weight[2+2*st->pcount+sub] = ener0;
- /*printf("%f\n", st->frame_weight[2+2*st->pcount+sub]);*/
+ st->frame_weight[sub] = ener0;
+ /*printf("%f\n", st->frame_weight[sub]);*/
for (i=0;i<PITCH_MAX_PERIOD;i++) {
ener1 += st->exc_buf[i+off+FRAME_SIZE/2-1]*st->exc_buf[i+off+FRAME_SIZE/2-1];
ener = 1 + ener0 + ener1;
- st->xc[2+2*st->pcount+sub][i] = 2*xcorr[i] / ener;
+ st->xc[sub][i] = 2*xcorr[i] / ener;
ener1 -= st->exc_buf[i+off]*st->exc_buf[i+off];
}
if (1) {
@@ -140,18 +140,18 @@ void compute_frame_features(LPCNetEncState *st, const float *in) {
float val1=0, val2=0;
int j;
for (j=0;j<7;j++) {
- val1 += st->xc[2+2*st->pcount+sub][i-3+j]*interp[j];
- val2 += st->xc[2+2*st->pcount+sub][i+3-j]*interp[j];
- interpolated[i] = MAX16(st->xc[2+2*st->pcount+sub][i], MAX16(val1, val2));
+ val1 += st->xc[sub][i-3+j]*interp[j];
+ val2 += st->xc[sub][i+3-j]*interp[j];
+ interpolated[i] = MAX16(st->xc[sub][i], MAX16(val1, val2));
}
}
for (i=4;i<PITCH_MAX_PERIOD-4;i++) {
- st->xc[2+2*st->pcount+sub][i] = interpolated[i];
+ st->xc[sub][i] = interpolated[i];
}
}
#if 0
for (i=0;i<PITCH_MAX_PERIOD;i++)
- printf("%f ", st->xc[2*st->pcount+sub][i]);
+ printf("%f ", st->xc[sub][i]);
printf("\n");
#endif
}
@@ -165,14 +165,14 @@ void process_single_frame(LPCNetEncState *st, FILE *ffeat) {
int pitch_prev[2][PITCH_MAX_PERIOD];
float frame_corr;
float frame_weight_sum = 1e-15f;
- for(sub=0;sub<2;sub++) frame_weight_sum += st->frame_weight[2+2*st->pcount+sub];
- for(sub=0;sub<2;sub++) st->frame_weight[2+2*st->pcount+sub] *= (2.f/frame_weight_sum);
+ for(sub=0;sub<2;sub++) frame_weight_sum += st->frame_weight[sub];
+ for(sub=0;sub<2;sub++) st->frame_weight[sub] *= (2.f/frame_weight_sum);
for(sub=0;sub<2;sub++) {
float max_path_all = -1e15f;
best_i = 0;
for (i=0;i<PITCH_MAX_PERIOD-2*PITCH_MIN_PERIOD;i++) {
- float xc_half = MAX16(MAX16(st->xc[2+2*st->pcount+sub][(PITCH_MAX_PERIOD+i)/2], st->xc[2+2*st->pcount+sub][(PITCH_MAX_PERIOD+i+2)/2]), st->xc[2+2*st->pcount+sub][(PITCH_MAX_PERIOD+i-1)/2]);
- if (st->xc[2+2*st->pcount+sub][i] < xc_half*1.1f) st->xc[2+2*st->pcount+sub][i] *= .8f;
+ float xc_half = MAX16(MAX16(st->xc[sub][(PITCH_MAX_PERIOD+i)/2], st->xc[sub][(PITCH_MAX_PERIOD+i+2)/2]), st->xc[sub][(PITCH_MAX_PERIOD+i-1)/2]);
+ if (st->xc[sub][i] < xc_half*1.1f) st->xc[sub][i] *= .8f;
}
for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) {
int j;
@@ -185,7 +185,7 @@ void process_single_frame(LPCNetEncState *st, FILE *ffeat) {
pitch_prev[sub][i] = i+j;
}
}
- st->pitch_max_path[1][i] = max_prev + st->frame_weight[2+2*st->pcount+sub]*st->xc[2+2*st->pcount+sub][i];
+ st->pitch_max_path[1][i] = max_prev + st->frame_weight[sub]*st->xc[sub][i];
if (st->pitch_max_path[1][i] > max_path_all) {
max_path_all = st->pitch_max_path[1][i];
best_i = i;
@@ -204,14 +204,14 @@ void process_single_frame(LPCNetEncState *st, FILE *ffeat) {
/* Backward pass. */
for (sub=1;sub>=0;sub--) {
best[2+sub] = PITCH_MAX_PERIOD-best_i;
- frame_corr += st->frame_weight[2+2*st->pcount+sub]*st->xc[2+2*st->pcount+sub][best_i];
+ frame_corr += st->frame_weight[sub]*st->xc[sub][best_i];
best_i = pitch_prev[sub][best_i];
}
frame_corr /= 2;
- st->features[st->pcount][NB_BANDS] = .01f*(IMAX(66, IMIN(510, best[2]+best[3]))-200);
- st->features[st->pcount][NB_BANDS + 1] = frame_corr-.5f;
+ st->features[NB_BANDS] = .01f*(IMAX(66, IMIN(510, best[2]+best[3]))-200);
+ st->features[NB_BANDS + 1] = frame_corr-.5f;
if (ffeat) {
- fwrite(st->features[st->pcount], sizeof(float), NB_TOTAL_FEATURES, ffeat);
+ fwrite(st->features, sizeof(float), NB_TOTAL_FEATURES, ffeat);
}
}
@@ -229,7 +229,7 @@ static int lpcnet_compute_single_frame_features_impl(LPCNetEncState *st, float *
preemphasis(x, &st->mem_preemph, x, PREEMPHASIS, FRAME_SIZE);
compute_frame_features(st, x);
process_single_frame(st, NULL);
- RNN_COPY(features, &st->features[0][0], NB_TOTAL_FEATURES);
+ RNN_COPY(features, &st->features[0], NB_TOTAL_FEATURES);
return 0;
}
diff --git a/dnn/lpcnet_plc.c b/dnn/lpcnet_plc.c
index 61d96b33..6d384a03 100644
--- a/dnn/lpcnet_plc.c
+++ b/dnn/lpcnet_plc.c
@@ -183,7 +183,6 @@ int lpcnet_plc_update(LPCNetPLCState *st, short *pcm) {
int delta = 0;
for (i=0;i<FRAME_SIZE;i++) x[i] = pcm[i];
burg_cepstral_analysis(plc_features, x);
- st->enc.pcount = 0;
if (st->skip_analysis) {
/*fprintf(stderr, "skip update\n");*/
if (st->blend) {
@@ -232,7 +231,7 @@ int lpcnet_plc_update(LPCNetPLCState *st, short *pcm) {
compute_frame_features(&st->enc, x);
process_single_frame(&st->enc, NULL);
if (!st->blend) {
- RNN_COPY(&plc_features[2*NB_BANDS], st->enc.features[0], NB_FEATURES);
+ RNN_COPY(&plc_features[2*NB_BANDS], st->enc.features, NB_FEATURES);
plc_features[2*NB_BANDS+NB_FEATURES] = 1;
compute_plc_pred(st, st->features, plc_features);
/* Discard an FEC frame that we know we will no longer need. */
@@ -243,7 +242,7 @@ int lpcnet_plc_update(LPCNetPLCState *st, short *pcm) {
if (st->skip_analysis) {
if (st->enable_blending) {
/* FIXME: backtrack state, replace features. */
- run_frame_network_deferred(&st->lpcnet, st->enc.features[0]);
+ run_frame_network_deferred(&st->lpcnet, st->enc.features);
}
st->skip_analysis--;
} else {
@@ -251,10 +250,10 @@ int lpcnet_plc_update(LPCNetPLCState *st, short *pcm) {
RNN_COPY(output, &st->pcm[0], FRAME_SIZE);
#ifdef PLC_SKIP_UPDATES
{
- run_frame_network_deferred(&st->lpcnet, st->enc.features[0]);
+ run_frame_network_deferred(&st->lpcnet, st->enc.features);
}
#else
- lpcnet_synthesize_impl(&st->lpcnet, st->enc.features[0], output, FRAME_SIZE, FRAME_SIZE);
+ lpcnet_synthesize_impl(&st->lpcnet, st->enc.features, output, FRAME_SIZE, FRAME_SIZE);
#endif
RNN_MOVE(st->pcm, &st->pcm[FRAME_SIZE], PLC_BUF_SIZE);
}
@@ -268,7 +267,6 @@ int lpcnet_plc_conceal(LPCNetPLCState *st, short *pcm) {
int i;
short output[FRAME_SIZE];
run_frame_network_flush(&st->lpcnet);
- st->enc.pcount = 0;
/* If we concealed the previous frame, finish synthesizing the rest of the samples. */
/* FIXME: Copy/predict features. */
while (st->pcm_fill > 0) {
diff --git a/dnn/lpcnet_private.h b/dnn/lpcnet_private.h
index 753f6b01..a0f0f7da 100644
--- a/dnn/lpcnet_private.h
+++ b/dnn/lpcnet_private.h
@@ -45,11 +45,10 @@ struct LPCNetEncState{
int arch;
float analysis_mem[OVERLAP_SIZE];
float mem_preemph;
- int pcount;
float pitch_mem[LPC_ORDER];
float pitch_filt;
- float xc[10][PITCH_MAX_PERIOD+1];
- float frame_weight[10];
+ float xc[2][PITCH_MAX_PERIOD+1];
+ float frame_weight[2];
float exc_buf[PITCH_BUF_SIZE];
float pitch_max_path[2][PITCH_MAX_PERIOD];
float pitch_max_path_all;
@@ -58,7 +57,7 @@ struct LPCNetEncState{
int last_period;
float lpc[LPC_ORDER];
float vq_mem[NB_BANDS];
- float features[4][NB_TOTAL_FEATURES];
+ float features[NB_TOTAL_FEATURES];
float sig_mem[LPC_ORDER];
int exc_mem;
float burg_cepstrum[2*NB_BANDS];