diff options
field | value | date |
---|---|---|
author | jmvalin <jmvalin@0101bb08-14d6-0310-b084-bc0e0c8e3800> | 2002-03-14 06:58:48 +0300 |
committer | jmvalin <jmvalin@0101bb08-14d6-0310-b084-bc0e0c8e3800> | 2002-03-14 06:58:48 +0300 |
commit | ab9dbc47c097f93241e7f273d69f9da1f09f8ed6 (patch) | |
tree | 29e8567c819834f9f0ba70e7d4a13a9fe71dc167 /libspeex | |
parent | 218210edb6b74178e469ab70013384af7c125a1c (diff) | |
Cleaning up the code and making it easier to switch algorithms...
git-svn-id: http://svn.xiph.org/trunk/speex@3141 0101bb08-14d6-0310-b084-bc0e0c8e3800
Diffstat (limited to 'libspeex')
-rw-r--r-- | libspeex/ltp.c | 18 |
-rw-r--r-- | libspeex/ltp.h | 3 |
-rw-r--r-- | libspeex/modes.c | 8 |
-rw-r--r-- | libspeex/modes.h | 29 |
-rw-r--r-- | libspeex/speex.c | 110 |
-rw-r--r-- | libspeex/speex.h | 6 |
-rw-r--r-- | libspeex/testenc.c | 2 |
7 files changed, 62 insertions, 114 deletions
diff --git a/libspeex/ltp.c b/libspeex/ltp.c index a6bf7a6..d668c66 100644 --- a/libspeex/ltp.c +++ b/libspeex/ltp.c @@ -322,9 +322,6 @@ float awk2[], /* Weighted LPCs #2 for this subframe */ float exc[], /* Overlapping codebook */ int start, /* Smallest pitch value allowed */ int end, /* Largest pitch value allowed */ -float *gain, /* 3-tab gains of optimum entry */ -int *pitch, /* Index of optimum entry */ -int *gain_index, /* Index of optimum gain */ int p, /* Number of LPC coeffs */ int nsf, /* Number of samples in subframe */ FrameBits *bits, @@ -336,6 +333,8 @@ float *stack float *x[3]; float corr[3]; float A[3][3]; + float gain[3]; + int pitch; tmp = PUSH(stack, 3*nsf); @@ -345,15 +344,15 @@ float *stack /* Perform closed-loop 1-tap search*/ overlap_cb_search(target, ak, awk1, awk2, - &exc[-end], end-start+1, gain, pitch, p, + &exc[-end], end-start+1, gain, &pitch, p, nsf); /* Real pitch value */ - *pitch=end-*pitch; + pitch=end-pitch; for (i=0;i<3;i++) { - residue_zero(&exc[-*pitch-1+i],awk1,x[i],nsf,p); + residue_zero(&exc[-pitch-1+i],awk1,x[i],nsf,p); syn_filt_zero(x[i],ak,x[i],nsf,p); syn_filt_zero(x[i],awk2,x[i],nsf,p); } @@ -396,8 +395,7 @@ float *stack gain[0] = gain_cdbk_nb[best_cdbk*12]; gain[1] = gain_cdbk_nb[best_cdbk*12+1]; gain[2] = gain_cdbk_nb[best_cdbk*12+2]; - *gain_index=best_cdbk; - frame_bits_pack(bits,(*pitch)-start,7); + frame_bits_pack(bits,pitch-start,7); frame_bits_pack(bits,best_cdbk,7); } @@ -405,8 +403,8 @@ float *stack /*FIXME: backward or forward? 
(ie recursive or not?)*/ /*for (i=0;i<nsf;i++)*/ for (i=nsf-1;i>=0;i--) - exc[i]=gain[0]*exc[i-*pitch+1]+gain[1]*exc[i-*pitch]+gain[2]*exc[i-*pitch-1]; - printf ("3-tap pitch = %d, gains = [%f %f %f]\n",*pitch, gain[0], gain[1], gain[2]); + exc[i]=gain[0]*exc[i-pitch+1]+gain[1]*exc[i-pitch]+gain[2]*exc[i-pitch-1]; + printf ("3-tap pitch = %d, gains = [%f %f %f]\n",pitch, gain[0], gain[1], gain[2]); { float tmp1=0,tmp2=0; diff --git a/libspeex/ltp.h b/libspeex/ltp.h index b064ca7..6c46cff 100644 --- a/libspeex/ltp.h +++ b/libspeex/ltp.h @@ -51,9 +51,6 @@ float awk2[], /* Weighted LPCs #2 for this subframe */ float exc[], /* Overlapping codebook */ int start, /* Smallest pitch value allowed */ int end, /* Largest pitch value allowed */ -float *gain, /* 3-tab gains of optimum entry */ -int *pitch, /* Best pitch delay */ -int *gain_index, /* Index of optimum gain */ int p, /* Number of LPC coeffs */ int nsf, /* Number of samples in subframe */ FrameBits *bits, diff --git a/libspeex/modes.c b/libspeex/modes.c index 73c4dcf..bd1c38f 100644 --- a/libspeex/modes.c +++ b/libspeex/modes.c @@ -20,6 +20,8 @@ */ #include "modes.h" +#include "ltp.h" +#include "quant_lsp.h" SpeexMode nb_mode = { 160, /*frameSize*/ @@ -30,5 +32,9 @@ SpeexMode nb_mode = { 20, /*pitchStart*/ 140, /*pitchEnd*/ 0.9, /*gamma1*/ - 0.6 /*gamma2*/ + 0.6, /*gamma2*/ + lsp_quant_nb, + lsp_unquant_nb, + pitch_search_3tap, + pitch_unquant_3tap }; diff --git a/libspeex/modes.h b/libspeex/modes.h index 6490c12..d6bde37 100644 --- a/libspeex/modes.h +++ b/libspeex/modes.h @@ -22,6 +22,24 @@ #ifndef MODES_H #define MODES_H +#include "bits.h" + +/* Quantizes LSPs */ +typedef void (*lsp_quant_func)(float *, float *, int, FrameBits *); + +/* Decodes quantized LSPs */ +typedef void (*lsp_unquant_func)(float *, int, FrameBits *); + + +/*Long-term predictor quantization*/ +typedef void (*ltp_quant_func)(float *, float *, float *, + float *, float *, int, int, + int, int, FrameBits*, float *); + +/*Long-term 
un-quantize*/ +typedef void (*ltp_unquant_func)(float *, int, int, int, FrameBits*, float*); + +/*Struct defining the encoding/decoding mode*/ typedef struct SpeexMode { int frameSize; int subframeSize; @@ -32,8 +50,15 @@ typedef struct SpeexMode { int pitchEnd; float gamma1; float gamma2; - /* Should add info about LSP quantization, pitch gain quantization - and other codebooks */ + + /*LSP functions*/ + lsp_quant_func lsp_quant; + lsp_unquant_func lsp_unquant; + + /*Lont-term predictor functions*/ + ltp_quant_func ltp_quant; + ltp_unquant_func ltp_unquant; + } SpeexMode; extern SpeexMode nb_mode; diff --git a/libspeex/speex.c b/libspeex/speex.c index 817e516..373607b 100644 --- a/libspeex/speex.c +++ b/libspeex/speex.c @@ -54,6 +54,8 @@ void encoder_init(EncState *st, SpeexMode *mode) st->min_pitch=mode->pitchStart; st->max_pitch=mode->pitchEnd; + st->lsp_quant = mode->lsp_quant; + st->ltp_quant = mode->ltp_quant; /* Over-sampling filter (fractional pitch)*/ st->os_fact=4; st->os_filt_ord2=4*st->os_fact; @@ -193,8 +195,9 @@ void encode(EncState *st, float *in, FrameBits *bits) st->lsp[i] = acos(st->lsp[i]); /* LSP Quantization */ - lsp_quant_nb(st->lsp, st->qlsp, 10, bits); + st->lsp_quant(st->lsp, st->qlsp, 10, bits); + /* Special case for first frame */ if (st->first) { for (i=0;i<st->lpcSize;i++) @@ -202,39 +205,13 @@ void encode(EncState *st, float *in, FrameBits *bits) for (i=0;i<st->lpcSize;i++) st->old_qlsp[i] = st->qlsp[i]; } - printf ("encode LSPs: "); - for (i=0;i<st->lpcSize;i++) - printf ("%f ", st->qlsp[i]); - printf ("\n"); - - /*Find open-loop pitch for the whole frame*/ - if (0) { - float *mem = PUSH(st->stack, st->lpcSize); - - for (i=0;i<st->lpcSize;i++) - st->interp_lsp[i] = .5*st->old_lsp[i] + .5*st->lsp[i]; - for (i=0;i<st->lpcSize;i++) - st->interp_lsp[i] = cos(st->interp_lsp[i]); - lsp_to_lpc(st->interp_lsp, st->interp_lpc, st->lpcSize,st->stack); - bw_lpc(st->gamma1, st->interp_lpc, st->bw_lpc1, st->lpcSize); - bw_lpc(st->gamma2, 
st->interp_lpc, st->bw_lpc2, st->lpcSize); - for (i=0;i<st->lpcSize;i++) - mem[i]=st->mem_sp[i]; - residue_mem(st->frame, st->bw_lpc1, st->sw, st->frameSize, st->lpcSize, mem); - for (i=0;i<st->lpcSize;i++) - mem[i]=st->mem_sw[i]; - syn_filt_mem(st->sw, st->bw_lpc2, st->sw, st->frameSize, st->lpcSize, mem); - open_loop_pitch(st->sw, st->min_pitch, st->max_pitch, st->frameSize, &st->ol_pitch, &st->ol_voiced); - printf ("Open-loop pitch = %d\n", st->ol_pitch); - POP(st->stack); - } /* Loop on sub-frames */ for (sub=0;sub<st->nbSubframes;sub++) { float tmp, gain[3]; float esig=0, enoise=0, snr; - int pitch, offset, pitch_gain_index; + int pitch, offset; float *sp, *sw, *res, *exc, *target, *mem; /* Offset relative to start of frame */ @@ -303,20 +280,11 @@ void encode(EncState *st, float *in, FrameBits *bits) for (i=0;i<st->subframeSize;i++) exc[i]=0; -#if 1 /*If set to 0, we compute the excitation directly from the target, i.e. we're cheating */ - - /* Perform adaptive codebook search (3-tap pitch predictor) */ - pitch = st->ol_pitch; -#if 0 /* 1 for fractional pitch, 0 for integer pitch */ - closed_loop_fractional_pitch(target, st->interp_qlpc, st->bw_lpc1, st->bw_lpc2, - exc, st->os_filt, st->os_filt_ord2, st->os_fact, 20, 147, - &gain[0], &pitch, st->lpcSize, - st->subframeSize, st->stack); -#else - pitch_search_3tap(target, st->interp_qlpc, st->bw_lpc1, st->bw_lpc2, - exc, 20, 147, &gain[0], &pitch, &pitch_gain_index, st->lpcSize, - st->subframeSize, bits, st->stack); -#endif + + st->ltp_quant(target, st->interp_qlpc, st->bw_lpc1, st->bw_lpc2, + exc, 20, 147, st->lpcSize, st->subframeSize, + bits, st->stack); + /* Update target for adaptive codebook contribution */ residue_zero(exc, st->bw_lpc1, res, st->subframeSize, st->lpcSize); syn_filt_zero(res, st->interp_qlpc, res, st->subframeSize, st->lpcSize); @@ -355,11 +323,14 @@ void encode(EncState *st, float *in, FrameBits *bits) for (i=0;i<st->subframeSize;i++) target[i]-=gain[0]*res[i]; } + #else + /* Perform a 
split-codebook search */ split_cb_search(target, st->interp_qlpc, st->bw_lpc1, st->bw_lpc2, exc_table, 64, &gain[0], &pitch, st->lpcSize, st->subframeSize, exc, bits); #endif + /* Compute weighted noise energy, SNR */ enoise=0; for (i=0;i<st->subframeSize;i++) @@ -367,61 +338,6 @@ void encode(EncState *st, float *in, FrameBits *bits) snr = 10*log10((esig+1)/(enoise+1)); printf ("seg SNR = %f\n", snr); -#else /* Cheating to get perfect reconstruction */ - -#if 1 /* Code to calculate the exact excitation after pitch prediction */ - for (i=0;i<st->subframeSize;i++) - st->buf2[i]=target[i]; -#if 0 /* 0 for fractional pitch, 1 for integer */ - pitch_search_3tap(target, st->interp_qlpc, st->bw_lpc1, st->bw_lpc2, - exc, 20, 147, &gain[0], &pitch, &pitch_gain_index, st->lpcSize, - st->subframeSize); - for (i=0;i<st->subframeSize;i++) - exc[i]=gain[0]*exc[i-pitch]+gain[1]*exc[i-pitch-1]+gain[2]*exc[i-pitch-2]; - printf ("3-tap pitch = %d, gains = [%f %f %f]\n",pitch, gain[0], gain[1], gain[2]); -#else - pitch = st->ol_pitch; - closed_loop_fractional_pitch(target, st->interp_qlpc, st->bw_lpc1, st->bw_lpc2, - exc, st->os_filt, st->os_filt_ord2, st->os_fact, 20, 147, - &gain[0], &pitch, st->lpcSize, - st->subframeSize, st->stack); -#endif - /* Update target for adaptive codebook contribution */ - residue_zero(exc, st->bw_lpc1, res, st->subframeSize, st->lpcSize); - syn_filt_zero(res, st->interp_qlpc, res, st->subframeSize, st->lpcSize); - syn_filt_zero(res, st->bw_lpc2, res, st->subframeSize, st->lpcSize); - for (i=0;i<st->subframeSize;i++) - target[i]-=res[i]; - - enoise=0; - for (i=0;i<st->subframeSize;i++) - enoise += target[i]*target[i]; - snr = 10*log10((esig+1)/(enoise+1)); - printf ("pitch SNR = %f\n", snr); - - syn_filt_zero(target, st->bw_lpc1, res, st->subframeSize, st->lpcSize); - residue_zero(res, st->interp_qlpc, exc, st->subframeSize, st->lpcSize); - residue_zero(exc, st->bw_lpc2, exc, st->subframeSize, st->lpcSize); - if (snr>5) - { - for 
(i=0;i<st->subframeSize;i++) - { - if (i%8==0&&i) - printf("\n"); - printf ("%f ", exc[i]); - } - printf ("\n"); - } - for (i=0;i<st->subframeSize;i++) - target[i]=st->buf2[i]; -#endif - - /* We're cheating to get perfect reconstruction */ - syn_filt_zero(target, st->bw_lpc1, res, st->subframeSize, st->lpcSize); - residue_zero(res, st->interp_qlpc, exc, st->subframeSize, st->lpcSize); - residue_zero(exc, st->bw_lpc2, exc, st->subframeSize, st->lpcSize); -#endif - /*Keep the previous memory*/ for (i=0;i<st->lpcSize;i++) mem[i]=st->mem_sp[i]; diff --git a/libspeex/speex.h b/libspeex/speex.h index b714bb1..6859d58 100644 --- a/libspeex/speex.h +++ b/libspeex/speex.h @@ -67,6 +67,9 @@ typedef struct EncState { float *rc; /* Reflection coefficients */ float *mem_sp, *mem_sw; float *dmem1, *dmem2; + + lsp_quant_func lsp_quant; + ltp_quant_func ltp_quant; } EncState; typedef struct DecState { @@ -91,6 +94,9 @@ typedef struct DecState { float *interp_qlsp; /* Interpolated quantized LSPs */ float *interp_qlpc; /* Interpolated quantized LPCs */ float *mem_sp; + + lsp_unquant_func lsp_unquant; + ltp_unquant_func ltp_unquant; } DecState; /**Initializes encoder state*/ diff --git a/libspeex/testenc.c b/libspeex/testenc.c index e37c1b4..7a00b67 100644 --- a/libspeex/testenc.c +++ b/libspeex/testenc.c @@ -9,7 +9,7 @@ int main(int argc, char **argv) char *inFile, *outFile, *bitsFile; FILE *fin, *fout, *fbits=NULL; short in[FRAME_SIZE]; - float input[FRAME_SIZE], bak[FRAME_SIZE], bak2[FRAME_SIZE], decbuf[FRAME_SIZE]; + float input[FRAME_SIZE], bak[FRAME_SIZE], bak2[FRAME_SIZE]; char cbits[200]; int nbBits; int i; |