diff options
Diffstat (limited to 'doc')
-rw-r--r-- | doc/manual.lyx | 435 | ||||
-rw-r--r-- | doc/nb_celp.c | 183 |
2 files changed, 611 insertions, 7 deletions
diff --git a/doc/manual.lyx b/doc/manual.lyx index 43710b8..6a6f88d 100644 --- a/doc/manual.lyx +++ b/doc/manual.lyx @@ -1,13 +1,14 @@ -#LyX 1.6.0rc2 created this file. For more info see http://www.lyx.org/ -\lyxformat 340 +#LyX 1.6.1 created this file. For more info see http://www.lyx.org/ +\lyxformat 345 \begin_document \begin_header \textclass scrbook +\use_default_options true \language english \inputencoding auto -\font_roman times -\font_sans helvet -\font_typewriter courier +\font_roman default +\font_sans default +\font_typewriter default \font_default_family default \font_sc false \font_osf false @@ -21,7 +22,7 @@ \papersize letterpaper \use_geometry true \use_amsmath 2 -\use_esint 0 +\use_esint 2 \cite_engine basic \use_bibtopic false \paperorientation portrait @@ -37,7 +38,6 @@ \papercolumns 1 \papersides 1 \paperpagestyle headings -\listings_params "basicstyle={\ttfamily},breaklines=true,language=C,xleftmargin=0mm" \tracking_changes false \output_changes false \author "" @@ -8419,6 +8419,427 @@ Optional, implementation-defined. \end_layout +\begin_layout Subsection +Bit-stream definition +\end_layout + +\begin_layout Standard +This section defines the bit-stream that is transmitted on the wire. 
+ One Speex packet consists of 1 frame header and 4 sub-frames: +\end_layout + +\begin_layout Standard +\begin_inset Tabular +<lyxtabular version="3" rows="1" columns="5"> +<features> +<column alignment="center" valignment="top" width="0"> +<column alignment="center" valignment="top" width="0"> +<column alignment="center" valignment="top" width="0"> +<column alignment="center" valignment="top" width="0"> +<column alignment="center" valignment="top" width="0"> +<row> +<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +Frame Header +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +Subframe 1 +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +Subframe 2 +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +Subframe 3 +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +Subframe 4 +\end_layout + +\end_inset +</cell> +</row> +</lyxtabular> + +\end_inset + + +\end_layout + +\begin_layout Standard +The frame header is variable length, depending on decoding mode and submode. 
+ The narrowband frame header is defined as follows: +\end_layout + +\begin_layout Standard +\begin_inset Tabular +<lyxtabular version="3" rows="1" columns="6"> +<features> +<column alignment="center" valignment="top" width="0"> +<column alignment="center" valignment="top" width="0"> +<column alignment="center" valignment="top" width="0"> +<column alignment="center" valignment="top" width="0"> +<column alignment="center" valignment="top" width="0"> +<column alignment="center" valignment="top" width="0"> +<row> +<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +wb-bit +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +modeid +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +LSP +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +OL-pitch +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +OL-pitchgain +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +OL-ExcGain +\end_layout + +\end_inset +</cell> +</row> +</lyxtabular> + +\end_inset + + +\end_layout + +\begin_layout Standard +wb-bit: Wideband bit (1 bit) 0=narrowband, 1=wideband +\end_layout + +\begin_layout Standard +modeid: Mode identifier (4 bits) +\end_layout + +\begin_layout Standard +LSP: Line 
Spectral Pairs (0, 18, or 30 bits) +\end_layout + +\begin_layout Standard +OL-pitch: Open Loop Pitch (0 or 7 bits) +\end_layout + +\begin_layout Standard +OL-pitchgain: Open Loop Pitch Gain (0 or 4 bits) +\end_layout + +\begin_layout Standard +OL-ExcGain: Open Loop Excitation Gain (0 or 5 bits) +\end_layout + +\begin_layout Standard +... +\end_layout + +\begin_layout Standard +Each subframe is defined as follows: +\end_layout + +\begin_layout Standard +\begin_inset Tabular +<lyxtabular version="3" rows="1" columns="4"> +<features> +<column alignment="center" valignment="top" width="0"> +<column alignment="center" valignment="top" width="0"> +<column alignment="center" valignment="top" width="0"> +<column alignment="center" valignment="top" width="0"> +<row> +<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +FinePitch +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +PitchGain +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +InnovationGain +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +Innovation VQ +\end_layout + +\end_inset +</cell> +</row> +</lyxtabular> + +\end_inset + + +\end_layout + +\begin_layout Standard +FinePitch: (0 or 7 bits) +\end_layout + +\begin_layout Standard +PitchGain: (0, 5, or 7 bits) +\end_layout + +\begin_layout Standard +InnovationGain: (0, 1, or 3 bits) +\end_layout + +\begin_layout Standard +Innovation VQ: (0-96 bits) +\end_layout + +\begin_layout Standard +... 
+\end_layout + +\begin_layout Subsection +Sample decoder +\end_layout + +\begin_layout Standard +This section contains some sample source code, showing how a basic Speex + decoder can be implemented. + The sample decoder supports narrowband submode 3 only, with no advanced features + like enhancement, VBR, etc. +\end_layout + +\begin_layout Standard +... +\end_layout + +\begin_layout Standard +\begin_inset CommandInset include +LatexCommand lstinputlisting +filename "nb_celp.c" +lstparams "caption={Sample Decoder}" + +\end_inset + + +\end_layout + +\begin_layout Subsection +Lookup tables +\end_layout + +\begin_layout Standard +The Speex decoder includes a set of lookup tables and codebooks, which are + used to convert between values of different domains. + This includes: +\end_layout + +\begin_layout Standard +- Excitation 10x16 (3200 bps) +\end_layout + +\begin_layout Standard +- Excitation 10x32 (4000 bps) +\end_layout + +\begin_layout Standard +- Excitation 20x32 (2000 bps) +\end_layout + +\begin_layout Standard +- Excitation 5x256 (12800 bps) +\end_layout + +\begin_layout Standard +- Excitation 5x64 (9600 bps) +\end_layout + +\begin_layout Standard +- Excitation 8x128 (7000 bps) +\end_layout + +\begin_layout Standard +- Codebook for 3-tap pitch prediction gain (Normal and Low Bitrate) +\end_layout + +\begin_layout Standard +- Codebook for LSPs in narrowband CELP mode +\end_layout + +\begin_layout Standard +... +\end_layout + +\begin_layout Standard +The exact lookup tables are included here for reference. 
+\end_layout + +\begin_layout Standard +\begin_inset CommandInset include +LatexCommand lstinputlisting +filename "../libspeex/exc_5_64_table.c" + +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset CommandInset include +LatexCommand lstinputlisting +filename "../libspeex/exc_5_256_table.c" + +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset CommandInset include +LatexCommand lstinputlisting +filename "../libspeex/exc_8_128_table.c" + +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset CommandInset include +LatexCommand lstinputlisting +filename "../libspeex/exc_10_16_table.c" + +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset CommandInset include +LatexCommand lstinputlisting +filename "../libspeex/exc_10_32_table.c" + +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset CommandInset include +LatexCommand lstinputlisting +filename "../libspeex/exc_20_32_table.c" + +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset CommandInset include +LatexCommand lstinputlisting +filename "../libspeex/gain_table.c" + +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset CommandInset include +LatexCommand lstinputlisting +filename "../libspeex/gain_table_lbr.c" + +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset CommandInset include +LatexCommand lstinputlisting +filename "../libspeex/lsp_tables_nb.c" + +\end_inset + + +\end_layout + \begin_layout Section Wideband embedded decoder \end_layout diff --git a/doc/nb_celp.c b/doc/nb_celp.c new file mode 100644 index 0000000..2f6ac2f --- /dev/null +++ b/doc/nb_celp.c @@ -0,0 +1,183 @@ +#include <math.h> +#include "nb_celp.h" +#include "lsp.h" +#include "ltp.h" +#include "quant_lsp.h" +#include "cb_search.h" +#include "filters.h" +#include "../include/speex/speex_bits.h" +#include "os_support.h" + +#ifndef NULL +#define NULL 0 +#endif + +#define LSP_MARGIN .002f +#define SIG_SCALING 1.f +#define 
NB_DEC_BUFFER (NB_FRAME_SIZE+2*NB_PITCH_END+NB_SUBFRAME_SIZE+12) +#define NB_ORDER 10 +#define NB_FRAME_SIZE 160 +#define NB_SUBFRAME_SIZE 40 +#define NB_NB_SUBFRAMES 4 +#define NB_PITCH_START 17 +#define NB_PITCH_END 144 + +/* Decoder state carried from one frame to the next */ +struct speex_decode_state { + float excBuf[NB_DEC_BUFFER]; /**< Excitation buffer */ + float *exc; /**< Start of excitation frame */ + float old_qlsp[NB_ORDER]; /**< Quantized LSPs for previous frame */ + float interp_qlpc[NB_ORDER]; /**< Interpolated quantized LPCs */ + float mem_sp[NB_ORDER]; /**< Filter memory for synthesis signal */ + int first; /**< Is this the first frame? */ +}; + +/* Sub-frame gain correction factors, selected by the 1-bit gain code */ +static const float exc_gain_quant_scal1[2] = {0.70469f, 1.05127f}; + +/* Allocate a zero-filled decoder state; returns NULL if malloc fails */ +struct speex_decode_state *nb_decoder_init(void) +{ + struct speex_decode_state *st; + + st = malloc(sizeof(*st)); + if (!st) + return NULL; + + memset(st, 0, sizeof(*st)); + st->first = 1; + + return st; +} + +/* Release a state obtained from nb_decoder_init(); NULL is accepted */ +void nb_decoder_destroy(struct speex_decode_state *state) +{ + /* free(NULL) is a no-op, so no guard is needed */ + free(state); +} + +/* Basic decoder, mode 3 only: reads one frame from bits and writes NB_FRAME_SIZE samples to out. + * Returns 0 on success, -1 if an argument is NULL, fewer than 5 bits remain or modeid is 15, -2 for wideband or a mode other than 3. */ +int nb_decode(struct speex_decode_state *st, SpeexBits *bits, float *out) +{ + int i, sub, wideband, mode, qe; + float ol_gain; + float innov[NB_SUBFRAME_SIZE]; + float exc32[NB_SUBFRAME_SIZE]; + float qlsp[NB_ORDER], interp_qlsp[NB_ORDER]; + float ak[NB_ORDER]; + + if (!st || !bits || !out) + return -1; + + st->exc = st->excBuf + 2*NB_PITCH_END + NB_SUBFRAME_SIZE + 6; + + /* Decode Sub-modes */ + do { + if (speex_bits_remaining(bits) < 5) + return -1; + + wideband = speex_bits_unpack_unsigned(bits, 1); + if (wideband) { + printf("wideband not supported\n"); + return -2; + } + + mode = speex_bits_unpack_unsigned(bits, 4); + if (mode == 15) + return -1; + + } while (mode > 8); + + if (mode != 3) { + printf("only mode 3 supported\n"); + return -2; + } + + /* Shift all buffers by one frame */ + SPEEX_MOVE(st->excBuf, st->excBuf+NB_FRAME_SIZE, + 2*NB_PITCH_END + NB_SUBFRAME_SIZE + 12); + + /* Unquantize LSPs */ + lsp_unquant_lbr(qlsp, NB_ORDER, bits); + + /* Handle first frame */ 
+ if (st->first) { + st->first = 0; + + for (i=0; i<NB_ORDER; i++) + st->old_qlsp[i] = qlsp[i]; + } + + /* Get global excitation gain */ + qe = speex_bits_unpack_unsigned(bits, 5); + ol_gain = SIG_SCALING*exp(qe/3.5); + + /* Loop on subframes: rebuild the excitation */ + for (sub=0; sub<NB_NB_SUBFRAMES; sub++) { + int offset, q_energy; + float *exc, *sp; + float ener; + + offset = NB_SUBFRAME_SIZE*sub; + exc = st->exc + offset; + sp = out + offset; + + SPEEX_MEMSET(exc, 0, NB_SUBFRAME_SIZE); + + /* Adaptive codebook contribution */ + pitch_unquant_3tap(exc, exc32, NB_PITCH_START, + NB_SUBFRAME_SIZE, bits, 0); + + sanitize_values32(exc32, -32000, 32000, NB_SUBFRAME_SIZE); + + /* Unquantize the innovation */ + SPEEX_MEMSET(innov, 0, NB_SUBFRAME_SIZE); + + /* Decode sub-frame gain correction */ + q_energy = speex_bits_unpack_unsigned(bits, 1); + ener = exc_gain_quant_scal1[q_energy] * ol_gain; + + /* Fixed codebook contribution */ + split_cb_shape_sign_unquant(innov, bits); + + /* De-normalize innovation and update excitation */ + signal_mul(innov, innov, ener, NB_SUBFRAME_SIZE); + + for (i=0; i<NB_SUBFRAME_SIZE; i++) { + exc[i] = exc32[i] + innov[i]; + } + } + + SPEEX_COPY(out, &st->exc[-NB_SUBFRAME_SIZE], NB_FRAME_SIZE); + + /* Loop on subframes: filter out in place, one subframe at a time */ + for (sub=0; sub<NB_NB_SUBFRAMES; sub++) { + const int offset = NB_SUBFRAME_SIZE*sub; + float *sp, *exc; + + sp = out + offset; + exc = st->exc + offset; + + /* LSP interpolation (quantized and unquantized) */ + lsp_interpolate(st->old_qlsp, qlsp, interp_qlsp, NB_ORDER, + sub, NB_NB_SUBFRAMES, LSP_MARGIN); + + /* Compute interpolated LPCs (unquantized) */ + lsp_to_lpc(interp_qlsp, ak, NB_ORDER); + + iir_mem16(sp, st->interp_qlpc, sp, NB_SUBFRAME_SIZE, + NB_ORDER, st->mem_sp); + + /* Save for interpolation in next frame */ + for (i=0; i<NB_ORDER; i++) + st->interp_qlpc[i] = ak[i]; + } + + /* Store the LSPs for interpolation in the next frame */ + for (i=0; i<NB_ORDER; i++) + st->old_qlsp[i] = qlsp[i]; + + return 0; +} |