diff options
-rw-r--r-- | doc/manual.lyx | 77 | ||||
-rw-r--r-- | doc/manual.pdf | bin | 264509 -> 275797 bytes | |||
-rw-r--r-- | libspeex/speex_header.h | 28 | ||||
-rw-r--r-- | libspeex/speex_stereo.h | 17 | ||||
-rw-r--r-- | src/speexdec.1 | 3 | ||||
-rw-r--r-- | src/speexdec.c | 10 |
6 files changed, 108 insertions, 27 deletions
diff --git a/doc/manual.lyx b/doc/manual.lyx index eaaee24..11d0fb7 100644 --- a/doc/manual.lyx +++ b/doc/manual.lyx @@ -244,25 +244,55 @@ Linear Prediction (LPC) \layout Standard +Linear prediction is at the base of many speech coding techniques, including + CELP. + The idea behind it is to predict the signal +\begin_inset Formula $x(n)$ +\end_inset + + using a linear combination of its past samples: +\layout Standard + \begin_inset Formula \[ y(n)=\sum _{i=1}^{N}a_{i}x(n-i)\] \end_inset +where +\begin_inset Formula $y(n)$ +\end_inset + + is the linear prediction of +\begin_inset Formula $x(n)$ +\end_inset +. + The prediction error is thus given by: \begin_inset Formula \[ e(n)=x(n)-y(n)=x(n)-\sum _{i=1}^{N}a_{i}x(n-i)\] \end_inset +\layout Standard + +The goal of the LPC analysis is to find the best prediction coefficients + +\begin_inset Formula $a_{i}$ +\end_inset + + which minimize the quadratic error function: \begin_inset Formula \[ E=\sum _{n=0}^{L-1}\left[e(n)\right]^{2}=\sum _{n=0}^{L-1}\left[x(n)-\sum _{i=1}^{N}a_{i}x(n-i)\right]^{2}\] \end_inset +That can be done by making all derivatives +\begin_inset Formula $\frac{\partial E}{\partial a_{i}}$ +\end_inset + equal to zero: \begin_inset Formula \[ \frac{\partial E}{\partial a_{i}}=\frac{\partial }{\partial a_{i}}\sum _{n=0}^{L-1}\left[x(n)-\sum _{i=1}^{N}a_{i}x(n-i)\right]^{2}=0\] @@ -785,7 +815,7 @@ OL \begin_inset Float table -placement htbp +placement h wide true collapsed false @@ -1836,7 +1866,7 @@ So far, no MOS (Mean Opinion Score \begin_inset Float table -placement htbp +placement h wide true collapsed false @@ -2603,7 +2633,7 @@ For the wideband mode, all the narrowband frame is packed before the high-band \begin_inset Float table -placement htbp +placement h wide true collapsed false @@ -3042,6 +3072,19 @@ Bit allocation for high-band in wideband mode \end_inset +\layout Standard + + +\begin_inset ERT +status Open + +\layout Standard + +\backslash +clearpage +\end_inset + + \layout Section 
\pagebreak_top Command-line encoder/decoder @@ -3195,8 +3238,12 @@ The decoder takes the following options: --stereo Force decoding in stereo \layout Description +--rate\SpecialChar ~ +n Force decoding at n Hz sampling rate +\layout Description + --packet-loss\SpecialChar ~ -n Simulate n % random packet loss +n Simulate n % random packet loss \layout Description -V Verbose operation, print bit-rate currently in use @@ -4202,10 +4249,17 @@ RTP \end_inset - Payload Format + Payload Format \layout Standard -This is a work in progress. +The latest RTP payload draft can be found at +\begin_inset LatexCommand \url{http://www.speex.org/drafts/latest} + +\end_inset + +. + We are (2002/11/11) about to send the latest draft to the IETF for comments. + \layout Comment Since Speex encoded frames already contain mode information, they can be @@ -4216,6 +4270,17 @@ Since Speex encoded frames already contain mode information, they can be \layout Subsection +MIME Type +\layout Standard + +Speex will use the MIME type +\family typewriter +audio/speex +\family default +. + We will apply for that type in the near future. 
+\layout Subsection + Ogg \begin_inset LatexCommand \index{Ogg} diff --git a/doc/manual.pdf b/doc/manual.pdf Binary files differ index a177a39..091528f 100644 --- a/doc/manual.pdf +++ b/doc/manual.pdf diff --git a/libspeex/speex_header.h b/libspeex/speex_header.h index 02ceca3..c0acaba 100644 --- a/libspeex/speex_header.h +++ b/libspeex/speex_header.h @@ -47,25 +47,25 @@ struct SpeexMode; #define SPEEX_HEADER_VERSION_LENGTH 20 /** Current version of the Speex header */ -#define SPEEX_HEADER_VERSION -1 +#define SPEEX_HEADER_VERSION 1 /** Speex header info for file-based formats */ typedef struct SpeexHeader { - char speex_string[8]; /**< Identifies a Speex bit-stream, always set to "Speex " */ + char speex_string[8]; /**< Identifies a Speex bit-stream, always set to "Speex " */ char speex_version[SPEEX_HEADER_VERSION_LENGTH]; /**< Speex version */ - int speex_header_version; /**< Version number for the header */ - int header_size; /**< Total size of the header ( sizeof(SpeexHeader) ) */ - int rate; /**< Sampling rate used */ - int mode; /**< Mode used (0 for narrowband, 1 for wideband) */ + int speex_header_version; /**< Version number for the header */ + int header_size; /**< Total size of the header ( sizeof(SpeexHeader) ) */ + int rate; /**< Sampling rate used */ + int mode; /**< Mode used (0 for narrowband, 1 for wideband) */ int mode_bitstream_version; /**< Version ID of the bit-stream */ - int nb_channels; /**< Number of channels encoded */ - int bitrate; /**< Bit-rate used */ - int frame_size; /**< Size of frames */ - int vbr; /**< 1 for a VBR encoding, 0 otherwise */ - int frames_per_packet; /**< Number of frames stored per Ogg packet */ - int reserved1; /**< Reserved for future use */ - int reserved2; /**< Reserved for future use */ - int reserved3; /**< Reserved for future use */ + int nb_channels; /**< Number of channels encoded */ + int bitrate; /**< Bit-rate used */ + int frame_size; /**< Size of frames */ + int vbr; /**< 1 for a VBR encoding, 0 
otherwise */ + int frames_per_packet; /**< Number of frames stored per Ogg packet */ + int reserved1; /**< Reserved for future use, must be zero */ + int reserved2; /**< Reserved for future use, must be zero */ + int reserved3; /**< Reserved for future use, must be zero */ } SpeexHeader; /** Initializes a SpeexHeader using basic information */ diff --git a/libspeex/speex_stereo.h b/libspeex/speex_stereo.h index d745d00..f9d3b83 100644 --- a/libspeex/speex_stereo.h +++ b/libspeex/speex_stereo.h @@ -1,6 +1,9 @@ -/* Copyright (C) 2002 Jean-Marc Valin - File: stereo.c - +/* Copyright (C) 2002 Jean-Marc Valin*/ +/** + @file speex_stereo.h + @brief Describes the handling for intensity stereo +*/ +/* Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: @@ -34,17 +37,21 @@ #include "speex_bits.h" +/** State used for decoding (intensity) stereo information */ typedef struct SpeexStereoState { - float balance; - float e_ratio; + float balance; /**< Left/right balance info */ + float e_ratio; /**< Ratio of energies: E(left+right)/[E(left)+E(right)] */ } SpeexStereoState; #define SPEEX_STEREO_STATE_INIT {1,.5} +/** Transforms a stereo frame into a mono frame and stores intensity stereo info in 'bits' */ void speex_encode_stereo(float *data, int frame_size, SpeexBits *bits); +/** Transforms a mono frame into a stereo frame using intensity stereo info */ void speex_decode_stereo(float *data, int frame_size, SpeexStereoState *stereo); +/** Callback handler for intensity stereo info */ int speex_std_stereo_request_handler(SpeexBits *bits, void *state, void *data); #endif diff --git a/src/speexdec.1 b/src/speexdec.1 index 3b39c6d..371c3df 100644 --- a/src/speexdec.1 +++ b/src/speexdec.1 @@ -50,6 +50,9 @@ Force decoding in mono \fB\-\-stereo\fR Force decoding in stereo .TP +\fB\-\-rate\fR n +Force decoding at sampling rate n Hz +.TP \fB\-\-packet\-loss\fR n Simulate n % random packet loss .TP 
diff --git a/src/speexdec.c b/src/speexdec.c index 347dcb6..898f098 100644 --- a/src/speexdec.c +++ b/src/speexdec.c @@ -203,6 +203,7 @@ void usage() printf (" --force-uwb Force decoding in ultra-wideband\n"); printf (" --mono Force decoding in mono\n"); printf (" --stereo Force decoding in stereo\n"); + printf (" --rate n Force decoding at sampling rate n Hz\n"); printf (" --packet-loss n Simulate n %% random packet loss\n"); printf (" -V Verbose mode (show bit-rate)\n"); printf (" -h, --help This help\n"); @@ -273,7 +274,8 @@ static void *process_header(ogg_packet *op, int enh_enabled, int *frame_size, in callback.data = stereo; speex_decoder_ctl(st, SPEEX_SET_HANDLER, &callback); - *rate = header->rate; + if (!*rate) + *rate = header->rate; /* Adjust rate if --force-* options are used */ if (forceMode!=-1) { @@ -329,6 +331,7 @@ int main(int argc, char **argv) {"force-nb", no_argument, NULL, 0}, {"force-wb", no_argument, NULL, 0}, {"force-uwb", no_argument, NULL, 0}, + {"rate", required_argument, NULL, 0}, {"mono", no_argument, NULL, 0}, {"stereo", no_argument, NULL, 0}, {"packet-loss", required_argument, NULL, 0}, @@ -348,6 +351,7 @@ int main(int argc, char **argv) float loss_percent=-1; SpeexStereoState stereo = SPEEX_STEREO_STATE_INIT; int channels=-1; + int rate=0; enh_enabled = 0; @@ -403,6 +407,9 @@ int main(int argc, char **argv) } else if (strcmp(long_options[option_index].name,"stereo")==0) { channels=2; + } else if (strcmp(long_options[option_index].name,"rate")==0) + { + rate=atoi (optarg); } else if (strcmp(long_options[option_index].name,"packet-loss")==0) { loss_percent = atof(optarg); @@ -490,7 +497,6 @@ int main(int argc, char **argv) /*If first packet, process as Speex header*/ if (packet_count==0) { - int rate; st = process_header(&op, enh_enabled, &frame_size, &rate, &nframes, forceMode, &channels, &stereo); if (!nframes) nframes=1; |