Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.xiph.org/xiph/opus.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJean-Marc Valin <jmvalin@jmvalin.ca>2013-03-08 21:29:53 +0400
committerJean-Marc Valin <jmvalin@jmvalin.ca>2013-03-08 21:29:53 +0400
commitf96fc8cc839331ae5e6e596fb931bf8261e129ff (patch)
tree05b841eb36fa15606f0e9e5b8157e7414c1ef130
parentfa43c770e0fd0ebc796a61a32c305dfcf3869128 (diff)
parent73142b100adebe4321d3919ab657f510b7cfe40d (diff)
Merge branch 'exp_analysis'
Conflicts: celt/celt_encoder.c
-rw-r--r--celt/celt_encoder.c18
-rw-r--r--include/opus_defines.h38
-rw-r--r--src/analysis.c230
-rw-r--r--src/analysis.h31
-rw-r--r--src/mlp_data.c153
-rw-r--r--src/mlp_train.c21
-rw-r--r--src/opus_demo.c65
-rw-r--r--src/opus_encoder.c435
-rw-r--r--src/opus_multistream_encoder.c125
-rw-r--r--src/opus_private.h16
10 files changed, 936 insertions, 196 deletions
diff --git a/celt/celt_encoder.c b/celt/celt_encoder.c
index 96a9f6fe..d93b15a6 100644
--- a/celt/celt_encoder.c
+++ b/celt/celt_encoder.c
@@ -1329,7 +1329,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
prefilter_tapset = st->tapset_decision;
pf_on = run_prefilter(st, in, prefilter_mem, CC, N, prefilter_tapset, &pitch_index, &gain1, &qg, enabled, nbAvailableBytes);
- if ((gain1 > QCONST16(.4f,15) || st->prefilter_gain > QCONST16(.4f,15)) && st->analysis.tonality > .3
+ if ((gain1 > QCONST16(.4f,15) || st->prefilter_gain > QCONST16(.4f,15)) && (!st->analysis.valid || st->analysis.tonality > .3)
&& (pitch_index > 1.26*st->prefilter_period || pitch_index < .79*st->prefilter_period))
pitch_change = 1;
if (pf_on==0)
@@ -1353,15 +1353,17 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
isTransient = 0;
shortBlocks = 0;
+ if (st->complexity >= 1)
+ {
+ isTransient = transient_analysis(in, N+st->overlap, CC,
+ &tf_estimate, &tf_chan);
+ }
if (LM>0 && ec_tell(enc)+3<=total_bits)
{
- if (st->complexity >= 1)
- {
- isTransient = transient_analysis(in, N+st->overlap, CC,
- &tf_estimate, &tf_chan);
- if (isTransient)
- shortBlocks = M;
- }
+ if (isTransient)
+ shortBlocks = M;
+ } else {
+ isTransient = 0;
}
ALLOC(freq, CC*N, celt_sig); /**< Interleaved signal MDCTs */
diff --git a/include/opus_defines.h b/include/opus_defines.h
index cdde061a..203144a7 100644
--- a/include/opus_defines.h
+++ b/include/opus_defines.h
@@ -148,8 +148,9 @@ extern "C" {
#define OPUS_GET_GAIN_REQUEST 4045 /* Should have been 4035 */
#define OPUS_SET_LSB_DEPTH_REQUEST 4036
#define OPUS_GET_LSB_DEPTH_REQUEST 4037
-
#define OPUS_GET_LAST_PACKET_DURATION_REQUEST 4039
+#define OPUS_SET_EXPERT_FRAME_DURATION_REQUEST 4040
+#define OPUS_GET_EXPERT_FRAME_DURATION_REQUEST 4041
/* Don't use 4045, it's already taken by OPUS_GET_GAIN_REQUEST */
@@ -185,6 +186,15 @@ extern "C" {
#define OPUS_BANDWIDTH_SUPERWIDEBAND 1104 /**<12 kHz bandpass @hideinitializer*/
#define OPUS_BANDWIDTH_FULLBAND 1105 /**<20 kHz bandpass @hideinitializer*/
+#define OPUS_FRAMESIZE_ARG 5000 /**< Select frame size from the argument (default) */
+#define OPUS_FRAMESIZE_2_5_MS 5001 /**< Use 2.5 ms frames */
+#define OPUS_FRAMESIZE_5_MS 5002 /**< Use 5 ms frames */
+#define OPUS_FRAMESIZE_10_MS 5003 /**< Use 10 ms frames */
+#define OPUS_FRAMESIZE_20_MS 5004 /**< Use 20 ms frames */
+#define OPUS_FRAMESIZE_40_MS 5005 /**< Use 40 ms frames */
+#define OPUS_FRAMESIZE_60_MS 5006 /**< Use 60 ms frames */
+#define OPUS_FRAMESIZE_VARIABLE 5010 /**< Optimize the frame size dynamically */
+
/**@}*/
@@ -525,6 +535,32 @@ extern "C" {
* @param[out] x <tt>opus_int32 *</tt>: Number of samples (at current sampling rate).
* @hideinitializer */
#define OPUS_GET_LAST_PACKET_DURATION(x) OPUS_GET_LAST_PACKET_DURATION_REQUEST, __opus_check_int_ptr(x)
+
+/** Configures the encoder's use of variable duration frames.
+ * When enabled, the encoder is free to use a shorter frame size than the one
+ * requested in the opus_encode*() call. It is then the user's responsibility
+ * to verify how much audio was encoded by checking the ToC byte of the encoded
+ * packet. The part of the audio that was not encoded needs to be resent to the
+ * encoder for the next call. Do not use this option unless you <b>really</b>
+ * know what you are doing.
+ * @see OPUS_GET_EXPERT_FRAME_DURATION
+ * @param[in] x <tt>opus_int32</tt>: Allowed values:
+ * <dl>
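+ * <dt>OPUS_FRAMESIZE_ARG</dt><dd>Select frame size from the argument (default).</dd>
+ * <dt>OPUS_FRAMESIZE_2_5_MS</dt><dd>Use 2.5 ms frames.</dd>
+ * <dt>OPUS_FRAMESIZE_5_MS</dt><dd>Use 5 ms frames.</dd>
+ * <dt>OPUS_FRAMESIZE_10_MS</dt><dd>Use 10 ms frames.</dd>
+ * <dt>OPUS_FRAMESIZE_20_MS</dt><dd>Use 20 ms frames.</dd>
+ * <dt>OPUS_FRAMESIZE_40_MS</dt><dd>Use 40 ms frames.</dd>
+ * <dt>OPUS_FRAMESIZE_60_MS</dt><dd>Use 60 ms frames.</dd>
+ * <dt>OPUS_FRAMESIZE_VARIABLE</dt><dd>Optimize the frame size dynamically.</dd>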
+ * </dl>
+ * @hideinitializer */
+#define OPUS_SET_EXPERT_FRAME_DURATION(x) OPUS_SET_EXPERT_FRAME_DURATION_REQUEST, __opus_check_int(x)
+/** Gets the encoder's configured use of variable duration frames.
+ * @see OPUS_SET_EXPERT_FRAME_DURATION
+ * @param[out] x <tt>opus_int32 *</tt>: Returns one of the following values:
+ * <dl>
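+ * <dt>OPUS_FRAMESIZE_ARG</dt><dd>Select frame size from the argument (default).</dd>
+ * <dt>OPUS_FRAMESIZE_2_5_MS</dt><dd>Use 2.5 ms frames.</dd>
+ * <dt>OPUS_FRAMESIZE_5_MS</dt><dd>Use 5 ms frames.</dd>
+ * <dt>OPUS_FRAMESIZE_10_MS</dt><dd>Use 10 ms frames.</dd>
+ * <dt>OPUS_FRAMESIZE_20_MS</dt><dd>Use 20 ms frames.</dd>
+ * <dt>OPUS_FRAMESIZE_40_MS</dt><dd>Use 40 ms frames.</dd>
+ * <dt>OPUS_FRAMESIZE_60_MS</dt><dd>Use 60 ms frames.</dd>
+ * <dt>OPUS_FRAMESIZE_VARIABLE</dt><dd>Optimize the frame size dynamically.</dd>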
+ * </dl>
+ * @hideinitializer */
+#define OPUS_GET_EXPERT_FRAME_DURATION(x) OPUS_GET_EXPERT_FRAME_DURATION_REQUEST, __opus_check_int_ptr(x)
+
/**@}*/
/** @defgroup opus_genericctls Generic CTLs
diff --git a/src/analysis.c b/src/analysis.c
index 22a8fa79..14b2246c 100644
--- a/src/analysis.c
+++ b/src/analysis.c
@@ -139,10 +139,56 @@ static inline float fast_atan2f(float y, float x) {
}
}
-void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEncoder *celt_enc, const opus_val16 *x, int C, int lsb_depth)
+void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int len)
+{
+ int pos;
+ int curr_lookahead;
+ float psum;
+ int i;
+
+ pos = tonal->read_pos;
+ curr_lookahead = tonal->write_pos-tonal->read_pos;
+ if (curr_lookahead<0)
+ curr_lookahead += DETECT_SIZE;
+
+ if (len > 480 && pos != tonal->write_pos)
+ {
+ pos++;
+ if (pos==DETECT_SIZE)
+ pos=0;
+ }
+ if (pos == tonal->write_pos)
+ pos--;
+ if (pos<0)
+ pos = DETECT_SIZE-1;
+ OPUS_COPY(info_out, &tonal->info[pos], 1);
+ tonal->read_subframe += len/120;
+ while (tonal->read_subframe>=4)
+ {
+ tonal->read_subframe -= 4;
+ tonal->read_pos++;
+ }
+ if (tonal->read_pos>=DETECT_SIZE)
+ tonal->read_pos-=DETECT_SIZE;
+
+ /* Compensate for the delay in the features themselves.
+ FIXME: Need a better estimate than the 10 I just made up */
+ curr_lookahead = IMAX(curr_lookahead-10, 0);
+
+ psum=0;
+ for (i=0;i<DETECT_SIZE-curr_lookahead;i++)
+ psum += tonal->pmusic[i];
+ for (;i<DETECT_SIZE;i++)
+ psum += tonal->pspeech[i];
+ psum = psum*tonal->music_confidence + (1-psum)*tonal->speech_confidence;
+ /*printf("%f %f\n", psum, info_out->music_prob);*/
+
+ info_out->music_prob = psum;
+}
+
+void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, const CELTMode *celt_mode, const void *x, int len, int offset, int C, int lsb_depth, downmix_func downmix)
{
int i, b;
- const CELTMode *mode;
const kiss_fft_state *kfft;
kiss_fft_cpx in[480], out[480];
int N = 480, N2=240;
@@ -163,14 +209,15 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEnc
float slope=0;
float frame_stationarity;
float relativeE;
- float frame_prob;
+ float frame_probs[2];
float alpha, alphaE, alphaE2;
float frame_loudness;
float bandwidth_mask;
int bandwidth=0;
float maxE = 0;
float noise_floor;
- celt_encoder_ctl(celt_enc, CELT_GET_MODE(&mode));
+ int remaining;
+ AnalysisInfo *info;
tonal->last_transition++;
alpha = 1.f/IMIN(20, 1+tonal->count);
@@ -179,27 +226,32 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEnc
if (tonal->count<4)
tonal->music_prob = .5;
- kfft = mode->mdct.kfft[0];
- if (C==1)
+ kfft = celt_mode->mdct.kfft[0];
+ if (tonal->count==0)
+ tonal->mem_fill = 240;
+ downmix(x, &tonal->inmem[tonal->mem_fill], IMIN(len, ANALYSIS_BUF_SIZE-tonal->mem_fill), offset, C);
+ if (tonal->mem_fill+len < ANALYSIS_BUF_SIZE)
{
- for (i=0;i<N2;i++)
- {
- float w = analysis_window[i];
- in[i].r = MULT16_16(w, x[i]);
- in[i].i = MULT16_16(w, x[N-N2+i]);
- in[N-i-1].r = MULT16_16(w, x[N-i-1]);
- in[N-i-1].i = MULT16_16(w, x[2*N-N2-i-1]);
- }
- } else {
- for (i=0;i<N2;i++)
- {
- float w = analysis_window[i];
- in[i].r = MULT16_16(w, x[2*i]+x[2*i+1]);
- in[i].i = MULT16_16(w, x[2*(N-N2+i)]+x[2*(N-N2+i)+1]);
- in[N-i-1].r = MULT16_16(w, x[2*(N-i-1)]+x[2*(N-i-1)+1]);
- in[N-i-1].i = MULT16_16(w, x[2*(2*N-N2-i-1)]+x[2*(2*N-N2-i-1)+1]);
- }
+ tonal->mem_fill += len;
+ /* Don't have enough to update the analysis */
+ return;
}
+ info = &tonal->info[tonal->write_pos++];
+ if (tonal->write_pos>=DETECT_SIZE)
+ tonal->write_pos-=DETECT_SIZE;
+
+ for (i=0;i<N2;i++)
+ {
+ float w = analysis_window[i];
+ in[i].r = MULT16_16(w, tonal->inmem[i]);
+ in[i].i = MULT16_16(w, tonal->inmem[N2+i]);
+ in[N-i-1].r = MULT16_16(w, tonal->inmem[N-i-1]);
+ in[N-i-1].i = MULT16_16(w, tonal->inmem[N+N2-i-1]);
+ }
+ OPUS_MOVE(tonal->inmem, tonal->inmem+ANALYSIS_BUF_SIZE-240, 240);
+ remaining = len - (ANALYSIS_BUF_SIZE-tonal->mem_fill);
+ downmix(x, &tonal->inmem[240], remaining, offset+ANALYSIS_BUF_SIZE-tonal->mem_fill, C);
+ tonal->mem_fill = 240 + remaining;
opus_fft(kfft, in, out);
for (i=1;i<N2;i++)
@@ -417,27 +469,91 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEnc
features[24] = tonal->lowECount;
#ifndef FIXED_POINT
- mlp_process(&net, features, &frame_prob);
- frame_prob = .5f*(frame_prob+1);
+ mlp_process(&net, features, frame_probs);
+ frame_probs[0] = .5f*(frame_probs[0]+1);
/* Curve fitting between the MLP probability and the actual probability */
- frame_prob = .01f + 1.21f*frame_prob*frame_prob - .23f*(float)pow(frame_prob, 10);
+ frame_probs[0] = .01f + 1.21f*frame_probs[0]*frame_probs[0] - .23f*(float)pow(frame_probs[0], 10);
+ frame_probs[1] = .5*frame_probs[1]+.5;
+ frame_probs[0] = frame_probs[1]*frame_probs[0] + (1-frame_probs[1])*.5;
- /*printf("%f\n", frame_prob);*/
+ /*printf("%f %f ", frame_probs[0], frame_probs[1]);*/
{
float tau, beta;
float p0, p1;
- float max_certainty;
/* One transition every 3 minutes */
- tau = .00005f;
- beta = .1f;
- max_certainty = .01f+1.f/(20.f+.5f*tonal->last_transition);
+ tau = .00005f*frame_probs[1];
+ beta = .05f;
+ if (1) {
+ /* Adapt beta based on how "unexpected" the new prob is */
+ float p, q;
+ p = MAX16(.05f,MIN16(.95f,frame_probs[0]));
+ q = MAX16(.05f,MIN16(.95f,tonal->music_prob));
+ beta = .01+.05*ABS16(p-q)/(p*(1-q)+q*(1-p));
+ }
p0 = (1-tonal->music_prob)*(1-tau) + tonal->music_prob *tau;
p1 = tonal->music_prob *(1-tau) + (1-tonal->music_prob)*tau;
- p0 *= (float)pow(1-frame_prob, beta);
- p1 *= (float)pow(frame_prob, beta);
- tonal->music_prob = MAX16(max_certainty, MIN16(1-max_certainty, p1/(p0+p1)));
+ p0 *= (float)pow(1-frame_probs[0], beta);
+ p1 *= (float)pow(frame_probs[0], beta);
+ tonal->music_prob = p1/(p0+p1);
info->music_prob = tonal->music_prob;
- /*printf("%f %f\n", frame_prob, info->music_prob);*/
+
+ float psum=1e-20;
+ float speech0 = (float)pow(1-frame_probs[0], beta);
+ float music0 = (float)pow(frame_probs[0], beta);
+ if (tonal->count==1)
+ {
+ tonal->pspeech[0]=.5;
+ tonal->pmusic [0]=.5;
+ }
+ float s0, m0;
+ s0 = tonal->pspeech[0] + tonal->pspeech[1];
+ m0 = tonal->pmusic [0] + tonal->pmusic [1];
+ tonal->pspeech[0] = s0*(1-tau)*speech0;
+ tonal->pmusic [0] = m0*(1-tau)*music0;
+ for (i=1;i<DETECT_SIZE-1;i++)
+ {
+ tonal->pspeech[i] = tonal->pspeech[i+1]*speech0;
+ tonal->pmusic [i] = tonal->pmusic [i+1]*music0;
+ }
+ tonal->pspeech[DETECT_SIZE-1] = m0*tau*speech0;
+ tonal->pmusic [DETECT_SIZE-1] = s0*tau*music0;
+
+ for (i=0;i<DETECT_SIZE;i++)
+ psum += tonal->pspeech[i] + tonal->pmusic[i];
+ psum = 1.f/psum;
+ for (i=0;i<DETECT_SIZE;i++)
+ {
+ tonal->pspeech[i] *= psum;
+ tonal->pmusic [i] *= psum;
+ }
+ psum = tonal->pmusic[0];
+ for (i=1;i<DETECT_SIZE;i++)
+ psum += tonal->pspeech[i];
+
+ /* Estimate our confidence in the speech/music decisions */
+ if (frame_probs[1]>.75)
+ {
+ if (tonal->music_prob>.9)
+ {
+ float adapt;
+ adapt = 1.f/(++tonal->music_confidence_count);
+ tonal->music_confidence_count = IMIN(tonal->music_confidence_count, 500);
+ tonal->music_confidence += adapt*MAX16(-.2f,frame_probs[0]-tonal->music_confidence);
+ }
+ if (tonal->music_prob<.1)
+ {
+ float adapt;
+ adapt = 1.f/(++tonal->speech_confidence_count);
+ tonal->speech_confidence_count = IMIN(tonal->speech_confidence_count, 500);
+ tonal->speech_confidence += adapt*MIN16(.2f,frame_probs[0]-tonal->speech_confidence);
+ }
+ } else {
+ if (tonal->music_confidence_count==0)
+ tonal->music_confidence = .9;
+ if (tonal->speech_confidence_count==0)
+ tonal->speech_confidence = .1;
+ }
+ psum = MAX16(tonal->speech_confidence, MIN16(tonal->music_confidence, psum));
}
if (tonal->last_music != (tonal->music_prob>.5f))
tonal->last_transition=0;
@@ -465,4 +581,48 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEnc
/*printf("%d %d\n", info->bandwidth, info->opus_bandwidth);*/
info->noisiness = frame_noisiness;
info->valid = 1;
+ if (info_out!=NULL)
+ OPUS_COPY(info_out, info, 1);
+}
+
+int run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *pcm,
+ const void *analysis_pcm, int frame_size, int variable_duration, int C, opus_int32 Fs, int bitrate_bps,
+ int delay_compensation, int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info)
+{
+ int offset;
+ int pcm_len;
+
+ /* Avoid overflow/wrap-around of the analysis buffer */
+ frame_size = IMIN((DETECT_SIZE-5)*Fs/100, frame_size);
+
+ pcm_len = frame_size - analysis->analysis_offset;
+ offset = 0;
+ do {
+ tonality_analysis(analysis, NULL, celt_mode, analysis_pcm, IMIN(480, pcm_len), offset, C, lsb_depth, downmix);
+ offset += 480;
+ pcm_len -= 480;
+ } while (pcm_len>0);
+ analysis->analysis_offset = frame_size;
+
+ if (variable_duration == OPUS_FRAMESIZE_VARIABLE && frame_size >= Fs/200)
+ {
+ int LM = 3;
+ LM = optimize_framesize(pcm, frame_size, C, Fs, bitrate_bps,
+ analysis->prev_tonality, analysis->subframe_mem, delay_compensation, downmix);
+ while ((Fs/400<<LM)>frame_size)
+ LM--;
+ frame_size = (Fs/400<<LM);
+ } else {
+ frame_size = frame_size_select(frame_size, variable_duration, Fs);
+ }
+ if (frame_size<0)
+ return -1;
+ analysis->analysis_offset -= frame_size;
+
+ /* Only perform analysis up to 20-ms frames. Longer ones will be split if
+ they're in CELT-only mode. */
+ analysis_info->valid = 0;
+ tonality_get_info(analysis, analysis_info, frame_size);
+
+ return frame_size;
}
diff --git a/src/analysis.h b/src/analysis.h
index bf8ad40a..7b17118c 100644
--- a/src/analysis.h
+++ b/src/analysis.h
@@ -28,18 +28,27 @@
#ifndef ANALYSIS_H
#define ANALYSIS_H
+#include "celt.h"
+#include "opus_private.h"
+
#define NB_FRAMES 8
#define NB_TBANDS 18
#define NB_TOT_BANDS 21
+#define ANALYSIS_BUF_SIZE 720 /* 15 ms at 48 kHz */
+
+#define DETECT_SIZE 200
typedef struct {
float angle[240];
float d_angle[240];
float d2_angle[240];
+ float inmem[ANALYSIS_BUF_SIZE];
+ int mem_fill; /* number of usable samples in the buffer */
float prev_band_tonality[NB_TBANDS];
float prev_tonality;
float E[NB_FRAMES][NB_TBANDS];
- float lowE[NB_TBANDS], highE[NB_TBANDS];
+ float lowE[NB_TBANDS];
+ float highE[NB_TBANDS];
float meanE[NB_TOT_BANDS];
float mem[32];
float cmean[8];
@@ -52,9 +61,27 @@ typedef struct {
int last_transition;
int count;
int opus_bandwidth;
+ opus_val32 subframe_mem[3];
+ int analysis_offset;
+ float pspeech[DETECT_SIZE];
+ float pmusic[DETECT_SIZE];
+ float speech_confidence;
+ float music_confidence;
+ int speech_confidence_count;
+ int music_confidence_count;
+ int write_pos;
+ int read_pos;
+ int read_subframe;
+ AnalysisInfo info[DETECT_SIZE];
} TonalityAnalysisState;
void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info,
- CELTEncoder *celt_enc, const opus_val16 *x, int C, int lsb_depth);
+ const CELTMode *celt_mode, const void *x, int len, int offset, int C, int lsb_depth, downmix_func downmix);
+
+void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int len);
+
+int run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *pcm,
+ const void *analysis_pcm, int frame_size, int variable_duration, int C, opus_int32 Fs, int bitrate_bps,
+ int delay_compensation, int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info);
#endif
diff --git a/src/mlp_data.c b/src/mlp_data.c
index 5c13ca40..9085b85f 100644
--- a/src/mlp_data.c
+++ b/src/mlp_data.c
@@ -3,74 +3,103 @@
#include "mlp.h"
-/* RMS error was 0.179835, seed was 1322103961 */
+/* RMS error was 0.138320, seed was 1361535663 */
-static const float weights[271] = {
+static const float weights[422] = {
/* hidden layer */
-1.55597f, -0.0739792f, -0.0646761f, -0.099531f, -0.0794943f,
-0.0180174f, -0.0391354f, 0.0508224f, -0.0160169f, -0.0773263f,
--0.0300002f, -0.0865361f, 0.124477f, -0.28648f, -0.0860702f,
--0.518949f, -0.0873341f, -0.235393f, -0.907833f, -0.383573f,
-0.535388f, -0.57944f, 0.98116f, 0.8482f, 1.12426f,
--3.23721f, -0.647072f, -0.0265139f, 0.0711052f, -0.00125666f,
--0.0396181f, -0.44282f, -0.510495f, -0.201865f, 0.0134336f,
--0.167205f, -0.155406f, 0.00041678f, -0.00468705f, -0.0233224f,
-0.264279f, -0.301375f, 0.00234895f, 0.0144741f, -0.137535f,
-0.200323f, 0.0192027f, 3.19818f, 2.03495f, 0.705517f,
--4.6025f, -0.11485f, -0.792716f, 0.150714f, 0.10608f,
-0.240633f, 0.0690698f, 0.0695297f, 0.124819f, 0.0501433f,
-0.0460952f, 0.147639f, 0.10327f, 0.158007f, 0.113714f,
-0.0276191f, 0.0680749f, -0.130012f, 0.0796126f, 0.133067f,
-0.51495f, 0.747578f, -0.128742f, 5.98112f, -1.16698f,
--0.276492f, -1.73549f, -3.90234f, 2.01489f, -0.040118f,
--0.113002f, -0.146751f, -0.113569f, 0.0534873f, 0.0989832f,
-0.0872875f, 0.049266f, 0.0367557f, -0.00889148f, -0.0648461f,
--0.00190352f, 0.0143773f, 0.0259364f, -0.0592133f, -0.0672924f,
-0.1399f, -0.0987886f, -0.347402f, 0.101326f, -0.0680876f,
-0.469186f, 0.246922f, 10.4017f, 3.44846f, -0.662725f,
--0.0328208f, -0.0561274f, -0.0167744f, 0.00044282f, -0.0457645f,
--0.0408314f, -0.013113f, -0.0373873f, -0.0474122f, -0.0273745f,
--0.0308505f, 0.000582959f, -0.0421135f, 0.464859f, 0.196842f,
-0.320538f, 0.0435528f, -0.200168f, 0.266475f, -0.0853727f,
-1.20397f, 0.711542f, -1.04397f, -1.47759f, 1.26768f,
-0.446958f, 0.266477f, -0.30802f, 0.28431f, -0.118541f,
-0.00836345f, 0.0689026f, -0.0137996f, -0.0395417f, 0.26982f,
--0.206255f, 0.16066f, 0.114757f, 0.359587f, -0.106503f,
--0.0948534f, 0.175358f, -0.122966f, -0.0056675f, 0.483848f,
--0.134916f, -0.427567f, -0.140172f, -1.0866f, -2.73921f,
-0.549843f, 0.17685f, 0.0010675f, -0.00137386f, 0.0884424f,
--0.0698736f, -0.00174136f, 0.0718775f, -0.0396849f, 0.0448056f,
-0.0577853f, -0.0372353f, 0.134599f, 0.0260656f, 0.140322f,
-0.22704f, -0.020568f, -0.0142424f, -0.21723f, -0.997704f,
--0.884573f, -0.163495f, 2.33617f, 0.224142f, 0.19635f,
--0.957387f, 0.144678f, 1.47035f, -0.00700498f, -0.0472309f,
--0.0137848f, -0.0189145f, 0.00856479f, 0.0316965f, 0.00613373f,
-0.00209807f, 0.00270964f, -0.0490206f, 0.0105712f, -0.0465045f,
--0.0381532f, -0.0985268f, -0.108297f, 0.0146409f, -0.0040718f,
--0.0698572f, -0.380568f, -0.230479f, 3.98917f, 0.457652f,
--1.02355f, -7.4435f, -0.475314f, 1.61743f, 0.0254017f,
--0.00791293f, 0.047217f, 0.0220995f, -0.0304311f, 0.0052168f,
--0.0404054f, -0.0230293f, 0.00169229f, -0.0138178f, 0.0043137f,
--0.0598088f, -0.133601f, 0.0555138f, -0.177358f, -0.159856f,
--0.137281f, 0.108051f, -0.305973f, 0.393775f, 0.0747287f,
-0.783993f, -0.875086f, 1.06862f, 0.340519f, -0.352681f,
--0.0830912f, -0.100017f, 0.0729085f, -0.00829403f, 0.027489f,
--0.0779597f, 0.082286f, -0.164181f, -0.41519f, 0.00282335f,
--0.29573f, 0.125571f, 0.726935f, 0.392137f, 0.491348f,
-0.0723196f, -0.0259758f, -0.0636332f, -0.452384f, -0.000225974f,
--2.34001f, 2.45211f, -0.544628f, 5.62944f, -3.44507f,
+-0.0941125f, -0.302976f, -0.603555f, -0.19393f, -0.185983f,
+-0.601617f, -0.0465317f, -0.114563f, -0.103599f, -0.618938f,
+-0.317859f, -0.169949f, -0.0702885f, 0.148065f, 0.409524f,
+0.548432f, 0.367649f, -0.494393f, 0.764306f, -1.83957f,
+0.170849f, 12.786f, -1.08848f, -1.27284f, -16.2606f,
+24.1773f, -5.57454f, -0.17276f, -0.163388f, -0.224421f,
+-0.0948944f, -0.0728695f, -0.26557f, -0.100283f, -0.0515459f,
+-0.146142f, -0.120674f, -0.180655f, 0.12857f, 0.442138f,
+-0.493735f, 0.167767f, 0.206699f, -0.197567f, 0.417999f,
+1.50364f, -0.773341f, -10.0401f, 0.401872f, 2.97966f,
+15.2165f, -1.88905f, -1.19254f, 0.0285397f, -0.00405139f,
+0.0707565f, 0.00825699f, -0.0927269f, -0.010393f, -0.00428882f,
+-0.00489743f, -0.0709731f, -0.00255992f, 0.0395619f, 0.226424f,
+0.0325231f, 0.162175f, -0.100118f, 0.485789f, 0.12697f,
+0.285937f, 0.0155637f, 0.10546f, 3.05558f, 1.15059f,
+-1.00904f, -1.83088f, 3.31766f, -3.42516f, -0.119135f,
+-0.0405654f, 0.00690068f, 0.0179877f, -0.0382487f, 0.00597941f,
+-0.0183611f, 0.00190395f, -0.144322f, -0.0435671f, 0.000990594f,
+0.221087f, 0.142405f, 0.484066f, 0.404395f, 0.511955f,
+-0.237255f, 0.241742f, 0.35045f, -0.699428f, 10.3993f,
+2.6507f, -2.43459f, -4.18838f, 1.05928f, 1.71067f,
+0.00667811f, -0.0721335f, -0.0397346f, 0.0362704f, -0.11496f,
+-0.0235776f, 0.0082161f, -0.0141741f, -0.0329699f, -0.0354253f,
+0.00277404f, -0.290654f, -1.14767f, -0.319157f, -0.686544f,
+0.36897f, 0.478899f, 0.182579f, -0.411069f, 0.881104f,
+-4.60683f, 1.4697f, 0.335845f, -1.81905f, -30.1699f,
+5.55225f, 0.0019508f, -0.123576f, -0.0727332f, -0.0641597f,
+-0.0534458f, -0.108166f, -0.0937368f, -0.0697883f, -0.0275475f,
+-0.192309f, -0.110074f, 0.285375f, -0.405597f, 0.0926724f,
+-0.287881f, -0.851193f, -0.099493f, -0.233764f, -1.2852f,
+1.13611f, 3.12168f, -0.0699f, -1.86216f, 2.65292f,
+-7.31036f, 2.44776f, -0.00111802f, -0.0632786f, -0.0376296f,
+-0.149851f, 0.142963f, 0.184368f, 0.123433f, 0.0756158f,
+0.117312f, 0.0933395f, 0.0692163f, 0.0842592f, 0.0704683f,
+0.0589963f, 0.0942205f, -0.448862f, 0.0262677f, 0.270352f,
+-0.262317f, 0.172586f, 2.00227f, -0.159216f, 0.038422f,
+10.2073f, 4.15536f, -2.3407f, -0.0550265f, 0.00964792f,
+-0.141336f, 0.0274501f, 0.0343921f, -0.0487428f, 0.0950172f,
+-0.00775017f, -0.0372492f, -0.00548121f, -0.0663695f, 0.0960506f,
+-0.200008f, -0.0412827f, 0.58728f, 0.0515787f, 0.337254f,
+0.855024f, 0.668371f, -0.114904f, -3.62962f, -0.467477f,
+-0.215472f, 2.61537f, 0.406117f, -1.36373f, 0.0425394f,
+0.12208f, 0.0934502f, 0.123055f, 0.0340935f, -0.142466f,
+0.035037f, -0.0490666f, 0.0733208f, 0.0576672f, 0.123984f,
+-0.0517194f, -0.253018f, 0.590565f, 0.145849f, 0.315185f,
+0.221534f, -0.149081f, 0.216161f, -0.349575f, 24.5664f,
+-0.994196f, 0.614289f, -18.7905f, -2.83277f, -0.716801f,
+-0.347201f, 0.479515f, -0.246027f, 0.0758683f, 0.137293f,
+-0.17781f, 0.118751f, -0.00108329f, -0.237334f, 0.355732f,
+-0.12991f, -0.0547627f, -0.318576f, -0.325524f, 0.180494f,
+-0.0625604f, 0.141219f, 0.344064f, 0.37658f, -0.591772f,
+5.8427f, -0.38075f, 0.221894f, -1.41934f, -1.87943e+06f,
+1.34114f, 0.0283355f, -0.0447856f, -0.0211466f, -0.0256927f,
+0.0139618f, 0.0207934f, -0.0107666f, 0.0110969f, 0.0586069f,
+-0.0253545f, -0.0328433f, 0.11872f, -0.216943f, 0.145748f,
+0.119808f, -0.0915211f, -0.120647f, -0.0787719f, -0.143644f,
+-0.595116f, -1.152f, -1.25335f, -1.17092f, 4.34023f,
+-975268.f, -1.37033f, -0.0401123f, 0.210602f, -0.136656f,
+0.135962f, -0.0523293f, 0.0444604f, 0.0143928f, 0.00412666f,
+-0.0193003f, 0.218452f, -0.110204f, -2.02563f, 0.918238f,
+-2.45362f, 1.19542f, -0.061362f, -1.92243f, 0.308111f,
+0.49764f, 0.912356f, 0.209272f, -2.34525f, 2.19326f,
+-6.47121f, 1.69771f, -0.725123f, 0.0118929f, 0.0377944f,
+0.0554003f, 0.0226452f, -0.0704421f, -0.0300309f, 0.0122978f,
+-0.0041782f, -0.0686612f, 0.0313115f, 0.039111f, 0.364111f,
+-0.0945548f, 0.0229876f, -0.17414f, 0.329795f, 0.114714f,
+0.30022f, 0.106997f, 0.132355f, 5.79932f, 0.908058f,
+-0.905324f, -3.3561f, 0.190647f, 0.184211f, -0.673648f,
+0.231807f, -0.0586222f, 0.230752f, -0.438277f, 0.245857f,
+-0.17215f, 0.0876383f, -0.720512f, 0.162515f, 0.0170571f,
+0.101781f, 0.388477f, 1.32931f, 1.08548f, -0.936301f,
+-2.36958f, -6.71988f, -3.44376f, 2.13818f, 14.2318f,
+4.91459f, -3.09052f, -9.69191f, -0.768234f, 1.79604f,
+0.0549653f, 0.163399f, 0.0797025f, 0.0343933f, -0.0555876f,
+-0.00505673f, 0.0187258f, 0.0326628f, 0.0231486f, 0.15573f,
+0.0476223f, -0.254824f, 1.60155f, -0.801221f, 2.55496f,
+0.737629f, -1.36249f, -0.695463f, -2.44301f, -1.73188f,
+3.95279f, 1.89068f, 0.486087f, -11.3343f, 3.9416e+06f,
/* output layer */
--3.13835f, 0.994751f, 0.444901f, 1.59518f, 1.23665f,
-3.37012f, -1.34606f, 1.99131f, 1.33476f, 1.3885f,
-1.12559f, };
+-0.381439, 0.12115, -0.906927, 2.93878, 1.6388,
+0.882811, 0.874344, 1.21726, -0.874545, 0.321706,
+0.785055, 0.946558, -0.575066, -3.46553, 0.884905,
+0.0924047, -9.90712, 0.391338, 0.160103, -2.04954,
+4.1455, 0.0684029, -0.144761, -0.285282, 0.379244,
+-1.1584, -0.0277241, -9.85, -4.82386, 3.71333,
+3.87308, 3.52558, };
-static const int topo[3] = {25, 10, 1};
+static const int topo[3] = {25, 15, 2};
const MLP net = {
- 3,
- topo,
- weights
+ 3,
+ topo,
+ weights
};
-
diff --git a/src/mlp_train.c b/src/mlp_train.c
index 5fbbff08..2e9568ba 100644
--- a/src/mlp_train.c
+++ b/src/mlp_train.c
@@ -106,6 +106,7 @@ MLPTrain * mlp_init(int *topo, int nbLayers, float *inputs, float *outputs, int
}
#define MAX_NEURONS 100
+#define MAX_OUT 10
double compute_gradient(MLPTrain *net, float *inputs, float *outputs, int nbSamples, double *W0_grad, double *W1_grad, double *error_rate)
{
@@ -120,7 +121,8 @@ double compute_gradient(MLPTrain *net, float *inputs, float *outputs, int nbSamp
double netOut[MAX_NEURONS];
double error[MAX_NEURONS];
- *error_rate = 0;
+ for (i=0;i<outDim;i++)
+ error_rate[i] = 0;
topo = net->topo;
inDim = net->topo[0];
hiddenDim = net->topo[1];
@@ -153,7 +155,7 @@ double compute_gradient(MLPTrain *net, float *inputs, float *outputs, int nbSamp
netOut[i] = tansig_approx(sum);
error[i] = out[i] - netOut[i];
rms += error[i]*error[i];
- *error_rate += fabs(error[i])>1;
+ error_rate[i] += fabs(error[i])>1;
/*error[i] = error[i]/(1+fabs(error[i]));*/
}
/* Back-propagate error */
@@ -194,7 +196,7 @@ struct GradientArg {
double *W0_grad;
double *W1_grad;
double rms;
- double error_rate;
+ double error_rate[MAX_OUT];
};
void *gradient_thread_process(void *_arg)
@@ -213,7 +215,7 @@ void *gradient_thread_process(void *_arg)
sem_wait(&sem_begin[arg->id]);
if (arg->done)
break;
- arg->rms = compute_gradient(arg->net, arg->inputs, arg->outputs, arg->nbSamples, arg->W0_grad, arg->W1_grad, &arg->error_rate);
+ arg->rms = compute_gradient(arg->net, arg->inputs, arg->outputs, arg->nbSamples, arg->W0_grad, arg->W1_grad, arg->error_rate);
sem_post(&sem_end[arg->id]);
}
fprintf(stderr, "done\n");
@@ -295,7 +297,7 @@ float mlp_train_backprop(MLPTrain *net, float *inputs, float *outputs, int nbSam
for (e=0;e<nbEpoch;e++)
{
double rms=0;
- double error_rate = 0;
+ double error_rate[2] = {0,0};
for (i=0;i<NB_THREADS;i++)
{
sem_post(&sem_begin[i]);
@@ -306,7 +308,8 @@ float mlp_train_backprop(MLPTrain *net, float *inputs, float *outputs, int nbSam
{
sem_wait(&sem_end[i]);
rms += args[i].rms;
- error_rate += args[i].error_rate;
+ error_rate[0] += args[i].error_rate[0];
+ error_rate[1] += args[i].error_rate[1];
for (j=0;j<W0_size;j++)
W0_grad[j] += args[i].W0_grad[j];
for (j=0;j<W1_size;j++)
@@ -315,8 +318,9 @@ float mlp_train_backprop(MLPTrain *net, float *inputs, float *outputs, int nbSam
float mean_rate = 0, min_rate = 1e10;
rms = (rms/(outDim*nbSamples));
- error_rate = (error_rate/(outDim*nbSamples));
- fprintf (stderr, "%f (%f %f) ", error_rate, rms, best_rms);
+ error_rate[0] = (error_rate[0]/(nbSamples));
+ error_rate[1] = (error_rate[1]/(nbSamples));
+ fprintf (stderr, "%f %f (%f %f) ", error_rate[0], error_rate[1], rms, best_rms);
if (rms < best_rms)
{
best_rms = rms;
@@ -445,6 +449,7 @@ int main(int argc, char **argv)
outputs = malloc(nbOutputs*nbSamples*sizeof(*outputs));
seed = time(NULL);
+ /*seed = 1361480659;*/
fprintf (stderr, "Seed is %u\n", seed);
srand(seed);
build_tansig_table();
diff --git a/src/opus_demo.c b/src/opus_demo.c
index 09b12a33..a0acb0cd 100644
--- a/src/opus_demo.c
+++ b/src/opus_demo.c
@@ -53,6 +53,7 @@ void print_usage( char* argv[] )
fprintf(stderr, "-d : only runs the decoder (reads the bit-stream as input)\n" );
fprintf(stderr, "-cbr : enable constant bitrate; default: variable bitrate\n" );
fprintf(stderr, "-cvbr : enable constrained variable bitrate; default: unconstrained\n" );
+ fprintf(stderr, "-variable-duration : enable frames of variable duration (experts only); default: disabled\n" );
fprintf(stderr, "-bandwidth <NB|MB|WB|SWB|FB> : audio bandwidth (from narrowband to fullband); default: sampling rate\n" );
fprintf(stderr, "-framesize <2.5|5|10|20|40|60> : frame size in ms; default: 20 \n" );
fprintf(stderr, "-max_payload <bytes> : maximum payload size in bytes, default: 1024\n" );
@@ -221,6 +222,8 @@ int main(int argc, char *argv[])
short *in, *out;
int application=OPUS_APPLICATION_AUDIO;
double bits=0.0, bits_max=0.0, bits_act=0.0, bits2=0.0, nrg;
+ double tot_samples=0;
+ opus_uint64 tot_in, tot_out;
int bandwidth=-1;
const char *bandwidth_string;
int lost = 0, lost_prev = 1;
@@ -239,6 +242,10 @@ int main(int argc, char *argv[])
int curr_mode=0;
int curr_mode_count=0;
int mode_switch_time = 48000;
+ int nb_encoded;
+ int remaining=0;
+ int variable_duration=OPUS_FRAMESIZE_ARG;
+ int delayed_decision=0;
if (argc < 5 )
{
@@ -246,6 +253,7 @@ int main(int argc, char *argv[])
return EXIT_FAILURE;
}
+ tot_in=tot_out=0;
fprintf(stderr, "%s\n", opus_get_version_string());
args = 1;
@@ -306,7 +314,7 @@ int main(int argc, char *argv[])
forcechannels = OPUS_AUTO;
use_dtx = 0;
packet_loss_perc = 0;
- max_frame_size = 960*6;
+ max_frame_size = 2*48000;
curr_read=0;
while( args < argc - 2 ) {
@@ -374,6 +382,14 @@ int main(int argc, char *argv[])
check_encoder_option(decode_only, "-cvbr");
cvbr = 1;
args++;
+ } else if( strcmp( argv[ args ], "-variable-duration" ) == 0 ) {
+ check_encoder_option(decode_only, "-variable-duration");
+ variable_duration = OPUS_FRAMESIZE_VARIABLE;
+ args++;
+ } else if( strcmp( argv[ args ], "-delayed-decision" ) == 0 ) {
+ check_encoder_option(decode_only, "-delayed-decision");
+ delayed_decision = 1;
+ args++;
} else if( strcmp( argv[ args ], "-dtx") == 0 ) {
check_encoder_option(decode_only, "-dtx");
use_dtx = 1;
@@ -499,6 +515,7 @@ int main(int argc, char *argv[])
opus_encoder_ctl(enc, OPUS_GET_LOOKAHEAD(&skip));
opus_encoder_ctl(enc, OPUS_SET_LSB_DEPTH(16));
+ opus_encoder_ctl(enc, OPUS_SET_EXPERT_FRAME_DURATION(variable_duration));
}
if (!encode_only)
{
@@ -554,6 +571,26 @@ int main(int argc, char *argv[])
if ( use_inbandfec ) {
data[1] = (unsigned char*)calloc(max_payload_bytes,sizeof(char));
}
+ if(delayed_decision)
+ {
+ if (variable_duration!=OPUS_FRAMESIZE_VARIABLE)
+ {
+ if (frame_size==sampling_rate/400)
+ variable_duration = OPUS_FRAMESIZE_2_5_MS;
+ else if (frame_size==sampling_rate/200)
+ variable_duration = OPUS_FRAMESIZE_5_MS;
+ else if (frame_size==sampling_rate/100)
+ variable_duration = OPUS_FRAMESIZE_10_MS;
+ else if (frame_size==sampling_rate/50)
+ variable_duration = OPUS_FRAMESIZE_20_MS;
+ else if (frame_size==sampling_rate/25)
+ variable_duration = OPUS_FRAMESIZE_40_MS;
+ else
+ variable_duration = OPUS_FRAMESIZE_60_MS;
+ opus_encoder_ctl(enc, OPUS_SET_EXPERT_FRAME_DURATION(variable_duration));
+ }
+ frame_size = 2*48000;
+ }
while (!stop)
{
if (delayed_celt)
@@ -617,22 +654,28 @@ int main(int argc, char *argv[])
opus_encoder_ctl(enc, OPUS_SET_FORCE_CHANNELS(mode_list[curr_mode][3]));
frame_size = mode_list[curr_mode][2];
}
- err = fread(fbytes, sizeof(short)*channels, frame_size, fin);
+ err = fread(fbytes, sizeof(short)*channels, frame_size-remaining, fin);
curr_read = err;
+ tot_in += curr_read;
for(i=0;i<curr_read*channels;i++)
{
opus_int32 s;
s=fbytes[2*i+1]<<8|fbytes[2*i];
s=((s&0xFFFF)^0x8000)-0x8000;
- in[i]=s;
+ in[i+remaining*channels]=s;
}
- if (curr_read < frame_size)
+ if (curr_read+remaining < frame_size)
{
- for (i=curr_read*channels;i<frame_size*channels;i++)
+ for (i=(curr_read+remaining)*channels;i<frame_size*channels;i++)
in[i] = 0;
- stop = 1;
+ if (encode_only || decode_only)
+ stop = 1;
}
len[toggle] = opus_encode(enc, in, frame_size, data[toggle], max_payload_bytes);
+ nb_encoded = opus_packet_get_samples_per_frame(data[toggle], sampling_rate)*opus_packet_get_nb_frames(data[toggle], len[toggle]);
+ remaining = frame_size-nb_encoded;
+ for(i=0;i<remaining*channels;i++)
+ in[i] = in[nb_encoded*channels+i];
if (sweep_bps!=0)
{
bitrate_bps += sweep_bps;
@@ -681,6 +724,7 @@ int main(int argc, char *argv[])
fprintf(stderr, "Error writing.\n");
return EXIT_FAILURE;
}
+ tot_samples += nb_encoded;
} else {
int output_samples;
lost = len[toggle]==0 || (packet_loss_perc>0 && rand()%100 < packet_loss_perc);
@@ -703,6 +747,11 @@ int main(int argc, char *argv[])
}
if (output_samples>0)
{
+ if (!decode_only && tot_out + output_samples > tot_in)
+ {
+ stop=1;
+ output_samples = tot_in-tot_out;
+ }
if (output_samples>skip) {
int i;
for(i=0;i<(output_samples-skip)*channels;i++)
@@ -716,6 +765,7 @@ int main(int argc, char *argv[])
fprintf(stderr, "Error writing.\n");
return EXIT_FAILURE;
}
+ tot_out += output_samples-skip;
}
if (output_samples<skip) skip -= output_samples;
else skip = 0;
@@ -723,6 +773,7 @@ int main(int argc, char *argv[])
fprintf(stderr, "error decoding frame: %s\n",
opus_strerror(output_samples));
}
+ tot_samples += output_samples;
}
}
@@ -767,7 +818,7 @@ int main(int argc, char *argv[])
toggle = (toggle + use_inbandfec) & 1;
}
fprintf (stderr, "average bitrate: %7.3f kb/s\n",
- 1e-3*bits*sampling_rate/(frame_size*(double)count));
+ 1e-3*bits*sampling_rate/tot_samples);
fprintf (stderr, "maximum bitrate: %7.3f kb/s\n",
1e-3*bits_max*sampling_rate/frame_size);
if (!decode_only)
diff --git a/src/opus_encoder.c b/src/opus_encoder.c
index 0daeb020..a8074473 100644
--- a/src/opus_encoder.c
+++ b/src/opus_encoder.c
@@ -67,6 +67,7 @@ struct OpusEncoder {
opus_int32 Fs;
int use_vbr;
int vbr_constraint;
+ int variable_duration;
opus_int32 bitrate_bps;
opus_int32 user_bitrate_bps;
int lsb_depth;
@@ -89,7 +90,8 @@ struct OpusEncoder {
opus_val16 delay_buffer[MAX_ENCODER_BUFFER*2];
#ifndef FIXED_POINT
TonalityAnalysisState analysis;
- int detected_bandwidth;
+ int detected_bandwidth;
+ int analysis_offset;
#endif
opus_uint32 rangeFinal;
};
@@ -213,6 +215,7 @@ int opus_encoder_init(OpusEncoder* st, opus_int32 Fs, int channels, int applicat
st->voice_ratio = -1;
st->encoder_buffer = st->Fs/100;
st->lsb_depth = 24;
+ st->variable_duration = OPUS_FRAMESIZE_ARG;
/* Delay compensation of 4 ms (2.5 ms for SILK's extra look-ahead
+ 1.5 ms for SILK resamplers and stereo prediction) */
@@ -535,8 +538,258 @@ static opus_int32 user_bitrate_to_bitrate(OpusEncoder *st, int frame_size, int m
return st->user_bitrate_bps;
}
+#ifndef FIXED_POINT
+/* Don't use more than 60 ms for the frame size analysis */
+#define MAX_DYNAMIC_FRAMESIZE 24
+/* Estimates how much the bitrate will be boosted based on the sub-frame energy.
+   E holds sub-frame energies and E_1 their reciprocals (that is how the caller
+   in this file fills them). Only the first min(maxM, 2^LM+1) entries of each
+   array are read. With metric = mean(E)*mean(E_1) over those entries, the
+   result is sqrt(.05*(metric-2)) limited to the range [0,1] (assuming
+   MIN16/MAX16 are the usual min/max macros). */
+static float transient_boost(const float *E, const float *E_1, int LM, int maxM)
+{
+   int k;
+   int count;
+   float energy_sum=0;
+   float inv_energy_sum=0;
+   float metric;
+
+   count = IMIN(maxM, (1<<LM)+1);
+   for (k=0;k<count;k++)
+   {
+      energy_sum += E[k];
+      inv_energy_sum += E_1[k];
+   }
+   /* Product of the two sums, normalised by the squared number of entries */
+   metric = energy_sum*inv_energy_sum/(count*count);
+   return MIN16(1,sqrt(MAX16(0,.05*(metric-2))));
+}
+
+/* Viterbi decoding trying to find the best frame size combination using look-ahead
+
+   State numbering:
+    0: unused
+    1:  2.5 ms
+    2:  5 ms (#1)
+    3:  5 ms (#2)
+    4: 10 ms (#1)
+    5: 10 ms (#2)
+    6: 10 ms (#3)
+    7: 10 ms (#4)
+    8: 20 ms (#1)
+    9: 20 ms (#2)
+   10: 20 ms (#3)
+   11: 20 ms (#4)
+   12: 20 ms (#5)
+   13: 20 ms (#6)
+   14: 20 ms (#7)
+   15: 20 ms (#8)
+
+   E / E_1:    sub-frame energies and their reciprocals, one entry per step;
+               entries 0..N are read (through transient_boost()).
+   N:          number of steps in the analysis window; must be between 1 and
+               MAX_DYNAMIC_FRAMESIZE since it indexes the local trellis arrays.
+   frame_cost: fixed cost charged for every frame that is started.
+   rate:       weight of the transient boost; the caller in this file passes
+               bitrate/400. Values below 1 are treated as 1.
+
+   Returns the value stored for the first step of the cheapest path, i.e. the
+   index i (0..3) of the frame of 2^i steps that the path starts with.
+*/
+static int transient_viterbi(const float *E, const float *E_1, int N, int frame_cost, int rate)
+{
+   int i;
+   float cost[MAX_DYNAMIC_FRAMESIZE][16];
+   int states[MAX_DYNAMIC_FRAMESIZE][16];
+   float best_cost;
+   int best_state;
+
+   /* rate is a divisor just below: rate==0 (bitrate under 400) would divide by
+      zero and a negative rate would flip the sign of every cost */
+   if (rate < 1)
+      rate = 1;
+   /* Makes variable framesize less aggressive at lower bitrates, but I can't
+      find any valid theoretical justification for this (other than it seems
+      to help). Note that 720/rate is an integer division. */
+   frame_cost *= 720/rate;
+   for (i=0;i<16;i++)
+   {
+      /* Impossible state */
+      states[0][i] = -1;
+      cost[0][i] = 1e10;
+   }
+   /* At step 0 only the first state of each frame length can be entered */
+   for (i=0;i<4;i++)
+   {
+      cost[0][1<<i] = frame_cost + rate*(1<<i)*transient_boost(E, E_1, i, N+1);
+      states[0][1<<i] = i;
+   }
+   for (i=1;i<N;i++)
+   {
+      int j;
+
+      /* Follow continuations */
+      for (j=2;j<16;j++)
+      {
+         cost[i][j] = cost[i-1][j-1];
+         states[i][j] = j-1;
+      }
+
+      /* New frames */
+      for(j=0;j<4;j++)
+      {
+         int k;
+         float min_cost;
+         float curr_cost;
+         /* A new frame can only follow the last state of a frame: 1, 3, 7 or 15 */
+         states[i][1<<j] = 1;
+         min_cost = cost[i-1][1];
+         for(k=1;k<4;k++)
+         {
+            float tmp = cost[i-1][(1<<(k+1))-1];
+            if (tmp < min_cost)
+            {
+               states[i][1<<j] = (1<<(k+1))-1;
+               min_cost = tmp;
+            }
+         }
+         curr_cost = frame_cost+rate*(1<<j)*transient_boost(E+i, E_1+i, j, N-i+1);
+         cost[i][1<<j] = min_cost;
+         /* If part of the frame is outside the analysis window, only count part of the cost */
+         if (N-i < (1<<j))
+            cost[i][1<<j] += curr_cost*(float)(N-i)/(1<<j);
+         else
+            cost[i][1<<j] += curr_cost;
+      }
+   }
+
+   best_state=1;
+   best_cost = cost[N-1][1];
+   /* Find best end state (doesn't force a frame to end at N-1) */
+   for (i=2;i<16;i++)
+   {
+      if (cost[N-1][i]<best_cost)
+      {
+         best_cost = cost[N-1][i];
+         best_state = i;
+      }
+   }
+
+   /* Follow transitions back */
+   for (i=N-1;i>=0;i--)
+      best_state = states[i][best_state];
+   return best_state;
+}
+
+/* Mono downmix of interleaved float PCM: for each of the subframe output
+   samples, sub[n] receives the sum of the C channel values of input frame
+   offset+n. _x is read as an array of float. */
+void downmix_float(const void *_x, float *sub, int subframe, int offset, int C)
+{
+   const float *pcm = (const float *)_x;
+   int n, ch;
+   for (n=0;n<subframe;n++)
+   {
+      /* Start from the first channel, then accumulate the others in order */
+      sub[n] = pcm[(n+offset)*C];
+      for (ch=1;ch<C;ch++)
+         sub[n] += pcm[(n+offset)*C+ch];
+   }
+}
+
+/* Mono downmix of interleaved 16-bit PCM: for each of the subframe output
+   samples, sub[n] receives the sum (accumulated in float) of the C channel
+   values of input frame offset+n. _x is read as an array of opus_int16. */
+void downmix_int(const void *_x, float *sub, int subframe, int offset, int C)
+{
+   const opus_int16 *pcm = (const opus_int16 *)_x;
+   int n, ch;
+   for (n=0;n<subframe;n++)
+   {
+      /* Start from the first channel, then accumulate the others in order */
+      sub[n] = pcm[(n+offset)*C];
+      for (ch=1;ch<C;ch++)
+         sub[n] += pcm[(n+offset)*C+ch];
+   }
+}
+
+/* Chooses a frame size from the energy of 2.5 ms (Fs/400 samples) sub-frames.
+
+   x:         interleaved input PCM. It is only ever read through downmix(), so
+              its real sample type is whatever that callback expects.
+   len:       number of samples per channel available in x.
+   C:         number of interleaved channels.
+   Fs:        sampling rate.
+   bitrate:   passed on to transient_viterbi() as bitrate/400.
+   tonality:  scales the per-frame cost by (1+.5*tonality).
+   mem:       energies carried over from the previous call; mem[0] is always
+              read and updated, mem[1] and mem[2] only when buffering is set.
+   buffering: 0, or a sample count for which 2*subframe-buffering lies in
+              [0, subframe] (asserted below); that many samples are skipped.
+   downmix:   callback producing the mono sub-frame in sub.
+
+   Returns the value computed by transient_viterbi(). */
+int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs,
+                int bitrate, opus_val16 tonality, opus_val32 *mem, int buffering,
+                downmix_func downmix)
+{
+   int N;
+   int i;
+   float e[MAX_DYNAMIC_FRAMESIZE+4];
+   float e_1[MAX_DYNAMIC_FRAMESIZE+3];
+   float memx;
+   int bestLM=0;
+   int subframe;
+   int pos;
+   int offset;
+   VARDECL(opus_val16, sub);
+
+   subframe = Fs/400;
+   ALLOC(sub, subframe, opus_val16);
+   e[0]=mem[0];
+   e_1[0]=1./(EPSILON+mem[0]);
+   if (buffering)
+   {
+      /* Consider the CELT delay when not in restricted-lowdelay */
+      /* We assume the buffering is between 2.5 and 5 ms */
+      offset = 2*subframe - buffering;
+      celt_assert(offset>=0 && offset <= subframe);
+      /* x is not advanced here: its declared element type need not match the
+         data it points to (see downmix_int()), so the skip is handed to the
+         downmix callback, which knows the real sample type */
+      len -= offset;
+      e[1]=mem[1];
+      e_1[1]=1./(EPSILON+mem[1]);
+      e[2]=mem[2];
+      e_1[2]=1./(EPSILON+mem[2]);
+      pos = 3;
+   } else {
+      offset = 0;
+      pos=1;
+   }
+   N=IMIN(len/subframe, MAX_DYNAMIC_FRAMESIZE);
+   /* Placeholder only: replaced by sub[0] on the first iteration. x itself is
+      not dereferenced because it may not really hold opus_val16 samples. */
+   memx = 0;
+   for (i=0;i<N;i++)
+   {
+      float tmp;
+      float tmpx;
+      int j;
+      tmp=EPSILON;
+
+      downmix(x, sub, subframe, i*subframe+offset, C);
+      if (i==0)
+         memx = sub[0];
+      /* Energy of the first difference of the downmixed sub-frame */
+      for (j=0;j<subframe;j++)
+      {
+         tmpx = sub[j];
+         tmp += (tmpx-memx)*(tmpx-memx);
+         memx = tmpx;
+      }
+      e[i+pos] = tmp;
+      e_1[i+pos] = 1.f/tmp;
+   }
+   /* Hack to get 20 ms working with APPLICATION_AUDIO
+      The real problem is that the corresponding memory needs to use 1.5 ms
+      from this frame and 1 ms from the next frame */
+   e[i+pos] = e[i+pos-1];
+   if (buffering)
+      N=IMIN(MAX_DYNAMIC_FRAMESIZE, N+2);
+   bestLM = transient_viterbi(e, e_1, N, (1.f+.5*tonality)*(40*C+40), bitrate/400);
+   /* Keep the energies that follow the chosen frame for the next call */
+   mem[0] = e[1<<bestLM];
+   if (buffering)
+   {
+      mem[1] = e[(1<<bestLM)+1];
+      mem[2] = e[(1<<bestLM)+2];
+   }
+   return bestLM;
+}
+
+#endif
+
+/* Decides how many samples per channel to encode out of a buffer holding
+   frame_size samples, according to variable_duration:
+     OPUS_FRAMESIZE_ARG       use frame_size itself
+     OPUS_FRAMESIZE_VARIABLE  use 20 ms (Fs/50)
+     OPUS_FRAMESIZE_2_5_MS .. OPUS_FRAMESIZE_60_MS
+                              use (Fs/400) shifted left by the distance from
+                              OPUS_FRAMESIZE_2_5_MS, capped at 60 ms (3*Fs/50)
+   Returns -1 if frame_size is shorter than 2.5 ms, if variable_duration is
+   none of the above, if the chosen size exceeds frame_size, or if it is not
+   exactly 2.5, 5, 10, 20, 40 or 60 ms at the rate Fs. */
+opus_int32 frame_size_select(opus_int32 frame_size, int variable_duration, opus_int32 Fs)
+{
+   int chosen;
+   int legal;
+   if (frame_size<Fs/400)
+      return -1;
+   if (variable_duration == OPUS_FRAMESIZE_ARG)
+   {
+      chosen = frame_size;
+   } else if (variable_duration == OPUS_FRAMESIZE_VARIABLE)
+   {
+      chosen = Fs/50;
+   } else if (variable_duration < OPUS_FRAMESIZE_2_5_MS || variable_duration > OPUS_FRAMESIZE_60_MS)
+   {
+      return -1;
+   } else {
+      /* Presumably the fixed-duration constants are consecutive, so the
+         difference doubles the size per step; the 60 ms entry is clamped */
+      int shift = variable_duration-OPUS_FRAMESIZE_2_5_MS;
+      chosen = IMIN(3*Fs/50, (Fs/400)<<shift);
+   }
+   /* Cannot encode more samples than were provided */
+   if (chosen>frame_size)
+      return -1;
+   legal = 400*chosen==Fs || 200*chosen==Fs || 100*chosen==Fs ||
+           50*chosen==Fs || 25*chosen==Fs || 50*chosen==3*Fs;
+   return legal ? chosen : -1;
+}
+
opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_size,
- unsigned char *data, opus_int32 out_data_bytes, int lsb_depth)
+ unsigned char *data, opus_int32 out_data_bytes, int lsb_depth
+#ifndef FIXED_POINT
+ , AnalysisInfo *analysis_info
+#endif
+ )
{
void *silk_enc;
CELTEncoder *celt_enc;
@@ -563,11 +816,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
int curr_bandwidth;
opus_val16 HB_gain;
opus_int32 max_data_bytes; /* Max number of bytes we're allowed to use */
- int extra_buffer, total_buffer;
- int perform_analysis=0;
-#ifndef FIXED_POINT
- AnalysisInfo analysis_info;
-#endif
+ int total_buffer;
VARDECL(opus_val16, tmp_prefill);
ALLOC_STACK;
@@ -575,36 +824,37 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
max_data_bytes = IMIN(1276, out_data_bytes);
st->rangeFinal = 0;
- if (400*frame_size != st->Fs && 200*frame_size != st->Fs && 100*frame_size != st->Fs &&
+ if ((!st->variable_duration && 400*frame_size != st->Fs && 200*frame_size != st->Fs && 100*frame_size != st->Fs &&
50*frame_size != st->Fs && 25*frame_size != st->Fs && 50*frame_size != 3*st->Fs)
- {
- RESTORE_STACK;
- return OPUS_BAD_ARG;
- }
- if (max_data_bytes<=0)
+ || (400*frame_size < st->Fs)
+ || max_data_bytes<=0
+ )
{
RESTORE_STACK;
return OPUS_BAD_ARG;
}
silk_enc = (char*)st+st->silk_enc_offset;
celt_enc = (CELTEncoder*)((char*)st+st->celt_enc_offset);
-
- lsb_depth = IMIN(lsb_depth, st->lsb_depth);
-
-#ifndef FIXED_POINT
- perform_analysis = st->silk_mode.complexity >= 7 && frame_size >= st->Fs/100 && st->Fs==48000;
-#endif
if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY)
delay_compensation = 0;
else
delay_compensation = st->delay_compensation;
- if (perform_analysis)
+
+ lsb_depth = IMIN(lsb_depth, st->lsb_depth);
+
+ st->voice_ratio = -1;
+
+#ifndef FIXED_POINT
+ st->detected_bandwidth = 0;
+ if (analysis_info->valid)
{
- total_buffer = IMAX(st->Fs/200, delay_compensation);
- } else {
- total_buffer = delay_compensation;
+ if (st->signal_type == OPUS_AUTO)
+ st->voice_ratio = (int)floor(.5+100*(1-analysis_info->music_prob));
+ st->detected_bandwidth = analysis_info->opus_bandwidth;
}
- extra_buffer = total_buffer-delay_compensation;
+#endif
+
+ total_buffer = delay_compensation;
st->bitrate_bps = user_bitrate_to_bitrate(st, frame_size, max_data_bytes);
frame_rate = st->Fs/frame_size;
@@ -916,7 +1166,11 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
/* When switching from SILK/Hybrid to CELT, only ask for a switch at the last frame */
if (to_celt && i==nb_frames-1)
st->user_forced_mode = MODE_CELT_ONLY;
- tmp_len = opus_encode_native(st, pcm+i*(st->channels*st->Fs/50), st->Fs/50, tmp_data+i*bytes_per_frame, bytes_per_frame, lsb_depth);
+ tmp_len = opus_encode_native(st, pcm+i*(st->channels*st->Fs/50), st->Fs/50, tmp_data+i*bytes_per_frame, bytes_per_frame, lsb_depth
+#ifndef FIXED_POINT
+ , analysis_info
+#endif
+ );
if (tmp_len<0)
{
RESTORE_STACK;
@@ -942,7 +1196,6 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
RESTORE_STACK;
return ret;
}
-
curr_bandwidth = st->bandwidth;
/* Chooses the appropriate mode for speech
@@ -981,22 +1234,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
dc_reject(pcm, 3, &pcm_buf[total_buffer*st->channels], st->hp_mem, frame_size, st->channels, st->Fs);
}
-#ifndef FIXED_POINT
- if (perform_analysis)
- {
- int nb_analysis_frames;
- nb_analysis_frames = frame_size/(st->Fs/100);
- for (i=0;i<nb_analysis_frames;i++)
- tonality_analysis(&st->analysis, &analysis_info, celt_enc, pcm_buf+i*(st->Fs/100)*st->channels, st->channels, lsb_depth);
- if (st->signal_type == OPUS_AUTO)
- st->voice_ratio = (int)floor(.5+100*(1-analysis_info.music_prob));
- st->detected_bandwidth = analysis_info.opus_bandwidth;
- } else {
- analysis_info.valid = 0;
- st->voice_ratio = -1;
- st->detected_bandwidth = 0;
- }
-#endif
+
/* SILK processing */
HB_gain = Q15ONE;
@@ -1205,9 +1443,18 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
} else {
if (st->use_vbr)
{
+ opus_int32 bonus=0;
+#ifndef FIXED_POINT
+ if (st->variable_duration==OPUS_FRAMESIZE_VARIABLE && frame_size != st->Fs/50)
+ {
+ bonus = (40*st->stream_channels+40)*(st->Fs/frame_size-50);
+ if (analysis_info->valid)
+ bonus = bonus*(1.f+.5*analysis_info->tonality);
+ }
+#endif
celt_encoder_ctl(celt_enc, OPUS_SET_VBR(1));
celt_encoder_ctl(celt_enc, OPUS_SET_VBR_CONSTRAINT(st->vbr_constraint));
- celt_encoder_ctl(celt_enc, OPUS_SET_BITRATE(st->bitrate_bps));
+ celt_encoder_ctl(celt_enc, OPUS_SET_BITRATE(st->bitrate_bps+bonus));
nb_compr_bytes = max_data_bytes-1-redundancy_bytes;
} else {
nb_compr_bytes = bytes_target;
@@ -1222,7 +1469,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
if (st->mode != MODE_SILK_ONLY && st->mode != st->prev_mode && st->prev_mode > 0)
{
for (i=0;i<st->channels*st->Fs/400;i++)
- tmp_prefill[i] = st->delay_buffer[(extra_buffer+st->encoder_buffer-total_buffer-st->Fs/400)*st->channels + i];
+ tmp_prefill[i] = st->delay_buffer[(st->encoder_buffer-total_buffer-st->Fs/400)*st->channels + i];
}
for (i=0;i<st->channels*(st->encoder_buffer-(frame_size+total_buffer));i++)
@@ -1236,7 +1483,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
const CELTMode *celt_mode;
celt_encoder_ctl(celt_enc, CELT_GET_MODE(&celt_mode));
- gain_fade(pcm_buf+extra_buffer*st->channels, pcm_buf+extra_buffer*st->channels,
+ gain_fade(pcm_buf, pcm_buf,
st->prev_HB_gain, HB_gain, celt_mode->overlap, frame_size, st->channels, celt_mode->window, st->Fs);
}
st->prev_HB_gain = HB_gain;
@@ -1258,7 +1505,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
g1 *= (1.f/16384);
g2 *= (1.f/16384);
#endif
- stereo_fade(pcm_buf+extra_buffer*st->channels, pcm_buf+extra_buffer*st->channels, g1, g2, celt_mode->overlap,
+ stereo_fade(pcm_buf, pcm_buf, g1, g2, celt_mode->overlap,
frame_size, st->channels, celt_mode->window, st->Fs);
st->hybrid_stereo_width_Q14 = st->silk_mode.stereoWidth_Q14;
}
@@ -1312,7 +1559,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
int err;
celt_encoder_ctl(celt_enc, CELT_SET_START_BAND(0));
celt_encoder_ctl(celt_enc, OPUS_SET_VBR(0));
- err = celt_encode_with_ec(celt_enc, pcm_buf+extra_buffer*st->channels, st->Fs/200, data+nb_compr_bytes, redundancy_bytes, NULL);
+ err = celt_encode_with_ec(celt_enc, pcm_buf, st->Fs/200, data+nb_compr_bytes, redundancy_bytes, NULL);
if (err < 0)
{
RESTORE_STACK;
@@ -1339,10 +1586,9 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
if (ec_tell(&enc) <= 8*nb_compr_bytes)
{
#ifndef FIXED_POINT
- if (perform_analysis)
- celt_encoder_ctl(celt_enc, CELT_SET_ANALYSIS(&analysis_info));
+ celt_encoder_ctl(celt_enc, CELT_SET_ANALYSIS(analysis_info));
#endif
- ret = celt_encode_with_ec(celt_enc, pcm_buf+extra_buffer*st->channels, frame_size, NULL, nb_compr_bytes, &enc);
+ ret = celt_encode_with_ec(celt_enc, pcm_buf, frame_size, NULL, nb_compr_bytes, &enc);
if (ret < 0)
{
RESTORE_STACK;
@@ -1365,9 +1611,9 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
celt_encoder_ctl(celt_enc, CELT_SET_PREDICTION(0));
/* NOTE: We could speed this up slightly (at the expense of code size) by just adding a function that prefills the buffer */
- celt_encode_with_ec(celt_enc, pcm_buf+st->channels*(extra_buffer+frame_size-N2-N4), N4, dummy, 2, NULL);
+ celt_encode_with_ec(celt_enc, pcm_buf+st->channels*(frame_size-N2-N4), N4, dummy, 2, NULL);
- err = celt_encode_with_ec(celt_enc, pcm_buf+st->channels*(extra_buffer+frame_size-N2), N2, data+nb_compr_bytes, redundancy_bytes, NULL);
+ err = celt_encode_with_ec(celt_enc, pcm_buf+st->channels*(frame_size-N2), N2, data+nb_compr_bytes, redundancy_bytes, NULL);
if (err < 0)
{
RESTORE_STACK;
@@ -1440,6 +1686,7 @@ opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int frame_size,
VARDECL(opus_int16, in);
ALLOC_STACK;
+ frame_size = frame_size_select(frame_size, st->variable_duration, st->Fs);
if(frame_size<0)
{
RESTORE_STACK;
@@ -1459,6 +1706,12 @@ opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int frame_size,
opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int frame_size,
unsigned char *data, opus_int32 out_data_bytes)
{
+   /* Map the caller's frame_size through the configured expert frame
+      duration; frame_size_select() returns a negative value when the
+      combination is not usable. */
+   frame_size = frame_size_select(frame_size, st->variable_duration, st->Fs);
+   /* No ALLOC_STACK is issued in this function, so there is nothing to
+      restore before bailing out. */
+   if(frame_size<0)
+      return OPUS_BAD_ARG;
return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 16);
}
@@ -1467,21 +1720,74 @@ opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int frame_size,
unsigned char *data, opus_int32 max_data_bytes)
{
int i, ret;
+ const CELTMode *celt_mode;
+ int delay_compensation;
+ int lsb_depth;
VARDECL(float, in);
+ AnalysisInfo analysis_info;
ALLOC_STACK;
+ opus_encoder_ctl(st, CELT_GET_MODE(&celt_mode));
+ if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY)
+ delay_compensation = 0;
+ else
+ delay_compensation = st->delay_compensation;
+
+ lsb_depth = IMIN(16, st->lsb_depth);
+
+ analysis_info.valid = 0;
+ if (st->silk_mode.complexity >= 7 && st->Fs==48000)
+ {
+ frame_size = run_analysis(&st->analysis, celt_mode, pcm, pcm+st->channels*st->analysis.analysis_offset,
+ frame_size, st->variable_duration, st->channels, st->Fs, st->bitrate_bps, delay_compensation, lsb_depth, downmix_int, &analysis_info);
+ } else {
+ frame_size = frame_size_select(frame_size, st->variable_duration, st->Fs);
+ }
+ if(frame_size<0)
+ {
+ RESTORE_STACK;
+ return OPUS_BAD_ARG;
+ }
+
ALLOC(in, frame_size*st->channels, float);
for (i=0;i<frame_size*st->channels;i++)
in[i] = (1.0f/32768)*pcm[i];
- ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16);
+ ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16, &analysis_info);
RESTORE_STACK;
return ret;
}
opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int frame_size,
unsigned char *data, opus_int32 out_data_bytes)
{
- return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 24);
+   const CELTMode *celt_mode;
+   int delay_compensation;
+   int lsb_depth;
+   AnalysisInfo analysis_info;
+
+   opus_encoder_ctl(st, CELT_GET_MODE(&celt_mode));
+   /* Restricted low-delay mode applies no delay compensation */
+   if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY)
+      delay_compensation = 0;
+   else
+      delay_compensation = st->delay_compensation;
+
+   lsb_depth = IMIN(24, st->lsb_depth);
+
+   /* Stays invalid unless run_analysis() below fills it in */
+   analysis_info.valid = 0;
+   /* The analysis is only run at complexity 7 and above on 48 kHz input;
+      otherwise the frame size is chosen from the arguments alone */
+   if (st->silk_mode.complexity >= 7 && st->Fs==48000)
+   {
+      frame_size = run_analysis(&st->analysis, celt_mode, pcm, pcm+st->channels*st->analysis.analysis_offset,
+            frame_size, st->variable_duration, st->channels, st->Fs, st->bitrate_bps, delay_compensation, lsb_depth, downmix_float, &analysis_info);
+   } else {
+      frame_size = frame_size_select(frame_size, st->variable_duration, st->Fs);
+   }
+   /* No ALLOC_STACK is issued in this function, so there is nothing to
+      restore before bailing out. */
+   if(frame_size<0)
+      return OPUS_BAD_ARG;
+
+   return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 24, &analysis_info);
}
#endif
@@ -1750,6 +2056,18 @@ int opus_encoder_ctl(OpusEncoder *st, int request, ...)
*value = st->lsb_depth;
}
break;
+ case OPUS_SET_EXPERT_FRAME_DURATION_REQUEST:
+ {
+ opus_int32 value = va_arg(ap, opus_int32);
+ st->variable_duration = value;
+ }
+ break;
+ case OPUS_GET_EXPERT_FRAME_DURATION_REQUEST:
+ {
+ opus_int32 *value = va_arg(ap, opus_int32*);
+ *value = st->variable_duration;
+ }
+ break;
case OPUS_RESET_STATE:
{
void *silk_enc;
@@ -1779,6 +2097,15 @@ int opus_encoder_ctl(OpusEncoder *st, int request, ...)
st->user_forced_mode = value;
}
break;
+
+ case CELT_GET_MODE_REQUEST:
+ {
+ const CELTMode ** value = va_arg(ap, const CELTMode**);
+ if (value==0)
+ goto bad_arg;
+ celt_encoder_ctl(celt_enc, CELT_GET_MODE(value));
+ }
+ break;
default:
/* fprintf(stderr, "unknown opus_encoder_ctl() request: %d", request);*/
ret = OPUS_UNIMPLEMENTED;
diff --git a/src/opus_multistream_encoder.c b/src/opus_multistream_encoder.c
index db9fc785..c6204185 100644
--- a/src/opus_multistream_encoder.c
+++ b/src/opus_multistream_encoder.c
@@ -36,10 +36,14 @@
#include <stdarg.h>
#include "float_cast.h"
#include "os_support.h"
+#include "analysis.h"
struct OpusMSEncoder {
+ TonalityAnalysisState analysis;
ChannelLayout layout;
- int bitrate;
+ int variable_duration;
+ opus_int32 bitrate_bps;
+ opus_val32 subframe_mem[3];
/* Encoder states go here */
};
@@ -102,6 +106,8 @@ int opus_multistream_encoder_init(
st->layout.nb_streams = streams;
st->layout.nb_coupled_streams = coupled_streams;
+ st->bitrate_bps = OPUS_AUTO;
+ st->variable_duration = OPUS_FRAMESIZE_ARG;
for (i=0;i<st->layout.nb_channels;i++)
st->layout.mapping[i] = mapping[i];
if (!validate_layout(&st->layout) || !validate_encoder_layout(&st->layout))
@@ -182,6 +188,10 @@ static int opus_multistream_encode_native
unsigned char *data,
opus_int32 max_data_bytes,
int lsb_depth
+#ifndef FIXED_POINT
+ , downmix_func downmix
+ , const void *pcm_analysis
+#endif
)
{
opus_int32 Fs;
@@ -193,10 +203,43 @@ static int opus_multistream_encode_native
VARDECL(opus_val16, buf);
unsigned char tmp_data[MS_FRAME_TMP];
OpusRepacketizer rp;
+ int orig_frame_size;
+ int coded_channels;
+ opus_int32 channel_rate;
+ opus_int32 complexity;
+ AnalysisInfo analysis_info;
+ const CELTMode *celt_mode;
ALLOC_STACK;
ptr = (char*)st + align(sizeof(OpusMSEncoder));
opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_SAMPLE_RATE(&Fs));
+ opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_COMPLEXITY(&complexity));
+ opus_encoder_ctl((OpusEncoder*)ptr, CELT_GET_MODE(&celt_mode));
+
+ if (400*frame_size < Fs)
+ {
+ RESTORE_STACK;
+ return OPUS_BAD_ARG;
+ }
+ orig_frame_size = IMIN(frame_size,Fs/50);
+#ifndef FIXED_POINT
+ analysis_info.valid = 0;
+ if (complexity >= 7 && Fs==48000)
+ {
+ opus_int32 delay_compensation;
+ int channels;
+
+ channels = st->layout.nb_streams + st->layout.nb_coupled_streams;
+ opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_LOOKAHEAD(&delay_compensation));
+ delay_compensation -= Fs/400;
+
+ frame_size = run_analysis(&st->analysis, celt_mode, pcm, pcm_analysis,
+ frame_size, st->variable_duration, channels, Fs, st->bitrate_bps, delay_compensation, lsb_depth, downmix, &analysis_info);
+ } else
+#endif
+ {
+ frame_size = frame_size_select(frame_size, st->variable_duration, Fs);
+ }
/* Validate frame_size before using it to allocate stack space.
This mirrors the checks in opus_encode[_float](). */
if (400*frame_size != Fs && 200*frame_size != Fs &&
@@ -215,6 +258,39 @@ static int opus_multistream_encode_native
RESTORE_STACK;
return OPUS_BUFFER_TOO_SMALL;
}
+
+ /* Compute bitrate allocation between streams (this could be a lot better) */
+ coded_channels = st->layout.nb_streams + st->layout.nb_coupled_streams;
+ if (st->bitrate_bps==OPUS_AUTO)
+ {
+ channel_rate = Fs+60*Fs/orig_frame_size;
+ } else if (st->bitrate_bps==OPUS_BITRATE_MAX)
+ {
+ channel_rate = 300000;
+ } else {
+ channel_rate = st->bitrate_bps/coded_channels;
+ }
+#ifndef FIXED_POINT
+ if (st->variable_duration==OPUS_FRAMESIZE_VARIABLE && frame_size != Fs/50)
+ {
+ opus_int32 bonus;
+ bonus = 60*(Fs/frame_size-50);
+ channel_rate += bonus;
+ }
+#endif
+ ptr = (char*)st + align(sizeof(OpusMSEncoder));
+ for (s=0;s<st->layout.nb_streams;s++)
+ {
+ OpusEncoder *enc;
+ enc = (OpusEncoder*)ptr;
+ if (s < st->layout.nb_coupled_streams)
+ ptr += align(coupled_size);
+ else
+ ptr += align(mono_size);
+ opus_encoder_ctl(enc, OPUS_SET_BITRATE(channel_rate * (s < st->layout.nb_coupled_streams ? 2 : 1)));
+ }
+
+ ptr = (char*)st + align(sizeof(OpusMSEncoder));
/* Counting ToC */
tot_size = 0;
for (s=0;s<st->layout.nb_streams;s++)
@@ -246,7 +322,11 @@ static int opus_multistream_encode_native
/* Reserve three bytes for the last stream and four for the others */
curr_max -= IMAX(0,4*(st->layout.nb_streams-s-1)-1);
curr_max = IMIN(curr_max,MS_FRAME_TMP);
- len = opus_encode_native(enc, buf, frame_size, tmp_data, curr_max, lsb_depth);
+ len = opus_encode_native(enc, buf, frame_size, tmp_data, curr_max, lsb_depth
+#ifndef FIXED_POINT
+ , &analysis_info
+#endif
+ );
if (len<0)
{
RESTORE_STACK;
@@ -345,8 +425,9 @@ int opus_multistream_encode_float
opus_int32 max_data_bytes
)
{
+ int channels = st->layout.nb_streams + st->layout.nb_coupled_streams;
return opus_multistream_encode_native(st, opus_copy_channel_in_float,
- pcm, frame_size, data, max_data_bytes, 24);
+ pcm, frame_size, data, max_data_bytes, 24, downmix_float, pcm+channels*st->analysis.analysis_offset);
}
int opus_multistream_encode(
@@ -357,8 +438,9 @@ int opus_multistream_encode(
opus_int32 max_data_bytes
)
{
+ int channels = st->layout.nb_streams + st->layout.nb_coupled_streams;
return opus_multistream_encode_native(st, opus_copy_channel_in_short,
- pcm, frame_size, data, max_data_bytes, 16);
+ pcm, frame_size, data, max_data_bytes, 16, downmix_int, pcm+channels*st->analysis.analysis_offset);
}
#endif
@@ -378,20 +460,10 @@ int opus_multistream_encoder_ctl(OpusMSEncoder *st, int request, ...)
{
case OPUS_SET_BITRATE_REQUEST:
{
- int chan, s;
opus_int32 value = va_arg(ap, opus_int32);
- chan = st->layout.nb_streams + st->layout.nb_coupled_streams;
- value /= chan;
- for (s=0;s<st->layout.nb_streams;s++)
- {
- OpusEncoder *enc;
- enc = (OpusEncoder*)ptr;
- if (s < st->layout.nb_coupled_streams)
- ptr += align(coupled_size);
- else
- ptr += align(mono_size);
- opus_encoder_ctl(enc, request, value * (s < st->layout.nb_coupled_streams ? 2 : 1));
- }
+ if (value<0 && value!=OPUS_AUTO && value!=OPUS_BITRATE_MAX)
+ goto bad_arg;
+ st->bitrate_bps = value;
}
break;
case OPUS_GET_BITRATE_REQUEST:
@@ -504,7 +576,21 @@ int opus_multistream_encoder_ctl(OpusMSEncoder *st, int request, ...)
}
*value = (OpusEncoder*)ptr;
}
- break;
+ break;
+ case OPUS_SET_EXPERT_FRAME_DURATION_REQUEST:
+ {
+ opus_int32 value = va_arg(ap, opus_int32);
+ if (value<0 || value>1)
+ goto bad_arg;
+ st->variable_duration = value;
+ }
+ break;
+ case OPUS_GET_EXPERT_FRAME_DURATION_REQUEST:
+ {
+ opus_int32 *value = va_arg(ap, opus_int32*);
+ *value = st->variable_duration;
+ }
+ break;
default:
ret = OPUS_UNIMPLEMENTED;
break;
@@ -512,6 +598,9 @@ int opus_multistream_encoder_ctl(OpusMSEncoder *st, int request, ...)
va_end(ap);
return ret;
+bad_arg:
+ va_end(ap);
+ return OPUS_BAD_ARG;
}
void opus_multistream_encoder_destroy(OpusMSEncoder *st)
diff --git a/src/opus_private.h b/src/opus_private.h
index c9a4ff53..2caac689 100644
--- a/src/opus_private.h
+++ b/src/opus_private.h
@@ -31,6 +31,7 @@
#include "arch.h"
#include "opus.h"
+#include "celt.h"
struct OpusRepacketizer {
unsigned char toc;
@@ -81,11 +82,24 @@ int get_mono_channel(const ChannelLayout *layout, int stream_id, int prev);
#define OPUS_SET_FORCE_MODE_REQUEST 11002
#define OPUS_SET_FORCE_MODE(x) OPUS_SET_FORCE_MODE_REQUEST, __opus_check_int(x)
+typedef void (*downmix_func)(const void *, float *, int, int, int);
+void downmix_float(const void *_x, float *sub, int subframe, int offset, int C);
+void downmix_int(const void *_x, float *sub, int subframe, int offset, int C);
+
+int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs,
+ int bitrate, opus_val16 tonality, opus_val32 *mem, int buffering,
+ downmix_func downmix);
int encode_size(int size, unsigned char *data);
+opus_int32 frame_size_select(opus_int32 frame_size, int variable_duration, opus_int32 Fs);
+
opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_size,
- unsigned char *data, opus_int32 out_data_bytes, int lsb_depth);
+ unsigned char *data, opus_int32 out_data_bytes, int lsb_depth
+#ifndef FIXED_POINT
+ , AnalysisInfo *analysis_info
+#endif
+ );
int opus_decode_native(OpusDecoder *st, const unsigned char *data, opus_int32 len,
opus_val16 *pcm, int frame_size, int decode_fec, int self_delimited,