diff options
author | Jean-Marc Valin <jean-marc.valin@usherbrooke.ca> | 2009-11-08 16:29:54 +0300 |
---|---|---|
committer | Jean-Marc Valin <jean-marc.valin@usherbrooke.ca> | 2009-11-08 16:36:51 +0300 |
commit | 294863bbea2d218d58c294b7b9ff60f221494c3d (patch) | |
tree | c77685e10d60f782c3c00d813f9d188d092c1100 | |
parent | 5ed24f8155adf59a4134c92383c19dc3cae68033 (diff) |
Temporal pitch search
-rw-r--r-- | libcelt/celt.c | 16 | ||||
-rw-r--r-- | libcelt/pitch.c | 153 | ||||
-rw-r--r-- | libcelt/pitch.h | 2 |
3 files changed, 168 insertions, 3 deletions
diff --git a/libcelt/celt.c b/libcelt/celt.c index c7d1c09..33f8a30 100644 --- a/libcelt/celt.c +++ b/libcelt/celt.c @@ -104,6 +104,8 @@ struct CELTEncoder { celt_sig *in_mem; celt_sig *out_mem; + celt_word16 *pitch_buf; + celt_sig xmem; celt_word16 *oldBandE; #ifdef EXP_PSY @@ -185,6 +187,7 @@ CELTEncoder *celt_encoder_create(const CELTMode *mode, int channels, int *error) st->in_mem = celt_alloc(st->overlap*C*sizeof(celt_sig)); st->out_mem = celt_alloc((MAX_PERIOD+st->overlap)*C*sizeof(celt_sig)); + st->pitch_buf = celt_alloc((MAX_PERIOD>>1)*sizeof(celt_word16)); st->oldBandE = (celt_word16*)celt_alloc(C*mode->nbEBands*sizeof(celt_word16)); @@ -657,7 +660,8 @@ int celt_encode_float(CELTEncoder * restrict st, const celt_sig * pcm, celt_sig && norm_rate < 50; if (has_pitch) { - find_spectral_pitch(st->mode, st->mode->fft, &st->mode->psy, in, st->out_mem, st->mode->window, NULL, 2*N-2*N4, MAX_PERIOD-(2*N-2*N4), &pitch_index, C); + /*find_spectral_pitch(st->mode, st->mode->fft, &st->mode->psy, in, st->out_mem, st->mode->window, NULL, 2*N-2*N4, MAX_PERIOD-(2*N-2*N4), &pitch_index, C);*/ + find_temporal_pitch(st->mode, in, st->pitch_buf, 2*N-2*N4, MAX_PERIOD-(2*N-2*N4), &pitch_index, C, &st->xmem); } /* Deferred allocation after find_spectral_pitch() to reduce @@ -667,7 +671,6 @@ int celt_encode_float(CELTEncoder * restrict st, const celt_sig * pcm, celt_sig ALLOC(pitch_freq, C*N, celt_sig); /**< Interleaved signal MDCTs */ if (has_pitch) { - compute_mdcts(st->mode, 0, st->out_mem+pitch_index*C, pitch_freq, C); has_pitch = compute_pitch_gain(st->mode, freq, pitch_freq, norm_rate, &gain_id, C, &st->gain_prod); } @@ -1247,7 +1250,14 @@ static void celt_decode_lost(CELTDecoder * restrict st, celt_word16 * restrict p if (st->loss_count == 0) { - find_spectral_pitch(st->mode, st->mode->fft, &st->mode->psy, st->out_mem+MAX_PERIOD-len, st->out_mem, st->mode->window, NULL, len, MAX_PERIOD-len-100, &pitch_index, C); + celt_word16 pitch_buf[MAX_PERIOD]; + celt_word32 tmp=0; + /*find_spectral_pitch(st->mode, st->mode->fft, &st->mode->psy, st->out_mem+MAX_PERIOD-len, st->out_mem, st->mode->window, NULL, len, MAX_PERIOD-len-100, &pitch_index, C);*/ + /* FIXME: Should do a bit of interpolation while decimating */ + for (i=0;i<MAX_PERIOD;i++) + pitch_buf[i] = EXTRACT16(SHR32(st->out_mem[2*i], SIG_SHIFT)); + find_temporal_pitch(st->mode, st->out_mem+MAX_PERIOD-len, pitch_buf, len, MAX_PERIOD-len-100, &pitch_index, C, &tmp); + pitch_index = MAX_PERIOD-len-pitch_index; st->last_pitch_index = pitch_index; } else { diff --git a/libcelt/pitch.c b/libcelt/pitch.c index e18323f..fc68d38 100644 --- a/libcelt/pitch.c +++ b/libcelt/pitch.c @@ -245,5 +245,158 @@ void find_spectral_pitch(const CELTMode *m, kiss_fftr_cfg fft, const struct PsyD /* The peak in the correlation gives us the pitch */ *pitch = find_max16(Y, max_pitch); + /*printf ("%d ", *pitch);*/ RESTORE_STACK; } + +void find_best_pitch(celt_word32 *xcorr, celt_word32 maxcorr, celt_word16 *y, int yshift, int len, int max_pitch, int best_pitch[2]) +{ + int i, j; + celt_word32 Syy=1; + celt_word16 best_num[2]; + celt_word32 best_den[2]; +#ifdef FIXED_POINT + int xshift; + + xshift = celt_ilog2(maxcorr)-14; +#endif + + best_num[0] = -1; + best_num[1] = -1; + best_den[0] = 0; + best_den[1] = 0; + best_pitch[0] = 0; + best_pitch[1] = 1; + for (j=0;j<len;j++) + Syy = MAC16_16(Syy, y[j],y[j]); + for (i=0;i<max_pitch;i++) + { + float score; + if (xcorr[i]>0) + { + celt_word16 num; + celt_word32 xcorr16; + xcorr16 = EXTRACT16(VSHR32(xcorr[i], xshift)); + num = MULT16_16_Q15(xcorr16,xcorr16); + score = num*1./Syy; + if (MULT16_32_Q15(num,best_den[1]) > MULT16_32_Q15(best_num[1],Syy)) + { + if (MULT16_32_Q15(num,best_den[0]) > MULT16_32_Q15(best_num[0],Syy)) + { + best_num[1] = best_num[0]; + best_den[1] = best_den[0]; + best_pitch[1] = best_pitch[0]; + best_num[0] = num; + best_den[0] = Syy; + best_pitch[0] = i; + } else { + best_num[1] = num; + best_den[1] = Syy; + best_pitch[1] = i; + } + } + } + Syy += SHR32(MULT16_16(y[i+len],y[i+len]),yshift) - SHR32(MULT16_16(y[i],y[i]),yshift); + Syy = MAX32(1, Syy); + } +} + +void find_temporal_pitch(const CELTMode *m, const celt_sig * restrict x, celt_word16 * restrict y, int len, int max_pitch, int *pitch, int _C, celt_sig *xmem) +{ + int i, j; + const int C = CHANNELS(_C); + const int lag = MAX_PERIOD; + const int N = FRAMESIZE(m); + int best_pitch[2]={0}; + celt_word16 x_lp[len>>1]; + celt_word16 x_lp4[len>>2]; + celt_word16 y_lp4[lag>>2]; + celt_word32 xcorr[max_pitch>>1]; + celt_word32 maxcorr=1; + int offset; + int shift=0; + + /* Down-sample by two and downmix to mono */ + for (i=1;i<len>>1;i++) + x_lp[i] = SHR32(HALF32(HALF32(x[(2*i-1)*C]+x[(2*i+1)*C])+x[2*i*C]), SIG_SHIFT); + x_lp[0] = SHR32(HALF32(HALF32(*xmem+x[C])+x[0]), SIG_SHIFT); + *xmem = x[N-C]; + if (C==2) + { + for (i=1;i<len>>1;i++) + x_lp[i] = SHR32(HALF32(HALF32(x[(2*i-1)*C+1]+x[(2*i+1)*C+1])+x[2*i*C+1]), SIG_SHIFT); + x_lp[0] += SHR32(HALF32(HALF32(x[C+1])+x[1]), SIG_SHIFT); + *xmem += x[N-C+1]; + } + + /* Downsample by 2 again */ + for (j=0;j<len>>2;j++) + x_lp4[j] = x_lp[2*j]; + for (j=0;j<lag>>2;j++) + y_lp4[j] = y[2*j]; + +#ifdef FIXED_POINT + shift = celt_ilog2(MAX16(celt_maxabs16(x_lp4, len>>2), celt_maxabs16(y_lp4, lag>>2)))-11; + if (shift>0) + { + for (j=0;j<len>>2;j++) + x_lp4[j] = SHR16(x_lp4[j], shift); + for (j=0;j<lag>>2;j++) + y_lp4[j] = SHR16(y_lp4[j], shift); + /* Use double the shift for a MAC */ + shift *= 2; + } else { + shift = 0; + } +#endif + + /* Coarse search with 4x decimation */ + + for (i=0;i<max_pitch>>2;i++) + { + celt_word32 sum = 0; + for (j=0;j<len>>2;j++) + sum = MAC16_16(sum, x_lp4[j],y_lp4[i+j]); + xcorr[i] = MAX32(-1, sum); + maxcorr = MAX32(maxcorr, sum); + } + find_best_pitch(xcorr, maxcorr, y_lp4, 0, len>>2, max_pitch>>2, best_pitch); + + /* Finer search with 2x decimation */ + maxcorr=1; + for (i=0;i<max_pitch>>1;i++) + { + celt_word32 sum=0; + xcorr[i] = 0; + if (abs(i-2*best_pitch[0])>2 && abs(i-2*best_pitch[1])>2) + continue; + for (j=0;j<len>>1;j++) + sum += SHR32(MULT16_16(x_lp[j],y[i+j]), shift); + xcorr[i] = MAX32(-1, sum); + maxcorr = MAX32(maxcorr, sum); + } + find_best_pitch(xcorr, maxcorr, y, shift, len>>1, max_pitch>>1, best_pitch); + + /* Refine by pseudo-interpolation */ + if (best_pitch[0]>0 && best_pitch[0]<(max_pitch>>1)-1) + { + celt_word32 a, b, c; + a = xcorr[best_pitch[0]-1]; + b = xcorr[best_pitch[0]]; + c = xcorr[best_pitch[0]+1]; + if ((c-a) > MULT16_32_Q15(QCONST16(.7f,15),b-a)) + offset = 1; + else if ((a-c) > MULT16_32_Q15(QCONST16(.7f,15),b-c)) + offset = -1; + else + offset = 0; + } else { + offset = 0; + } + *pitch = 2*best_pitch[0]-offset; + + CELT_COPY(y, y+(N>>1), (lag-N)>>1); + CELT_COPY(y+((lag-N)>>1), x_lp, N>>1); + + /*printf ("%d\n", *pitch);*/ +} diff --git a/libcelt/pitch.h b/libcelt/pitch.h index 129f69a..51cc8f3 100644 --- a/libcelt/pitch.h +++ b/libcelt/pitch.h @@ -51,4 +51,6 @@ void pitch_state_free(kiss_fftr_cfg st); easier to apply psychoacoustic weighting */ void find_spectral_pitch(const CELTMode *m, kiss_fftr_cfg fft, const struct PsyDecay *decay, const celt_sig *x, const celt_sig *y, const celt_word16 *window, celt_word16 * restrict X, int len, int max_pitch, int *pitch, int _C); +void find_temporal_pitch(const CELTMode *m, const celt_sig * restrict x, celt_word16 * restrict y, int len, int max_pitch, int *pitch, int _C, celt_sig *xmem); + #endif |