source/SoundTouch/BPMDetect.cpp


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371

////////////////////////////////////////////////////////////////////////////////
///
/// Beats-per-minute (BPM) detection routine.
///
/// The beat detection algorithm works as follows:
/// - Use function 'inputSamples' to feed chunks of samples to the class for
///   analysis. It's a good idea to input a large sound file or stream in smallish
///   chunks of around a few kilosamples in order not to consume too much memory.
/// - Input sound data is decimated to approx. 1000 Hz to reduce the calculation
///   burden, which is basically ok as low (bass) frequencies mostly determine the
///   beat rate. Simple averaging is used for anti-alias filtering because the
///   resulting signal quality isn't of that high importance.
/// - Decimated sound data is enveloped, i.e. the amplitude shape is detected by
///   taking absolute value that's smoothed by sliding average. Signal levels that
///   are below a couple of times the general RMS amplitude level are cut away to
///   leave only notable peaks there.
/// - Repeating sound patterns (e.g. beats) are detected by calculating short-term 
///   autocorrelation function of the enveloped signal.
/// - After the whole sound data file has been analyzed as above, the BPM value is
///   detected by function 'getBpm' that finds the highest peak of the autocorrelation
///   function, calculates its precise location and converts this reading to BPM.
///
/// Author        : Copyright (c) Olli Parviainen
/// Author e-mail : oparviai 'at' iki.fi
/// SoundTouch WWW: http://www.surina.net/soundtouch
///
////////////////////////////////////////////////////////////////////////////////
//
// Last changed  : $Date$
// File revision : $Revision: 4 $
//
// $Id$
//
////////////////////////////////////////////////////////////////////////////////
//
// License :
//
//  SoundTouch audio processing library
//  Copyright (c) Olli Parviainen
//
//  This library is free software; you can redistribute it and/or
//  modify it under the terms of the GNU Lesser General Public
//  License as published by the Free Software Foundation; either
//  version 2.1 of the License, or (at your option) any later version.
//
//  This library is distributed in the hope that it will be useful,
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
//  Lesser General Public License for more details.
//
//  You should have received a copy of the GNU Lesser General Public
//  License along with this library; if not, write to the Free Software
//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
//
////////////////////////////////////////////////////////////////////////////////

#include <math.h>
#include <assert.h>
#include <string.h>
#include <stdio.h>
#include "FIFOSampleBuffer.h"
#include "PeakFinder.h"
#include "BPMDetect.h"

using namespace soundtouch;

/// Maximum number of input sample frames that 'inputSamples' hands over to
/// 'decimate' in a single pass.
#define INPUT_BLOCK_SAMPLES       2048
/// Capacity, in samples, of the temporary array that receives the decimated
/// output of one input block in 'inputSamples'.
#define DECIMATED_BLOCK_SAMPLES   256

/// Decay constant for calculating the RMS volume sliding average approximation.
/// Applied once per decimated sample in 'calcEnvelope'; 1 / (1 - avgdecay) is
/// roughly 7000 samples, i.e. a time constant of several seconds at the
/// approx. 1000 Hz decimated rate.
const float avgdecay = 0.99986f;

/// Normalization coefficient for calculating RMS sliding average approximation:
/// multiplying the decaying accumulator by this value gives the average level.
const float avgnorm = (1 - avgdecay);


////////////////////////////////////////////////////////////////////////////////

// Enable following define to create bpm analysis file:

// #define _CREATE_BPM_DEBUG_FILE

#ifdef _CREATE_BPM_DEBUG_FILE

    #define DEBUGFILE_NAME  "c:\\temp\\soundtouch-bpm-debug.txt"

    /// Writes the autocorrelation data into text file DEBUGFILE_NAME for offline
    /// inspection. One tab-separated row is written per position in range
    /// [minpos, maxpos[: the position, 'coeff / position' (the bpm reading that
    /// corresponds to the position) and the data value at that position.
    ///
    /// Does nothing if the file can't be opened.
    static void _SaveDebugData(const float *data, int minpos, int maxpos, double coeff)
    {
        FILE *out = fopen(DEBUGFILE_NAME, "wt");
        if (out == NULL)
        {
            // debug output is best-effort only
            return;
        }

        printf("\n\nWriting BPM debug data into file " DEBUGFILE_NAME "\n\n");
        for (int pos = minpos; pos < maxpos; pos ++)
        {
            const double bpmAtPos = coeff / (double)pos;
            fprintf(out, "%d\t%.1lf\t%f\n", pos, bpmAtPos, data[pos]);
        }
        fclose(out);
    }
#else
    #define _SaveDebugData(a,b,c,d)
#endif

////////////////////////////////////////////////////////////////////////////////


/// Constructs a BPM detector.
///
/// \param numChannels  Number of interleaved channels in the sample data that will
///                     be given to 'inputSamples'.
/// \param aSampleRate  Sample rate of the input data, in Hz. The rate must be high
///                     enough that one input block of INPUT_BLOCK_SAMPLES frames
///                     decimates into at most DECIMATED_BLOCK_SAMPLES samples; this
///                     is verified with assertions only.
///
/// Resets the decimation / envelope accumulators, chooses the decimation factor,
/// derives the autocorrelation window from MIN_BPM / MAX_BPM and allocates the
/// autocorrelation array and the mono sample FIFO.
BPMDetect::BPMDetect(int numChannels, int aSampleRate)
{
    this->sampleRate = aSampleRate;
    this->channels = numChannels;

    decimateSum = 0;
    decimateCount = 0;

    envelopeAccu = 0;

    // Initialize RMS volume accumulator to RMS level of 1500 (out of 32768) that's
    // safe initial RMS signal level value for song data. This value is then adapted
    // to the actual level during processing.
#ifdef SOUNDTOUCH_INTEGER_SAMPLES
    // integer samples
    RMSVolumeAccu = (1500 * 1500) / avgnorm;
#else
    // float samples, scaled to range [-1..+1[
    RMSVolumeAccu = (0.045f * 0.045f) / avgnorm;
#endif

    // choose decimation factor so that result is approx. 1000 Hz
    decimateBy = sampleRate / 1000;
    assert(decimateBy > 0);
    // One input block must decimate into the DECIMATED_BLOCK_SAMPLES-sized array.
    // Decimating N frames yields at most floor((N + decimateBy - 1) / decimateBy)
    // samples (the '- 1' covers frames carried over from the previous block), so
    // N == decimateBy * DECIMATED_BLOCK_SAMPLES still fits exactly. Hence '<=':
    // a strict '<' would needlessly reject sample rates of 8000..8999 Hz.
    assert(INPUT_BLOCK_SAMPLES <= decimateBy * DECIMATED_BLOCK_SAMPLES);

    // Calculate window length & starting item according to desired min & max bpms:
    // a beat rate of B bpm repeats every (60 * decimated rate) / B decimated samples.
    windowLen = (60 * sampleRate) / (decimateBy * MIN_BPM);
    windowStart = (60 * sampleRate) / (decimateBy * MAX_BPM);

    assert(windowLen > windowStart);

    // allocate the autocorrelation accumulator and start it from zero
    xcorr = new float[windowLen];
    memset(xcorr, 0, windowLen * sizeof(float));

    // allocate processing buffer
    buffer = new FIFOSampleBuffer();
    // we do processing in mono mode
    buffer->setChannels(1);
    buffer->clear();
}


/// Releases the working objects allocated by the constructor: the sample FIFO
/// and the autocorrelation array.
BPMDetect::~BPMDetect()
{
    delete buffer;
    delete[] xcorr;
}


/// Mixes the interleaved input down to mono, low-pass filters it by plain averaging
/// and keeps only one output sample per 'decimateBy' input frames (the constructor
/// picks 'decimateBy' so that the resulting rate is approx. 1000 Hz).
///
/// The running sum and frame counter live in members 'decimateSum' and
/// 'decimateCount', so an averaging period may span consecutive calls.
///
/// Decimation drops the unnecessary frequencies and thereby cuts the amount of data
/// to process, as calculating the autocorrelation function is a very heavy operation.
/// Averaging is a poor-man's anti-alias filter, but that is not critical in this kind
/// of application (it would also be difficult to design a high-quality filter with a
/// steep cut-off at such a narrow band).
///
/// \param dest        Receives the decimated mono samples.
/// \param src         Interleaved input samples, 'channels' values per frame.
/// \param numsamples  Number of input frames available in 'src'.
/// \return Number of samples written to 'dest'.
int BPMDetect::decimate(SAMPLETYPE *dest, const SAMPLETYPE *src, int numsamples)
{
    assert(channels > 0);
    assert(decimateBy > 0);

    int written = 0;
    const SAMPLETYPE *frame = src;

    for (int n = 0; n < numsamples; n ++)
    {
        // fold all channels of this frame into the running sum
        int ch;
        for (ch = 0; ch < channels; ch ++)
        {
            decimateSum += frame[ch];
        }
        frame += ch;

        decimateCount ++;
        if (decimateCount < decimateBy)
        {
            // averaging period not complete yet
            continue;
        }

        // A full period has been accumulated: emit its average and restart
        LONG_SAMPLETYPE avg = (LONG_SAMPLETYPE)(decimateSum / (decimateBy * channels));
        decimateSum = 0;
        decimateCount = 0;
#ifdef SOUNDTOUCH_INTEGER_SAMPLES
        // saturate to the 16-bit range (shouldn't actually be necessary)
        if (avg > 32767)
        {
            avg = 32767;
        }
        else if (avg < -32768)
        {
            avg = -32768;
        }
#endif // SOUNDTOUCH_INTEGER_SAMPLES
        dest[written] = (SAMPLETYPE)avg;
        written ++;
    }
    return written;
}


// Calculates autocorrelation function of the sample history buffer
void BPMDetect::updateXCorr(int process_samples)
{
    int offs;
    SAMPLETYPE *pBuffer;
    
    assert(buffer->numSamples() >= (uint)(process_samples + windowLen));

    pBuffer = buffer->ptrBegin();
    #pragma omp parallel for
    for (offs = windowStart; offs < windowLen; offs ++) 
    {
        LONG_SAMPLETYPE sum;
        int i;

        sum = 0;
        for (i = 0; i < process_samples; i ++) 
        {
            sum += pBuffer[i] * pBuffer[i + offs];    // scaling the sub-result shouldn't be necessary
        }
//        xcorr[offs] *= xcorr_decay;   // decay 'xcorr' here with suitable coefficients 
                                        // if it's desired that the system adapts automatically to
                                        // various bpms, e.g. in processing continouos music stream.
                                        // The 'xcorr_decay' should be a value that's smaller than but 
                                        // close to one, and should also depend on 'process_samples' value.

        xcorr[offs] += (float)sum;
    }
}


/// Replaces the given samples, in place, with their smoothed amplitude envelope.
///
/// Each sample is rectified, gated against the running RMS level (kept in member
/// 'RMSVolumeAccu') and then smoothed with a decaying accumulator (member
/// 'envelopeAccu'). Both accumulators persist between calls.
///
/// \param samples     Samples to process; overwritten with the envelope values.
/// \param numsamples  Number of samples in 'samples'.
void BPMDetect::calcEnvelope(SAMPLETYPE *samples, int numsamples) 
{
    static const double smoothDecay = 0.7f;             // decay constant for smoothing the envelope
    static const double smoothNorm = (1 - smoothDecay); // scales the accumulator back to signal level

    for (int n = 0; n < numsamples; n ++) 
    {
        // rectify the sample and update the sliding RMS volume approximation
        double level = (float)fabs((float)samples[n]);
        RMSVolumeAccu *= avgdecay;
        RMSVolumeAccu += level * level;

        // Gate: amplitudes below half of the running RMS level are zeroed
        // (we're interested in peak values, not the silent moments)
        const double gate = 0.5 * sqrt(RMSVolumeAccu * avgnorm);
        if (level < gate)
        {
            level = 0;
        }

        // smooth amplitude envelope
        envelopeAccu *= smoothDecay;
        envelopeAccu += level;
        LONG_SAMPLETYPE env = (LONG_SAMPLETYPE)(envelopeAccu * smoothNorm);

#ifdef SOUNDTOUCH_INTEGER_SAMPLES
        // cut peaks (shouldn't be necessary though)
        if (env > 32767)
        {
            env = 32767;
        }
#endif // SOUNDTOUCH_INTEGER_SAMPLES
        samples[n] = (SAMPLETYPE)env;
    }
}


void BPMDetect::inputSamples(const SAMPLETYPE *samples, int numSamples)
{
    SAMPLETYPE decimated[DECIMATED_BLOCK_SAMPLES];

    // iterate so that max INPUT_BLOCK_SAMPLES processed per iteration
    while (numSamples > 0)
    {
        int block;
        int decSamples;

        block = (numSamples > INPUT_BLOCK_SAMPLES) ? INPUT_BLOCK_SAMPLES : numSamples;

        // decimate. note that converts to mono at the same time
        decSamples = decimate(decimated, samples, block);
        samples += block * channels;
        numSamples -= block;

        // envelope new samples and add them to buffer
        calcEnvelope(decimated, decSamples);
        buffer->putSamples(decimated, decSamples);
    }

    // when the buffer has enought samples for processing...
    if ((int)buffer->numSamples() > windowLen) 
    {
        int processLength;

        // how many samples are processed
        processLength = (int)buffer->numSamples() - windowLen;

        // ... calculate autocorrelations for oldest samples...
        updateXCorr(processLength);
        // ... and remove them from the buffer
        buffer->receiveSamples(processLength);
    }
}


/// Removes the constant bias from the autocorrelation data: finds the smallest
/// value of 'xcorr' within range [windowStart, windowLen[ and subtracts it from
/// every element of that range, so that the minimum becomes zero.
void BPMDetect::removeBias()
{
    if (windowStart >= windowLen)
    {
        // empty window, nothing to normalize
        return;
    }

    // Seed the search with the first element instead of a "large enough" magic
    // number: the accumulated correlation sums are unbounded, and with a fixed
    // seed a window whose values all exceed the seed would get the seed itself
    // subtracted rather than its true minimum.
    float minval = xcorr[windowStart];
    for (int i = windowStart + 1; i < windowLen; i ++)
    {
        if (xcorr[i] < minval)
        {
            minval = xcorr[i];
        }
    }

    for (int i = windowStart; i < windowLen; i ++)
    {
        xcorr[i] -= minval;
    }
}


/// Analyzes the autocorrelation data accumulated so far and returns the detected
/// beat rate.
///
/// The bias is first removed from 'xcorr' (which modifies the stored data), then
/// the peak position within [windowStart, windowLen[ is located with PeakFinder
/// and converted to beats per minute.
///
/// \return Detected BPM, or 0.0 if the peak detection failed.
float BPMDetect::getBpm()
{
    PeakFinder peakFinder;

    // 60 seconds times the decimated sample rate: dividing this by a position of
    // the autocorrelation function gives the corresponding bpm value
    const double bpmScale = 60.0 * ((double)sampleRate / (double)decimateBy);

    // save bpm debug analysis data if debug data enabled
    _SaveDebugData(xcorr, windowStart, windowLen, bpmScale);

    // remove bias from xcorr data
    removeBias();

    // find peak position
    const double peakLag = peakFinder.detectPeak(xcorr, windowStart, windowLen);

    assert(decimateBy != 0);
    if (peakLag < 1e-9)
    {
        // detection failed.
        return 0.0;
    }
    else
    {
        // calculate BPM
        return (float) (bpmScale / peakLag);
    }
}