Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/alexmarsev/soundtouch.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: oparviai <oparviai@f3a24b6a-cf45-0410-b55a-8c22e2698227> 2015-02-22 00:24:29 +0300
committer: oparviai <oparviai@f3a24b6a-cf45-0410-b55a-8c22e2698227> 2015-02-22 00:24:29 +0300
commit: bcdafc5733b45ecd20b1cff50cf394c4eddcced4 (patch)
tree: 10af8ea1f8b3d75b662bcd4f99b745708b664104
parent: 6296efcb2046deb3ed765c045d6efc69b430fea4 (diff)
Implemented parallel computation using OpenMP pragmas
-rw-r--r-- README.html 3
-rw-r--r-- source/SoundStretch/Makefile.am 6
-rw-r--r-- source/SoundTouch/BPMDetect.cpp 1
-rw-r--r-- source/SoundTouch/FIRFilter.cpp 61
-rw-r--r-- source/SoundTouch/FIRFilter.h 4
-rw-r--r-- source/SoundTouch/Makefile.am 2
-rw-r--r-- source/SoundTouch/SoundTouch.vcproj 1
-rw-r--r-- source/SoundTouch/TDStretch.cpp 36
-rw-r--r-- source/SoundTouch/sse_optimized.cpp 14
9 files changed, 72 insertions, 56 deletions
diff --git a/README.html b/README.html
index ba50408..2ec83fc 100644
--- a/README.html
+++ b/README.html
@@ -505,9 +505,12 @@ and estimates the BPM rate:</p>
<h3>5.1. SoundTouch library Change History </h3>
<p><b>1.8.1pre:</b></p>
<ul>
+ <li>Added parallel computation support via OpenMP primitives for better performance on multicore systems.
+ Benchmarks show processing speedups ranging from +30% (x86 dual-core) to +180% (ARM quad-core).</li>
<li>Replaced Windows-like 'BOOL' types with native 'bool'</li>
<li>Fixed bug in Android.mk make file</li>
<li>Changed documentation token to "dist_doc_DATA" in Makefile.am file</li>
+ <li>Removed -fcheck-new from gcc switches</li>
</ul>
<p><b>1.8.0:</b></p>
<ul>
diff --git a/source/SoundStretch/Makefile.am b/source/SoundStretch/Makefile.am
index 1d46d29..9033722 100644
--- a/source/SoundStretch/Makefile.am
+++ b/source/SoundStretch/Makefile.am
@@ -44,11 +44,11 @@ soundstretch_SOURCES=main.cpp RunParameters.cpp WavFile.cpp
soundstretch_LDADD=../SoundTouch/libSoundTouch.la -lm
## linker flags.
-# OP 2011-7-17 Linker flags disabled to prevent stripping symbols by default
-# soundstretch_LDFLAGS=-s
+# OP 2011-7-17 Linker flag -s disabled to prevent stripping symbols by default
+#soundstretch_LDFLAGS=-s
## additional compiler flags
-soundstretch_CXXFLAGS=-O3
+soundstretch_CXXFLAGS=-O3 -fopenmp
#clean-local:
# -rm -f additional-files-to-remove-on-make-clean
diff --git a/source/SoundTouch/BPMDetect.cpp b/source/SoundTouch/BPMDetect.cpp
index c36661f..44652ca 100644
--- a/source/SoundTouch/BPMDetect.cpp
+++ b/source/SoundTouch/BPMDetect.cpp
@@ -226,6 +226,7 @@ void BPMDetect::updateXCorr(int process_samples)
assert(buffer->numSamples() >= (uint)(process_samples + windowLen));
pBuffer = buffer->ptrBegin();
+ #pragma omp parallel for
for (offs = windowStart; offs < windowLen; offs ++)
{
LONG_SAMPLETYPE sum;
diff --git a/source/SoundTouch/FIRFilter.cpp b/source/SoundTouch/FIRFilter.cpp
index 54e2047..422c0a2 100644
--- a/source/SoundTouch/FIRFilter.cpp
+++ b/source/SoundTouch/FIRFilter.cpp
@@ -61,22 +61,18 @@ FIRFilter::FIRFilter()
length = 0;
lengthDiv8 = 0;
filterCoeffs = NULL;
- sum = NULL;
- sumsize = 0;
}
FIRFilter::~FIRFilter()
{
delete[] filterCoeffs;
- delete[] sum;
}
// Usual C-version of the filter routine for stereo sound
uint FIRFilter::evaluateFilterStereo(SAMPLETYPE *dest, const SAMPLETYPE *src, uint numSamples) const
{
- uint i, j, end;
- LONG_SAMPLETYPE suml, sumr;
+ int j, end;
#ifdef SOUNDTOUCH_FLOAT_SAMPLES
// when using floating point samples, use a scaler instead of a divider
// because division is much slower operation than multiplying.
@@ -90,9 +86,12 @@ uint FIRFilter::evaluateFilterStereo(SAMPLETYPE *dest, const SAMPLETYPE *src, ui
end = 2 * (numSamples - length);
+ #pragma omp parallel for
for (j = 0; j < end; j += 2)
{
const SAMPLETYPE *ptr;
+ LONG_SAMPLETYPE suml, sumr;
+ uint i;
suml = sumr = 0;
ptr = src + j;
@@ -133,28 +132,31 @@ uint FIRFilter::evaluateFilterStereo(SAMPLETYPE *dest, const SAMPLETYPE *src, ui
// Usual C-version of the filter routine for mono sound
uint FIRFilter::evaluateFilterMono(SAMPLETYPE *dest, const SAMPLETYPE *src, uint numSamples) const
{
- uint i, j, end;
- LONG_SAMPLETYPE sum;
+ int j, end;
#ifdef SOUNDTOUCH_FLOAT_SAMPLES
// when using floating point samples, use a scaler instead of a divider
// because division is much slower operation than multiplying.
double dScaler = 1.0 / (double)resultDivider;
#endif
-
assert(length != 0);
end = numSamples - length;
+ #pragma omp parallel for
for (j = 0; j < end; j ++)
{
+ const SAMPLETYPE *pSrc = src + j;
+ LONG_SAMPLETYPE sum;
+ uint i;
+
sum = 0;
for (i = 0; i < length; i += 4)
{
// loop is unrolled by factor of 4 here for efficiency
- sum += src[i + 0] * filterCoeffs[i + 0] +
- src[i + 1] * filterCoeffs[i + 1] +
- src[i + 2] * filterCoeffs[i + 2] +
- src[i + 3] * filterCoeffs[i + 3];
+ sum += pSrc[i + 0] * filterCoeffs[i + 0] +
+ pSrc[i + 1] * filterCoeffs[i + 1] +
+ pSrc[i + 2] * filterCoeffs[i + 2] +
+ pSrc[i + 3] * filterCoeffs[i + 3];
}
#ifdef SOUNDTOUCH_INTEGER_SAMPLES
sum >>= resultDivFactor;
@@ -164,7 +166,6 @@ uint FIRFilter::evaluateFilterMono(SAMPLETYPE *dest, const SAMPLETYPE *src, uint
sum *= dScaler;
#endif // SOUNDTOUCH_INTEGER_SAMPLES
dest[j] = (SAMPLETYPE)sum;
- src ++;
}
return end;
}
@@ -172,15 +173,7 @@ uint FIRFilter::evaluateFilterMono(SAMPLETYPE *dest, const SAMPLETYPE *src, uint
uint FIRFilter::evaluateFilterMulti(SAMPLETYPE *dest, const SAMPLETYPE *src, uint numSamples, uint numChannels)
{
- uint i, j, end, c;
-
- if (sumsize < numChannels)
- {
- // allocate large enough array for keeping sums
- sumsize = numChannels;
- delete[] sum;
- sum = new LONG_SAMPLETYPE[numChannels];
- }
+ int j, end;
#ifdef SOUNDTOUCH_FLOAT_SAMPLES
// when using floating point samples, use a scaler instead of a divider
@@ -192,17 +185,21 @@ uint FIRFilter::evaluateFilterMulti(SAMPLETYPE *dest, const SAMPLETYPE *src, uin
assert(src != NULL);
assert(dest != NULL);
assert(filterCoeffs != NULL);
+ assert(numChannels < 16);
end = numChannels * (numSamples - length);
- for (c = 0; c < numChannels; c ++)
- {
- sum[c] = 0;
- }
-
+ #pragma omp parallel for
for (j = 0; j < end; j += numChannels)
{
const SAMPLETYPE *ptr;
+ LONG_SAMPLETYPE sums[16];
+ uint c, i;
+
+ for (c = 0; c < numChannels; c ++)
+ {
+ sums[c] = 0;
+ }
ptr = src + j;
@@ -211,7 +208,7 @@ uint FIRFilter::evaluateFilterMulti(SAMPLETYPE *dest, const SAMPLETYPE *src, uin
SAMPLETYPE coef=filterCoeffs[i];
for (c = 0; c < numChannels; c ++)
{
- sum[c] += ptr[0] * coef;
+ sums[c] += ptr[0] * coef;
ptr ++;
}
}
@@ -219,13 +216,11 @@ uint FIRFilter::evaluateFilterMulti(SAMPLETYPE *dest, const SAMPLETYPE *src, uin
for (c = 0; c < numChannels; c ++)
{
#ifdef SOUNDTOUCH_INTEGER_SAMPLES
- sum[c] >>= resultDivFactor;
+ sums[c] >>= resultDivFactor;
#else
- sum[c] *= dScaler;
+ sums[c] *= dScaler;
#endif // SOUNDTOUCH_INTEGER_SAMPLES
- *dest = (SAMPLETYPE)sum[c];
- dest++;
- sum[c] = 0;
+ dest[j+c] = (SAMPLETYPE)sums[c];
}
}
return numSamples - length;
diff --git a/source/SoundTouch/FIRFilter.h b/source/SoundTouch/FIRFilter.h
index 6c213a6..63ca71a 100644
--- a/source/SoundTouch/FIRFilter.h
+++ b/source/SoundTouch/FIRFilter.h
@@ -65,10 +65,6 @@ protected:
// Memory for filter coefficients
SAMPLETYPE *filterCoeffs;
- // Memory for keeping temporary sums in multichannel processing
- LONG_SAMPLETYPE *sum;
- uint sumsize;
-
virtual uint evaluateFilterStereo(SAMPLETYPE *dest,
const SAMPLETYPE *src,
uint numSamples) const;
diff --git a/source/SoundTouch/Makefile.am b/source/SoundTouch/Makefile.am
index 3644e6a..e458fb5 100644
--- a/source/SoundTouch/Makefile.am
+++ b/source/SoundTouch/Makefile.am
@@ -34,7 +34,7 @@ libSoundTouch_la_SOURCES=AAFilter.cpp FIRFilter.cpp FIFOSampleBuffer.cpp \
InterpolateShannon.cpp
# Compiler flags
-AM_CXXFLAGS=-O3 -fcheck-new -I../../include
+AM_CXXFLAGS=-O3 -fopenmp -I../../include
# Compile the files that need MMX and SSE individually.
libSoundTouch_la_LIBADD=libSoundTouchMMX.la libSoundTouchSSE.la
diff --git a/source/SoundTouch/SoundTouch.vcproj b/source/SoundTouch/SoundTouch.vcproj
index b657a1f..da73b56 100644
--- a/source/SoundTouch/SoundTouch.vcproj
+++ b/source/SoundTouch/SoundTouch.vcproj
@@ -209,6 +209,7 @@
BasicRuntimeChecks="3"
RuntimeLibrary="1"
FloatingPointModel="2"
+ OpenMP="true"
UsePrecompiledHeader="0"
PrecompiledHeaderFile=".\Debug/SoundTouch.pch"
AssemblerListingLocation=".\Debug/"
diff --git a/source/SoundTouch/TDStretch.cpp b/source/SoundTouch/TDStretch.cpp
index c63f38d..28f7035 100644
--- a/source/SoundTouch/TDStretch.cpp
+++ b/source/SoundTouch/TDStretch.cpp
@@ -292,9 +292,9 @@ inline void TDStretch::overlap(SAMPLETYPE *pOutput, const SAMPLETYPE *pInput, ui
int TDStretch::seekBestOverlapPositionFull(const SAMPLETYPE *refPos)
{
int bestOffs;
- double bestCorr, corr;
- double norm;
+ double bestCorr;
int i;
+ double norm;
bestCorr = FLT_MIN;
bestOffs = 0;
@@ -302,14 +302,23 @@ int TDStretch::seekBestOverlapPositionFull(const SAMPLETYPE *refPos)
// Scans for the best correlation value by testing each possible position
// over the permitted range.
bestCorr = calcCrossCorr(refPos, pMidBuffer, norm);
+
+ #pragma omp parallel for
for (i = 1; i < seekLength; i ++)
{
- // Calculates correlation value for the mixing position corresponding
- // to 'i'. Now call "calcCrossCorrAccumulate" that is otherwise same as
- // "calcCrossCorr", but saves time by reusing & updating previously stored
+ norm = 0;
+ double corr;
+ // Calculates correlation value for the mixing position corresponding to 'i'
+#ifdef _OPENMP
+ // in parallel OpenMP mode, can't use norm accumulator version as parallel executor won't
+ // iterate the loop in sequential order
+ corr = calcCrossCorr(refPos + channels * i, pMidBuffer, norm);
+#else
+ // In non-parallel version call "calcCrossCorrAccumulate" that is otherwise same
+ // as "calcCrossCorr", but saves time by reusing & updating previously stored
// "norm" value
corr = calcCrossCorrAccumulate(refPos + channels * i, pMidBuffer, norm);
-
+#endif
// heuristic rule to slightly favour values close to mid of the range
double tmp = (double)(2 * i - seekLength) / (double)seekLength;
corr = ((corr + 0.1) * (1.0 - 0.25 * tmp * tmp));
@@ -317,8 +326,15 @@ int TDStretch::seekBestOverlapPositionFull(const SAMPLETYPE *refPos)
// Checks for the highest correlation value
if (corr > bestCorr)
{
- bestCorr = corr;
- bestOffs = i;
+ // For optimal performance, enter critical section only in case that best value found.
+ // in such case repeat 'if' condition as it's possible that parallel execution may have
+ // updated the bestCorr value in the mean time
+ #pragma omp critical
+ if (corr > bestCorr)
+ {
+ bestCorr = corr;
+ bestOffs = i;
+ }
}
}
// clear cross correlation routine state if necessary (is so e.g. in MMX routines).
@@ -881,9 +897,10 @@ void TDStretch::calculateOverlapLength(int overlapInMsec)
/// Calculate cross-correlation
-double TDStretch::calcCrossCorr(const float *mixingPos, const float *compare, double &norm) const
+double TDStretch::calcCrossCorr(const float *mixingPos, const float *compare, double &anorm) const
{
double corr;
+ double norm;
int i;
corr = norm = 0;
@@ -905,6 +922,7 @@ double TDStretch::calcCrossCorr(const float *mixingPos, const float *compare, do
mixingPos[i + 3] * mixingPos[i + 3];
}
+ anorm = norm;
return corr / sqrt((norm < 1e-9 ? 1.0 : norm));
}
diff --git a/source/SoundTouch/sse_optimized.cpp b/source/SoundTouch/sse_optimized.cpp
index 672e58f..d1c728e 100644
--- a/source/SoundTouch/sse_optimized.cpp
+++ b/source/SoundTouch/sse_optimized.cpp
@@ -71,7 +71,7 @@ using namespace soundtouch;
#include <math.h>
// Calculates cross correlation of two buffers
-double TDStretchSSE::calcCrossCorr(const float *pV1, const float *pV2, double &norm) const
+double TDStretchSSE::calcCrossCorr(const float *pV1, const float *pV2, double &anorm) const
{
int i;
const float *pVec1;
@@ -141,7 +141,8 @@ double TDStretchSSE::calcCrossCorr(const float *pV1, const float *pV2, double &n
// return value = vSum[0] + vSum[1] + vSum[2] + vSum[3]
float *pvNorm = (float*)&vNorm;
- norm = (pvNorm[0] + pvNorm[1] + pvNorm[2] + pvNorm[3]);
+ float norm = (pvNorm[0] + pvNorm[1] + pvNorm[2] + pvNorm[3]);
+ anorm = norm;
float *pvSum = (float*)&vSum;
return (double)(pvSum[0] + pvSum[1] + pvSum[2] + pvSum[3]) / sqrt(norm < 1e-9 ? 1.0 : norm);
@@ -258,14 +259,17 @@ uint FIRFilterSSE::evaluateFilterStereo(float *dest, const float *source, uint n
assert(((ulongptr)filterCoeffsAlign) % 16 == 0);
// filter is evaluated for two stereo samples with each iteration, thus use of 'j += 2'
+ #pragma omp parallel for
for (j = 0; j < count; j += 2)
{
const float *pSrc;
+ float *pDest;
const __m128 *pFil;
__m128 sum1, sum2;
uint i;
- pSrc = (const float*)source; // source audio data
+ pSrc = (const float*)source + j * 2; // source audio data
+ pDest = dest + j * 2; // destination audio data
pFil = (const __m128*)filterCoeffsAlign; // filter coefficients. NOTE: Assumes coefficients
// are aligned to 16-byte boundary
sum1 = sum2 = _mm_setzero_ps();
@@ -298,12 +302,10 @@ uint FIRFilterSSE::evaluateFilterStereo(float *dest, const float *source, uint n
// to sum the two hi- and lo-floats of these registers together.
// post-shuffle & add the filtered values and store to dest.
- _mm_storeu_ps(dest, _mm_add_ps(
+ _mm_storeu_ps(pDest, _mm_add_ps(
_mm_shuffle_ps(sum1, sum2, _MM_SHUFFLE(1,0,3,2)), // s2_1 s2_0 s1_3 s1_2
_mm_shuffle_ps(sum1, sum2, _MM_SHUFFLE(3,2,1,0)) // s2_3 s2_2 s1_1 s1_0
));
- source += 4;
- dest += 4;
}
// Ideas for further improvement: