Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mpc-hc/mpc-hc.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorXhmikosR <xhmikosr@users.sourceforge.net>2012-04-10 20:08:12 +0400
committerXhmikosR <xhmikosr@users.sourceforge.net>2012-04-10 20:08:12 +0400
commit6f2f7270dd822670597917d62244d9117ead15cf (patch)
treee3ec6d5d82118d434e804bd380bd735338a3ec06 /src
parentaa7ed919e600b8c44b2fdbb0919dcea357d99837 (diff)
update SoundTouch to v1.7.0pre r142
git-svn-id: https://mpc-hc.svn.sourceforge.net/svnroot/mpc-hc/trunk@4331 10f7b99b-c216-0410-bff0-8a66a9350fd8
Diffstat (limited to 'src')
-rw-r--r--src/thirdparty/SoundTouch/include/BPMDetect.h2
-rw-r--r--src/thirdparty/SoundTouch/include/STTypes.h2
-rw-r--r--src/thirdparty/SoundTouch/include/SoundTouch.h4
-rw-r--r--src/thirdparty/SoundTouch/source/FIFOSampleBuffer.cpp3
-rw-r--r--src/thirdparty/SoundTouch/source/FIRFilter.cpp7
-rw-r--r--src/thirdparty/SoundTouch/source/PeakFinder.cpp64
-rw-r--r--src/thirdparty/SoundTouch/source/PeakFinder.h4
-rw-r--r--src/thirdparty/SoundTouch/source/RateTransposer.cpp6
-rw-r--r--src/thirdparty/SoundTouch/source/SoundTouch.cpp7
-rw-r--r--src/thirdparty/SoundTouch/source/SoundTouch.vcxproj2
-rw-r--r--src/thirdparty/SoundTouch/source/SoundTouch.vcxproj.filters2
-rw-r--r--src/thirdparty/SoundTouch/source/TDStretch.cpp340
-rw-r--r--src/thirdparty/SoundTouch/source/TDStretch.h43
-rw-r--r--src/thirdparty/SoundTouch/source/cpu_detect_x86.cpp139
-rw-r--r--src/thirdparty/SoundTouch/source/cpu_detect_x86_win.cpp137
-rw-r--r--src/thirdparty/SoundTouch/source/mmx_optimized.cpp11
-rw-r--r--src/thirdparty/SoundTouch/source/sse_optimized.cpp79
17 files changed, 301 insertions, 551 deletions
diff --git a/src/thirdparty/SoundTouch/include/BPMDetect.h b/src/thirdparty/SoundTouch/include/BPMDetect.h
index ff1d3c44f..c5989cfc8 100644
--- a/src/thirdparty/SoundTouch/include/BPMDetect.h
+++ b/src/thirdparty/SoundTouch/include/BPMDetect.h
@@ -67,7 +67,7 @@ namespace soundtouch
#define MIN_BPM 29
/// Maximum allowed BPM rate. Used to restrict accepted result below a reasonable limit.
-#define MAX_BPM 230
+#define MAX_BPM 200
/// Class for calculating BPM rate for audio data.
diff --git a/src/thirdparty/SoundTouch/include/STTypes.h b/src/thirdparty/SoundTouch/include/STTypes.h
index 65772efcd..248683e7d 100644
--- a/src/thirdparty/SoundTouch/include/STTypes.h
+++ b/src/thirdparty/SoundTouch/include/STTypes.h
@@ -89,7 +89,7 @@ namespace soundtouch
#endif
- #ifndef _WIN64 // MPC-HC custom code: disable MMX for x64
+ #ifndef _M_X64 // MPC-HC custom code: disable optimizations for x64; it fails when linking
/// Define this to allow X86-specific assembler/intrinsic optimizations.
/// Notice that the library also contains plain C++ versions of each of these
/// routines, so if you're having difficulties getting the optimized
diff --git a/src/thirdparty/SoundTouch/include/SoundTouch.h b/src/thirdparty/SoundTouch/include/SoundTouch.h
index 164de19f5..3e4dbb89d 100644
--- a/src/thirdparty/SoundTouch/include/SoundTouch.h
+++ b/src/thirdparty/SoundTouch/include/SoundTouch.h
@@ -79,10 +79,10 @@ namespace soundtouch
{
/// Soundtouch library version string
-#define SOUNDTOUCH_VERSION "1.6.1pre"
+#define SOUNDTOUCH_VERSION "1.7.0"
/// SoundTouch library version id
-#define SOUNDTOUCH_VERSION_ID (10601)
+#define SOUNDTOUCH_VERSION_ID (10700)
//
// Available setting IDs for the 'setSetting' & 'get_setting' functions:
diff --git a/src/thirdparty/SoundTouch/source/FIFOSampleBuffer.cpp b/src/thirdparty/SoundTouch/source/FIFOSampleBuffer.cpp
index 8393f7b0d..f9efee60e 100644
--- a/src/thirdparty/SoundTouch/source/FIFOSampleBuffer.cpp
+++ b/src/thirdparty/SoundTouch/source/FIFOSampleBuffer.cpp
@@ -47,7 +47,6 @@
#include <memory.h>
#include <string.h>
#include <assert.h>
-#include <stdexcept>
#include "FIFOSampleBuffer.h"
@@ -175,7 +174,7 @@ void FIFOSampleBuffer::ensureCapacity(uint capacityRequirement)
tempUnaligned = new SAMPLETYPE[sizeInBytes / sizeof(SAMPLETYPE) + 16 / sizeof(SAMPLETYPE)];
if (tempUnaligned == NULL)
{
- throw std::runtime_error("Couldn't allocate memory!\n");
+ ST_THROW_RT_ERROR("Couldn't allocate memory!\n");
}
// Align the buffer to begin at 16byte cache line boundary for optimal performance
temp = (SAMPLETYPE *)(((ulong)tempUnaligned + 15) & (ulong)-16);
diff --git a/src/thirdparty/SoundTouch/source/FIRFilter.cpp b/src/thirdparty/SoundTouch/source/FIRFilter.cpp
index a2745625c..f882f7efd 100644
--- a/src/thirdparty/SoundTouch/source/FIRFilter.cpp
+++ b/src/thirdparty/SoundTouch/source/FIRFilter.cpp
@@ -43,7 +43,6 @@
#include <assert.h>
#include <math.h>
#include <stdlib.h>
-#include <stdexcept>
#include "FIRFilter.h"
#include "cpu_detect.h"
@@ -174,7 +173,7 @@ uint FIRFilter::evaluateFilterMono(SAMPLETYPE *dest, const SAMPLETYPE *src, uint
void FIRFilter::setCoefficients(const SAMPLETYPE *coeffs, uint newLength, uint uResultDivFactor)
{
assert(newLength > 0);
- if (newLength % 8) throw std::runtime_error("FIR filter length not divisible by 8");
+ if (newLength % 8) ST_THROW_RT_ERROR("FIR filter length not divisible by 8");
lengthDiv8 = newLength / 8;
length = lengthDiv8 * 8;
@@ -222,8 +221,8 @@ uint FIRFilter::evaluate(SAMPLETYPE *dest, const SAMPLETYPE *src, uint numSample
void * FIRFilter::operator new(size_t s)
{
// Notice! don't use "new FIRFilter" directly, use "newInstance" to create a new instance instead!
- throw std::runtime_error("Error in FIRFilter::new: Don't use 'new FIRFilter', use 'newInstance' member instead!");
- return NULL;
+ ST_THROW_RT_ERROR("Error in FIRFilter::new: Don't use 'new FIRFilter', use 'newInstance' member instead!");
+ return newInstance();
}
diff --git a/src/thirdparty/SoundTouch/source/PeakFinder.cpp b/src/thirdparty/SoundTouch/source/PeakFinder.cpp
index 9ad601cd9..b122612d2 100644
--- a/src/thirdparty/SoundTouch/source/PeakFinder.cpp
+++ b/src/thirdparty/SoundTouch/source/PeakFinder.cpp
@@ -55,15 +55,46 @@ PeakFinder::PeakFinder()
}
+// Finds the real 'top' of a peak hump in the neighbourhood of the given 'peakpos'.
+int PeakFinder::findTop(const float *data, int peakpos) const
+{
+ int i;
+ int start, end;
+ float refvalue;
+
+ refvalue = data[peakpos];
+
+ // seek within ±10 points
+ start = peakpos - 10;
+ if (start < minPos) start = minPos;
+ end = peakpos + 10;
+ if (end > maxPos) end = maxPos;
+
+ for (i = start; i <= end; i ++)
+ {
+ if (data[i] > refvalue)
+ {
+ peakpos = i;
+ refvalue = data[i];
+ }
+ }
+
+ // failure if max value is at edges of seek range => it's not peak, it's at slope.
+ if ((peakpos == start) || (peakpos == end)) return 0;
+
+ return peakpos;
+}
+
+
// Finds 'ground level' of a peak hump by starting from 'peakpos' and proceeding
// to direction defined by 'direction' until next 'hump' after minimum value will
// begin
int PeakFinder::findGround(const float *data, int peakpos, int direction) const
{
- float refvalue;
int lowpos;
int pos;
int climb_count;
+ float refvalue;
float delta;
climb_count = 0;
@@ -210,30 +241,41 @@ double PeakFinder::detectPeak(const float *data, int aminPos, int amaxPos)
// Now check if the highest peak were in fact harmonic of the true base beat peak
// - sometimes the highest peak can be Nth harmonic of the true base peak yet
// just a slightly higher than the true base
- for (i = 2; i < 10; i ++)
+
+ int hp = (int)(highPeak + 0.5);
+
+ for (i = 3; i < 10; i ++)
{
- double peaktmp, tmp;
+ double peaktmp, harmonic;
int i1,i2;
- peakpos = (int)(highPeak / (double)i + 0.5f);
+ harmonic = (double)i * 0.5;
+ peakpos = (int)(highPeak / harmonic + 0.5f);
if (peakpos < minPos) break;
+ peakpos = findTop(data, peakpos); // seek true local maximum index
+ if (peakpos == 0) continue; // no local max here
- // calculate mass-center of possible base peak
+ // calculate mass-center of possible harmonic peak
peaktmp = getPeakCenter(data, peakpos);
+ // accept harmonic peak if
+ // (a) it is found
+ // (b) is within ±4% of the expected harmonic interval
+ // (c) has at least half x-corr value of the max. peak
+
+ double diff = harmonic * peaktmp / highPeak;
+ if ((diff < 0.96) || (diff > 1.04)) continue; // peak too afar from expected
+
// now compare to highest detected peak
i1 = (int)(highPeak + 0.5);
i2 = (int)(peaktmp + 0.5);
- tmp = 2 * (data[i2] - data[i1]) / (data[i2] + data[i1]);
- if (fabs(tmp) < 0.1)
+ if (data[i2] >= 0.5*data[i1])
{
- // The highest peak is harmonic of almost as high base peak,
- // thus use the base peak instead
+ // The harmonic is at least half as high as the primary peak,
+ // thus use the harmonic peak instead
peak = peaktmp;
}
}
return peak;
}
-
-
diff --git a/src/thirdparty/SoundTouch/source/PeakFinder.h b/src/thirdparty/SoundTouch/source/PeakFinder.h
index a72b24f28..fce4dc1be 100644
--- a/src/thirdparty/SoundTouch/source/PeakFinder.h
+++ b/src/thirdparty/SoundTouch/source/PeakFinder.h
@@ -63,6 +63,10 @@ protected:
int direction /// Direction where to proceed from the peak: 1 = right, -1 = left.
) const;
+ // Finds the real 'top' of a peak hump in the neighbourhood of the given 'peakpos'.
+ int findTop(const float *data, int peakpos) const;
+
+
/// Finds the 'ground' level, i.e. smallest level between two neighbouring peaks, to right-
/// or left-hand side of the given peak position.
int findGround(const float *data, /// Data vector.
diff --git a/src/thirdparty/SoundTouch/source/RateTransposer.cpp b/src/thirdparty/SoundTouch/source/RateTransposer.cpp
index 2afc18750..772e95b21 100644
--- a/src/thirdparty/SoundTouch/source/RateTransposer.cpp
+++ b/src/thirdparty/SoundTouch/source/RateTransposer.cpp
@@ -42,11 +42,9 @@
#include <assert.h>
#include <stdlib.h>
#include <stdio.h>
-#include <stdexcept>
#include "RateTransposer.h"
#include "AAFilter.h"
-using namespace std;
using namespace soundtouch;
@@ -108,8 +106,8 @@ public:
// depending on if we've a MMX/SSE/etc-capable CPU available or not.
void * RateTransposer::operator new(size_t s)
{
- throw runtime_error("Error in RateTransoser::new: don't use \"new TDStretch\" directly, use \"newInstance\" to create a new instance instead!");
- return NULL;
+ ST_THROW_RT_ERROR("Error in RateTransoser::new: don't use \"new TDStretch\" directly, use \"newInstance\" to create a new instance instead!");
+ return newInstance();
}
diff --git a/src/thirdparty/SoundTouch/source/SoundTouch.cpp b/src/thirdparty/SoundTouch/source/SoundTouch.cpp
index 6f7b9a894..b67dd2c89 100644
--- a/src/thirdparty/SoundTouch/source/SoundTouch.cpp
+++ b/src/thirdparty/SoundTouch/source/SoundTouch.cpp
@@ -73,7 +73,6 @@
#include <stdlib.h>
#include <memory.h>
#include <math.h>
-#include <stdexcept>
#include <stdio.h>
#include "SoundTouch.h"
@@ -146,7 +145,7 @@ void SoundTouch::setChannels(uint numChannels)
{
if (numChannels != 1 && numChannels != 2)
{
- throw std::runtime_error("Illegal number of channels");
+ ST_THROW_RT_ERROR("Illegal number of channels");
}
channels = numChannels;
pRateTransposer->setChannels((int)numChannels);
@@ -295,11 +294,11 @@ void SoundTouch::putSamples(const SAMPLETYPE *samples, uint nSamples)
{
if (bSrateSet == FALSE)
{
- throw std::runtime_error("SoundTouch : Sample rate not defined");
+ ST_THROW_RT_ERROR("SoundTouch : Sample rate not defined");
}
else if (channels == 0)
{
- throw std::runtime_error("SoundTouch : Number of channels not defined");
+ ST_THROW_RT_ERROR("SoundTouch : Number of channels not defined");
}
// Transpose the rate of the new samples if necessary
diff --git a/src/thirdparty/SoundTouch/source/SoundTouch.vcxproj b/src/thirdparty/SoundTouch/source/SoundTouch.vcxproj
index 7e9bc2cac..fda474482 100644
--- a/src/thirdparty/SoundTouch/source/SoundTouch.vcxproj
+++ b/src/thirdparty/SoundTouch/source/SoundTouch.vcxproj
@@ -116,7 +116,7 @@
<ItemGroup>
<ClCompile Include="AAFilter.cpp" />
<ClCompile Include="BPMDetect.cpp" />
- <ClCompile Include="cpu_detect_x86_win.cpp" />
+ <ClCompile Include="cpu_detect_x86.cpp" />
<ClCompile Include="FIFOSampleBuffer.cpp" />
<ClCompile Include="FIRFilter.cpp" />
<ClCompile Include="mmx_optimized.cpp" />
diff --git a/src/thirdparty/SoundTouch/source/SoundTouch.vcxproj.filters b/src/thirdparty/SoundTouch/source/SoundTouch.vcxproj.filters
index 347a988b8..66c309a4e 100644
--- a/src/thirdparty/SoundTouch/source/SoundTouch.vcxproj.filters
+++ b/src/thirdparty/SoundTouch/source/SoundTouch.vcxproj.filters
@@ -58,7 +58,7 @@
<ClCompile Include="AAFilter.cpp">
<Filter>Source Files</Filter>
</ClCompile>
- <ClCompile Include="cpu_detect_x86_win.cpp">
+ <ClCompile Include="cpu_detect_x86.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="FIFOSampleBuffer.cpp">
diff --git a/src/thirdparty/SoundTouch/source/TDStretch.cpp b/src/thirdparty/SoundTouch/source/TDStretch.cpp
index 3ef7798a8..54aee423c 100644
--- a/src/thirdparty/SoundTouch/source/TDStretch.cpp
+++ b/src/thirdparty/SoundTouch/source/TDStretch.cpp
@@ -46,7 +46,6 @@
#include <assert.h>
#include <math.h>
#include <float.h>
-#include <stdexcept>
#include "STTypes.h"
#include "cpu_detect.h"
@@ -91,7 +90,7 @@ TDStretch::TDStretch() : FIFOProcessor(&outputBuffer)
channels = 2;
pMidBuffer = NULL;
- pRefMidBufferUnaligned = NULL;
+ pMidBufferUnaligned = NULL;
overlapLength = 0;
bAutoSeqSetting = TRUE;
@@ -111,8 +110,7 @@ TDStretch::TDStretch() : FIFOProcessor(&outputBuffer)
TDStretch::~TDStretch()
{
- delete[] pMidBuffer;
- delete[] pRefMidBufferUnaligned;
+ delete[] pMidBufferUnaligned;
}
@@ -196,12 +194,17 @@ void TDStretch::getParameters(int *pSampleRate, int *pSequenceMs, int *pSeekWind
// Overlaps samples in 'midBuffer' with the samples in 'pInput'
void TDStretch::overlapMono(SAMPLETYPE *pOutput, const SAMPLETYPE *pInput) const
{
- int i, itemp;
+ int i;
+ SAMPLETYPE m1, m2;
+
+ m1 = (SAMPLETYPE)0;
+ m2 = (SAMPLETYPE)overlapLength;
for (i = 0; i < overlapLength ; i ++)
{
- itemp = overlapLength - i;
- pOutput[i] = (pInput[i] * i + pMidBuffer[i] * itemp ) / overlapLength; // >> overlapDividerBits;
+ pOutput[i] = (pInput[i] * m1 + pMidBuffer[i] * m2 ) / overlapLength;
+ m1 += 1;
+ m2 -= 1;
}
}
@@ -247,35 +250,17 @@ BOOL TDStretch::isQuickSeekEnabled() const
// Seeks for the optimal overlap-mixing position.
int TDStretch::seekBestOverlapPosition(const SAMPLETYPE *refPos)
{
- if (channels == 2)
+ if (bQuickSeek)
{
- // stereo sound
- if (bQuickSeek)
- {
- return seekBestOverlapPositionStereoQuick(refPos);
- }
- else
- {
- return seekBestOverlapPositionStereo(refPos);
- }
+ return seekBestOverlapPositionQuick(refPos);
}
else
{
- // mono sound
- if (bQuickSeek)
- {
- return seekBestOverlapPositionMonoQuick(refPos);
- }
- else
- {
- return seekBestOverlapPositionMono(refPos);
- }
+ return seekBestOverlapPositionFull(refPos);
}
}
-
-
// Overlaps samples in 'midBuffer' with the samples in 'pInputBuffer' at position
// of 'ovlPos'.
inline void TDStretch::overlap(SAMPLETYPE *pOutput, const SAMPLETYPE *pInput, uint ovlPos) const
@@ -292,22 +277,18 @@ inline void TDStretch::overlap(SAMPLETYPE *pOutput, const SAMPLETYPE *pInput, ui
-
// Seeks for the optimal overlap-mixing position. The 'stereo' version of the
// routine
//
// The best position is determined as the position where the two overlapped
// sample sequences are 'most alike', in terms of the highest cross-correlation
// value over the overlapping period
-int TDStretch::seekBestOverlapPositionStereo(const SAMPLETYPE *refPos)
+int TDStretch::seekBestOverlapPositionFull(const SAMPLETYPE *refPos)
{
int bestOffs;
double bestCorr, corr;
int i;
- // Slopes the amplitudes of the 'midBuffer' samples
- precalcCorrReferenceStereo();
-
bestCorr = FLT_MIN;
bestOffs = 0;
@@ -317,7 +298,7 @@ int TDStretch::seekBestOverlapPositionStereo(const SAMPLETYPE *refPos)
{
// Calculates correlation value for the mixing position corresponding
// to 'i'
- corr = (double)calcCrossCorrStereo(refPos + 2 * i, pRefMidBuffer);
+ corr = calcCrossCorr(refPos + channels * i, pMidBuffer);
// heuristic rule to slightly favour values close to mid of the range
double tmp = (double)(2 * i - seekLength) / (double)seekLength;
corr = ((corr + 0.1) * (1.0 - 0.25 * tmp * tmp));
@@ -342,16 +323,13 @@ int TDStretch::seekBestOverlapPositionStereo(const SAMPLETYPE *refPos)
// The best position is determined as the position where the two overlapped
// sample sequences are 'most alike', in terms of the highest cross-correlation
// value over the overlapping period
-int TDStretch::seekBestOverlapPositionStereoQuick(const SAMPLETYPE *refPos)
+int TDStretch::seekBestOverlapPositionQuick(const SAMPLETYPE *refPos)
{
int j;
int bestOffs;
double bestCorr, corr;
int scanCount, corrOffset, tempOffset;
- // Slopes the amplitude of the 'midBuffer' samples
- precalcCorrReferenceStereo();
-
bestCorr = FLT_MIN;
bestOffs = _scanOffsets[0][0];
corrOffset = 0;
@@ -373,7 +351,7 @@ int TDStretch::seekBestOverlapPositionStereoQuick(const SAMPLETYPE *refPos)
// Calculates correlation value for the mixing position corresponding
// to 'tempOffset'
- corr = (double)calcCrossCorrStereo(refPos + 2 * tempOffset, pRefMidBuffer);
+ corr = (double)calcCrossCorr(refPos + channels * tempOffset, pMidBuffer);
// heuristic rule to slightly favour values close to mid of the range
double tmp = (double)(2 * tempOffset - seekLength) / seekLength;
corr = ((corr + 0.1) * (1.0 - 0.25 * tmp * tmp));
@@ -396,111 +374,6 @@ int TDStretch::seekBestOverlapPositionStereoQuick(const SAMPLETYPE *refPos)
-// Seeks for the optimal overlap-mixing position. The 'mono' version of the
-// routine
-//
-// The best position is determined as the position where the two overlapped
-// sample sequences are 'most alike', in terms of the highest cross-correlation
-// value over the overlapping period
-int TDStretch::seekBestOverlapPositionMono(const SAMPLETYPE *refPos)
-{
- int bestOffs;
- double bestCorr, corr;
- int tempOffset;
- const SAMPLETYPE *compare;
-
- // Slopes the amplitude of the 'midBuffer' samples
- precalcCorrReferenceMono();
-
- bestCorr = FLT_MIN;
- bestOffs = 0;
-
- // Scans for the best correlation value by testing each possible position
- // over the permitted range.
- for (tempOffset = 0; tempOffset < seekLength; tempOffset ++)
- {
- compare = refPos + tempOffset;
-
- // Calculates correlation value for the mixing position corresponding
- // to 'tempOffset'
- corr = (double)calcCrossCorrMono(pRefMidBuffer, compare);
- // heuristic rule to slightly favour values close to mid of the range
- double tmp = (double)(2 * tempOffset - seekLength) / seekLength;
- corr = ((corr + 0.1) * (1.0 - 0.25 * tmp * tmp));
-
- // Checks for the highest correlation value
- if (corr > bestCorr)
- {
- bestCorr = corr;
- bestOffs = tempOffset;
- }
- }
- // clear cross correlation routine state if necessary (is so e.g. in MMX routines).
- clearCrossCorrState();
-
- return bestOffs;
-}
-
-
-// Seeks for the optimal overlap-mixing position. The 'mono' version of the
-// routine
-//
-// The best position is determined as the position where the two overlapped
-// sample sequences are 'most alike', in terms of the highest cross-correlation
-// value over the overlapping period
-int TDStretch::seekBestOverlapPositionMonoQuick(const SAMPLETYPE *refPos)
-{
- int j;
- int bestOffs;
- double bestCorr, corr;
- int scanCount, corrOffset, tempOffset;
-
- // Slopes the amplitude of the 'midBuffer' samples
- precalcCorrReferenceMono();
-
- bestCorr = FLT_MIN;
- bestOffs = _scanOffsets[0][0];
- corrOffset = 0;
- tempOffset = 0;
-
- // Scans for the best correlation value using four-pass hierarchical search.
- //
- // The look-up table 'scans' has hierarchical position adjusting steps.
- // In first pass the routine searhes for the highest correlation with
- // relatively coarse steps, then rescans the neighbourhood of the highest
- // correlation with better resolution and so on.
- for (scanCount = 0;scanCount < 4; scanCount ++)
- {
- j = 0;
- while (_scanOffsets[scanCount][j])
- {
- tempOffset = corrOffset + _scanOffsets[scanCount][j];
- if (tempOffset >= seekLength) break;
-
- // Calculates correlation value for the mixing position corresponding
- // to 'tempOffset'
- corr = (double)calcCrossCorrMono(refPos + tempOffset, pRefMidBuffer);
- // heuristic rule to slightly favour values close to mid of the range
- double tmp = (double)(2 * tempOffset - seekLength) / seekLength;
- corr = ((corr + 0.1) * (1.0 - 0.25 * tmp * tmp));
-
- // Checks for the highest correlation value
- if (corr > bestCorr)
- {
- bestCorr = corr;
- bestOffs = tempOffset;
- }
- j ++;
- }
- corrOffset = bestOffs;
- }
- // clear cross correlation routine state if necessary (is so e.g. in MMX routines).
- clearCrossCorrState();
-
- return bestOffs;
-}
-
-
/// clear cross correlation routine state if necessary
void TDStretch::clearCrossCorrState()
{
@@ -713,15 +586,13 @@ void TDStretch::acceptNewOverlapLength(int newOverlapLength)
if (overlapLength > prevOvl)
{
- delete[] pMidBuffer;
- delete[] pRefMidBufferUnaligned;
+ delete[] pMidBufferUnaligned;
- pMidBuffer = new SAMPLETYPE[overlapLength * 2];
- clearMidBuffer();
+ pMidBufferUnaligned = new SAMPLETYPE[overlapLength * 2 + 16 / sizeof(SAMPLETYPE)];
+ // ensure that 'pMidBuffer' is aligned to 16 byte boundary for efficiency
+ pMidBuffer = (SAMPLETYPE *)((((ulong)pMidBufferUnaligned) + 15) & (ulong)-16);
- pRefMidBufferUnaligned = new SAMPLETYPE[2 * overlapLength + 16 / sizeof(SAMPLETYPE)];
- // ensure that 'pRefMidBuffer' is aligned to 16 byte boundary for efficiency
- pRefMidBuffer = (SAMPLETYPE *)((((ulong)pRefMidBufferUnaligned) + 15) & (ulong)-16);
+ clearMidBuffer();
}
}
@@ -731,8 +602,8 @@ void TDStretch::acceptNewOverlapLength(int newOverlapLength)
void * TDStretch::operator new(size_t s)
{
// Notice! don't use "new TDStretch" directly, use "newInstance" to create a new instance instead!
- throw std::runtime_error("Error in TDStretch::new: Don't use 'new TDStretch' directly, use 'newInstance' member instead!");
- return NULL;
+ ST_THROW_RT_ERROR("Error in TDStretch::new: Don't use 'new TDStretch' directly, use 'newInstance' member instead!");
+ return newInstance();
}
@@ -778,43 +649,6 @@ TDStretch * TDStretch::newInstance()
#ifdef SOUNDTOUCH_INTEGER_SAMPLES
-// Slopes the amplitude of the 'midBuffer' samples so that cross correlation
-// is faster to calculate
-void TDStretch::precalcCorrReferenceStereo()
-{
- int i, cnt2;
- int temp, temp2;
-
- for (i=0 ; i < (int)overlapLength ;i ++)
- {
- temp = i * (overlapLength - i);
- cnt2 = i * 2;
-
- temp2 = (pMidBuffer[cnt2] * temp) / slopingDivider;
- pRefMidBuffer[cnt2] = (short)(temp2);
- temp2 = (pMidBuffer[cnt2 + 1] * temp) / slopingDivider;
- pRefMidBuffer[cnt2 + 1] = (short)(temp2);
- }
-}
-
-
-// Slopes the amplitude of the 'midBuffer' samples so that cross correlation
-// is faster to calculate
-void TDStretch::precalcCorrReferenceMono()
-{
- int i;
- long temp;
- long temp2;
-
- for (i=0 ; i < (int)overlapLength ;i ++)
- {
- temp = i * (overlapLength - i);
- temp2 = (pMidBuffer[i] * temp) / slopingDivider;
- pRefMidBuffer[i] = (short)temp2;
- }
-}
-
-
// Overlaps samples in 'midBuffer' with the samples in 'input'. The 'Stereo'
// version of the routine.
void TDStretch::overlapStereo(short *poutput, const short *input) const
@@ -865,44 +699,32 @@ void TDStretch::calculateOverlapLength(int aoverlapMs)
}
-long TDStretch::calcCrossCorrMono(const short *mixingPos, const short *compare) const
+double TDStretch::calcCrossCorr(const short *mixingPos, const short *compare) const
{
long corr;
long norm;
int i;
corr = norm = 0;
- for (i = 1; i < overlapLength; i ++)
+ // Same routine for stereo and mono. For stereo, unrolling the loop improves
+ // efficiency and gives slightly better resolution against rounding.
+ // For mono it is the same routine, just with the loop unrolled by a factor of 4.
+ for (i = 0; i < channels * overlapLength; i += 4)
{
- corr += (mixingPos[i] * compare[i]) >> overlapDividerBits;
- norm += (mixingPos[i] * mixingPos[i]) >> overlapDividerBits;
+ corr += (mixingPos[i] * compare[i] +
+ mixingPos[i + 1] * compare[i + 1] +
+ mixingPos[i + 2] * compare[i + 2] +
+ mixingPos[i + 3] * compare[i + 3]) >> overlapDividerBits;
+ norm += (mixingPos[i] * mixingPos[i] +
+ mixingPos[i + 1] * mixingPos[i + 1] +
+ mixingPos[i + 2] * mixingPos[i + 2] +
+ mixingPos[i + 3] * mixingPos[i + 3]) >> overlapDividerBits;
}
// Normalize result by dividing by sqrt(norm) - this step is easiest
// done using floating point operation
if (norm == 0) norm = 1; // to avoid div by zero
- return (long)((double)corr * SHRT_MAX / sqrt((double)norm));
-}
-
-
-long TDStretch::calcCrossCorrStereo(const short *mixingPos, const short *compare) const
-{
- long corr;
- long norm;
- int i;
-
- corr = norm = 0;
- for (i = 2; i < 2 * overlapLength; i += 2)
- {
- corr += (mixingPos[i] * compare[i] +
- mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBits;
- norm += (mixingPos[i] * mixingPos[i] + mixingPos[i + 1] * mixingPos[i + 1]) >> overlapDividerBits;
- }
-
- // Normalize result by dividing by sqrt(norm) - this step is easiest
- // done using floating point operation
- if (norm == 0) norm = 1; // to avoid div by zero
- return (long)((double)corr * SHRT_MAX / sqrt((double)norm));
+ return (double)corr / sqrt((double)norm);
}
#endif // SOUNDTOUCH_INTEGER_SAMPLES
@@ -914,57 +736,26 @@ long TDStretch::calcCrossCorrStereo(const short *mixingPos, const short *compare
#ifdef SOUNDTOUCH_FLOAT_SAMPLES
-
-// Slopes the amplitude of the 'midBuffer' samples so that cross correlation
-// is faster to calculate
-void TDStretch::precalcCorrReferenceStereo()
-{
- int i, cnt2;
- float temp;
-
- for (i=0 ; i < (int)overlapLength ;i ++)
- {
- temp = (float)i * (float)(overlapLength - i);
- cnt2 = i * 2;
- pRefMidBuffer[cnt2] = (float)(pMidBuffer[cnt2] * temp);
- pRefMidBuffer[cnt2 + 1] = (float)(pMidBuffer[cnt2 + 1] * temp);
- }
-}
-
-
-// Slopes the amplitude of the 'midBuffer' samples so that cross correlation
-// is faster to calculate
-void TDStretch::precalcCorrReferenceMono()
-{
- int i;
- float temp;
-
- for (i=0 ; i < (int)overlapLength ;i ++)
- {
- temp = (float)i * (float)(overlapLength - i);
- pRefMidBuffer[i] = (float)(pMidBuffer[i] * temp);
- }
-}
-
-
// Overlaps samples in 'midBuffer' with the samples in 'pInput'
void TDStretch::overlapStereo(float *pOutput, const float *pInput) const
{
int i;
- int cnt2;
- float fTemp;
float fScale;
- float fi;
+ float f1;
+ float f2;
fScale = 1.0f / (float)overlapLength;
- for (i = 0; i < (int)overlapLength ; i ++)
+ f1 = 0;
+ f2 = 1.0f;
+
+ for (i = 0; i < 2 * (int)overlapLength ; i += 2)
{
- fTemp = (float)(overlapLength - i) * fScale;
- fi = (float)i * fScale;
- cnt2 = 2 * i;
- pOutput[cnt2 + 0] = pInput[cnt2 + 0] * fi + pMidBuffer[cnt2 + 0] * fTemp;
- pOutput[cnt2 + 1] = pInput[cnt2 + 1] * fi + pMidBuffer[cnt2 + 1] * fTemp;
+ pOutput[i + 0] = pInput[i + 0] * f1 + pMidBuffer[i + 0] * f2;
+ pOutput[i + 1] = pInput[i + 1] * f1 + pMidBuffer[i + 1] * f2;
+
+ f1 += fScale;
+ f2 -= fScale;
}
}
@@ -985,38 +776,29 @@ void TDStretch::calculateOverlapLength(int overlapInMsec)
}
-
-double TDStretch::calcCrossCorrMono(const float *mixingPos, const float *compare) const
+double TDStretch::calcCrossCorr(const float *mixingPos, const float *compare) const
{
double corr;
double norm;
int i;
corr = norm = 0;
- for (i = 1; i < overlapLength; i ++)
- {
- corr += mixingPos[i] * compare[i];
- norm += mixingPos[i] * mixingPos[i];
- }
-
- if (norm < 1e-9) norm = 1.0; // to avoid div by zero
- return corr / sqrt(norm);
-}
-
-
-double TDStretch::calcCrossCorrStereo(const float *mixingPos, const float *compare) const
-{
- double corr;
- double norm;
- int i;
-
- corr = norm = 0;
- for (i = 2; i < 2 * overlapLength; i += 2)
+ // Same routine for stereo and mono. For stereo, the loop is unrolled by a factor of 2.
+ // For mono it's the same routine, yet unrolled by a factor of 4.
+ for (i = 0; i < channels * overlapLength; i += 4)
{
corr += mixingPos[i] * compare[i] +
mixingPos[i + 1] * compare[i + 1];
+
norm += mixingPos[i] * mixingPos[i] +
mixingPos[i + 1] * mixingPos[i + 1];
+
+ // unroll the loop for better CPU efficiency:
+ corr += mixingPos[i + 2] * compare[i + 2] +
+ mixingPos[i + 3] * compare[i + 3];
+
+ norm += mixingPos[i + 2] * mixingPos[i + 2] +
+ mixingPos[i + 3] * mixingPos[i + 3];
}
if (norm < 1e-9) norm = 1.0; // to avoid div by zero
diff --git a/src/thirdparty/SoundTouch/source/TDStretch.h b/src/thirdparty/SoundTouch/source/TDStretch.h
index c236aa4e7..12ce2cf03 100644
--- a/src/thirdparty/SoundTouch/source/TDStretch.h
+++ b/src/thirdparty/SoundTouch/source/TDStretch.h
@@ -115,8 +115,7 @@ protected:
float tempo;
SAMPLETYPE *pMidBuffer;
- SAMPLETYPE *pRefMidBuffer;
- SAMPLETYPE *pRefMidBufferUnaligned;
+ SAMPLETYPE *pMidBufferUnaligned;
int overlapLength;
int seekLength;
int seekWindowLength;
@@ -127,8 +126,6 @@ protected:
FIFOSampleBuffer outputBuffer;
FIFOSampleBuffer inputBuffer;
BOOL bQuickSeek;
-// int outDebt;
-// BOOL bMidBufferDirty;
int sampleRate;
int sequenceMs;
@@ -142,13 +139,10 @@ protected:
virtual void clearCrossCorrState();
void calculateOverlapLength(int overlapMs);
- virtual LONG_SAMPLETYPE calcCrossCorrStereo(const SAMPLETYPE *mixingPos, const SAMPLETYPE *compare) const;
- virtual LONG_SAMPLETYPE calcCrossCorrMono(const SAMPLETYPE *mixingPos, const SAMPLETYPE *compare) const;
+ virtual double calcCrossCorr(const SAMPLETYPE *mixingPos, const SAMPLETYPE *compare) const;
- virtual int seekBestOverlapPositionStereo(const SAMPLETYPE *refPos);
- virtual int seekBestOverlapPositionStereoQuick(const SAMPLETYPE *refPos);
- virtual int seekBestOverlapPositionMono(const SAMPLETYPE *refPos);
- virtual int seekBestOverlapPositionMonoQuick(const SAMPLETYPE *refPos);
+ virtual int seekBestOverlapPositionFull(const SAMPLETYPE *refPos);
+ virtual int seekBestOverlapPositionQuick(const SAMPLETYPE *refPos);
int seekBestOverlapPosition(const SAMPLETYPE *refPos);
virtual void overlapStereo(SAMPLETYPE *output, const SAMPLETYPE *input) const;
@@ -157,9 +151,6 @@ protected:
void clearMidBuffer();
void overlap(SAMPLETYPE *output, const SAMPLETYPE *input, uint ovlPos) const;
- void precalcCorrReferenceMono();
- void precalcCorrReferenceStereo();
-
void calcSeqParameters();
/// Changes the tempo of the given sound samples.
@@ -234,17 +225,17 @@ public:
///< contains both channels if stereo
);
- /// return nominal input sample requirement for triggering a processing batch
- int getInputSampleReq() const
- {
- return (int)(nominalSkip + 0.5);
- }
-
- /// return nominal output sample amount when running a processing batch
- int getOutputBatchSize() const
- {
- return seekWindowLength - overlapLength;
- }
+ /// return nominal input sample requirement for triggering a processing batch
+ int getInputSampleReq() const
+ {
+ return (int)(nominalSkip + 0.5);
+ }
+
+ /// return nominal output sample amount when running a processing batch
+ int getOutputBatchSize() const
+ {
+ return seekWindowLength - overlapLength;
+ }
};
@@ -256,7 +247,7 @@ public:
class TDStretchMMX : public TDStretch
{
protected:
- long calcCrossCorrStereo(const short *mixingPos, const short *compare) const;
+ double calcCrossCorr(const short *mixingPos, const short *compare) const;
virtual void overlapStereo(short *output, const short *input) const;
virtual void clearCrossCorrState();
};
@@ -268,7 +259,7 @@ public:
class TDStretchSSE : public TDStretch
{
protected:
- double calcCrossCorrStereo(const float *mixingPos, const float *compare) const;
+ double calcCrossCorr(const float *mixingPos, const float *compare) const;
};
#endif /// SOUNDTOUCH_ALLOW_SSE
diff --git a/src/thirdparty/SoundTouch/source/cpu_detect_x86.cpp b/src/thirdparty/SoundTouch/source/cpu_detect_x86.cpp
new file mode 100644
index 000000000..d0b5ea73e
--- /dev/null
+++ b/src/thirdparty/SoundTouch/source/cpu_detect_x86.cpp
@@ -0,0 +1,139 @@
+////////////////////////////////////////////////////////////////////////////////
+///
+/// Generic version of the x86 CPU extension detection routine.
+///
+/// This file covers both GNU-style compilers (via 'cpuid.h') and the Microsoft
+/// Visual C++ compiler (via the __cpuid intrinsic from <intrin.h>).
+///
+/// Author : Copyright (c) Olli Parviainen
+/// Author e-mail : oparviai 'at' iki.fi
+/// SoundTouch WWW: http://www.surina.net/soundtouch
+///
+////////////////////////////////////////////////////////////////////////////////
+//
+// Last changed : $Date$
+// File revision : $Revision: 4 $
+//
+// $Id$
+//
+////////////////////////////////////////////////////////////////////////////////
+//
+// License :
+//
+// SoundTouch audio processing library
+// Copyright (c) Olli Parviainen
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#include "cpu_detect.h"
+#include "STTypes.h"
+
+#if defined(SOUNDTOUCH_ALLOW_X86_OPTIMIZATIONS)
+
+ #if defined(__GNUC__) && defined(__i386__)
+ // gcc
+ #include "cpuid.h"
+ #endif
+
+ #if defined(_M_IX86)
+ // windows
+ #include <intrin.h>
+ #define bit_MMX (1 << 23)
+ #define bit_SSE (1 << 25)
+ #define bit_SSE2 (1 << 26)
+ #endif
+
+#endif
+
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// processor instructions extension detection routines
+//
+//////////////////////////////////////////////////////////////////////////////
+
+// Flag variable indicating which ISA extensions are disabled (for debugging)
+static uint _dwDisabledISA = 0x00; // 0xffffffff; //<- use this to disable all extensions
+
+// Disables given set of instruction extensions. See SUPPORT_... defines.
+void disableExtensions(uint dwDisableMask)
+{
+ _dwDisabledISA = dwDisableMask;
+}
+
+
+
+/// Checks which instruction set extensions are supported by the CPU.
+uint detectCPUextensions(void)
+{
+/// If building for a 64-bit x86 system (not Itanium) and the user wants optimizations,
+/// return the OR of SUPPORT_{MMX,SSE,SSE2} (binary 11001, i.e. 0x19).
+/// Keep the _dwDisabledISA test (2 more operations, could be eliminated).
+#if ((defined(__GNUC__) && defined(__x86_64__)) \
+ || defined(_M_X64)) \
+ && defined(SOUNDTOUCH_ALLOW_X86_OPTIMIZATIONS)
+ return 0x19 & ~_dwDisabledISA;
+
+/// If building for a 32bit system and the user wants optimizations.
+/// Keep the _dwDisabledISA test (2 more operations, could be eliminated).
+#elif ((defined(__GNUC__) && defined(__i386__)) \
+ || defined(_M_IX86)) \
+ && defined(SOUNDTOUCH_ALLOW_X86_OPTIMIZATIONS)
+
+ if (_dwDisabledISA == 0xffffffff) return 0;
+
+ uint res = 0;
+
+#if defined(__GNUC__)
+ // GCC version of cpuid. Requires GCC 4.3.0 or later for cpuid.h / __get_cpuid support.
+ uint eax, ebx, ecx, edx; // unsigned int is the standard type. uint is defined by the compiler and not guaranteed to be portable.
+
+ // Check if no cpuid support.
+ if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx)) return 0; // always disable extensions.
+
+ if (edx & bit_MMX) res = res | SUPPORT_MMX;
+ if (edx & bit_SSE) res = res | SUPPORT_SSE;
+ if (edx & bit_SSE2) res = res | SUPPORT_SSE2;
+
+#else
+ // Windows / VS version of cpuid. Note that Visual Studio 2005 or later is required
+ // for __cpuid intrinsic support.
+ int reg[4] = {-1};
+
+ // Check if no cpuid support.
+ __cpuid(reg,0);
+ if ((unsigned int)reg[0] == 0) return 0; // always disable extensions.
+
+ __cpuid(reg,1);
+ if ((unsigned int)reg[3] & bit_MMX) res = res | SUPPORT_MMX;
+ if ((unsigned int)reg[3] & bit_SSE) res = res | SUPPORT_SSE;
+ if ((unsigned int)reg[3] & bit_SSE2) res = res | SUPPORT_SSE2;
+
+#endif
+
+ return res & ~_dwDisabledISA;
+
+#else
+
+/// One of these is true:
+/// 1) We don't want optimizations.
+/// 2) Using an unsupported compiler.
+/// 3) Running on a non-x86 platform.
+ return 0;
+
+#endif
+}
diff --git a/src/thirdparty/SoundTouch/source/cpu_detect_x86_win.cpp b/src/thirdparty/SoundTouch/source/cpu_detect_x86_win.cpp
deleted file mode 100644
index 44e0520d6..000000000
--- a/src/thirdparty/SoundTouch/source/cpu_detect_x86_win.cpp
+++ /dev/null
@@ -1,137 +0,0 @@
-////////////////////////////////////////////////////////////////////////////////
-///
-/// Win32 version of the x86 CPU detect routine.
-///
-/// This file is to be compiled in Windows platform with Microsoft Visual C++
-/// Compiler. Please see 'cpu_detect_x86_gcc.cpp' for the gcc compiler version
-/// for all GNU platforms.
-///
-/// Author : Copyright (c) Olli Parviainen
-/// Author e-mail : oparviai 'at' iki.fi
-/// SoundTouch WWW: http://www.surina.net/soundtouch
-///
-////////////////////////////////////////////////////////////////////////////////
-//
-// Last changed : $Date$
-// File revision : $Revision: 4 $
-//
-// $Id$
-//
-////////////////////////////////////////////////////////////////////////////////
-//
-// License :
-//
-// SoundTouch audio processing library
-// Copyright (c) Olli Parviainen
-//
-// This library is free software; you can redistribute it and/or
-// modify it under the terms of the GNU Lesser General Public
-// License as published by the Free Software Foundation; either
-// version 2.1 of the License, or (at your option) any later version.
-//
-// This library is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-// Lesser General Public License for more details.
-//
-// You should have received a copy of the GNU Lesser General Public
-// License along with this library; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-//
-////////////////////////////////////////////////////////////////////////////////
-
-#include "cpu_detect.h"
-
-#include "STTypes.h"
-
-//////////////////////////////////////////////////////////////////////////////
-//
-// processor instructions extension detection routines
-//
-//////////////////////////////////////////////////////////////////////////////
-
-// Flag variable indicating whick ISA extensions are disabled (for debugging)
-static uint _dwDisabledISA = 0x00; // 0xffffffff; //<- use this to disable all extensions
-
-
-// Disables given set of instruction extensions. See SUPPORT_... defines.
-void disableExtensions(uint dwDisableMask)
-{
- _dwDisabledISA = dwDisableMask;
-}
-
-
-
-/// Checks which instruction set extensions are supported by the CPU.
-uint detectCPUextensions(void)
-{
- uint res = 0;
-
- if (_dwDisabledISA == 0xffffffff) return 0;
-
-#ifndef _M_X64
- // 32bit compilation, detect CPU capabilities with inline assembler.
- __asm
- {
- ; check if 'cpuid' instructions is available by toggling eflags bit 21
- ;
- xor esi, esi ; clear esi = result register
-
- pushfd ; save eflags to stack
- mov eax,dword ptr [esp] ; load eax from stack (with eflags)
- mov ecx, eax ; save the original eflags values to ecx
- xor eax, 0x00200000 ; toggle bit 21
- mov dword ptr [esp],eax ; store toggled eflags to stack
- popfd ; load eflags from stack
-
- pushfd ; save updated eflags to stack
- mov eax,dword ptr [esp] ; load eax from stack
- popfd ; pop stack to restore stack pointer
-
- xor edx, edx ; clear edx for defaulting no mmx
- cmp eax, ecx ; compare to original eflags values
- jz end ; jumps to 'end' if cpuid not present
-
- ; cpuid instruction available, test for presence of mmx instructions
- mov eax, 1
- cpuid
- test edx, 0x00800000
- jz end ; branch if MMX not available
-
- or esi, SUPPORT_MMX ; otherwise add MMX support bit
-
- test edx, 0x02000000
- jz test3DNow ; branch if SSE not available
-
- or esi, SUPPORT_SSE ; otherwise add SSE support bit
-
- test3DNow:
- ; test for precense of AMD extensions
- mov eax, 0x80000000
- cpuid
- cmp eax, 0x80000000
- jbe end ; branch if no AMD extensions detected
-
- ; test for precense of 3DNow! extension
- mov eax, 0x80000001
- cpuid
- test edx, 0x80000000
- jz end ; branch if 3DNow! not detected
-
- or esi, SUPPORT_3DNOW ; otherwise add 3DNow support bit
-
- end:
-
- mov res, esi
- }
-
-#else
-
- // Visual C++ 64bit compilation doesn't support inline assembler. However,
- // all x64 compatible CPUs support MMX & SSE extensions.
- res = SUPPORT_MMX | SUPPORT_SSE | SUPPORT_SSE2;
-
-#endif
-
- return res & ~_dwDisabledISA;
-}
diff --git a/src/thirdparty/SoundTouch/source/mmx_optimized.cpp b/src/thirdparty/SoundTouch/source/mmx_optimized.cpp
index feeab49a6..684bad0e9 100644
--- a/src/thirdparty/SoundTouch/source/mmx_optimized.cpp
+++ b/src/thirdparty/SoundTouch/source/mmx_optimized.cpp
@@ -68,7 +68,7 @@ using namespace soundtouch;
// Calculates cross correlation of two buffers
-long TDStretchMMX::calcCrossCorrStereo(const short *pV1, const short *pV2) const
+double TDStretchMMX::calcCrossCorr(const short *pV1, const short *pV2) const
{
const __m64 *pVec1, *pVec2;
__m64 shifter;
@@ -82,9 +82,9 @@ long TDStretchMMX::calcCrossCorrStereo(const short *pV1, const short *pV2) const
shifter = _m_from_int(overlapDividerBits);
normaccu = accu = _mm_setzero_si64();
- // Process 4 parallel sets of 2 * stereo samples each during each
- // round to improve CPU-level parallellization.
- for (i = 0; i < overlapLength / 8; i ++)
+ // Process 4 parallel sets of 2 * stereo samples or 4 * mono samples
+ // during each round for improved CPU-level parallelization.
+ for (i = 0; i < channels * overlapLength / 16; i ++)
{
__m64 temp, temp2;
@@ -126,7 +126,8 @@ long TDStretchMMX::calcCrossCorrStereo(const short *pV1, const short *pV2) const
// Normalize result by dividing by sqrt(norm) - this step is easiest
// done using floating point operation
if (norm == 0) norm = 1; // to avoid div by zero
- return (long)((double)corr * USHRT_MAX / sqrt((double)norm));
+
+ return (double)corr / sqrt((double)norm);
// Note: Warning about the missing EMMS instruction is harmless
// as it'll be called elsewhere.
}
diff --git a/src/thirdparty/SoundTouch/source/sse_optimized.cpp b/src/thirdparty/SoundTouch/source/sse_optimized.cpp
index d989ad5aa..ddafb08e3 100644
--- a/src/thirdparty/SoundTouch/source/sse_optimized.cpp
+++ b/src/thirdparty/SoundTouch/source/sse_optimized.cpp
@@ -71,7 +71,7 @@ using namespace soundtouch;
#include <math.h>
// Calculates cross correlation of two buffers
-double TDStretchSSE::calcCrossCorrStereo(const float *pV1, const float *pV2) const
+double TDStretchSSE::calcCrossCorr(const float *pV1, const float *pV2) const
{
int i;
const float *pVec1;
@@ -110,8 +110,9 @@ double TDStretchSSE::calcCrossCorrStereo(const float *pV1, const float *pV2) con
pVec2 = (const __m128*)pV2;
vSum = vNorm = _mm_setzero_ps();
- // Unroll the loop by factor of 4 * 4 operations
- for (i = 0; i < overlapLength / 8; i ++)
+ // Unroll the loop by factor of 4 * 4 operations. Use same routine for
+ // stereo & mono, for mono it just means twice the amount of unrolling.
+ for (i = 0; i < channels * overlapLength / 16; i ++)
{
__m128 vTemp;
// vSum += pV1[0..3] * pV2[0..3]
@@ -152,7 +153,7 @@ double TDStretchSSE::calcCrossCorrStereo(const float *pV1, const float *pV2) con
// Calculates the cross-correlation value between 'pV1' and 'pV2' vectors
corr = norm = 0.0;
- for (i = 0; i < overlapLength / 8; i ++)
+ for (i = 0; i < channels * overlapLength / 16; i ++)
{
corr += pV1[0] * pV2[0] +
pV1[1] * pV2[1] +
@@ -171,81 +172,13 @@ double TDStretchSSE::calcCrossCorrStereo(const float *pV1, const float *pV2) con
pV1[14] * pV2[14] +
pV1[15] * pV2[15];
- for (j = 0; j < 15; j ++) norm += pV1[j] * pV1[j];
+ for (j = 0; j < 16; j ++) norm += pV1[j] * pV1[j];
pV1 += 16;
pV2 += 16;
}
return corr / sqrt(norm);
*/
-
- /* This is a bit outdated, corresponding routine in assembler. This may be teeny-weeny bit
- faster than intrinsic version, but more difficult to maintain & get compiled on multiple
- platforms.
-
- uint overlapLengthLocal = overlapLength;
- float corr;
-
- _asm
- {
- // Very important note: data in 'pV2' _must_ be aligned to
- // 16-byte boundary!
-
- // give prefetch hints to CPU of what data are to be needed soonish
- // give more aggressive hints on pV1 as that changes while pV2 stays
- // same between runs
- prefetcht0 [pV1]
- prefetcht0 [pV2]
- prefetcht0 [pV1 + 32]
-
- mov eax, dword ptr pV1
- mov ebx, dword ptr pV2
-
- xorps xmm0, xmm0
-
- mov ecx, overlapLengthLocal
- shr ecx, 3 // div by eight
-
- loop1:
- prefetcht0 [eax + 64] // give a prefetch hint to CPU what data are to be needed soonish
- prefetcht0 [ebx + 32] // give a prefetch hint to CPU what data are to be needed soonish
- movups xmm1, [eax]
- mulps xmm1, [ebx]
- addps xmm0, xmm1
-
- movups xmm2, [eax + 16]
- mulps xmm2, [ebx + 16]
- addps xmm0, xmm2
-
- prefetcht0 [eax + 96] // give a prefetch hint to CPU what data are to be needed soonish
- prefetcht0 [ebx + 64] // give a prefetch hint to CPU what data are to be needed soonish
-
- movups xmm3, [eax + 32]
- mulps xmm3, [ebx + 32]
- addps xmm0, xmm3
-
- movups xmm4, [eax + 48]
- mulps xmm4, [ebx + 48]
- addps xmm0, xmm4
-
- add eax, 64
- add ebx, 64
-
- dec ecx
- jnz loop1
-
- // add the four floats of xmm0 together and return the result.
-
- movhlps xmm1, xmm0 // move 3 & 4 of xmm0 to 1 & 2 of xmm1
- addps xmm1, xmm0
- movaps xmm2, xmm1
- shufps xmm2, xmm2, 0x01 // move 2 of xmm2 as 1 of xmm2
- addss xmm2, xmm1
- movss corr, xmm2
- }
-
- return (double)corr;
- */
}