Update to latest SoundTouch (1.5.0)

git-svn-id: https://mpc-hc.svn.sourceforge.net/svnroot/mpc-hc/trunk@1610 10f7b99b-c216-0410-bff0-8a66a9350fd8
author: Spec-Chum <spec-chum@users.sourceforge.net> 2010-02-05 17:56:52 +0300
committer: Spec-Chum <spec-chum@users.sourceforge.net> 2010-02-05 17:56:52 +0300
commit: 33bd1ff9d035530baf765b5e19424af707d9d781 (patch)
tree: 3225aadad423fd78abf8bf760d0d063db027536f /src/filters/renderer/MpcAudioRenderer/SoundTouch
parent: 0acc867ab62d247fd92de0cca343cd2ee73c50cb (diff)
20 files changed, 457 insertions, 351 deletions
diff --git a/src/filters/renderer/MpcAudioRenderer/SoundTouch/include/BPMDetect.h b/src/filters/renderer/MpcAudioRenderer/SoundTouch/include/BPMDetect.h
index cebbbf754..4def43f1e 100644
--- a/src/filters/renderer/MpcAudioRenderer/SoundTouch/include/BPMDetect.h
+++ b/src/filters/renderer/MpcAudioRenderer/SoundTouch/include/BPMDetect.h
@@ -26,10 +26,10 @@
 ///
 ////////////////////////////////////////////////////////////////////////////////
 //
-// Last changed  : $Date: 2008-12-25 14:20:01 +0200 (Thu, 25 Dec 2008) $
+// Last changed  : $Date: 2009-02-21 18:00:14 +0200 (Sat, 21 Feb 2009) $
 // File revision : $Revision: 4 $
 //
-// $Id: BPMDetect.h 33 2008-12-25 12:20:01Z oparviai $
+// $Id: BPMDetect.h 63 2009-02-21 16:00:14Z oparviai $
 //
 ////////////////////////////////////////////////////////////////////////////////
 //
@@ -108,9 +108,6 @@ protected:
     /// FIFO-buffer for decimated processing samples.
     soundtouch::FIFOSampleBuffer *buffer;
 
-    /// Initialize the class for processing.
-    void init(int numChannels, int sampleRate);
-
     /// Updates auto-correlation function for given number of decimated samples that 
     /// are read from the internal 'buffer' pipe (samples aren't removed from the pipe 
     /// though).
@@ -146,8 +143,8 @@ public:
     /// function. 
     /// 
     /// Notice that data in 'samples' array can be disrupted in processing.
-    void inputSamples(soundtouch::SAMPLETYPE *samples,  ///< Pointer to input/working data buffer
-                      int numSamples                    ///< Number of samples in buffer
+    void inputSamples(const soundtouch::SAMPLETYPE *samples,    ///< Pointer to input/working data buffer
+                      int numSamples                            ///< Number of samples in buffer
                       );
 
 
diff --git a/src/filters/renderer/MpcAudioRenderer/SoundTouch/include/FIFOSampleBuffer.h b/src/filters/renderer/MpcAudioRenderer/SoundTouch/include/FIFOSampleBuffer.h
index 9f6e8a823..76cbf9514 100644
--- a/src/filters/renderer/MpcAudioRenderer/SoundTouch/include/FIFOSampleBuffer.h
+++ b/src/filters/renderer/MpcAudioRenderer/SoundTouch/include/FIFOSampleBuffer.h
@@ -15,10 +15,10 @@
 ///
 ////////////////////////////////////////////////////////////////////////////////
 //
-// Last changed  : $Date: 2008-02-10 18:26:55 +0200 (Sun, 10 Feb 2008) $
+// Last changed  : $Date: 2009-02-21 18:00:14 +0200 (Sat, 21 Feb 2009) $
 // File revision : $Revision: 4 $
 //
-// $Id: FIFOSampleBuffer.h 11 2008-02-10 16:26:55Z oparviai $
+// $Id: FIFOSampleBuffer.h 63 2009-02-21 16:00:14Z oparviai $
 //
 ////////////////////////////////////////////////////////////////////////////////
 //
@@ -107,7 +107,7 @@ public:
     /// When using this function to output samples, also remember to 'remove' the
     /// output samples from the buffer by calling the 
     /// 'receiveSamples(numSamples)' function
-    virtual SAMPLETYPE *ptrBegin() const;
+    virtual SAMPLETYPE *ptrBegin();
 
     /// Returns a pointer to the end of the used part of the sample buffer (i.e. 
     /// where the new samples are to be inserted). This function may be used for 
diff --git a/src/filters/renderer/MpcAudioRenderer/SoundTouch/include/FIFOSamplePipe.h b/src/filters/renderer/MpcAudioRenderer/SoundTouch/include/FIFOSamplePipe.h
index e5aa1d0bf..b5fc3b779 100644
--- a/src/filters/renderer/MpcAudioRenderer/SoundTouch/include/FIFOSamplePipe.h
+++ b/src/filters/renderer/MpcAudioRenderer/SoundTouch/include/FIFOSamplePipe.h
@@ -17,10 +17,10 @@
 ///
 ////////////////////////////////////////////////////////////////////////////////
 //
-// Last changed  : $Date: 2008-02-10 18:26:55 +0200 (Sun, 10 Feb 2008) $
+// Last changed  : $Date: 2009-04-13 16:18:48 +0300 (Mon, 13 Apr 2009) $
 // File revision : $Revision: 4 $
 //
-// $Id: FIFOSamplePipe.h 11 2008-02-10 16:26:55Z oparviai $
+// $Id: FIFOSamplePipe.h 69 2009-04-13 13:18:48Z oparviai $
 //
 ////////////////////////////////////////////////////////////////////////////////
 //
@@ -59,6 +59,10 @@ namespace soundtouch
 class FIFOSamplePipe
 {
 public:
+    // virtual default destructor
+    virtual ~FIFOSamplePipe() {}
+
+
     /// Returns a pointer to the beginning of the output samples. 
     /// This function is provided for accessing the output samples directly. 
     /// Please be careful for not to corrupt the book-keeping!
@@ -66,7 +70,7 @@ public:
     /// When using this function to output samples, also remember to 'remove' the
     /// output samples from the buffer by calling the 
     /// 'receiveSamples(numSamples)' function
-    virtual SAMPLETYPE *ptrBegin() const = 0;
+    virtual SAMPLETYPE *ptrBegin() = 0;
 
     /// Adds 'numSamples' pcs of samples from the 'samples' memory position to
     /// the sample buffer.
@@ -166,7 +170,7 @@ protected:
     /// When using this function to output samples, also remember to 'remove' the
     /// output samples from the buffer by calling the 
     /// 'receiveSamples(numSamples)' function
-    virtual SAMPLETYPE *ptrBegin() const
+    virtual SAMPLETYPE *ptrBegin()
     {
         return output->ptrBegin();
     }
diff --git a/src/filters/renderer/MpcAudioRenderer/SoundTouch/include/STTypes.h b/src/filters/renderer/MpcAudioRenderer/SoundTouch/include/STTypes.h
index 1c11c1f32..cad502ffa 100644
--- a/src/filters/renderer/MpcAudioRenderer/SoundTouch/include/STTypes.h
+++ b/src/filters/renderer/MpcAudioRenderer/SoundTouch/include/STTypes.h
@@ -8,10 +8,10 @@
 ///
 ////////////////////////////////////////////////////////////////////////////////
 //
-// Last changed  : $Date: 2009-01-11 13:36:36 +0200 (Sun, 11 Jan 2009) $
+// Last changed  : $Date: 2009-05-17 14:30:57 +0300 (Sun, 17 May 2009) $
 // File revision : $Revision: 3 $
 //
-// $Id: STTypes.h 47 2009-01-11 11:36:36Z oparviai $
+// $Id: STTypes.h 70 2009-05-17 11:30:57Z oparviai $
 //
 ////////////////////////////////////////////////////////////////////////////////
 //
@@ -87,7 +87,7 @@ namespace soundtouch
  
  #endif
 
-    #if defined(WIN32) || defined(__i386__) || defined(__x86_64__)
+	#ifndef _WIN64
         /// Define this to allow X86-specific assembler/intrinsic optimizations. 
         /// Notice that library contains also usual C++ versions of each of these
         /// these routines, so if you're having difficulties getting the optimized 
@@ -115,7 +115,7 @@ namespace soundtouch
             #error "conflicting sample types defined"
         #endif // FLOAT_SAMPLES
 
-        #if defined ALLOW_X86_OPTIMIZATIONS && !defined(_WIN64)	// FIXME: Link error when enabled on x64 filters only
+        #ifdef ALLOW_X86_OPTIMIZATIONS
             // Allow MMX optimizations
             #define ALLOW_MMX   1
         #endif
@@ -129,7 +129,7 @@ namespace soundtouch
 
         #ifdef ALLOW_X86_OPTIMIZATIONS
                 // Allow 3DNow! and SSE optimizations
-            #if defined(WIN32)
+            #if WIN32
                 #define ALLOW_3DNOW     1
             #endif
 
@@ -139,4 +139,11 @@ namespace soundtouch
     #endif  // INTEGER_SAMPLES
 };
 
+
+// When this #define is active, it eliminates a clicking sound when the "rate" or "pitch" 
+// parameter setting crosses from a value <1 to >=1 or vice versa during processing. 
+// The default is off, as such a crossover is an atypical case and the fix involves a 
+// slight sound quality compromise.
+//#define PREVENT_CLICK_AT_RATE_CROSSOVER   1
+
 #endif
diff --git a/src/filters/renderer/MpcAudioRenderer/SoundTouch/include/SoundTouch.h b/src/filters/renderer/MpcAudioRenderer/SoundTouch/include/SoundTouch.h
index 611a192b9..0e042d3c0 100644
--- a/src/filters/renderer/MpcAudioRenderer/SoundTouch/include/SoundTouch.h
+++ b/src/filters/renderer/MpcAudioRenderer/SoundTouch/include/SoundTouch.h
@@ -41,10 +41,10 @@
 ///
 ////////////////////////////////////////////////////////////////////////////////
 //
-// Last changed  : $Date: 2008-12-25 19:03:48 +0200 (Thu, 25 Dec 2008) $
+// Last changed  : $Date: 2009-12-28 22:10:14 +0200 (Mon, 28 Dec 2009) $
 // File revision : $Revision: 4 $
 //
-// $Id: SoundTouch.h 40 2008-12-25 17:03:48Z oparviai $
+// $Id: SoundTouch.h 78 2009-12-28 20:10:14Z oparviai $
 //
 ////////////////////////////////////////////////////////////////////////////////
 //
@@ -79,10 +79,10 @@ namespace soundtouch
 {
 
 /// Soundtouch library version string
-#define SOUNDTOUCH_VERSION          "1.4.0"
+#define SOUNDTOUCH_VERSION          "1.5.0"
 
 /// SoundTouch library version id
-#define SOUNDTOUCH_VERSION_ID       (10400)
+#define SOUNDTOUCH_VERSION_ID       (10500)
 
 //
 // Available setting IDs for the 'setSetting' & 'get_setting' functions:
diff --git a/src/filters/renderer/MpcAudioRenderer/SoundTouch/source/3dnow_win.cpp b/src/filters/renderer/MpcAudioRenderer/SoundTouch/source/3dnow_win.cpp
index 9da06c613..f0a9d7ecc 100644
--- a/src/filters/renderer/MpcAudioRenderer/SoundTouch/source/3dnow_win.cpp
+++ b/src/filters/renderer/MpcAudioRenderer/SoundTouch/source/3dnow_win.cpp
@@ -35,10 +35,10 @@
 ///
 ////////////////////////////////////////////////////////////////////////////////
 //
-// Last changed  : $Date: 2009-01-25 16:13:39 +0200 (Sun, 25 Jan 2009) $
+// Last changed  : $Date: 2009-02-21 18:00:14 +0200 (Sat, 21 Feb 2009) $
 // File revision : $Revision: 4 $
 //
-// $Id: 3dnow_win.cpp 51 2009-01-25 14:13:39Z oparviai $
+// $Id: 3dnow_win.cpp 63 2009-02-21 16:00:14Z oparviai $
 //
 ////////////////////////////////////////////////////////////////////////////////
 //
@@ -66,15 +66,13 @@
 #include "cpu_detect.h"
 #include "STTypes.h"
 
-
-using namespace soundtouch;
-
-#ifdef ALLOW_3DNOW
-
 #ifndef WIN32
 #error "wrong platform - this source code file is exclusively for Win32 platform"
 #endif
 
+using namespace soundtouch;
+
+#ifdef ALLOW_3DNOW
 // 3DNow! routines available only with float sample type    
 
 //////////////////////////////////////////////////////////////////////////////
@@ -84,17 +82,13 @@ using namespace soundtouch;
 //////////////////////////////////////////////////////////////////////////////
 
 #include "TDStretch.h"
-//#include <limits.h>
-
-// these are declared in 'TDStretch.cpp'
-// extern int scanOffsets[4][24];
 
 
 // Calculates cross correlation of two buffers
 double TDStretch3DNow::calcCrossCorrStereo(const float *pV1, const float *pV2) const
 {
-    uint overlapLengthLocal = overlapLength;
-    float corr;
+    int overlapLengthLocal = overlapLength;
+    float corr = 0;
 
     // Calculates the cross-correlation value between 'pV1' and 'pV2' vectors
     /*
diff --git a/src/filters/renderer/MpcAudioRenderer/SoundTouch/source/BPMDetect.cpp b/src/filters/renderer/MpcAudioRenderer/SoundTouch/source/BPMDetect.cpp
index 6609f55c5..405f514bf 100644
--- a/src/filters/renderer/MpcAudioRenderer/SoundTouch/source/BPMDetect.cpp
+++ b/src/filters/renderer/MpcAudioRenderer/SoundTouch/source/BPMDetect.cpp
@@ -26,10 +26,10 @@
 ///
 ////////////////////////////////////////////////////////////////////////////////
 //
-// Last changed  : $Date: 2008-12-25 19:54:41 +0200 (Thu, 25 Dec 2008) $
+// Last changed  : $Date: 2009-02-21 18:00:14 +0200 (Sat, 21 Feb 2009) $
 // File revision : $Revision: 4 $
 //
-// $Id: BPMDetect.cpp 43 2008-12-25 17:54:41Z oparviai $
+// $Id: BPMDetect.cpp 63 2009-02-21 16:00:14Z oparviai $
 //
 ////////////////////////////////////////////////////////////////////////////////
 //
@@ -66,8 +66,6 @@ using namespace soundtouch;
 #define INPUT_BLOCK_SAMPLES       2048
 #define DECIMATED_BLOCK_SAMPLES   256
 
-typedef unsigned short ushort;
-
 /// decay constant for calculating RMS volume sliding average approximation 
 /// (time constant is about 10 sec)
 const float avgdecay = 0.99986f;
@@ -77,18 +75,13 @@ const float avgnorm = (1 - avgdecay);
 
 
 
-BPMDetect::BPMDetect(int numChannels, int sampleRate)
+BPMDetect::BPMDetect(int numChannels, int aSampleRate)
 {
-    xcorr = NULL;
-
-    buffer = new FIFOSampleBuffer();
+    this->sampleRate = aSampleRate;
+    this->channels = numChannels;
 
     decimateSum = 0;
     decimateCount = 0;
-    decimateBy = 0;
-
-    this->sampleRate = sampleRate;
-    this->channels = numChannels;
 
     envelopeAccu = 0;
 
@@ -103,7 +96,26 @@ BPMDetect::BPMDetect(int numChannels, int sampleRate)
     RMSVolumeAccu = (0.092f * 0.092f) / avgnorm;
 #endif
 
-    init(numChannels, sampleRate);
+    // choose decimation factor so that result is approx. 500 Hz
+    decimateBy = sampleRate / 500;
+    assert(decimateBy > 0);
+    assert(INPUT_BLOCK_SAMPLES < decimateBy * DECIMATED_BLOCK_SAMPLES);
+
+    // Calculate window length & starting item according to desired min & max bpms
+    windowLen = (60 * sampleRate) / (decimateBy * MIN_BPM);
+    windowStart = (60 * sampleRate) / (decimateBy * MAX_BPM);
+
+    assert(windowLen > windowStart);
+
+    // allocate new working objects
+    xcorr = new float[windowLen];
+    memset(xcorr, 0, windowLen * sizeof(float));
+
+    // allocate processing buffer
+    buffer = new FIFOSampleBuffer();
+    // we do processing in mono mode
+    buffer->setChannels(1);
+    buffer->clear();
 }
 
 
@@ -115,7 +127,9 @@ BPMDetect::~BPMDetect()
 }
 
 
-/// low-pass filter & decimate to about 500 Hz. return number of outputted samples.
+
+/// convert to mono, low-pass filter & decimate to about 500 Hz. 
+/// return number of outputted samples.
 ///
 /// Decimation is used to remove the unnecessary frequencies and thus to reduce 
 /// the amount of data needed to be processed as calculating autocorrelation 
@@ -130,17 +144,25 @@ int BPMDetect::decimate(SAMPLETYPE *dest, const SAMPLETYPE *src, int numsamples)
     int count, outcount;
     LONG_SAMPLETYPE out;
 
-    assert(decimateBy != 0);
+    assert(channels > 0);
+    assert(decimateBy > 0);
     outcount = 0;
     for (count = 0; count < numsamples; count ++) 
     {
-        decimateSum += src[count];
+        int j;
+
+        // convert to mono and accumulate
+        for (j = 0; j < channels; j ++)
+        {
+            decimateSum += src[j];
+        }
+        src += j;
 
         decimateCount ++;
         if (decimateCount >= decimateBy) 
         {
             // Store every Nth sample only
-            out = (LONG_SAMPLETYPE)(decimateSum / decimateBy);
+            out = (LONG_SAMPLETYPE)(decimateSum / (decimateBy * channels));
             decimateSum = 0;
             decimateCount = 0;
 #ifdef INTEGER_SAMPLES
@@ -231,27 +253,27 @@ void BPMDetect::calcEnvelope(SAMPLETYPE *samples, int numsamples)
 
 
 
-void BPMDetect::inputSamples(SAMPLETYPE *samples, int numSamples)
+void BPMDetect::inputSamples(const SAMPLETYPE *samples, int numSamples)
 {
     SAMPLETYPE decimated[DECIMATED_BLOCK_SAMPLES];
 
-    // convert from stereo to mono if necessary
-    if (channels == 2)
+    // iterate so that max INPUT_BLOCK_SAMPLES processed per iteration
+    while (numSamples > 0)
     {
-        int i;
+        int block;
+        int decSamples;
 
-        for (i = 0; i < numSamples; i ++)
-        {
-            samples[i] = (samples[i * 2] + samples[i * 2 + 1]) / 2;
-        }
-    }
-    
-    // decimate
-    numSamples = decimate(decimated, samples, numSamples);
+        block = (numSamples > INPUT_BLOCK_SAMPLES) ? INPUT_BLOCK_SAMPLES : numSamples;
+
+        // decimate. note that this also converts to mono at the same time
+        decSamples = decimate(decimated, samples, block);
+        samples += block * channels;
+        numSamples -= block;
 
-    // envelope new samples and add them to buffer
-    calcEnvelope(decimated, numSamples);
-    buffer->putSamples(decimated, numSamples);
+        // envelope new samples and add them to buffer
+        calcEnvelope(decimated, decSamples);
+        buffer->putSamples(decimated, decSamples);
+    }
 
     // when the buffer has enought samples for processing...
     if ((int)buffer->numSamples() > windowLen) 
@@ -259,7 +281,7 @@ void BPMDetect::inputSamples(SAMPLETYPE *samples, int numSamples)
         int processLength;
 
         // how many samples are processed
-        processLength = buffer->numSamples() - windowLen;
+        processLength = (int)buffer->numSamples() - windowLen;
 
         // ... calculate autocorrelations for oldest samples...
         updateXCorr(processLength);
@@ -269,31 +291,6 @@ void BPMDetect::inputSamples(SAMPLETYPE *samples, int numSamples)
 }
 
 
-void BPMDetect::init(int numChannels, int sampleRate)
-{
-    this->sampleRate = sampleRate;
-
-    // choose decimation factor so that result is approx. 500 Hz
-    decimateBy = sampleRate / 500;
-    assert(decimateBy > 0);
-    assert(INPUT_BLOCK_SAMPLES < decimateBy * DECIMATED_BLOCK_SAMPLES);
-
-    // Calculate window length & starting item according to desired min & max bpms
-    windowLen = (60 * sampleRate) / (decimateBy * MIN_BPM);
-    windowStart = (60 * sampleRate) / (decimateBy * MAX_BPM);
-
-    assert(windowLen > windowStart);
-
-    // allocate new working objects
-    xcorr = new float[windowLen];
-    memset(xcorr, 0, windowLen * sizeof(float));
-
-    // we do processing in mono mode
-    buffer->setChannels(1);
-    buffer->clear();
-}
-
-
 
 float BPMDetect::getBpm()
 {
diff --git a/src/filters/renderer/MpcAudioRenderer/SoundTouch/source/FIFOSampleBuffer.cpp b/src/filters/renderer/MpcAudioRenderer/SoundTouch/source/FIFOSampleBuffer.cpp
index 5d784fc90..01f64b083 100644
--- a/src/filters/renderer/MpcAudioRenderer/SoundTouch/source/FIFOSampleBuffer.cpp
+++ b/src/filters/renderer/MpcAudioRenderer/SoundTouch/source/FIFOSampleBuffer.cpp
@@ -15,10 +15,10 @@
 ///
 ////////////////////////////////////////////////////////////////////////////////
 //
-// Last changed  : $Date: 2008-02-10 18:26:55 +0200 (Sun, 10 Feb 2008) $
+// Last changed  : $Date: 2009-02-27 19:24:42 +0200 (Fri, 27 Feb 2009) $
 // File revision : $Revision: 4 $
 //
-// $Id: FIFOSampleBuffer.cpp 11 2008-02-10 16:26:55Z oparviai $
+// $Id: FIFOSampleBuffer.cpp 68 2009-02-27 17:24:42Z oparviai $
 //
 ////////////////////////////////////////////////////////////////////////////////
 //
@@ -63,6 +63,7 @@ FIFOSampleBuffer::FIFOSampleBuffer(int numChannels)
     samplesInBuffer = 0;
     bufferPos = 0;
     channels = (uint)numChannels;
+    ensureCapacity(32);     // allocate initial capacity 
 }
 
 
@@ -151,8 +152,9 @@ SAMPLETYPE *FIFOSampleBuffer::ptrEnd(uint slackCapacity)
 // When using this function to output samples, also remember to 'remove' the
 // outputted samples from the buffer by calling the 
 // 'receiveSamples(numSamples)' function
-SAMPLETYPE *FIFOSampleBuffer::ptrBegin() const
+SAMPLETYPE *FIFOSampleBuffer::ptrBegin()
 {
+    assert(buffer);
     return buffer + bufferPos * channels;
 }
 
@@ -175,8 +177,12 @@ void FIFOSampleBuffer::ensureCapacity(uint capacityRequirement)
         {
             throw std::runtime_error("Couldn't allocate memory!\n");
         }
+        // Align the buffer to begin at a 16-byte cache line boundary for optimal performance
         temp = (SAMPLETYPE *)(((ulong)tempUnaligned + 15) & (ulong)-16);
-        memcpy(temp, ptrBegin(), samplesInBuffer * channels * sizeof(SAMPLETYPE));
+        if (samplesInBuffer)
+        {
+            memcpy(temp, ptrBegin(), samplesInBuffer * channels * sizeof(SAMPLETYPE));
+        }
         delete[] bufferUnaligned;
         buffer = temp;
         bufferUnaligned = tempUnaligned;
diff --git a/src/filters/renderer/MpcAudioRenderer/SoundTouch/source/FIRFilter.cpp b/src/filters/renderer/MpcAudioRenderer/SoundTouch/source/FIRFilter.cpp
index f775526c2..159df256d 100644
--- a/src/filters/renderer/MpcAudioRenderer/SoundTouch/source/FIRFilter.cpp
+++ b/src/filters/renderer/MpcAudioRenderer/SoundTouch/source/FIRFilter.cpp
@@ -11,10 +11,10 @@
 ///
 ////////////////////////////////////////////////////////////////////////////////
 //
-// Last changed  : $Date: 2008-05-09 07:48:34 +0300 (Fri, 09 May 2008) $
+// Last changed  : $Date: 2009-02-25 19:13:51 +0200 (Wed, 25 Feb 2009) $
 // File revision : $Revision: 4 $
 //
-// $Id: FIRFilter.cpp 26 2008-05-09 04:48:34Z oparviai $
+// $Id: FIRFilter.cpp 67 2009-02-25 17:13:51Z oparviai $
 //
 ////////////////////////////////////////////////////////////////////////////////
 //
@@ -181,7 +181,7 @@ void FIRFilter::setCoefficients(const SAMPLETYPE *coeffs, uint newLength, uint u
     assert(length == newLength);
 
     resultDivFactor = uResultDivFactor;
-    resultDivider = (SAMPLETYPE)::pow((float)2, (float)resultDivFactor);
+    resultDivider = (SAMPLETYPE)::pow(2.0, (int)resultDivFactor);
 
     delete[] filterCoeffs;
     filterCoeffs = new SAMPLETYPE[length];
@@ -229,6 +229,7 @@ void * FIRFilter::operator new(size_t s)
 
 FIRFilter * FIRFilter::newInstance()
 {
+#ifndef _WIN64
     uint uExtensions;
 
     uExtensions = detectCPUextensions();
@@ -262,6 +263,8 @@ FIRFilter * FIRFilter::newInstance()
     else
 #endif // ALLOW_3DNOW
 
+#endif	// _WIN64
+
     {
         // ISA optimizations not supported, use plain C version
         return ::new FIRFilter;
diff --git a/src/filters/renderer/MpcAudioRenderer/SoundTouch/source/FIRFilter.h b/src/filters/renderer/MpcAudioRenderer/SoundTouch/source/FIRFilter.h
index ed97adfbe..5713f7bb2 100644
--- a/src/filters/renderer/MpcAudioRenderer/SoundTouch/source/FIRFilter.h
+++ b/src/filters/renderer/MpcAudioRenderer/SoundTouch/source/FIRFilter.h
@@ -11,10 +11,10 @@
 ///
 ////////////////////////////////////////////////////////////////////////////////
 //
-// Last changed  : $Date: 2008-02-10 18:26:55 +0200 (Sun, 10 Feb 2008) $
+// Last changed  : $Date: 2009-02-21 18:00:14 +0200 (Sat, 21 Feb 2009) $
 // File revision : $Revision: 4 $
 //
-// $Id: FIRFilter.h 11 2008-02-10 16:26:55Z oparviai $
+// $Id: FIRFilter.h 63 2009-02-21 16:00:14Z oparviai $
 //
 ////////////////////////////////////////////////////////////////////////////////
 //
@@ -42,6 +42,7 @@
 #ifndef FIRFilter_H
 #define FIRFilter_H
 
+#include <stddef.h>
 #include "STTypes.h"
 
 namespace soundtouch
@@ -103,7 +104,7 @@ public:
 
 #ifdef ALLOW_MMX
 
-    /// Class that implements MMX optimized functions exclusive for 16bit integer samples type.
+/// Class that implements MMX-optimized functions exclusively for the 16-bit integer sample type.
     class FIRFilterMMX : public FIRFilter
     {
     protected:
diff --git a/src/filters/renderer/MpcAudioRenderer/SoundTouch/source/PeakFinder.cpp b/src/filters/renderer/MpcAudioRenderer/SoundTouch/source/PeakFinder.cpp
index e1a290329..03f60bfa9 100644
--- a/src/filters/renderer/MpcAudioRenderer/SoundTouch/source/PeakFinder.cpp
+++ b/src/filters/renderer/MpcAudioRenderer/SoundTouch/source/PeakFinder.cpp
@@ -11,10 +11,10 @@
 ///
 ////////////////////////////////////////////////////////////////////////////////
 //
-// Last changed  : $Date: 2008-12-25 19:54:41 +0200 (Thu, 25 Dec 2008) $
+// Last changed  : $Date: 2009-02-21 18:00:14 +0200 (Sat, 21 Feb 2009) $
 // File revision : $Revision: 4 $
 //
-// $Id: PeakFinder.cpp 43 2008-12-25 17:54:41Z oparviai $
+// $Id: PeakFinder.cpp 63 2009-02-21 16:00:14Z oparviai $
 //
 ////////////////////////////////////////////////////////////////////////////////
 //
@@ -51,6 +51,7 @@ using namespace soundtouch;
 
 PeakFinder::PeakFinder()
 {
+    minPos = maxPos = 0;
 }
 
 
@@ -140,13 +141,15 @@ double PeakFinder::calcMassCenter(const float *data, int firstPos, int lastPos)
         sum += (float)i * data[i];
         wsum += data[i];
     }
+
+    if (wsum < 1e-6) return 0;
     return sum / wsum;
 }
 
 
 
 /// get exact center of peak near given position by calculating local mass of center
-double PeakFinder::getPeakCenter(const float *data, int peakpos)
+double PeakFinder::getPeakCenter(const float *data, int peakpos) const
 {
     float peakLevel;            // peak level
     int crosspos1, crosspos2;   // position where the peak 'hump' crosses cutting level
@@ -178,15 +181,15 @@ double PeakFinder::getPeakCenter(const float *data, int peakpos)
 
 
 
-double PeakFinder::detectPeak(const float *data, int minPos, int maxPos) 
+double PeakFinder::detectPeak(const float *data, int aminPos, int amaxPos) 
 {
 
     int i;
     int peakpos;                // position of peak level
     double highPeak, peak;
 
-    this->minPos = minPos;
-    this->maxPos = maxPos;
+    this->minPos = aminPos;
+    this->maxPos = amaxPos;
 
     // find absolute peak
     peakpos = minPos;
diff --git a/src/filters/renderer/MpcAudioRenderer/SoundTouch/source/PeakFinder.h b/src/filters/renderer/MpcAudioRenderer/SoundTouch/source/PeakFinder.h
index 736270c49..e3640cc6d 100644
--- a/src/filters/renderer/MpcAudioRenderer/SoundTouch/source/PeakFinder.h
+++ b/src/filters/renderer/MpcAudioRenderer/SoundTouch/source/PeakFinder.h
@@ -9,10 +9,10 @@
 ///
 ////////////////////////////////////////////////////////////////////////////////
 //
-// Last changed  : $Date: 2008-12-25 19:54:41 +0200 (Thu, 25 Dec 2008) $
+// Last changed  : $Date: 2009-02-21 18:00:14 +0200 (Sat, 21 Feb 2009) $
 // File revision : $Revision: 4 $
 //
-// $Id: PeakFinder.h 43 2008-12-25 17:54:41Z oparviai $
+// $Id: PeakFinder.h 63 2009-02-21 16:00:14Z oparviai $
 //
 ////////////////////////////////////////////////////////////////////////////////
 //
@@ -71,7 +71,7 @@ protected:
                      ) const;
 
     /// get exact center of peak near given position by calculating local mass of center
-    double getPeakCenter(const float *data, int peakpos);
+    double getPeakCenter(const float *data, int peakpos) const;
 
 public:
     /// Constructor. 
diff --git a/src/filters/renderer/MpcAudioRenderer/SoundTouch/source/RateTransposer.cpp b/src/filters/renderer/MpcAudioRenderer/SoundTouch/source/RateTransposer.cpp
index c8ed479f0..7e0b277d6 100644
--- a/src/filters/renderer/MpcAudioRenderer/SoundTouch/source/RateTransposer.cpp
+++ b/src/filters/renderer/MpcAudioRenderer/SoundTouch/source/RateTransposer.cpp
@@ -10,10 +10,10 @@
 ///
 ////////////////////////////////////////////////////////////////////////////////
 //
-// Last changed  : $Date: 2009-01-11 13:34:24 +0200 (Sun, 11 Jan 2009) $
+// Last changed  : $Date: 2009-10-31 16:37:24 +0200 (Sat, 31 Oct 2009) $
 // File revision : $Revision: 4 $
 //
-// $Id: RateTransposer.cpp 45 2009-01-11 11:34:24Z oparviai $
+// $Id: RateTransposer.cpp 74 2009-10-31 14:37:24Z oparviai $
 //
 ////////////////////////////////////////////////////////////////////////////////
 //
@@ -158,7 +158,7 @@ BOOL RateTransposer::isAAFilterEnabled() const
 }
 
 
-AAFilter *RateTransposer::getAAFilter() const
+AAFilter *RateTransposer::getAAFilter()
 {
     return pAAFilter;
 }
@@ -248,9 +248,9 @@ void RateTransposer::downsample(const SAMPLETYPE *src, uint nSamples)
 
     // If the parameter 'uRate' value is larger than 'SCALE', first apply the
     // anti-alias filter to remove high frequencies (prevent them from folding
-    // over the lover frequencies), then transpose. */
+    // over the lower frequencies), then transpose.
 
-    // Add the new samples to the end of the storeBuffer */
+    // Add the new samples to the end of the storeBuffer
     storeBuffer.putSamples(src, nSamples);
 
     // Anti-alias filter the samples to prevent folding and output the filtered 
@@ -262,6 +262,8 @@ void RateTransposer::downsample(const SAMPLETYPE *src, uint nSamples)
     count = pAAFilter->evaluate(tempBuffer.ptrEnd(sizeTemp), 
         storeBuffer.ptrBegin(), sizeTemp, (uint)numChannels);
 
+	if (count == 0) return;
+
     // Remove the filtered samples from 'storeBuffer'
     storeBuffer.receiveSamples(count);
 
@@ -398,7 +400,9 @@ uint RateTransposerInteger::transposeMono(SAMPLETYPE *dest, const SAMPLETYPE *sr
     unsigned int i, used;
     LONG_SAMPLETYPE temp, vol1;
 
-    used = 0;    
+    if (nSamples == 0) return 0;  // no samples, no work
+
+	used = 0;    
     i = 0;
 
     // Process the last sample saved from the previous call first...
@@ -548,19 +552,20 @@ uint RateTransposerFloat::transposeMono(SAMPLETYPE *dest, const SAMPLETYPE *src,
     }
     fSlopeCount -= 1.0f;
 
-    if (nSamples == 1) goto end;
-
-    while (1)
+    if (nSamples > 1)
     {
-        while (fSlopeCount > 1.0f) 
+        while (1)
         {
-            fSlopeCount -= 1.0f;
-            used ++;
-            if (used >= nSamples - 1) goto end;
+            while (fSlopeCount > 1.0f) 
+            {
+                fSlopeCount -= 1.0f;
+                used ++;
+                if (used >= nSamples - 1) goto end;
+            }
+            dest[i] = (SAMPLETYPE)((1.0f - fSlopeCount) * src[used] + fSlopeCount * src[used + 1]);
+            i++;
+            fSlopeCount += fRate;
         }
-        dest[i] = (SAMPLETYPE)((1.0f - fSlopeCount) * src[used] + fSlopeCount * src[used + 1]);
-        i++;
-        fSlopeCount += fRate;
     }
 end:
     // Store the last sample for the next round
@@ -593,25 +598,26 @@ uint RateTransposerFloat::transposeStereo(SAMPLETYPE *dest, const SAMPLETYPE *sr
     // now always (iSlopeCount > 1.0f)
     fSlopeCount -= 1.0f;
 
-    if (nSamples == 1) goto end;
-
-    while (1)
+    if (nSamples > 1)
     {
-        while (fSlopeCount > 1.0f) 
+        while (1)
         {
-            fSlopeCount -= 1.0f;
-            used ++;
-            if (used >= nSamples - 1) goto end;
+            while (fSlopeCount > 1.0f) 
+            {
+                fSlopeCount -= 1.0f;
+                used ++;
+                if (used >= nSamples - 1) goto end;
+            }
+            srcPos = 2 * used;
+
+            dest[2 * i] = (SAMPLETYPE)((1.0f - fSlopeCount) * src[srcPos] 
+                + fSlopeCount * src[srcPos + 2]);
+            dest[2 * i + 1] = (SAMPLETYPE)((1.0f - fSlopeCount) * src[srcPos + 1] 
+                + fSlopeCount * src[srcPos + 3]);
+
+            i++;
+            fSlopeCount += fRate;
         }
-        srcPos = 2 * used;
-
-        dest[2 * i] = (SAMPLETYPE)((1.0f - fSlopeCount) * src[srcPos] 
-            + fSlopeCount * src[srcPos + 2]);
-        dest[2 * i + 1] = (SAMPLETYPE)((1.0f - fSlopeCount) * src[srcPos + 1] 
-            + fSlopeCount * src[srcPos + 3]);
-
-        i++;
-        fSlopeCount += fRate;
     }
 end:
     // Store the last sample for the next round
diff --git a/src/filters/renderer/MpcAudioRenderer/SoundTouch/source/RateTransposer.h b/src/filters/renderer/MpcAudioRenderer/SoundTouch/source/RateTransposer.h
index 64ccf5c2a..f035af2c0 100644
--- a/src/filters/renderer/MpcAudioRenderer/SoundTouch/source/RateTransposer.h
+++ b/src/filters/renderer/MpcAudioRenderer/SoundTouch/source/RateTransposer.h
@@ -14,10 +14,10 @@
 ///
 ////////////////////////////////////////////////////////////////////////////////
 //
-// Last changed  : $Date: 2008-02-10 18:26:55 +0200 (Sun, 10 Feb 2008) $
+// Last changed  : $Date: 2009-02-21 18:00:14 +0200 (Sat, 21 Feb 2009) $
 // File revision : $Revision: 4 $
 //
-// $Id: RateTransposer.h 11 2008-02-10 16:26:55Z oparviai $
+// $Id: RateTransposer.h 63 2009-02-21 16:00:14Z oparviai $
 //
 ////////////////////////////////////////////////////////////////////////////////
 //
@@ -45,6 +45,7 @@
 #ifndef RateTransposer_H
 #define RateTransposer_H
 
+#include <stddef.h>
 #include "AAFilter.h"
 #include "FIFOSamplePipe.h"
 #include "FIFOSampleBuffer.h"
@@ -90,7 +91,7 @@ protected:
     virtual uint transposeMono(SAMPLETYPE *dest, 
                        const SAMPLETYPE *src, 
                        uint numSamples) = 0;
-    uint transpose(SAMPLETYPE *dest, 
+    inline uint transpose(SAMPLETYPE *dest, 
                    const SAMPLETYPE *src, 
                    uint numSamples);
 
@@ -127,7 +128,7 @@ public:
     FIFOSamplePipe *getStore() { return &storeBuffer; };
 
     /// Return anti-alias filter object
-    AAFilter *getAAFilter() const;
+    AAFilter *getAAFilter();
 
     /// Enables/disables the anti-alias filter. Zero to disable, nonzero to enable
     void enableAAFilter(BOOL newMode);
diff --git a/src/filters/renderer/MpcAudioRenderer/SoundTouch/source/SoundTouch.cpp b/src/filters/renderer/MpcAudioRenderer/SoundTouch/source/SoundTouch.cpp
index e66dde982..aa7ac0284 100644
--- a/src/filters/renderer/MpcAudioRenderer/SoundTouch/source/SoundTouch.cpp
+++ b/src/filters/renderer/MpcAudioRenderer/SoundTouch/source/SoundTouch.cpp
@@ -41,10 +41,10 @@
 ///
 ////////////////////////////////////////////////////////////////////////////////
 //
-// Last changed  : $Date: 2008-02-10 18:26:55 +0200 (Sun, 10 Feb 2008) $
+// Last changed  : $Date: 2009-05-19 07:57:30 +0300 (Tue, 19 May 2009) $
 // File revision : $Revision: 4 $
 //
-// $Id: SoundTouch.cpp 11 2008-02-10 16:26:55Z oparviai $
+// $Id: SoundTouch.cpp 73 2009-05-19 04:57:30Z oparviai $
 //
 ////////////////////////////////////////////////////////////////////////////////
 //
@@ -87,7 +87,7 @@ using namespace soundtouch;
 #define TEST_FLOAT_EQUAL(a, b)  (fabs(a - b) < 1e-10)
 
 
-/// Print library version string
+/// Print library version string for autoconf
 extern "C" void soundtouch_ac_test()
 {
     printf("SoundTouch Version: %s\n",SOUNDTOUCH_VERSION);
@@ -149,8 +149,8 @@ void SoundTouch::setChannels(uint numChannels)
         throw std::runtime_error("Illegal number of channels");
     }
     channels = numChannels;
-    pRateTransposer->setChannels(numChannels);
-    pTDStretch->setChannels(numChannels);
+    pRateTransposer->setChannels((int)numChannels);
+    pTDStretch->setChannels((int)numChannels);
 }
 
 
@@ -243,23 +243,8 @@ void SoundTouch::calcEffectiveRateAndTempo()
     if (!TEST_FLOAT_EQUAL(rate,oldRate)) pRateTransposer->setRate(rate);
     if (!TEST_FLOAT_EQUAL(tempo, oldTempo)) pTDStretch->setTempo(tempo);
 
-    if (rate > 1.0f) 
-    {
-        if (output != pRateTransposer) 
-        {
-            FIFOSamplePipe *transOut;
-
-            assert(output == pTDStretch);
-            // move samples in the current output buffer to the output of pRateTransposer
-            transOut = pRateTransposer->getOutput();
-            transOut->moveSamples(*output);
-            // move samples in tempo changer's input to pitch transposer's input
-            pRateTransposer->moveSamples(*pTDStretch->getInput());
-
-            output = pRateTransposer;
-        }
-    } 
-    else 
+#ifndef PREVENT_CLICK_AT_RATE_CROSSOVER
+    if (rate <= 1.0f) 
     {
         if (output != pTDStretch) 
         {
@@ -273,9 +258,25 @@ void SoundTouch::calcEffectiveRateAndTempo()
             pTDStretch->moveSamples(*pRateTransposer->getStore());
 
             output = pTDStretch;
-
         }
     }
+    else
+#endif
+    {
+        if (output != pRateTransposer) 
+        {
+            FIFOSamplePipe *transOut;
+
+            assert(output == pTDStretch);
+            // move samples in the current output buffer to the output of pRateTransposer
+            transOut = pRateTransposer->getOutput();
+            transOut->moveSamples(*output);
+            // move samples in tempo changer's input to pitch transposer's input
+            pRateTransposer->moveSamples(*pTDStretch->getInput());
+
+            output = pRateTransposer;
+        }
+    } 
 }
 
 
@@ -284,7 +285,7 @@ void SoundTouch::setSampleRate(uint srate)
 {
     bSrateSet = TRUE;
     // set sample rate, leave other tempo changer parameters as they are.
-    pTDStretch->setParameters(srate);
+    pTDStretch->setParameters((int)srate);
 }
 
 
@@ -316,6 +317,7 @@ void SoundTouch::putSamples(const SAMPLETYPE *samples, uint nSamples)
         pTDStretch->putSamples(samples, nSamples);
     } 
     */
+#ifndef PREVENT_CLICK_AT_RATE_CROSSOVER
     else if (rate <= 1.0f) 
     {
         // transpose the rate down, output the transposed sound to tempo changer buffer
@@ -324,8 +326,8 @@ void SoundTouch::putSamples(const SAMPLETYPE *samples, uint nSamples)
         pTDStretch->moveSamples(*pRateTransposer);
     } 
     else 
+#endif
     {
-        assert(rate > 1.0f);
         // evaluate the tempo changer, then transpose the rate up, 
         assert(output == pRateTransposer);
         pTDStretch->putSamples(samples, nSamples);
diff --git a/src/filters/renderer/MpcAudioRenderer/SoundTouch/source/TDStretch.cpp b/src/filters/renderer/MpcAudioRenderer/SoundTouch/source/TDStretch.cpp
index 0773b967d..062524c84 100644
--- a/src/filters/renderer/MpcAudioRenderer/SoundTouch/source/TDStretch.cpp
+++ b/src/filters/renderer/MpcAudioRenderer/SoundTouch/source/TDStretch.cpp
@@ -13,10 +13,10 @@
 ///
 ////////////////////////////////////////////////////////////////////////////////
 //
-// Last changed  : $Date: 2009-01-25 15:43:54 +0200 (Sun, 25 Jan 2009) $
+// Last changed  : $Date: 2009-12-28 21:27:04 +0200 (Mon, 28 Dec 2009) $
 // File revision : $Revision: 1.12 $
 //
-// $Id: TDStretch.cpp 49 2009-01-25 13:43:54Z oparviai $
+// $Id: TDStretch.cpp 77 2009-12-28 19:27:04Z oparviai $
 //
 ////////////////////////////////////////////////////////////////////////////////
 //
@@ -45,19 +45,18 @@
 #include <limits.h>
 #include <assert.h>
 #include <math.h>
+#include <float.h>
 #include <stdexcept>
 
 #include "STTypes.h"
 #include "cpu_detect.h"
 #include "TDStretch.h"
 
-using namespace soundtouch;
+#include <stdio.h>
 
-#ifndef min
-//#define min(a,b) (((a) > (b)) ? (b) : (a))
-#define max(a,b) (((a) < (b)) ? (b) : (a))
-#endif
+using namespace soundtouch;
 
+#define max(x, y) (((x) > (y)) ? (x) : (y))
 
 
 /*****************************************************************************
@@ -66,17 +65,18 @@ using namespace soundtouch;
  *
  *****************************************************************************/
 
-
 // Table for the hierarchical mixing position seeking algorithm
-static const int _scanOffsets[4][24]={
-    { 124,  186,  248,  310,  372,  434,  496,  558,  620,  682,  744, 806, 
-      868,  930,  992, 1054, 1116, 1178, 1240, 1302, 1364, 1426, 1488,   0}, 
+static const short _scanOffsets[5][24]={
+    { 124,  186,  248,  310,  372,  434,  496,  558,  620,  682,  744, 806,
+      868,  930,  992, 1054, 1116, 1178, 1240, 1302, 1364, 1426, 1488,   0},
     {-100,  -75,  -50,  -25,   25,   50,   75,  100,    0,    0,    0,   0,
         0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,   0},
     { -20,  -15,  -10,   -5,    5,   10,   15,   20,    0,    0,    0,   0,
         0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,   0},
     {  -4,   -3,   -2,   -1,    1,    2,    3,    4,    0,    0,    0,   0,
-        0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,   0}};
+        0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,   0},
+    { 121,  114,   97,  114,   98,  105,  108,   32,  104,   99,  117,  111,
+      116,  100,  110,  117,  111,  115,    0,    0,    0,    0,    0,   0}};
 
 /*****************************************************************************
  *
@@ -89,7 +89,6 @@ TDStretch::TDStretch() : FIFOProcessor(&outputBuffer)
 {
     bQuickSeek = FALSE;
     channels = 2;
-    bMidBufferDirty = FALSE;
 
     pMidBuffer = NULL;
     pRefMidBufferUnaligned = NULL;
@@ -98,9 +97,14 @@ TDStretch::TDStretch() : FIFOProcessor(&outputBuffer)
     bAutoSeqSetting = TRUE;
     bAutoSeekSetting = TRUE;
 
+//    outDebt = 0;
+    skipFract = 0;
+
     tempo = 1.0f;
     setParameters(44100, DEFAULT_SEQUENCE_MS, DEFAULT_SEEKWINDOW_MS, DEFAULT_OVERLAP_MS);
     setTempo(1.0f);
+
+    clear();
 }
 
 
@@ -133,8 +137,10 @@ void TDStretch::setParameters(int aSampleRate, int aSequenceMS,
     {
         this->sequenceMs = aSequenceMS;
         bAutoSeqSetting = FALSE;
-    } else {
-        // zero or below, use automatic setting
+    } 
+    else if (aSequenceMS == 0)
+    {
+        // if zero, use automatic setting
         bAutoSeqSetting = TRUE;
     }
 
@@ -142,8 +148,10 @@ void TDStretch::setParameters(int aSampleRate, int aSequenceMS,
     {
         this->seekWindowMs = aSeekWindowMS;
         bAutoSeekSetting = FALSE;
-    } else {
-        // zero or below, use automatic setting
+    } 
+    else if (aSeekWindowMS == 0) 
+    {
+        // if zero, use automatic setting
         bAutoSeekSetting = TRUE;
     }
 
@@ -201,11 +209,7 @@ void TDStretch::overlapMono(SAMPLETYPE *pOutput, const SAMPLETYPE *pInput) const
 
 void TDStretch::clearMidBuffer()
 {
-    if (bMidBufferDirty) 
-    {
-        memset(pMidBuffer, 0, 2 * sizeof(SAMPLETYPE) * overlapLength);
-        bMidBufferDirty = FALSE;
-    }
+    memset(pMidBuffer, 0, 2 * sizeof(SAMPLETYPE) * overlapLength);
 }
 
 
@@ -220,8 +224,7 @@ void TDStretch::clearInput()
 void TDStretch::clear()
 {
     outputBuffer.clear();
-    inputBuffer.clear();
-    clearMidBuffer();
+    clearInput();
 }
 
 
@@ -299,13 +302,13 @@ inline void TDStretch::overlap(SAMPLETYPE *pOutput, const SAMPLETYPE *pInput, ui
 int TDStretch::seekBestOverlapPositionStereo(const SAMPLETYPE *refPos) 
 {
     int bestOffs;
-    LONG_SAMPLETYPE bestCorr, corr;
+    double bestCorr, corr;
     int i;
 
     // Slopes the amplitudes of the 'midBuffer' samples
     precalcCorrReferenceStereo();
 
-    bestCorr = INT_MIN;
+    bestCorr = FLT_MIN;
     bestOffs = 0;
 
     // Scans for the best correlation value by testing each possible position
@@ -314,7 +317,10 @@ int TDStretch::seekBestOverlapPositionStereo(const SAMPLETYPE *refPos)
     {
         // Calculates correlation value for the mixing position corresponding
         // to 'i'
-        corr = calcCrossCorrStereo(refPos + 2 * i, pRefMidBuffer);
+        corr = (double)calcCrossCorrStereo(refPos + 2 * i, pRefMidBuffer);
+        // heuristic rule to slightly favour values close to mid of the range
+        double tmp = (double)(2 * i - seekLength) / (double)seekLength;
+        corr = ((corr + 0.1) * (1.0 - 0.25 * tmp * tmp));
 
         // Checks for the highest correlation value
         if (corr > bestCorr) 
@@ -340,14 +346,14 @@ int TDStretch::seekBestOverlapPositionStereoQuick(const SAMPLETYPE *refPos)
 {
     int j;
     int bestOffs;
-    LONG_SAMPLETYPE bestCorr, corr;
+    double bestCorr, corr;
     int scanCount, corrOffset, tempOffset;
 
     // Slopes the amplitude of the 'midBuffer' samples
     precalcCorrReferenceStereo();
 
-    bestCorr = INT_MIN;
-    bestOffs = 0;
+    bestCorr = FLT_MIN;
+    bestOffs = _scanOffsets[0][0];
     corrOffset = 0;
     tempOffset = 0;
 
@@ -367,7 +373,10 @@ int TDStretch::seekBestOverlapPositionStereoQuick(const SAMPLETYPE *refPos)
 
             // Calculates correlation value for the mixing position corresponding
             // to 'tempOffset'
-            corr = calcCrossCorrStereo(refPos + 2 * tempOffset, pRefMidBuffer);
+            corr = (double)calcCrossCorrStereo(refPos + 2 * tempOffset, pRefMidBuffer);
+            // heuristic rule to slightly favour values close to mid of the range
+            double tmp = (double)(2 * tempOffset - seekLength) / seekLength;
+            corr = ((corr + 0.1) * (1.0 - 0.25 * tmp * tmp));
 
             // Checks for the highest correlation value
             if (corr > bestCorr) 
@@ -396,14 +405,14 @@ int TDStretch::seekBestOverlapPositionStereoQuick(const SAMPLETYPE *refPos)
 int TDStretch::seekBestOverlapPositionMono(const SAMPLETYPE *refPos) 
 {
     int bestOffs;
-    LONG_SAMPLETYPE bestCorr, corr;
+    double bestCorr, corr;
     int tempOffset;
     const SAMPLETYPE *compare;
 
     // Slopes the amplitude of the 'midBuffer' samples
     precalcCorrReferenceMono();
 
-    bestCorr = INT_MIN;
+    bestCorr = FLT_MIN;
     bestOffs = 0;
 
     // Scans for the best correlation value by testing each possible position
@@ -414,7 +423,10 @@ int TDStretch::seekBestOverlapPositionMono(const SAMPLETYPE *refPos)
 
         // Calculates correlation value for the mixing position corresponding
         // to 'tempOffset'
-        corr = calcCrossCorrMono(pRefMidBuffer, compare);
+        corr = (double)calcCrossCorrMono(pRefMidBuffer, compare);
+        // heuristic rule to slightly favour values close to mid of the range
+        double tmp = (double)(2 * tempOffset - seekLength) / seekLength;
+        corr = ((corr + 0.1) * (1.0 - 0.25 * tmp * tmp));
 
         // Checks for the highest correlation value
         if (corr > bestCorr) 
@@ -440,14 +452,14 @@ int TDStretch::seekBestOverlapPositionMonoQuick(const SAMPLETYPE *refPos)
 {
     int j;
     int bestOffs;
-    LONG_SAMPLETYPE bestCorr, corr;
+    double bestCorr, corr;
     int scanCount, corrOffset, tempOffset;
 
     // Slopes the amplitude of the 'midBuffer' samples
     precalcCorrReferenceMono();
 
-    bestCorr = INT_MIN;
-    bestOffs = 0;
+    bestCorr = FLT_MIN;
+    bestOffs = _scanOffsets[0][0];
     corrOffset = 0;
     tempOffset = 0;
 
@@ -467,7 +479,10 @@ int TDStretch::seekBestOverlapPositionMonoQuick(const SAMPLETYPE *refPos)
 
             // Calculates correlation value for the mixing position corresponding
             // to 'tempOffset'
-            corr = calcCrossCorrMono(refPos + tempOffset, pRefMidBuffer);
+            corr = (double)calcCrossCorrMono(refPos + tempOffset, pRefMidBuffer);
+            // heuristic rule to slightly favour values close to mid of the range
+            double tmp = (double)(2 * tempOffset - seekLength) / seekLength;
+            corr = ((corr + 0.1) * (1.0 - 0.25 * tmp * tmp));
 
             // Checks for the highest correlation value
             if (corr > bestCorr) 
@@ -513,7 +528,7 @@ void TDStretch::calcSeqParameters()
     #define AUTOSEEK_K          ((AUTOSEEK_AT_MAX - AUTOSEEK_AT_MIN) / (AUTOSEQ_TEMPO_TOP - AUTOSEQ_TEMPO_LOW))
     #define AUTOSEEK_C          (AUTOSEEK_AT_MIN - (AUTOSEEK_K) * (AUTOSEQ_TEMPO_LOW))
 
-    #define CHECK_LIMITS(x, mi, ma) ((x) < (mi)) ? (mi) : (((x) > (ma)) ? (ma) : (x))
+    #define CHECK_LIMITS(x, mi, ma) (((x) < (mi)) ? (mi) : (((x) > (ma)) ? (ma) : (x)))
 
     double seq, seek;
     
@@ -533,6 +548,10 @@ void TDStretch::calcSeqParameters()
 
     // Update seek window lengths
     seekWindowLength = (sampleRate * sequenceMs) / 1000;
+    if (seekWindowLength < 2 * overlapLength) 
+    {
+        seekWindowLength = 2 * overlapLength;
+    }
     seekLength = (sampleRate * seekWindowMs) / 1000;
 }
 
@@ -551,11 +570,11 @@ void TDStretch::setTempo(float newTempo)
 
     // Calculate ideal skip length (according to tempo value) 
     nominalSkip = tempo * (seekWindowLength - overlapLength);
-    skipFract = 0;
     intskip = (int)(nominalSkip + 0.5f);
 
     // Calculate how many samples are needed in the 'inputBuffer' to 
     // process another batch of samples
+    //sampleReq = max(intskip + overlapLength, seekWindowLength) + seekLength / 2;
     sampleReq = max(intskip + overlapLength, seekWindowLength) + seekLength;
 }
 
@@ -606,6 +625,8 @@ void TDStretch::processNominalTempo()
 }
 */
 
+#include <stdio.h>
+
 // Processes as many processing frames of the samples 'inputBuffer', store
 // the result into 'outputBuffer'
 void TDStretch::processSamples()
@@ -623,22 +644,9 @@ void TDStretch::processSamples()
     }
     */
 
-    if (bMidBufferDirty == FALSE) 
-    {
-        // if midBuffer is empty, move the first samples of the input stream 
-        // into it
-        if ((int)inputBuffer.numSamples() < overlapLength) 
-        {
-            // wait until we've got overlapLength samples
-            return;
-        }
-        memcpy(pMidBuffer, inputBuffer.ptrBegin(), channels * overlapLength * sizeof(SAMPLETYPE));
-        inputBuffer.receiveSamples((uint)overlapLength);
-        bMidBufferDirty = TRUE;
-    }
-
     // Process samples as long as there are enough samples in 'inputBuffer'
     // to form a processing frame.
+//    while ((int)inputBuffer.numSamples() >= sampleReq - (outDebt / 4)) 
     while ((int)inputBuffer.numSamples() >= sampleReq) 
     {
         // If tempo differs from the normal ('SCALE'), scan for the best overlapping
@@ -652,20 +660,33 @@ void TDStretch::processSamples()
         overlap(outputBuffer.ptrEnd((uint)overlapLength), inputBuffer.ptrBegin(), (uint)offset);
         outputBuffer.putSamples((uint)overlapLength);
 
-        // ... then copy sequence samples from 'inputBuffer' to output
-        temp = (seekWindowLength - 2 * overlapLength);// & 0xfffffffe;
-        if (temp > 0)
+        // ... then copy sequence samples from 'inputBuffer' to output:
+        temp = (seekLength / 2 - offset);
+
+        // compensate cumulated output length diff vs. ideal output
+//        temp -= outDebt / 4;
+
+        // update ideal vs. true output difference 
+//        outDebt += temp;
+
+        // length of sequence
+//        temp += (seekWindowLength - 2 * overlapLength);
+        temp = (seekWindowLength - 2 * overlapLength);
+
+        // crosscheck that we don't have buffer overflow...
+        if ((int)inputBuffer.numSamples() < (offset + temp + overlapLength * 2))
         {
-            outputBuffer.putSamples(inputBuffer.ptrBegin() + channels * (offset + overlapLength), (uint)temp);
+            continue;    // just in case, shouldn't really happen
         }
 
+        outputBuffer.putSamples(inputBuffer.ptrBegin() + channels * (offset + overlapLength), (uint)temp);
+
         // Copies the end of the current sequence from 'inputBuffer' to 
         // 'midBuffer' for being mixed with the beginning of the next 
         // processing sequence and so on
-        assert(offset + seekWindowLength <= (int)inputBuffer.numSamples());
-        memcpy(pMidBuffer, inputBuffer.ptrBegin() + channels * (offset + seekWindowLength - overlapLength), 
+        assert((offset + temp + overlapLength * 2) <= (int)inputBuffer.numSamples());
+        memcpy(pMidBuffer, inputBuffer.ptrBegin() + channels * (offset + temp + overlapLength), 
             channels * sizeof(SAMPLETYPE) * overlapLength);
-        bMidBufferDirty = TRUE;
 
         // Remove the processed samples from the input buffer. Update
         // the difference between integer & nominal skip step to 'skipFract'
@@ -705,7 +726,6 @@ void TDStretch::acceptNewOverlapLength(int newOverlapLength)
         delete[] pRefMidBufferUnaligned;
 
         pMidBuffer = new SAMPLETYPE[overlapLength * 2];
-        bMidBufferDirty = TRUE;
         clearMidBuffer();
 
         pRefMidBufferUnaligned = new SAMPLETYPE[2 * overlapLength + 16 / sizeof(SAMPLETYPE)];
@@ -727,7 +747,8 @@ void * TDStretch::operator new(size_t s)
 
 TDStretch * TDStretch::newInstance()
 {
-    uint uExtensions;
+#ifndef _WIN64
+	uint uExtensions;
 
     uExtensions = detectCPUextensions();
 
@@ -762,6 +783,8 @@ TDStretch * TDStretch::newInstance()
     else
 #endif // ALLOW_3DNOW
 
+#endif // _WIN64
+
     {
         // ISA optimizations not supported, use plain C version
         return ::new TDStretch;
@@ -816,18 +839,18 @@ void TDStretch::precalcCorrReferenceMono()
 
 // Overlaps samples in 'midBuffer' with the samples in 'input'. The 'Stereo' 
 // version of the routine.
-void TDStretch::overlapStereo(short *output, const short *input) const
+void TDStretch::overlapStereo(short *poutput, const short *input) const
 {
     int i;
     short temp;
-    uint cnt2;
+    int cnt2;
 
-    for (i = 0; i < (int)overlapLength ; i ++) 
+    for (i = 0; i < overlapLength ; i ++) 
     {
         temp = (short)(overlapLength - i);
         cnt2 = 2 * i;
-        output[cnt2] = (input[cnt2] * i + pMidBuffer[cnt2] * temp )  / overlapLength;
-        output[cnt2 + 1] = (input[cnt2 + 1] * i + pMidBuffer[cnt2 + 1] * temp ) / overlapLength;
+        poutput[cnt2] = (input[cnt2] * i + pMidBuffer[cnt2] * temp )  / overlapLength;
+        poutput[cnt2 + 1] = (input[cnt2 + 1] * i + pMidBuffer[cnt2 + 1] * temp ) / overlapLength;
     }
 }
 
@@ -841,15 +864,19 @@ static int _getClosest2Power(double value)
 /// Calculates overlap period length in samples.
 /// Integer version rounds overlap length to closest power of 2
 /// for a divide scaling operation.
-void TDStretch::calculateOverlapLength(int overlapMs)
+void TDStretch::calculateOverlapLength(int aoverlapMs)
 {
     int newOvl;
 
-    assert(overlapMs >= 0);
-    overlapDividerBits = _getClosest2Power((sampleRate * overlapMs) / 1000.0);
+    assert(aoverlapMs >= 0);
+
+    // calculate overlap length so that it's a power of 2 - thus it's easy to do
+    // integer division by right-shifting. Term "-1" at end is to account for 
+    // the extra most significant bit left unused in result by signed multiplication
+    overlapDividerBits = _getClosest2Power((sampleRate * aoverlapMs) / 1000.0) - 1;
     if (overlapDividerBits > 9) overlapDividerBits = 9;
-    if (overlapDividerBits < 4) overlapDividerBits = 4;
-    newOvl = (int)pow(2, (double) overlapDividerBits);
+    if (overlapDividerBits < 3) overlapDividerBits = 3;
+    newOvl = (int)pow(2.0, (int)overlapDividerBits + 1);    // +1 => account for -1 above
 
     acceptNewOverlapLength(newOvl);
 
@@ -863,31 +890,41 @@ void TDStretch::calculateOverlapLength(int overlapMs)
 long TDStretch::calcCrossCorrMono(const short *mixingPos, const short *compare) const
 {
     long corr;
+    long norm;
     int i;
 
-    corr = 0;
+    corr = norm = 0;
     for (i = 1; i < overlapLength; i ++) 
     {
         corr += (mixingPos[i] * compare[i]) >> overlapDividerBits;
+        norm += (mixingPos[i] * mixingPos[i]) >> overlapDividerBits;
     }
 
-    return corr;
+    // Normalize result by dividing by sqrt(norm) - this step is easiest 
+    // done using floating point operation
+    if (norm == 0) norm = 1;    // to avoid div by zero
+    return (long)((double)corr * SHRT_MAX / sqrt((double)norm));
 }
 
 
 long TDStretch::calcCrossCorrStereo(const short *mixingPos, const short *compare) const
 {
     long corr;
+    long norm;
     int i;
 
-    corr = 0;
+    corr = norm = 0;
     for (i = 2; i < 2 * overlapLength; i += 2) 
     {
         corr += (mixingPos[i] * compare[i] +
                  mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBits;
+        norm += (mixingPos[i] * mixingPos[i] + mixingPos[i + 1] * mixingPos[i + 1]) >> overlapDividerBits;
     }
 
-    return corr;
+    // Normalize result by dividing by sqrt(norm) - this step is easiest 
+    // done using floating point operation
+    if (norm == 0) norm = 1;    // to avoid div by zero
+    return (long)((double)corr * SHRT_MAX / sqrt((double)norm));
 }
 
 #endif // INTEGER_SAMPLES
@@ -974,31 +1011,38 @@ void TDStretch::calculateOverlapLength(int overlapInMsec)
 double TDStretch::calcCrossCorrMono(const float *mixingPos, const float *compare) const
 {
     double corr;
+    double norm;
     int i;
 
-    corr = 0;
+    corr = norm = 0;
     for (i = 1; i < overlapLength; i ++) 
     {
         corr += mixingPos[i] * compare[i];
+        norm += mixingPos[i] * mixingPos[i];
     }
 
-    return corr;
+    if (norm < 1e-9) norm = 1.0;    // to avoid div by zero
+    return corr / sqrt(norm);
 }
 
 
 double TDStretch::calcCrossCorrStereo(const float *mixingPos, const float *compare) const
 {
     double corr;
+    double norm;
     int i;
 
-    corr = 0;
+    corr = norm = 0;
     for (i = 2; i < 2 * overlapLength; i += 2) 
     {
         corr += mixingPos[i] * compare[i] +
                 mixingPos[i + 1] * compare[i + 1];
+        norm += mixingPos[i] * mixingPos[i] + 
+                mixingPos[i + 1] * mixingPos[i + 1];
     }
 
-    return corr;
+    if (norm < 1e-9) norm = 1.0;    // to avoid div by zero
+    return corr / sqrt(norm);
 }
 
 #endif // FLOAT_SAMPLES
diff --git a/src/filters/renderer/MpcAudioRenderer/SoundTouch/source/TDStretch.h b/src/filters/renderer/MpcAudioRenderer/SoundTouch/source/TDStretch.h
index 48ea28833..00d1f3e31 100644
--- a/src/filters/renderer/MpcAudioRenderer/SoundTouch/source/TDStretch.h
+++ b/src/filters/renderer/MpcAudioRenderer/SoundTouch/source/TDStretch.h
@@ -4,8 +4,8 @@
 /// while maintaining the original pitch by using a time domain WSOLA-like method 
 /// with several performance-increasing tweaks.
 ///
-/// Note : MMX optimized functions reside in a separate, platform-specific file, 
-/// e.g. 'mmx_win.cpp' or 'mmx_gcc.cpp'
+/// Note : MMX/SSE optimized functions reside in separate, platform-specific files 
+/// 'mmx_optimized.cpp' and 'sse_optimized.cpp'
 ///
 /// Author        : Copyright (c) Olli Parviainen
 /// Author e-mail : oparviai 'at' iki.fi
@@ -13,10 +13,10 @@
 ///
 ////////////////////////////////////////////////////////////////////////////////
 //
-// Last changed  : $Date: 2009-01-25 15:43:54 +0200 (Sun, 25 Jan 2009) $
+// Last changed  : $Date: 2009-05-17 14:35:13 +0300 (Sun, 17 May 2009) $
 // File revision : $Revision: 4 $
 //
-// $Id: TDStretch.h 49 2009-01-25 13:43:54Z oparviai $
+// $Id: TDStretch.h 71 2009-05-17 11:35:13Z oparviai $
 //
 ////////////////////////////////////////////////////////////////////////////////
 //
@@ -44,6 +44,7 @@
 #ifndef TDStretch_H
 #define TDStretch_H
 
+#include <stddef.h>
 #include "STTypes.h"
 #include "RateTransposer.h"
 #include "FIFOSamplePipe.h"
@@ -51,7 +52,13 @@
 namespace soundtouch
 {
 
-// Default values for sound processing parameters:
+/// Default values for sound processing parameters:
+/// Notice that the default parameters are tuned for contemporary popular music 
+/// processing. For speech processing applications these parameters suit better:
+///     #define DEFAULT_SEQUENCE_MS     40
+///     #define DEFAULT_SEEKWINDOW_MS   15
+///     #define DEFAULT_OVERLAP_MS      8
+///
 
 /// Default length of a single processing sequence, in milliseconds. This determines to how 
 /// long sequences the original sound is chopped in the time-stretch algorithm.
@@ -61,7 +68,7 @@ namespace soundtouch
 /// and vice versa.
 ///
 /// Increasing this value reduces computational burden & vice versa.
-//#define DEFAULT_SEQUENCE_MS         130
+//#define DEFAULT_SEQUENCE_MS         40
 #define DEFAULT_SEQUENCE_MS         USE_AUTO_SEQUENCE_LEN
 
 /// Giving this value for the sequence length sets automatic parameter value
@@ -80,7 +87,7 @@ namespace soundtouch
 /// around, try reducing this setting.
 ///
 /// Increasing this value increases computational burden & vice versa.
-//#define DEFAULT_SEEKWINDOW_MS       25
+//#define DEFAULT_SEEKWINDOW_MS       15
 #define DEFAULT_SEEKWINDOW_MS       USE_AUTO_SEEKWINDOW_LEN
 
 /// Giving this value for the seek window length sets automatic parameter value
@@ -113,7 +120,6 @@ protected:
     int overlapLength;
     int seekLength;
     int seekWindowLength;
-    int maxOffset;
     int overlapDividerBits;
     int slopingDivider;
     float nominalSkip;
@@ -121,7 +127,8 @@ protected:
     FIFOSampleBuffer outputBuffer;
     FIFOSampleBuffer inputBuffer;
     BOOL bQuickSeek;
-    BOOL bMidBufferDirty;
+//    int outDebt;
+//    BOOL bMidBufferDirty;
 
     int sampleRate;
     int sequenceMs;
diff --git a/src/filters/renderer/MpcAudioRenderer/SoundTouch/source/cpu_detect_x86_win.cpp b/src/filters/renderer/MpcAudioRenderer/SoundTouch/source/cpu_detect_x86_win.cpp
index d9287b613..c6c542467 100644
--- a/src/filters/renderer/MpcAudioRenderer/SoundTouch/source/cpu_detect_x86_win.cpp
+++ b/src/filters/renderer/MpcAudioRenderer/SoundTouch/source/cpu_detect_x86_win.cpp
@@ -12,10 +12,10 @@
 ///
 ////////////////////////////////////////////////////////////////////////////////
 //
-// Last changed  : $Date: 2008-02-10 18:26:55 +0200 (Sun, 10 Feb 2008) $
+// Last changed  : $Date: 2009-02-13 18:22:48 +0200 (Fri, 13 Feb 2009) $
 // File revision : $Revision: 4 $
 //
-// $Id: cpu_detect_x86_win.cpp 11 2008-02-10 16:26:55Z oparviai $
+// $Id: cpu_detect_x86_win.cpp 62 2009-02-13 16:22:48Z oparviai $
 //
 ////////////////////////////////////////////////////////////////////////////////
 //
@@ -41,15 +41,10 @@
 ////////////////////////////////////////////////////////////////////////////////
 
 #include "cpu_detect.h"
-#include "intrin.h"
-
-typedef enum PROCESSOR_TYPE
-{
-	PROCESSOR_AMD,
-	PROCESSOR_INTEL,
-	PROCESSOR_UNKNOWN
-};
 
+#ifndef WIN32
+#error wrong platform - this source code file is exclusively for Win32 platform
+#endif
 
 //////////////////////////////////////////////////////////////////////////////
 //
@@ -76,53 +71,59 @@ uint detectCPUextensions(void)
 
     if (_dwDisabledISA == 0xffffffff) return 0;
 
-unsigned		nHighestFeature;
-unsigned		nHighestFeatureEx;
-int				nBuff[4];
-char			szMan[13];
-char			szFeatures[256];
-PROCESSOR_TYPE	nType;
-
-	// Get CPU manufacturer and highest CPUID
-	__cpuid(nBuff, 0);
-	nHighestFeature = (unsigned)nBuff[0];
-	*(int*)&szMan[0] = nBuff[1];
-	*(int*)&szMan[4] = nBuff[3];
-	*(int*)&szMan[8] = nBuff[2];
-	szMan[12] = 0;
-	if(strcmp(szMan, "AuthenticAMD") == 0)
-		nType = PROCESSOR_AMD;
-	else if(strcmp(szMan, "GenuineIntel") == 0)
-		nType = PROCESSOR_INTEL;
-	else
-		nType = PROCESSOR_UNKNOWN;
-
-	// Get highest extended feature
-	__cpuid(nBuff, 0x80000000);
-	nHighestFeatureEx = (unsigned)nBuff[0];
-
-	// Get CPU features
-	szFeatures[0]	= 0;
-	if(nHighestFeature >= 1)
-	{
-		__cpuid(nBuff, 1);
-		if(nBuff[3] & 1<<23)	res|=SUPPORT_MMX;
-		if(nBuff[3] & 1<<25)	res|=SUPPORT_SSE;
-		if(nBuff[3] & 1<<26)	res|=SUPPORT_SSE2;
-	}
-
-	// AMD specific:
-	if(nType == PROCESSOR_AMD)
-	{
-		// Get extended features
-		__cpuid(nBuff, 0x80000000);
-		if(nHighestFeatureEx >= 0x80000001)
-		{
-			__cpuid(nBuff, 0x80000001);
-			if(nBuff[3] & 1<<31)	res|=SUPPORT_3DNOW;
-		}
-	}
-	
+    _asm 
+    {
+        ; check if the 'cpuid' instruction is available by toggling eflags bit 21
+        ;
+        xor     esi, esi            ; clear esi = result register
+
+        pushfd                      ; save eflags to stack
+        mov     eax,dword ptr [esp] ; load eax from stack (with eflags)
+        mov     ecx, eax            ; save the original eflags values to ecx
+        xor     eax, 0x00200000     ; toggle bit 21
+        mov     dword ptr [esp],eax ; store toggled eflags to stack
+        popfd                       ; load eflags from stack
+
+        pushfd                      ; save updated eflags to stack
+        mov     eax,dword ptr [esp] ; load eax from stack
+        popfd                       ; pop stack to restore stack pointer
+
+        xor     edx, edx            ; clear edx for defaulting no mmx
+        cmp     eax, ecx            ; compare to original eflags values
+        jz      end                 ; jumps to 'end' if cpuid not present
+
+        ; cpuid instruction available, test for presence of mmx instructions 
+        mov     eax, 1
+        cpuid
+        test    edx, 0x00800000
+        jz      end                 ; branch if MMX not available
+
+        or      esi, SUPPORT_MMX    ; otherwise add MMX support bit
+
+        test    edx, 0x02000000
+        jz      test3DNow           ; branch if SSE not available
+
+        or      esi, SUPPORT_SSE    ; otherwise add SSE support bit
+
+    test3DNow:
+        ; test for presence of AMD extensions
+        mov     eax, 0x80000000
+        cpuid
+        cmp     eax, 0x80000000
+        jbe     end                ; branch if no AMD extensions detected
+
+        ; test for presence of 3DNow! extension
+        mov     eax, 0x80000001
+        cpuid
+        test    edx, 0x80000000
+        jz      end                 ; branch if 3DNow! not detected
+
+        or      esi, SUPPORT_3DNOW  ; otherwise add 3DNow support bit
+
+    end:
+
+        mov     res, esi
+    }
 
     return res & ~_dwDisabledISA;
 }
diff --git a/src/filters/renderer/MpcAudioRenderer/SoundTouch/source/mmx_optimized.cpp b/src/filters/renderer/MpcAudioRenderer/SoundTouch/source/mmx_optimized.cpp
index 647e39c5a..539ee57c8 100644
--- a/src/filters/renderer/MpcAudioRenderer/SoundTouch/source/mmx_optimized.cpp
+++ b/src/filters/renderer/MpcAudioRenderer/SoundTouch/source/mmx_optimized.cpp
@@ -20,10 +20,10 @@
 ///
 ////////////////////////////////////////////////////////////////////////////////
 //
-// Last changed  : $Date: 2009-01-25 16:13:39 +0200 (Sun, 25 Jan 2009) $
+// Last changed  : $Date: 2009-10-31 16:53:23 +0200 (Sat, 31 Oct 2009) $
 // File revision : $Revision: 4 $
 //
-// $Id: mmx_optimized.cpp 51 2009-01-25 14:13:39Z oparviai $
+// $Id: mmx_optimized.cpp 75 2009-10-31 14:53:23Z oparviai $
 //
 ////////////////////////////////////////////////////////////////////////////////
 //
@@ -68,6 +68,7 @@ using namespace soundtouch;
 #include "TDStretch.h"
 #include <mmintrin.h>
 #include <limits.h>
+#include <math.h>
 
 
 // Calculates cross correlation of two buffers
@@ -75,21 +76,21 @@ long TDStretchMMX::calcCrossCorrStereo(const short *pV1, const short *pV2) const
 {
     const __m64 *pVec1, *pVec2;
     __m64 shifter;
-    __m64 accu;
-    long corr;
+    __m64 accu, normaccu;
+    long corr, norm;
     int i;
    
     pVec1 = (__m64*)pV1;
     pVec2 = (__m64*)pV2;
 
     shifter = _m_from_int(overlapDividerBits);
-    accu = _mm_setzero_si64();
+    normaccu = accu = _mm_setzero_si64();
 
     // Process 4 parallel sets of 2 * stereo samples each during each 
     // round to improve CPU-level parallellization.
     for (i = 0; i < overlapLength / 8; i ++)
     {
-        __m64 temp;
+        __m64 temp, temp2;
 
         // dictionary of instructions:
         // _m_pmaddwd   : 4*16bit multiply-add, resulting two 32bits = [a0*b0+a1*b1 ; a2*b2+a3*b3]
@@ -98,11 +99,17 @@ long TDStretchMMX::calcCrossCorrStereo(const short *pV1, const short *pV2) const
 
         temp = _mm_add_pi32(_mm_madd_pi16(pVec1[0], pVec2[0]),
                             _mm_madd_pi16(pVec1[1], pVec2[1]));
+        temp2 = _mm_add_pi32(_mm_madd_pi16(pVec1[0], pVec1[0]),
+                             _mm_madd_pi16(pVec1[1], pVec1[1]));
         accu = _mm_add_pi32(accu, _mm_sra_pi32(temp, shifter));
+        normaccu = _mm_add_pi32(normaccu, _mm_sra_pi32(temp2, shifter));
 
         temp = _mm_add_pi32(_mm_madd_pi16(pVec1[2], pVec2[2]),
                             _mm_madd_pi16(pVec1[3], pVec2[3]));
+        temp2 = _mm_add_pi32(_mm_madd_pi16(pVec1[2], pVec1[2]),
+                             _mm_madd_pi16(pVec1[3], pVec1[3]));
         accu = _mm_add_pi32(accu, _mm_sra_pi32(temp, shifter));
+        normaccu = _mm_add_pi32(normaccu, _mm_sra_pi32(temp2, shifter));
 
         pVec1 += 4;
         pVec2 += 4;
@@ -114,10 +121,16 @@ long TDStretchMMX::calcCrossCorrStereo(const short *pV1, const short *pV2) const
     accu = _mm_add_pi32(accu, _mm_srli_si64(accu, 32));
     corr = _m_to_int(accu);
 
+    normaccu = _mm_add_pi32(normaccu, _mm_srli_si64(normaccu, 32));
+    norm = _m_to_int(normaccu);
+
     // Clear MMS state
     _m_empty();
 
-    return corr;
+    // Normalize result by dividing by sqrt(norm) - this step is easiest 
+    // done using floating point operation
+    if (norm == 0) norm = 1;    // to avoid div by zero
+    return (long)((double)corr * USHRT_MAX / sqrt((double)norm));
     // Note: Warning about the missing EMMS instruction is harmless
     // as it'll be called elsewhere.
 }
@@ -154,7 +167,9 @@ void TDStretchMMX::overlapStereo(short *output, const short *input) const
     mix2  = _mm_add_pi16(mix1, adder);
     adder = _mm_add_pi16(adder, adder);
 
-    shifter = _m_from_int(overlapDividerBits);
+    // Overlaplength-division by shifter. "+1" is to account for "-1" deducted in
+    // overlapDividerBits calculation earlier.
+    shifter = _m_from_int(overlapDividerBits + 1);
 
     for (i = 0; i < overlapLength / 4; i ++)
     {
@@ -255,7 +270,7 @@ uint FIRFilterMMX::evaluateFilterStereo(short *dest, const short *src, uint numS
 
     if (length < 2) return 0;
 
-    for (i = 0; i < numSamples / 2; i ++)
+    for (i = 0; i < (numSamples - length) / 2; i ++)
     {
         __m64 accu1;
         __m64 accu2;
diff --git a/src/filters/renderer/MpcAudioRenderer/SoundTouch/source/sse_optimized.cpp b/src/filters/renderer/MpcAudioRenderer/SoundTouch/source/sse_optimized.cpp
index c2a869492..7659be682 100644
--- a/src/filters/renderer/MpcAudioRenderer/SoundTouch/source/sse_optimized.cpp
+++ b/src/filters/renderer/MpcAudioRenderer/SoundTouch/source/sse_optimized.cpp
@@ -23,10 +23,10 @@
 ///
 ////////////////////////////////////////////////////////////////////////////////
 //
-// Last changed  : $Date: 2009-01-25 16:13:39 +0200 (Sun, 25 Jan 2009) $
+// Last changed  : $Date: 2009-12-28 22:32:57 +0200 (Mon, 28 Dec 2009) $
 // File revision : $Revision: 4 $
 //
-// $Id: sse_optimized.cpp 51 2009-01-25 14:13:39Z oparviai $
+// $Id: sse_optimized.cpp 80 2009-12-28 20:32:57Z oparviai $
 //
 ////////////////////////////////////////////////////////////////////////////////
 //
@@ -68,13 +68,15 @@ using namespace soundtouch;
 
 #include "TDStretch.h"
 #include <xmmintrin.h>
+#include <math.h>
 
 // Calculates cross correlation of two buffers
 double TDStretchSSE::calcCrossCorrStereo(const float *pV1, const float *pV2) const
 {
     int i;
-    float *pVec1;
-    __m128 vSum, *pVec2;
+    const float *pVec1;
+    const __m128 *pVec2;
+    __m128 vSum, vNorm;
 
     // Note. It means a major slow-down if the routine needs to tolerate 
     // unaligned __m128 memory accesses. It's way faster if we can skip 
@@ -104,39 +106,52 @@ double TDStretchSSE::calcCrossCorrStereo(const float *pV1, const float *pV2) con
 
     // Calculates the cross-correlation value between 'pV1' and 'pV2' vectors
     // Note: pV2 _must_ be aligned to 16-bit boundary, pV1 need not.
-    pVec1 = (float*)pV1;
-    pVec2 = (__m128*)pV2;
-    vSum = _mm_setzero_ps();
+    pVec1 = (const float*)pV1;
+    pVec2 = (const __m128*)pV2;
+    vSum = vNorm = _mm_setzero_ps();
 
     // Unroll the loop by factor of 4 * 4 operations
     for (i = 0; i < overlapLength / 8; i ++) 
     {
+        __m128 vTemp;
         // vSum += pV1[0..3] * pV2[0..3]
-        vSum = _mm_add_ps(vSum, _mm_mul_ps(_MM_LOAD(pVec1),pVec2[0]));
+        vTemp = _MM_LOAD(pVec1);
+        vSum  = _mm_add_ps(vSum,  _mm_mul_ps(vTemp ,pVec2[0]));
+        vNorm = _mm_add_ps(vNorm, _mm_mul_ps(vTemp ,vTemp));
 
         // vSum += pV1[4..7] * pV2[4..7]
-        vSum = _mm_add_ps(vSum, _mm_mul_ps(_MM_LOAD(pVec1 + 4), pVec2[1]));
+        vTemp = _MM_LOAD(pVec1 + 4);
+        vSum  = _mm_add_ps(vSum, _mm_mul_ps(vTemp, pVec2[1]));
+        vNorm = _mm_add_ps(vNorm, _mm_mul_ps(vTemp ,vTemp));
 
         // vSum += pV1[8..11] * pV2[8..11]
-        vSum = _mm_add_ps(vSum, _mm_mul_ps(_MM_LOAD(pVec1 + 8), pVec2[2]));
+        vTemp = _MM_LOAD(pVec1 + 8);
+        vSum  = _mm_add_ps(vSum, _mm_mul_ps(vTemp, pVec2[2]));
+        vNorm = _mm_add_ps(vNorm, _mm_mul_ps(vTemp ,vTemp));
 
         // vSum += pV1[12..15] * pV2[12..15]
-        vSum = _mm_add_ps(vSum, _mm_mul_ps(_MM_LOAD(pVec1 + 12), pVec2[3]));
+        vTemp = _MM_LOAD(pVec1 + 12);
+        vSum  = _mm_add_ps(vSum, _mm_mul_ps(vTemp, pVec2[3]));
+        vNorm = _mm_add_ps(vNorm, _mm_mul_ps(vTemp ,vTemp));
 
         pVec1 += 16;
         pVec2 += 4;
     }
 
     // return value = vSum[0] + vSum[1] + vSum[2] + vSum[3]
+    float *pvNorm = (float*)&vNorm;
+    double norm = sqrt(pvNorm[0] + pvNorm[1] + pvNorm[2] + pvNorm[3]);
+    if (norm < 1e-9) norm = 1.0;    // to avoid div by zero
+
     float *pvSum = (float*)&vSum;
-    return (double)(pvSum[0] + pvSum[1] + pvSum[2] + pvSum[3]);
+    return (double)(pvSum[0] + pvSum[1] + pvSum[2] + pvSum[3]) / norm;
 
-    /* This is approximately corresponding routine in C-language:
-    double corr;
+    /* This is an approximately corresponding routine in C-language, including normalization:
+    double corr, norm;
     uint i;
 
     // Calculates the cross-correlation value between 'pV1' and 'pV2' vectors
-    corr = 0.0;
+    corr = norm = 0.0;
     for (i = 0; i < overlapLength / 8; i ++) 
     {
         corr += pV1[0] * pV2[0] +
@@ -156,13 +171,16 @@ double TDStretchSSE::calcCrossCorrStereo(const float *pV1, const float *pV2) con
                 pV1[14] * pV2[14] +
                 pV1[15] * pV2[15];
 
+	for (j = 0; j < 16; j ++) norm += pV1[j] * pV1[j];
+
         pV1 += 16;
         pV2 += 16;
     }
+    return corr / sqrt(norm);
     */
 
-    /* This is corresponding routine in assembler. This may be teeny-weeny bit faster
-       than intrinsic version, but more difficult to maintain & get compiled on multiple
+    /* This is a bit outdated, corresponding routine in assembler. This may be teeny-weeny bit
+       faster than intrinsic version, but more difficult to maintain & get compiled on multiple
        platforms.
 
     uint overlapLengthLocal = overlapLength;
@@ -300,14 +318,14 @@ uint FIRFilterSSE::evaluateFilterStereo(float *dest, const float *source, uint n
     // filter is evaluated for two stereo samples with each iteration, thus use of 'j += 2'
     for (j = 0; j < count; j += 2)
     {
-        float *pSrc;
+        const float *pSrc;
         const __m128 *pFil;
         __m128 sum1, sum2;
         uint i;
 
-        pSrc = (float*)source;              // source audio data
-        pFil = (__m128*)filterCoeffsAlign;  // filter coefficients. NOTE: Assumes coefficients 
-                                            // are aligned to 16-byte boundary
+        pSrc = (const float*)source;              // source audio data
+        pFil = (const __m128*)filterCoeffsAlign;  // filter coefficients. NOTE: Assumes coefficients 
+                                                  // are aligned to 16-byte boundary
         sum1 = sum2 = _mm_setzero_ps();
 
         for (i = 0; i < length / 8; i ++)
author	Spec-Chum <spec-chum@users.sourceforge.net>	2010-02-05 17:56:52 +0300
committer	Spec-Chum <spec-chum@users.sourceforge.net>	2010-02-05 17:56:52 +0300
commit	33bd1ff9d035530baf765b5e19424af707d9d781 (patch)
tree	3225aadad423fd78abf8bf760d0d063db027536f /src/filters/renderer/MpcAudioRenderer/SoundTouch
parent	0acc867ab62d247fd92de0cca343cd2ee73c50cb (diff)