mirror of
				https://github.com/RetroDECK/Duckstation.git
				synced 2025-04-10 19:15:14 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			1107 lines
		
	
	
		
			34 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			1107 lines
		
	
	
		
			34 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| ///////////////////////////////////////////////////////////////////////////////
 | |
| /// 
 | |
| /// Sampled sound tempo changer/time stretch algorithm. Changes the sound tempo 
 | |
| /// while maintaining the original pitch by using a time domain WSOLA-like 
 | |
| /// method with several performance-increasing tweaks.
 | |
| ///
 | |
| /// Notes : MMX optimized functions reside in a separate, platform-specific 
 | |
| /// file, e.g. 'mmx_win.cpp' or 'mmx_gcc.cpp'.
 | |
| ///
 | |
| /// This source file contains OpenMP optimizations that allow speeding up the
 | |
| /// corss-correlation algorithm by executing it in several threads / CPU cores 
 | |
| /// in parallel. See the following article link for more detailed discussion 
 | |
| /// about SoundTouch OpenMP optimizations:
 | |
| /// http://www.softwarecoven.com/parallel-computing-in-embedded-mobile-devices
 | |
| ///
 | |
| /// Author        : Copyright (c) Olli Parviainen
 | |
| /// Author e-mail : oparviai 'at' iki.fi
 | |
| /// SoundTouch WWW: http://www.surina.net/soundtouch
 | |
| ///
 | |
| ////////////////////////////////////////////////////////////////////////////////
 | |
| //
 | |
| // License :
 | |
| //
 | |
| //  SoundTouch audio processing library
 | |
| //  Copyright (c) Olli Parviainen
 | |
| //
 | |
| //  This library is free software; you can redistribute it and/or
 | |
| //  modify it under the terms of the GNU Lesser General Public
 | |
| //  License as published by the Free Software Foundation; either
 | |
| //  version 2.1 of the License, or (at your option) any later version.
 | |
| //
 | |
| //  This library is distributed in the hope that it will be useful,
 | |
| //  but WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
| //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 | |
| //  Lesser General Public License for more details.
 | |
| //
 | |
| //  You should have received a copy of the GNU Lesser General Public
 | |
| //  License along with this library; if not, write to the Free Software
 | |
| //  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 | |
| //
 | |
| ////////////////////////////////////////////////////////////////////////////////
 | |
| 
 | |
| #include <string.h>
 | |
| #include <limits.h>
 | |
| #include <assert.h>
 | |
| #include <math.h>
 | |
| #include <float.h>
 | |
| 
 | |
| #include "STTypes.h"
 | |
| #include "cpu_detect.h"
 | |
| #include "TDStretch.h"
 | |
| 
 | |
| using namespace soundtouch;
 | |
| 
 | |
| #define max(x, y) (((x) > (y)) ? (x) : (y))
 | |
| 
 | |
| /*****************************************************************************
 | |
|  *
 | |
|  * Constant definitions
 | |
|  *
 | |
|  *****************************************************************************/
 | |
| 
 | |
| // Table for the hierarchical mixing position seeking algorithm
 | |
| const short _scanOffsets[5][24]={
 | |
|     { 124,  186,  248,  310,  372,  434,  496,  558,  620,  682,  744, 806,
 | |
|       868,  930,  992, 1054, 1116, 1178, 1240, 1302, 1364, 1426, 1488,   0},
 | |
|     {-100,  -75,  -50,  -25,   25,   50,   75,  100,    0,    0,    0,   0,
 | |
|         0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,   0},
 | |
|     { -20,  -15,  -10,   -5,    5,   10,   15,   20,    0,    0,    0,   0,
 | |
|         0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,   0},
 | |
|     {  -4,   -3,   -2,   -1,    1,    2,    3,    4,    0,    0,    0,   0,
 | |
|         0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,   0},
 | |
|     { 121,  114,   97,  114,   98,  105,  108,   32,  104,   99,  117,  111,
 | |
|       116,  100,  110,  117,  111,  115,    0,    0,    0,    0,    0,   0}};
 | |
| 
 | |
| /*****************************************************************************
 | |
|  *
 | |
|  * Implementation of the class 'TDStretch'
 | |
|  *
 | |
|  *****************************************************************************/
 | |
| 
 | |
| 
 | |
| TDStretch::TDStretch() : FIFOProcessor(&outputBuffer)
 | |
| {
 | |
|     bQuickSeek = false;
 | |
|     channels = 2;
 | |
| 
 | |
|     pMidBuffer = NULL;
 | |
|     pMidBufferUnaligned = NULL;
 | |
|     overlapLength = 0;
 | |
| 
 | |
|     bAutoSeqSetting = true;
 | |
|     bAutoSeekSetting = true;
 | |
| 
 | |
|     tempo = 1.0f;
 | |
|     setParameters(44100, DEFAULT_SEQUENCE_MS, DEFAULT_SEEKWINDOW_MS, DEFAULT_OVERLAP_MS);
 | |
|     setTempo(1.0f);
 | |
| 
 | |
|     clear();
 | |
| }
 | |
| 
 | |
| 
 | |
| 
 | |
| TDStretch::~TDStretch()
 | |
| {
 | |
|     delete[] pMidBufferUnaligned;
 | |
| }
 | |
| 
 | |
| 
 | |
| 
 | |
| // Sets routine control parameters. These control are certain time constants
 | |
| // defining how the sound is stretched to the desired duration.
 | |
| //
 | |
| // 'sampleRate' = sample rate of the sound
 | |
| // 'sequenceMS' = one processing sequence length in milliseconds (default = 82 ms)
 | |
| // 'seekwindowMS' = seeking window length for scanning the best overlapping 
 | |
| //      position (default = 28 ms)
 | |
| // 'overlapMS' = overlapping length (default = 12 ms)
 | |
| 
 | |
| void TDStretch::setParameters(int aSampleRate, int aSequenceMS, 
 | |
|                               int aSeekWindowMS, int aOverlapMS)
 | |
| {
 | |
|     // accept only positive parameter values - if zero or negative, use old values instead
 | |
|     if (aSampleRate > 0)
 | |
|     {
 | |
|         if (aSampleRate > 192000) ST_THROW_RT_ERROR("Error: Excessive samplerate");
 | |
|         this->sampleRate = aSampleRate;
 | |
|     }
 | |
| 
 | |
|     if (aOverlapMS > 0) this->overlapMs = aOverlapMS;
 | |
| 
 | |
|     if (aSequenceMS > 0)
 | |
|     {
 | |
|         this->sequenceMs = aSequenceMS;
 | |
|         bAutoSeqSetting = false;
 | |
|     } 
 | |
|     else if (aSequenceMS == 0)
 | |
|     {
 | |
|         // if zero, use automatic setting
 | |
|         bAutoSeqSetting = true;
 | |
|     }
 | |
| 
 | |
|     if (aSeekWindowMS > 0) 
 | |
|     {
 | |
|         this->seekWindowMs = aSeekWindowMS;
 | |
|         bAutoSeekSetting = false;
 | |
|     } 
 | |
|     else if (aSeekWindowMS == 0) 
 | |
|     {
 | |
|         // if zero, use automatic setting
 | |
|         bAutoSeekSetting = true;
 | |
|     }
 | |
| 
 | |
|     calcSeqParameters();
 | |
| 
 | |
|     calculateOverlapLength(overlapMs);
 | |
| 
 | |
|     // set tempo to recalculate 'sampleReq'
 | |
|     setTempo(tempo);
 | |
| }
 | |
| 
 | |
| 
 | |
| 
 | |
| /// Get routine control parameters, see setParameters() function.
 | |
| /// Any of the parameters to this function can be NULL, in such case corresponding parameter
 | |
| /// value isn't returned.
 | |
| void TDStretch::getParameters(int *pSampleRate, int *pSequenceMs, int *pSeekWindowMs, int *pOverlapMs) const
 | |
| {
 | |
|     if (pSampleRate)
 | |
|     {
 | |
|         *pSampleRate = sampleRate;
 | |
|     }
 | |
| 
 | |
|     if (pSequenceMs)
 | |
|     {
 | |
|         *pSequenceMs = (bAutoSeqSetting) ? (USE_AUTO_SEQUENCE_LEN) : sequenceMs;
 | |
|     }
 | |
| 
 | |
|     if (pSeekWindowMs)
 | |
|     {
 | |
|         *pSeekWindowMs = (bAutoSeekSetting) ? (USE_AUTO_SEEKWINDOW_LEN) : seekWindowMs;
 | |
|     }
 | |
| 
 | |
|     if (pOverlapMs)
 | |
|     {
 | |
|         *pOverlapMs = overlapMs;
 | |
|     }
 | |
| }
 | |
| 
 | |
| 
 | |
| // Overlaps samples in 'midBuffer' with the samples in 'pInput'
 | |
| void TDStretch::overlapMono(SAMPLETYPE *pOutput, const SAMPLETYPE *pInput) const
 | |
| {
 | |
|     int i;
 | |
|     SAMPLETYPE m1, m2;
 | |
| 
 | |
|     m1 = (SAMPLETYPE)0;
 | |
|     m2 = (SAMPLETYPE)overlapLength;
 | |
| 
 | |
|     for (i = 0; i < overlapLength ; i ++)
 | |
|     {
 | |
|         pOutput[i] = (pInput[i] * m1 + pMidBuffer[i] * m2 ) / overlapLength;
 | |
|         m1 += 1;
 | |
|         m2 -= 1;
 | |
|     }
 | |
| }
 | |
| 
 | |
| 
 | |
| 
 | |
| void TDStretch::clearMidBuffer()
 | |
| {
 | |
|     memset(pMidBuffer, 0, channels * sizeof(SAMPLETYPE) * overlapLength);
 | |
| }
 | |
| 
 | |
| 
 | |
| void TDStretch::clearInput()
 | |
| {
 | |
|     inputBuffer.clear();
 | |
|     clearMidBuffer();
 | |
|     isBeginning = true;
 | |
|     maxnorm = 0;
 | |
|     maxnormf = 1e8;
 | |
|     skipFract = 0;
 | |
| }
 | |
| 
 | |
| 
 | |
| // Clears the sample buffers
 | |
| void TDStretch::clear()
 | |
| {
 | |
|     outputBuffer.clear();
 | |
|     clearInput();
 | |
| }
 | |
| 
 | |
| 
 | |
| 
 | |
| // Enables/disables the quick position seeking algorithm. Zero to disable, nonzero
 | |
| // to enable
 | |
| void TDStretch::enableQuickSeek(bool enable)
 | |
| {
 | |
|     bQuickSeek = enable;
 | |
| }
 | |
| 
 | |
| 
 | |
| // Returns nonzero if the quick seeking algorithm is enabled.
 | |
| bool TDStretch::isQuickSeekEnabled() const
 | |
| {
 | |
|     return bQuickSeek;
 | |
| }
 | |
| 
 | |
| 
 | |
| // Seeks for the optimal overlap-mixing position.
 | |
| int TDStretch::seekBestOverlapPosition(const SAMPLETYPE *refPos)
 | |
| {
 | |
|     if (bQuickSeek) 
 | |
|     {
 | |
|         return seekBestOverlapPositionQuick(refPos);
 | |
|     }
 | |
|     else 
 | |
|     {
 | |
|         return seekBestOverlapPositionFull(refPos);
 | |
|     }
 | |
| }
 | |
| 
 | |
| 
 | |
| // Overlaps samples in 'midBuffer' with the samples in 'pInputBuffer' at position
 | |
| // of 'ovlPos'.
 | |
| inline void TDStretch::overlap(SAMPLETYPE *pOutput, const SAMPLETYPE *pInput, uint ovlPos) const
 | |
| {
 | |
| #ifndef USE_MULTICH_ALWAYS
 | |
|     if (channels == 1)
 | |
|     {
 | |
|         // mono sound.
 | |
|         overlapMono(pOutput, pInput + ovlPos);
 | |
|     }
 | |
|     else if (channels == 2)
 | |
|     {
 | |
|         // stereo sound
 | |
|         overlapStereo(pOutput, pInput + 2 * ovlPos);
 | |
|     } 
 | |
|     else 
 | |
| #endif // USE_MULTICH_ALWAYS
 | |
|     {
 | |
|         assert(channels > 0);
 | |
|         overlapMulti(pOutput, pInput + channels * ovlPos);
 | |
|     }
 | |
| }
 | |
| 
 | |
| 
 | |
| // Seeks for the optimal overlap-mixing position. The 'stereo' version of the
 | |
| // routine
 | |
| //
 | |
| // The best position is determined as the position where the two overlapped
 | |
| // sample sequences are 'most alike', in terms of the highest cross-correlation
 | |
| // value over the overlapping period
 | |
| int TDStretch::seekBestOverlapPositionFull(const SAMPLETYPE *refPos) 
 | |
| {
 | |
|     int bestOffs;
 | |
|     double bestCorr;
 | |
|     int i;
 | |
|     double norm;
 | |
| 
 | |
|     bestCorr = -FLT_MAX;
 | |
|     bestOffs = 0;
 | |
| 
 | |
|     // Scans for the best correlation value by testing each possible position
 | |
|     // over the permitted range.
 | |
|     bestCorr = calcCrossCorr(refPos, pMidBuffer, norm);
 | |
|     bestCorr = (bestCorr + 0.1) * 0.75;
 | |
| 
 | |
|     #pragma omp parallel for
 | |
|     for (i = 1; i < seekLength; i ++)
 | |
|     {
 | |
|         double corr;
 | |
|         // Calculates correlation value for the mixing position corresponding to 'i'
 | |
| #if defined(_OPENMP) || defined(ST_SIMD_AVOID_UNALIGNED)
 | |
|         // in parallel OpenMP mode, can't use norm accumulator version as parallel executor won't
 | |
|         // iterate the loop in sequential order
 | |
|         // in SIMD mode, avoid accumulator version to allow avoiding unaligned positions
 | |
|         corr = calcCrossCorr(refPos + channels * i, pMidBuffer, norm);
 | |
| #else
 | |
|         // In non-parallel version call "calcCrossCorrAccumulate" that is otherwise same
 | |
|         // as "calcCrossCorr", but saves time by reusing & updating previously stored 
 | |
|         // "norm" value
 | |
|         corr = calcCrossCorrAccumulate(refPos + channels * i, pMidBuffer, norm);
 | |
| #endif
 | |
|         // heuristic rule to slightly favour values close to mid of the range
 | |
|         double tmp = (double)(2 * i - seekLength) / (double)seekLength;
 | |
|         corr = ((corr + 0.1) * (1.0 - 0.25 * tmp * tmp));
 | |
| 
 | |
|         // Checks for the highest correlation value
 | |
|         if (corr > bestCorr) 
 | |
|         {
 | |
|             // For optimal performance, enter critical section only in case that best value found.
 | |
|             // in such case repeat 'if' condition as it's possible that parallel execution may have
 | |
|             // updated the bestCorr value in the mean time
 | |
|             #pragma omp critical
 | |
|             if (corr > bestCorr)
 | |
|             {
 | |
|                 bestCorr = corr;
 | |
|                 bestOffs = i;
 | |
|             }
 | |
|         }
 | |
|     }
 | |
| 
 | |
| #ifdef SOUNDTOUCH_INTEGER_SAMPLES
 | |
|     adaptNormalizer();
 | |
| #endif
 | |
| 
 | |
|     // clear cross correlation routine state if necessary (is so e.g. in MMX routines).
 | |
|     clearCrossCorrState();
 | |
| 
 | |
|     return bestOffs;
 | |
| }
 | |
| 
 | |
| 
 | |
| // Quick seek algorithm for improved runtime-performance: First roughly scans through the 
 | |
| // correlation area, and then scan surroundings of two best preliminary correlation candidates
 | |
| // with improved precision
 | |
| //
 | |
| // Based on testing:
 | |
| // - This algorithm gives on average 99% as good match as the full algorithm
 | |
| // - this quick seek algorithm finds the best match on ~90% of cases
 | |
| // - on those 10% of cases when this algorithm doesn't find best match, 
 | |
| //   it still finds on average ~90% match vs. the best possible match
 | |
| int TDStretch::seekBestOverlapPositionQuick(const SAMPLETYPE *refPos)
 | |
| {
 | |
| #define _MIN(a, b)   (((a) < (b)) ? (a) : (b))
 | |
| #define SCANSTEP    16
 | |
| #define SCANWIND    8
 | |
| 
 | |
|     int bestOffs;
 | |
|     int i;
 | |
|     int bestOffs2;
 | |
|     float bestCorr, corr;
 | |
|     float bestCorr2;
 | |
|     double norm;
 | |
| 
 | |
|     // note: 'float' types used in this function in case that the platform would need to use software-fp
 | |
| 
 | |
|     bestCorr =
 | |
|     bestCorr2 = -FLT_MAX;
 | |
|     bestOffs = 
 | |
|     bestOffs2 = SCANWIND;
 | |
| 
 | |
|     // Scans for the best correlation value by testing each possible position
 | |
|     // over the permitted range. Look for two best matches on the first pass to
 | |
|     // increase possibility of ideal match.
 | |
|     //
 | |
|     // Begin from "SCANSTEP" instead of SCANWIND to make the calculation
 | |
|     // catch the 'middlepoint' of seekLength vector as that's the a-priori 
 | |
|     // expected best match position
 | |
|     //
 | |
|     // Roughly:
 | |
|     // - 15% of cases find best result directly on the first round,
 | |
|     // - 75% cases find better match on 2nd round around the best match from 1st round
 | |
|     // - 10% cases find better match on 2nd round around the 2nd-best-match from 1st round
 | |
|     for (i = SCANSTEP; i < seekLength - SCANWIND - 1; i += SCANSTEP)
 | |
|     {
 | |
|         // Calculates correlation value for the mixing position corresponding
 | |
|         // to 'i'
 | |
|         corr = (float)calcCrossCorr(refPos + channels*i, pMidBuffer, norm);
 | |
|         // heuristic rule to slightly favour values close to mid of the seek range
 | |
|         float tmp = (float)(2 * i - seekLength - 1) / (float)seekLength;
 | |
|         corr = ((corr + 0.1f) * (1.0f - 0.25f * tmp * tmp));
 | |
| 
 | |
|         // Checks for the highest correlation value
 | |
|         if (corr > bestCorr)
 | |
|         {
 | |
|             // found new best match. keep the previous best as 2nd best match
 | |
|             bestCorr2 = bestCorr;
 | |
|             bestOffs2 = bestOffs;
 | |
|             bestCorr = corr;
 | |
|             bestOffs = i;
 | |
|         }
 | |
|         else if (corr > bestCorr2)
 | |
|         {
 | |
|             // not new best, but still new 2nd best match
 | |
|             bestCorr2 = corr;
 | |
|             bestOffs2 = i;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     // Scans surroundings of the found best match with small stepping
 | |
|     int end = _MIN(bestOffs + SCANWIND + 1, seekLength);
 | |
|     for (i = bestOffs - SCANWIND; i < end; i++)
 | |
|     {
 | |
|         if (i == bestOffs) continue;    // this offset already calculated, thus skip
 | |
| 
 | |
|         // Calculates correlation value for the mixing position corresponding
 | |
|         // to 'i'
 | |
|         corr = (float)calcCrossCorr(refPos + channels*i, pMidBuffer, norm);
 | |
|         // heuristic rule to slightly favour values close to mid of the range
 | |
|         float tmp = (float)(2 * i - seekLength - 1) / (float)seekLength;
 | |
|         corr = ((corr + 0.1f) * (1.0f - 0.25f * tmp * tmp));
 | |
| 
 | |
|         // Checks for the highest correlation value
 | |
|         if (corr > bestCorr)
 | |
|         {
 | |
|             bestCorr = corr;
 | |
|             bestOffs = i;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     // Scans surroundings of the 2nd best match with small stepping
 | |
|     end = _MIN(bestOffs2 + SCANWIND + 1, seekLength);
 | |
|     for (i = bestOffs2 - SCANWIND; i < end; i++)
 | |
|     {
 | |
|         if (i == bestOffs2) continue;    // this offset already calculated, thus skip
 | |
| 
 | |
|         // Calculates correlation value for the mixing position corresponding
 | |
|         // to 'i'
 | |
|         corr = (float)calcCrossCorr(refPos + channels*i, pMidBuffer, norm);
 | |
|         // heuristic rule to slightly favour values close to mid of the range
 | |
|         float tmp = (float)(2 * i - seekLength - 1) / (float)seekLength;
 | |
|         corr = ((corr + 0.1f) * (1.0f - 0.25f * tmp * tmp));
 | |
| 
 | |
|         // Checks for the highest correlation value
 | |
|         if (corr > bestCorr)
 | |
|         {
 | |
|             bestCorr = corr;
 | |
|             bestOffs = i;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     // clear cross correlation routine state if necessary (is so e.g. in MMX routines).
 | |
|     clearCrossCorrState();
 | |
| 
 | |
| #ifdef SOUNDTOUCH_INTEGER_SAMPLES
 | |
|     adaptNormalizer();
 | |
| #endif
 | |
| 
 | |
|     return bestOffs;
 | |
| }
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| /// For integer algorithm: adapt normalization factor divider with music so that 
 | |
| /// it'll not be pessimistically restrictive that can degrade quality on quieter sections
 | |
| /// yet won't cause integer overflows either
 | |
| void TDStretch::adaptNormalizer()
 | |
| {
 | |
|     // Do not adapt normalizer over too silent sequences to avoid averaging filter depleting to
 | |
|     // too low values during pauses in music
 | |
|     if ((maxnorm > 1000) || (maxnormf > 40000000))
 | |
|     { 
 | |
|         //norm averaging filter
 | |
|         maxnormf = 0.9f * maxnormf + 0.1f * (float)maxnorm;
 | |
| 
 | |
|         if ((maxnorm > 800000000) && (overlapDividerBitsNorm < 16))
 | |
|         {
 | |
|             // large values, so increase divider
 | |
|             overlapDividerBitsNorm++;
 | |
|             if (maxnorm > 1600000000) overlapDividerBitsNorm++; // extra large value => extra increase
 | |
|         }
 | |
|         else if ((maxnormf < 1000000) && (overlapDividerBitsNorm > 0))
 | |
|         {
 | |
|             // extra small values, decrease divider
 | |
|             overlapDividerBitsNorm--;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     maxnorm = 0;
 | |
| }
 | |
| 
 | |
| 
 | |
| /// clear cross correlation routine state if necessary 
 | |
| void TDStretch::clearCrossCorrState()
 | |
| {
 | |
|     // default implementation is empty.
 | |
| }
 | |
| 
 | |
| 
 | |
| /// Calculates processing sequence length according to tempo setting
 | |
| void TDStretch::calcSeqParameters()
 | |
| {
 | |
|     // Adjust tempo param according to tempo, so that variating processing sequence length is used
 | |
|     // at various tempo settings, between the given low...top limits
 | |
|     #define AUTOSEQ_TEMPO_LOW   0.5     // auto setting low tempo range (-50%)
 | |
|     #define AUTOSEQ_TEMPO_TOP   2.0     // auto setting top tempo range (+100%)
 | |
| 
 | |
|     // sequence-ms setting values at above low & top tempo
 | |
|     #define AUTOSEQ_AT_MIN      90.0
 | |
|     #define AUTOSEQ_AT_MAX      40.0
 | |
|     #define AUTOSEQ_K           ((AUTOSEQ_AT_MAX - AUTOSEQ_AT_MIN) / (AUTOSEQ_TEMPO_TOP - AUTOSEQ_TEMPO_LOW))
 | |
|     #define AUTOSEQ_C           (AUTOSEQ_AT_MIN - (AUTOSEQ_K) * (AUTOSEQ_TEMPO_LOW))
 | |
| 
 | |
|     // seek-window-ms setting values at above low & top tempoq
 | |
|     #define AUTOSEEK_AT_MIN     20.0
 | |
|     #define AUTOSEEK_AT_MAX     15.0
 | |
|     #define AUTOSEEK_K          ((AUTOSEEK_AT_MAX - AUTOSEEK_AT_MIN) / (AUTOSEQ_TEMPO_TOP - AUTOSEQ_TEMPO_LOW))
 | |
|     #define AUTOSEEK_C          (AUTOSEEK_AT_MIN - (AUTOSEEK_K) * (AUTOSEQ_TEMPO_LOW))
 | |
| 
 | |
|     #define CHECK_LIMITS(x, mi, ma) (((x) < (mi)) ? (mi) : (((x) > (ma)) ? (ma) : (x)))
 | |
| 
 | |
|     double seq, seek;
 | |
|     
 | |
|     if (bAutoSeqSetting)
 | |
|     {
 | |
|         seq = AUTOSEQ_C + AUTOSEQ_K * tempo;
 | |
|         seq = CHECK_LIMITS(seq, AUTOSEQ_AT_MAX, AUTOSEQ_AT_MIN);
 | |
|         sequenceMs = (int)(seq + 0.5);
 | |
|     }
 | |
| 
 | |
|     if (bAutoSeekSetting)
 | |
|     {
 | |
|         seek = AUTOSEEK_C + AUTOSEEK_K * tempo;
 | |
|         seek = CHECK_LIMITS(seek, AUTOSEEK_AT_MAX, AUTOSEEK_AT_MIN);
 | |
|         seekWindowMs = (int)(seek + 0.5);
 | |
|     }
 | |
| 
 | |
|     // Update seek window lengths
 | |
|     seekWindowLength = (sampleRate * sequenceMs) / 1000;
 | |
|     if (seekWindowLength < 2 * overlapLength) 
 | |
|     {
 | |
|         seekWindowLength = 2 * overlapLength;
 | |
|     }
 | |
|     seekLength = (sampleRate * seekWindowMs) / 1000;
 | |
| }
 | |
| 
 | |
| 
 | |
| 
 | |
| // Sets new target tempo. Normal tempo = 'SCALE', smaller values represent slower 
 | |
| // tempo, larger faster tempo.
 | |
| void TDStretch::setTempo(double newTempo)
 | |
| {
 | |
|     int intskip;
 | |
| 
 | |
|     tempo = newTempo;
 | |
| 
 | |
|     // Calculate new sequence duration
 | |
|     calcSeqParameters();
 | |
| 
 | |
|     // Calculate ideal skip length (according to tempo value) 
 | |
|     nominalSkip = tempo * (seekWindowLength - overlapLength);
 | |
|     intskip = (int)(nominalSkip + 0.5);
 | |
| 
 | |
|     // Calculate how many samples are needed in the 'inputBuffer' to 
 | |
|     // process another batch of samples
 | |
|     //sampleReq = max(intskip + overlapLength, seekWindowLength) + seekLength / 2;
 | |
|     sampleReq = max(intskip + overlapLength, seekWindowLength) + seekLength;
 | |
| }
 | |
| 
 | |
| 
 | |
| 
 | |
| // Sets the number of channels, 1 = mono, 2 = stereo
 | |
| void TDStretch::setChannels(int numChannels)
 | |
| {
 | |
|     if (!verifyNumberOfChannels(numChannels) ||
 | |
|         (channels == numChannels)) return;
 | |
| 
 | |
|     channels = numChannels;
 | |
|     inputBuffer.setChannels(channels);
 | |
|     outputBuffer.setChannels(channels);
 | |
| 
 | |
|     // re-init overlap/buffer
 | |
|     overlapLength=0;
 | |
|     setParameters(sampleRate);
 | |
| }
 | |
| 
 | |
| 
 | |
| // nominal tempo, no need for processing, just pass the samples through
 | |
| // to outputBuffer
 | |
| /*
 | |
| void TDStretch::processNominalTempo()
 | |
| {
 | |
|     assert(tempo == 1.0f);
 | |
| 
 | |
|     if (bMidBufferDirty) 
 | |
|     {
 | |
|         // If there are samples in pMidBuffer waiting for overlapping,
 | |
|         // do a single sliding overlapping with them in order to prevent a 
 | |
|         // clicking distortion in the output sound
 | |
|         if (inputBuffer.numSamples() < overlapLength) 
 | |
|         {
 | |
|             // wait until we've got overlapLength input samples
 | |
|             return;
 | |
|         }
 | |
|         // Mix the samples in the beginning of 'inputBuffer' with the 
 | |
|         // samples in 'midBuffer' using sliding overlapping 
 | |
|         overlap(outputBuffer.ptrEnd(overlapLength), inputBuffer.ptrBegin(), 0);
 | |
|         outputBuffer.putSamples(overlapLength);
 | |
|         inputBuffer.receiveSamples(overlapLength);
 | |
|         clearMidBuffer();
 | |
|         // now we've caught the nominal sample flow and may switch to
 | |
|         // bypass mode
 | |
|     }
 | |
| 
 | |
|     // Simply bypass samples from input to output
 | |
|     outputBuffer.moveSamples(inputBuffer);
 | |
| }
 | |
| */
 | |
| 
 | |
| 
 | |
| // Processes as many processing frames of the samples 'inputBuffer', store
 | |
| // the result into 'outputBuffer'
 | |
| void TDStretch::processSamples()
 | |
| {
 | |
|     int ovlSkip;
 | |
|     int offset = 0;
 | |
|     int temp;
 | |
| 
 | |
|     /* Removed this small optimization - can introduce a click to sound when tempo setting
 | |
|        crosses the nominal value
 | |
|     if (tempo == 1.0f) 
 | |
|     {
 | |
|         // tempo not changed from the original, so bypass the processing
 | |
|         processNominalTempo();
 | |
|         return;
 | |
|     }
 | |
|     */
 | |
| 
 | |
|     // Process samples as long as there are enough samples in 'inputBuffer'
 | |
|     // to form a processing frame.
 | |
|     while ((int)inputBuffer.numSamples() >= sampleReq) 
 | |
|     {
 | |
|         if (isBeginning == false)
 | |
|         {
 | |
|             // apart from the very beginning of the track, 
 | |
|             // scan for the best overlapping position & do overlap-add
 | |
|             offset = seekBestOverlapPosition(inputBuffer.ptrBegin());
 | |
| 
 | |
|             // Mix the samples in the 'inputBuffer' at position of 'offset' with the 
 | |
|             // samples in 'midBuffer' using sliding overlapping
 | |
|             // ... first partially overlap with the end of the previous sequence
 | |
|             // (that's in 'midBuffer')
 | |
|             overlap(outputBuffer.ptrEnd((uint)overlapLength), inputBuffer.ptrBegin(), (uint)offset);
 | |
|             outputBuffer.putSamples((uint)overlapLength);
 | |
|             offset += overlapLength;
 | |
|         }
 | |
|         else
 | |
|         {
 | |
|             // Adjust processing offset at beginning of track by not perform initial overlapping
 | |
|             // and compensating that in the 'input buffer skip' calculation
 | |
|             isBeginning = false;
 | |
|             int skip = (int)(tempo * overlapLength + 0.5 * seekLength + 0.5);
 | |
| 
 | |
|             #ifdef ST_SIMD_AVOID_UNALIGNED
 | |
|             // in SIMD mode, round the skip amount to value corresponding to aligned memory address
 | |
|             if (channels == 1)
 | |
|             {
 | |
|                 skip &= -4;
 | |
|             }
 | |
|             else if (channels == 2)
 | |
|             {
 | |
|                 skip &= -2;
 | |
|             }
 | |
|             #endif
 | |
|             skipFract -= skip;
 | |
|             if (skipFract <= -nominalSkip)
 | |
|             {
 | |
|                 skipFract = -nominalSkip;
 | |
|             }
 | |
|         }
 | |
| 
 | |
|         // ... then copy sequence samples from 'inputBuffer' to output:
 | |
| 
 | |
|         // crosscheck that we don't have buffer overflow...
 | |
|         if ((int)inputBuffer.numSamples() < (offset + seekWindowLength - overlapLength))
 | |
|         {
 | |
|             continue;    // just in case, shouldn't really happen
 | |
|         }
 | |
| 
 | |
|         // length of sequence
 | |
|         temp = (seekWindowLength - 2 * overlapLength);
 | |
|         outputBuffer.putSamples(inputBuffer.ptrBegin() + channels * offset, (uint)temp);
 | |
| 
 | |
|         // Copies the end of the current sequence from 'inputBuffer' to 
 | |
|         // 'midBuffer' for being mixed with the beginning of the next 
 | |
|         // processing sequence and so on
 | |
|         assert((offset + temp + overlapLength) <= (int)inputBuffer.numSamples());
 | |
|         memcpy(pMidBuffer, inputBuffer.ptrBegin() + channels * (offset + temp), 
 | |
|             channels * sizeof(SAMPLETYPE) * overlapLength);
 | |
| 
 | |
|         // Remove the processed samples from the input buffer. Update
 | |
|         // the difference between integer & nominal skip step to 'skipFract'
 | |
|         // in order to prevent the error from accumulating over time.
 | |
|         skipFract += nominalSkip;   // real skip size
 | |
|         ovlSkip = (int)skipFract;   // rounded to integer skip
 | |
|         skipFract -= ovlSkip;       // maintain the fraction part, i.e. real vs. integer skip
 | |
|         inputBuffer.receiveSamples((uint)ovlSkip);
 | |
|     }
 | |
| }
 | |
| 
 | |
| 
 | |
| // Adds 'numsamples' pcs of samples from the 'samples' memory position into
 | |
| // the input of the object.
 | |
| void TDStretch::putSamples(const SAMPLETYPE *samples, uint nSamples)
 | |
| {
 | |
|     // Add the samples into the input buffer
 | |
|     inputBuffer.putSamples(samples, nSamples);
 | |
|     // Process the samples in input buffer
 | |
|     processSamples();
 | |
| }
 | |
| 
 | |
| 
 | |
| 
 | |
| /// Set new overlap length parameter & reallocate RefMidBuffer if necessary.
 | |
| void TDStretch::acceptNewOverlapLength(int newOverlapLength)
 | |
| {
 | |
|     int prevOvl;
 | |
| 
 | |
|     assert(newOverlapLength >= 0);
 | |
|     prevOvl = overlapLength;
 | |
|     overlapLength = newOverlapLength;
 | |
| 
 | |
|     if (overlapLength > prevOvl)
 | |
|     {
 | |
|         delete[] pMidBufferUnaligned;
 | |
| 
 | |
|         pMidBufferUnaligned = new SAMPLETYPE[overlapLength * channels + 16 / sizeof(SAMPLETYPE)];
 | |
|         // ensure that 'pMidBuffer' is aligned to 16 byte boundary for efficiency
 | |
|         pMidBuffer = (SAMPLETYPE *)SOUNDTOUCH_ALIGN_POINTER_16(pMidBufferUnaligned);
 | |
| 
 | |
|         clearMidBuffer();
 | |
|     }
 | |
| }
 | |
| 
 | |
| 
 | |
| // Operator 'new' is overloaded so that it automatically creates a suitable instance 
 | |
| // depending on if we've a MMX/SSE/etc-capable CPU available or not.
 | |
| void * TDStretch::operator new(size_t s)
 | |
| {
 | |
|     // Notice! don't use "new TDStretch" directly, use "newInstance" to create a new instance instead!
 | |
|     ST_THROW_RT_ERROR("Error in TDStretch::new: Don't use 'new TDStretch' directly, use 'newInstance' member instead!");
 | |
|     return newInstance();
 | |
| }
 | |
| 
 | |
| 
 | |
| TDStretch * TDStretch::newInstance()
 | |
| {
 | |
|     uint uExtensions;
 | |
| 
 | |
|     uExtensions = detectCPUextensions();
 | |
| 
 | |
|     // Check if MMX/SSE instruction set extensions supported by CPU
 | |
| 
 | |
| #ifdef SOUNDTOUCH_ALLOW_MMX
 | |
|     // MMX routines available only with integer sample types
 | |
|     if (uExtensions & SUPPORT_MMX)
 | |
|     {
 | |
|         return ::new TDStretchMMX;
 | |
|     }
 | |
|     else
 | |
| #endif // SOUNDTOUCH_ALLOW_MMX
 | |
| 
 | |
| 
 | |
| #ifdef SOUNDTOUCH_ALLOW_SSE
 | |
|     if (uExtensions & SUPPORT_SSE)
 | |
|     {
 | |
|         // SSE support
 | |
|         return ::new TDStretchSSE;
 | |
|     }
 | |
|     else
 | |
| #endif // SOUNDTOUCH_ALLOW_SSE
 | |
| 
 | |
|     {
 | |
|         // ISA optimizations not supported, use plain C version
 | |
|         return ::new TDStretch;
 | |
|     }
 | |
| }
 | |
| 
 | |
| 
 | |
| //////////////////////////////////////////////////////////////////////////////
 | |
| //
 | |
| // Integer arithmetic specific algorithm implementations.
 | |
| //
 | |
| //////////////////////////////////////////////////////////////////////////////
 | |
| 
 | |
| #ifdef SOUNDTOUCH_INTEGER_SAMPLES
 | |
| 
 | |
| // Overlaps samples in 'midBuffer' with the samples in 'input'. The 'Stereo' 
 | |
| // version of the routine.
 | |
| void TDStretch::overlapStereo(short *poutput, const short *input) const
 | |
| {
 | |
|     int i;
 | |
|     short temp;
 | |
|     int cnt2;
 | |
| 
 | |
|     for (i = 0; i < overlapLength ; i ++)
 | |
|     {
 | |
|         temp = (short)(overlapLength - i);
 | |
|         cnt2 = 2 * i;
 | |
|         poutput[cnt2] = (input[cnt2] * i + pMidBuffer[cnt2] * temp )  / overlapLength;
 | |
|         poutput[cnt2 + 1] = (input[cnt2 + 1] * i + pMidBuffer[cnt2 + 1] * temp ) / overlapLength;
 | |
|     }
 | |
| }
 | |
| 
 | |
| 
 | |
| // Overlaps samples in 'midBuffer' with the samples in 'input'. The 'Multi'
 | |
| // version of the routine.
 | |
| void TDStretch::overlapMulti(short *poutput, const short *input) const
 | |
| {
 | |
|     short m1;
 | |
|     int i = 0;
 | |
| 
 | |
|     for (m1 = 0; m1 < overlapLength; m1 ++)
 | |
|     {
 | |
|         short m2 = (short)(overlapLength - m1);
 | |
|         for (int c = 0; c < channels; c ++)
 | |
|         {
 | |
|             poutput[i] = (input[i] * m1 + pMidBuffer[i] * m2)  / overlapLength;
 | |
|             i++;
 | |
|         }
 | |
|     }
 | |
| }
 | |
| 
 | |
| // Calculates the x having the closest 2^x value for the given value
 | |
| static int _getClosest2Power(double value)
 | |
| {
 | |
|     return (int)(log(value) / log(2.0) + 0.5);
 | |
| }
 | |
| 
 | |
| 
 | |
| /// Calculates overlap period length in samples.
 | |
| /// Integer version rounds overlap length to closest power of 2
 | |
| /// for a divide scaling operation.
 | |
| void TDStretch::calculateOverlapLength(int aoverlapMs)
 | |
| {
 | |
|     int newOvl;
 | |
| 
 | |
|     assert(aoverlapMs >= 0);
 | |
| 
 | |
|     // calculate overlap length so that it's power of 2 - thus it's easy to do
 | |
|     // integer division by right-shifting. Term "-1" at end is to account for 
 | |
|     // the extra most significatnt bit left unused in result by signed multiplication 
 | |
|     overlapDividerBitsPure = _getClosest2Power((sampleRate * aoverlapMs) / 1000.0) - 1;
 | |
|     if (overlapDividerBitsPure > 9) overlapDividerBitsPure = 9;
 | |
|     if (overlapDividerBitsPure < 3) overlapDividerBitsPure = 3;
 | |
|     newOvl = (int)pow(2.0, (int)overlapDividerBitsPure + 1);    // +1 => account for -1 above
 | |
| 
 | |
|     acceptNewOverlapLength(newOvl);
 | |
| 
 | |
|     overlapDividerBitsNorm = overlapDividerBitsPure;
 | |
| 
 | |
|     // calculate sloping divider so that crosscorrelation operation won't 
 | |
|     // overflow 32-bit register. Max. sum of the crosscorrelation sum without 
 | |
|     // divider would be 2^30*(N^3-N)/3, where N = overlap length
 | |
|     slopingDivider = (newOvl * newOvl - 1) / 3;
 | |
| }
 | |
| 
 | |
| 
 | |
| double TDStretch::calcCrossCorr(const short *mixingPos, const short *compare, double &norm)
 | |
| {
 | |
|     long corr;
 | |
|     unsigned long lnorm;
 | |
|     int i;
 | |
| 
 | |
|     #ifdef ST_SIMD_AVOID_UNALIGNED
 | |
|         // in SIMD mode skip 'mixingPos' positions that aren't aligned to 16-byte boundary
 | |
|         if (((ulongptr)mixingPos) & 15) return -1e50;
 | |
|     #endif
 | |
| 
 | |
|     // hint compiler autovectorization that loop length is divisible by 8
 | |
|     int ilength = (channels * overlapLength) & -8;
 | |
| 
 | |
|     corr = lnorm = 0;
 | |
|     // Same routine for stereo and mono
 | |
|     for (i = 0; i < ilength; i += 2)
 | |
|     {
 | |
|         corr += (mixingPos[i] * compare[i] + 
 | |
|                  mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBitsNorm;
 | |
|         lnorm += (mixingPos[i] * mixingPos[i] + 
 | |
|                   mixingPos[i + 1] * mixingPos[i + 1]) >> overlapDividerBitsNorm;
 | |
|         // do intermediate scalings to avoid integer overflow
 | |
|     }
 | |
| 
 | |
|     if (lnorm > maxnorm)
 | |
|     {
 | |
|         // modify 'maxnorm' inside critical section to avoid multi-access conflict if in OpenMP mode
 | |
|         #pragma omp critical
 | |
|         if (lnorm > maxnorm)
 | |
|         {
 | |
|             maxnorm = lnorm;
 | |
|         }
 | |
|     }
 | |
|     // Normalize result by dividing by sqrt(norm) - this step is easiest 
 | |
|     // done using floating point operation
 | |
|     norm = (double)lnorm;
 | |
|     return (double)corr / sqrt((norm < 1e-9) ? 1.0 : norm);
 | |
| }
 | |
| 
 | |
| 
 | |
| /// Update cross-correlation by accumulating "norm" coefficient by previously calculated value
 | |
| double TDStretch::calcCrossCorrAccumulate(const short *mixingPos, const short *compare, double &norm)
 | |
| {
 | |
|     long corr;
 | |
|     long lnorm;
 | |
|     int i;
 | |
| 
 | |
|     // hint compiler autovectorization that loop length is divisible by 8
 | |
|     int ilength = (channels * overlapLength) & -8;
 | |
| 
 | |
|     // cancel first normalizer tap from previous round
 | |
|     lnorm = 0;
 | |
|     for (i = 1; i <= channels; i ++)
 | |
|     {
 | |
|         lnorm -= (mixingPos[-i] * mixingPos[-i]) >> overlapDividerBitsNorm;
 | |
|     }
 | |
| 
 | |
|     corr = 0;
 | |
|     // Same routine for stereo and mono.
 | |
|     for (i = 0; i < ilength; i += 2) 
 | |
|     {
 | |
|         corr += (mixingPos[i] * compare[i] + 
 | |
|                  mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBitsNorm;
 | |
|     }
 | |
| 
 | |
|     // update normalizer with last samples of this round
 | |
|     for (int j = 0; j < channels; j ++)
 | |
|     {
 | |
|         i --;
 | |
|         lnorm += (mixingPos[i] * mixingPos[i]) >> overlapDividerBitsNorm;
 | |
|     }
 | |
| 
 | |
|     norm += (double)lnorm;
 | |
|     if (norm > maxnorm)
 | |
|     {
 | |
|         maxnorm = (unsigned long)norm;
 | |
|     }
 | |
| 
 | |
|     // Normalize result by dividing by sqrt(norm) - this step is easiest 
 | |
|     // done using floating point operation
 | |
|     return (double)corr / sqrt((norm < 1e-9) ? 1.0 : norm);
 | |
| }
 | |
| 
 | |
| #endif // SOUNDTOUCH_INTEGER_SAMPLES
 | |
| 
 | |
| //////////////////////////////////////////////////////////////////////////////
 | |
| //
 | |
| // Floating point arithmetic specific algorithm implementations.
 | |
| //
 | |
| 
 | |
| #ifdef SOUNDTOUCH_FLOAT_SAMPLES
 | |
| 
 | |
| // Overlaps samples in 'midBuffer' with the samples in 'pInput'
 | |
| void TDStretch::overlapStereo(float *pOutput, const float *pInput) const
 | |
| {
 | |
|     int i;
 | |
|     float fScale;
 | |
|     float f1;
 | |
|     float f2;
 | |
| 
 | |
|     fScale = 1.0f / (float)overlapLength;
 | |
| 
 | |
|     f1 = 0;
 | |
|     f2 = 1.0f;
 | |
| 
 | |
|     for (i = 0; i < 2 * (int)overlapLength ; i += 2) 
 | |
|     {
 | |
|         pOutput[i + 0] = pInput[i + 0] * f1 + pMidBuffer[i + 0] * f2;
 | |
|         pOutput[i + 1] = pInput[i + 1] * f1 + pMidBuffer[i + 1] * f2;
 | |
| 
 | |
|         f1 += fScale;
 | |
|         f2 -= fScale;
 | |
|     }
 | |
| }
 | |
| 
 | |
| 
 | |
| // Overlaps samples in 'midBuffer' with the samples in 'input'. 
 | |
| void TDStretch::overlapMulti(float *pOutput, const float *pInput) const
 | |
| {
 | |
|     int i;
 | |
|     float fScale;
 | |
|     float f1;
 | |
|     float f2;
 | |
| 
 | |
|     fScale = 1.0f / (float)overlapLength;
 | |
| 
 | |
|     f1 = 0;
 | |
|     f2 = 1.0f;
 | |
| 
 | |
|     i=0;
 | |
|     for (int i2 = 0; i2 < overlapLength; i2 ++)
 | |
|     {
 | |
|         // note: Could optimize this slightly by taking into account that always channels > 2
 | |
|         for (int c = 0; c < channels; c ++)
 | |
|         {
 | |
|             pOutput[i] = pInput[i] * f1 + pMidBuffer[i] * f2;
 | |
|             i++;
 | |
|         }
 | |
|         f1 += fScale;
 | |
|         f2 -= fScale;
 | |
|     }
 | |
| }
 | |
| 
 | |
| 
 | |
| /// Calculates overlapInMsec period length in samples.
 | |
| void TDStretch::calculateOverlapLength(int overlapInMsec)
 | |
| {
 | |
|     int newOvl;
 | |
| 
 | |
|     assert(overlapInMsec >= 0);
 | |
|     newOvl = (sampleRate * overlapInMsec) / 1000;
 | |
|     if (newOvl < 16) newOvl = 16;
 | |
| 
 | |
|     // must be divisible by 8
 | |
|     newOvl -= newOvl % 8;
 | |
| 
 | |
|     acceptNewOverlapLength(newOvl);
 | |
| }
 | |
| 
 | |
| 
 | |
| /// Calculate cross-correlation
 | |
| double TDStretch::calcCrossCorr(const float *mixingPos, const float *compare, double &anorm)
 | |
| {
 | |
|     float corr;
 | |
|     float norm;
 | |
|     int i;
 | |
| 
 | |
|     #ifdef ST_SIMD_AVOID_UNALIGNED
 | |
|         // in SIMD mode skip 'mixingPos' positions that aren't aligned to 16-byte boundary
 | |
|         if (((ulongptr)mixingPos) & 15) return -1e50;
 | |
|     #endif
 | |
| 
 | |
|     // hint compiler autovectorization that loop length is divisible by 8
 | |
|     int ilength = (channels * overlapLength) & -8;
 | |
| 
 | |
|     corr = norm = 0;
 | |
|     // Same routine for stereo and mono
 | |
|     for (i = 0; i < ilength; i ++)
 | |
|     {
 | |
|         corr += mixingPos[i] * compare[i];
 | |
|         norm += mixingPos[i] * mixingPos[i];
 | |
|     }
 | |
| 
 | |
|     anorm = norm;
 | |
|     return corr / sqrt((norm < 1e-9 ? 1.0 : norm));
 | |
| }
 | |
| 
 | |
| 
 | |
| /// Update cross-correlation by accumulating "norm" coefficient by previously calculated value
 | |
| double TDStretch::calcCrossCorrAccumulate(const float *mixingPos, const float *compare, double &norm)
 | |
| {
 | |
|     float corr;
 | |
|     int i;
 | |
| 
 | |
|     corr = 0;
 | |
| 
 | |
|     // cancel first normalizer tap from previous round
 | |
|     for (i = 1; i <= channels; i ++)
 | |
|     {
 | |
|         norm -= mixingPos[-i] * mixingPos[-i];
 | |
|     }
 | |
| 
 | |
|     // hint compiler autovectorization that loop length is divisible by 8
 | |
|     int ilength = (channels * overlapLength) & -8;
 | |
| 
 | |
|     // Same routine for stereo and mono
 | |
|     for (i = 0; i < ilength; i ++)
 | |
|     {
 | |
|         corr += mixingPos[i] * compare[i];
 | |
|     }
 | |
| 
 | |
|     // update normalizer with last samples of this round
 | |
|     for (int j = 0; j < channels; j ++)
 | |
|     {
 | |
|         i --;
 | |
|         norm += mixingPos[i] * mixingPos[i];
 | |
|     }
 | |
| 
 | |
|     return corr / sqrt((norm < 1e-9 ? 1.0 : norm));
 | |
| }
 | |
| 
 | |
| 
 | |
| #endif // SOUNDTOUCH_FLOAT_SAMPLES
 | 
