Mixxx
// sampleutil.cpp
// Created 10/5/2009 by RJ Ryan (rryan@mit.edu)

#ifdef __SSE__
#include <xmmintrin.h>
#endif

#ifdef __WINDOWS__
#pragma intrinsic(fabs)
#endif

#include <QtDebug>

#include "sampleutil.h"

#ifdef __SSE__
// Disable optimizations by default. They aren't ready for general use yet. Turn
// this flag on if you feel brave.
bool SampleUtil::m_sOptimizationsOn = false; // true;
#else
bool SampleUtil::m_sOptimizationsOn = false;
#endif

// static
CSAMPLE* SampleUtil::alloc(int size) {
    // TODO(XXX) align the array
    return new CSAMPLE[size];
}

void SampleUtil::free(CSAMPLE* pBuffer) {
    delete [] pBuffer;
}

// static
void SampleUtil::applyGain(CSAMPLE* pBuffer,
                           CSAMPLE gain, int iNumSamples) {
    if (gain == 1.0f)
        return;
    if (gain == 0.0f) {
        memset(pBuffer, 0, sizeof(pBuffer[0]) * iNumSamples);
        return;
    }

    if (m_sOptimizationsOn) {
        return sseApplyGain(pBuffer, gain, iNumSamples);
    }

    for (int i = 0; i < iNumSamples; ++i) {
        pBuffer[i] *= gain;
    }
}

// static
void SampleUtil::sseApplyGain(_ALIGN_16 CSAMPLE* pBuffer,
                              _ALIGN_16 CSAMPLE gain, int iNumSamples) {
#ifdef __SSE__
    assert_aligned(pBuffer);
    __m128 vSamples;
    __m128 vGain = _mm_set1_ps(gain);
    while (iNumSamples >= 4) {
        vSamples = _mm_loadu_ps(pBuffer);
        vSamples = _mm_mul_ps(vSamples, vGain);
        _mm_store_ps(pBuffer, vSamples);

        iNumSamples -= 4;
        pBuffer += 4;
    }
    if (iNumSamples > 0) {
        qDebug() << "Not div by 4";
    }
    while (iNumSamples > 0) {
        *pBuffer = *pBuffer * gain;
        pBuffer++;
        iNumSamples--;
    }
#endif
}
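
// The alternating-gain variants below are aimed at interleaved stereo
// buffers, {l1, r1, l2, r2, ...}: gain1 is applied to even (left) samples
// and gain2 to odd (right) samples. A usage sketch with hypothetical names:
//
//     // Halve the left channel, leave the right untouched. iNumSamples
//     // counts total floats in the interleaved buffer, not frames.
//     SampleUtil::applyAlternatingGain(pStereo, 0.5f, 1.0f, iNumSamples);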

// static
void SampleUtil::applyAlternatingGain(CSAMPLE* pBuffer,
                                      CSAMPLE gain1, CSAMPLE gain2,
                                      int iNumSamples) {
    Q_ASSERT(iNumSamples % 2 == 0);
    // This handles gain1 == 1.0 && gain2 == 1.0f as well.
    if (gain1 == gain2) {
        return applyGain(pBuffer, gain1, iNumSamples);
    }
    if (m_sOptimizationsOn)
        return sseApplyAlternatingGain(pBuffer, gain1, gain2, iNumSamples);

    for (int i = 0; i < iNumSamples; i += 2) {
        pBuffer[i] *= gain1;
        pBuffer[i+1] *= gain2;
    }
}

// static
void SampleUtil::sseApplyAlternatingGain(CSAMPLE* pBuffer,
                                         CSAMPLE gain1, CSAMPLE gain2,
                                         int iNumSamples) {
#ifdef __SSE__
    Q_ASSERT(iNumSamples % 2 == 0);
    assert_aligned(pBuffer);
    __m128 vSamples;
    __m128 vGain = _mm_set_ps(gain2, gain1, gain2, gain1);
    while (iNumSamples >= 4) {
        vSamples = _mm_loadu_ps(pBuffer);
        vSamples = _mm_mul_ps(vSamples, vGain);
        _mm_store_ps(pBuffer, vSamples);

        iNumSamples -= 4;
        pBuffer += 4;
    }
    if (iNumSamples > 0) {
        qDebug() << "Not div by 4";
    }
    while (iNumSamples > 0) {
        *pBuffer = *pBuffer * gain1;
        pBuffer++;
        *pBuffer = *pBuffer * gain2;
        pBuffer++;
        iNumSamples -= 2;
    }
#endif
}

// static
void SampleUtil::addWithGain(CSAMPLE* pDest, const CSAMPLE* pSrc,
                             CSAMPLE gain, int iNumSamples) {
    if (gain == 0.0f)
        return;
    if (m_sOptimizationsOn)
        return sseAddWithGain(pDest, pSrc, gain, iNumSamples);

    for (int i = 0; i < iNumSamples; ++i) {
        pDest[i] += pSrc[i] * gain;
    }
}

// static
void SampleUtil::sseAddWithGain(CSAMPLE* pDest, const CSAMPLE* pSrc,
                                CSAMPLE gain, int iNumSamples) {
#ifdef __SSE__
    assert_aligned(pDest);
    assert_aligned(pSrc);
    __m128 vSrcSamples;
    __m128 vDestSamples;
    __m128 vGain = _mm_set1_ps(gain);
    while (iNumSamples >= 4) {
        vSrcSamples = _mm_loadu_ps(pSrc);
        vSrcSamples = _mm_mul_ps(vSrcSamples, vGain);
        vDestSamples = _mm_loadu_ps(pDest);
        _mm_store_ps(pDest, _mm_add_ps(vDestSamples, vSrcSamples));
        iNumSamples -= 4;
        pDest += 4;
        pSrc += 4;
    }
    if (iNumSamples > 0) {
        qDebug() << "Not div by 4";
    }
    while (iNumSamples > 0) {
        *pDest = *pDest + *pSrc * gain;
        pDest++;
        pSrc++;
        iNumSamples--;
    }
#endif
}
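
// add2WithGain and add3WithGain accumulate several gain-scaled sources into
// pDest; a typical use would be summing source buffers into a shared output
// buffer. A sketch with hypothetical buffer names, mixing two sources at
// equal gain:
//
//     SampleUtil::add2WithGain(pOutput, pSourceA, 0.5f, pSourceB, 0.5f, n);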

// static
void SampleUtil::add2WithGain(CSAMPLE* pDest,
                              const CSAMPLE* pSrc1, CSAMPLE gain1,
                              const CSAMPLE* pSrc2, CSAMPLE gain2,
                              int iNumSamples) {
    if (gain1 == 0.0f) {
        return addWithGain(pDest, pSrc2, gain2, iNumSamples);
    } else if (gain2 == 0.0f) {
        return addWithGain(pDest, pSrc1, gain1, iNumSamples);
    }

    if (m_sOptimizationsOn)
        return sseAdd2WithGain(pDest, pSrc1, gain1, pSrc2, gain2, iNumSamples);

    for (int i = 0; i < iNumSamples; ++i) {
        pDest[i] += pSrc1[i] * gain1 + pSrc2[i] * gain2;
    }
}

// static
void SampleUtil::sseAdd2WithGain(CSAMPLE* pDest,
                                 const CSAMPLE* pSrc1, CSAMPLE gain1,
                                 const CSAMPLE* pSrc2, CSAMPLE gain2,
                                 int iNumSamples) {
#ifdef __SSE__
    assert_aligned(pSrc1);
    assert_aligned(pSrc2);
    assert_aligned(pDest);
    __m128 vSrc1Samples;
    __m128 vSrc2Samples;
    __m128 vDestSamples;
    __m128 vGain1 = _mm_set1_ps(gain1);
    __m128 vGain2 = _mm_set1_ps(gain2);
    while (iNumSamples >= 4) {
        vSrc1Samples = _mm_loadu_ps(pSrc1);
        vSrc1Samples = _mm_mul_ps(vSrc1Samples, vGain1);
        vSrc2Samples = _mm_loadu_ps(pSrc2);
        vSrc2Samples = _mm_mul_ps(vSrc2Samples, vGain2);
        vDestSamples = _mm_loadu_ps(pDest);
        vDestSamples = _mm_add_ps(vDestSamples, vSrc1Samples);
        vDestSamples = _mm_add_ps(vDestSamples, vSrc2Samples);
        _mm_store_ps(pDest, vDestSamples);
        iNumSamples -= 4;
        pDest += 4;
        pSrc1 += 4;
        pSrc2 += 4;
    }
    if (iNumSamples > 0) {
        qDebug() << "Not div by 4";
    }
    while (iNumSamples > 0) {
        *pDest = *pDest + *pSrc1 * gain1 + *pSrc2 * gain2;
        pDest++;
        pSrc1++;
        pSrc2++;
        iNumSamples--;
    }
#endif
}

// static
void SampleUtil::add3WithGain(CSAMPLE* pDest,
                              const CSAMPLE* pSrc1, CSAMPLE gain1,
                              const CSAMPLE* pSrc2, CSAMPLE gain2,
                              const CSAMPLE* pSrc3, CSAMPLE gain3,
                              int iNumSamples) {
    if (gain1 == 0.0f) {
        return add2WithGain(pDest, pSrc2, gain2, pSrc3, gain3, iNumSamples);
    } else if (gain2 == 0.0f) {
        return add2WithGain(pDest, pSrc1, gain1, pSrc3, gain3, iNumSamples);
    } else if (gain3 == 0.0f) {
        return add2WithGain(pDest, pSrc1, gain1, pSrc2, gain2, iNumSamples);
    }

    if (m_sOptimizationsOn)
        return sseAdd3WithGain(pDest, pSrc1, gain1, pSrc2, gain2,
                               pSrc3, gain3, iNumSamples);

    for (int i = 0; i < iNumSamples; ++i) {
        pDest[i] += pSrc1[i] * gain1 + pSrc2[i] * gain2 + pSrc3[i] * gain3;
    }
}

// static
void SampleUtil::sseAdd3WithGain(CSAMPLE* pDest,
                                 const CSAMPLE* pSrc1, CSAMPLE gain1,
                                 const CSAMPLE* pSrc2, CSAMPLE gain2,
                                 const CSAMPLE* pSrc3, CSAMPLE gain3,
                                 int iNumSamples) {
#ifdef __SSE__
    assert_aligned(pDest);
    assert_aligned(pSrc1);
    assert_aligned(pSrc2);
    assert_aligned(pSrc3);
    __m128 vSrc1Samples;
    __m128 vSrc2Samples;
    __m128 vSrc3Samples;
    __m128 vDestSamples;
    __m128 vGain1 = _mm_set1_ps(gain1);
    __m128 vGain2 = _mm_set1_ps(gain2);
    __m128 vGain3 = _mm_set1_ps(gain3);
    while (iNumSamples >= 4) {
        vSrc1Samples = _mm_loadu_ps(pSrc1);
        vSrc1Samples = _mm_mul_ps(vSrc1Samples, vGain1);
        vSrc2Samples = _mm_loadu_ps(pSrc2);
        vSrc2Samples = _mm_mul_ps(vSrc2Samples, vGain2);
        vSrc3Samples = _mm_loadu_ps(pSrc3);
        vSrc3Samples = _mm_mul_ps(vSrc3Samples, vGain3);
        vDestSamples = _mm_loadu_ps(pDest);
        vDestSamples = _mm_add_ps(vDestSamples, vSrc1Samples);
        vDestSamples = _mm_add_ps(vDestSamples, vSrc2Samples);
        vDestSamples = _mm_add_ps(vDestSamples, vSrc3Samples);
        _mm_store_ps(pDest, vDestSamples);
        iNumSamples -= 4;
        pDest += 4;
        pSrc1 += 4;
        pSrc2 += 4;
        pSrc3 += 4;
    }
    if (iNumSamples > 0) {
        qDebug() << "Not div by 4";
    }
    while (iNumSamples > 0) {
        *pDest = *pDest + *pSrc1 * gain1 + *pSrc2 * gain2 + *pSrc3 * gain3;
        pDest++;
        pSrc1++;
        pSrc2++;
        pSrc3++;
        iNumSamples--;
    }
#endif
}
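
// Unlike the add*WithGain family above, the copy*WithGain functions
// overwrite pDest instead of accumulating into it. A sketch with
// hypothetical buffer names:
//
//     SampleUtil::copyWithGain(pOut, pIn, 0.25f, n);  // pOut[i] = pIn[i] * 0.25f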

// static
void SampleUtil::copyWithGain(CSAMPLE* pDest, const CSAMPLE* pSrc,
                              CSAMPLE gain, int iNumSamples) {
    if (pDest == pSrc) {
        return applyGain(pDest, gain, iNumSamples);
    }
    if (gain == 1.0f) {
        memcpy(pDest, pSrc, sizeof(pDest[0]) * iNumSamples);
        return;
    }
    if (gain == 0.0f) {
        memset(pDest, 0, sizeof(pDest[0]) * iNumSamples);
        return;
    }

    if (m_sOptimizationsOn) {
        return sseCopyWithGain(pDest, pSrc, gain, iNumSamples);
    }

    for (int i = 0; i < iNumSamples; ++i) {
        pDest[i] = pSrc[i] * gain;
    }

    // OR! need to test which fares better
    // memcpy(pDest, pSrc, sizeof(pDest[0]) * iNumSamples);
    // applyGain(pDest, gain);
}

// static
void SampleUtil::sseCopyWithGain(CSAMPLE* pDest, const CSAMPLE* pSrc,
                                 CSAMPLE gain, int iNumSamples) {
#ifdef __SSE__
    assert_aligned(pDest);
    assert_aligned(pSrc);
    __m128 vSrcSamples;
    __m128 vGain = _mm_set1_ps(gain);
    while (iNumSamples >= 4) {
        vSrcSamples = _mm_loadu_ps(pSrc);
        vSrcSamples = _mm_mul_ps(vSrcSamples, vGain);
        _mm_store_ps(pDest, vSrcSamples);
        iNumSamples -= 4;
        pDest += 4;
        pSrc += 4;
    }
    if (iNumSamples > 0) {
        qDebug() << "Not div by 4";
    }
    while (iNumSamples > 0) {
        *pDest = *pSrc * gain;
        pDest++;
        pSrc++;
        iNumSamples--;
    }
#endif
}

// static
void SampleUtil::copy2WithGain(CSAMPLE* pDest,
                               const CSAMPLE* pSrc1, CSAMPLE gain1,
                               const CSAMPLE* pSrc2, CSAMPLE gain2,
                               int iNumSamples) {
    if (gain1 == 0.0f) {
        return copyWithGain(pDest, pSrc2, gain2, iNumSamples);
    }
    if (gain2 == 0.0f) {
        return copyWithGain(pDest, pSrc1, gain1, iNumSamples);
    }
    if (m_sOptimizationsOn) {
        return sseCopy2WithGain(pDest, pSrc1, gain1, pSrc2, gain2, iNumSamples);
    }

    for (int i = 0; i < iNumSamples; ++i) {
        pDest[i] = pSrc1[i] * gain1 + pSrc2[i] * gain2;
    }
}

// static
void SampleUtil::sseCopy2WithGain(CSAMPLE* pDest,
                                  const CSAMPLE* pSrc1, CSAMPLE gain1,
                                  const CSAMPLE* pSrc2, CSAMPLE gain2,
                                  int iNumSamples) {
#ifdef __SSE__
    assert_aligned(pDest);
    assert_aligned(pSrc1);
    assert_aligned(pSrc2);
    __m128 vSrc1Samples;
    __m128 vSrc2Samples;
    __m128 vGain1 = _mm_set1_ps(gain1);
    __m128 vGain2 = _mm_set1_ps(gain2);
    while (iNumSamples >= 4) {
        vSrc1Samples = _mm_loadu_ps(pSrc1);
        vSrc1Samples = _mm_mul_ps(vSrc1Samples, vGain1);
        vSrc2Samples = _mm_loadu_ps(pSrc2);
        vSrc2Samples = _mm_mul_ps(vSrc2Samples, vGain2);
        _mm_store_ps(pDest, _mm_add_ps(vSrc1Samples, vSrc2Samples));
        iNumSamples -= 4;
        pDest += 4;
        pSrc1 += 4;
        pSrc2 += 4;
    }
    if (iNumSamples > 0) {
        qDebug() << "Not div by 4";
    }
    while (iNumSamples > 0) {
        *pDest = *pSrc1 * gain1 + *pSrc2 * gain2;
        pDest++;
        pSrc1++;
        pSrc2++;
        iNumSamples--;
    }
#endif
}

// static
void SampleUtil::copy3WithGain(CSAMPLE* pDest,
                               const CSAMPLE* pSrc1, CSAMPLE gain1,
                               const CSAMPLE* pSrc2, CSAMPLE gain2,
                               const CSAMPLE* pSrc3, CSAMPLE gain3,
                               int iNumSamples) {
    if (gain1 == 0.0f) {
        return copy2WithGain(pDest, pSrc2, gain2, pSrc3, gain3, iNumSamples);
    }
    if (gain2 == 0.0f) {
        return copy2WithGain(pDest, pSrc1, gain1, pSrc3, gain3, iNumSamples);
    }
    if (gain3 == 0.0f) {
        return copy2WithGain(pDest, pSrc1, gain1, pSrc2, gain2, iNumSamples);
    }
    if (m_sOptimizationsOn) {
        return sseCopy3WithGain(pDest, pSrc1, gain1, pSrc2, gain2,
                                pSrc3, gain3, iNumSamples);
    }

    for (int i = 0; i < iNumSamples; ++i) {
        pDest[i] = pSrc1[i] * gain1 + pSrc2[i] * gain2 + pSrc3[i] * gain3;
    }
}

// static
void SampleUtil::sseCopy3WithGain(CSAMPLE* pDest,
                                  const CSAMPLE* pSrc1, CSAMPLE gain1,
                                  const CSAMPLE* pSrc2, CSAMPLE gain2,
                                  const CSAMPLE* pSrc3, CSAMPLE gain3,
                                  int iNumSamples) {
#ifdef __SSE__
    assert_aligned(pDest);
    assert_aligned(pSrc1);
    assert_aligned(pSrc2);
    assert_aligned(pSrc3);
    __m128 vSrc1Samples;
    __m128 vSrc2Samples;
    __m128 vSrc3Samples;
    __m128 vGain1 = _mm_set1_ps(gain1);
    __m128 vGain2 = _mm_set1_ps(gain2);
    __m128 vGain3 = _mm_set1_ps(gain3);
    while (iNumSamples >= 4) {
        vSrc1Samples = _mm_loadu_ps(pSrc1);
        vSrc1Samples = _mm_mul_ps(vSrc1Samples, vGain1);
        vSrc2Samples = _mm_loadu_ps(pSrc2);
        vSrc2Samples = _mm_mul_ps(vSrc2Samples, vGain2);
        vSrc3Samples = _mm_loadu_ps(pSrc3);
        vSrc3Samples = _mm_mul_ps(vSrc3Samples, vGain3);

        vSrc1Samples = _mm_add_ps(vSrc1Samples, vSrc2Samples);
        vSrc1Samples = _mm_add_ps(vSrc1Samples, vSrc3Samples);
        _mm_store_ps(pDest, vSrc1Samples);
        iNumSamples -= 4;
        pDest += 4;
        pSrc1 += 4;
        pSrc2 += 4;
        pSrc3 += 4;
    }
    if (iNumSamples > 0) {
        qDebug() << "Not div by 4";
    }
    while (iNumSamples > 0) {
        *pDest = *pSrc1 * gain1 + *pSrc2 * gain2 + *pSrc3 * gain3;
        pDest++;
        pSrc1++;
        pSrc2++;
        pSrc3++;
        iNumSamples--;
    }
#endif
}
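
// copy4WithGain through copy7WithGain have no SSE versions; they rely on the
// zero-gain checks below to prune muted sources, so e.g. copy7WithGain with
// one gain at 0.0f recurses into copy6WithGain and a fully-muted source
// costs nothing.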

// static
void SampleUtil::copy4WithGain(CSAMPLE* pDest,
                               const CSAMPLE* pSrc1, CSAMPLE gain1,
                               const CSAMPLE* pSrc2, CSAMPLE gain2,
                               const CSAMPLE* pSrc3, CSAMPLE gain3,
                               const CSAMPLE* pSrc4, CSAMPLE gain4,
                               int iNumSamples) {
    if (gain1 == 0.0f) {
        return copy3WithGain(pDest, pSrc2, gain2, pSrc3, gain3,
                             pSrc4, gain4, iNumSamples);
    }
    if (gain2 == 0.0f) {
        return copy3WithGain(pDest, pSrc1, gain1, pSrc3, gain3,
                             pSrc4, gain4, iNumSamples);
    }
    if (gain3 == 0.0f) {
        return copy3WithGain(pDest, pSrc1, gain1, pSrc2, gain2,
                             pSrc4, gain4, iNumSamples);
    }
    if (gain4 == 0.0f) {
        return copy3WithGain(pDest, pSrc1, gain1, pSrc2, gain2,
                             pSrc3, gain3, iNumSamples);
    }
    if (m_sOptimizationsOn) {
        // TODO(rryan) implement SSE for this? worth it?
    }
    for (int i = 0; i < iNumSamples; ++i) {
        pDest[i] = pSrc1[i] * gain1 + pSrc2[i] * gain2 +
                   pSrc3[i] * gain3 + pSrc4[i] * gain4;
    }
}

// static
void SampleUtil::copy5WithGain(CSAMPLE* pDest,
                               const CSAMPLE* pSrc1, CSAMPLE gain1,
                               const CSAMPLE* pSrc2, CSAMPLE gain2,
                               const CSAMPLE* pSrc3, CSAMPLE gain3,
                               const CSAMPLE* pSrc4, CSAMPLE gain4,
                               const CSAMPLE* pSrc5, CSAMPLE gain5,
                               int iNumSamples) {
    if (gain1 == 0.0f) {
        return copy4WithGain(pDest, pSrc2, gain2, pSrc3, gain3,
                             pSrc4, gain4, pSrc5, gain5, iNumSamples);
    }
    if (gain2 == 0.0f) {
        return copy4WithGain(pDest, pSrc1, gain1, pSrc3, gain3,
                             pSrc4, gain4, pSrc5, gain5, iNumSamples);
    }
    if (gain3 == 0.0f) {
        return copy4WithGain(pDest, pSrc1, gain1, pSrc2, gain2,
                             pSrc4, gain4, pSrc5, gain5, iNumSamples);
    }
    if (gain4 == 0.0f) {
        return copy4WithGain(pDest, pSrc1, gain1, pSrc2, gain2,
                             pSrc3, gain3, pSrc5, gain5, iNumSamples);
    }
    if (gain5 == 0.0f) {
        return copy4WithGain(pDest, pSrc1, gain1, pSrc2, gain2,
                             pSrc3, gain3, pSrc4, gain4, iNumSamples);
    }

    if (m_sOptimizationsOn) {
        // TODO(rryan) implement SSE for this? worth it?
    }

    for (int i = 0; i < iNumSamples; ++i) {
        pDest[i] = pSrc1[i] * gain1 + pSrc2[i] * gain2 + pSrc3[i] * gain3 +
                   pSrc4[i] * gain4 + pSrc5[i] * gain5;
    }
}

// static
void SampleUtil::copy6WithGain(CSAMPLE* pDest,
                               const CSAMPLE* pSrc1, CSAMPLE gain1,
                               const CSAMPLE* pSrc2, CSAMPLE gain2,
                               const CSAMPLE* pSrc3, CSAMPLE gain3,
                               const CSAMPLE* pSrc4, CSAMPLE gain4,
                               const CSAMPLE* pSrc5, CSAMPLE gain5,
                               const CSAMPLE* pSrc6, CSAMPLE gain6,
                               int iNumSamples) {
    if (gain1 == 0.0f) {
        return copy5WithGain(pDest, pSrc2, gain2, pSrc3, gain3, pSrc4, gain4,
                             pSrc5, gain5, pSrc6, gain6, iNumSamples);
    }
    if (gain2 == 0.0f) {
        return copy5WithGain(pDest, pSrc1, gain1, pSrc3, gain3, pSrc4, gain4,
                             pSrc5, gain5, pSrc6, gain6, iNumSamples);
    }
    if (gain3 == 0.0f) {
        return copy5WithGain(pDest, pSrc1, gain1, pSrc2, gain2, pSrc4, gain4,
                             pSrc5, gain5, pSrc6, gain6, iNumSamples);
    }
    if (gain4 == 0.0f) {
        return copy5WithGain(pDest, pSrc1, gain1, pSrc2, gain2, pSrc3, gain3,
                             pSrc5, gain5, pSrc6, gain6, iNumSamples);
    }
    if (gain5 == 0.0f) {
        return copy5WithGain(pDest, pSrc1, gain1, pSrc2, gain2, pSrc3, gain3,
                             pSrc4, gain4, pSrc6, gain6, iNumSamples);
    }
    if (gain6 == 0.0f) {
        return copy5WithGain(pDest, pSrc1, gain1, pSrc2, gain2, pSrc3, gain3,
                             pSrc4, gain4, pSrc5, gain5, iNumSamples);
    }
    if (m_sOptimizationsOn) {
        // TODO(rryan) implement SSE for this? worth it?
    }
    for (int i = 0; i < iNumSamples; ++i) {
        pDest[i] = pSrc1[i] * gain1 + pSrc2[i] * gain2 + pSrc3[i] * gain3 +
                   pSrc4[i] * gain4 + pSrc5[i] * gain5 + pSrc6[i] * gain6;
    }
}

// static
void SampleUtil::copy7WithGain(CSAMPLE* pDest,
                               const CSAMPLE* pSrc1, CSAMPLE gain1,
                               const CSAMPLE* pSrc2, CSAMPLE gain2,
                               const CSAMPLE* pSrc3, CSAMPLE gain3,
                               const CSAMPLE* pSrc4, CSAMPLE gain4,
                               const CSAMPLE* pSrc5, CSAMPLE gain5,
                               const CSAMPLE* pSrc6, CSAMPLE gain6,
                               const CSAMPLE* pSrc7, CSAMPLE gain7,
                               int iNumSamples) {
    if (gain1 == 0.0f) {
        return copy6WithGain(pDest, pSrc2, gain2, pSrc3, gain3, pSrc4, gain4,
                             pSrc5, gain5, pSrc6, gain6, pSrc7, gain7, iNumSamples);
    }
    if (gain2 == 0.0f) {
        return copy6WithGain(pDest, pSrc1, gain1, pSrc3, gain3, pSrc4, gain4,
                             pSrc5, gain5, pSrc6, gain6, pSrc7, gain7, iNumSamples);
    }
    if (gain3 == 0.0f) {
        return copy6WithGain(pDest, pSrc1, gain1, pSrc2, gain2, pSrc4, gain4,
                             pSrc5, gain5, pSrc6, gain6, pSrc7, gain7, iNumSamples);
    }
    if (gain4 == 0.0f) {
        return copy6WithGain(pDest, pSrc1, gain1, pSrc2, gain2, pSrc3, gain3,
                             pSrc5, gain5, pSrc6, gain6, pSrc7, gain7, iNumSamples);
    }
    if (gain5 == 0.0f) {
        return copy6WithGain(pDest, pSrc1, gain1, pSrc2, gain2, pSrc3, gain3,
                             pSrc4, gain4, pSrc6, gain6, pSrc7, gain7, iNumSamples);
    }
    if (gain6 == 0.0f) {
        return copy6WithGain(pDest, pSrc1, gain1, pSrc2, gain2, pSrc3, gain3,
                             pSrc4, gain4, pSrc5, gain5, pSrc7, gain7, iNumSamples);
    }
    if (gain7 == 0.0f) {
        return copy6WithGain(pDest, pSrc1, gain1, pSrc2, gain2, pSrc3, gain3,
                             pSrc4, gain4, pSrc5, gain5, pSrc6, gain6, iNumSamples);
    }
    if (m_sOptimizationsOn) {
        // TODO(rryan) implement SSE for this? worth it?
    }
    for (int i = 0; i < iNumSamples; ++i) {
        pDest[i] = pSrc1[i] * gain1 + pSrc2[i] * gain2 + pSrc3[i] * gain3 +
                   pSrc4[i] * gain4 + pSrc5[i] * gain5 + pSrc6[i] * gain6 +
                   pSrc7[i] * gain7;
    }
}
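
// Note that convert() below is a straight cast with no rescaling: assuming
// SAMPLE is a 16-bit integer type (as the _mm_cvtpi16_ps path requires), the
// output floats land in [-32768.0f, 32767.0f] rather than being normalized
// to [-1.0f, 1.0f].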

// static
void SampleUtil::convert(CSAMPLE* pDest, const SAMPLE* pSrc,
                         int iNumSamples) {
    if (m_sOptimizationsOn) {
        return sseConvert(pDest, pSrc, iNumSamples);
    }

    for (int i = 0; i < iNumSamples; ++i) {
        pDest[i] = pSrc[i];
    }
}

// static
void SampleUtil::sseConvert(CSAMPLE* pDest, const SAMPLE* pSrc,
                            int iNumSamples) {
#ifdef __SSE__
    assert_aligned(pDest);
    assert_aligned(pSrc);
    __m64 vSrcSamples;
    __m128 vDestSamples;
    while (iNumSamples >= 4) {
        vSrcSamples = *((__m64*)pSrc); // ????
        vDestSamples = _mm_cvtpi16_ps(vSrcSamples);

        // WTF _mm_cvtpi16_ps randomly shuffles the high 32-bits to the low
        // 32-bits. We have to shuffle them back to normal order.
        // 0,1,2,3 in produces 2,3,0,1
        vDestSamples = _mm_shuffle_ps(vDestSamples, vDestSamples,
                                      _MM_SHUFFLE(1, 0, 3, 2));
        _mm_store_ps(pDest, vDestSamples);

        iNumSamples -= 4;
        pDest += 4;
        pSrc += 4;
    }
    // _mm_cvtpi16_ps uses the MMX registers, so clear the MMX state (EMMS)
    // before returning to ordinary floating-point code.
    _mm_empty();
    if (iNumSamples > 0) {
        qDebug() << "Not div by 4";
    }
    while (iNumSamples > 0) {
        *pDest = *pSrc;
        pDest++;
        pSrc++;
        iNumSamples--;
    }
#endif
}

// static
void SampleUtil::sumAbsPerChannel(CSAMPLE* pfAbsL, CSAMPLE* pfAbsR,
                                  const CSAMPLE* pBuffer, int iNumSamples) {
    Q_ASSERT(iNumSamples % 2 == 0);
    if (m_sOptimizationsOn) {
        return sseSumAbsPerChannel(pfAbsL, pfAbsR, pBuffer, iNumSamples);
    }

    CSAMPLE fAbsL = 0.0f;
    CSAMPLE fAbsR = 0.0f;

    for (int i = 0; i < iNumSamples; i += 2) {
        fAbsL += fabs(pBuffer[i]);
        fAbsR += fabs(pBuffer[i+1]);
    }

    *pfAbsL = fAbsL;
    *pfAbsR = fAbsR;
}

void SampleUtil::sseSumAbsPerChannel(CSAMPLE* pfAbsL, CSAMPLE* pfAbsR,
                                     const CSAMPLE* pBuffer, int iNumSamples) {
#ifdef __SSE__
    assert_aligned(pBuffer);
    CSAMPLE fAbsL = 0.0f;
    CSAMPLE fAbsR = 0.0f;

    __m128 vSrcSamples;
    __m128 vSum = _mm_setzero_ps();
    // This mask will clear an IEEE754 float's sign bit
    static _ALIGN_16 int32_t l_bitmask[] =
        {0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff};
    const __m128 vSignMask = _mm_load_ps((float*)l_bitmask);
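    // ANDing with 0x7fffffff zeroes bit 31, which for an IEEE754 float is
    // |x|: e.g. -1.0f is 0xbf800000, and 0xbf800000 & 0x7fffffff ==
    // 0x3f800000 == 1.0f.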

    while (iNumSamples >= 4) {
        vSrcSamples = _mm_loadu_ps(pBuffer);
        vSrcSamples = _mm_and_ps(vSrcSamples, vSignMask);
        vSum = _mm_add_ps(vSum, vSrcSamples);
        iNumSamples -= 4;
        pBuffer += 4;
    }

    _ALIGN_16 CSAMPLE result[4];
    assert_aligned(result);
    _mm_store_ps(result, vSum);
    fAbsL = result[0] + result[2];
    fAbsR = result[1] + result[3];
    if (iNumSamples > 0) {
        qDebug() << "Not div by 4";
    }
    while (iNumSamples >= 2) {
        fAbsL += fabs(*pBuffer++);
        fAbsR += fabs(*pBuffer++);
        iNumSamples -= 2;
    }

    *pfAbsL = fAbsL;
    *pfAbsR = fAbsR;
#endif
}

// static
bool SampleUtil::isOutsideRange(CSAMPLE fMax, CSAMPLE fMin,
                                const CSAMPLE* pBuffer, int iNumSamples) {
    if (m_sOptimizationsOn) {
        return sseIsOutsideRange(fMax, fMin, pBuffer, iNumSamples);
    }

    for (int i = 0; i < iNumSamples; ++i) {
        CSAMPLE sample = pBuffer[i];
        if (sample > fMax) {
            return true;
        } else if (sample < fMin) {
            return true;
        }
    }
    return false;
}

// static
bool SampleUtil::sseIsOutsideRange(CSAMPLE fMax, CSAMPLE fMin,
                                   const CSAMPLE* pBuffer, int iNumSamples) {
#ifdef __SSE__
    assert_aligned(pBuffer);
    __m128 vSrcSamples;
    __m128 vClamped = _mm_setzero_ps();
    __m128 vMax = _mm_set1_ps(fMax);
    __m128 vMin = _mm_set1_ps(fMin);
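    // _mm_cmplt_ps/_mm_cmpgt_ps set a lane to all-ones (0xffffffff) where
    // the comparison holds, so ORing the results into vClamped accumulates a
    // per-lane "out of range seen" flag that is inspected once after the loop.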
    while (iNumSamples >= 4) {
        vSrcSamples = _mm_loadu_ps(pBuffer);
        vClamped = _mm_or_ps(vClamped, _mm_cmplt_ps(vSrcSamples, vMin));
        vClamped = _mm_or_ps(vClamped, _mm_cmpgt_ps(vSrcSamples, vMax));
        iNumSamples -= 4;
        pBuffer += 4;
    }
    _ALIGN_16 CSAMPLE clamp[4];
    assert_aligned(clamp);
    _mm_store_ps(clamp, vClamped);
    if (clamp[0] != 0 || clamp[1] != 0 ||
        clamp[2] != 0 || clamp[3] != 0) {
        return true;
    }
    if (iNumSamples > 0) {
        qDebug() << "Not div by 4";
    }
    while (iNumSamples > 0) {
        CSAMPLE sample = *pBuffer;
        if (sample > fMax) {
            return true;
        } else if (sample < fMin) {
            return true;
        }
        pBuffer++;
        iNumSamples--;
    }
#endif
    return false;
}

// static
bool SampleUtil::copyClampBuffer(CSAMPLE fMax, CSAMPLE fMin,
                                 CSAMPLE* pDest, const CSAMPLE* pSrc,
                                 int iNumSamples) {
    if (m_sOptimizationsOn) {
        return sseCopyClampBuffer(fMax, fMin, pDest, pSrc, iNumSamples);
    }

    bool clamped = false;
    if (pSrc == pDest) {
        for (int i = 0; i < iNumSamples; ++i) {
            CSAMPLE sample = pSrc[i];
            if (sample > fMax) {
                clamped = true;
                pDest[i] = fMax;
            } else if (sample < fMin) {
                clamped = true;
                pDest[i] = fMin;
            }
        }
    } else {
        for (int i = 0; i < iNumSamples; ++i) {
            CSAMPLE sample = pSrc[i];
            if (sample > fMax) {
                sample = fMax;
                clamped = true;
            } else if (sample < fMin) {
                sample = fMin;
                clamped = true;
            }
            pDest[i] = sample;
        }
    }
    return clamped;
}

// static
bool SampleUtil::sseCopyClampBuffer(CSAMPLE fMax, CSAMPLE fMin,
                                    CSAMPLE* pDest, const CSAMPLE* pSrc,
                                    int iNumSamples) {
    bool clamped = false;
#ifdef __SSE__
    assert_aligned(pDest);
    assert_aligned(pSrc);
    __m128 vSrcSamples;
    __m128 vClamped = _mm_setzero_ps();
    __m128 vMax = _mm_set1_ps(fMax);
    __m128 vMin = _mm_set1_ps(fMin);
    while (iNumSamples >= 4) {
        vSrcSamples = _mm_loadu_ps(pSrc);
        vClamped = _mm_or_ps(vClamped, _mm_cmplt_ps(vSrcSamples, vMin));
        vClamped = _mm_or_ps(vClamped, _mm_cmpgt_ps(vSrcSamples, vMax));
        vSrcSamples = _mm_max_ps(vSrcSamples, vMin);
        vSrcSamples = _mm_min_ps(vSrcSamples, vMax);
        _mm_store_ps(pDest, vSrcSamples);
        iNumSamples -= 4;
        pDest += 4;
        pSrc += 4;
    }
    _ALIGN_16 CSAMPLE clamp[4];
    assert_aligned(clamp);
    _mm_store_ps(clamp, vClamped);
    if (clamp[0] != 0 || clamp[1] != 0 ||
        clamp[2] != 0 || clamp[3] != 0) {
        clamped = true;
    }
    if (iNumSamples > 0) {
        qDebug() << "Not div by 4";
    }
    while (iNumSamples > 0) {
        CSAMPLE sample = *pSrc;
        if (sample > fMax) {
            sample = fMax;
            clamped = true;
        } else if (sample < fMin) {
            sample = fMin;
            clamped = true;
        }
        *pDest = sample;
        pDest++;
        pSrc++;
        iNumSamples--;
    }
#endif
    return clamped;
}
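
// interleaveBuffer packs two mono buffers of iNumSamples floats each into a
// single 2*iNumSamples buffer, {l1, r1, l2, r2, ...}; deinterleaveBuffer
// below performs the inverse split.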

// static
void SampleUtil::interleaveBuffer(CSAMPLE* pDest,
                                  const CSAMPLE* pSrc1, const CSAMPLE* pSrc2,
                                  int iNumSamples) {
    if (m_sOptimizationsOn) {
        return sseInterleaveBuffer(pDest, pSrc1, pSrc2, iNumSamples);
    }

    for (int i = 0; i < iNumSamples; ++i) {
        pDest[2*i] = pSrc1[i];
        pDest[2*i+1] = pSrc2[i];
    }
}

// static
void SampleUtil::sseInterleaveBuffer(CSAMPLE* pDest,
                                     const CSAMPLE* pSrc1, const CSAMPLE* pSrc2,
                                     int iNumSamples) {
#ifdef __SSE__
    assert_aligned(pDest);
    assert_aligned(pSrc1);
    assert_aligned(pSrc2);
    __m128 vSrc1Samples;
    __m128 vSrc2Samples;
    __m128 vLow;
    __m128 vHigh;
    while (iNumSamples >= 4) {
        vSrc1Samples = _mm_loadu_ps(pSrc1);
        vSrc2Samples = _mm_loadu_ps(pSrc2);
        // vSrc1Samples is l1,l2,l3,l4
        // vSrc2Samples is r1,r2,r3,r4
        vLow = _mm_unpacklo_ps(vSrc1Samples, vSrc2Samples);
        // vLow is l1,r1,l2,r2
        vHigh = _mm_unpackhi_ps(vSrc1Samples, vSrc2Samples);
        // vHigh is l3,r3,l4,r4
        _mm_store_ps(pDest, vLow);
        _mm_store_ps(pDest+4, vHigh);
        iNumSamples -= 4;
        pSrc1 += 4;
        pSrc2 += 4;
        pDest += 8;
    }
    while (iNumSamples > 0) {
        *pDest++ = *pSrc1++;
        *pDest++ = *pSrc2++;
        iNumSamples--;
    }
#endif
}

// static
void SampleUtil::deinterleaveBuffer(CSAMPLE* pDest1, CSAMPLE* pDest2,
                                    const CSAMPLE* pSrc, int iNumSamples) {
    if (m_sOptimizationsOn) {
        return sseDeinterleaveBuffer(pDest1, pDest2, pSrc, iNumSamples);
    }

    for (int i = 0; i < iNumSamples; ++i) {
        pDest1[i] = pSrc[i*2];
        pDest2[i] = pSrc[i*2+1];
    }
}

// static
void SampleUtil::sseDeinterleaveBuffer(CSAMPLE* pDest1, CSAMPLE* pDest2,
                                       const CSAMPLE* pSrc, int iNumSamples) {
#ifdef __SSE__
    assert_aligned(pDest1);
    assert_aligned(pDest2);
    assert_aligned(pSrc);
    __m128 vSrc1Samples;
    __m128 vSrc2Samples;
    __m128 vDst1Samples;
    __m128 vDst2Samples;
    while (iNumSamples >= 4) {
        vSrc1Samples = _mm_loadu_ps(pSrc);
        vSrc2Samples = _mm_loadu_ps(pSrc+4);
        // vSrc1Samples is l1,r1,l2,r2
        // vSrc2Samples is l3,r3,l4,r4

        // First shuffle the middle elements of both.
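        // _MM_SHUFFLE(3, 1, 2, 0) selects source lanes 0, 2, 1, 3 (the last
        // argument picks the lowest destination lane), so {l1, r1, l2, r2}
        // becomes {l1, l2, r1, r2}.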
        vSrc1Samples = _mm_shuffle_ps(vSrc1Samples, vSrc1Samples,
                                      _MM_SHUFFLE(3, 1, 2, 0));
        //_MM_SHUFFLE(0, 2, 1, 3));
        vSrc2Samples = _mm_shuffle_ps(vSrc2Samples, vSrc2Samples,
                                      _MM_SHUFFLE(3, 1, 2, 0));
        //_MM_SHUFFLE(0, 2, 1, 3));
        // vSrc1Samples is now l1,l2,r1,r2
        // vSrc2Samples is now l3,l4,r3,r4

        // Now move the low half of src2 into the high of src1 to make dst1. To
        // make dst2, move the high half of src1 into the low half of src2.
        vDst1Samples = _mm_movelh_ps(vSrc1Samples, vSrc2Samples);
        vDst2Samples = _mm_movehl_ps(vSrc2Samples, vSrc1Samples);
        // vDst1Samples is now l1,l2,l3,l4
        // vDst2Samples is now r1,r2,r3,r4

        _mm_store_ps(pDest1, vDst1Samples);
        _mm_store_ps(pDest2, vDst2Samples);
        iNumSamples -= 4;
        pSrc += 8;
        pDest1 += 4;
        pDest2 += 4;
    }
    while (iNumSamples > 0) {
        *pDest1++ = *pSrc++;
        *pDest2++ = *pSrc++;
        iNumSamples--;
    }
#endif
}

void SampleUtil::setOptimizations(bool opt) {
    qDebug() << "Opts" << opt;
    m_sOptimizationsOn = opt;
}
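
A minimal usage sketch for the listing above (not part of the original file;
the function, the buffer names, and the [-1.0f, 1.0f] clamp range are
hypothetical, and CSAMPLE is assumed to be a float type as the SSE code
requires):

    #include "sampleutil.h"

    // Overwrite an output buffer with an equal-gain mix of two source
    // buffers, then clamp it in place to guard against clipping.
    void mixTwoSources(CSAMPLE* pOutput,
                       const CSAMPLE* pSrcA, const CSAMPLE* pSrcB,
                       int iNumSamples) {
        SampleUtil::copy2WithGain(pOutput, pSrcA, 0.5f, pSrcB, 0.5f,
                                  iNumSamples);
        SampleUtil::copyClampBuffer(1.0f, -1.0f, pOutput, pOutput,
                                    iNumSamples);
    }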