Mixxx
// sampleutil.cpp
// Created 10/5/2009 by RJ Ryan (rryan@mit.edu)

#ifdef __SSE__
#include <xmmintrin.h>
#endif

#ifdef __WINDOWS__
#pragma intrinsic(fabs)
#endif

#include <QtDebug>

#include "sampleutil.h"

#ifdef __SSE__
// Disable optimizations by default. They aren't ready for general use yet. Turn
// this flag on if you feel brave.
bool SampleUtil::m_sOptimizationsOn = false; // true;
#else
bool SampleUtil::m_sOptimizationsOn = false;
#endif

// static
CSAMPLE* SampleUtil::alloc(int size) {
    // TODO(XXX) align the array
    return new CSAMPLE[size];
}

void SampleUtil::free(CSAMPLE* pBuffer) {
    delete [] pBuffer;
}

// static
void SampleUtil::applyGain(CSAMPLE* pBuffer,
                           CSAMPLE gain, int iNumSamples) {
    if (gain == 1.0f)
        return;
    if (gain == 0.0f) {
        memset(pBuffer, 0, sizeof(pBuffer[0]) * iNumSamples);
        return;
    }

    if (m_sOptimizationsOn) {
        return sseApplyGain(pBuffer, gain, iNumSamples);
    }

    for (int i = 0; i < iNumSamples; ++i) {
        pBuffer[i] *= gain;
    }
}

// static
void SampleUtil::sseApplyGain(_ALIGN_16 CSAMPLE* pBuffer,
                              _ALIGN_16 CSAMPLE gain, int iNumSamples) {
#ifdef __SSE__
    assert_aligned(pBuffer);
    __m128 vSamples;
    __m128 vGain = _mm_set1_ps(gain);
    while (iNumSamples >= 4) {
        vSamples = _mm_loadu_ps(pBuffer);
        vSamples = _mm_mul_ps(vSamples, vGain);
        _mm_store_ps(pBuffer, vSamples);

        iNumSamples -= 4;
        pBuffer += 4;
    }
    if (iNumSamples > 0) {
        qDebug() << "Not div by 4";
    }
    while (iNumSamples > 0) {
        *pBuffer = *pBuffer * gain;
        pBuffer++;
        iNumSamples--;
    }
#endif
}
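
// The alternating-gain variants below are aimed at interleaved stereo
// buffers, {l1, r1, l2, r2, ...}: gain1 is applied to even (left) samples
// and gain2 to odd (right) samples. A usage sketch with hypothetical names:
//
//     // Halve the left channel, leave the right untouched. iNumSamples
//     // counts total floats in the interleaved buffer, not frames.
//     SampleUtil::applyAlternatingGain(pStereo, 0.5f, 1.0f, iNumSamples);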

// static
void SampleUtil::applyAlternatingGain(CSAMPLE* pBuffer,
                                      CSAMPLE gain1, CSAMPLE gain2,
                                      int iNumSamples) {
    Q_ASSERT(iNumSamples % 2 == 0);
    // This handles gain1 == 1.0 && gain2 == 1.0f as well.
    if (gain1 == gain2) {
        return applyGain(pBuffer, gain1, iNumSamples);
    }
    if (m_sOptimizationsOn)
        return sseApplyAlternatingGain(pBuffer, gain1, gain2, iNumSamples);

    for (int i = 0; i < iNumSamples; i += 2) {
        pBuffer[i] *= gain1;
        pBuffer[i+1] *= gain2;
    }
}

// static
void SampleUtil::sseApplyAlternatingGain(CSAMPLE* pBuffer,
                                         CSAMPLE gain1, CSAMPLE gain2,
                                         int iNumSamples) {
#ifdef __SSE__
    Q_ASSERT(iNumSamples % 2 == 0);
    assert_aligned(pBuffer);
    __m128 vSamples;
    __m128 vGain = _mm_set_ps(gain2, gain1, gain2, gain1);
    while (iNumSamples >= 4) {
        vSamples = _mm_loadu_ps(pBuffer);
        vSamples = _mm_mul_ps(vSamples, vGain);
        _mm_store_ps(pBuffer, vSamples);

        iNumSamples -= 4;
        pBuffer += 4;
    }
    if (iNumSamples > 0) {
        qDebug() << "Not div by 4";
    }
    while (iNumSamples > 0) {
        *pBuffer = *pBuffer * gain1;
        pBuffer++;
        *pBuffer = *pBuffer * gain2;
        pBuffer++;
        iNumSamples -= 2;
    }
#endif
}

// static
void SampleUtil::addWithGain(CSAMPLE* pDest, const CSAMPLE* pSrc,
                             CSAMPLE gain, int iNumSamples) {
    if (gain == 0.0f)
        return;
    if (m_sOptimizationsOn)
        return sseAddWithGain(pDest, pSrc, gain, iNumSamples);

    for (int i = 0; i < iNumSamples; ++i) {
        pDest[i] += pSrc[i] * gain;
    }
}

// static
void SampleUtil::sseAddWithGain(CSAMPLE* pDest, const CSAMPLE* pSrc,
                                CSAMPLE gain, int iNumSamples) {
#ifdef __SSE__
    assert_aligned(pDest);
    assert_aligned(pSrc);
    __m128 vSrcSamples;
    __m128 vDestSamples;
    __m128 vGain = _mm_set1_ps(gain);
    while (iNumSamples >= 4) {
        vSrcSamples = _mm_loadu_ps(pSrc);
        vSrcSamples = _mm_mul_ps(vSrcSamples, vGain);
        vDestSamples = _mm_loadu_ps(pDest);
        _mm_store_ps(pDest, _mm_add_ps(vDestSamples, vSrcSamples));
        iNumSamples -= 4;
        pDest += 4;
        pSrc += 4;
    }
    if (iNumSamples > 0) {
        qDebug() << "Not div by 4";
    }
    while (iNumSamples > 0) {
        *pDest = *pDest + *pSrc * gain;
        pDest++;
        pSrc++;
        iNumSamples--;
    }
#endif
}
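
// add2WithGain and add3WithGain accumulate several gain-scaled sources into
// pDest; a typical use would be summing source buffers into a shared output
// buffer. A sketch with hypothetical buffer names, mixing two sources at
// equal gain:
//
//     SampleUtil::add2WithGain(pOutput, pSourceA, 0.5f, pSourceB, 0.5f, n);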

// static
void SampleUtil::add2WithGain(CSAMPLE* pDest,
                              const CSAMPLE* pSrc1, CSAMPLE gain1,
                              const CSAMPLE* pSrc2, CSAMPLE gain2,
                              int iNumSamples) {
    if (gain1 == 0.0f) {
        return addWithGain(pDest, pSrc2, gain2, iNumSamples);
    } else if (gain2 == 0.0f) {
        return addWithGain(pDest, pSrc1, gain1, iNumSamples);
    }

    if (m_sOptimizationsOn)
        return sseAdd2WithGain(pDest, pSrc1, gain1, pSrc2, gain2, iNumSamples);

    for (int i = 0; i < iNumSamples; ++i) {
        pDest[i] += pSrc1[i] * gain1 + pSrc2[i] * gain2;
    }
}

// static
void SampleUtil::sseAdd2WithGain(CSAMPLE* pDest,
                                 const CSAMPLE* pSrc1, CSAMPLE gain1,
                                 const CSAMPLE* pSrc2, CSAMPLE gain2,
                                 int iNumSamples) {
#ifdef __SSE__
    assert_aligned(pSrc1);
    assert_aligned(pSrc2);
    assert_aligned(pDest);
    __m128 vSrc1Samples;
    __m128 vSrc2Samples;
    __m128 vDestSamples;
    __m128 vGain1 = _mm_set1_ps(gain1);
    __m128 vGain2 = _mm_set1_ps(gain2);
    while (iNumSamples >= 4) {
        vSrc1Samples = _mm_loadu_ps(pSrc1);
        vSrc1Samples = _mm_mul_ps(vSrc1Samples, vGain1);
        vSrc2Samples = _mm_loadu_ps(pSrc2);
        vSrc2Samples = _mm_mul_ps(vSrc2Samples, vGain2);
        vDestSamples = _mm_loadu_ps(pDest);
        vDestSamples = _mm_add_ps(vDestSamples, vSrc1Samples);
        vDestSamples = _mm_add_ps(vDestSamples, vSrc2Samples);
        _mm_store_ps(pDest, vDestSamples);
        iNumSamples -= 4;
        pDest += 4;
        pSrc1 += 4;
        pSrc2 += 4;
    }
    if (iNumSamples > 0) {
        qDebug() << "Not div by 4";
    }
    while (iNumSamples > 0) {
        *pDest = *pDest + *pSrc1 * gain1 + *pSrc2 * gain2;
        pDest++;
        pSrc1++;
        pSrc2++;
        iNumSamples--;
    }
#endif
}

// static
void SampleUtil::add3WithGain(CSAMPLE* pDest,
                              const CSAMPLE* pSrc1, CSAMPLE gain1,
                              const CSAMPLE* pSrc2, CSAMPLE gain2,
                              const CSAMPLE* pSrc3, CSAMPLE gain3,
                              int iNumSamples) {
    if (gain1 == 0.0f) {
        return add2WithGain(pDest, pSrc2, gain2, pSrc3, gain3, iNumSamples);
    } else if (gain2 == 0.0f) {
        return add2WithGain(pDest, pSrc1, gain1, pSrc3, gain3, iNumSamples);
    } else if (gain3 == 0.0f) {
        return add2WithGain(pDest, pSrc1, gain1, pSrc2, gain2, iNumSamples);
    }

    if (m_sOptimizationsOn)
        return sseAdd3WithGain(pDest, pSrc1, gain1, pSrc2, gain2,
                               pSrc3, gain3, iNumSamples);

    for (int i = 0; i < iNumSamples; ++i) {
        pDest[i] += pSrc1[i] * gain1 + pSrc2[i] * gain2 + pSrc3[i] * gain3;
    }
}

// static
void SampleUtil::sseAdd3WithGain(CSAMPLE* pDest,
                                 const CSAMPLE* pSrc1, CSAMPLE gain1,
                                 const CSAMPLE* pSrc2, CSAMPLE gain2,
                                 const CSAMPLE* pSrc3, CSAMPLE gain3,
                                 int iNumSamples) {
#ifdef __SSE__
    assert_aligned(pDest);
    assert_aligned(pSrc1);
    assert_aligned(pSrc2);
    assert_aligned(pSrc3);
    __m128 vSrc1Samples;
    __m128 vSrc2Samples;
    __m128 vSrc3Samples;
    __m128 vDestSamples;
    __m128 vGain1 = _mm_set1_ps(gain1);
    __m128 vGain2 = _mm_set1_ps(gain2);
    __m128 vGain3 = _mm_set1_ps(gain3);
    while (iNumSamples >= 4) {
        vSrc1Samples = _mm_loadu_ps(pSrc1);
        vSrc1Samples = _mm_mul_ps(vSrc1Samples, vGain1);
        vSrc2Samples = _mm_loadu_ps(pSrc2);
        vSrc2Samples = _mm_mul_ps(vSrc2Samples, vGain2);
        vSrc3Samples = _mm_loadu_ps(pSrc3);
        vSrc3Samples = _mm_mul_ps(vSrc3Samples, vGain3);
        vDestSamples = _mm_loadu_ps(pDest);
        vDestSamples = _mm_add_ps(vDestSamples, vSrc1Samples);
        vDestSamples = _mm_add_ps(vDestSamples, vSrc2Samples);
        vDestSamples = _mm_add_ps(vDestSamples, vSrc3Samples);
        _mm_store_ps(pDest, vDestSamples);
        iNumSamples -= 4;
        pDest += 4;
        pSrc1 += 4;
        pSrc2 += 4;
        pSrc3 += 4;
    }
    if (iNumSamples > 0) {
        qDebug() << "Not div by 4";
    }
    while (iNumSamples > 0) {
        *pDest = *pDest + *pSrc1 * gain1 + *pSrc2 * gain2 + *pSrc3 * gain3;
        pDest++;
        pSrc1++;
        pSrc2++;
        pSrc3++;
        iNumSamples--;
    }
#endif
}
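
// Unlike the add*WithGain family above, the copy*WithGain functions
// overwrite pDest instead of accumulating into it. A sketch with
// hypothetical buffer names:
//
//     SampleUtil::copyWithGain(pOut, pIn, 0.25f, n);  // pOut[i] = pIn[i] * 0.25f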

// static
void SampleUtil::copyWithGain(CSAMPLE* pDest, const CSAMPLE* pSrc,
                              CSAMPLE gain, int iNumSamples) {
    if (pDest == pSrc) {
        return applyGain(pDest, gain, iNumSamples);
    }
    if (gain == 1.0f) {
        memcpy(pDest, pSrc, sizeof(pDest[0]) * iNumSamples);
        return;
    }
    if (gain == 0.0f) {
        memset(pDest, 0, sizeof(pDest[0]) * iNumSamples);
        return;
    }

    if (m_sOptimizationsOn) {
        return sseCopyWithGain(pDest, pSrc, gain, iNumSamples);
    }

    for (int i = 0; i < iNumSamples; ++i) {
        pDest[i] = pSrc[i] * gain;
    }

    // OR! need to test which fares better
    // memcpy(pDest, pSrc, sizeof(pDest[0]) * iNumSamples);
    // applyGain(pDest, gain);
}

// static
void SampleUtil::sseCopyWithGain(CSAMPLE* pDest, const CSAMPLE* pSrc,
                                 CSAMPLE gain, int iNumSamples) {
#ifdef __SSE__
    assert_aligned(pDest);
    assert_aligned(pSrc);
    __m128 vSrcSamples;
    __m128 vGain = _mm_set1_ps(gain);
    while (iNumSamples >= 4) {
        vSrcSamples = _mm_loadu_ps(pSrc);
        vSrcSamples = _mm_mul_ps(vSrcSamples, vGain);
        _mm_store_ps(pDest, vSrcSamples);
        iNumSamples -= 4;
        pDest += 4;
        pSrc += 4;
    }
    if (iNumSamples > 0) {
        qDebug() << "Not div by 4";
    }
    while (iNumSamples > 0) {
        *pDest = *pSrc * gain;
        pDest++;
        pSrc++;
        iNumSamples--;
    }
#endif
}

// static
void SampleUtil::copy2WithGain(CSAMPLE* pDest,
                               const CSAMPLE* pSrc1, CSAMPLE gain1,
                               const CSAMPLE* pSrc2, CSAMPLE gain2,
                               int iNumSamples) {
    if (gain1 == 0.0f) {
        return copyWithGain(pDest, pSrc2, gain2, iNumSamples);
    }
    if (gain2 == 0.0f) {
        return copyWithGain(pDest, pSrc1, gain1, iNumSamples);
    }
    if (m_sOptimizationsOn) {
        return sseCopy2WithGain(pDest, pSrc1, gain1, pSrc2, gain2, iNumSamples);
    }

    for (int i = 0; i < iNumSamples; ++i) {
        pDest[i] = pSrc1[i] * gain1 + pSrc2[i] * gain2;
    }
}

// static
void SampleUtil::sseCopy2WithGain(CSAMPLE* pDest,
                                  const CSAMPLE* pSrc1, CSAMPLE gain1,
                                  const CSAMPLE* pSrc2, CSAMPLE gain2,
                                  int iNumSamples) {
#ifdef __SSE__
    assert_aligned(pDest);
    assert_aligned(pSrc1);
    assert_aligned(pSrc2);
    __m128 vSrc1Samples;
    __m128 vSrc2Samples;
    __m128 vGain1 = _mm_set1_ps(gain1);
    __m128 vGain2 = _mm_set1_ps(gain2);
    while (iNumSamples >= 4) {
        vSrc1Samples = _mm_loadu_ps(pSrc1);
        vSrc1Samples = _mm_mul_ps(vSrc1Samples, vGain1);
        vSrc2Samples = _mm_loadu_ps(pSrc2);
        vSrc2Samples = _mm_mul_ps(vSrc2Samples, vGain2);
        _mm_store_ps(pDest, _mm_add_ps(vSrc1Samples, vSrc2Samples));
        iNumSamples -= 4;
        pDest += 4;
        pSrc1 += 4;
        pSrc2 += 4;
    }
    if (iNumSamples > 0) {
        qDebug() << "Not div by 4";
    }
    while (iNumSamples > 0) {
        *pDest = *pSrc1 * gain1 + *pSrc2 * gain2;
        pDest++;
        pSrc1++;
        pSrc2++;
        iNumSamples--;
    }
#endif
}

// static
void SampleUtil::copy3WithGain(CSAMPLE* pDest,
                               const CSAMPLE* pSrc1, CSAMPLE gain1,
                               const CSAMPLE* pSrc2, CSAMPLE gain2,
                               const CSAMPLE* pSrc3, CSAMPLE gain3,
                               int iNumSamples) {
    if (gain1 == 0.0f) {
        return copy2WithGain(pDest, pSrc2, gain2, pSrc3, gain3, iNumSamples);
    }
    if (gain2 == 0.0f) {
        return copy2WithGain(pDest, pSrc1, gain1, pSrc3, gain3, iNumSamples);
    }
    if (gain3 == 0.0f) {
        return copy2WithGain(pDest, pSrc1, gain1, pSrc2, gain2, iNumSamples);
    }
    if (m_sOptimizationsOn) {
        return sseCopy3WithGain(pDest, pSrc1, gain1, pSrc2, gain2,
                                pSrc3, gain3, iNumSamples);
    }

    for (int i = 0; i < iNumSamples; ++i) {
        pDest[i] = pSrc1[i] * gain1 + pSrc2[i] * gain2 + pSrc3[i] * gain3;
    }
}

// static
void SampleUtil::sseCopy3WithGain(CSAMPLE* pDest,
                                  const CSAMPLE* pSrc1, CSAMPLE gain1,
                                  const CSAMPLE* pSrc2, CSAMPLE gain2,
                                  const CSAMPLE* pSrc3, CSAMPLE gain3,
                                  int iNumSamples) {
#ifdef __SSE__
    assert_aligned(pDest);
    assert_aligned(pSrc1);
    assert_aligned(pSrc2);
    assert_aligned(pSrc3);
    __m128 vSrc1Samples;
    __m128 vSrc2Samples;
    __m128 vSrc3Samples;
    __m128 vGain1 = _mm_set1_ps(gain1);
    __m128 vGain2 = _mm_set1_ps(gain2);
    __m128 vGain3 = _mm_set1_ps(gain3);
    while (iNumSamples >= 4) {
        vSrc1Samples = _mm_loadu_ps(pSrc1);
        vSrc1Samples = _mm_mul_ps(vSrc1Samples, vGain1);
        vSrc2Samples = _mm_loadu_ps(pSrc2);
        vSrc2Samples = _mm_mul_ps(vSrc2Samples, vGain2);
        vSrc3Samples = _mm_loadu_ps(pSrc3);
        vSrc3Samples = _mm_mul_ps(vSrc3Samples, vGain3);

        vSrc1Samples = _mm_add_ps(vSrc1Samples, vSrc2Samples);
        vSrc1Samples = _mm_add_ps(vSrc1Samples, vSrc3Samples);
        _mm_store_ps(pDest, vSrc1Samples);
        iNumSamples -= 4;
        pDest += 4;
        pSrc1 += 4;
        pSrc2 += 4;
        pSrc3 += 4;
    }
    if (iNumSamples > 0) {
        qDebug() << "Not div by 4";
    }
    while (iNumSamples > 0) {
        *pDest = *pSrc1 * gain1 + *pSrc2 * gain2 + *pSrc3 * gain3;
        pDest++;
        pSrc1++;
        pSrc2++;
        pSrc3++;
        iNumSamples--;
    }
#endif
}
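
// copy4WithGain through copy7WithGain have no SSE versions; they rely on the
// zero-gain checks below to prune muted sources, so e.g. copy7WithGain with
// one gain at 0.0f recurses into copy6WithGain and a fully-muted source
// costs nothing.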

// static
void SampleUtil::copy4WithGain(CSAMPLE* pDest,
                               const CSAMPLE* pSrc1, CSAMPLE gain1,
                               const CSAMPLE* pSrc2, CSAMPLE gain2,
                               const CSAMPLE* pSrc3, CSAMPLE gain3,
                               const CSAMPLE* pSrc4, CSAMPLE gain4,
                               int iNumSamples) {
    if (gain1 == 0.0f) {
        return copy3WithGain(pDest, pSrc2, gain2, pSrc3, gain3,
                             pSrc4, gain4, iNumSamples);
    }
    if (gain2 == 0.0f) {
        return copy3WithGain(pDest, pSrc1, gain1, pSrc3, gain3,
                             pSrc4, gain4, iNumSamples);
    }
    if (gain3 == 0.0f) {
        return copy3WithGain(pDest, pSrc1, gain1, pSrc2, gain2,
                             pSrc4, gain4, iNumSamples);
    }
    if (gain4 == 0.0f) {
        return copy3WithGain(pDest, pSrc1, gain1, pSrc2, gain2,
                             pSrc3, gain3, iNumSamples);
    }
    if (m_sOptimizationsOn) {
        // TODO(rryan) implement SSE for this? worth it?
    }
    for (int i = 0; i < iNumSamples; ++i) {
        pDest[i] = pSrc1[i] * gain1 + pSrc2[i] * gain2 +
                   pSrc3[i] * gain3 + pSrc4[i] * gain4;
    }
}

// static
void SampleUtil::copy5WithGain(CSAMPLE* pDest,
                               const CSAMPLE* pSrc1, CSAMPLE gain1,
                               const CSAMPLE* pSrc2, CSAMPLE gain2,
                               const CSAMPLE* pSrc3, CSAMPLE gain3,
                               const CSAMPLE* pSrc4, CSAMPLE gain4,
                               const CSAMPLE* pSrc5, CSAMPLE gain5,
                               int iNumSamples) {
    if (gain1 == 0.0f) {
        return copy4WithGain(pDest, pSrc2, gain2, pSrc3, gain3,
                             pSrc4, gain4, pSrc5, gain5, iNumSamples);
    }
    if (gain2 == 0.0f) {
        return copy4WithGain(pDest, pSrc1, gain1, pSrc3, gain3,
                             pSrc4, gain4, pSrc5, gain5, iNumSamples);
    }
    if (gain3 == 0.0f) {
        return copy4WithGain(pDest, pSrc1, gain1, pSrc2, gain2,
                             pSrc4, gain4, pSrc5, gain5, iNumSamples);
    }
    if (gain4 == 0.0f) {
        return copy4WithGain(pDest, pSrc1, gain1, pSrc2, gain2,
                             pSrc3, gain3, pSrc5, gain5, iNumSamples);
    }
    if (gain5 == 0.0f) {
        return copy4WithGain(pDest, pSrc1, gain1, pSrc2, gain2,
                             pSrc3, gain3, pSrc4, gain4, iNumSamples);
    }

    if (m_sOptimizationsOn) {
        // TODO(rryan) implement SSE for this? worth it?
    }

    for (int i = 0; i < iNumSamples; ++i) {
        pDest[i] = pSrc1[i] * gain1 + pSrc2[i] * gain2 + pSrc3[i] * gain3 +
                   pSrc4[i] * gain4 + pSrc5[i] * gain5;
    }
}

// static
void SampleUtil::copy6WithGain(CSAMPLE* pDest,
                               const CSAMPLE* pSrc1, CSAMPLE gain1,
                               const CSAMPLE* pSrc2, CSAMPLE gain2,
                               const CSAMPLE* pSrc3, CSAMPLE gain3,
                               const CSAMPLE* pSrc4, CSAMPLE gain4,
                               const CSAMPLE* pSrc5, CSAMPLE gain5,
                               const CSAMPLE* pSrc6, CSAMPLE gain6,
                               int iNumSamples) {
    if (gain1 == 0.0f) {
        return copy5WithGain(pDest, pSrc2, gain2, pSrc3, gain3, pSrc4, gain4,
                             pSrc5, gain5, pSrc6, gain6, iNumSamples);
    }
    if (gain2 == 0.0f) {
        return copy5WithGain(pDest, pSrc1, gain1, pSrc3, gain3, pSrc4, gain4,
                             pSrc5, gain5, pSrc6, gain6, iNumSamples);
    }
    if (gain3 == 0.0f) {
        return copy5WithGain(pDest, pSrc1, gain1, pSrc2, gain2, pSrc4, gain4,
                             pSrc5, gain5, pSrc6, gain6, iNumSamples);
    }
    if (gain4 == 0.0f) {
        return copy5WithGain(pDest, pSrc1, gain1, pSrc2, gain2, pSrc3, gain3,
                             pSrc5, gain5, pSrc6, gain6, iNumSamples);
    }
    if (gain5 == 0.0f) {
        return copy5WithGain(pDest, pSrc1, gain1, pSrc2, gain2, pSrc3, gain3,
                             pSrc4, gain4, pSrc6, gain6, iNumSamples);
    }
    if (gain6 == 0.0f) {
        return copy5WithGain(pDest, pSrc1, gain1, pSrc2, gain2, pSrc3, gain3,
                             pSrc4, gain4, pSrc5, gain5, iNumSamples);
    }
    if (m_sOptimizationsOn) {
        // TODO(rryan) implement SSE for this? worth it?
    }
    for (int i = 0; i < iNumSamples; ++i) {
        pDest[i] = pSrc1[i] * gain1 + pSrc2[i] * gain2 + pSrc3[i] * gain3 +
                   pSrc4[i] * gain4 + pSrc5[i] * gain5 + pSrc6[i] * gain6;
    }
}

// static
void SampleUtil::copy7WithGain(CSAMPLE* pDest,
                               const CSAMPLE* pSrc1, CSAMPLE gain1,
                               const CSAMPLE* pSrc2, CSAMPLE gain2,
                               const CSAMPLE* pSrc3, CSAMPLE gain3,
                               const CSAMPLE* pSrc4, CSAMPLE gain4,
                               const CSAMPLE* pSrc5, CSAMPLE gain5,
                               const CSAMPLE* pSrc6, CSAMPLE gain6,
                               const CSAMPLE* pSrc7, CSAMPLE gain7,
                               int iNumSamples) {
    if (gain1 == 0.0f) {
        return copy6WithGain(pDest, pSrc2, gain2, pSrc3, gain3, pSrc4, gain4,
                             pSrc5, gain5, pSrc6, gain6, pSrc7, gain7, iNumSamples);
    }
    if (gain2 == 0.0f) {
        return copy6WithGain(pDest, pSrc1, gain1, pSrc3, gain3, pSrc4, gain4,
                             pSrc5, gain5, pSrc6, gain6, pSrc7, gain7, iNumSamples);
    }
    if (gain3 == 0.0f) {
        return copy6WithGain(pDest, pSrc1, gain1, pSrc2, gain2, pSrc4, gain4,
                             pSrc5, gain5, pSrc6, gain6, pSrc7, gain7, iNumSamples);
    }
    if (gain4 == 0.0f) {
        return copy6WithGain(pDest, pSrc1, gain1, pSrc2, gain2, pSrc3, gain3,
                             pSrc5, gain5, pSrc6, gain6, pSrc7, gain7, iNumSamples);
    }
    if (gain5 == 0.0f) {
        return copy6WithGain(pDest, pSrc1, gain1, pSrc2, gain2, pSrc3, gain3,
                             pSrc4, gain4, pSrc6, gain6, pSrc7, gain7, iNumSamples);
    }
    if (gain6 == 0.0f) {
        return copy6WithGain(pDest, pSrc1, gain1, pSrc2, gain2, pSrc3, gain3,
                             pSrc4, gain4, pSrc5, gain5, pSrc7, gain7, iNumSamples);
    }
    if (gain7 == 0.0f) {
        return copy6WithGain(pDest, pSrc1, gain1, pSrc2, gain2, pSrc3, gain3,
                             pSrc4, gain4, pSrc5, gain5, pSrc6, gain6, iNumSamples);
    }
    if (m_sOptimizationsOn) {
        // TODO(rryan) implement SSE for this? worth it?
    }
    for (int i = 0; i < iNumSamples; ++i) {
        pDest[i] = pSrc1[i] * gain1 + pSrc2[i] * gain2 + pSrc3[i] * gain3 +
                   pSrc4[i] * gain4 + pSrc5[i] * gain5 + pSrc6[i] * gain6 +
                   pSrc7[i] * gain7;
    }
}
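
// Note that convert() below is a straight cast with no rescaling: assuming
// SAMPLE is a 16-bit integer type (as the _mm_cvtpi16_ps path requires), the
// output floats land in [-32768.0f, 32767.0f] rather than being normalized
// to [-1.0f, 1.0f].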

// static
void SampleUtil::convert(CSAMPLE* pDest, const SAMPLE* pSrc,
                         int iNumSamples) {
    if (m_sOptimizationsOn) {
        return sseConvert(pDest, pSrc, iNumSamples);
    }

    for (int i = 0; i < iNumSamples; ++i) {
        pDest[i] = pSrc[i];
    }
}

// static
void SampleUtil::sseConvert(CSAMPLE* pDest, const SAMPLE* pSrc,
                            int iNumSamples) {
#ifdef __SSE__
    assert_aligned(pDest);
    assert_aligned(pSrc);
    __m64 vSrcSamples;
    __m128 vDestSamples;
    while (iNumSamples >= 4) {
        vSrcSamples = *((__m64*)pSrc); // ????
        vDestSamples = _mm_cvtpi16_ps(vSrcSamples);

        // WTF _mm_cvtpi16_ps randomly shuffles the high 32-bits to the low
        // 32-bits. We have to shuffle them back to normal order.
        // 0,1,2,3 in produces 2,3,0,1
        vDestSamples = _mm_shuffle_ps(vDestSamples, vDestSamples,
                                      _MM_SHUFFLE(1, 0, 3, 2));
        _mm_store_ps(pDest, vDestSamples);

        iNumSamples -= 4;
        pDest += 4;
        pSrc += 4;
    }
    // _mm_cvtpi16_ps uses the MMX registers, so clear the MMX state (EMMS)
    // before returning to ordinary floating-point code.
    _mm_empty();
    if (iNumSamples > 0) {
        qDebug() << "Not div by 4";
    }
    while (iNumSamples > 0) {
        *pDest = *pSrc;
        pDest++;
        pSrc++;
        iNumSamples--;
    }
#endif
}

// static
void SampleUtil::sumAbsPerChannel(CSAMPLE* pfAbsL, CSAMPLE* pfAbsR,
                                  const CSAMPLE* pBuffer, int iNumSamples) {
    Q_ASSERT(iNumSamples % 2 == 0);
    if (m_sOptimizationsOn) {
        return sseSumAbsPerChannel(pfAbsL, pfAbsR, pBuffer, iNumSamples);
    }

    CSAMPLE fAbsL = 0.0f;
    CSAMPLE fAbsR = 0.0f;

    for (int i = 0; i < iNumSamples; i += 2) {
        fAbsL += fabs(pBuffer[i]);
        fAbsR += fabs(pBuffer[i+1]);
    }

    *pfAbsL = fAbsL;
    *pfAbsR = fAbsR;
}

void SampleUtil::sseSumAbsPerChannel(CSAMPLE* pfAbsL, CSAMPLE* pfAbsR,
                                     const CSAMPLE* pBuffer, int iNumSamples) {
#ifdef __SSE__
    assert_aligned(pBuffer);
    CSAMPLE fAbsL = 0.0f;
    CSAMPLE fAbsR = 0.0f;

    __m128 vSrcSamples;
    __m128 vSum = _mm_setzero_ps();
    // This mask will clear an IEEE754 float's sign bit
    static _ALIGN_16 int32_t l_bitmask[] =
        {0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff};
    const __m128 vSignMask = _mm_load_ps((float*)l_bitmask);
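    // ANDing with 0x7fffffff zeroes bit 31, which for an IEEE754 float is
    // |x|: e.g. -1.0f is 0xbf800000, and 0xbf800000 & 0x7fffffff ==
    // 0x3f800000 == 1.0f.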

    while (iNumSamples >= 4) {
        vSrcSamples = _mm_loadu_ps(pBuffer);
        vSrcSamples = _mm_and_ps(vSrcSamples, vSignMask);
        vSum = _mm_add_ps(vSum, vSrcSamples);
        iNumSamples -= 4;
        pBuffer += 4;
    }

    _ALIGN_16 CSAMPLE result[4];
    assert_aligned(result);
    _mm_store_ps(result, vSum);
    fAbsL = result[0] + result[2];
    fAbsR = result[1] + result[3];
    if (iNumSamples > 0) {
        qDebug() << "Not div by 4";
    }
    while (iNumSamples >= 2) {
        fAbsL += fabs(*pBuffer++);
        fAbsR += fabs(*pBuffer++);
        iNumSamples -= 2;
    }

    *pfAbsL = fAbsL;
    *pfAbsR = fAbsR;
#endif
}

// static
bool SampleUtil::isOutsideRange(CSAMPLE fMax, CSAMPLE fMin,
                                const CSAMPLE* pBuffer, int iNumSamples) {
    if (m_sOptimizationsOn) {
        return sseIsOutsideRange(fMax, fMin, pBuffer, iNumSamples);
    }

    for (int i = 0; i < iNumSamples; ++i) {
        CSAMPLE sample = pBuffer[i];
        if (sample > fMax) {
            return true;
        } else if (sample < fMin) {
            return true;
        }
    }
    return false;
}

// static
bool SampleUtil::sseIsOutsideRange(CSAMPLE fMax, CSAMPLE fMin,
                                   const CSAMPLE* pBuffer, int iNumSamples) {
#ifdef __SSE__
    assert_aligned(pBuffer);
    __m128 vSrcSamples;
    __m128 vClamped = _mm_setzero_ps();
    __m128 vMax = _mm_set1_ps(fMax);
    __m128 vMin = _mm_set1_ps(fMin);
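    // _mm_cmplt_ps/_mm_cmpgt_ps set a lane to all-ones (0xffffffff) where
    // the comparison holds, so ORing the results into vClamped accumulates a
    // per-lane "out of range seen" flag that is inspected once after the loop.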
    while (iNumSamples >= 4) {
        vSrcSamples = _mm_loadu_ps(pBuffer);
        vClamped = _mm_or_ps(vClamped, _mm_cmplt_ps(vSrcSamples, vMin));
        vClamped = _mm_or_ps(vClamped, _mm_cmpgt_ps(vSrcSamples, vMax));
        iNumSamples -= 4;
        pBuffer += 4;
    }
    _ALIGN_16 CSAMPLE clamp[4];
    assert_aligned(clamp);
    _mm_store_ps(clamp, vClamped);
    if (clamp[0] != 0 || clamp[1] != 0 ||
        clamp[2] != 0 || clamp[3] != 0) {
        return true;
    }
    if (iNumSamples > 0) {
        qDebug() << "Not div by 4";
    }
    while (iNumSamples > 0) {
        CSAMPLE sample = *pBuffer;
        if (sample > fMax) {
            return true;
        } else if (sample < fMin) {
            return true;
        }
        pBuffer++;
        iNumSamples--;
    }
#endif
    return false;
}

// static
bool SampleUtil::copyClampBuffer(CSAMPLE fMax, CSAMPLE fMin,
                                 CSAMPLE* pDest, const CSAMPLE* pSrc,
                                 int iNumSamples) {
    if (m_sOptimizationsOn) {
        return sseCopyClampBuffer(fMax, fMin, pDest, pSrc, iNumSamples);
    }

    bool clamped = false;
    if (pSrc == pDest) {
        for (int i = 0; i < iNumSamples; ++i) {
            CSAMPLE sample = pSrc[i];
            if (sample > fMax) {
                clamped = true;
                pDest[i] = fMax;
            } else if (sample < fMin) {
                clamped = true;
                pDest[i] = fMin;
            }
        }
    } else {
        for (int i = 0; i < iNumSamples; ++i) {
            CSAMPLE sample = pSrc[i];
            if (sample > fMax) {
                sample = fMax;
                clamped = true;
            } else if (sample < fMin) {
                sample = fMin;
                clamped = true;
            }
            pDest[i] = sample;
        }
    }
    return clamped;
}

// static
bool SampleUtil::sseCopyClampBuffer(CSAMPLE fMax, CSAMPLE fMin,
                                    CSAMPLE* pDest, const CSAMPLE* pSrc,
                                    int iNumSamples) {
    bool clamped = false;
#ifdef __SSE__
    assert_aligned(pDest);
    assert_aligned(pSrc);
    __m128 vSrcSamples;
    __m128 vClamped = _mm_setzero_ps();
    __m128 vMax = _mm_set1_ps(fMax);
    __m128 vMin = _mm_set1_ps(fMin);
    while (iNumSamples >= 4) {
        vSrcSamples = _mm_loadu_ps(pSrc);
        vClamped = _mm_or_ps(vClamped, _mm_cmplt_ps(vSrcSamples, vMin));
        vClamped = _mm_or_ps(vClamped, _mm_cmpgt_ps(vSrcSamples, vMax));
        vSrcSamples = _mm_max_ps(vSrcSamples, vMin);
        vSrcSamples = _mm_min_ps(vSrcSamples, vMax);
        _mm_store_ps(pDest, vSrcSamples);
        iNumSamples -= 4;
        pDest += 4;
        pSrc += 4;
    }
    _ALIGN_16 CSAMPLE clamp[4];
    assert_aligned(clamp);
    _mm_store_ps(clamp, vClamped);
    if (clamp[0] != 0 || clamp[1] != 0 ||
        clamp[2] != 0 || clamp[3] != 0) {
        clamped = true;
    }
    if (iNumSamples > 0) {
        qDebug() << "Not div by 4";
    }
    while (iNumSamples > 0) {
        CSAMPLE sample = *pSrc;
        if (sample > fMax) {
            sample = fMax;
            clamped = true;
        } else if (sample < fMin) {
            sample = fMin;
            clamped = true;
        }
        *pDest = sample;
        pDest++;
        pSrc++;
        iNumSamples--;
    }
#endif
    return clamped;
}
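
// interleaveBuffer packs two mono buffers of iNumSamples floats each into a
// single 2*iNumSamples buffer, {l1, r1, l2, r2, ...}; deinterleaveBuffer
// below performs the inverse split.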

// static
void SampleUtil::interleaveBuffer(CSAMPLE* pDest,
                                  const CSAMPLE* pSrc1, const CSAMPLE* pSrc2,
                                  int iNumSamples) {
    if (m_sOptimizationsOn) {
        return sseInterleaveBuffer(pDest, pSrc1, pSrc2, iNumSamples);
    }

    for (int i = 0; i < iNumSamples; ++i) {
        pDest[2*i] = pSrc1[i];
        pDest[2*i+1] = pSrc2[i];
    }
}

// static
void SampleUtil::sseInterleaveBuffer(CSAMPLE* pDest,
                                     const CSAMPLE* pSrc1, const CSAMPLE* pSrc2,
                                     int iNumSamples) {
#ifdef __SSE__
    assert_aligned(pDest);
    assert_aligned(pSrc1);
    assert_aligned(pSrc2);
    __m128 vSrc1Samples;
    __m128 vSrc2Samples;
    __m128 vLow;
    __m128 vHigh;
    while (iNumSamples >= 4) {
        vSrc1Samples = _mm_loadu_ps(pSrc1);
        vSrc2Samples = _mm_loadu_ps(pSrc2);
        // vSrc1Samples is l1,l2,l3,l4
        // vSrc2Samples is r1,r2,r3,r4
        vLow = _mm_unpacklo_ps(vSrc1Samples, vSrc2Samples);
        // vLow is l1,r1,l2,r2
        vHigh = _mm_unpackhi_ps(vSrc1Samples, vSrc2Samples);
        // vHigh is l3,r3,l4,r4
        _mm_store_ps(pDest, vLow);
        _mm_store_ps(pDest+4, vHigh);
        iNumSamples -= 4;
        pSrc1 += 4;
        pSrc2 += 4;
        pDest += 8;
    }
    while (iNumSamples > 0) {
        *pDest++ = *pSrc1++;
        *pDest++ = *pSrc2++;
        iNumSamples--;
    }
#endif
}

// static
void SampleUtil::deinterleaveBuffer(CSAMPLE* pDest1, CSAMPLE* pDest2,
                                    const CSAMPLE* pSrc, int iNumSamples) {
    if (m_sOptimizationsOn) {
        return sseDeinterleaveBuffer(pDest1, pDest2, pSrc, iNumSamples);
    }

    for (int i = 0; i < iNumSamples; ++i) {
        pDest1[i] = pSrc[i*2];
        pDest2[i] = pSrc[i*2+1];
    }
}

// static
void SampleUtil::sseDeinterleaveBuffer(CSAMPLE* pDest1, CSAMPLE* pDest2,
                                       const CSAMPLE* pSrc, int iNumSamples) {
#ifdef __SSE__
    assert_aligned(pDest1);
    assert_aligned(pDest2);
    assert_aligned(pSrc);
    __m128 vSrc1Samples;
    __m128 vSrc2Samples;
    __m128 vDst1Samples;
    __m128 vDst2Samples;
    while (iNumSamples >= 4) {
        vSrc1Samples = _mm_loadu_ps(pSrc);
        vSrc2Samples = _mm_loadu_ps(pSrc+4);
        // vSrc1Samples is l1,r1,l2,r2
        // vSrc2Samples is l3,r3,l4,r4

        // First shuffle the middle elements of both.
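        // _MM_SHUFFLE(3, 1, 2, 0) selects source lanes 0, 2, 1, 3 (the last
        // argument picks the lowest destination lane), so {l1, r1, l2, r2}
        // becomes {l1, l2, r1, r2}.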
        vSrc1Samples = _mm_shuffle_ps(vSrc1Samples, vSrc1Samples,
                                      _MM_SHUFFLE(3, 1, 2, 0));
        //_MM_SHUFFLE(0, 2, 1, 3));
        vSrc2Samples = _mm_shuffle_ps(vSrc2Samples, vSrc2Samples,
                                      _MM_SHUFFLE(3, 1, 2, 0));
        //_MM_SHUFFLE(0, 2, 1, 3));
        // vSrc1Samples is now l1,l2,r1,r2
        // vSrc2Samples is now l3,l4,r3,r4

        // Now move the low half of src2 into the high of src1 to make dst1. To
        // make dst2, move the high half of src1 into the low half of src2.
        vDst1Samples = _mm_movelh_ps(vSrc1Samples, vSrc2Samples);
        vDst2Samples = _mm_movehl_ps(vSrc2Samples, vSrc1Samples);
        // vDst1Samples is now l1,l2,l3,l4
        // vDst2Samples is now r1,r2,r3,r4

        _mm_store_ps(pDest1, vDst1Samples);
        _mm_store_ps(pDest2, vDst2Samples);
        iNumSamples -= 4;
        pSrc += 8;
        pDest1 += 4;
        pDest2 += 4;
    }
    while (iNumSamples > 0) {
        *pDest1++ = *pSrc++;
        *pDest2++ = *pSrc++;
        iNumSamples--;
    }
#endif
}

void SampleUtil::setOptimizations(bool opt) {
    qDebug() << "Opts" << opt;
    m_sOptimizationsOn = opt;
}
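
A minimal usage sketch for the listing above (not part of the original file;
the function, the buffer names, and the [-1.0f, 1.0f] clamp range are
hypothetical, and CSAMPLE is assumed to be a float type as the SSE code
requires):

    #include "sampleutil.h"

    // Overwrite an output buffer with an equal-gain mix of two source
    // buffers, then clamp it in place to guard against clipping.
    void mixTwoSources(CSAMPLE* pOutput,
                       const CSAMPLE* pSrcA, const CSAMPLE* pSrcB,
                       int iNumSamples) {
        SampleUtil::copy2WithGain(pOutput, pSrcA, 0.5f, pSrcB, 0.5f,
                                  iNumSamples);
        SampleUtil::copyClampBuffer(1.0f, -1.0f, pOutput, pOutput,
                                    iNumSamples);
    }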