1 /* 2 * Copyright (C) 2011 Google Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. Neither the name of Apple Computer, Inc. ("Apple") nor the names of 14 * its contributors may be used to endorse or promote products derived 15 * from this software without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY 18 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY 21 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 23 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 #include "config.h" 30 31 #if ENABLE(WEB_AUDIO) 32 33 #include "platform/audio/SincResampler.h" 34 35 #include "platform/audio/AudioBus.h" 36 #include "wtf/MathExtras.h" 37 38 #ifdef __SSE2__ 39 #include <emmintrin.h> 40 #endif 41 42 using namespace std; 43 44 // Input buffer layout, dividing the total buffer into regions (r0 - r5): 45 // 46 // |----------------|----------------------------------------------------------------|----------------| 47 // 48 // blockSize + kernelSize / 2 49 // <--------------------------------------------------------------------------------> 50 // r0 51 // 52 // kernelSize / 2 kernelSize / 2 kernelSize / 2 kernelSize / 2 53 // <---------------> <---------------> <---------------> <---------------> 54 // r1 r2 r3 r4 55 // 56 // blockSize 57 // <--------------------------------------------------------------> 58 // r5 59 60 // The Algorithm: 61 // 62 // 1) Consume input frames into r0 (r1 is zero-initialized). 63 // 2) Position kernel centered at start of r0 (r2) and generate output frames until kernel is centered at start of r4. 64 // or we've finished generating all the output frames. 65 // 3) Copy r3 to r1 and r4 to r2. 66 // 4) Consume input frames into r5 (zero-pad if we run out of input). 67 // 5) Goto (2) until all of input is consumed. 68 // 69 // note: we're glossing over how the sub-sample handling works with m_virtualSourceIndex, etc. 70 71 namespace WebCore { 72 73 SincResampler::SincResampler(double scaleFactor, unsigned kernelSize, unsigned numberOfKernelOffsets) 74 : m_scaleFactor(scaleFactor) 75 , m_kernelSize(kernelSize) 76 , m_numberOfKernelOffsets(numberOfKernelOffsets) 77 , m_kernelStorage(m_kernelSize * (m_numberOfKernelOffsets + 1)) 78 , m_virtualSourceIndex(0) 79 , m_blockSize(512) 80 , m_inputBuffer(m_blockSize + m_kernelSize) // See input buffer layout above. 81 , m_source(0) 82 , m_sourceFramesAvailable(0) 83 , m_sourceProvider(0) 84 , m_isBufferPrimed(false) 85 { 86 initializeKernel(); 87 } 88 89 void SincResampler::initializeKernel() 90 { 91 // Blackman window parameters. 92 double alpha = 0.16; 93 double a0 = 0.5 * (1.0 - alpha); 94 double a1 = 0.5; 95 double a2 = 0.5 * alpha; 96 97 // sincScaleFactor is basically the normalized cutoff frequency of the low-pass filter. 98 double sincScaleFactor = m_scaleFactor > 1.0 ? 1.0 / m_scaleFactor : 1.0; 99 100 // The sinc function is an idealized brick-wall filter, but since we're windowing it the 101 // transition from pass to stop does not happen right away. So we should adjust the 102 // lowpass filter cutoff slightly downward to avoid some aliasing at the very high-end. 103 // FIXME: this value is empirical and to be more exact should vary depending on m_kernelSize. 104 sincScaleFactor *= 0.9; 105 106 int n = m_kernelSize; 107 int halfSize = n / 2; 108 109 // Generates a set of windowed sinc() kernels. 110 // We generate a range of sub-sample offsets from 0.0 to 1.0. 111 for (unsigned offsetIndex = 0; offsetIndex <= m_numberOfKernelOffsets; ++offsetIndex) { 112 double subsampleOffset = static_cast<double>(offsetIndex) / m_numberOfKernelOffsets; 113 114 for (int i = 0; i < n; ++i) { 115 // Compute the sinc() with offset. 116 double s = sincScaleFactor * piDouble * (i - halfSize - subsampleOffset); 117 double sinc = !s ? 1.0 : sin(s) / s; 118 sinc *= sincScaleFactor; 119 120 // Compute Blackman window, matching the offset of the sinc(). 121 double x = (i - subsampleOffset) / n; 122 double window = a0 - a1 * cos(2.0 * piDouble * x) + a2 * cos(4.0 * piDouble * x); 123 124 // Window the sinc() function and store at the correct offset. 125 m_kernelStorage[i + offsetIndex * m_kernelSize] = sinc * window; 126 } 127 } 128 } 129 130 void SincResampler::consumeSource(float* buffer, unsigned numberOfSourceFrames) 131 { 132 ASSERT(m_sourceProvider); 133 if (!m_sourceProvider) 134 return; 135 136 // Wrap the provided buffer by an AudioBus for use by the source provider. 137 RefPtr<AudioBus> bus = AudioBus::create(1, numberOfSourceFrames, false); 138 139 // FIXME: Find a way to make the following const-correct: 140 bus->setChannelMemory(0, buffer, numberOfSourceFrames); 141 142 m_sourceProvider->provideInput(bus.get(), numberOfSourceFrames); 143 } 144 145 namespace { 146 147 // BufferSourceProvider is an AudioSourceProvider wrapping an in-memory buffer. 148 149 class BufferSourceProvider : public AudioSourceProvider { 150 public: 151 BufferSourceProvider(const float* source, size_t numberOfSourceFrames) 152 : m_source(source) 153 , m_sourceFramesAvailable(numberOfSourceFrames) 154 { 155 } 156 157 // Consumes samples from the in-memory buffer. 158 virtual void provideInput(AudioBus* bus, size_t framesToProcess) 159 { 160 ASSERT(m_source && bus); 161 if (!m_source || !bus) 162 return; 163 164 float* buffer = bus->channel(0)->mutableData(); 165 166 // Clamp to number of frames available and zero-pad. 167 size_t framesToCopy = min(m_sourceFramesAvailable, framesToProcess); 168 memcpy(buffer, m_source, sizeof(float) * framesToCopy); 169 170 // Zero-pad if necessary. 171 if (framesToCopy < framesToProcess) 172 memset(buffer + framesToCopy, 0, sizeof(float) * (framesToProcess - framesToCopy)); 173 174 m_sourceFramesAvailable -= framesToCopy; 175 m_source += framesToCopy; 176 } 177 178 private: 179 const float* m_source; 180 size_t m_sourceFramesAvailable; 181 }; 182 183 } // namespace 184 185 void SincResampler::process(const float* source, float* destination, unsigned numberOfSourceFrames) 186 { 187 // Resample an in-memory buffer using an AudioSourceProvider. 188 BufferSourceProvider sourceProvider(source, numberOfSourceFrames); 189 190 unsigned numberOfDestinationFrames = static_cast<unsigned>(numberOfSourceFrames / m_scaleFactor); 191 unsigned remaining = numberOfDestinationFrames; 192 193 while (remaining) { 194 unsigned framesThisTime = min(remaining, m_blockSize); 195 process(&sourceProvider, destination, framesThisTime); 196 197 destination += framesThisTime; 198 remaining -= framesThisTime; 199 } 200 } 201 202 void SincResampler::process(AudioSourceProvider* sourceProvider, float* destination, size_t framesToProcess) 203 { 204 bool isGood = sourceProvider && m_blockSize > m_kernelSize && m_inputBuffer.size() >= m_blockSize + m_kernelSize && !(m_kernelSize % 2); 205 ASSERT(isGood); 206 if (!isGood) 207 return; 208 209 m_sourceProvider = sourceProvider; 210 211 unsigned numberOfDestinationFrames = framesToProcess; 212 213 // Setup various region pointers in the buffer (see diagram above). 214 float* r0 = m_inputBuffer.data() + m_kernelSize / 2; 215 float* r1 = m_inputBuffer.data(); 216 float* r2 = r0; 217 float* r3 = r0 + m_blockSize - m_kernelSize / 2; 218 float* r4 = r0 + m_blockSize; 219 float* r5 = r0 + m_kernelSize / 2; 220 221 // Step (1) 222 // Prime the input buffer at the start of the input stream. 223 if (!m_isBufferPrimed) { 224 consumeSource(r0, m_blockSize + m_kernelSize / 2); 225 m_isBufferPrimed = true; 226 } 227 228 // Step (2) 229 230 while (numberOfDestinationFrames) { 231 while (m_virtualSourceIndex < m_blockSize) { 232 // m_virtualSourceIndex lies in between two kernel offsets so figure out what they are. 233 int sourceIndexI = static_cast<int>(m_virtualSourceIndex); 234 double subsampleRemainder = m_virtualSourceIndex - sourceIndexI; 235 236 double virtualOffsetIndex = subsampleRemainder * m_numberOfKernelOffsets; 237 int offsetIndex = static_cast<int>(virtualOffsetIndex); 238 239 float* k1 = m_kernelStorage.data() + offsetIndex * m_kernelSize; 240 float* k2 = k1 + m_kernelSize; 241 242 // Initialize input pointer based on quantized m_virtualSourceIndex. 243 float* inputP = r1 + sourceIndexI; 244 245 // We'll compute "convolutions" for the two kernels which straddle m_virtualSourceIndex 246 float sum1 = 0; 247 float sum2 = 0; 248 249 // Figure out how much to weight each kernel's "convolution". 250 double kernelInterpolationFactor = virtualOffsetIndex - offsetIndex; 251 252 // Generate a single output sample. 253 int n = m_kernelSize; 254 255 #define CONVOLVE_ONE_SAMPLE \ 256 input = *inputP++; \ 257 sum1 += input * *k1; \ 258 sum2 += input * *k2; \ 259 ++k1; \ 260 ++k2; 261 262 { 263 float input; 264 265 #ifdef __SSE2__ 266 // If the sourceP address is not 16-byte aligned, the first several frames (at most three) should be processed seperately. 267 while ((reinterpret_cast<uintptr_t>(inputP) & 0x0F) && n) { 268 CONVOLVE_ONE_SAMPLE 269 n--; 270 } 271 272 // Now the inputP is aligned and start to apply SSE. 273 float* endP = inputP + n - n % 4; 274 __m128 mInput; 275 __m128 mK1; 276 __m128 mK2; 277 __m128 mul1; 278 __m128 mul2; 279 280 __m128 sums1 = _mm_setzero_ps(); 281 __m128 sums2 = _mm_setzero_ps(); 282 bool k1Aligned = !(reinterpret_cast<uintptr_t>(k1) & 0x0F); 283 bool k2Aligned = !(reinterpret_cast<uintptr_t>(k2) & 0x0F); 284 285 #define LOAD_DATA(l1, l2) \ 286 mInput = _mm_load_ps(inputP); \ 287 mK1 = _mm_##l1##_ps(k1); \ 288 mK2 = _mm_##l2##_ps(k2); 289 290 #define CONVOLVE_4_SAMPLES \ 291 mul1 = _mm_mul_ps(mInput, mK1); \ 292 mul2 = _mm_mul_ps(mInput, mK2); \ 293 sums1 = _mm_add_ps(sums1, mul1); \ 294 sums2 = _mm_add_ps(sums2, mul2); \ 295 inputP += 4; \ 296 k1 += 4; \ 297 k2 += 4; 298 299 if (k1Aligned && k2Aligned) { // both aligned 300 while (inputP < endP) { 301 LOAD_DATA(load, load) 302 CONVOLVE_4_SAMPLES 303 } 304 } else if (!k1Aligned && k2Aligned) { // only k2 aligned 305 while (inputP < endP) { 306 LOAD_DATA(loadu, load) 307 CONVOLVE_4_SAMPLES 308 } 309 } else if (k1Aligned && !k2Aligned) { // only k1 aligned 310 while (inputP < endP) { 311 LOAD_DATA(load, loadu) 312 CONVOLVE_4_SAMPLES 313 } 314 } else { // both non-aligned 315 while (inputP < endP) { 316 LOAD_DATA(loadu, loadu) 317 CONVOLVE_4_SAMPLES 318 } 319 } 320 321 // Summarize the SSE results to sum1 and sum2. 322 float* groupSumP = reinterpret_cast<float*>(&sums1); 323 sum1 += groupSumP[0] + groupSumP[1] + groupSumP[2] + groupSumP[3]; 324 groupSumP = reinterpret_cast<float*>(&sums2); 325 sum2 += groupSumP[0] + groupSumP[1] + groupSumP[2] + groupSumP[3]; 326 327 n %= 4; 328 while (n) { 329 CONVOLVE_ONE_SAMPLE 330 n--; 331 } 332 #else 333 // FIXME: add ARM NEON optimizations for the following. The scalar code-path can probably also be optimized better. 334 335 // Optimize size 32 and size 64 kernels by unrolling the while loop. 336 // A 20 - 30% speed improvement was measured in some cases by using this approach. 337 338 if (n == 32) { 339 CONVOLVE_ONE_SAMPLE // 1 340 CONVOLVE_ONE_SAMPLE // 2 341 CONVOLVE_ONE_SAMPLE // 3 342 CONVOLVE_ONE_SAMPLE // 4 343 CONVOLVE_ONE_SAMPLE // 5 344 CONVOLVE_ONE_SAMPLE // 6 345 CONVOLVE_ONE_SAMPLE // 7 346 CONVOLVE_ONE_SAMPLE // 8 347 CONVOLVE_ONE_SAMPLE // 9 348 CONVOLVE_ONE_SAMPLE // 10 349 CONVOLVE_ONE_SAMPLE // 11 350 CONVOLVE_ONE_SAMPLE // 12 351 CONVOLVE_ONE_SAMPLE // 13 352 CONVOLVE_ONE_SAMPLE // 14 353 CONVOLVE_ONE_SAMPLE // 15 354 CONVOLVE_ONE_SAMPLE // 16 355 CONVOLVE_ONE_SAMPLE // 17 356 CONVOLVE_ONE_SAMPLE // 18 357 CONVOLVE_ONE_SAMPLE // 19 358 CONVOLVE_ONE_SAMPLE // 20 359 CONVOLVE_ONE_SAMPLE // 21 360 CONVOLVE_ONE_SAMPLE // 22 361 CONVOLVE_ONE_SAMPLE // 23 362 CONVOLVE_ONE_SAMPLE // 24 363 CONVOLVE_ONE_SAMPLE // 25 364 CONVOLVE_ONE_SAMPLE // 26 365 CONVOLVE_ONE_SAMPLE // 27 366 CONVOLVE_ONE_SAMPLE // 28 367 CONVOLVE_ONE_SAMPLE // 29 368 CONVOLVE_ONE_SAMPLE // 30 369 CONVOLVE_ONE_SAMPLE // 31 370 CONVOLVE_ONE_SAMPLE // 32 371 } else if (n == 64) { 372 CONVOLVE_ONE_SAMPLE // 1 373 CONVOLVE_ONE_SAMPLE // 2 374 CONVOLVE_ONE_SAMPLE // 3 375 CONVOLVE_ONE_SAMPLE // 4 376 CONVOLVE_ONE_SAMPLE // 5 377 CONVOLVE_ONE_SAMPLE // 6 378 CONVOLVE_ONE_SAMPLE // 7 379 CONVOLVE_ONE_SAMPLE // 8 380 CONVOLVE_ONE_SAMPLE // 9 381 CONVOLVE_ONE_SAMPLE // 10 382 CONVOLVE_ONE_SAMPLE // 11 383 CONVOLVE_ONE_SAMPLE // 12 384 CONVOLVE_ONE_SAMPLE // 13 385 CONVOLVE_ONE_SAMPLE // 14 386 CONVOLVE_ONE_SAMPLE // 15 387 CONVOLVE_ONE_SAMPLE // 16 388 CONVOLVE_ONE_SAMPLE // 17 389 CONVOLVE_ONE_SAMPLE // 18 390 CONVOLVE_ONE_SAMPLE // 19 391 CONVOLVE_ONE_SAMPLE // 20 392 CONVOLVE_ONE_SAMPLE // 21 393 CONVOLVE_ONE_SAMPLE // 22 394 CONVOLVE_ONE_SAMPLE // 23 395 CONVOLVE_ONE_SAMPLE // 24 396 CONVOLVE_ONE_SAMPLE // 25 397 CONVOLVE_ONE_SAMPLE // 26 398 CONVOLVE_ONE_SAMPLE // 27 399 CONVOLVE_ONE_SAMPLE // 28 400 CONVOLVE_ONE_SAMPLE // 29 401 CONVOLVE_ONE_SAMPLE // 30 402 CONVOLVE_ONE_SAMPLE // 31 403 CONVOLVE_ONE_SAMPLE // 32 404 CONVOLVE_ONE_SAMPLE // 33 405 CONVOLVE_ONE_SAMPLE // 34 406 CONVOLVE_ONE_SAMPLE // 35 407 CONVOLVE_ONE_SAMPLE // 36 408 CONVOLVE_ONE_SAMPLE // 37 409 CONVOLVE_ONE_SAMPLE // 38 410 CONVOLVE_ONE_SAMPLE // 39 411 CONVOLVE_ONE_SAMPLE // 40 412 CONVOLVE_ONE_SAMPLE // 41 413 CONVOLVE_ONE_SAMPLE // 42 414 CONVOLVE_ONE_SAMPLE // 43 415 CONVOLVE_ONE_SAMPLE // 44 416 CONVOLVE_ONE_SAMPLE // 45 417 CONVOLVE_ONE_SAMPLE // 46 418 CONVOLVE_ONE_SAMPLE // 47 419 CONVOLVE_ONE_SAMPLE // 48 420 CONVOLVE_ONE_SAMPLE // 49 421 CONVOLVE_ONE_SAMPLE // 50 422 CONVOLVE_ONE_SAMPLE // 51 423 CONVOLVE_ONE_SAMPLE // 52 424 CONVOLVE_ONE_SAMPLE // 53 425 CONVOLVE_ONE_SAMPLE // 54 426 CONVOLVE_ONE_SAMPLE // 55 427 CONVOLVE_ONE_SAMPLE // 56 428 CONVOLVE_ONE_SAMPLE // 57 429 CONVOLVE_ONE_SAMPLE // 58 430 CONVOLVE_ONE_SAMPLE // 59 431 CONVOLVE_ONE_SAMPLE // 60 432 CONVOLVE_ONE_SAMPLE // 61 433 CONVOLVE_ONE_SAMPLE // 62 434 CONVOLVE_ONE_SAMPLE // 63 435 CONVOLVE_ONE_SAMPLE // 64 436 } else { 437 while (n--) { 438 // Non-optimized using actual while loop. 439 CONVOLVE_ONE_SAMPLE 440 } 441 } 442 #endif 443 } 444 445 // Linearly interpolate the two "convolutions". 446 double result = (1.0 - kernelInterpolationFactor) * sum1 + kernelInterpolationFactor * sum2; 447 448 *destination++ = result; 449 450 // Advance the virtual index. 451 m_virtualSourceIndex += m_scaleFactor; 452 453 --numberOfDestinationFrames; 454 if (!numberOfDestinationFrames) 455 return; 456 } 457 458 // Wrap back around to the start. 459 m_virtualSourceIndex -= m_blockSize; 460 461 // Step (3) Copy r3 to r1 and r4 to r2. 462 // This wraps the last input frames back to the start of the buffer. 463 memcpy(r1, r3, sizeof(float) * (m_kernelSize / 2)); 464 memcpy(r2, r4, sizeof(float) * (m_kernelSize / 2)); 465 466 // Step (4) 467 // Refresh the buffer with more input. 468 consumeSource(r5, m_blockSize); 469 } 470 } 471 472 } // namespace WebCore 473 474 #endif // ENABLE(WEB_AUDIO) 475