Home | History | Annotate | Download | only in audio
      1 /*
      2  * Copyright (C) 2011 Google Inc. All rights reserved.
      3  *
      4  * Redistribution and use in source and binary forms, with or without
      5  * modification, are permitted provided that the following conditions
      6  * are met:
      7  *
      8  * 1.  Redistributions of source code must retain the above copyright
      9  *     notice, this list of conditions and the following disclaimer.
     10  * 2.  Redistributions in binary form must reproduce the above copyright
     11  *     notice, this list of conditions and the following disclaimer in the
     12  *     documentation and/or other materials provided with the distribution.
     13  * 3.  Neither the name of Apple Computer, Inc. ("Apple") nor the names of
     14  *     its contributors may be used to endorse or promote products derived
     15  *     from this software without specific prior written permission.
     16  *
     17  * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
     18  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     19  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     20  * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
     21  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
     22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     23  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
     24  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     27  */
     28 
     29 #include "config.h"
     30 
     31 #if ENABLE(WEB_AUDIO)
     32 
     33 #include "platform/audio/SincResampler.h"
     34 
     35 #include "platform/audio/AudioBus.h"
     36 #include "wtf/MathExtras.h"
     37 
     38 #ifdef __SSE2__
     39 #include <emmintrin.h>
     40 #endif
     41 
     42 using namespace std;
     43 
     44 // Input buffer layout, dividing the total buffer into regions (r0 - r5):
     45 //
     46 // |----------------|----------------------------------------------------------------|----------------|
     47 //
     48 //                                              blockSize + kernelSize / 2
     49 //                   <-------------------------------------------------------------------------------->
     50 //                                                  r0
     51 //
     52 //   kernelSize / 2   kernelSize / 2                                 kernelSize / 2     kernelSize / 2
     53 // <---------------> <--------------->                              <---------------> <--------------->
     54 //         r1                r2                                             r3                r4
     55 //
     56 //                                              blockSize
     57 //                                     <-------------------------------------------------------------->
     58 //                                                  r5
     59 
     60 // The Algorithm:
     61 //
     62 // 1) Consume input frames into r0 (r1 is zero-initialized).
     63 // 2) Position kernel centered at start of r0 (r2) and generate output frames until kernel is centered at start of r4.
     64 //    or we've finished generating all the output frames.
     65 // 3) Copy r3 to r1 and r4 to r2.
     66 // 4) Consume input frames into r5 (zero-pad if we run out of input).
     67 // 5) Goto (2) until all of input is consumed.
     68 //
     69 // note: we're glossing over how the sub-sample handling works with m_virtualSourceIndex, etc.
     70 
     71 namespace WebCore {
     72 
     73 SincResampler::SincResampler(double scaleFactor, unsigned kernelSize, unsigned numberOfKernelOffsets)
     74     : m_scaleFactor(scaleFactor)
     75     , m_kernelSize(kernelSize)
     76     , m_numberOfKernelOffsets(numberOfKernelOffsets)
     77     , m_kernelStorage(m_kernelSize * (m_numberOfKernelOffsets + 1))
     78     , m_virtualSourceIndex(0)
     79     , m_blockSize(512)
     80     , m_inputBuffer(m_blockSize + m_kernelSize) // See input buffer layout above.
     81     , m_source(0)
     82     , m_sourceFramesAvailable(0)
     83     , m_sourceProvider(0)
     84     , m_isBufferPrimed(false)
     85 {
     86     initializeKernel();
     87 }
     88 
     89 void SincResampler::initializeKernel()
     90 {
     91     // Blackman window parameters.
     92     double alpha = 0.16;
     93     double a0 = 0.5 * (1.0 - alpha);
     94     double a1 = 0.5;
     95     double a2 = 0.5 * alpha;
     96 
     97     // sincScaleFactor is basically the normalized cutoff frequency of the low-pass filter.
     98     double sincScaleFactor = m_scaleFactor > 1.0 ? 1.0 / m_scaleFactor : 1.0;
     99 
    100     // The sinc function is an idealized brick-wall filter, but since we're windowing it the
    101     // transition from pass to stop does not happen right away. So we should adjust the
    102     // lowpass filter cutoff slightly downward to avoid some aliasing at the very high-end.
    103     // FIXME: this value is empirical and to be more exact should vary depending on m_kernelSize.
    104     sincScaleFactor *= 0.9;
    105 
    106     int n = m_kernelSize;
    107     int halfSize = n / 2;
    108 
    109     // Generates a set of windowed sinc() kernels.
    110     // We generate a range of sub-sample offsets from 0.0 to 1.0.
    111     for (unsigned offsetIndex = 0; offsetIndex <= m_numberOfKernelOffsets; ++offsetIndex) {
    112         double subsampleOffset = static_cast<double>(offsetIndex) / m_numberOfKernelOffsets;
    113 
    114         for (int i = 0; i < n; ++i) {
    115             // Compute the sinc() with offset.
    116             double s = sincScaleFactor * piDouble * (i - halfSize - subsampleOffset);
    117             double sinc = !s ? 1.0 : sin(s) / s;
    118             sinc *= sincScaleFactor;
    119 
    120             // Compute Blackman window, matching the offset of the sinc().
    121             double x = (i - subsampleOffset) / n;
    122             double window = a0 - a1 * cos(2.0 * piDouble * x) + a2 * cos(4.0 * piDouble * x);
    123 
    124             // Window the sinc() function and store at the correct offset.
    125             m_kernelStorage[i + offsetIndex * m_kernelSize] = sinc * window;
    126         }
    127     }
    128 }
    129 
    130 void SincResampler::consumeSource(float* buffer, unsigned numberOfSourceFrames)
    131 {
    132     ASSERT(m_sourceProvider);
    133     if (!m_sourceProvider)
    134         return;
    135 
    136     // Wrap the provided buffer by an AudioBus for use by the source provider.
    137     RefPtr<AudioBus> bus = AudioBus::create(1, numberOfSourceFrames, false);
    138 
    139     // FIXME: Find a way to make the following const-correct:
    140     bus->setChannelMemory(0, buffer, numberOfSourceFrames);
    141 
    142     m_sourceProvider->provideInput(bus.get(), numberOfSourceFrames);
    143 }
    144 
    145 namespace {
    146 
    147 // BufferSourceProvider is an AudioSourceProvider wrapping an in-memory buffer.
    148 
    149 class BufferSourceProvider : public AudioSourceProvider {
    150 public:
    151     BufferSourceProvider(const float* source, size_t numberOfSourceFrames)
    152         : m_source(source)
    153         , m_sourceFramesAvailable(numberOfSourceFrames)
    154     {
    155     }
    156 
    157     // Consumes samples from the in-memory buffer.
    158     virtual void provideInput(AudioBus* bus, size_t framesToProcess)
    159     {
    160         ASSERT(m_source && bus);
    161         if (!m_source || !bus)
    162             return;
    163 
    164         float* buffer = bus->channel(0)->mutableData();
    165 
    166         // Clamp to number of frames available and zero-pad.
    167         size_t framesToCopy = min(m_sourceFramesAvailable, framesToProcess);
    168         memcpy(buffer, m_source, sizeof(float) * framesToCopy);
    169 
    170         // Zero-pad if necessary.
    171         if (framesToCopy < framesToProcess)
    172             memset(buffer + framesToCopy, 0, sizeof(float) * (framesToProcess - framesToCopy));
    173 
    174         m_sourceFramesAvailable -= framesToCopy;
    175         m_source += framesToCopy;
    176     }
    177 
    178 private:
    179     const float* m_source;
    180     size_t m_sourceFramesAvailable;
    181 };
    182 
    183 } // namespace
    184 
    185 void SincResampler::process(const float* source, float* destination, unsigned numberOfSourceFrames)
    186 {
    187     // Resample an in-memory buffer using an AudioSourceProvider.
    188     BufferSourceProvider sourceProvider(source, numberOfSourceFrames);
    189 
    190     unsigned numberOfDestinationFrames = static_cast<unsigned>(numberOfSourceFrames / m_scaleFactor);
    191     unsigned remaining = numberOfDestinationFrames;
    192 
    193     while (remaining) {
    194         unsigned framesThisTime = min(remaining, m_blockSize);
    195         process(&sourceProvider, destination, framesThisTime);
    196 
    197         destination += framesThisTime;
    198         remaining -= framesThisTime;
    199     }
    200 }
    201 
    202 void SincResampler::process(AudioSourceProvider* sourceProvider, float* destination, size_t framesToProcess)
    203 {
    204     bool isGood = sourceProvider && m_blockSize > m_kernelSize && m_inputBuffer.size() >= m_blockSize + m_kernelSize && !(m_kernelSize % 2);
    205     ASSERT(isGood);
    206     if (!isGood)
    207         return;
    208 
    209     m_sourceProvider = sourceProvider;
    210 
    211     unsigned numberOfDestinationFrames = framesToProcess;
    212 
    213     // Setup various region pointers in the buffer (see diagram above).
    214     float* r0 = m_inputBuffer.data() + m_kernelSize / 2;
    215     float* r1 = m_inputBuffer.data();
    216     float* r2 = r0;
    217     float* r3 = r0 + m_blockSize - m_kernelSize / 2;
    218     float* r4 = r0 + m_blockSize;
    219     float* r5 = r0 + m_kernelSize / 2;
    220 
    221     // Step (1)
    222     // Prime the input buffer at the start of the input stream.
    223     if (!m_isBufferPrimed) {
    224         consumeSource(r0, m_blockSize + m_kernelSize / 2);
    225         m_isBufferPrimed = true;
    226     }
    227 
    228     // Step (2)
    229 
    230     while (numberOfDestinationFrames) {
    231         while (m_virtualSourceIndex < m_blockSize) {
    232             // m_virtualSourceIndex lies in between two kernel offsets so figure out what they are.
    233             int sourceIndexI = static_cast<int>(m_virtualSourceIndex);
    234             double subsampleRemainder = m_virtualSourceIndex - sourceIndexI;
    235 
    236             double virtualOffsetIndex = subsampleRemainder * m_numberOfKernelOffsets;
    237             int offsetIndex = static_cast<int>(virtualOffsetIndex);
    238 
    239             float* k1 = m_kernelStorage.data() + offsetIndex * m_kernelSize;
    240             float* k2 = k1 + m_kernelSize;
    241 
    242             // Initialize input pointer based on quantized m_virtualSourceIndex.
    243             float* inputP = r1 + sourceIndexI;
    244 
    245             // We'll compute "convolutions" for the two kernels which straddle m_virtualSourceIndex
    246             float sum1 = 0;
    247             float sum2 = 0;
    248 
    249             // Figure out how much to weight each kernel's "convolution".
    250             double kernelInterpolationFactor = virtualOffsetIndex - offsetIndex;
    251 
    252             // Generate a single output sample.
    253             int n = m_kernelSize;
    254 
    255 #define CONVOLVE_ONE_SAMPLE      \
    256             input = *inputP++;   \
    257             sum1 += input * *k1; \
    258             sum2 += input * *k2; \
    259             ++k1;                \
    260             ++k2;
    261 
    262             {
    263                 float input;
    264 
    265 #ifdef __SSE2__
    266                 // If the sourceP address is not 16-byte aligned, the first several frames (at most three) should be processed seperately.
    267                 while ((reinterpret_cast<uintptr_t>(inputP) & 0x0F) && n) {
    268                     CONVOLVE_ONE_SAMPLE
    269                     n--;
    270                 }
    271 
    272                 // Now the inputP is aligned and start to apply SSE.
    273                 float* endP = inputP + n - n % 4;
    274                 __m128 mInput;
    275                 __m128 mK1;
    276                 __m128 mK2;
    277                 __m128 mul1;
    278                 __m128 mul2;
    279 
    280                 __m128 sums1 = _mm_setzero_ps();
    281                 __m128 sums2 = _mm_setzero_ps();
    282                 bool k1Aligned = !(reinterpret_cast<uintptr_t>(k1) & 0x0F);
    283                 bool k2Aligned = !(reinterpret_cast<uintptr_t>(k2) & 0x0F);
    284 
    285 #define LOAD_DATA(l1, l2)                        \
    286                 mInput = _mm_load_ps(inputP);    \
    287                 mK1 = _mm_##l1##_ps(k1);         \
    288                 mK2 = _mm_##l2##_ps(k2);
    289 
    290 #define CONVOLVE_4_SAMPLES                       \
    291                 mul1 = _mm_mul_ps(mInput, mK1);  \
    292                 mul2 = _mm_mul_ps(mInput, mK2);  \
    293                 sums1 = _mm_add_ps(sums1, mul1); \
    294                 sums2 = _mm_add_ps(sums2, mul2); \
    295                 inputP += 4;                     \
    296                 k1 += 4;                         \
    297                 k2 += 4;
    298 
    299                 if (k1Aligned && k2Aligned) { // both aligned
    300                     while (inputP < endP) {
    301                         LOAD_DATA(load, load)
    302                         CONVOLVE_4_SAMPLES
    303                     }
    304                 } else if (!k1Aligned && k2Aligned) { // only k2 aligned
    305                     while (inputP < endP) {
    306                         LOAD_DATA(loadu, load)
    307                         CONVOLVE_4_SAMPLES
    308                     }
    309                 } else if (k1Aligned && !k2Aligned) { // only k1 aligned
    310                     while (inputP < endP) {
    311                         LOAD_DATA(load, loadu)
    312                         CONVOLVE_4_SAMPLES
    313                     }
    314                 } else { // both non-aligned
    315                     while (inputP < endP) {
    316                         LOAD_DATA(loadu, loadu)
    317                         CONVOLVE_4_SAMPLES
    318                     }
    319                 }
    320 
    321                 // Summarize the SSE results to sum1 and sum2.
    322                 float* groupSumP = reinterpret_cast<float*>(&sums1);
    323                 sum1 += groupSumP[0] + groupSumP[1] + groupSumP[2] + groupSumP[3];
    324                 groupSumP = reinterpret_cast<float*>(&sums2);
    325                 sum2 += groupSumP[0] + groupSumP[1] + groupSumP[2] + groupSumP[3];
    326 
    327                 n %= 4;
    328                 while (n) {
    329                     CONVOLVE_ONE_SAMPLE
    330                     n--;
    331                 }
    332 #else
    333                 // FIXME: add ARM NEON optimizations for the following. The scalar code-path can probably also be optimized better.
    334 
    335                 // Optimize size 32 and size 64 kernels by unrolling the while loop.
    336                 // A 20 - 30% speed improvement was measured in some cases by using this approach.
    337 
    338                 if (n == 32) {
    339                     CONVOLVE_ONE_SAMPLE // 1
    340                     CONVOLVE_ONE_SAMPLE // 2
    341                     CONVOLVE_ONE_SAMPLE // 3
    342                     CONVOLVE_ONE_SAMPLE // 4
    343                     CONVOLVE_ONE_SAMPLE // 5
    344                     CONVOLVE_ONE_SAMPLE // 6
    345                     CONVOLVE_ONE_SAMPLE // 7
    346                     CONVOLVE_ONE_SAMPLE // 8
    347                     CONVOLVE_ONE_SAMPLE // 9
    348                     CONVOLVE_ONE_SAMPLE // 10
    349                     CONVOLVE_ONE_SAMPLE // 11
    350                     CONVOLVE_ONE_SAMPLE // 12
    351                     CONVOLVE_ONE_SAMPLE // 13
    352                     CONVOLVE_ONE_SAMPLE // 14
    353                     CONVOLVE_ONE_SAMPLE // 15
    354                     CONVOLVE_ONE_SAMPLE // 16
    355                     CONVOLVE_ONE_SAMPLE // 17
    356                     CONVOLVE_ONE_SAMPLE // 18
    357                     CONVOLVE_ONE_SAMPLE // 19
    358                     CONVOLVE_ONE_SAMPLE // 20
    359                     CONVOLVE_ONE_SAMPLE // 21
    360                     CONVOLVE_ONE_SAMPLE // 22
    361                     CONVOLVE_ONE_SAMPLE // 23
    362                     CONVOLVE_ONE_SAMPLE // 24
    363                     CONVOLVE_ONE_SAMPLE // 25
    364                     CONVOLVE_ONE_SAMPLE // 26
    365                     CONVOLVE_ONE_SAMPLE // 27
    366                     CONVOLVE_ONE_SAMPLE // 28
    367                     CONVOLVE_ONE_SAMPLE // 29
    368                     CONVOLVE_ONE_SAMPLE // 30
    369                     CONVOLVE_ONE_SAMPLE // 31
    370                     CONVOLVE_ONE_SAMPLE // 32
    371                 } else if (n == 64) {
    372                     CONVOLVE_ONE_SAMPLE // 1
    373                     CONVOLVE_ONE_SAMPLE // 2
    374                     CONVOLVE_ONE_SAMPLE // 3
    375                     CONVOLVE_ONE_SAMPLE // 4
    376                     CONVOLVE_ONE_SAMPLE // 5
    377                     CONVOLVE_ONE_SAMPLE // 6
    378                     CONVOLVE_ONE_SAMPLE // 7
    379                     CONVOLVE_ONE_SAMPLE // 8
    380                     CONVOLVE_ONE_SAMPLE // 9
    381                     CONVOLVE_ONE_SAMPLE // 10
    382                     CONVOLVE_ONE_SAMPLE // 11
    383                     CONVOLVE_ONE_SAMPLE // 12
    384                     CONVOLVE_ONE_SAMPLE // 13
    385                     CONVOLVE_ONE_SAMPLE // 14
    386                     CONVOLVE_ONE_SAMPLE // 15
    387                     CONVOLVE_ONE_SAMPLE // 16
    388                     CONVOLVE_ONE_SAMPLE // 17
    389                     CONVOLVE_ONE_SAMPLE // 18
    390                     CONVOLVE_ONE_SAMPLE // 19
    391                     CONVOLVE_ONE_SAMPLE // 20
    392                     CONVOLVE_ONE_SAMPLE // 21
    393                     CONVOLVE_ONE_SAMPLE // 22
    394                     CONVOLVE_ONE_SAMPLE // 23
    395                     CONVOLVE_ONE_SAMPLE // 24
    396                     CONVOLVE_ONE_SAMPLE // 25
    397                     CONVOLVE_ONE_SAMPLE // 26
    398                     CONVOLVE_ONE_SAMPLE // 27
    399                     CONVOLVE_ONE_SAMPLE // 28
    400                     CONVOLVE_ONE_SAMPLE // 29
    401                     CONVOLVE_ONE_SAMPLE // 30
    402                     CONVOLVE_ONE_SAMPLE // 31
    403                     CONVOLVE_ONE_SAMPLE // 32
    404                     CONVOLVE_ONE_SAMPLE // 33
    405                     CONVOLVE_ONE_SAMPLE // 34
    406                     CONVOLVE_ONE_SAMPLE // 35
    407                     CONVOLVE_ONE_SAMPLE // 36
    408                     CONVOLVE_ONE_SAMPLE // 37
    409                     CONVOLVE_ONE_SAMPLE // 38
    410                     CONVOLVE_ONE_SAMPLE // 39
    411                     CONVOLVE_ONE_SAMPLE // 40
    412                     CONVOLVE_ONE_SAMPLE // 41
    413                     CONVOLVE_ONE_SAMPLE // 42
    414                     CONVOLVE_ONE_SAMPLE // 43
    415                     CONVOLVE_ONE_SAMPLE // 44
    416                     CONVOLVE_ONE_SAMPLE // 45
    417                     CONVOLVE_ONE_SAMPLE // 46
    418                     CONVOLVE_ONE_SAMPLE // 47
    419                     CONVOLVE_ONE_SAMPLE // 48
    420                     CONVOLVE_ONE_SAMPLE // 49
    421                     CONVOLVE_ONE_SAMPLE // 50
    422                     CONVOLVE_ONE_SAMPLE // 51
    423                     CONVOLVE_ONE_SAMPLE // 52
    424                     CONVOLVE_ONE_SAMPLE // 53
    425                     CONVOLVE_ONE_SAMPLE // 54
    426                     CONVOLVE_ONE_SAMPLE // 55
    427                     CONVOLVE_ONE_SAMPLE // 56
    428                     CONVOLVE_ONE_SAMPLE // 57
    429                     CONVOLVE_ONE_SAMPLE // 58
    430                     CONVOLVE_ONE_SAMPLE // 59
    431                     CONVOLVE_ONE_SAMPLE // 60
    432                     CONVOLVE_ONE_SAMPLE // 61
    433                     CONVOLVE_ONE_SAMPLE // 62
    434                     CONVOLVE_ONE_SAMPLE // 63
    435                     CONVOLVE_ONE_SAMPLE // 64
    436                 } else {
    437                     while (n--) {
    438                         // Non-optimized using actual while loop.
    439                         CONVOLVE_ONE_SAMPLE
    440                     }
    441                 }
    442 #endif
    443             }
    444 
    445             // Linearly interpolate the two "convolutions".
    446             double result = (1.0 - kernelInterpolationFactor) * sum1 + kernelInterpolationFactor * sum2;
    447 
    448             *destination++ = result;
    449 
    450             // Advance the virtual index.
    451             m_virtualSourceIndex += m_scaleFactor;
    452 
    453             --numberOfDestinationFrames;
    454             if (!numberOfDestinationFrames)
    455                 return;
    456         }
    457 
    458         // Wrap back around to the start.
    459         m_virtualSourceIndex -= m_blockSize;
    460 
    461         // Step (3) Copy r3 to r1 and r4 to r2.
    462         // This wraps the last input frames back to the start of the buffer.
    463         memcpy(r1, r3, sizeof(float) * (m_kernelSize / 2));
    464         memcpy(r2, r4, sizeof(float) * (m_kernelSize / 2));
    465 
    466         // Step (4)
    467         // Refresh the buffer with more input.
    468         consumeSource(r5, m_blockSize);
    469     }
    470 }
    471 
    472 } // namespace WebCore
    473 
    474 #endif // ENABLE(WEB_AUDIO)
    475