Home | History | Annotate | Download | only in audio
      1 /*
      2  * Copyright (C) 2011 Google Inc. All rights reserved.
      3  *
      4  * Redistribution and use in source and binary forms, with or without
      5  * modification, are permitted provided that the following conditions
      6  * are met:
      7  *
      8  * 1.  Redistributions of source code must retain the above copyright
      9  *     notice, this list of conditions and the following disclaimer.
     10  * 2.  Redistributions in binary form must reproduce the above copyright
     11  *     notice, this list of conditions and the following disclaimer in the
     12  *     documentation and/or other materials provided with the distribution.
     13  * 3.  Neither the name of Apple Computer, Inc. ("Apple") nor the names of
     14  *     its contributors may be used to endorse or promote products derived
     15  *     from this software without specific prior written permission.
     16  *
     17  * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
     18  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     19  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     20  * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
     21  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
     22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     23  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
     24  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     27  */
     28 
     29 #include "config.h"
     30 
     31 #if ENABLE(WEB_AUDIO)
     32 
     33 #include "platform/audio/SincResampler.h"
     34 
     35 #include "platform/audio/AudioBus.h"
     36 #include "wtf/CPU.h"
     37 #include "wtf/MathExtras.h"
     38 
     39 #if CPU(X86) || CPU(X86_64)
     40 #include <emmintrin.h>
     41 #endif
     42 
     43 // Input buffer layout, dividing the total buffer into regions (r0 - r5):
     44 //
     45 // |----------------|----------------------------------------------------------------|----------------|
     46 //
     47 //                                              blockSize + kernelSize / 2
     48 //                   <-------------------------------------------------------------------------------->
     49 //                                                  r0
     50 //
     51 //   kernelSize / 2   kernelSize / 2                                 kernelSize / 2     kernelSize / 2
     52 // <---------------> <--------------->                              <---------------> <--------------->
     53 //         r1                r2                                             r3                r4
     54 //
     55 //                                              blockSize
     56 //                                     <-------------------------------------------------------------->
     57 //                                                  r5
     58 
     59 // The Algorithm:
     60 //
     61 // 1) Consume input frames into r0 (r1 is zero-initialized).
     62 // 2) Position kernel centered at start of r0 (r2) and generate output frames until kernel is centered at start of r4.
     63 //    or we've finished generating all the output frames.
     64 // 3) Copy r3 to r1 and r4 to r2.
     65 // 4) Consume input frames into r5 (zero-pad if we run out of input).
     66 // 5) Goto (2) until all of input is consumed.
     67 //
     68 // note: we're glossing over how the sub-sample handling works with m_virtualSourceIndex, etc.
     69 
     70 namespace blink {
     71 
     72 SincResampler::SincResampler(double scaleFactor, unsigned kernelSize, unsigned numberOfKernelOffsets)
     73     : m_scaleFactor(scaleFactor)
     74     , m_kernelSize(kernelSize)
     75     , m_numberOfKernelOffsets(numberOfKernelOffsets)
     76     , m_kernelStorage(m_kernelSize * (m_numberOfKernelOffsets + 1))
     77     , m_virtualSourceIndex(0)
     78     , m_blockSize(512)
     79     , m_inputBuffer(m_blockSize + m_kernelSize) // See input buffer layout above.
     80     , m_source(0)
     81     , m_sourceFramesAvailable(0)
     82     , m_sourceProvider(0)
     83     , m_isBufferPrimed(false)
     84 {
     85     initializeKernel();
     86 }
     87 
     88 void SincResampler::initializeKernel()
     89 {
     90     // Blackman window parameters.
     91     double alpha = 0.16;
     92     double a0 = 0.5 * (1.0 - alpha);
     93     double a1 = 0.5;
     94     double a2 = 0.5 * alpha;
     95 
     96     // sincScaleFactor is basically the normalized cutoff frequency of the low-pass filter.
     97     double sincScaleFactor = m_scaleFactor > 1.0 ? 1.0 / m_scaleFactor : 1.0;
     98 
     99     // The sinc function is an idealized brick-wall filter, but since we're windowing it the
    100     // transition from pass to stop does not happen right away. So we should adjust the
    101     // lowpass filter cutoff slightly downward to avoid some aliasing at the very high-end.
    102     // FIXME: this value is empirical and to be more exact should vary depending on m_kernelSize.
    103     sincScaleFactor *= 0.9;
    104 
    105     int n = m_kernelSize;
    106     int halfSize = n / 2;
    107 
    108     // Generates a set of windowed sinc() kernels.
    109     // We generate a range of sub-sample offsets from 0.0 to 1.0.
    110     for (unsigned offsetIndex = 0; offsetIndex <= m_numberOfKernelOffsets; ++offsetIndex) {
    111         double subsampleOffset = static_cast<double>(offsetIndex) / m_numberOfKernelOffsets;
    112 
    113         for (int i = 0; i < n; ++i) {
    114             // Compute the sinc() with offset.
    115             double s = sincScaleFactor * piDouble * (i - halfSize - subsampleOffset);
    116             double sinc = !s ? 1.0 : std::sin(s) / s;
    117             sinc *= sincScaleFactor;
    118 
    119             // Compute Blackman window, matching the offset of the sinc().
    120             double x = (i - subsampleOffset) / n;
    121             double window = a0 - a1 * std::cos(twoPiDouble * x) + a2 * std::cos(twoPiDouble * 2.0 * x);
    122 
    123             // Window the sinc() function and store at the correct offset.
    124             m_kernelStorage[i + offsetIndex * m_kernelSize] = sinc * window;
    125         }
    126     }
    127 }
    128 
    129 void SincResampler::consumeSource(float* buffer, unsigned numberOfSourceFrames)
    130 {
    131     ASSERT(m_sourceProvider);
    132     if (!m_sourceProvider)
    133         return;
    134 
    135     // Wrap the provided buffer by an AudioBus for use by the source provider.
    136     RefPtr<AudioBus> bus = AudioBus::create(1, numberOfSourceFrames, false);
    137 
    138     // FIXME: Find a way to make the following const-correct:
    139     bus->setChannelMemory(0, buffer, numberOfSourceFrames);
    140 
    141     m_sourceProvider->provideInput(bus.get(), numberOfSourceFrames);
    142 }
    143 
    144 namespace {
    145 
    146 // BufferSourceProvider is an AudioSourceProvider wrapping an in-memory buffer.
    147 
    148 class BufferSourceProvider FINAL : public AudioSourceProvider {
    149 public:
    150     BufferSourceProvider(const float* source, size_t numberOfSourceFrames)
    151         : m_source(source)
    152         , m_sourceFramesAvailable(numberOfSourceFrames)
    153     {
    154     }
    155 
    156     // Consumes samples from the in-memory buffer.
    157     virtual void provideInput(AudioBus* bus, size_t framesToProcess) OVERRIDE
    158     {
    159         ASSERT(m_source && bus);
    160         if (!m_source || !bus)
    161             return;
    162 
    163         float* buffer = bus->channel(0)->mutableData();
    164 
    165         // Clamp to number of frames available and zero-pad.
    166         size_t framesToCopy = std::min(m_sourceFramesAvailable, framesToProcess);
    167         memcpy(buffer, m_source, sizeof(float) * framesToCopy);
    168 
    169         // Zero-pad if necessary.
    170         if (framesToCopy < framesToProcess)
    171             memset(buffer + framesToCopy, 0, sizeof(float) * (framesToProcess - framesToCopy));
    172 
    173         m_sourceFramesAvailable -= framesToCopy;
    174         m_source += framesToCopy;
    175     }
    176 
    177 private:
    178     const float* m_source;
    179     size_t m_sourceFramesAvailable;
    180 };
    181 
    182 } // namespace
    183 
    184 void SincResampler::process(const float* source, float* destination, unsigned numberOfSourceFrames)
    185 {
    186     // Resample an in-memory buffer using an AudioSourceProvider.
    187     BufferSourceProvider sourceProvider(source, numberOfSourceFrames);
    188 
    189     unsigned numberOfDestinationFrames = static_cast<unsigned>(numberOfSourceFrames / m_scaleFactor);
    190     unsigned remaining = numberOfDestinationFrames;
    191 
    192     while (remaining) {
    193         unsigned framesThisTime = std::min(remaining, m_blockSize);
    194         process(&sourceProvider, destination, framesThisTime);
    195 
    196         destination += framesThisTime;
    197         remaining -= framesThisTime;
    198     }
    199 }
    200 
    201 void SincResampler::process(AudioSourceProvider* sourceProvider, float* destination, size_t framesToProcess)
    202 {
    203     bool isGood = sourceProvider && m_blockSize > m_kernelSize && m_inputBuffer.size() >= m_blockSize + m_kernelSize && !(m_kernelSize % 2);
    204     ASSERT(isGood);
    205     if (!isGood)
    206         return;
    207 
    208     m_sourceProvider = sourceProvider;
    209 
    210     unsigned numberOfDestinationFrames = framesToProcess;
    211 
    212     // Setup various region pointers in the buffer (see diagram above).
    213     float* r0 = m_inputBuffer.data() + m_kernelSize / 2;
    214     float* r1 = m_inputBuffer.data();
    215     float* r2 = r0;
    216     float* r3 = r0 + m_blockSize - m_kernelSize / 2;
    217     float* r4 = r0 + m_blockSize;
    218     float* r5 = r0 + m_kernelSize / 2;
    219 
    220     // Step (1)
    221     // Prime the input buffer at the start of the input stream.
    222     if (!m_isBufferPrimed) {
    223         consumeSource(r0, m_blockSize + m_kernelSize / 2);
    224         m_isBufferPrimed = true;
    225     }
    226 
    227     // Step (2)
    228 
    229     while (numberOfDestinationFrames) {
    230         while (m_virtualSourceIndex < m_blockSize) {
    231             // m_virtualSourceIndex lies in between two kernel offsets so figure out what they are.
    232             int sourceIndexI = static_cast<int>(m_virtualSourceIndex);
    233             double subsampleRemainder = m_virtualSourceIndex - sourceIndexI;
    234 
    235             double virtualOffsetIndex = subsampleRemainder * m_numberOfKernelOffsets;
    236             int offsetIndex = static_cast<int>(virtualOffsetIndex);
    237 
    238             float* k1 = m_kernelStorage.data() + offsetIndex * m_kernelSize;
    239             float* k2 = k1 + m_kernelSize;
    240 
    241             // Initialize input pointer based on quantized m_virtualSourceIndex.
    242             float* inputP = r1 + sourceIndexI;
    243 
    244             // We'll compute "convolutions" for the two kernels which straddle m_virtualSourceIndex
    245             float sum1 = 0;
    246             float sum2 = 0;
    247 
    248             // Figure out how much to weight each kernel's "convolution".
    249             double kernelInterpolationFactor = virtualOffsetIndex - offsetIndex;
    250 
    251             // Generate a single output sample.
    252             int n = m_kernelSize;
    253 
    254 #define CONVOLVE_ONE_SAMPLE      \
    255             input = *inputP++;   \
    256             sum1 += input * *k1; \
    257             sum2 += input * *k2; \
    258             ++k1;                \
    259             ++k2;
    260 
    261             {
    262                 float input;
    263 
    264 #if CPU(X86) || CPU(X86_64)
    265                 // If the sourceP address is not 16-byte aligned, the first several frames (at most three) should be processed seperately.
    266                 while ((reinterpret_cast<uintptr_t>(inputP) & 0x0F) && n) {
    267                     CONVOLVE_ONE_SAMPLE
    268                     n--;
    269                 }
    270 
    271                 // Now the inputP is aligned and start to apply SSE.
    272                 float* endP = inputP + n - n % 4;
    273                 __m128 mInput;
    274                 __m128 mK1;
    275                 __m128 mK2;
    276                 __m128 mul1;
    277                 __m128 mul2;
    278 
    279                 __m128 sums1 = _mm_setzero_ps();
    280                 __m128 sums2 = _mm_setzero_ps();
    281                 bool k1Aligned = !(reinterpret_cast<uintptr_t>(k1) & 0x0F);
    282                 bool k2Aligned = !(reinterpret_cast<uintptr_t>(k2) & 0x0F);
    283 
    284 #define LOAD_DATA(l1, l2)                        \
    285                 mInput = _mm_load_ps(inputP);    \
    286                 mK1 = _mm_##l1##_ps(k1);         \
    287                 mK2 = _mm_##l2##_ps(k2);
    288 
    289 #define CONVOLVE_4_SAMPLES                       \
    290                 mul1 = _mm_mul_ps(mInput, mK1);  \
    291                 mul2 = _mm_mul_ps(mInput, mK2);  \
    292                 sums1 = _mm_add_ps(sums1, mul1); \
    293                 sums2 = _mm_add_ps(sums2, mul2); \
    294                 inputP += 4;                     \
    295                 k1 += 4;                         \
    296                 k2 += 4;
    297 
    298                 if (k1Aligned && k2Aligned) { // both aligned
    299                     while (inputP < endP) {
    300                         LOAD_DATA(load, load)
    301                         CONVOLVE_4_SAMPLES
    302                     }
    303                 } else if (!k1Aligned && k2Aligned) { // only k2 aligned
    304                     while (inputP < endP) {
    305                         LOAD_DATA(loadu, load)
    306                         CONVOLVE_4_SAMPLES
    307                     }
    308                 } else if (k1Aligned && !k2Aligned) { // only k1 aligned
    309                     while (inputP < endP) {
    310                         LOAD_DATA(load, loadu)
    311                         CONVOLVE_4_SAMPLES
    312                     }
    313                 } else { // both non-aligned
    314                     while (inputP < endP) {
    315                         LOAD_DATA(loadu, loadu)
    316                         CONVOLVE_4_SAMPLES
    317                     }
    318                 }
    319 
    320                 // Summarize the SSE results to sum1 and sum2.
    321                 float* groupSumP = reinterpret_cast<float*>(&sums1);
    322                 sum1 += groupSumP[0] + groupSumP[1] + groupSumP[2] + groupSumP[3];
    323                 groupSumP = reinterpret_cast<float*>(&sums2);
    324                 sum2 += groupSumP[0] + groupSumP[1] + groupSumP[2] + groupSumP[3];
    325 
    326                 n %= 4;
    327                 while (n) {
    328                     CONVOLVE_ONE_SAMPLE
    329                     n--;
    330                 }
    331 #else
    332                 // FIXME: add ARM NEON optimizations for the following. The scalar code-path can probably also be optimized better.
    333 
    334                 // Optimize size 32 and size 64 kernels by unrolling the while loop.
    335                 // A 20 - 30% speed improvement was measured in some cases by using this approach.
    336 
    337                 if (n == 32) {
    338                     CONVOLVE_ONE_SAMPLE // 1
    339                     CONVOLVE_ONE_SAMPLE // 2
    340                     CONVOLVE_ONE_SAMPLE // 3
    341                     CONVOLVE_ONE_SAMPLE // 4
    342                     CONVOLVE_ONE_SAMPLE // 5
    343                     CONVOLVE_ONE_SAMPLE // 6
    344                     CONVOLVE_ONE_SAMPLE // 7
    345                     CONVOLVE_ONE_SAMPLE // 8
    346                     CONVOLVE_ONE_SAMPLE // 9
    347                     CONVOLVE_ONE_SAMPLE // 10
    348                     CONVOLVE_ONE_SAMPLE // 11
    349                     CONVOLVE_ONE_SAMPLE // 12
    350                     CONVOLVE_ONE_SAMPLE // 13
    351                     CONVOLVE_ONE_SAMPLE // 14
    352                     CONVOLVE_ONE_SAMPLE // 15
    353                     CONVOLVE_ONE_SAMPLE // 16
    354                     CONVOLVE_ONE_SAMPLE // 17
    355                     CONVOLVE_ONE_SAMPLE // 18
    356                     CONVOLVE_ONE_SAMPLE // 19
    357                     CONVOLVE_ONE_SAMPLE // 20
    358                     CONVOLVE_ONE_SAMPLE // 21
    359                     CONVOLVE_ONE_SAMPLE // 22
    360                     CONVOLVE_ONE_SAMPLE // 23
    361                     CONVOLVE_ONE_SAMPLE // 24
    362                     CONVOLVE_ONE_SAMPLE // 25
    363                     CONVOLVE_ONE_SAMPLE // 26
    364                     CONVOLVE_ONE_SAMPLE // 27
    365                     CONVOLVE_ONE_SAMPLE // 28
    366                     CONVOLVE_ONE_SAMPLE // 29
    367                     CONVOLVE_ONE_SAMPLE // 30
    368                     CONVOLVE_ONE_SAMPLE // 31
    369                     CONVOLVE_ONE_SAMPLE // 32
    370                 } else if (n == 64) {
    371                     CONVOLVE_ONE_SAMPLE // 1
    372                     CONVOLVE_ONE_SAMPLE // 2
    373                     CONVOLVE_ONE_SAMPLE // 3
    374                     CONVOLVE_ONE_SAMPLE // 4
    375                     CONVOLVE_ONE_SAMPLE // 5
    376                     CONVOLVE_ONE_SAMPLE // 6
    377                     CONVOLVE_ONE_SAMPLE // 7
    378                     CONVOLVE_ONE_SAMPLE // 8
    379                     CONVOLVE_ONE_SAMPLE // 9
    380                     CONVOLVE_ONE_SAMPLE // 10
    381                     CONVOLVE_ONE_SAMPLE // 11
    382                     CONVOLVE_ONE_SAMPLE // 12
    383                     CONVOLVE_ONE_SAMPLE // 13
    384                     CONVOLVE_ONE_SAMPLE // 14
    385                     CONVOLVE_ONE_SAMPLE // 15
    386                     CONVOLVE_ONE_SAMPLE // 16
    387                     CONVOLVE_ONE_SAMPLE // 17
    388                     CONVOLVE_ONE_SAMPLE // 18
    389                     CONVOLVE_ONE_SAMPLE // 19
    390                     CONVOLVE_ONE_SAMPLE // 20
    391                     CONVOLVE_ONE_SAMPLE // 21
    392                     CONVOLVE_ONE_SAMPLE // 22
    393                     CONVOLVE_ONE_SAMPLE // 23
    394                     CONVOLVE_ONE_SAMPLE // 24
    395                     CONVOLVE_ONE_SAMPLE // 25
    396                     CONVOLVE_ONE_SAMPLE // 26
    397                     CONVOLVE_ONE_SAMPLE // 27
    398                     CONVOLVE_ONE_SAMPLE // 28
    399                     CONVOLVE_ONE_SAMPLE // 29
    400                     CONVOLVE_ONE_SAMPLE // 30
    401                     CONVOLVE_ONE_SAMPLE // 31
    402                     CONVOLVE_ONE_SAMPLE // 32
    403                     CONVOLVE_ONE_SAMPLE // 33
    404                     CONVOLVE_ONE_SAMPLE // 34
    405                     CONVOLVE_ONE_SAMPLE // 35
    406                     CONVOLVE_ONE_SAMPLE // 36
    407                     CONVOLVE_ONE_SAMPLE // 37
    408                     CONVOLVE_ONE_SAMPLE // 38
    409                     CONVOLVE_ONE_SAMPLE // 39
    410                     CONVOLVE_ONE_SAMPLE // 40
    411                     CONVOLVE_ONE_SAMPLE // 41
    412                     CONVOLVE_ONE_SAMPLE // 42
    413                     CONVOLVE_ONE_SAMPLE // 43
    414                     CONVOLVE_ONE_SAMPLE // 44
    415                     CONVOLVE_ONE_SAMPLE // 45
    416                     CONVOLVE_ONE_SAMPLE // 46
    417                     CONVOLVE_ONE_SAMPLE // 47
    418                     CONVOLVE_ONE_SAMPLE // 48
    419                     CONVOLVE_ONE_SAMPLE // 49
    420                     CONVOLVE_ONE_SAMPLE // 50
    421                     CONVOLVE_ONE_SAMPLE // 51
    422                     CONVOLVE_ONE_SAMPLE // 52
    423                     CONVOLVE_ONE_SAMPLE // 53
    424                     CONVOLVE_ONE_SAMPLE // 54
    425                     CONVOLVE_ONE_SAMPLE // 55
    426                     CONVOLVE_ONE_SAMPLE // 56
    427                     CONVOLVE_ONE_SAMPLE // 57
    428                     CONVOLVE_ONE_SAMPLE // 58
    429                     CONVOLVE_ONE_SAMPLE // 59
    430                     CONVOLVE_ONE_SAMPLE // 60
    431                     CONVOLVE_ONE_SAMPLE // 61
    432                     CONVOLVE_ONE_SAMPLE // 62
    433                     CONVOLVE_ONE_SAMPLE // 63
    434                     CONVOLVE_ONE_SAMPLE // 64
    435                 } else {
    436                     while (n--) {
    437                         // Non-optimized using actual while loop.
    438                         CONVOLVE_ONE_SAMPLE
    439                     }
    440                 }
    441 #endif
    442             }
    443 
    444             // Linearly interpolate the two "convolutions".
    445             double result = (1.0 - kernelInterpolationFactor) * sum1 + kernelInterpolationFactor * sum2;
    446 
    447             *destination++ = result;
    448 
    449             // Advance the virtual index.
    450             m_virtualSourceIndex += m_scaleFactor;
    451 
    452             --numberOfDestinationFrames;
    453             if (!numberOfDestinationFrames)
    454                 return;
    455         }
    456 
    457         // Wrap back around to the start.
    458         m_virtualSourceIndex -= m_blockSize;
    459 
    460         // Step (3) Copy r3 to r1 and r4 to r2.
    461         // This wraps the last input frames back to the start of the buffer.
    462         memcpy(r1, r3, sizeof(float) * (m_kernelSize / 2));
    463         memcpy(r2, r4, sizeof(float) * (m_kernelSize / 2));
    464 
    465         // Step (4)
    466         // Refresh the buffer with more input.
    467         consumeSource(r5, m_blockSize);
    468     }
    469 }
    470 
    471 } // namespace blink
    472 
    473 #endif // ENABLE(WEB_AUDIO)
    474