Home | History | Annotate | Download | only in audio
      1 /*
      2  * Copyright (C) 2012 Intel Inc. All rights reserved.
      3  *
      4  * Redistribution and use in source and binary forms, with or without
      5  * modification, are permitted provided that the following conditions
      6  * are met:
      7  *
      8  * 1.  Redistributions of source code must retain the above copyright
      9  *     notice, this list of conditions and the following disclaimer.
     10  * 2.  Redistributions in binary form must reproduce the above copyright
     11  *     notice, this list of conditions and the following disclaimer in the
     12  *     documentation and/or other materials provided with the distribution.
     13  * 3.  Neither the name of Apple Computer, Inc. ("Apple") nor the names of
     14  *     its contributors may be used to endorse or promote products derived
     15  *     from this software without specific prior written permission.
     16  *
     17  * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
     18  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     19  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     20  * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
     21  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
     22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     23  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
     24  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     27  */
     28 
     29 #include "config.h"
     30 
     31 #if ENABLE(WEB_AUDIO)
     32 
     33 #include "platform/audio/DirectConvolver.h"
     34 
     35 #if OS(MACOSX)
     36 #include <Accelerate/Accelerate.h>
     37 #endif
     38 
     39 #include "platform/audio/VectorMath.h"
     40 #include "wtf/CPU.h"
     41 
     42 namespace WebCore {
     43 
     44 using namespace VectorMath;
     45 
     46 DirectConvolver::DirectConvolver(size_t inputBlockSize)
     47     : m_inputBlockSize(inputBlockSize)
     48 #if USE(WEBAUDIO_IPP)
     49     , m_overlayBuffer(inputBlockSize)
     50 #endif // USE(WEBAUDIO_IPP)
     51     , m_buffer(inputBlockSize * 2)
     52 {
     53 }
     54 
     55 void DirectConvolver::process(AudioFloatArray* convolutionKernel, const float* sourceP, float* destP, size_t framesToProcess)
     56 {
     57     ASSERT(framesToProcess == m_inputBlockSize);
     58     if (framesToProcess != m_inputBlockSize)
     59         return;
     60 
     61     // Only support kernelSize <= m_inputBlockSize
     62     size_t kernelSize = convolutionKernel->size();
     63     ASSERT(kernelSize <= m_inputBlockSize);
     64     if (kernelSize > m_inputBlockSize)
     65         return;
     66 
     67     float* kernelP = convolutionKernel->data();
     68 
     69     // Sanity check
     70     bool isCopyGood = kernelP && sourceP && destP && m_buffer.data();
     71     ASSERT(isCopyGood);
     72     if (!isCopyGood)
     73         return;
     74 
     75 #if USE(WEBAUDIO_IPP)
     76     float* outputBuffer = m_buffer.data();
     77     float* overlayBuffer = m_overlayBuffer.data();
     78     bool isCopyGood2 = overlayBuffer && m_overlayBuffer.size() >= kernelSize && m_buffer.size() == m_inputBlockSize * 2;
     79     ASSERT(isCopyGood2);
     80     if (!isCopyGood2)
     81         return;
     82 
     83     ippsConv_32f(static_cast<const Ipp32f*>(sourceP), framesToProcess, static_cast<Ipp32f*>(kernelP), kernelSize, static_cast<Ipp32f*>(outputBuffer));
     84 
     85     vadd(outputBuffer, 1, overlayBuffer, 1, destP, 1, framesToProcess);
     86     memcpy(overlayBuffer, outputBuffer + m_inputBlockSize, sizeof(float) * kernelSize);
     87 #else
     88     float* inputP = m_buffer.data() + m_inputBlockSize;
     89 
     90     // Copy samples to 2nd half of input buffer.
     91     memcpy(inputP, sourceP, sizeof(float) * framesToProcess);
     92 
     93 #if OS(MACOSX)
     94 #if CPU(X86)
     95     conv(inputP - kernelSize + 1, 1, kernelP + kernelSize - 1, -1, destP, 1, framesToProcess, kernelSize);
     96 #else
     97     vDSP_conv(inputP - kernelSize + 1, 1, kernelP + kernelSize - 1, -1, destP, 1, framesToProcess, kernelSize);
     98 #endif // CPU(X86)
     99 #else
    100     // FIXME: The macro can be further optimized to avoid pipeline stalls. One possibility is to maintain 4 separate sums and change the macro to CONVOLVE_FOUR_SAMPLES.
    101 #define CONVOLVE_ONE_SAMPLE                 \
    102     do {                                    \
    103         sum += inputP[i - j] * kernelP[j];  \
    104         j++;                                \
    105     } while (0)
    106 
    107     size_t i = 0;
    108     while (i < framesToProcess) {
    109         size_t j = 0;
    110         float sum = 0;
    111 
    112         // FIXME: SSE optimization may be applied here.
    113         if (kernelSize == 32) {
    114             CONVOLVE_ONE_SAMPLE; // 1
    115             CONVOLVE_ONE_SAMPLE; // 2
    116             CONVOLVE_ONE_SAMPLE; // 3
    117             CONVOLVE_ONE_SAMPLE; // 4
    118             CONVOLVE_ONE_SAMPLE; // 5
    119             CONVOLVE_ONE_SAMPLE; // 6
    120             CONVOLVE_ONE_SAMPLE; // 7
    121             CONVOLVE_ONE_SAMPLE; // 8
    122             CONVOLVE_ONE_SAMPLE; // 9
    123             CONVOLVE_ONE_SAMPLE; // 10
    124 
    125             CONVOLVE_ONE_SAMPLE; // 11
    126             CONVOLVE_ONE_SAMPLE; // 12
    127             CONVOLVE_ONE_SAMPLE; // 13
    128             CONVOLVE_ONE_SAMPLE; // 14
    129             CONVOLVE_ONE_SAMPLE; // 15
    130             CONVOLVE_ONE_SAMPLE; // 16
    131             CONVOLVE_ONE_SAMPLE; // 17
    132             CONVOLVE_ONE_SAMPLE; // 18
    133             CONVOLVE_ONE_SAMPLE; // 19
    134             CONVOLVE_ONE_SAMPLE; // 20
    135 
    136             CONVOLVE_ONE_SAMPLE; // 21
    137             CONVOLVE_ONE_SAMPLE; // 22
    138             CONVOLVE_ONE_SAMPLE; // 23
    139             CONVOLVE_ONE_SAMPLE; // 24
    140             CONVOLVE_ONE_SAMPLE; // 25
    141             CONVOLVE_ONE_SAMPLE; // 26
    142             CONVOLVE_ONE_SAMPLE; // 27
    143             CONVOLVE_ONE_SAMPLE; // 28
    144             CONVOLVE_ONE_SAMPLE; // 29
    145             CONVOLVE_ONE_SAMPLE; // 30
    146 
    147             CONVOLVE_ONE_SAMPLE; // 31
    148             CONVOLVE_ONE_SAMPLE; // 32
    149 
    150         } else if (kernelSize == 64) {
    151             CONVOLVE_ONE_SAMPLE; // 1
    152             CONVOLVE_ONE_SAMPLE; // 2
    153             CONVOLVE_ONE_SAMPLE; // 3
    154             CONVOLVE_ONE_SAMPLE; // 4
    155             CONVOLVE_ONE_SAMPLE; // 5
    156             CONVOLVE_ONE_SAMPLE; // 6
    157             CONVOLVE_ONE_SAMPLE; // 7
    158             CONVOLVE_ONE_SAMPLE; // 8
    159             CONVOLVE_ONE_SAMPLE; // 9
    160             CONVOLVE_ONE_SAMPLE; // 10
    161 
    162             CONVOLVE_ONE_SAMPLE; // 11
    163             CONVOLVE_ONE_SAMPLE; // 12
    164             CONVOLVE_ONE_SAMPLE; // 13
    165             CONVOLVE_ONE_SAMPLE; // 14
    166             CONVOLVE_ONE_SAMPLE; // 15
    167             CONVOLVE_ONE_SAMPLE; // 16
    168             CONVOLVE_ONE_SAMPLE; // 17
    169             CONVOLVE_ONE_SAMPLE; // 18
    170             CONVOLVE_ONE_SAMPLE; // 19
    171             CONVOLVE_ONE_SAMPLE; // 20
    172 
    173             CONVOLVE_ONE_SAMPLE; // 21
    174             CONVOLVE_ONE_SAMPLE; // 22
    175             CONVOLVE_ONE_SAMPLE; // 23
    176             CONVOLVE_ONE_SAMPLE; // 24
    177             CONVOLVE_ONE_SAMPLE; // 25
    178             CONVOLVE_ONE_SAMPLE; // 26
    179             CONVOLVE_ONE_SAMPLE; // 27
    180             CONVOLVE_ONE_SAMPLE; // 28
    181             CONVOLVE_ONE_SAMPLE; // 29
    182             CONVOLVE_ONE_SAMPLE; // 30
    183 
    184             CONVOLVE_ONE_SAMPLE; // 31
    185             CONVOLVE_ONE_SAMPLE; // 32
    186             CONVOLVE_ONE_SAMPLE; // 33
    187             CONVOLVE_ONE_SAMPLE; // 34
    188             CONVOLVE_ONE_SAMPLE; // 35
    189             CONVOLVE_ONE_SAMPLE; // 36
    190             CONVOLVE_ONE_SAMPLE; // 37
    191             CONVOLVE_ONE_SAMPLE; // 38
    192             CONVOLVE_ONE_SAMPLE; // 39
    193             CONVOLVE_ONE_SAMPLE; // 40
    194 
    195             CONVOLVE_ONE_SAMPLE; // 41
    196             CONVOLVE_ONE_SAMPLE; // 42
    197             CONVOLVE_ONE_SAMPLE; // 43
    198             CONVOLVE_ONE_SAMPLE; // 44
    199             CONVOLVE_ONE_SAMPLE; // 45
    200             CONVOLVE_ONE_SAMPLE; // 46
    201             CONVOLVE_ONE_SAMPLE; // 47
    202             CONVOLVE_ONE_SAMPLE; // 48
    203             CONVOLVE_ONE_SAMPLE; // 49
    204             CONVOLVE_ONE_SAMPLE; // 50
    205 
    206             CONVOLVE_ONE_SAMPLE; // 51
    207             CONVOLVE_ONE_SAMPLE; // 52
    208             CONVOLVE_ONE_SAMPLE; // 53
    209             CONVOLVE_ONE_SAMPLE; // 54
    210             CONVOLVE_ONE_SAMPLE; // 55
    211             CONVOLVE_ONE_SAMPLE; // 56
    212             CONVOLVE_ONE_SAMPLE; // 57
    213             CONVOLVE_ONE_SAMPLE; // 58
    214             CONVOLVE_ONE_SAMPLE; // 59
    215             CONVOLVE_ONE_SAMPLE; // 60
    216 
    217             CONVOLVE_ONE_SAMPLE; // 61
    218             CONVOLVE_ONE_SAMPLE; // 62
    219             CONVOLVE_ONE_SAMPLE; // 63
    220             CONVOLVE_ONE_SAMPLE; // 64
    221 
    222         } else if (kernelSize == 128) {
    223             CONVOLVE_ONE_SAMPLE; // 1
    224             CONVOLVE_ONE_SAMPLE; // 2
    225             CONVOLVE_ONE_SAMPLE; // 3
    226             CONVOLVE_ONE_SAMPLE; // 4
    227             CONVOLVE_ONE_SAMPLE; // 5
    228             CONVOLVE_ONE_SAMPLE; // 6
    229             CONVOLVE_ONE_SAMPLE; // 7
    230             CONVOLVE_ONE_SAMPLE; // 8
    231             CONVOLVE_ONE_SAMPLE; // 9
    232             CONVOLVE_ONE_SAMPLE; // 10
    233 
    234             CONVOLVE_ONE_SAMPLE; // 11
    235             CONVOLVE_ONE_SAMPLE; // 12
    236             CONVOLVE_ONE_SAMPLE; // 13
    237             CONVOLVE_ONE_SAMPLE; // 14
    238             CONVOLVE_ONE_SAMPLE; // 15
    239             CONVOLVE_ONE_SAMPLE; // 16
    240             CONVOLVE_ONE_SAMPLE; // 17
    241             CONVOLVE_ONE_SAMPLE; // 18
    242             CONVOLVE_ONE_SAMPLE; // 19
    243             CONVOLVE_ONE_SAMPLE; // 20
    244 
    245             CONVOLVE_ONE_SAMPLE; // 21
    246             CONVOLVE_ONE_SAMPLE; // 22
    247             CONVOLVE_ONE_SAMPLE; // 23
    248             CONVOLVE_ONE_SAMPLE; // 24
    249             CONVOLVE_ONE_SAMPLE; // 25
    250             CONVOLVE_ONE_SAMPLE; // 26
    251             CONVOLVE_ONE_SAMPLE; // 27
    252             CONVOLVE_ONE_SAMPLE; // 28
    253             CONVOLVE_ONE_SAMPLE; // 29
    254             CONVOLVE_ONE_SAMPLE; // 30
    255 
    256             CONVOLVE_ONE_SAMPLE; // 31
    257             CONVOLVE_ONE_SAMPLE; // 32
    258             CONVOLVE_ONE_SAMPLE; // 33
    259             CONVOLVE_ONE_SAMPLE; // 34
    260             CONVOLVE_ONE_SAMPLE; // 35
    261             CONVOLVE_ONE_SAMPLE; // 36
    262             CONVOLVE_ONE_SAMPLE; // 37
    263             CONVOLVE_ONE_SAMPLE; // 38
    264             CONVOLVE_ONE_SAMPLE; // 39
    265             CONVOLVE_ONE_SAMPLE; // 40
    266 
    267             CONVOLVE_ONE_SAMPLE; // 41
    268             CONVOLVE_ONE_SAMPLE; // 42
    269             CONVOLVE_ONE_SAMPLE; // 43
    270             CONVOLVE_ONE_SAMPLE; // 44
    271             CONVOLVE_ONE_SAMPLE; // 45
    272             CONVOLVE_ONE_SAMPLE; // 46
    273             CONVOLVE_ONE_SAMPLE; // 47
    274             CONVOLVE_ONE_SAMPLE; // 48
    275             CONVOLVE_ONE_SAMPLE; // 49
    276             CONVOLVE_ONE_SAMPLE; // 50
    277 
    278             CONVOLVE_ONE_SAMPLE; // 51
    279             CONVOLVE_ONE_SAMPLE; // 52
    280             CONVOLVE_ONE_SAMPLE; // 53
    281             CONVOLVE_ONE_SAMPLE; // 54
    282             CONVOLVE_ONE_SAMPLE; // 55
    283             CONVOLVE_ONE_SAMPLE; // 56
    284             CONVOLVE_ONE_SAMPLE; // 57
    285             CONVOLVE_ONE_SAMPLE; // 58
    286             CONVOLVE_ONE_SAMPLE; // 59
    287             CONVOLVE_ONE_SAMPLE; // 60
    288 
    289             CONVOLVE_ONE_SAMPLE; // 61
    290             CONVOLVE_ONE_SAMPLE; // 62
    291             CONVOLVE_ONE_SAMPLE; // 63
    292             CONVOLVE_ONE_SAMPLE; // 64
    293             CONVOLVE_ONE_SAMPLE; // 65
    294             CONVOLVE_ONE_SAMPLE; // 66
    295             CONVOLVE_ONE_SAMPLE; // 67
    296             CONVOLVE_ONE_SAMPLE; // 68
    297             CONVOLVE_ONE_SAMPLE; // 69
    298             CONVOLVE_ONE_SAMPLE; // 70
    299 
    300             CONVOLVE_ONE_SAMPLE; // 71
    301             CONVOLVE_ONE_SAMPLE; // 72
    302             CONVOLVE_ONE_SAMPLE; // 73
    303             CONVOLVE_ONE_SAMPLE; // 74
    304             CONVOLVE_ONE_SAMPLE; // 75
    305             CONVOLVE_ONE_SAMPLE; // 76
    306             CONVOLVE_ONE_SAMPLE; // 77
    307             CONVOLVE_ONE_SAMPLE; // 78
    308             CONVOLVE_ONE_SAMPLE; // 79
    309             CONVOLVE_ONE_SAMPLE; // 80
    310 
    311             CONVOLVE_ONE_SAMPLE; // 81
    312             CONVOLVE_ONE_SAMPLE; // 82
    313             CONVOLVE_ONE_SAMPLE; // 83
    314             CONVOLVE_ONE_SAMPLE; // 84
    315             CONVOLVE_ONE_SAMPLE; // 85
    316             CONVOLVE_ONE_SAMPLE; // 86
    317             CONVOLVE_ONE_SAMPLE; // 87
    318             CONVOLVE_ONE_SAMPLE; // 88
    319             CONVOLVE_ONE_SAMPLE; // 89
    320             CONVOLVE_ONE_SAMPLE; // 90
    321 
    322             CONVOLVE_ONE_SAMPLE; // 91
    323             CONVOLVE_ONE_SAMPLE; // 92
    324             CONVOLVE_ONE_SAMPLE; // 93
    325             CONVOLVE_ONE_SAMPLE; // 94
    326             CONVOLVE_ONE_SAMPLE; // 95
    327             CONVOLVE_ONE_SAMPLE; // 96
    328             CONVOLVE_ONE_SAMPLE; // 97
    329             CONVOLVE_ONE_SAMPLE; // 98
    330             CONVOLVE_ONE_SAMPLE; // 99
    331             CONVOLVE_ONE_SAMPLE; // 100
    332 
    333             CONVOLVE_ONE_SAMPLE; // 101
    334             CONVOLVE_ONE_SAMPLE; // 102
    335             CONVOLVE_ONE_SAMPLE; // 103
    336             CONVOLVE_ONE_SAMPLE; // 104
    337             CONVOLVE_ONE_SAMPLE; // 105
    338             CONVOLVE_ONE_SAMPLE; // 106
    339             CONVOLVE_ONE_SAMPLE; // 107
    340             CONVOLVE_ONE_SAMPLE; // 108
    341             CONVOLVE_ONE_SAMPLE; // 109
    342             CONVOLVE_ONE_SAMPLE; // 110
    343 
    344             CONVOLVE_ONE_SAMPLE; // 111
    345             CONVOLVE_ONE_SAMPLE; // 112
    346             CONVOLVE_ONE_SAMPLE; // 113
    347             CONVOLVE_ONE_SAMPLE; // 114
    348             CONVOLVE_ONE_SAMPLE; // 115
    349             CONVOLVE_ONE_SAMPLE; // 116
    350             CONVOLVE_ONE_SAMPLE; // 117
    351             CONVOLVE_ONE_SAMPLE; // 118
    352             CONVOLVE_ONE_SAMPLE; // 119
    353             CONVOLVE_ONE_SAMPLE; // 120
    354 
    355             CONVOLVE_ONE_SAMPLE; // 121
    356             CONVOLVE_ONE_SAMPLE; // 122
    357             CONVOLVE_ONE_SAMPLE; // 123
    358             CONVOLVE_ONE_SAMPLE; // 124
    359             CONVOLVE_ONE_SAMPLE; // 125
    360             CONVOLVE_ONE_SAMPLE; // 126
    361             CONVOLVE_ONE_SAMPLE; // 127
    362             CONVOLVE_ONE_SAMPLE; // 128
    363         } else {
    364             while (j < kernelSize) {
    365                 // Non-optimized using actual while loop.
    366                 CONVOLVE_ONE_SAMPLE;
    367             }
    368         }
    369         destP[i++] = sum;
    370     }
    371 #endif // OS(MACOSX)
    372 
    373     // Copy 2nd half of input buffer to 1st half.
    374     memcpy(m_buffer.data(), inputP, sizeof(float) * framesToProcess);
    375 #endif
    376 }
    377 
    378 void DirectConvolver::reset()
    379 {
    380     m_buffer.zero();
    381 #if USE(WEBAUDIO_IPP)
    382     m_overlayBuffer.zero();
    383 #endif // USE(WEBAUDIO_IPP)
    384 }
    385 
    386 } // namespace WebCore
    387 
    388 #endif // ENABLE(WEB_AUDIO)
    389