1 /* 2 * Copyright (C) 2012 Intel Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. Neither the name of Apple Computer, Inc. ("Apple") nor the names of 14 * its contributors may be used to endorse or promote products derived 15 * from this software without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY 18 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY 21 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 23 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 #include "config.h" 30 31 #if ENABLE(WEB_AUDIO) 32 33 #include "platform/audio/DirectConvolver.h" 34 35 #if OS(MACOSX) 36 #include <Accelerate/Accelerate.h> 37 #endif 38 39 #include "platform/audio/VectorMath.h" 40 #include "wtf/CPU.h" 41 42 namespace WebCore { 43 44 using namespace VectorMath; 45 46 DirectConvolver::DirectConvolver(size_t inputBlockSize) 47 : m_inputBlockSize(inputBlockSize) 48 #if USE(WEBAUDIO_IPP) 49 , m_overlayBuffer(inputBlockSize) 50 #endif // USE(WEBAUDIO_IPP) 51 , m_buffer(inputBlockSize * 2) 52 { 53 } 54 55 void DirectConvolver::process(AudioFloatArray* convolutionKernel, const float* sourceP, float* destP, size_t framesToProcess) 56 { 57 ASSERT(framesToProcess == m_inputBlockSize); 58 if (framesToProcess != m_inputBlockSize) 59 return; 60 61 // Only support kernelSize <= m_inputBlockSize 62 size_t kernelSize = convolutionKernel->size(); 63 ASSERT(kernelSize <= m_inputBlockSize); 64 if (kernelSize > m_inputBlockSize) 65 return; 66 67 float* kernelP = convolutionKernel->data(); 68 69 // Sanity check 70 bool isCopyGood = kernelP && sourceP && destP && m_buffer.data(); 71 ASSERT(isCopyGood); 72 if (!isCopyGood) 73 return; 74 75 #if USE(WEBAUDIO_IPP) 76 float* outputBuffer = m_buffer.data(); 77 float* overlayBuffer = m_overlayBuffer.data(); 78 bool isCopyGood2 = overlayBuffer && m_overlayBuffer.size() >= kernelSize && m_buffer.size() == m_inputBlockSize * 2; 79 ASSERT(isCopyGood2); 80 if (!isCopyGood2) 81 return; 82 83 ippsConv_32f(static_cast<const Ipp32f*>(sourceP), framesToProcess, static_cast<Ipp32f*>(kernelP), kernelSize, static_cast<Ipp32f*>(outputBuffer)); 84 85 vadd(outputBuffer, 1, overlayBuffer, 1, destP, 1, framesToProcess); 86 memcpy(overlayBuffer, outputBuffer + m_inputBlockSize, sizeof(float) * kernelSize); 87 #else 88 float* inputP = m_buffer.data() + m_inputBlockSize; 89 90 // Copy samples to 2nd half of input buffer. 91 memcpy(inputP, sourceP, sizeof(float) * framesToProcess); 92 93 #if OS(MACOSX) 94 #if CPU(X86) 95 conv(inputP - kernelSize + 1, 1, kernelP + kernelSize - 1, -1, destP, 1, framesToProcess, kernelSize); 96 #else 97 vDSP_conv(inputP - kernelSize + 1, 1, kernelP + kernelSize - 1, -1, destP, 1, framesToProcess, kernelSize); 98 #endif // CPU(X86) 99 #else 100 // FIXME: The macro can be further optimized to avoid pipeline stalls. One possibility is to maintain 4 separate sums and change the macro to CONVOLVE_FOUR_SAMPLES. 101 #define CONVOLVE_ONE_SAMPLE \ 102 do { \ 103 sum += inputP[i - j] * kernelP[j]; \ 104 j++; \ 105 } while (0) 106 107 size_t i = 0; 108 while (i < framesToProcess) { 109 size_t j = 0; 110 float sum = 0; 111 112 // FIXME: SSE optimization may be applied here. 113 if (kernelSize == 32) { 114 CONVOLVE_ONE_SAMPLE; // 1 115 CONVOLVE_ONE_SAMPLE; // 2 116 CONVOLVE_ONE_SAMPLE; // 3 117 CONVOLVE_ONE_SAMPLE; // 4 118 CONVOLVE_ONE_SAMPLE; // 5 119 CONVOLVE_ONE_SAMPLE; // 6 120 CONVOLVE_ONE_SAMPLE; // 7 121 CONVOLVE_ONE_SAMPLE; // 8 122 CONVOLVE_ONE_SAMPLE; // 9 123 CONVOLVE_ONE_SAMPLE; // 10 124 125 CONVOLVE_ONE_SAMPLE; // 11 126 CONVOLVE_ONE_SAMPLE; // 12 127 CONVOLVE_ONE_SAMPLE; // 13 128 CONVOLVE_ONE_SAMPLE; // 14 129 CONVOLVE_ONE_SAMPLE; // 15 130 CONVOLVE_ONE_SAMPLE; // 16 131 CONVOLVE_ONE_SAMPLE; // 17 132 CONVOLVE_ONE_SAMPLE; // 18 133 CONVOLVE_ONE_SAMPLE; // 19 134 CONVOLVE_ONE_SAMPLE; // 20 135 136 CONVOLVE_ONE_SAMPLE; // 21 137 CONVOLVE_ONE_SAMPLE; // 22 138 CONVOLVE_ONE_SAMPLE; // 23 139 CONVOLVE_ONE_SAMPLE; // 24 140 CONVOLVE_ONE_SAMPLE; // 25 141 CONVOLVE_ONE_SAMPLE; // 26 142 CONVOLVE_ONE_SAMPLE; // 27 143 CONVOLVE_ONE_SAMPLE; // 28 144 CONVOLVE_ONE_SAMPLE; // 29 145 CONVOLVE_ONE_SAMPLE; // 30 146 147 CONVOLVE_ONE_SAMPLE; // 31 148 CONVOLVE_ONE_SAMPLE; // 32 149 150 } else if (kernelSize == 64) { 151 CONVOLVE_ONE_SAMPLE; // 1 152 CONVOLVE_ONE_SAMPLE; // 2 153 CONVOLVE_ONE_SAMPLE; // 3 154 CONVOLVE_ONE_SAMPLE; // 4 155 CONVOLVE_ONE_SAMPLE; // 5 156 CONVOLVE_ONE_SAMPLE; // 6 157 CONVOLVE_ONE_SAMPLE; // 7 158 CONVOLVE_ONE_SAMPLE; // 8 159 CONVOLVE_ONE_SAMPLE; // 9 160 CONVOLVE_ONE_SAMPLE; // 10 161 162 CONVOLVE_ONE_SAMPLE; // 11 163 CONVOLVE_ONE_SAMPLE; // 12 164 CONVOLVE_ONE_SAMPLE; // 13 165 CONVOLVE_ONE_SAMPLE; // 14 166 CONVOLVE_ONE_SAMPLE; // 15 167 CONVOLVE_ONE_SAMPLE; // 16 168 CONVOLVE_ONE_SAMPLE; // 17 169 CONVOLVE_ONE_SAMPLE; // 18 170 CONVOLVE_ONE_SAMPLE; // 19 171 CONVOLVE_ONE_SAMPLE; // 20 172 173 CONVOLVE_ONE_SAMPLE; // 21 174 CONVOLVE_ONE_SAMPLE; // 22 175 CONVOLVE_ONE_SAMPLE; // 23 176 CONVOLVE_ONE_SAMPLE; // 24 177 CONVOLVE_ONE_SAMPLE; // 25 178 CONVOLVE_ONE_SAMPLE; // 26 179 CONVOLVE_ONE_SAMPLE; // 27 180 CONVOLVE_ONE_SAMPLE; // 28 181 CONVOLVE_ONE_SAMPLE; // 29 182 CONVOLVE_ONE_SAMPLE; // 30 183 184 CONVOLVE_ONE_SAMPLE; // 31 185 CONVOLVE_ONE_SAMPLE; // 32 186 CONVOLVE_ONE_SAMPLE; // 33 187 CONVOLVE_ONE_SAMPLE; // 34 188 CONVOLVE_ONE_SAMPLE; // 35 189 CONVOLVE_ONE_SAMPLE; // 36 190 CONVOLVE_ONE_SAMPLE; // 37 191 CONVOLVE_ONE_SAMPLE; // 38 192 CONVOLVE_ONE_SAMPLE; // 39 193 CONVOLVE_ONE_SAMPLE; // 40 194 195 CONVOLVE_ONE_SAMPLE; // 41 196 CONVOLVE_ONE_SAMPLE; // 42 197 CONVOLVE_ONE_SAMPLE; // 43 198 CONVOLVE_ONE_SAMPLE; // 44 199 CONVOLVE_ONE_SAMPLE; // 45 200 CONVOLVE_ONE_SAMPLE; // 46 201 CONVOLVE_ONE_SAMPLE; // 47 202 CONVOLVE_ONE_SAMPLE; // 48 203 CONVOLVE_ONE_SAMPLE; // 49 204 CONVOLVE_ONE_SAMPLE; // 50 205 206 CONVOLVE_ONE_SAMPLE; // 51 207 CONVOLVE_ONE_SAMPLE; // 52 208 CONVOLVE_ONE_SAMPLE; // 53 209 CONVOLVE_ONE_SAMPLE; // 54 210 CONVOLVE_ONE_SAMPLE; // 55 211 CONVOLVE_ONE_SAMPLE; // 56 212 CONVOLVE_ONE_SAMPLE; // 57 213 CONVOLVE_ONE_SAMPLE; // 58 214 CONVOLVE_ONE_SAMPLE; // 59 215 CONVOLVE_ONE_SAMPLE; // 60 216 217 CONVOLVE_ONE_SAMPLE; // 61 218 CONVOLVE_ONE_SAMPLE; // 62 219 CONVOLVE_ONE_SAMPLE; // 63 220 CONVOLVE_ONE_SAMPLE; // 64 221 222 } else if (kernelSize == 128) { 223 CONVOLVE_ONE_SAMPLE; // 1 224 CONVOLVE_ONE_SAMPLE; // 2 225 CONVOLVE_ONE_SAMPLE; // 3 226 CONVOLVE_ONE_SAMPLE; // 4 227 CONVOLVE_ONE_SAMPLE; // 5 228 CONVOLVE_ONE_SAMPLE; // 6 229 CONVOLVE_ONE_SAMPLE; // 7 230 CONVOLVE_ONE_SAMPLE; // 8 231 CONVOLVE_ONE_SAMPLE; // 9 232 CONVOLVE_ONE_SAMPLE; // 10 233 234 CONVOLVE_ONE_SAMPLE; // 11 235 CONVOLVE_ONE_SAMPLE; // 12 236 CONVOLVE_ONE_SAMPLE; // 13 237 CONVOLVE_ONE_SAMPLE; // 14 238 CONVOLVE_ONE_SAMPLE; // 15 239 CONVOLVE_ONE_SAMPLE; // 16 240 CONVOLVE_ONE_SAMPLE; // 17 241 CONVOLVE_ONE_SAMPLE; // 18 242 CONVOLVE_ONE_SAMPLE; // 19 243 CONVOLVE_ONE_SAMPLE; // 20 244 245 CONVOLVE_ONE_SAMPLE; // 21 246 CONVOLVE_ONE_SAMPLE; // 22 247 CONVOLVE_ONE_SAMPLE; // 23 248 CONVOLVE_ONE_SAMPLE; // 24 249 CONVOLVE_ONE_SAMPLE; // 25 250 CONVOLVE_ONE_SAMPLE; // 26 251 CONVOLVE_ONE_SAMPLE; // 27 252 CONVOLVE_ONE_SAMPLE; // 28 253 CONVOLVE_ONE_SAMPLE; // 29 254 CONVOLVE_ONE_SAMPLE; // 30 255 256 CONVOLVE_ONE_SAMPLE; // 31 257 CONVOLVE_ONE_SAMPLE; // 32 258 CONVOLVE_ONE_SAMPLE; // 33 259 CONVOLVE_ONE_SAMPLE; // 34 260 CONVOLVE_ONE_SAMPLE; // 35 261 CONVOLVE_ONE_SAMPLE; // 36 262 CONVOLVE_ONE_SAMPLE; // 37 263 CONVOLVE_ONE_SAMPLE; // 38 264 CONVOLVE_ONE_SAMPLE; // 39 265 CONVOLVE_ONE_SAMPLE; // 40 266 267 CONVOLVE_ONE_SAMPLE; // 41 268 CONVOLVE_ONE_SAMPLE; // 42 269 CONVOLVE_ONE_SAMPLE; // 43 270 CONVOLVE_ONE_SAMPLE; // 44 271 CONVOLVE_ONE_SAMPLE; // 45 272 CONVOLVE_ONE_SAMPLE; // 46 273 CONVOLVE_ONE_SAMPLE; // 47 274 CONVOLVE_ONE_SAMPLE; // 48 275 CONVOLVE_ONE_SAMPLE; // 49 276 CONVOLVE_ONE_SAMPLE; // 50 277 278 CONVOLVE_ONE_SAMPLE; // 51 279 CONVOLVE_ONE_SAMPLE; // 52 280 CONVOLVE_ONE_SAMPLE; // 53 281 CONVOLVE_ONE_SAMPLE; // 54 282 CONVOLVE_ONE_SAMPLE; // 55 283 CONVOLVE_ONE_SAMPLE; // 56 284 CONVOLVE_ONE_SAMPLE; // 57 285 CONVOLVE_ONE_SAMPLE; // 58 286 CONVOLVE_ONE_SAMPLE; // 59 287 CONVOLVE_ONE_SAMPLE; // 60 288 289 CONVOLVE_ONE_SAMPLE; // 61 290 CONVOLVE_ONE_SAMPLE; // 62 291 CONVOLVE_ONE_SAMPLE; // 63 292 CONVOLVE_ONE_SAMPLE; // 64 293 CONVOLVE_ONE_SAMPLE; // 65 294 CONVOLVE_ONE_SAMPLE; // 66 295 CONVOLVE_ONE_SAMPLE; // 67 296 CONVOLVE_ONE_SAMPLE; // 68 297 CONVOLVE_ONE_SAMPLE; // 69 298 CONVOLVE_ONE_SAMPLE; // 70 299 300 CONVOLVE_ONE_SAMPLE; // 71 301 CONVOLVE_ONE_SAMPLE; // 72 302 CONVOLVE_ONE_SAMPLE; // 73 303 CONVOLVE_ONE_SAMPLE; // 74 304 CONVOLVE_ONE_SAMPLE; // 75 305 CONVOLVE_ONE_SAMPLE; // 76 306 CONVOLVE_ONE_SAMPLE; // 77 307 CONVOLVE_ONE_SAMPLE; // 78 308 CONVOLVE_ONE_SAMPLE; // 79 309 CONVOLVE_ONE_SAMPLE; // 80 310 311 CONVOLVE_ONE_SAMPLE; // 81 312 CONVOLVE_ONE_SAMPLE; // 82 313 CONVOLVE_ONE_SAMPLE; // 83 314 CONVOLVE_ONE_SAMPLE; // 84 315 CONVOLVE_ONE_SAMPLE; // 85 316 CONVOLVE_ONE_SAMPLE; // 86 317 CONVOLVE_ONE_SAMPLE; // 87 318 CONVOLVE_ONE_SAMPLE; // 88 319 CONVOLVE_ONE_SAMPLE; // 89 320 CONVOLVE_ONE_SAMPLE; // 90 321 322 CONVOLVE_ONE_SAMPLE; // 91 323 CONVOLVE_ONE_SAMPLE; // 92 324 CONVOLVE_ONE_SAMPLE; // 93 325 CONVOLVE_ONE_SAMPLE; // 94 326 CONVOLVE_ONE_SAMPLE; // 95 327 CONVOLVE_ONE_SAMPLE; // 96 328 CONVOLVE_ONE_SAMPLE; // 97 329 CONVOLVE_ONE_SAMPLE; // 98 330 CONVOLVE_ONE_SAMPLE; // 99 331 CONVOLVE_ONE_SAMPLE; // 100 332 333 CONVOLVE_ONE_SAMPLE; // 101 334 CONVOLVE_ONE_SAMPLE; // 102 335 CONVOLVE_ONE_SAMPLE; // 103 336 CONVOLVE_ONE_SAMPLE; // 104 337 CONVOLVE_ONE_SAMPLE; // 105 338 CONVOLVE_ONE_SAMPLE; // 106 339 CONVOLVE_ONE_SAMPLE; // 107 340 CONVOLVE_ONE_SAMPLE; // 108 341 CONVOLVE_ONE_SAMPLE; // 109 342 CONVOLVE_ONE_SAMPLE; // 110 343 344 CONVOLVE_ONE_SAMPLE; // 111 345 CONVOLVE_ONE_SAMPLE; // 112 346 CONVOLVE_ONE_SAMPLE; // 113 347 CONVOLVE_ONE_SAMPLE; // 114 348 CONVOLVE_ONE_SAMPLE; // 115 349 CONVOLVE_ONE_SAMPLE; // 116 350 CONVOLVE_ONE_SAMPLE; // 117 351 CONVOLVE_ONE_SAMPLE; // 118 352 CONVOLVE_ONE_SAMPLE; // 119 353 CONVOLVE_ONE_SAMPLE; // 120 354 355 CONVOLVE_ONE_SAMPLE; // 121 356 CONVOLVE_ONE_SAMPLE; // 122 357 CONVOLVE_ONE_SAMPLE; // 123 358 CONVOLVE_ONE_SAMPLE; // 124 359 CONVOLVE_ONE_SAMPLE; // 125 360 CONVOLVE_ONE_SAMPLE; // 126 361 CONVOLVE_ONE_SAMPLE; // 127 362 CONVOLVE_ONE_SAMPLE; // 128 363 } else { 364 while (j < kernelSize) { 365 // Non-optimized using actual while loop. 366 CONVOLVE_ONE_SAMPLE; 367 } 368 } 369 destP[i++] = sum; 370 } 371 #endif // OS(MACOSX) 372 373 // Copy 2nd half of input buffer to 1st half. 374 memcpy(m_buffer.data(), inputP, sizeof(float) * framesToProcess); 375 #endif 376 } 377 378 void DirectConvolver::reset() 379 { 380 m_buffer.zero(); 381 #if USE(WEBAUDIO_IPP) 382 m_overlayBuffer.zero(); 383 #endif // USE(WEBAUDIO_IPP) 384 } 385 386 } // namespace WebCore 387 388 #endif // ENABLE(WEB_AUDIO) 389