/*
 * Copyright (C) 2010, Google Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "config.h"

#if ENABLE(WEB_AUDIO)

#include "core/platform/audio/HRTFPanner.h"

#include <algorithm>
#include "core/platform/audio/AudioBus.h"
#include "core/platform/audio/FFTConvolver.h"
#include "core/platform/audio/HRTFDatabase.h"
#include "wtf/MathExtras.h"
#include "wtf/RefPtr.h"

using namespace std;

namespace WebCore {

// The value of 2 milliseconds is larger than the largest delay which exists in any HRTFKernel from the default HRTFDatabase (0.00136 seconds).
// We ASSERT in process() that the delay values stay below this maximum.
const double MaxDelayTimeSeconds = 0.002;

const int UninitializedAzimuth = -1;
const unsigned RenderingQuantum = 128;

HRTFPanner::HRTFPanner(float sampleRate, HRTFDatabaseLoader* databaseLoader)
    : Panner(PanningModelHRTF)
    , m_databaseLoader(databaseLoader)
    , m_sampleRate(sampleRate)
    , m_crossfadeSelection(CrossfadeSelection1)
    , m_azimuthIndex1(UninitializedAzimuth)
    , m_elevation1(0)
    , m_azimuthIndex2(UninitializedAzimuth)
    , m_elevation2(0)
    , m_crossfadeX(0)
    , m_crossfadeIncr(0)
    , m_convolverL1(fftSizeForSampleRate(sampleRate))
    , m_convolverR1(fftSizeForSampleRate(sampleRate))
    , m_convolverL2(fftSizeForSampleRate(sampleRate))
    , m_convolverR2(fftSizeForSampleRate(sampleRate))
    , m_delayLineL(MaxDelayTimeSeconds, sampleRate)
    , m_delayLineR(MaxDelayTimeSeconds, sampleRate)
    , m_tempL1(RenderingQuantum)
    , m_tempR1(RenderingQuantum)
    , m_tempL2(RenderingQuantum)
    , m_tempR2(RenderingQuantum)
{
    ASSERT(databaseLoader);
}

HRTFPanner::~HRTFPanner()
{
}

size_t HRTFPanner::fftSizeForSampleRate(float sampleRate)
{
    // The HRTF impulse responses (loaded as audio resources) are 512 sample-frames @ 44.1 kHz.
    // Currently, we truncate the impulse responses to half this size, but an FFT size of twice the impulse response size is needed (for convolution).
    // So for sample rates around 44.1 kHz an FFT size of 512 is good. We double the FFT size only for sample rates at least double this.
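    // Concretely: sample rates of 44.1 kHz and 48 kHz get an FFT size of 512, while 88.2 kHz and 96 kHz get 1024.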
    ASSERT(sampleRate >= 44100.0 && sampleRate <= 96000.0);
    return (sampleRate < 88200.0) ? 512 : 1024;
}

void HRTFPanner::reset()
{
    m_convolverL1.reset();
    m_convolverR1.reset();
    m_convolverL2.reset();
    m_convolverR2.reset();
    m_delayLineL.reset();
    m_delayLineR.reset();
}

int HRTFPanner::calculateDesiredAzimuthIndexAndBlend(double azimuth, double& azimuthBlend)
{
    // Convert the azimuth angle from the range -180 -> +180 into the range 0 -> 360.
    // The azimuth index may then be calculated from this positive value.
    if (azimuth < 0)
        azimuth += 360.0;

    HRTFDatabase* database = m_databaseLoader->database();
    ASSERT(database);

    int numberOfAzimuths = database->numberOfAzimuths();
    const double angleBetweenAzimuths = 360.0 / numberOfAzimuths;

    // Calculate the azimuth index and the blend (0 -> 1) for interpolation.
    double desiredAzimuthIndexFloat = azimuth / angleBetweenAzimuths;
    int desiredAzimuthIndex = static_cast<int>(desiredAzimuthIndexFloat);
    azimuthBlend = desiredAzimuthIndexFloat - static_cast<double>(desiredAzimuthIndex);

    // We don't immediately start using this azimuth index, but instead approach this index from the last index we rendered at.
    // This minimizes the clicks and graininess for moving sources which occur otherwise.
    desiredAzimuthIndex = max(0, desiredAzimuthIndex);
    desiredAzimuthIndex = min(numberOfAzimuths - 1, desiredAzimuthIndex);
    return desiredAzimuthIndex;
}

void HRTFPanner::pan(double desiredAzimuth, double elevation, const AudioBus* inputBus, AudioBus* outputBus, size_t framesToProcess)
{
    unsigned numInputChannels = inputBus ? inputBus->numberOfChannels() : 0;

    bool isInputGood = inputBus && numInputChannels >= 1 && numInputChannels <= 2;
    ASSERT(isInputGood);

    bool isOutputGood = outputBus && outputBus->numberOfChannels() == 2 && framesToProcess <= outputBus->length();
    ASSERT(isOutputGood);

    if (!isInputGood || !isOutputGood) {
        if (outputBus)
            outputBus->zero();
        return;
    }

    HRTFDatabase* database = m_databaseLoader->database();
    ASSERT(database);
    if (!database) {
        outputBus->zero();
        return;
    }

    // The IRCAM HRTF azimuth values in the loaded database are reversed from the panner's notion of azimuth.
    double azimuth = -desiredAzimuth;

    bool isAzimuthGood = azimuth >= -180.0 && azimuth <= 180.0;
    ASSERT(isAzimuthGood);
    if (!isAzimuthGood) {
        outputBus->zero();
        return;
    }

    // Normally, we'll just be dealing with mono sources.
    // If we have a stereo input, implement stereo panning with the left source processed by the left HRTF, and the right source by the right HRTF.
    const AudioChannel* inputChannelL = inputBus->channelByType(AudioBus::ChannelLeft);
    const AudioChannel* inputChannelR = numInputChannels > 1 ? inputBus->channelByType(AudioBus::ChannelRight) : 0;

    // Get source and destination pointers.
    const float* sourceL = inputChannelL->data();
    const float* sourceR = numInputChannels > 1 ? inputChannelR->data() : sourceL;
    float* destinationL = outputBus->channelByType(AudioBus::ChannelLeft)->mutableData();
    float* destinationR = outputBus->channelByType(AudioBus::ChannelRight)->mutableData();

    double azimuthBlend;
    int desiredAzimuthIndex = calculateDesiredAzimuthIndexAndBlend(azimuth, azimuthBlend);
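    // For example, if the database exposed 24 azimuth positions, the spacing would be
    // 15 degrees, and an azimuth of 100 degrees would map to desiredAzimuthIndex = 6
    // with azimuthBlend = 0.67 toward index 7.
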
    // Initially snap azimuth and elevation values to the first values encountered.
    if (m_azimuthIndex1 == UninitializedAzimuth) {
        m_azimuthIndex1 = desiredAzimuthIndex;
        m_elevation1 = elevation;
    }
    if (m_azimuthIndex2 == UninitializedAzimuth) {
        m_azimuthIndex2 = desiredAzimuthIndex;
        m_elevation2 = elevation;
    }

    // Cross-fade / transition over a period of around 45 milliseconds.
    // This is an empirical value tuned to be a reasonable trade-off between
    // smoothness and speed.
    const double fadeFrames = sampleRate() <= 48000 ? 2048 : 4096;

    // Check for azimuth and elevation changes, initiating a cross-fade if needed.
    if (!m_crossfadeX && m_crossfadeSelection == CrossfadeSelection1) {
        if (desiredAzimuthIndex != m_azimuthIndex1 || elevation != m_elevation1) {
            // Cross-fade from 1 -> 2
            m_crossfadeIncr = 1 / fadeFrames;
            m_azimuthIndex2 = desiredAzimuthIndex;
            m_elevation2 = elevation;
        }
    }
    if (m_crossfadeX == 1 && m_crossfadeSelection == CrossfadeSelection2) {
        if (desiredAzimuthIndex != m_azimuthIndex2 || elevation != m_elevation2) {
            // Cross-fade from 2 -> 1
            m_crossfadeIncr = -1 / fadeFrames;
            m_azimuthIndex1 = desiredAzimuthIndex;
            m_elevation1 = elevation;
        }
    }

    // This algorithm currently requires that we process in power-of-two sized chunks of at least RenderingQuantum frames.
    ASSERT(1UL << static_cast<int>(log2(framesToProcess)) == framesToProcess);
    ASSERT(framesToProcess >= RenderingQuantum);

    const unsigned framesPerSegment = RenderingQuantum;
    const unsigned numberOfSegments = framesToProcess / framesPerSegment;

    for (unsigned segment = 0; segment < numberOfSegments; ++segment) {
        // Get the HRTFKernels and interpolated delays.
        HRTFKernel* kernelL1;
        HRTFKernel* kernelR1;
        HRTFKernel* kernelL2;
        HRTFKernel* kernelR2;
        double frameDelayL1;
        double frameDelayR1;
        double frameDelayL2;
        double frameDelayR2;
        database->getKernelsFromAzimuthElevation(azimuthBlend, m_azimuthIndex1, m_elevation1, kernelL1, kernelR1, frameDelayL1, frameDelayR1);
        database->getKernelsFromAzimuthElevation(azimuthBlend, m_azimuthIndex2, m_elevation2, kernelL2, kernelR2, frameDelayL2, frameDelayR2);

        bool areKernelsGood = kernelL1 && kernelR1 && kernelL2 && kernelR2;
        ASSERT(areKernelsGood);
        if (!areKernelsGood) {
            outputBus->zero();
            return;
        }

        ASSERT(frameDelayL1 / sampleRate() < MaxDelayTimeSeconds && frameDelayR1 / sampleRate() < MaxDelayTimeSeconds);
        ASSERT(frameDelayL2 / sampleRate() < MaxDelayTimeSeconds && frameDelayR2 / sampleRate() < MaxDelayTimeSeconds);

        // Crossfade inter-aural delays based on transitions.
        double frameDelayL = (1 - m_crossfadeX) * frameDelayL1 + m_crossfadeX * frameDelayL2;
        double frameDelayR = (1 - m_crossfadeX) * frameDelayR1 + m_crossfadeX * frameDelayR2;
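        // For instance, halfway through a cross-fade (m_crossfadeX = 0.5) the applied
        // delay is the average of the two kernels' frame delays, so the inter-aural
        // time difference glides between azimuth positions instead of jumping.
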
        // Calculate the source and destination pointers for the current segment.
        unsigned offset = segment * framesPerSegment;
        const float* segmentSourceL = sourceL + offset;
        const float* segmentSourceR = sourceR + offset;
        float* segmentDestinationL = destinationL + offset;
        float* segmentDestinationR = destinationR + offset;

        // First run through the delay lines for the inter-aural time difference.
        m_delayLineL.setDelayFrames(frameDelayL);
        m_delayLineR.setDelayFrames(frameDelayR);
        m_delayLineL.process(segmentSourceL, segmentDestinationL, framesPerSegment);
        m_delayLineR.process(segmentSourceR, segmentDestinationR, framesPerSegment);

        bool needsCrossfading = m_crossfadeIncr;

        // Have the convolvers render directly to the final destination if we're not cross-fading.
        float* convolutionDestinationL1 = needsCrossfading ? m_tempL1.data() : segmentDestinationL;
        float* convolutionDestinationR1 = needsCrossfading ? m_tempR1.data() : segmentDestinationR;
        float* convolutionDestinationL2 = needsCrossfading ? m_tempL2.data() : segmentDestinationL;
        float* convolutionDestinationR2 = needsCrossfading ? m_tempR2.data() : segmentDestinationR;

        // Now do the convolutions.
        // Note that we avoid doing convolutions on both sets of convolvers if we're not currently cross-fading.

        if (m_crossfadeSelection == CrossfadeSelection1 || needsCrossfading) {
            m_convolverL1.process(kernelL1->fftFrame(), segmentDestinationL, convolutionDestinationL1, framesPerSegment);
            m_convolverR1.process(kernelR1->fftFrame(), segmentDestinationR, convolutionDestinationR1, framesPerSegment);
        }

        if (m_crossfadeSelection == CrossfadeSelection2 || needsCrossfading) {
            m_convolverL2.process(kernelL2->fftFrame(), segmentDestinationL, convolutionDestinationL2, framesPerSegment);
            m_convolverR2.process(kernelR2->fftFrame(), segmentDestinationR, convolutionDestinationR2, framesPerSegment);
        }

        if (needsCrossfading) {
            // Apply linear cross-fade.
            float x = m_crossfadeX;
            float incr = m_crossfadeIncr;
            for (unsigned i = 0; i < framesPerSegment; ++i) {
                segmentDestinationL[i] = (1 - x) * convolutionDestinationL1[i] + x * convolutionDestinationL2[i];
                segmentDestinationR[i] = (1 - x) * convolutionDestinationR1[i] + x * convolutionDestinationR2[i];
                x += incr;
            }
            // Update the cross-fade value from the local variable.
            m_crossfadeX = x;

            if (m_crossfadeIncr > 0 && fabs(m_crossfadeX - 1) < m_crossfadeIncr) {
                // We've fully made the cross-fade transition from 1 -> 2.
                m_crossfadeSelection = CrossfadeSelection2;
                m_crossfadeX = 1;
                m_crossfadeIncr = 0;
            } else if (m_crossfadeIncr < 0 && fabs(m_crossfadeX) < -m_crossfadeIncr) {
                // We've fully made the cross-fade transition from 2 -> 1.
                m_crossfadeSelection = CrossfadeSelection1;
                m_crossfadeX = 0;
                m_crossfadeIncr = 0;
            }
        }
    }
}

double HRTFPanner::tailTime() const
{
    // Because the HRTFPanner is implemented with a DelayKernel and an FFTConvolver, the tailTime of the HRTFPanner
    // is the sum of the tailTime of the DelayKernel and the tailTime of the FFTConvolver, which are MaxDelayTimeSeconds
    // and fftSize() / 2 frames, respectively.
    return MaxDelayTimeSeconds + (fftSize() / 2) / static_cast<double>(sampleRate());
}

double HRTFPanner::latencyTime() const
{
    // The latency of an FFTConvolver is also fftSize() / 2 frames, and is in addition to its tailTime of the
    // same value.
    return (fftSize() / 2) / static_cast<double>(sampleRate());
}

} // namespace WebCore

#endif // ENABLE(WEB_AUDIO)
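
// Numeric illustration of tailTime() and latencyTime(): at a 44.1 kHz sample rate the
// FFT size is 512, so latencyTime() is 256 / 44100, roughly 5.8 ms, and tailTime() is
// 0.002 + 256 / 44100, roughly 7.8 ms.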