/*
 * Copyright (C) 2010, Google Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "config.h"

#if ENABLE(WEB_AUDIO)

#include "platform/audio/HRTFPanner.h"

#include <algorithm>
#include "platform/audio/AudioBus.h"
#include "platform/audio/AudioUtilities.h"
#include "platform/audio/HRTFDatabase.h"
#include "wtf/MathExtras.h"
#include "wtf/RefPtr.h"

namespace blink {

// The value of 2 milliseconds is larger than the largest delay which exists in any HRTFKernel
// from the default HRTFDatabase (approximately 0.00136 seconds).
// The delay values used in pan() are ASSERTed against this limit.
const double MaxDelayTimeSeconds = 0.002;

const int UninitializedAzimuth = -1;
const unsigned RenderingQuantum = 128;

HRTFPanner::HRTFPanner(float sampleRate, HRTFDatabaseLoader* databaseLoader)
    : Panner(PanningModelHRTF)
    , m_databaseLoader(databaseLoader)
    , m_sampleRate(sampleRate)
    , m_crossfadeSelection(CrossfadeSelection1)
    , m_azimuthIndex1(UninitializedAzimuth)
    , m_elevation1(0)
    , m_azimuthIndex2(UninitializedAzimuth)
    , m_elevation2(0)
    , m_crossfadeX(0)
    , m_crossfadeIncr(0)
    , m_convolverL1(fftSizeForSampleRate(sampleRate))
    , m_convolverR1(fftSizeForSampleRate(sampleRate))
    , m_convolverL2(fftSizeForSampleRate(sampleRate))
    , m_convolverR2(fftSizeForSampleRate(sampleRate))
    , m_delayLineL(MaxDelayTimeSeconds, sampleRate)
    , m_delayLineR(MaxDelayTimeSeconds, sampleRate)
    , m_tempL1(RenderingQuantum)
    , m_tempR1(RenderingQuantum)
    , m_tempL2(RenderingQuantum)
    , m_tempR2(RenderingQuantum)
{
    ASSERT(databaseLoader);
}

HRTFPanner::~HRTFPanner()
{
}

size_t HRTFPanner::fftSizeForSampleRate(float sampleRate)
{
    // The HRTF impulse responses (loaded as audio resources) are 512 sample-frames @ 44.1 kHz.
    // Currently, we truncate the impulse responses to half this size,
    // but an FFT size of twice the impulse response size is needed (for convolution).
    // So for sample rates around 44.1 kHz an FFT size of 512 is good.
    // For different sample rates, the truncated response is resampled.
    // The resampled length is rounded down to the nearest power of two, which is
    // then doubled to get the actual FFT size.

    ASSERT(AudioUtilities::isValidAudioBufferSampleRate(sampleRate));

    int truncatedImpulseLength = 256;
    double sampleRateRatio = sampleRate / 44100;
    double resampledLength = truncatedImpulseLength * sampleRateRatio;
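
    // Worked examples of the formula below (round the resampled length down to a
    // power of two, then double):
    //   44.1 kHz: resampledLength = 256   -> 2 * 2^8 = 512
    //   48 kHz:   resampledLength ~ 278.6 -> 2 * 2^8 = 512
    //   96 kHz:   resampledLength ~ 557.3 -> 2 * 2^9 = 1024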
    return 2 * (1 << static_cast<unsigned>(log2(resampledLength)));
}

void HRTFPanner::reset()
{
    m_convolverL1.reset();
    m_convolverR1.reset();
    m_convolverL2.reset();
    m_convolverR2.reset();
    m_delayLineL.reset();
    m_delayLineR.reset();
}

int HRTFPanner::calculateDesiredAzimuthIndexAndBlend(double azimuth, double& azimuthBlend)
{
    // Convert the azimuth angle from the range -180 -> +180 into the range 0 -> 360.
    // The azimuth index may then be calculated from this positive value.
    if (azimuth < 0)
        azimuth += 360.0;

    HRTFDatabase* database = m_databaseLoader->database();
    ASSERT(database);

    int numberOfAzimuths = database->numberOfAzimuths();
    const double angleBetweenAzimuths = 360.0 / numberOfAzimuths;

    // Calculate the azimuth index and the blend (0 -> 1) for interpolation.
    double desiredAzimuthIndexFloat = azimuth / angleBetweenAzimuths;
    int desiredAzimuthIndex = static_cast<int>(desiredAzimuthIndexFloat);
    azimuthBlend = desiredAzimuthIndexFloat - static_cast<double>(desiredAzimuthIndex);
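
    // For illustration only: if the database had 24 measured azimuths (15 degrees apart),
    // an azimuth of 40 degrees would give desiredAzimuthIndexFloat = 40 / 15 ~ 2.67,
    // i.e. index 2 with a blend of ~0.67 toward index 3.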

    // We don't immediately start using this azimuth index, but instead approach this index from the last index we rendered at.
    // This minimizes the clicks and graininess that otherwise occur with moving sources.
    desiredAzimuthIndex = std::max(0, desiredAzimuthIndex);
    desiredAzimuthIndex = std::min(numberOfAzimuths - 1, desiredAzimuthIndex);
    return desiredAzimuthIndex;
}

void HRTFPanner::pan(double desiredAzimuth, double elevation, const AudioBus* inputBus, AudioBus* outputBus, size_t framesToProcess)
{
    unsigned numInputChannels = inputBus ? inputBus->numberOfChannels() : 0;

    bool isInputGood = inputBus && numInputChannels >= 1 && numInputChannels <= 2;
    ASSERT(isInputGood);

    bool isOutputGood = outputBus && outputBus->numberOfChannels() == 2 && framesToProcess <= outputBus->length();
    ASSERT(isOutputGood);

    if (!isInputGood || !isOutputGood) {
        if (outputBus)
            outputBus->zero();
        return;
    }

    HRTFDatabase* database = m_databaseLoader->database();
    ASSERT(database);
    if (!database) {
        outputBus->zero();
        return;
    }

    // The IRCAM HRTF azimuth values from the loaded database are reversed from the panner's notion of azimuth.
    double azimuth = -desiredAzimuth;

    bool isAzimuthGood = azimuth >= -180.0 && azimuth <= 180.0;
    ASSERT(isAzimuthGood);
    if (!isAzimuthGood) {
        outputBus->zero();
        return;
    }

    // Normally, we'll just be dealing with mono sources.
    // If we have a stereo input, implement stereo panning with the left source processed by the left HRTF and the right source by the right HRTF.
    const AudioChannel* inputChannelL = inputBus->channelByType(AudioBus::ChannelLeft);
    const AudioChannel* inputChannelR = numInputChannels > 1 ? inputBus->channelByType(AudioBus::ChannelRight) : 0;

    // Get source and destination pointers.
    const float* sourceL = inputChannelL->data();
    const float* sourceR = numInputChannels > 1 ? inputChannelR->data() : sourceL;
    float* destinationL = outputBus->channelByType(AudioBus::ChannelLeft)->mutableData();
    float* destinationR = outputBus->channelByType(AudioBus::ChannelRight)->mutableData();

    double azimuthBlend;
    int desiredAzimuthIndex = calculateDesiredAzimuthIndexAndBlend(azimuth, azimuthBlend);

    // Initially snap the azimuth and elevation values to the first values encountered.
    if (m_azimuthIndex1 == UninitializedAzimuth) {
        m_azimuthIndex1 = desiredAzimuthIndex;
        m_elevation1 = elevation;
    }
    if (m_azimuthIndex2 == UninitializedAzimuth) {
        m_azimuthIndex2 = desiredAzimuthIndex;
        m_elevation2 = elevation;
    }

    // Cross-fade / transition over a period of around 45 milliseconds.
    // This is an empirical value tuned to be a reasonable trade-off between
    // smoothness and speed.
    const double fadeFrames = sampleRate() <= 48000 ? 2048 : 4096;
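
    // Sanity check on the 45 ms figure: 2048 frames / 44100 Hz is about 46 ms, and
    // 4096 frames / 96000 Hz is about 43 ms, so the fade duration stays near 45 ms
    // across the supported sample rates.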

    // Check for azimuth and elevation changes, initiating a cross-fade if needed.
    if (!m_crossfadeX && m_crossfadeSelection == CrossfadeSelection1) {
        if (desiredAzimuthIndex != m_azimuthIndex1 || elevation != m_elevation1) {
            // Cross-fade from 1 -> 2
            m_crossfadeIncr = 1 / fadeFrames;
            m_azimuthIndex2 = desiredAzimuthIndex;
            m_elevation2 = elevation;
        }
    }
    if (m_crossfadeX == 1 && m_crossfadeSelection == CrossfadeSelection2) {
        if (desiredAzimuthIndex != m_azimuthIndex2 || elevation != m_elevation2) {
            // Cross-fade from 2 -> 1
            m_crossfadeIncr = -1 / fadeFrames;
            m_azimuthIndex1 = desiredAzimuthIndex;
            m_elevation1 = elevation;
        }
    }

    // This algorithm currently requires that we process in power-of-two-sized chunks of at least RenderingQuantum frames.
    ASSERT((1UL << static_cast<int>(log2(framesToProcess))) == framesToProcess);
    ASSERT(framesToProcess >= RenderingQuantum);

    const unsigned framesPerSegment = RenderingQuantum;
    const unsigned numberOfSegments = framesToProcess / framesPerSegment;

    for (unsigned segment = 0; segment < numberOfSegments; ++segment) {
        // Get the HRTFKernels and interpolated delays.
        HRTFKernel* kernelL1;
        HRTFKernel* kernelR1;
        HRTFKernel* kernelL2;
        HRTFKernel* kernelR2;
        double frameDelayL1;
        double frameDelayR1;
        double frameDelayL2;
        double frameDelayR2;
        database->getKernelsFromAzimuthElevation(azimuthBlend, m_azimuthIndex1, m_elevation1, kernelL1, kernelR1, frameDelayL1, frameDelayR1);
        database->getKernelsFromAzimuthElevation(azimuthBlend, m_azimuthIndex2, m_elevation2, kernelL2, kernelR2, frameDelayL2, frameDelayR2);

        bool areKernelsGood = kernelL1 && kernelR1 && kernelL2 && kernelR2;
        ASSERT(areKernelsGood);
        if (!areKernelsGood) {
            outputBus->zero();
            return;
        }

        ASSERT(frameDelayL1 / sampleRate() < MaxDelayTimeSeconds && frameDelayR1 / sampleRate() < MaxDelayTimeSeconds);
        ASSERT(frameDelayL2 / sampleRate() < MaxDelayTimeSeconds && frameDelayR2 / sampleRate() < MaxDelayTimeSeconds);

        // Crossfade the inter-aural delays based on the transition.
        double frameDelayL = (1 - m_crossfadeX) * frameDelayL1 + m_crossfadeX * frameDelayL2;
        double frameDelayR = (1 - m_crossfadeX) * frameDelayR1 + m_crossfadeX * frameDelayR2;

        // Calculate the source and destination pointers for the current segment.
        unsigned offset = segment * framesPerSegment;
        const float* segmentSourceL = sourceL + offset;
        const float* segmentSourceR = sourceR + offset;
        float* segmentDestinationL = destinationL + offset;
        float* segmentDestinationR = destinationR + offset;

        // First run through the delay lines for the inter-aural time difference.
        m_delayLineL.setDelayFrames(frameDelayL);
        m_delayLineR.setDelayFrames(frameDelayR);
        m_delayLineL.process(segmentSourceL, segmentDestinationL, framesPerSegment);
        m_delayLineR.process(segmentSourceR, segmentDestinationR, framesPerSegment);

        bool needsCrossfading = m_crossfadeIncr != 0;

        // Have the convolvers render directly to the final destination if we're not cross-fading.
        float* convolutionDestinationL1 = needsCrossfading ? m_tempL1.data() : segmentDestinationL;
        float* convolutionDestinationR1 = needsCrossfading ? m_tempR1.data() : segmentDestinationR;
        float* convolutionDestinationL2 = needsCrossfading ? m_tempL2.data() : segmentDestinationL;
        float* convolutionDestinationR2 = needsCrossfading ? m_tempR2.data() : segmentDestinationR;

        // Now do the convolutions.
        // Note that we avoid doing convolutions on both sets of convolvers if we're not currently cross-fading.

        if (m_crossfadeSelection == CrossfadeSelection1 || needsCrossfading) {
            m_convolverL1.process(kernelL1->fftFrame(), segmentDestinationL, convolutionDestinationL1, framesPerSegment);
            m_convolverR1.process(kernelR1->fftFrame(), segmentDestinationR, convolutionDestinationR1, framesPerSegment);
        }

        if (m_crossfadeSelection == CrossfadeSelection2 || needsCrossfading) {
            m_convolverL2.process(kernelL2->fftFrame(), segmentDestinationL, convolutionDestinationL2, framesPerSegment);
            m_convolverR2.process(kernelR2->fftFrame(), segmentDestinationR, convolutionDestinationR2, framesPerSegment);
        }

        if (needsCrossfading) {
            // Apply the linear cross-fade.
            float x = m_crossfadeX;
            float incr = m_crossfadeIncr;
            for (unsigned i = 0; i < framesPerSegment; ++i) {
                segmentDestinationL[i] = (1 - x) * convolutionDestinationL1[i] + x * convolutionDestinationL2[i];
                segmentDestinationR[i] = (1 - x) * convolutionDestinationR1[i] + x * convolutionDestinationR2[i];
                x += incr;
            }
            // Write the local cross-fade value back to the member.
            m_crossfadeX = x;

            if (m_crossfadeIncr > 0 && fabs(m_crossfadeX - 1) < m_crossfadeIncr) {
                // We've fully made the crossfade transition from 1 -> 2.
                m_crossfadeSelection = CrossfadeSelection2;
                m_crossfadeX = 1;
                m_crossfadeIncr = 0;
            } else if (m_crossfadeIncr < 0 && fabs(m_crossfadeX) < -m_crossfadeIncr) {
                // We've fully made the crossfade transition from 2 -> 1.
                m_crossfadeSelection = CrossfadeSelection1;
                m_crossfadeX = 0;
                m_crossfadeIncr = 0;
            }
        }
    }
}

double HRTFPanner::tailTime() const
{
    // Because HRTFPanner is implemented with a DelayKernel and an FFTConvolver, the tailTime of the HRTFPanner
    // is the sum of the tailTime of the DelayKernel and the tailTime of the FFTConvolver, which are MaxDelayTimeSeconds
    // and fftSize() / 2 frames, respectively.
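    // For example, at 44.1 kHz the FFT size is 512 (see fftSizeForSampleRate()),
    // so the tail time works out to 0.002 + 256 / 44100, or about 7.8 milliseconds.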
    return MaxDelayTimeSeconds + (fftSize() / 2) / static_cast<double>(sampleRate());
}

double HRTFPanner::latencyTime() const
{
    // The latency of an FFTConvolver is also fftSize() / 2 frames, in addition to its tailTime of the
    // same value.
    return (fftSize() / 2) / static_cast<double>(sampleRate());
}

void HRTFPanner::trace(Visitor* visitor)
{
    visitor->trace(m_databaseLoader);
    Panner::trace(visitor);
}

} // namespace blink

#endif // ENABLE(WEB_AUDIO)