1 /* 2 * Copyright (C) 2010 Google Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY 15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 16 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY 18 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 19 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 20 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 21 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26 // FFTFrame implementation using Intel's Math Kernel Library (MKL), 27 // suitable for use on Windows and Linux. 28 29 #include "config.h" 30 31 #if ENABLE(WEB_AUDIO) 32 33 #if !OS(DARWIN) && USE(WEBAUDIO_MKL) 34 35 #include "FFTFrame.h" 36 37 #include "mkl_vml.h" 38 #include <wtf/MathExtras.h> 39 40 namespace { 41 42 DFTI_DESCRIPTOR_HANDLE createDescriptorHandle(int fftSize) 43 { 44 DFTI_DESCRIPTOR_HANDLE handle = 0; 45 46 // Create DFTI descriptor for 1D single precision transform. 47 MKL_LONG status = DftiCreateDescriptor(&handle, DFTI_SINGLE, DFTI_REAL, 1, fftSize); 48 ASSERT(DftiErrorClass(status, DFTI_NO_ERROR)); 49 50 // Set placement of result to DFTI_NOT_INPLACE. 51 status = DftiSetValue(handle, DFTI_PLACEMENT, DFTI_NOT_INPLACE); 52 ASSERT(DftiErrorClass(status, DFTI_NO_ERROR)); 53 54 // Set packing format to PERM; this produces the layout which 55 // matches Accelerate.framework's on the Mac, though interleaved. 56 status = DftiSetValue(handle, DFTI_PACKED_FORMAT, DFTI_PERM_FORMAT); 57 ASSERT(DftiErrorClass(status, DFTI_NO_ERROR)); 58 59 // Set the forward scale factor to 2 to match Accelerate.framework's. 60 // FIXME: FFTFrameMac's scaling factor could be fixed to be 1.0, 61 // in which case this code would need to be changed as well. 62 status = DftiSetValue(handle, DFTI_FORWARD_SCALE, 2.0); 63 ASSERT(DftiErrorClass(status, DFTI_NO_ERROR)); 64 65 // Set the backward scale factor to 1 / 2n to match Accelerate.framework's. 66 // FIXME: if the above scaling factor is fixed then this needs to be as well. 67 double scale = 1.0 / (2.0 * fftSize); 68 status = DftiSetValue(handle, DFTI_BACKWARD_SCALE, scale); 69 ASSERT(DftiErrorClass(status, DFTI_NO_ERROR)); 70 71 // Use the default DFTI_CONJUGATE_EVEN_STORAGE = DFTI_COMPLEX_REAL. 72 73 // Commit DFTI descriptor. 74 status = DftiCommitDescriptor(handle); 75 ASSERT(DftiErrorClass(status, DFTI_NO_ERROR)); 76 77 return handle; 78 } 79 80 } // anonymous namespace 81 82 namespace WebCore { 83 84 const int kMaxFFTPow2Size = 24; 85 86 DFTI_DESCRIPTOR_HANDLE* FFTFrame::descriptorHandles = 0; 87 88 // Normal constructor: allocates for a given fftSize. 89 FFTFrame::FFTFrame(unsigned fftSize) 90 : m_FFTSize(fftSize) 91 , m_log2FFTSize(static_cast<unsigned>(log2(fftSize))) 92 , m_handle(0) 93 , m_complexData(fftSize) 94 , m_realData(fftSize / 2) 95 , m_imagData(fftSize / 2) 96 { 97 // We only allow power of two. 98 ASSERT(1UL << m_log2FFTSize == m_FFTSize); 99 100 m_handle = descriptorHandleForSize(fftSize); 101 } 102 103 // Creates a blank/empty frame (interpolate() must later be called). 104 FFTFrame::FFTFrame() 105 : m_FFTSize(0) 106 , m_log2FFTSize(0) 107 , m_handle(0) 108 { 109 } 110 111 // Copy constructor. 112 FFTFrame::FFTFrame(const FFTFrame& frame) 113 : m_FFTSize(frame.m_FFTSize) 114 , m_log2FFTSize(frame.m_log2FFTSize) 115 , m_handle(0) 116 , m_complexData(frame.m_FFTSize) 117 , m_realData(frame.m_FFTSize / 2) 118 , m_imagData(frame.m_FFTSize / 2) 119 { 120 m_handle = descriptorHandleForSize(m_FFTSize); 121 122 // Copy/setup frame data. 123 unsigned nbytes = sizeof(float) * (m_FFTSize / 2); 124 memcpy(realData(), frame.realData(), nbytes); 125 memcpy(imagData(), frame.imagData(), nbytes); 126 } 127 128 FFTFrame::~FFTFrame() 129 { 130 } 131 132 void FFTFrame::multiply(const FFTFrame& frame) 133 { 134 FFTFrame& frame1 = *this; 135 FFTFrame& frame2 = const_cast<FFTFrame&>(frame); 136 137 float* realP1 = frame1.realData(); 138 float* imagP1 = frame1.imagData(); 139 const float* realP2 = frame2.realData(); 140 const float* imagP2 = frame2.imagData(); 141 142 // Scale accounts for vecLib's peculiar scaling. 143 // This ensures the right scaling all the way back to inverse FFT. 144 // FIXME: this scaling factor will be 1.0f if the above 2.0 -> 1.0 145 // scaling factor is fixed. 146 float scale = 0.5f; 147 148 // Multiply packed DC/nyquist component. 149 realP1[0] *= scale * realP2[0]; 150 imagP1[0] *= scale * imagP2[0]; 151 152 // Multiply the rest, skipping packed DC/Nyquist components. 153 float* interleavedData1 = frame1.getUpToDateComplexData(); 154 float* interleavedData2 = frame2.getUpToDateComplexData(); 155 156 unsigned halfSize = m_FFTSize / 2; 157 158 // Complex multiply. 159 vcMul(halfSize - 1, 160 reinterpret_cast<MKL_Complex8*>(interleavedData1) + 1, 161 reinterpret_cast<MKL_Complex8*>(interleavedData2) + 1, 162 reinterpret_cast<MKL_Complex8*>(interleavedData1) + 1); 163 164 // De-interleave and scale the rest of the data. 165 // FIXME: find an MKL routine to do at least the scaling more efficiently. 166 for (unsigned i = 1; i < halfSize; ++i) { 167 int baseComplexIndex = 2 * i; 168 realP1[i] = scale * interleavedData1[baseComplexIndex]; 169 imagP1[i] = scale * interleavedData1[baseComplexIndex + 1]; 170 } 171 } 172 173 void FFTFrame::doFFT(float* data) 174 { 175 // Compute Forward transform. 176 MKL_LONG status = DftiComputeForward(m_handle, data, m_complexData.data()); 177 ASSERT_UNUSED(status, DftiErrorClass(status, DFTI_NO_ERROR)); 178 179 // De-interleave to separate real and complex arrays. FIXME: 180 // figure out if it's possible to get MKL to use split-complex 181 // form for 1D real-to-complex out-of-place FFTs. 182 int len = m_FFTSize / 2; 183 for (int i = 0; i < len; ++i) { 184 int baseComplexIndex = 2 * i; 185 // m_realData[0] is the DC component and m_imagData[0] the 186 // Nyquist component since the interleaved complex data is 187 // packed. 188 m_realData[i] = m_complexData[baseComplexIndex]; 189 m_imagData[i] = m_complexData[baseComplexIndex + 1]; 190 } 191 } 192 193 void FFTFrame::doInverseFFT(float* data) 194 { 195 // Prepare interleaved data. FIXME: figure out if it's possible to 196 // get MKL to use split-complex form for 1D backward 197 // (complex-to-real) out-of-place FFTs. 198 float* interleavedData = getUpToDateComplexData(); 199 200 // Compute backward transform. 201 MKL_LONG status = DftiComputeBackward(m_handle, interleavedData, data); 202 ASSERT_UNUSED(status, DftiErrorClass(status, DFTI_NO_ERROR)); 203 } 204 205 void FFTFrame::initialize() 206 { 207 } 208 209 void FFTFrame::cleanup() 210 { 211 if (!descriptorHandles) 212 return; 213 214 for (int i = 0; i < kMaxFFTPow2Size; ++i) { 215 if (descriptorHandles[i]) { 216 MKL_LONG status = DftiFreeDescriptor(&descriptorHandles[i]); 217 ASSERT_UNUSED(status, DftiErrorClass(status, DFTI_NO_ERROR)); 218 } 219 } 220 221 delete[] descriptorHandles; 222 descriptorHandles = 0; 223 } 224 225 float* FFTFrame::realData() const 226 { 227 return const_cast<float*>(m_realData.data()); 228 } 229 230 float* FFTFrame::imagData() const 231 { 232 return const_cast<float*>(m_imagData.data()); 233 } 234 235 float* FFTFrame::getUpToDateComplexData() 236 { 237 // FIXME: if we can't completely get rid of this method, SSE 238 // optimization could be considered if it shows up hot on profiles. 239 int len = m_FFTSize / 2; 240 for (int i = 0; i < len; ++i) { 241 int baseComplexIndex = 2 * i; 242 m_complexData[baseComplexIndex] = m_realData[i]; 243 m_complexData[baseComplexIndex + 1] = m_imagData[i]; 244 } 245 return const_cast<float*>(m_complexData.data()); 246 } 247 248 DFTI_DESCRIPTOR_HANDLE FFTFrame::descriptorHandleForSize(unsigned fftSize) 249 { 250 if (!descriptorHandles) { 251 descriptorHandles = new DFTI_DESCRIPTOR_HANDLE[kMaxFFTPow2Size]; 252 for (int i = 0; i < kMaxFFTPow2Size; ++i) 253 descriptorHandles[i] = 0; 254 } 255 256 ASSERT(fftSize); 257 int pow2size = static_cast<int>(log2(fftSize)); 258 ASSERT(pow2size < kMaxFFTPow2Size); 259 if (!descriptorHandles[pow2size]) 260 descriptorHandles[pow2size] = createDescriptorHandle(fftSize); 261 return descriptorHandles[pow2size]; 262 } 263 264 } // namespace WebCore 265 266 #endif // !OS(DARWIN) && USE(WEBAUDIO_MKL) 267 268 #endif // ENABLE(WEB_AUDIO) 269