Home | History | Annotate | Download | only in mkl
      1 /*
      2  * Copyright (C) 2010 Google Inc. All rights reserved.
      3  *
      4  * Redistribution and use in source and binary forms, with or without
      5  * modification, are permitted provided that the following conditions
      6  * are met:
      7  *
      8  * 1.  Redistributions of source code must retain the above copyright
      9  *     notice, this list of conditions and the following disclaimer.
     10  * 2.  Redistributions in binary form must reproduce the above copyright
     11  *     notice, this list of conditions and the following disclaimer in the
     12  *     documentation and/or other materials provided with the distribution.
     13  *
     14  * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
     15  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     16  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     17  * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
     18  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
     19  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     20  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
     21  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     23  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     24  */
     25 
     26 // FFTFrame implementation using Intel's Math Kernel Library (MKL),
     27 // suitable for use on Windows and Linux.
     28 
     29 #include "config.h"
     30 
     31 #if ENABLE(WEB_AUDIO)
     32 
     33 #if !OS(DARWIN) && USE(WEBAUDIO_MKL)
     34 
     35 #include "FFTFrame.h"
     36 
     37 #include "mkl_vml.h"
     38 #include <wtf/MathExtras.h>
     39 
     40 namespace {
     41 
     42 DFTI_DESCRIPTOR_HANDLE createDescriptorHandle(int fftSize)
     43 {
     44     DFTI_DESCRIPTOR_HANDLE handle = 0;
     45 
     46     // Create DFTI descriptor for 1D single precision transform.
     47     MKL_LONG status = DftiCreateDescriptor(&handle, DFTI_SINGLE, DFTI_REAL, 1, fftSize);
     48     ASSERT(DftiErrorClass(status, DFTI_NO_ERROR));
     49 
     50     // Set placement of result to DFTI_NOT_INPLACE.
     51     status = DftiSetValue(handle, DFTI_PLACEMENT, DFTI_NOT_INPLACE);
     52     ASSERT(DftiErrorClass(status, DFTI_NO_ERROR));
     53 
     54     // Set packing format to PERM; this produces the layout which
     55     // matches Accelerate.framework's on the Mac, though interleaved.
     56     status = DftiSetValue(handle, DFTI_PACKED_FORMAT, DFTI_PERM_FORMAT);
     57     ASSERT(DftiErrorClass(status, DFTI_NO_ERROR));
     58 
     59     // Set the forward scale factor to 2 to match Accelerate.framework's.
     60     // FIXME: FFTFrameMac's scaling factor could be fixed to be 1.0,
     61     // in which case this code would need to be changed as well.
     62     status = DftiSetValue(handle, DFTI_FORWARD_SCALE, 2.0);
     63     ASSERT(DftiErrorClass(status, DFTI_NO_ERROR));
     64 
     65     // Set the backward scale factor to 1 / 2n to match Accelerate.framework's.
     66     // FIXME: if the above scaling factor is fixed then this needs to be as well.
     67     double scale = 1.0 / (2.0 * fftSize);
     68     status = DftiSetValue(handle, DFTI_BACKWARD_SCALE, scale);
     69     ASSERT(DftiErrorClass(status, DFTI_NO_ERROR));
     70 
     71     // Use the default DFTI_CONJUGATE_EVEN_STORAGE = DFTI_COMPLEX_REAL.
     72 
     73     // Commit DFTI descriptor.
     74     status = DftiCommitDescriptor(handle);
     75     ASSERT(DftiErrorClass(status, DFTI_NO_ERROR));
     76 
     77     return handle;
     78 }
     79 
     80 } // anonymous namespace
     81 
     82 namespace WebCore {
     83 
     84 const int kMaxFFTPow2Size = 24;
     85 
     86 DFTI_DESCRIPTOR_HANDLE* FFTFrame::descriptorHandles = 0;
     87 
     88 // Normal constructor: allocates for a given fftSize.
     89 FFTFrame::FFTFrame(unsigned fftSize)
     90     : m_FFTSize(fftSize)
     91     , m_log2FFTSize(static_cast<unsigned>(log2(fftSize)))
     92     , m_handle(0)
     93     , m_complexData(fftSize)
     94     , m_realData(fftSize / 2)
     95     , m_imagData(fftSize / 2)
     96 {
     97     // We only allow power of two.
     98     ASSERT(1UL << m_log2FFTSize == m_FFTSize);
     99 
    100     m_handle = descriptorHandleForSize(fftSize);
    101 }
    102 
    103 // Creates a blank/empty frame (interpolate() must later be called).
    104 FFTFrame::FFTFrame()
    105     : m_FFTSize(0)
    106     , m_log2FFTSize(0)
    107     , m_handle(0)
    108 {
    109 }
    110 
    111 // Copy constructor.
    112 FFTFrame::FFTFrame(const FFTFrame& frame)
    113     : m_FFTSize(frame.m_FFTSize)
    114     , m_log2FFTSize(frame.m_log2FFTSize)
    115     , m_handle(0)
    116     , m_complexData(frame.m_FFTSize)
    117     , m_realData(frame.m_FFTSize / 2)
    118     , m_imagData(frame.m_FFTSize / 2)
    119 {
    120     m_handle = descriptorHandleForSize(m_FFTSize);
    121 
    122     // Copy/setup frame data.
    123     unsigned nbytes = sizeof(float) * (m_FFTSize / 2);
    124     memcpy(realData(), frame.realData(), nbytes);
    125     memcpy(imagData(), frame.imagData(), nbytes);
    126 }
    127 
    128 FFTFrame::~FFTFrame()
    129 {
    130 }
    131 
    132 void FFTFrame::multiply(const FFTFrame& frame)
    133 {
    134     FFTFrame& frame1 = *this;
    135     FFTFrame& frame2 = const_cast<FFTFrame&>(frame);
    136 
    137     float* realP1 = frame1.realData();
    138     float* imagP1 = frame1.imagData();
    139     const float* realP2 = frame2.realData();
    140     const float* imagP2 = frame2.imagData();
    141 
    142     // Scale accounts for vecLib's peculiar scaling.
    143     // This ensures the right scaling all the way back to inverse FFT.
    144     // FIXME: this scaling factor will be 1.0f if the above 2.0 -> 1.0
    145     // scaling factor is fixed.
    146     float scale = 0.5f;
    147 
    148     // Multiply packed DC/nyquist component.
    149     realP1[0] *= scale * realP2[0];
    150     imagP1[0] *= scale * imagP2[0];
    151 
    152     // Multiply the rest, skipping packed DC/Nyquist components.
    153     float* interleavedData1 = frame1.getUpToDateComplexData();
    154     float* interleavedData2 = frame2.getUpToDateComplexData();
    155 
    156     unsigned halfSize = m_FFTSize / 2;
    157 
    158     // Complex multiply.
    159     vcMul(halfSize - 1,
    160           reinterpret_cast<MKL_Complex8*>(interleavedData1) + 1,
    161           reinterpret_cast<MKL_Complex8*>(interleavedData2) + 1,
    162           reinterpret_cast<MKL_Complex8*>(interleavedData1) + 1);
    163 
    164     // De-interleave and scale the rest of the data.
    165     // FIXME: find an MKL routine to do at least the scaling more efficiently.
    166     for (unsigned i = 1; i < halfSize; ++i) {
    167         int baseComplexIndex = 2 * i;
    168         realP1[i] = scale * interleavedData1[baseComplexIndex];
    169         imagP1[i] = scale * interleavedData1[baseComplexIndex + 1];
    170     }
    171 }
    172 
    173 void FFTFrame::doFFT(float* data)
    174 {
    175     // Compute Forward transform.
    176     MKL_LONG status = DftiComputeForward(m_handle, data, m_complexData.data());
    177     ASSERT_UNUSED(status, DftiErrorClass(status, DFTI_NO_ERROR));
    178 
    179     // De-interleave to separate real and complex arrays. FIXME:
    180     // figure out if it's possible to get MKL to use split-complex
    181     // form for 1D real-to-complex out-of-place FFTs.
    182     int len = m_FFTSize / 2;
    183     for (int i = 0; i < len; ++i) {
    184         int baseComplexIndex = 2 * i;
    185         // m_realData[0] is the DC component and m_imagData[0] the
    186         // Nyquist component since the interleaved complex data is
    187         // packed.
    188         m_realData[i] = m_complexData[baseComplexIndex];
    189         m_imagData[i] = m_complexData[baseComplexIndex + 1];
    190     }
    191 }
    192 
    193 void FFTFrame::doInverseFFT(float* data)
    194 {
    195     // Prepare interleaved data. FIXME: figure out if it's possible to
    196     // get MKL to use split-complex form for 1D backward
    197     // (complex-to-real) out-of-place FFTs.
    198     float* interleavedData = getUpToDateComplexData();
    199 
    200     // Compute backward transform.
    201     MKL_LONG status = DftiComputeBackward(m_handle, interleavedData, data);
    202     ASSERT_UNUSED(status, DftiErrorClass(status, DFTI_NO_ERROR));
    203 }
    204 
    205 void FFTFrame::initialize()
    206 {
    207 }
    208 
    209 void FFTFrame::cleanup()
    210 {
    211     if (!descriptorHandles)
    212         return;
    213 
    214     for (int i = 0; i < kMaxFFTPow2Size; ++i) {
    215         if (descriptorHandles[i]) {
    216             MKL_LONG status = DftiFreeDescriptor(&descriptorHandles[i]);
    217             ASSERT_UNUSED(status, DftiErrorClass(status, DFTI_NO_ERROR));
    218         }
    219     }
    220 
    221     delete[] descriptorHandles;
    222     descriptorHandles = 0;
    223 }
    224 
    225 float* FFTFrame::realData() const
    226 {
    227     return const_cast<float*>(m_realData.data());
    228 }
    229 
    230 float* FFTFrame::imagData() const
    231 {
    232     return const_cast<float*>(m_imagData.data());
    233 }
    234 
    235 float* FFTFrame::getUpToDateComplexData()
    236 {
    237     // FIXME: if we can't completely get rid of this method, SSE
    238     // optimization could be considered if it shows up hot on profiles.
    239     int len = m_FFTSize / 2;
    240     for (int i = 0; i < len; ++i) {
    241         int baseComplexIndex = 2 * i;
    242         m_complexData[baseComplexIndex] = m_realData[i];
    243         m_complexData[baseComplexIndex + 1] = m_imagData[i];
    244     }
    245     return const_cast<float*>(m_complexData.data());
    246 }
    247 
    248 DFTI_DESCRIPTOR_HANDLE FFTFrame::descriptorHandleForSize(unsigned fftSize)
    249 {
    250     if (!descriptorHandles) {
    251         descriptorHandles = new DFTI_DESCRIPTOR_HANDLE[kMaxFFTPow2Size];
    252         for (int i = 0; i < kMaxFFTPow2Size; ++i)
    253             descriptorHandles[i] = 0;
    254     }
    255 
    256     ASSERT(fftSize);
    257     int pow2size = static_cast<int>(log2(fftSize));
    258     ASSERT(pow2size < kMaxFFTPow2Size);
    259     if (!descriptorHandles[pow2size])
    260         descriptorHandles[pow2size] = createDescriptorHandle(fftSize);
    261     return descriptorHandles[pow2size];
    262 }
    263 
    264 } // namespace WebCore
    265 
    266 #endif // !OS(DARWIN) && USE(WEBAUDIO_MKL)
    267 
    268 #endif // ENABLE(WEB_AUDIO)
    269