Home | History | Annotate | Download | only in arm64
      1 /*
      2  *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #include <arm_neon.h>
     12 
     13 #include "dl/api/omxtypes.h"
     14 #include "dl/sp/api/armSP.h"
     15 #include "dl/sp/api/omxSP.h"
     16 
/*
 * Prototypes for the inverse complex-to-complex FC32 FFT stage routines.
 * Only the declarations are visible here; the definitions live outside this
 * file. All seven share one signature: they read from |pSrc|, write to
 * |pDst| (out of place), take the twiddle table |pTwiddle|, and receive
 * |subFFTNum| and |subFFTSize| by pointer.
 *
 * NOTE(review): the caller in this file hands the same subFFTNum/subFFTSize
 * variables to consecutive stages and re-tests subFFTNum between calls, so
 * the routines presumably update both values in place -- confirm against
 * the implementations.
 *
 * Naming, as used by the caller in this file: "_fs_" routines are invoked
 * as the first stage, "_ls_" routines as the last stage, and the unsuffixed
 * routines for the stages in between.
 */
extern void armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace(
    const OMX_FC32* pSrc,
    OMX_FC32* pDst,
    OMX_FC32* pTwiddle,
    long* subFFTNum,
    long* subFFTSize);

extern void armSP_FFTInv_CToC_FC32_Radix2_ls_OutOfPlace(
    const OMX_FC32* pSrc,
    OMX_FC32* pDst,
    OMX_FC32* pTwiddle,
    long* subFFTNum,
    long* subFFTSize);

extern void armSP_FFTInv_CToC_FC32_Radix2_OutOfPlace(
    const OMX_FC32* pSrc,
    OMX_FC32* pDst,
    OMX_FC32* pTwiddle,
    long* subFFTNum,
    long* subFFTSize);

extern void armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace(
    const OMX_FC32* pSrc,
    OMX_FC32* pDst,
    OMX_FC32* pTwiddle,
    long* subFFTNum,
    long* subFFTSize);

extern void armSP_FFTInv_CToC_FC32_Radix4_OutOfPlace(
    const OMX_FC32* pSrc,
    OMX_FC32* pDst,
    OMX_FC32* pTwiddle,
    long* subFFTNum,
    long* subFFTSize);

extern void armSP_FFTInv_CToC_FC32_Radix4_ls_OutOfPlace(
    const OMX_FC32* pSrc,
    OMX_FC32* pDst,
    OMX_FC32* pTwiddle,
    long* subFFTNum,
    long* subFFTSize);

extern void armSP_FFTInv_CToC_FC32_Radix8_fs_OutOfPlace(
    const OMX_FC32* pSrc,
    OMX_FC32* pDst,
    OMX_FC32* pTwiddle,
    long* subFFTNum,
    long* subFFTSize);
     65 
     66 /*
     67  * Scale FFT data by 1/|length|. |length| must be a power of two
     68  */
     69 static inline ScaleFFTData(OMX_FC32* fftData, unsigned length) {
     70   float32_t* data = (float32_t*)fftData;
     71   float32_t scale = 1.0f / length;
     72 
     73   /*
     74    * Do two complex elements at a time because |length| is always
     75    * greater than or equal to 2 (order >= 1)
     76    */
     77   do {
     78     float32x4_t x = vld1q_f32(data);
     79 
     80     length -= 2;
     81     x = vmulq_n_f32(x, scale);
     82     vst1q_f32(data, x);
     83     data += 4;
     84   } while (length > 0);
     85 }
     86 
     87 /**
     88  * Function:  omxSP_FFTInv_CToC_FC32
     89  *
     90  * Description:
     91  * These functions compute an inverse FFT for a complex signal of
     92  * length of 2^order, where 0 <= order <= 15. Transform length is
     93  * determined by the specification structure, which must be
     94  * initialized prior to calling the FFT function using the appropriate
     95  * helper, i.e., <FFTInit_C_FC32>. The relationship between the input
     96  * and output sequences can be expressed in terms of the IDFT, i.e.:
     97  *
     98  *     x[n] = SUM[k=0,...,N-1] X[k].e^(jnk.2.pi/N)
     99  *     n=0,1,2,...N-1
    100  *     N=2^order.
    101  *
    102  * Input Arguments:
    103  *   pSrc - pointer to the complex-valued input signal, of length 2^order ;
    104  *          must be aligned on a 32-byte boundary.
    105  *   pFFTSpec - pointer to the preallocated and initialized specification
    106  *            structure
    107  *
    108  * Output Arguments:
    109  *   order
    110  *   pDst - pointer to the complex-valued output signal, of length 2^order;
    111  *          must be aligned on a 32-byte boundary.
    112  *
    113  * Return Value:
    114  *
    115  *    OMX_Sts_NoErr - no error
    116  *    OMX_Sts_BadArgErr - returned if one or more of the following conditions
    117  *              is true:
    118  *    -   one or more of the following pointers is NULL: pSrc, pDst, or
    119  *              pFFTSpec.
    120  *    -   pSrc or pDst is not 32-byte aligned
    121  *
    122  */
    123 
    124 OMXResult omxSP_FFTInv_CToC_FC32_Sfs(const OMX_FC32* pSrc,
    125                                      OMX_FC32* pDst,
    126                                      const OMXFFTSpec_C_FC32* pFFTSpec) {
    127   ARMsFFTSpec_FC32* spec = (ARMsFFTSpec_FC32*)pFFTSpec;
    128   int order;
    129   long subFFTSize;
    130   long subFFTNum;
    131   OMX_FC32* pTwiddle;
    132   OMX_FC32* pOut;
    133 
    134   /*
    135    * Check args are not NULL and the source and destination pointers
    136    * are properly aligned.
    137    */
    138   if (!validateParametersFC32(pSrc, pDst, spec))
    139     return OMX_Sts_BadArgErr;
    140 
    141   order = fastlog2(spec->N);
    142 
    143   subFFTSize = 1;
    144   subFFTNum = spec->N;
    145   pTwiddle = spec->pTwiddle;
    146   pOut = spec->pBuf;
    147 
    148   if (order > 3) {
    149     OMX_FC32* argDst;
    150 
    151     /*
    152      * Set up argDst and pOut appropriately so that pOut = pDst for
    153      * the very last FFT stage.
    154      */
    155     if ((order & 2) == 0) {
    156       argDst = pOut;
    157       pOut = pDst;
    158     } else {
    159       argDst = pDst;
    160     }
    161 
    162     /*
    163      * Odd order uses a radix 8 first stage; even order, a radix 4
    164      * first stage.
    165      */
    166     if (order & 1) {
    167       armSP_FFTInv_CToC_FC32_Radix8_fs_OutOfPlace(
    168           pSrc, argDst, pTwiddle, &subFFTNum, &subFFTSize);
    169     } else {
    170       armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace(
    171           pSrc, argDst, pTwiddle, &subFFTNum, &subFFTSize);
    172     }
    173 
    174     /*
    175      * Now use radix 4 stages to finish rest of the FFT
    176      */
    177     if (subFFTNum >= 4) {
    178       while (subFFTNum > 4) {
    179         OMX_FC32* tmp;
    180 
    181         armSP_FFTInv_CToC_FC32_Radix4_OutOfPlace(
    182             argDst, pOut, pTwiddle, &subFFTNum, &subFFTSize);
    183         /*
    184          * Swap argDst and pOut
    185          */
    186         tmp = pOut;
    187         pOut = argDst;
    188         argDst = tmp;
    189       }
    190 
    191       armSP_FFTInv_CToC_FC32_Radix4_ls_OutOfPlace(
    192           argDst, pOut, pTwiddle, &subFFTNum, &subFFTSize);
    193     }
    194   } else if (order == 3) {
    195     armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace(
    196         pSrc, pDst, pTwiddle, &subFFTNum, &subFFTSize);
    197     armSP_FFTInv_CToC_FC32_Radix2_OutOfPlace(
    198         pDst, pOut, pTwiddle, &subFFTNum, &subFFTSize);
    199     armSP_FFTInv_CToC_FC32_Radix2_ls_OutOfPlace(
    200         pOut, pDst, pTwiddle, &subFFTNum, &subFFTSize);
    201   } else if (order == 2) {
    202     armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace(
    203         pSrc, pOut, pTwiddle, &subFFTNum, &subFFTSize);
    204     armSP_FFTInv_CToC_FC32_Radix2_ls_OutOfPlace(
    205         pOut, pDst, pTwiddle, &subFFTNum, &subFFTSize);
    206   } else {
    207     /* Order = 1 */
    208     armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace(
    209         pSrc, pDst, pTwiddle, &subFFTNum, &subFFTSize);
    210   }
    211 
    212   ScaleFFTData(pDst, spec->N);
    213   return OMX_Sts_NoErr;
    214 }
    215