/*
 *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 *
 */
     11 
     12 #include <stdint.h>
     13 
     14 #include "dl/api/omxtypes.h"
     15 #include "dl/sp/api/mipsSP.h"
     16 
     17 OMXResult mips_FFTFwd_RToCCS_F32_real(const OMX_F32* pSrc,
     18                                       OMX_F32* pDst,
     19                                       const MIPSFFTSpec_R_FC32* pFFTSpec) {
     20   OMX_U32 num_transforms, step;
     21   OMX_FC32* p_dst = (OMX_FC32*)pDst;
     22   OMX_FC32* p_buf = (OMX_FC32*)pFFTSpec->pBuf;
     23   OMX_F32 tmp1, tmp2, tmp3, tmp4;
     24   OMX_F32* w_re_ptr;
     25   OMX_F32* w_im_ptr;
     26 
     27   /* Transform for order = 2. */
     28   /* TODO: hard-code the offsets for p_src. */
     29   if (pFFTSpec->order == 2) {
     30     OMX_U16* p_bitrev = pFFTSpec->pBitRev;
     31 
     32     tmp1 = pSrc[p_bitrev[0]] + pSrc[p_bitrev[1]];
     33     tmp2 = pSrc[p_bitrev[2]] + pSrc[p_bitrev[3]];
     34     tmp3 = pSrc[p_bitrev[0]] - pSrc[p_bitrev[1]];
     35     tmp4 = pSrc[p_bitrev[2]] - pSrc[p_bitrev[3]];
     36 
     37     p_dst[0].Re = tmp1 + tmp2;
     38     p_dst[2].Re = tmp1 - tmp2;
     39     p_dst[0].Im = 0.0f;
     40     p_dst[2].Im = 0.0f;
     41     p_dst[1].Re = tmp3;
     42     p_dst[1].Im = -tmp4;
     43 
     44     return OMX_Sts_NoErr;
     45   }
     46 
     47   /*
     48    * Loop performing sub-transforms of size 4, which contain two butterfly
     49    * operations. Reading the input signal from split-radix bitreverse offsets.
     50    */
     51   num_transforms = (SUBTRANSFORM_CONST >> (16 - pFFTSpec->order)) | 1;
     52   for (uint32_t n = 0; n < num_transforms; ++n) {
     53     OMX_U32 offset = pFFTSpec->pOffset[n] << 2;
     54     OMX_FC32* p_tmp = p_buf + offset;
     55     OMX_U16* p_bitrev = pFFTSpec->pBitRev + offset;
     56 
     57     tmp1 = pSrc[p_bitrev[0]] + pSrc[p_bitrev[1]];
     58     tmp2 = pSrc[p_bitrev[2]] + pSrc[p_bitrev[3]];
     59     tmp3 = pSrc[p_bitrev[0]] - pSrc[p_bitrev[1]];
     60     tmp4 = pSrc[p_bitrev[2]] - pSrc[p_bitrev[3]];
     61 
     62     p_tmp[0].Re = tmp1 + tmp2;
     63     p_tmp[2].Re = tmp1 - tmp2;
     64     p_tmp[0].Im = 0.0f;
     65     p_tmp[2].Im = 0.0f;
     66     p_tmp[1].Re = tmp3;
     67     p_tmp[3].Re = tmp3;
     68     p_tmp[1].Im = -tmp4;
     69     p_tmp[3].Im = tmp4;
     70   }
     71 
     72   /*
     73    * Loop performing sub-transforms of size 8,
     74    * which contain four butterfly operations.
     75    */
     76   num_transforms >>= 1;
     77   if (!num_transforms) {
     78     /*
     79      * Means the FFT size is equal to 8, so this is the last stage. Place the
     80      * output to the destination buffer and avoid unnecessary computations.
     81      */
     82     OMX_FC32* p_tmp = p_buf;
     83     OMX_U16* p_bitrev = pFFTSpec->pBitRev;
     84     OMX_F32 tmp5;
     85 
     86     tmp1 = pSrc[p_bitrev[4]] + pSrc[p_bitrev[5]];
     87     tmp2 = pSrc[p_bitrev[6]] + pSrc[p_bitrev[7]];
     88     tmp3 = tmp1 + tmp2;
     89     tmp4 = tmp1 - tmp2;
     90 
     91     tmp1 = pSrc[p_bitrev[4]] - pSrc[p_bitrev[5]];
     92     tmp2 = pSrc[p_bitrev[6]] - pSrc[p_bitrev[7]];
     93     tmp5 = SQRT1_2 * (tmp1 + tmp2);
     94     tmp1 = SQRT1_2 * (tmp1 - tmp2);
     95 
     96     p_dst[4].Re = p_tmp[0].Re - tmp3;
     97     p_dst[0].Re = p_tmp[0].Re + tmp3;
     98     p_dst[0].Im = p_tmp[0].Im;
     99     p_dst[4].Im = p_tmp[0].Im;
    100     p_dst[2].Re = p_tmp[2].Re;
    101     p_dst[2].Im = p_tmp[2].Im - tmp4;
    102     p_dst[1].Re = p_tmp[1].Re + tmp5;
    103     p_dst[1].Im = p_tmp[1].Im - tmp1;
    104     p_dst[3].Re = p_tmp[3].Re - tmp5;
    105     p_dst[3].Im = p_tmp[3].Im - tmp1;
    106 
    107     return OMX_Sts_NoErr;
    108   }
    109 
    110   num_transforms |= 1;
    111 
    112   for (uint32_t n = 0; n < num_transforms; ++n) {
    113     OMX_U32 offset = pFFTSpec->pOffset[n] << 3;
    114     OMX_FC32* p_tmp = p_buf + offset;
    115     OMX_U16* p_bitrev = pFFTSpec->pBitRev + offset;
    116     OMX_F32 tmp5;
    117 
    118     tmp1 = pSrc[p_bitrev[4]] + pSrc[p_bitrev[5]];
    119     tmp2 = pSrc[p_bitrev[6]] + pSrc[p_bitrev[7]];
    120     tmp3 = tmp1 + tmp2;
    121     tmp4 = tmp1 - tmp2;
    122 
    123     tmp1 = pSrc[p_bitrev[4]] - pSrc[p_bitrev[5]];
    124     tmp2 = pSrc[p_bitrev[6]] - pSrc[p_bitrev[7]];
    125     tmp5 = SQRT1_2 * (tmp1 + tmp2);
    126     tmp1 = SQRT1_2 * (tmp1 - tmp2);
    127 
    128     p_tmp[4].Re = p_tmp[0].Re - tmp3;
    129     p_tmp[0].Re = p_tmp[0].Re + tmp3;
    130     p_tmp[4].Im = p_tmp[0].Im;
    131     p_tmp[6].Re = p_tmp[2].Re;
    132     p_tmp[6].Im = p_tmp[2].Im + tmp4;
    133     p_tmp[2].Im = p_tmp[2].Im - tmp4;
    134 
    135     p_tmp[5].Re = p_tmp[1].Re - tmp5;
    136     p_tmp[1].Re = p_tmp[1].Re + tmp5;
    137     p_tmp[5].Im = p_tmp[1].Im + tmp1;
    138     p_tmp[1].Im = p_tmp[1].Im - tmp1;
    139     p_tmp[7].Re = p_tmp[3].Re + tmp5;
    140     p_tmp[3].Re = p_tmp[3].Re - tmp5;
    141     p_tmp[7].Im = p_tmp[3].Im + tmp1;
    142     p_tmp[3].Im = p_tmp[3].Im - tmp1;
    143   }
    144 
    145   step = 1 << (TWIDDLE_TABLE_ORDER - 4);
    146   /*
    147    * Last FFT stage,  performing sub-transforms of size 16. Place the output
    148    * into the destination buffer and avoid unnecessary computations.
    149    */
    150   tmp1 = p_buf[8].Re + p_buf[12].Re;
    151   tmp2 = p_buf[8].Re - p_buf[12].Re;
    152   tmp3 = p_buf[8].Im + p_buf[12].Im;
    153   tmp4 = p_buf[8].Im - p_buf[12].Im;
    154 
    155   p_dst[8].Re = p_buf[0].Re - tmp1;
    156   p_dst[0].Re = p_buf[0].Re + tmp1;
    157   p_dst[8].Im = p_buf[0].Im - tmp3;
    158   p_dst[0].Im = p_buf[0].Im + tmp3;
    159   p_dst[4].Re = p_buf[4].Re + tmp4;
    160   p_dst[4].Im = p_buf[4].Im - tmp2;
    161 
    162   w_re_ptr = pFFTSpec->pTwiddle + step;
    163   w_im_ptr =
    164       pFFTSpec->pTwiddle + (OMX_U32)(1 << TWIDDLE_TABLE_ORDER - 2) - step;
    165 
    166   /* Loop performing split-radix butterfly operations. */
    167   for (uint32_t n = 1; n < 4; ++n) {
    168     OMX_F32 tmp5, tmp6;
    169     OMX_F32 w_re = *w_re_ptr;
    170     OMX_F32 w_im = *w_im_ptr;
    171 
    172     tmp1 = w_re * p_buf[8 + n].Re + w_im * p_buf[8 + n].Im;
    173     tmp2 = w_re * p_buf[8 + n].Im - w_im * p_buf[8 + n].Re;
    174     tmp3 = w_re * p_buf[12 + n].Re - w_im * p_buf[12 + n].Im;
    175     tmp4 = w_re * p_buf[12 + n].Im + w_im * p_buf[12 + n].Re;
    176 
    177     tmp5 = tmp1 + tmp3;
    178     tmp1 = tmp1 - tmp3;
    179     tmp6 = tmp2 + tmp4;
    180     tmp2 = tmp2 - tmp4;
    181 
    182     p_dst[n].Re = p_buf[n].Re + tmp5;
    183     p_dst[n].Im = p_buf[n].Im + tmp6;
    184     p_dst[4 + n].Re = p_buf[4 + n].Re + tmp2;
    185     p_dst[4 + n].Im = p_buf[4 + n].Im - tmp1;
    186 
    187     w_re_ptr += step;
    188     w_im_ptr -= step;
    189   }
    190   return OMX_Sts_NoErr;
    191 }
    192