Home | History | Annotate | Download | only in mips
      1 /*
      2  *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  *
     10  */
     11 
     12 #include <stdint.h>
     13 
     14 #include "dl/api/omxtypes.h"
     15 #include "dl/sp/api/mipsSP.h"
     16 
     17 OMXResult mips_FFTInv_CCSToR_F32_complex(const OMX_F32* pSrc,
     18                                          OMX_F32* pDst,
     19                                          const MIPSFFTSpec_R_FC32* pFFTSpec) {
     20   OMX_U32 num_transforms, step;
     21   /* Quarter, half and three-quarters of transform size. */
     22   OMX_U32 n1_4, n1_2, n3_4;
     23   const OMX_F32* w_re_ptr;
     24   const OMX_F32* w_im_ptr;
     25   OMX_U32 fft_size = 1 << pFFTSpec->order;
     26   OMX_FC32* p_buf = (OMX_FC32*)pFFTSpec->pBuf;
     27   const OMX_FC32* p_src = (const OMX_FC32*)pSrc;
     28   OMX_FC32* p_dst;
     29   OMX_U16* p_bitrev = pFFTSpec->pBitRevInv;
     30   OMX_F32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8, factor;
     31   OMX_F32 w_re, w_im;
     32 
     33   w_re_ptr = pFFTSpec->pTwiddle + 1;
     34   w_im_ptr = pFFTSpec->pTwiddle + (OMX_U32)(1 << pFFTSpec->order - 2) - 1;
     35 
     36   /*
     37    * Preliminary loop performing input adaptation due to computing real FFT
     38    * through complex FFT of half the size.
     39    */
     40   for (uint32_t n = 1; n < fft_size / 8; ++n) {
     41     tmp1 = p_src[n].Re;
     42     tmp2 = p_src[n].Im;
     43     tmp3 = p_src[fft_size / 2 - n].Re;
     44     tmp4 = p_src[fft_size / 2 - n].Im;
     45 
     46     tmp5 = tmp1 + tmp3;
     47     tmp6 = tmp1 - tmp3;
     48     tmp7 = tmp2 + tmp4;
     49     tmp8 = tmp2 - tmp4;
     50 
     51     w_re = *w_re_ptr;
     52     w_im = *w_im_ptr;
     53 
     54     p_buf[p_bitrev[n]].Re = 0.5f * (tmp5 - w_re * tmp7 - w_im * tmp6);
     55     p_buf[p_bitrev[n]].Im = 0.5f * (tmp8 + w_re * tmp6 - w_im * tmp7);
     56     p_buf[p_bitrev[fft_size / 2 - n]].Re =
     57         0.5f * (tmp5 + w_re * tmp7 + w_im * tmp6);
     58     p_buf[p_bitrev[fft_size / 2 - n]].Im =
     59         0.5f * (-tmp8 + w_re * tmp6 - w_im * tmp7);
     60 
     61     tmp1 = p_src[n + fft_size / 4].Re;
     62     tmp2 = p_src[n + fft_size / 4].Im;
     63     tmp3 = p_src[fft_size / 4 - n].Re;
     64     tmp4 = p_src[fft_size / 4 - n].Im;
     65 
     66     tmp5 = tmp1 + tmp3;
     67     tmp6 = tmp1 - tmp3;
     68     tmp7 = tmp2 + tmp4;
     69     tmp8 = tmp2 - tmp4;
     70 
     71     p_buf[p_bitrev[n + fft_size / 4]].Re =
     72         0.5f * (tmp5 + w_im * tmp7 - w_re * tmp6);
     73     p_buf[p_bitrev[n + fft_size / 4]].Im =
     74         0.5f * (tmp8 - w_im * tmp6 - w_re * tmp7);
     75     p_buf[p_bitrev[fft_size / 4 - n]].Re =
     76         0.5f * (tmp5 - w_im * tmp7 + w_re * tmp6);
     77     p_buf[p_bitrev[fft_size / 4 - n]].Im =
     78         0.5f * (-tmp8 - w_im * tmp6 - w_re * tmp7);
     79 
     80     ++w_re_ptr;
     81     --w_im_ptr;
     82   }
     83   tmp1 = p_src[fft_size / 8].Re;
     84   tmp2 = p_src[fft_size / 8].Im;
     85   tmp3 = p_src[3 * fft_size / 8].Re;
     86   tmp4 = p_src[3 * fft_size / 8].Im;
     87 
     88   tmp5 = tmp1 + tmp3;
     89   tmp6 = tmp1 - tmp3;
     90   tmp7 = tmp2 + tmp4;
     91   tmp8 = tmp2 - tmp4;
     92 
     93   w_re = *w_re_ptr;
     94   w_im = *w_im_ptr;
     95 
     96   p_buf[p_bitrev[fft_size / 8]].Re = 0.5f * (tmp5 - w_re * tmp7 - w_im * tmp6);
     97   p_buf[p_bitrev[fft_size / 8]].Im = 0.5f * (tmp8 + w_re * tmp6 - w_im * tmp7);
     98   p_buf[p_bitrev[3 * fft_size / 8]].Re =
     99       0.5f * (tmp5 + w_re * tmp7 + w_im * tmp6);
    100   p_buf[p_bitrev[3 * fft_size / 8]].Im =
    101       0.5f * (-tmp8 + w_re * tmp6 - w_im * tmp7);
    102 
    103   p_buf[p_bitrev[0]].Re = 0.5f * (p_src[0].Re + p_src[fft_size / 2].Re);
    104   p_buf[p_bitrev[0]].Im = 0.5f * (p_src[0].Re - p_src[fft_size / 2].Re);
    105   p_buf[p_bitrev[fft_size / 4]].Re = p_src[fft_size / 4].Re;
    106   p_buf[p_bitrev[fft_size / 4]].Im = -p_src[fft_size / 4].Im;
    107 
    108   /*
    109    * Loop performing sub-transforms of size 4,
    110    * which contain 2 butterfly operations.
    111    */
    112   num_transforms = (SUBTRANSFORM_CONST >> (17 - pFFTSpec->order)) | 1;
    113   for (uint32_t n = 0; n < num_transforms; ++n) {
    114     OMX_U32 offset = pFFTSpec->pOffset[n] << 2;
    115     OMX_FC32* p_tmp = p_buf + offset;
    116 
    117     tmp1 = p_tmp[0].Re + p_tmp[1].Re;
    118     tmp2 = p_tmp[0].Im + p_tmp[1].Im;
    119     tmp3 = p_tmp[0].Re - p_tmp[1].Re;
    120     tmp4 = p_tmp[0].Im - p_tmp[1].Im;
    121     tmp5 = p_tmp[2].Re + p_tmp[3].Re;
    122     tmp6 = p_tmp[2].Im + p_tmp[3].Im;
    123     tmp8 = p_tmp[2].Im - p_tmp[3].Im;
    124     tmp7 = p_tmp[2].Re - p_tmp[3].Re;
    125 
    126     p_tmp[0].Re = tmp1 + tmp5;
    127     p_tmp[2].Re = tmp1 - tmp5;
    128     p_tmp[0].Im = tmp2 + tmp6;
    129     p_tmp[2].Im = tmp2 - tmp6;
    130     p_tmp[1].Re = tmp3 - tmp8;
    131     p_tmp[3].Re = tmp3 + tmp8;
    132     p_tmp[1].Im = tmp4 + tmp7;
    133     p_tmp[3].Im = tmp4 - tmp7;
    134   }
    135 
    136   num_transforms = (num_transforms >> 1) | 1;
    137   /*
    138    * Loop performing sub-transforms of size 8,
    139    * which contain four butterfly operations.
    140    */
    141   for (uint32_t n = 0; n < num_transforms; ++n) {
    142     OMX_U32 offset = pFFTSpec->pOffset[n] << 3;
    143     OMX_FC32* p_tmp = p_buf + offset;
    144 
    145     tmp1 = p_tmp[4].Re + p_tmp[5].Re;
    146     tmp3 = p_tmp[6].Re + p_tmp[7].Re;
    147     tmp2 = p_tmp[4].Im + p_tmp[5].Im;
    148     tmp4 = p_tmp[6].Im + p_tmp[7].Im;
    149 
    150     tmp5 = tmp1 + tmp3;
    151     tmp7 = tmp1 - tmp3;
    152     tmp6 = tmp2 + tmp4;
    153     tmp8 = tmp2 - tmp4;
    154 
    155     tmp1 = p_tmp[4].Re - p_tmp[5].Re;
    156     tmp2 = p_tmp[4].Im - p_tmp[5].Im;
    157     tmp3 = p_tmp[6].Re - p_tmp[7].Re;
    158     tmp4 = p_tmp[6].Im - p_tmp[7].Im;
    159 
    160     p_tmp[4].Re = p_tmp[0].Re - tmp5;
    161     p_tmp[0].Re = p_tmp[0].Re + tmp5;
    162     p_tmp[4].Im = p_tmp[0].Im - tmp6;
    163     p_tmp[0].Im = p_tmp[0].Im + tmp6;
    164     p_tmp[6].Re = p_tmp[2].Re + tmp8;
    165     p_tmp[2].Re = p_tmp[2].Re - tmp8;
    166     p_tmp[6].Im = p_tmp[2].Im - tmp7;
    167     p_tmp[2].Im = p_tmp[2].Im + tmp7;
    168 
    169     tmp5 = SQRT1_2 * (tmp1 - tmp2);
    170     tmp7 = SQRT1_2 * (tmp3 + tmp4);
    171     tmp6 = SQRT1_2 * (tmp1 + tmp2);
    172     tmp8 = SQRT1_2 * (tmp4 - tmp3);
    173 
    174     tmp1 = tmp5 + tmp7;
    175     tmp3 = tmp5 - tmp7;
    176     tmp2 = tmp6 + tmp8;
    177     tmp4 = tmp6 - tmp8;
    178 
    179     p_tmp[5].Re = p_tmp[1].Re - tmp1;
    180     p_tmp[1].Re = p_tmp[1].Re + tmp1;
    181     p_tmp[5].Im = p_tmp[1].Im - tmp2;
    182     p_tmp[1].Im = p_tmp[1].Im + tmp2;
    183     p_tmp[7].Re = p_tmp[3].Re + tmp4;
    184     p_tmp[3].Re = p_tmp[3].Re - tmp4;
    185     p_tmp[7].Im = p_tmp[3].Im - tmp3;
    186     p_tmp[3].Im = p_tmp[3].Im + tmp3;
    187   }
    188 
    189   step = 1 << (pFFTSpec->order - 4);
    190   n1_4 = 4;
    191   /* Outer loop that loops over FFT stages. */
    192   for (uint32_t fft_stage = 4; fft_stage <= pFFTSpec->order - 2; ++fft_stage) {
    193     n1_2 = 2 * n1_4;
    194     n3_4 = 3 * n1_4;
    195     num_transforms = (num_transforms >> 1) | 1;
    196     for (uint32_t n = 0; n < num_transforms; ++n) {
    197       OMX_U32 offset = pFFTSpec->pOffset[n] << fft_stage;
    198       OMX_FC32* p_tmp = p_buf + offset;
    199 
    200       tmp1 = p_tmp[n1_2].Re + p_tmp[n3_4].Re;
    201       tmp2 = p_tmp[n1_2].Re - p_tmp[n3_4].Re;
    202       tmp3 = p_tmp[n1_2].Im + p_tmp[n3_4].Im;
    203       tmp4 = p_tmp[n1_2].Im - p_tmp[n3_4].Im;
    204 
    205       p_tmp[n1_2].Re = p_tmp[0].Re - tmp1;
    206       p_tmp[0].Re = p_tmp[0].Re + tmp1;
    207       p_tmp[n1_2].Im = p_tmp[0].Im - tmp3;
    208       p_tmp[0].Im = p_tmp[0].Im + tmp3;
    209       p_tmp[n3_4].Re = p_tmp[n1_4].Re + tmp4;
    210       p_tmp[n1_4].Re = p_tmp[n1_4].Re - tmp4;
    211       p_tmp[n3_4].Im = p_tmp[n1_4].Im - tmp2;
    212       p_tmp[n1_4].Im = p_tmp[n1_4].Im + tmp2;
    213 
    214       w_re_ptr = pFFTSpec->pTwiddle + step;
    215       w_im_ptr =
    216           pFFTSpec->pTwiddle + (OMX_U32)(1 << pFFTSpec->order - 2) - step;
    217 
    218       /*
    219        * Loop performing split-radix butterfly operations for one sub-transform.
    220        */
    221       for (uint32_t i = 1; i < n1_4; ++i) {
    222         w_re = *w_re_ptr;
    223         w_im = *w_im_ptr;
    224 
    225         tmp1 = w_re * p_tmp[n1_2 + i].Re - w_im * p_tmp[n1_2 + i].Im;
    226         tmp2 = w_re * p_tmp[n1_2 + i].Im + w_im * p_tmp[n1_2 + i].Re;
    227         tmp3 = w_re * p_tmp[n3_4 + i].Re + w_im * p_tmp[n3_4 + i].Im;
    228         tmp4 = w_re * p_tmp[n3_4 + i].Im - w_im * p_tmp[n3_4 + i].Re;
    229 
    230         tmp5 = tmp1 + tmp3;
    231         tmp1 = tmp1 - tmp3;
    232         tmp6 = tmp2 + tmp4;
    233         tmp2 = tmp2 - tmp4;
    234 
    235         p_tmp[n1_2 + i].Re = p_tmp[i].Re - tmp5;
    236         p_tmp[i].Re = p_tmp[i].Re + tmp5;
    237         p_tmp[n1_2 + i].Im = p_tmp[i].Im - tmp6;
    238         p_tmp[i].Im = p_tmp[i].Im + tmp6;
    239         p_tmp[n3_4 + i].Re = p_tmp[n1_4 + i].Re + tmp2;
    240         p_tmp[n1_4 + i].Re = p_tmp[n1_4 + i].Re - tmp2;
    241         p_tmp[n3_4 + i].Im = p_tmp[n1_4 + i].Im - tmp1;
    242         p_tmp[n1_4 + i].Im = p_tmp[n1_4 + i].Im + tmp1;
    243 
    244         w_re_ptr += step;
    245         w_im_ptr -= step;
    246       }
    247     }
    248     step >>= 1;
    249     n1_4 <<= 1;
    250   }
    251 
    252   /* Last FFT stage, write data to output buffer. */
    253   n1_2 = 2 * n1_4;
    254   n3_4 = 3 * n1_4;
    255   factor = (OMX_F32)2.0f / fft_size;
    256 
    257   p_dst = (OMX_FC32*)pDst;
    258 
    259   tmp1 = p_buf[n1_2].Re + p_buf[n3_4].Re;
    260   tmp2 = p_buf[n1_2].Re - p_buf[n3_4].Re;
    261   tmp3 = p_buf[n1_2].Im + p_buf[n3_4].Im;
    262   tmp4 = p_buf[n1_2].Im - p_buf[n3_4].Im;
    263 
    264   p_dst[n1_2].Re = factor * (p_buf[0].Re - tmp1);
    265   p_dst[0].Re = factor * (p_buf[0].Re + tmp1);
    266   p_dst[n1_2].Im = factor * (p_buf[0].Im - tmp3);
    267   p_dst[0].Im = factor * (p_buf[0].Im + tmp3);
    268   p_dst[n3_4].Re = factor * (p_buf[n1_4].Re + tmp4);
    269   p_dst[n1_4].Re = factor * (p_buf[n1_4].Re - tmp4);
    270   p_dst[n3_4].Im = factor * (p_buf[n1_4].Im - tmp2);
    271   p_dst[n1_4].Im = factor * (p_buf[n1_4].Im + tmp2);
    272 
    273   w_re_ptr = pFFTSpec->pTwiddle + step;
    274   w_im_ptr = pFFTSpec->pTwiddle + (OMX_U32)(1 << pFFTSpec->order - 2) - step;
    275 
    276   for (uint32_t i = 1; i < n1_4; ++i) {
    277     w_re = *w_re_ptr;
    278     w_im = *w_im_ptr;
    279 
    280     tmp1 = w_re * p_buf[n1_2 + i].Re - w_im * p_buf[n1_2 + i].Im;
    281     tmp2 = w_re * p_buf[n1_2 + i].Im + w_im * p_buf[n1_2 + i].Re;
    282     tmp3 = w_re * p_buf[n3_4 + i].Re + w_im * p_buf[n3_4 + i].Im;
    283     tmp4 = w_re * p_buf[n3_4 + i].Im - w_im * p_buf[n3_4 + i].Re;
    284 
    285     tmp5 = tmp1 + tmp3;
    286     tmp1 = tmp1 - tmp3;
    287     tmp6 = tmp2 + tmp4;
    288     tmp2 = tmp2 - tmp4;
    289 
    290     p_dst[n1_2 + i].Re = factor * (p_buf[i].Re - tmp5);
    291     p_dst[i].Re = factor * (p_buf[i].Re + tmp5);
    292     p_dst[n1_2 + i].Im = factor * (p_buf[i].Im - tmp6);
    293     p_dst[i].Im = factor * (p_buf[i].Im + tmp6);
    294     p_dst[n3_4 + i].Re = factor * (p_buf[n1_4 + i].Re + tmp2);
    295     p_dst[n1_4 + i].Re = factor * (p_buf[n1_4 + i].Re - tmp2);
    296     p_dst[n3_4 + i].Im = factor * (p_buf[n1_4 + i].Im - tmp1);
    297     p_dst[n1_4 + i].Im = factor * (p_buf[n1_4 + i].Im + tmp1);
    298 
    299     w_re_ptr += step;
    300     w_im_ptr -= step;
    301   }
    302 
    303   return OMX_Sts_NoErr;
    304 }
    305