Home | History | Annotate | Download | only in x86
      1 /*
      2  *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  *
     10  */
     11 
     12 #include "dl/api/omxtypes.h"
     13 
     14 void x86SP_FFT_CToC_FC32_Fwd_Radix4_ms(
     15     const OMX_F32 *in,
     16     OMX_F32 *out,
     17     const OMX_F32 *twiddle,
     18     OMX_INT n,
     19     OMX_INT sub_size,
     20     OMX_INT sub_num) {
     21   OMX_INT set;
     22   OMX_INT grp;
     23   OMX_INT step = sub_num >> 1;
     24   OMX_INT set_count = sub_num >> 2;
     25   OMX_INT n_by_4 = n >> 2;
     26   OMX_INT n_mul_2 = n << 1;
     27   OMX_F32 *out0 = out;
     28 
     29   // grp == 0
     30   for (set = 0; set < set_count; ++set) {
     31     OMX_FC32 t0;
     32     OMX_FC32 t1;
     33     OMX_FC32 t2;
     34     OMX_FC32 t3;
     35 
     36     const OMX_F32 *in0 = in + set;
     37     const OMX_F32 *in1 = in0 + set_count;
     38     const OMX_F32 *in2 = in1 + set_count;
     39     const OMX_F32 *in3 = in2 + set_count;
     40     OMX_F32 *out1 = out0 + n_by_4;
     41     OMX_F32 *out2 = out1 + n_by_4;
     42     OMX_F32 *out3 = out2 + n_by_4;
     43 
     44     // CADD t0, in0, in2
     45     t0.Re = in0[0] + in2[0];
     46     t0.Im = in0[n] + in2[n];
     47 
     48     // CSUB t1, in0, in2
     49     t1.Re = in0[0] - in2[0];
     50     t1.Im = in0[n] - in2[n];
     51 
     52     // CADD t2, in1, in3
     53     t2.Re = in1[0] + in3[0];
     54     t2.Im = in1[n] + in3[n];
     55 
     56     // CSUB t3, in1, in3
     57     t3.Re = in1[0] - in3[0];
     58     t3.Im = in1[n] - in3[n];
     59 
     60     // CADD out0, t0, t2
     61     out0[0] = t0.Re + t2.Re;
     62     out0[n] = t0.Im + t2.Im;
     63 
     64     // CSUB out2, t0, t2
     65     out2[0] = t0.Re - t2.Re;
     66     out2[n] = t0.Im - t2.Im;
     67 
     68     // CSUB_ADD_X out3, t1, t3
     69     out3[0] = t1.Re - t3.Im;
     70     out3[n] = t1.Im + t3.Re;
     71 
     72     // CADD_SUB_X out1, t1, t3
     73     out1[0] = t1.Re + t3.Im;
     74     out1[n] = t1.Im - t3.Re;
     75 
     76     out0 += 1;
     77   }
     78 
     79   // grp > 0
     80   for (grp = 1; grp < sub_size; ++grp) {
     81     const OMX_F32 *tw1 = twiddle + grp * step;
     82     const OMX_F32 *tw2 = tw1 + grp * step;
     83     const OMX_F32 *tw3 = tw2 + grp * step;
     84 
     85     for (set = 0; set < set_count; ++set) {
     86       OMX_FC32 t0;
     87       OMX_FC32 t1;
     88       OMX_FC32 t2;
     89       OMX_FC32 t3;
     90       OMX_FC32 tt1;
     91       OMX_FC32 tt2;
     92       OMX_FC32 tt3;
     93 
     94       const OMX_F32 *in0 = in + set + grp * sub_num;
     95       const OMX_F32 *in1 = in0 + set_count;
     96       const OMX_F32 *in2 = in1 + set_count;
     97       const OMX_F32 *in3 = in2 + set_count;
     98       OMX_F32 *out1 = out0 + n_by_4;
     99       OMX_F32 *out2 = out1 + n_by_4;
    100       OMX_F32 *out3 = out2 + n_by_4;
    101 
    102       // CMUL tt1, Tw1, in1
    103       tt1.Re = tw1[0] * in1[0] - tw1[n_mul_2] * in1[n];
    104       tt1.Im = tw1[0] * in1[n] + tw1[n_mul_2] * in1[0];
    105 
    106       // CMUL tt2, Tw2, in2
    107       tt2.Re = tw2[0] * in2[0] - tw2[n_mul_2] * in2[n];
    108       tt2.Im = tw2[0] * in2[n] + tw2[n_mul_2] * in2[0];
    109 
    110       // CMUL tt3, Tw3, in3
    111       tt3.Re = tw3[0] * in3[0] - tw3[n_mul_2] * in3[n];
    112       tt3.Im = tw3[0] * in3[n] + tw3[n_mul_2] * in3[0];
    113 
    114       // CADD t0, in0, tt2
    115       t0.Re = in0[0] + tt2.Re;
    116       t0.Im = in0[n] + tt2.Im;
    117 
    118       // CSUB t1, in0, tt2
    119       t1.Re = in0[0] - tt2.Re;
    120       t1.Im = in0[n] - tt2.Im;
    121 
    122       // CADD t2, tt1, tt3
    123       t2.Re = tt1.Re + tt3.Re;
    124       t2.Im = tt1.Im + tt3.Im;
    125 
    126       // CSUB t3, tt1, tt3
    127       t3.Re = tt1.Re - tt3.Re;
    128       t3.Im = tt1.Im - tt3.Im;
    129 
    130       // CADD out0, t0, t2
    131       out0[0] = t0.Re + t2.Re;
    132       out0[n] = t0.Im + t2.Im;
    133 
    134       // CSUB out2, t0, t2
    135       out2[0] = t0.Re - t2.Re;
    136       out2[n] = t0.Im - t2.Im;
    137 
    138       // CADD_SUB_X out1, t1, t3
    139       out1[0] = t1.Re + t3.Im;
    140       out1[n] = t1.Im - t3.Re;
    141 
    142       // CSUB_ADD_X out3, t1, t3
    143       out3[0] = t1.Re - t3.Im;
    144       out3[n] = t1.Im + t3.Re;
    145 
    146       out0 += 1;
    147     }
    148   }
    149 }
    150