1 /* 2 * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 * 10 */ 11 12 #include "dl/api/omxtypes.h" 13 14 void x86SP_FFT_CToC_FC32_Inv_Radix4_ms( 15 const OMX_F32 *in, 16 OMX_F32 *out, 17 const OMX_F32 *twiddle, 18 OMX_INT n, 19 OMX_INT sub_size, 20 OMX_INT sub_num) { 21 OMX_INT set; 22 OMX_INT grp; 23 OMX_INT step = sub_num >> 1; 24 OMX_INT set_count = sub_num >> 2; 25 OMX_INT n_by_4 = n >> 2; 26 OMX_INT n_mul_2 = n << 1; 27 OMX_F32 *out0 = out; 28 29 // grp == 0 30 for (set = 0; set < set_count; ++set) { 31 OMX_FC32 t0; 32 OMX_FC32 t1; 33 OMX_FC32 t2; 34 OMX_FC32 t3; 35 36 const OMX_F32 *in0 = in + set; 37 const OMX_F32 *in1 = in0 + set_count; 38 const OMX_F32 *in2 = in1 + set_count; 39 const OMX_F32 *in3 = in2 + set_count; 40 OMX_F32 *out1 = out0 + n_by_4; 41 OMX_F32 *out2 = out1 + n_by_4; 42 OMX_F32 *out3 = out2 + n_by_4; 43 44 // CADD t0, in0, in2 45 t0.Re = in0[0] + in2[0]; 46 t0.Im = in0[n] + in2[n]; 47 48 // CSUB t1, in0, in2 49 t1.Re = in0[0] - in2[0]; 50 t1.Im = in0[n] - in2[n]; 51 52 // CADD t2, in1, in3 53 t2.Re = in1[0] + in3[0]; 54 t2.Im = in1[n] + in3[n]; 55 56 // CSUB t3, in1, in3 57 t3.Re = in1[0] - in3[0]; 58 t3.Im = in1[n] - in3[n]; 59 60 // CADD out0, t0, t2 61 out0[0] = t0.Re + t2.Re; 62 out0[n] = t0.Im + t2.Im; 63 64 // CSUB out2, t0, t2 65 out2[0] = t0.Re - t2.Re; 66 out2[n] = t0.Im - t2.Im; 67 68 // CSUB_ADD_X out1, t1, t3 69 out1[0] = t1.Re - t3.Im; 70 out1[n] = t1.Im + t3.Re; 71 72 // CADD_SUB_X out3, t1, t3 73 out3[0] = t1.Re + t3.Im; 74 out3[n] = t1.Im - t3.Re; 75 76 out0 += 1; 77 } 78 79 // grp > 0 80 for (grp = 1; grp < sub_size; ++grp) { 81 const OMX_F32 *tw1 = twiddle + grp * step; 82 const OMX_F32 *tw2 = tw1 + grp * step; 83 const OMX_F32 *tw3 = tw2 + grp * step; 84 85 for (set = 0; set < set_count; ++set) { 86 OMX_FC32 t0; 87 OMX_FC32 t1; 88 OMX_FC32 t2; 89 OMX_FC32 t3; 90 OMX_FC32 tt1; 91 OMX_FC32 tt2; 92 OMX_FC32 tt3; 93 94 const OMX_F32 *in0 = in + set + grp * sub_num; 95 const OMX_F32 *in1 = in0 + set_count; 96 const OMX_F32 *in2 = in1 + set_count; 97 const OMX_F32 *in3 = in2 + set_count; 98 OMX_F32 *out1 = out0 + n_by_4; 99 OMX_F32 *out2 = out1 + n_by_4; 100 OMX_F32 *out3 = out2 + n_by_4; 101 102 // CMUL tt1, Tw1, in1 103 tt1.Re = tw1[0] * in1[0] + tw1[n_mul_2] * in1[n]; 104 tt1.Im = tw1[0] * in1[n] - tw1[n_mul_2] * in1[0]; 105 106 // CMUL tt2, Tw2, in2 107 tt2.Re = tw2[0] * in2[0] + tw2[n_mul_2] * in2[n]; 108 tt2.Im = tw2[0] * in2[n] - tw2[n_mul_2] * in2[0]; 109 110 // CMUL tt3, Tw3, in3 111 tt3.Re = tw3[0] * in3[0] + tw3[n_mul_2] * in3[n]; 112 tt3.Im = tw3[0] * in3[n] - tw3[n_mul_2] * in3[0]; 113 114 // CADD t0, in0, tt2 115 t0.Re = in0[0] + tt2.Re; 116 t0.Im = in0[n] + tt2.Im; 117 118 // CSUB t1, in0, tt2 119 t1.Re = in0[0] - tt2.Re; 120 t1.Im = in0[n] - tt2.Im; 121 122 // CADD t2, tt1, tt3 123 t2.Re = tt1.Re + tt3.Re; 124 t2.Im = tt1.Im + tt3.Im; 125 126 // CSUB t3, tt1, tt3 127 t3.Re = tt1.Re - tt3.Re; 128 t3.Im = tt1.Im - tt3.Im; 129 130 // CADD out0, t0, t2 131 out0[0] = t0.Re + t2.Re; 132 out0[n] = t0.Im + t2.Im; 133 134 // CSUB out2, t0, t2 135 out2[0] = t0.Re - t2.Re; 136 out2[n] = t0.Im - t2.Im; 137 138 // CSUB_ADD_X out1, t1, t3 139 out1[0] = t1.Re - t3.Im; 140 out1[n] = t1.Im + t3.Re; 141 142 // CADD_SUB_X out3, t1, t3 143 out3[0] = t1.Re + t3.Im; 144 out3[n] = t1.Im - t3.Re; 145 146 out0 += 1; 147 } 148 } 149 } 150