1 /* 2 * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 * 10 */ 11 12 #include "dl/api/omxtypes.h" 13 #include "dl/sp/src/x86/x86SP_SSE_Math.h" 14 15 void x86SP_FFT_CToC_FC32_Fwd_Radix2_ls_sse( 16 const OMX_F32 *in, 17 OMX_F32 *out, 18 const OMX_F32 *twiddle, 19 OMX_INT n) { 20 OMX_F32 *out0 = out; 21 OMX_INT i; 22 23 // This function is used when n >= 8 24 assert(n >= 8); 25 if (n < 8) return; 26 27 for (i = 0; i < n; i += 8) { 28 VC v_tw; 29 VC v_t0; 30 VC v_t1; 31 VC v_temp; 32 33 // Load twiddle 34 const OMX_F32 *tw = twiddle + i; 35 v_tw.real = _mm_set_ps(tw[6], tw[4], tw[2], tw[0]); 36 const OMX_F32 * twi = tw + (n << 1); 37 v_tw.imag = _mm_set_ps(twi[6], twi[4], twi[2], twi[0]); 38 39 // Load real part 40 const OMX_F32 *t = in + i; 41 VC_LOAD_SHUFFLE(&(v_t0.real), &(v_t1.real), t); 42 43 // Load imag part 44 t = t + n; 45 VC_LOAD_SHUFFLE(&(v_t0.imag), &(v_t1.imag), t); 46 47 OMX_F32 *out1 = out0 + (n >> 1); 48 VC_MUL(&v_temp, &v_tw, &v_t1); 49 50 VC_SUB_STORE_SPLIT(out1, &v_t0, &v_temp, n); 51 52 VC_ADD_STORE_SPLIT(out0, &v_t0, &v_temp, n); 53 54 out0 += 4; 55 } 56 } 57