Home | History | Annotate | Download | only in neon
      1 @//
      2 @//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
      3 @//
      4 @//  Use of this source code is governed by a BSD-style license
      5 @//  that can be found in the LICENSE file in the root of the source
      6 @//  tree. An additional intellectual property rights grant can be found
      7 @//  in the file PATENTS.  All contributing project authors may
      8 @//  be found in the AUTHORS file in the root of the source tree.
      9 @//
     10 @//  This is a modification of armSP_FFT_CToC_SC32_Radix2_fs_unsafe_s.S
     11 @//  to support float instead of SC32.
     12 @//
     13 
     14 @//
     15 @// Description:
     16 @// Compute the first stage of a Radix 2 DIT in-order out-of-place FFT
     17 @// stage for a N point complex signal.
     18 @//
     19 @//
     20 
     21 
     22 @// Include standard headers
     23 
     24 #include "dl/api/arm/armCOMM_s.h"
     25 #include "dl/api/arm/omxtypes_s.h"
     26 
     27 
     28 @// Import symbols required from other files
     29 @// (For example tables)
     30 
     31 
     32 
     33 
     34 @// Set debugging level
     35 @//DEBUG_ON    SETL {TRUE}
     36 
     37 
     38 
     39 @// Guarding implementation by the processor name
     40 
     41 
     42 
     43 @// Guarding implementation by the processor name
     44 
     45 
     46 @//Input Registers
     47 
     48 #define pSrc            r0
     49 #define pDst            r2
     50 #define pTwiddle        r1
     51 #define pPingPongBuf    r5
     52 #define subFFTNum       r6
     53 #define subFFTSize      r7
     54 
     55 
     56 @//Output Registers
     57 
     58 
     59 @//Local Scratch Registers
     60 
     61 #define pointStep       r3
     62 #define outPointStep    r3
     63 #define grpSize         r4
     64 #define setCount        r4
     65 #define step            r8
     66 #define dstStep         r8
     67 
     68 @// Neon Registers
     69 
     70 #define dX0     D0.F32
     71 #define dX1     D1.F32
     72 #define dY0     D2.F32
     73 #define dY1     D3.F32
     74 
     75 
     76         .MACRO FFTSTAGE scaled, inverse, name
     77 
     78         @// Define stack arguments
     79 
     80 
     81         @// update subFFTSize and subFFTNum into RN6 and RN7 for the next stage
     82 
     83 
     84         MOV        subFFTSize,#2
     85         LSR        grpSize,subFFTNum,#1
     86         MOV        subFFTNum,grpSize
     87 
     88 
     89         @// pT0+1 increments pT0 by 8 bytes
     90         @// pT0+pointStep = increment of 8*pointStep bytes = 4*grpSize bytes
     91         @// Note: outPointStep = pointStep for firststage
     92         @// Note: setCount = grpSize/2 (reuse the updated grpSize for setCount)
     93 
     94         MOV        pointStep,grpSize,LSL #3
     95         RSB        step,pointStep,#8
     96 
     97 
     98         @// Loop on the sets for grp zero
     99 
    100 grpZeroSetLoop\name :
    101 
    102         VLD1    dX0,[pSrc],pointStep
    103         VLD1    dX1,[pSrc],step                   @// step = -pointStep + 8
    104         SUBS    setCount,setCount,#1
    105 
    106         VADD    dY0,dX0,dX1
    107         VSUB    dY1,dX0,dX1
    108 
    109         VST1    dY0,[pDst],outPointStep
    110         @// dstStep =  step = -pointStep + 8
    111         VST1    dY1,[pDst],dstStep
    112 
    113         BGT     grpZeroSetLoop\name
    114 
    115 
    116         @// reset pSrc to pDst for the next stage
    117         SUB     pSrc,pDst,pointStep                     @// pDst -= 2*grpSize
    118         MOV     pDst,pPingPongBuf
    119 
    120         .endm
    121 
    122 
    123 
    124         M_START armSP_FFTFwd_CToC_FC32_Radix2_fs_OutOfPlace_unsafe,r4
    125         FFTSTAGE "FALSE","FALSE",fwd
    126         M_END
    127 
    128 
    129 
    130         M_START armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace_unsafe,r4
    131         FFTSTAGE "FALSE","TRUE",inv
    132         M_END
    133 
    134 	.end
    135