@//
      1 @//
      2 @//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
      3 @//
      4 @//  Use of this source code is governed by a BSD-style license
      5 @//  that can be found in the LICENSE file in the root of the source
      6 @//  tree. An additional intellectual property rights grant can be found
      7 @//  in the file PATENTS.  All contributing project authors may
      8 @//  be found in the AUTHORS file in the root of the source tree.
      9 @//
     10 @//  This is a modification of armSP_FFT_CToC_SC32_Radix4_fs_unsafe_s.S
     11 @//  to support float instead of SC32.
     12 @//
     13 
     14 @//
     15 @// Description:
@// Compute the first radix-4 stage of an FFT for an N-point complex signal
     17 @//
     18 @//
     19 
     20 
     21 @// Include standard headers
     22 
     23 #include "dl/api/arm/armCOMM_s.h"
     24 #include "dl/api/arm/omxtypes_s.h"
     25 
     26 @//        M_VARIANTS ARM1136JS
     27 
     28 @// Import symbols required from other files
     29 @// (For example tables)
     30 
     31 
     32 
     33 
     34 @// Set debugging level
     35 @//DEBUG_ON    SETL {TRUE}
     36 
     37 
     38 
     39 @// Guarding implementation by the processor name
     40 
     41 @//    IF  ARM1136JS
     42 
     43 @//Input Registers
     44 
     45 #define pSrc            r0
     46 #define pDst            r2
     47 #define pTwiddle        r1
     48 #define pPingPongBuf    r5
     49 #define subFFTNum       r6
     50 #define subFFTSize      r7
     51 
     52 
     53 @//Output Registers
     54 
     55 
     56 @//Local Scratch Registers
     57 
     58 #define grpSize         r14
     59 #define outPointStep    r12
     60 #define setStep         r3
     61 #define setCount        r14                  /*@// Reuse grpSize as setCount*/
     62 #define pointStep       r12
     63 
     64 @// Real and Imaginary parts
     65 #define x0r s0
     66 #define x0i s1
     67 #define x1r s2
     68 #define x1i s3
     69 #define x2r s4
     70 #define x2i s5
     71 #define x3r s6
     72 #define x3i s7
     73 #define t3r s0                 /*@// Temporarily hold x3r and x3i*/
     74 #define t3i s1
     75 #define sr  s8
     76 #define si  s9
     77 
     78 
     79 
     80         .macro FFTSTAGE scaled, inverse, name
     81 
     82         @// Define stack arguments
     83 
     84 
     85         @// Update grpCount and grpSize rightaway inorder to reuse
     86         @// pSubFFTSize and pSubFFTNum regs
     87         mov     subFFTSize, #4
     88         lsr     grpSize, subFFTNum, #2
     89         mov     subFFTNum, grpSize
     90 
     91 
     92         @// pT0+1 increments pT0 by 8 bytes
     93         @// pT0+pointStep = increment of 8*pointStep bytes = 2*grpSize bytes
     94         @// Note: outPointStep = pointStep for firststage
     95         @// Note: setCount = grpSize/4 (reuse the updated grpSize for setCount)
     96         MOV     pointStep,grpSize,LSL #3
     97 
     98 
     99         @// Calculate the step of input data for the next set
    100         @//MOV     setStep,pointStep,LSL #1
    101         MOV     setStep,grpSize,LSL #4
    102         @// setStep = 3*pointStep
    103         ADD     setStep,setStep,pointStep
    104         @// setStep = - 3*pointStep+8
    105         RSB     setStep,setStep,#8
    106 
    107         @// grp = 0 a special case since all the twiddle factors are 1
    108         @// Loop on the sets
    109 
    110 grpZeroSetLoop\name:
    111 
    112         vldm.f32 pSrc, {x0r, x0i}
    113         add     pSrc, pSrc, pointStep
    114         vldm.f32 pSrc, {x1r, x1i}
    115         add     pSrc, pSrc, pointStep
    116         vldm.f32 pSrc, {x2r, x2i}
    117         add     pSrc, pSrc, pointStep
    118         vldm.f32 pSrc, {x3r, x3i}
    119         add     pSrc, pSrc, setStep
    120 
    121 
    122         @// Decrement setcount
    123         SUBS    setCount,setCount,#1
    124 
    125 
    126 
    127         @// finish first stage of 4 point FFT
    128 
    129         vadd.f32     x0r,x0r,x2r                @// x0 = x0 + x2
    130         vadd.f32     x0i,x0i,x2i
    131 
    132         vadd.f32     sr, x2r, x2r
    133         vadd.f32     si, x2i, x2i
    134         vsub.f32     x2r,x0r,sr                 @// x2 = x0 - x2
    135         vsub.f32     x2i,x0i,si
    136 
    137         vadd.f32     x1r,x1r,x3r                @// x1 = x1 + x3
    138         vadd.f32     x1i,x1i,x3i
    139 
    140         vadd.f32     sr, x3r, x3r
    141         vadd.f32     si, x3i, x3i
    142         vsub.f32     x3r,x1r,sr                 @// x3 = x1 - x3
    143         vsub.f32     x3i,x1i,si
    144 
    145 
    146         @// finish second stage of 4 point FFT
    147 
    148 
    149         vadd.f32     x0r,x0r,x1r                @// x0 = x0 + x1
    150         vadd.f32     x0i,x0i,x1i
    151 
    152         vadd.f32     sr, x1r, x1r
    153         vadd.f32     si, x1i, x1i
    154         vsub.f32     x1r,x0r,sr                 @// x1 = x0 - x1
    155         vsub.f32     x1i,x0i,si
    156 
    157         vstm.f32 pDst, {x0r, x0i}
    158         add      pDst, pDst, outPointStep
    159 
    160         vadd.f32     x2r,x2r,x3i
    161         vsub.f32     x2i,x2i,x3r
    162 
    163         vadd.f32     sr, x3r, x3r
    164         vadd.f32     si, x3i, x3i
    165         vsub.f32     t3r, x2r, si
    166         vadd.f32     t3i, x2i, sr
    167 
    168         .ifeqs  "\inverse", "TRUE"
    169             vstm.f32 pDst, {t3r, t3i}
    170             add      pDst, pDst, outPointStep
    171             vstm.f32 pDst, {x1r, x1i}
    172             add      pDst, pDst, outPointStep
    173             vstm.f32 pDst, {x2r, x2i}
    174             add      pDst, pDst, setStep
    175         .else
    176             vstm.f32 pDst, {x2r, x2i}
    177             add      pDst, pDst, outPointStep
    178             vstm.f32 pDst, {x1r, x1i}
    179             add      pDst, pDst, outPointStep
    180             vstm.f32 pDst, {t3r, t3i}
    181             add      pDst, pDst, setStep
    182         .endif
    183 
    184 
    185         BGT     grpZeroSetLoop\name
    186 
    187 
    188         @// reset pSrc to pDst for the next stage
    189         SUB     pSrc,pDst,pointStep             @// pDst -= 2*grpSize
    190         mov     pDst, pPingPongBuf
    191 
    192         .endm
    193 
    194 
    195         M_START armSP_FFTFwd_CToC_FC32_Radix4_fs_OutOfPlace_unsafe_vfp,r4
    196         FFTSTAGE "FALSE","FALSE",FWD
    197         M_END
    198 
    199 
    200         M_START armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace_unsafe_vfp,r4
    201         FFTSTAGE "FALSE","TRUE",INV
    202         M_END
    203 
    204 
    205 @//    ENDIF                                                           @//ARM1136JS
    206 
    207 
    208 @// Guarding implementation by the processor name
    209 
    210 
    211 
    212 
    213     .end
    214