Home | History | Annotate | Download | only in armv7
      1 @//
      2 @//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
      3 @//
      4 @//  Use of this source code is governed by a BSD-style license
      5 @//  that can be found in the LICENSE file in the root of the source
      6 @//  tree. An additional intellectual property rights grant can be found
      7 @//  in the file PATENTS.  All contributing project authors may
      8 @//  be found in the AUTHORS file in the root of the source tree.
      9 @//
     10 @//  This is a modification of omxSP_FFTInv_CCSToR_S32_Sfs_s.s
     11 @//  to support float instead of SC32.
     12 @//
     13 
     14 @//
     15 @// Description:
     16 @// Compute an inverse FFT of a complex signal given in CCS format,
     17 @// producing real single-precision (F32) output.
     18 @//
     19 
     20 
     21 @// Include standard headers
     22 
     23 #include "dl/api/arm/armCOMM_s.h"
     24 #include "dl/api/arm/omxtypes_s.h"
     25 
     26 @//        M_VARIANTS ARM1136JS
     27 
     28 @// Import symbols required from other files
     29 @// (For example tables)
     30 
     31         .extern  armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace_unsafe_vfp
     32         .extern  armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace_unsafe_vfp
     33         .extern  armSP_FFTInv_CToC_FC32_Radix8_fs_OutOfPlace_unsafe_vfp
     34         .extern  armSP_FFTInv_CToC_FC32_Radix4_OutOfPlace_unsafe_vfp
     35         .extern  armSP_FFTInv_CCSToR_F32_preTwiddleRadix2_unsafe_vfp
     36 
     37 @// Set debugging level
     38 @//DEBUG_ON    SETL {TRUE}
     39 
     40 
     41 
     42 @// Guarding implementation by the processor name
     43 
     44 @//    IF  ARM1136JS
     45 
     46 @// Input registers (function arguments on entry)
     47 
     48 #define pSrc            r0
     49 #define pDst            r1
     50 #define pFFTSpec        r2
     51 
     52 
     53 @// Output register (same register as pSrc: both are r0)
     54 #define result          r0
     55 
     56 @// Local scratch registers
     57 @// NOTE: several names below share one register: argTwiddle and pDst are r1, argDst and pFFTSpec are r2,
     58 @// argScale and pTwiddle are r4, subFFTNum and N are r6. Writing one name overwrites the other.
     59 #define argTwiddle      r1
     60 #define argDst          r2
     61 #define argScale        r4
     62 #define pTwiddle        r4
     63 #define pOut            r5
     64 #define subFFTSize      r7
     65 #define subFFTNum       r6
     66 #define N               r6
     67 #define order           r14
     68 #define diff            r9
     69 @// Total number of radix stages required to complete the FFT
     70 #define count           r8
     71 @// Integer scratch register; the code in this file uses it to hold the constant 1 for the scale factor
     72 #define round           r3
     73 @// VFP single-precision aliases. s2 is shared by y0r, w1r, y1r and fscale; s3 by y0i, w1i, y1i and fone.
     74 #define x0r     s0
     75 #define x0i     s1
     76 #define y0r     s2
     77 #define y0i     s3
     78 #define x1r     s4
     79 #define x1i     s5
     80 #define w1r     s2
     81 #define w1i     s3
     82 #define w0r     s6
     83 #define w0i     s7
     84 #define y1r     s2              /*@// w1r,w1i*/
     85 #define y1i     s3
     86 #define st0     s8
     87 #define st1     s9
     88 #define st2     s10
     89 #define st3     s11
     90 #define st4     s12
     91 #define st5     s13
     92 #define fscale  s2
     93 #define fone    s3
     94 
     95 
     96 
     97     @// Allocate two 4-byte stack slots; pDst and pFFTSpec are saved in them across the BL calls below
     98         M_ALLOC4        pDstOnStack, 4
     99         M_ALLOC4        pFFTSpecOnStack, 4
    100 @// omxSP_FFTInv_CCSToR_F32_Sfs_vfp -- in: r0 = pSrc, r1 = pDst, r2 = pFFTSpec; out: r0 = OMX_Sts_NoErr on every path.
    101     @// Write function header (M_START is a project macro; the meaning of its r11 argument is not visible here)
    102         M_START     omxSP_FFTInv_CCSToR_F32_Sfs_vfp,r11
    103 
    104 @ Structure offsets for FFTSpec (byte offsets of the fields read through pFFTSpec)
    105         .set    ARMsFFTSpec_N, 0
    106         .set    ARMsFFTSpec_pBitRev, 4
    107         .set    ARMsFFTSpec_pTwiddle, 8
    108         .set    ARMsFFTSpec_pBuf, 12
    109 
    110         @// Define stack arguments (none are declared here)
    111 
    112         @// Read the transform size N from the spec structure (the log2 is taken later, at the CLZ below)
    113         LDR     N, [pFFTSpec, #ARMsFFTSpec_N]
    114 
    115 
    116 
    117         @// N <= 1 (signed compare) is treated separately: copy one float from pSrc to pDst and return
    118         CMP     N,#1
    119         BGT     sizeGreaterThanOne
    120         vldr.f32 x0r, [pSrc]
    121         vstr.f32 x0r, [pDst]
    122 
    123         B       End
    124 
    125 sizeGreaterThanOne:
    126         M_STR   pDst,pDstOnStack                    @// store all the pointers
    127         M_STR   pFFTSpec,pFFTSpecOnStack
    128 
    129 
    130         @// Call the preTwiddle Radix2 stage before doing the complex IFFT
    131         @// (its register contract is defined elsewhere; pDst and pFFTSpec are reloaded from the stack afterwards)
    132         BL    armSP_FFTInv_CCSToR_F32_preTwiddleRadix2_unsafe_vfp
    133 
    134 
    135 complexIFFT:
    136 @// Reload the spec pointer saved before the call, then fetch N, the twiddle table pointer and the buffer pointer
    137         M_LDR   pFFTSpec,pFFTSpecOnStack
    138         LDR     N, [pFFTSpec, #ARMsFFTSpec_N]
    139         LDR     pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
    140         LDR     pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]
    141 
    142         ASR     N,N,#1                  @// N/2 point complex IFFT
    143         ADD     pSrc,pOut,N,LSL #3      @// set pSrc as pOut1 = pBuf + 8 bytes * (N/2)
    144         M_LDR   pDst,pDstOnStack
    145 
    146         CLZ     order,N                 @// N = 2^order
    147         RSB     order,order,#31         @// order = 31 - CLZ(N), with N already halved
    148         MOV     subFFTSize,#1
    149 @// order < 1: copy one complex value from pSrc to pDst and let FFTEnd scale it in place at pDst
    150         CMP     order,#1
    151         BGT     orderGreaterthan1       @// order > 1
    152         vldmlt.f32 pSrc, {x0r, x0i}
    153         vstmlt.f32 pDst, {x0r, x0i}
    154 
    155         MOVLT   pSrc,pDst
    156         BLT     FFTEnd
    157 @// order == 1: argTwiddle is r1 (same register as pDst), so pDst is copied to argDst first
    158         MOV     argDst,pDst             @// Set input args to fft stages
    159         MOV     argTwiddle,pTwiddle
    160 
    161         BL      armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace_unsafe_vfp
    162         B       FFTEnd
    163 
    164 
    165 orderGreaterthan1:
    166 @// Bit 1 of order selects the first-stage destination: set -> pDst; clear -> pOut, and pOut then takes pDst
    167         TST     order, #2               @// Set input args to fft stages
    168         MOVNE   argDst,pDst
    169         MOVEQ   argDst,pOut
    170         MOVEQ   pOut,pDst               @// Pass the first stage destination in RN5
    171         MOV     argTwiddle,pTwiddle     @// overwrites r1 (pDst), so it must follow the moves above
    172 
    173 
    174         @// Check for even or odd order: bit 0 clear -> radix-4 first stage, bit 0 set -> radix-8 first stage
    175 
    176         @// NOTE: The following combination of BLs works
    177         @// even though the first BL corrupts the flags. This is
    178         @// because the end of the "grpZeroSetLoop" loop inside
    179         @// armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace_unsafe_vfp sets
    180         @// the Z flag to EQ (so the BLNE is skipped after the BLEQ returns; not verifiable from this file)
    181 
    182         TST     order,#0x00000001
    183         BLEQ    armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace_unsafe_vfp
    184         BLNE    armSP_FFTInv_CToC_FC32_Radix8_fs_OutOfPlace_unsafe_vfp
    185 @// Run further radix-4 stages until subFFTNum == 1 (presumably updated by each stage call -- confirm in the callee)
    186 unscaledRadix4Loop:
    187         CMP        subFFTNum,#1
    188          BEQ        FFTEnd
    189          BL        armSP_FFTInv_CToC_FC32_Radix4_OutOfPlace_unsafe_vfp
    190          B        unscaledRadix4Loop
    191 
    192 FFTEnd:
    193 @// Scale subFFTSize complex values in place, starting at pSrc (presumably the final stage output -- confirm in the callees)
    194         vldm.f32 pSrc, {x0r, x0i}
    195 
    196         vmov.f32     fscale, subFFTSize         @// raw integer bits of subFFTSize, converted on the next line
    197         vcvt.f32.s32 fscale, fscale             @// fscale = subFFTSize as a float
    198         mov          round, #1
    199         vmov.f32     fone, round
    200         vcvt.f32.s32 fone, fone                 @// fone = 1.0
    201         vdiv.f32     fscale, fone, fscale       @// fscale = 1.0 / subFFTSize
    202 
    203 scaleFFTData:                                   @// N = subFFTSize
    204         SUBS    subFFTSize,subFFTSize,#1        @// the GT conditions below test the flags set here
    205         vmul.f32 x0r, x0r, fscale
    206         vmul.f32 x0i, x0i, fscale
    207         vstm.f32 pSrc!, {x0r, x0i}              @// store the scaled pair and advance pSrc past it
    208         vldmgt.f32 pSrc, {x0r, x0i}             @// load the next pair only if elements remain
    209 
    210         BGT     scaleFFTData
    211 
    212 
    213 End:
    214         @// Set return value
    215         MOV     result, #OMX_Sts_NoErr
    216 
    217         @// Write function tail
    218         M_END
    219 
    220 @//    ENDIF                                           @//ARM1136JS
    221 
    222 
    223       @// Guarding implementation by the processor name
    224 
    225 
    226 
    227     .end
    228