Home | History | Annotate | Download | only in neon
      1 @//
      2 @//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
      3 @//
      4 @//  Use of this source code is governed by a BSD-style license
      5 @//  that can be found in the LICENSE file in the root of the source
      6 @//  tree. An additional intellectual property rights grant can be found
      7 @//  in the file PATENTS.  All contributing project authors may
      8 @//  be found in the AUTHORS file in the root of the source tree.
      9 @//
     10 @//  This is a modification of omxSP_FFTFwd_CToC_SC32_Sfs_s.s
     11 @//  to support float instead of SC32.
     12 @//
     13 
     14 @//
     15 @// Description:
     16 @// Compute a forward FFT for a complex signal
     17 @//
     18 @//
     19 
     20 
     21 @// Include standard headers
     22 
     23 #include "dl/api/arm/armCOMM_s.h"
     24 #include "dl/api/arm/omxtypes_s.h"
     25 
     26 @// Import symbols required from other files
     27 @// (For example tables)
     28 
     29         .extern  armSP_FFTFwd_CToC_FC32_Radix2_fs_OutOfPlace_unsafe
     30         .extern  armSP_FFTFwd_CToC_FC32_Radix4_fs_OutOfPlace_unsafe
     31         .extern  armSP_FFTFwd_CToC_FC32_Radix8_fs_OutOfPlace_unsafe
     32         .extern  armSP_FFTFwd_CToC_FC32_Radix4_OutOfPlace_unsafe
     33         .extern  armSP_FFTFwd_CToC_FC32_Radix2_OutOfPlace_unsafe
     34 
     35 @// Set debugging level
     36 @//DEBUG_ON    SETL {TRUE}
     37 
     38 
     39 
     40 @// Guarding implementation by the processor name
     41 
     42 
     43 
     44     @// Guarding implementation by the processor name
     45 
     46 @// Import symbols required from other files
     47 @// (For example tables)
     48         .extern  armSP_FFTFwd_CToC_FC32_Radix2_ls_OutOfPlace_unsafe
     49         .extern  armSP_FFTFwd_CToC_FC32_Sfs_Radix2_ls_OutOfPlace_unsafe
     50 
     51 
     52 @// Input registers (values on entry)
     53 
     54 #define pSrc            r0
     55 #define pDst            r1
     56 #define pFFTSpec        r2
     57 
     58 
     59 @// Output registers (result shares r0 with pSrc)
     60 #define result          r0
     61 
     62 @// Local scratch registers
     63 @// Register sharing: r1 = pDst/argTwiddle, r2 = pFFTSpec/argDst/diffMinusOne, r4 = argScale/tmpOrder/pTwiddle/x0r, r5 = pOut/x0i, r6 = subFFTNum/N; order is r14 (the link register), which every BL overwrites.
     64 #define argTwiddle      r1
     65 #define argDst          r2
     66 #define argScale        r4
     67 #define tmpOrder        r4
     68 #define pTwiddle        r4
     69 #define pOut            r5
     70 #define subFFTSize      r7
     71 #define subFFTNum       r6
     72 #define N               r6
     73 #define order           r14
     74 #define diff            r9
     75 @// Total number of radix stages required to complete the FFT
     76 #define count           r8
     77 #define x0r             r4
     78 #define x0i             r5
     79 #define diffMinusOne    r2
     80 @// argScale, tmpOrder, diff, count, x0r, x0i and diffMinusOne are not referenced by the instructions in this listing.
     81 @// Neon registers
     82 @// dX0 names D0 (a 64-bit register) with an F32 element type
     83 #define dX0     D0.F32
     84 
     85 
     86     @// omxSP_FFTFwd_CToC_FC32_Sfs: derives order = floor(log2(N)) from the FFT spec and chains the armSP_FFTFwd_CToC_FC32_* stage routines selected by that order.
     87     @// In: r0 = pSrc, r1 = pDst, r2 = pFFTSpec (N at +0, pTwiddle at +8, pBuf at +12).  Out: r0 = OMX_Sts_NoErr on every path.
     88     @// Write function header (M_START comes from the included armCOMM_s.h; r11,d15 presumably name the highest registers to save - TODO confirm)
     89         M_START     omxSP_FFTFwd_CToC_FC32_Sfs,r11,d15
     90 
     91 @// Byte offsets of the FFTSpec fields (pBitRev is defined but not read in this routine)
     92         .set    ARMsFFTSpec_N, 0
     93         .set    ARMsFFTSpec_pBitRev, 4
     94         .set    ARMsFFTSpec_pTwiddle, 8
     95         .set    ARMsFFTSpec_pBuf, 12
     96 
     97         @// Define stack arguments (none are defined in this routine)
     98 
     99         @// Read the transform size N from the spec; its log2 is taken below
    100         LDR     N, [pFFTSpec, #ARMsFFTSpec_N]       @// r6 = N; the same register is later read as subFFTNum
    101 
    102         @// Read other structure parameters
    103         LDR     pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
    104         LDR     pOut, [pFFTSpec, #ARMsFFTSpec_pBuf] @// pOut = the spec pBuf field, used as the alternate destination buffer
    105 
    106         CLZ     order,N                             @// N = 2^order
    107         RSB     order,order,#31                     @// order = 31 - clz(N) = floor(log2(N))
    108         MOV     subFFTSize,#1                       @// r7 is not read again here; presumably consumed by the stage routines - TODO confirm
    109         @//MOV     subFFTNum,N                      @// not needed: subFFTNum and N are both r6
    110 
    111         CMP     order,#3
    112         BGT     orderGreaterthan3                   @// order > 3
    113         @// order is at most 3 from here: only radix-2 stages (or a plain copy) are used
    114         CMP     order,#1
    115         BGE     orderGreaterthan0                   @// order >= 1
    116         VLD1    dX0,[pSrc]                          @// order below 1 (e.g. N == 1): copy 64 bits from pSrc ...
    117         VST1    dX0,[pDst]                          @// ... to pDst
    118         MOV     pSrc,pDst                           @// r0 is overwritten with the status at FFTEnd, so this value is not returned
    119         BLT     FFTEnd                              @// always taken: VLD1/VST1/MOV leave the flags of CMP order,#1 intact
    120 
    121 orderGreaterthan0:
    122         @// Choose the first-stage destination: order == 2 writes to pOut first, orders 1 and 3 write to pDst
    123         CMP     order,#2
    124         MOVNE   argDst,pDst                         @// argDst is r2, so pFFTSpec is overwritten from here on
    125         MOVEQ   argDst,pOut
    126         @// Pass the first stage destination in RN5 (r5); when EQ the two buffers swap between r2 and r5
    127         MOVEQ   pOut,pDst                           @// must precede the next MOV, which overwrites r1 (pDst)
    128         MOV     argTwiddle,pTwiddle                 @// argTwiddle is r1: the pDst alias is gone from here on
    129 
    130         CMP     order,#1                            @// order (r14) has to be tested before a BL overwrites it
    131         BGT     orderGreaterthan1
    132         @// order = 1
    133         BL    armSP_FFTFwd_CToC_FC32_Radix2_fs_OutOfPlace_unsafe
    134         B       FFTEnd
    135 
    136 orderGreaterthan1:
    137         CMP     order,#2
    138         BGT     orderGreaterthan2
    139         @// order = 2: Radix2_fs followed by Radix2_ls
    140         BL    armSP_FFTFwd_CToC_FC32_Radix2_fs_OutOfPlace_unsafe
    141         BL    armSP_FFTFwd_CToC_FC32_Radix2_ls_OutOfPlace_unsafe
    142         B       FFTEnd
    143 
    144 orderGreaterthan2:                                                                     @// order = 3: Radix2_fs, Radix2, Radix2_ls
    145         BL    armSP_FFTFwd_CToC_FC32_Radix2_fs_OutOfPlace_unsafe
    146         BL    armSP_FFTFwd_CToC_FC32_Radix2_OutOfPlace_unsafe
    147         BL    armSP_FFTFwd_CToC_FC32_Radix2_ls_OutOfPlace_unsafe
    148         B       FFTEnd
    149 
    150 orderGreaterthan3:
    151         @// Set input args to fft stages
    152         TST     order, #2                           @// bit 1 of order selects which buffer the first stage writes
    153         MOVNE   argDst,pDst                         @// argDst is r2, so pFFTSpec is overwritten from here on
    154         MOVEQ   argDst,pOut
    155         @// Pass the first stage destination in RN5 (r5); when EQ the two buffers swap between r2 and r5
    156         MOVEQ   pOut,pDst                           @// must precede the next MOV, which overwrites r1 (pDst)
    157         MOV     argTwiddle,pTwiddle                 @// argTwiddle is r1: the pDst alias is gone from here on
    158 
    159         @// Check for even or odd order: even order calls Radix4_fs, odd order calls Radix8_fs
    160         @// NOTE: The BLEQ/BLNE pair below works even though the first call
    161         @// corrupts the flags. This is because the end of
    162         @// the "grpZeroSetLoop" loop inside
    163         @// armSP_FFTFwd_CToC_FC32_Radix4_fs_OutOfPlace_unsafe sets the Z flag
    164         @// to EQ, so the BLNE is not taken after that call returns.
    165 
    166         TST     order,#0x00000001                   @// last read of order (r14) before a BL overwrites it
    167         BLEQ    armSP_FFTFwd_CToC_FC32_Radix4_fs_OutOfPlace_unsafe
    168         BLNE    armSP_FFTFwd_CToC_FC32_Radix8_fs_OutOfPlace_unsafe
    169 
    170         CMP        subFFTNum,#4                     @// r6 as left by the first stage; the loop relies on the callees updating it - TODO confirm
    171         BLT     FFTEnd
    172 
    173 
    174 unscaledRadix4Loop:
    175         BEQ        lastStageUnscaledRadix4          @// flags come from the CMP subFFTNum,#4 before the loop or at its bottom
    176          BL        armSP_FFTFwd_CToC_FC32_Radix4_OutOfPlace_unsafe
    177          CMP        subFFTNum,#4
    178          B        unscaledRadix4Loop
    179 
    180 lastStageUnscaledRadix4:
    181         BL      armSP_FFTFwd_CToC_FC32_Radix4_ls_OutOfPlace_unsafe @// NOTE(review): no .extern line for this symbol in this file; confirm the assembler resolves it
    182         B        FFTEnd                             @// target is the label directly below
    183 
    184 FFTEnd:
    185 
    186         @// Set return value
    187         MOV     result, #OMX_Sts_NoErr              @// every path reports success; this routine has no error exit
    188 
    189         @// Write function tail
    190         M_END
    191 
    192         .end
    193