Home | History | Annotate | Download | only in neon
      1 @
      2 @ Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
      3 @
      4 @ Use of this source code is governed by a BSD-style license
      5 @ that can be found in the LICENSE file in the root of the source
      6 @ tree. An additional intellectual property rights grant can be found
      7 @ in the file PATENTS.  All contributing project authors may
      8 @ be found in the AUTHORS file in the root of the source tree.
      9 @
     10 @ Some code in this file was originally from file
     11 @ omxSP_FFTInv_CToC_SC16_Sfs_s.S which was licensed as follows.
     12 @ It has been relicensed with permission from the copyright holders.
     13 @
     14 
     15 @
     16 @ File Name:  omxSP_FFTInv_CToC_SC16_Sfs_s.s
     17 @ OpenMAX DL: v1.0.2
     18 @ Last Modified Revision:   6729
     19 @ Last Modified Date:       Tue, 17 Jul 2007
     20 @
     21 @ (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
     22 @
     23 
     24 @
     25 @ Description:
     26 @ Compute an inverse FFT for a 16-bit real signal, with complex FFT routines.
     27 @
     28 
     29 #include "dl/api/arm/armCOMM_s.h"
     30 #include "dl/api/arm/omxtypes_s.h"
     31 
     32 .extern  armSP_FFTInv_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe
     33 .extern  armSP_FFTInv_CToC_SC16_Radix2_fs_OutOfPlace_unsafe
     34 .extern  armSP_FFTInv_CToC_SC16_Radix4_fs_OutOfPlace_unsafe
     35 .extern  armSP_FFTInv_CToC_SC16_Radix4_ls_OutOfPlace_unsafe
     36 .extern  armSP_FFTInv_CToC_SC16_Radix8_fs_OutOfPlace_unsafe
     37 .extern  armSP_FFTInv_CToC_SC16_Radix4_OutOfPlace_unsafe
     38 .extern  armSP_FFTInv_CToC_SC16_Sfs_Radix4_fs_OutOfPlace_unsafe
     39 .extern  armSP_FFTInv_CToC_SC16_Sfs_Radix4_ls_OutOfPlace_unsafe
     40 .extern  armSP_FFTInv_CToC_SC16_Sfs_Radix8_fs_OutOfPlace_unsafe
     41 .extern  armSP_FFTInv_CToC_SC16_Sfs_Radix4_OutOfPlace_unsafe
     42 .extern  armSP_FFTInv_CToC_SC16_Sfs_Radix2_OutOfPlace_unsafe
     43 .extern  armSP_FFTInv_CToC_SC16_Radix2_OutOfPlace_unsafe
     44 .extern  armSP_FFTInv_CToC_SC16_Sfs_Radix2_ls_OutOfPlace_unsafe
     45 .extern  armSP_FFTInv_CToC_SC16_Radix2_ls_OutOfPlace_unsafe
     46 .extern  armSP_FFTInv_CToC_SC16_Radix2_ps_OutOfPlace_unsafe
     47 .extern  armSP_FFTInv_CToC_SC16_Sfs_Radix2_ps_OutOfPlace_unsafe
     48 
     @ ARM core register aliases, grouped by physical register.
@ Several names map onto the same register, so two names that share a
@ register can never hold different values at the same time.

@ r0: first input argument on entry, status code on return
#define pSrc            r0
#define result          r0

@ r1: second input argument, later reused as the twiddle argument
#define pDst            r1
#define argTwiddle      r1

@ r2: third input argument, later reused as a destination argument
#define pFFTSpec        r2
#define argDst          r2
#define diffMinusOne    r2
#define pOut1           r2

@ r3: fourth input argument
#define scale           r3
#define round           r3

@ r4: local scratch
#define argScale        r4
#define pTwiddle        r4
#define tmpOrder        r4
#define x0r             r4

@ r5: local scratch
#define pOut            r5
#define x0i             r5

@ r6: local scratch
#define subFFTNum       r6
#define N               r6

@ r7: local scratch
#define subFFTSize      r7
#define size            r7

@ r8: local scratch
@ count: total number of radix stages to complete the FFT
#define count           r8
#define step            r8

@ r9: local scratch
#define diff            r9
#define step1           r9

@ r10 - r12: local scratch
#define twStep          r10
#define pTwiddleTmp     r11
#define argTwiddle1     r12

@ r14 is the link register: every BL overwrites it, so a value kept under
@ one of these names must be copied elsewhere before a call if it is still
@ needed afterwards.
#define order           r14
#define zero            r14
     84 
     @ Neon register aliases, grouped by physical register.
@ The suffix after the dot is the lane type applied when the alias is used
@ as an instruction operand. Qn overlaps D(2n) and D(2n+1).

@ Q0 (overlaps D0 and D1)
#define qShift          Q0.s16

@ D0
#define dX0             D0.S32
#define dX0r            D0.S32

@ D1
#define dShift          D1.S32
#define dX1             D1.S32
#define dX0i            D1.S32

@ D2
#define dY0             D2.S32
#define dX1r            D2.S32

@ D3
#define dY1             D3.S32
#define dX1i            D3.S32

@ D4
#define dW0r            D4.S32
#define dY0r            D4.S32
#define dY2             D4.S32

@ D5
#define dW0i            D5.S32
#define dY0i            D5.S32
#define dY3             D5.S32

@ D6
#define dW1r            D6.S32
#define dY1r            D6.S32
#define dW0             D6.S32

@ D7
#define dW1i            D7.S32
#define dY1i            D7.S32
#define dW1             D7.S32

@ D8 - D11
#define dT0             D8.S32
#define dT1             D9.S32
#define dT2             D10.S32
#define dW0Tmp          D10.S32
#define dT3             D11.S32
#define dW1Neg          D11.S32

@ D20
#define dzero           D20.S32

@ Q6 and Q7, each available with 64-bit and with 16-bit lanes
#define qT0             Q6.S64
#define qT0s            Q6.S16
#define qT1             Q7.S64
#define qT1s            Q7.S16

@ Q8 and Q9
#define qT2             Q8.S64
#define qT3             Q9.S64
    121 
    122 
    123 
    @-------------------------------------------------------------------------
    @ omxSP_FFTInv_CCSToR_S16_Sfs
    @
    @ Inverse FFT producing a 16-bit real signal, built from the complex
    @ SC16 inverse FFT stage routines.
    @
    @ In:   r0 (pSrc)      source pointer; not read here before the
    @                      preTwiddle call, presumably consumed by it
    @       r1 (pDst)      destination buffer
    @       r2 (pFFTSpec)  spec structure; the words at offsets 0, 8 and 12
    @                      are read as N, pTwiddle and pBuf
    @       r3 (scale)     scale factor
    @ Out:  r0 (result)    always OMX_Sts_NoErr
    @
    @ Flow:
    @   1. preTwiddle radix-2 stage, scaled or unscaled variant by scale
    @   2. N/2 point complex inverse FFT, stage routines chosen by
    @      order = log2(N/2)
    @   3. FFTEnd: remaining right shift, read back from diffOnStack,
    @      applied in place through pSrc
    @
    @ The order alias lives in r14 (LR) and is destroyed by the first BL in
    @ each path, so every test on it is done before that BL or on a copy.
    @-------------------------------------------------------------------------

    @ 16-bit lane views of D0 and D1, used only by the short tail-scaling
    @ loop so that both halves of each 32-bit word are shifted separately.
#define dX0s            D0.S16
#define dShifts         D1.S16

    @ Allocate stack memory required by the function
        M_ALLOC4        diffOnStack, 4

    @ Write function header
        M_START     omxSP_FFTInv_CCSToR_S16_Sfs,r11,d15

@ Structure offsets for the FFTSpec
        .set    ARMsFFTSpec_N, 0
        .set    ARMsFFTSpec_pBitRev, 4
        .set    ARMsFFTSpec_pTwiddle, 8
        .set    ARMsFFTSpec_pBuf, 12

        @ Define stack arguments

        @ Read the size from structure and take log
        LDR     N, [pFFTSpec, #ARMsFFTSpec_N]

        @ Read other structure parameters
        LDR     pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
        LDR     pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]

        @ Call the preTwiddle Radix2 stage before doing the complex IFFT

        @ The following conditional BL combination would work since
        @ evenOddButterflyLoop in the first call would set Z flag to zero
        @ NOTE(review): with a negative scale neither BL below is taken,
        @ so no preTwiddle stage runs - confirm callers never pass one.

        CMP     scale,#0
        BLEQ    armSP_FFTInv_CCSToR_S16_preTwiddleRadix2_unsafe
        BLGT    armSP_FFTInv_CCSToR_S16_Sfs_preTwiddleRadix2_unsafe

complexIFFT:

        ASR     N,N,#1                              @ N/2 point complex IFFT
        ADD     pSrc,pOut,N,LSL #2                  @ set pSrc as pOut1

        CLZ     order,N                             @ N = 2^order
        RSB     order,order,#31                     @ order = 31 - clz(N)
        MOV     subFFTSize,#1

        ADD     scale,scale,order                   @ FFTInverse has a final scaling factor by N

        CMP     order,#3
        BGT     orderGreaterthan3                   @ order > 3

        CMP     order,#1
        BGE     orderGreaterthan0                   @ order > 0
        @ order = 0: copy one 32-bit word to pDst and leave the whole
        @ scale for FFTEnd
        M_STR   scale, diffOnStack,LT               @ order = 0
        LDRLT   x0r,[pSrc]
        STRLT   x0r,[pDst]
        MOVLT   pSrc,pDst
        BLT     FFTEnd

orderGreaterthan0:
        @ set the buffers appropriately for various orders
        CMP     order,#2
        MOVNE   argDst,pDst
        MOVEQ   argDst,pOut
        MOVEQ   pOut,pDst                           @ Pass the first stage destination in RN5
        MOV     argTwiddle,pTwiddle
        @ Store the scale factor and scale at the end
        SUB     diff,scale,order
        M_STR   diff, diffOnStack
        @ The flags tested below are still those of CMP order,#2 above
        BGE     orderGreaterthan1
        BLLT    armSP_FFTInv_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe  @ order = 1
        B       FFTEnd


orderGreaterthan1:
        @ order is 2 or 3: first stage, one middle stage only when
        @ order is 3, then the last stage
        MOV     tmpOrder,order                      @ tmpOrder = RN 4
        BL      armSP_FFTInv_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe
        CMP     tmpOrder,#2
        BLGT    armSP_FFTInv_CToC_SC16_Sfs_Radix2_ps_OutOfPlace_unsafe
        BL      armSP_FFTInv_CToC_SC16_Sfs_Radix2_ls_OutOfPlace_unsafe
        B       FFTEnd




orderGreaterthan3:
        @ check scale = 0 or scale = order
        SUB     diff, scale, order                  @ scale > order

        TST     order, #2                           @ Set input args to fft stages
        MOVNE   argDst,pDst
        MOVEQ   argDst,pOut
        MOVEQ   pOut,pDst                           @ Pass the first stage destination in RN5
        MOV     argTwiddle,pTwiddle

        CMP     diff,#0
        M_STR   diff, diffOnStack
        BGE     scaleEqualsOrder                    @ diff >= 0: use the scaled (Sfs) stages

        @check for even or odd order
        @ NOTE: The following combination of BL's would work fine eventhough the first
        @ BL would corrupt the flags. This is because the end of the "grpZeroSetLoop" loop inside
        @ armSP_FFTInv_CToC_SC16_Radix4_fs_OutOfPlace_unsafe sets the Z flag to EQ

        TST     order,#0x00000001
        BLEQ    armSP_FFTInv_CToC_SC16_Radix4_fs_OutOfPlace_unsafe
        BLNE    armSP_FFTInv_CToC_SC16_Radix8_fs_OutOfPlace_unsafe

        CMP     subFFTNum,#4
        BLT     FFTEnd

unscaledRadix4Loop:
        @ Flags here always come from a CMP subFFTNum,#4
        BEQ     lastStageUnscaledRadix4
        BL      armSP_FFTInv_CToC_SC16_Radix4_OutOfPlace_unsafe
        CMP     subFFTNum,#4
        B       unscaledRadix4Loop

lastStageUnscaledRadix4:
        BL      armSP_FFTInv_CToC_SC16_Radix4_ls_OutOfPlace_unsafe
        B       FFTEnd

scaleEqualsOrder:
        @check for even or odd order
        @ NOTE: The following combination of BL's would work fine eventhough the first
        @ BL would corrupt the flags. This is because the end of the "grpZeroSetLoop" loop inside
        @ armSP_FFTInv_CToC_SC16_Sfs_Radix4_fs_OutOfPlace_unsafe sets the Z flag to EQ

        TST     order,#0x00000001
        BLEQ    armSP_FFTInv_CToC_SC16_Sfs_Radix4_fs_OutOfPlace_unsafe
        BLNE    armSP_FFTInv_CToC_SC16_Sfs_Radix8_fs_OutOfPlace_unsafe

        CMP     subFFTNum,#4
        BLT     FFTEnd

scaledRadix4Loop:
        @ Flags here always come from a CMP subFFTNum,#4
        BEQ     lastStageScaledRadix4
        BL      armSP_FFTInv_CToC_SC16_Sfs_Radix4_OutOfPlace_unsafe
        CMP     subFFTNum,#4
        B       scaledRadix4Loop

lastStageScaledRadix4:
        BL      armSP_FFTInv_CToC_SC16_Sfs_Radix4_ls_OutOfPlace_unsafe

FFTEnd:                                         @ Does only the scaling

        M_LDR   diff, diffOnStack
        CMP     diff,#0
        BLE     End                             @ no residual shift to apply

        RSB     diff,diff,#0                    @ negative count makes VSHL and VRSHL shift right
        VDUP    qShift,diff                     @ -diff in every 16-bit lane of Q0 (D0 and D1)

        @ Use parallel loads for bigger FFT size.
        CMP     subFFTSize, #8
        BLT     scaleLessFFTData

scaleFFTData:
        @ 32 bytes per pass, counted as 8 units of subFFTSize
        @ NOTE(review): this loop uses VSHL while the short loop below
        @ uses the rounding VRSHL, so the two paths can differ in the
        @ least significant bit - confirm which one is intended.
        VLD1    {qT0s, qT1s},[pSrc:256]         @ pSrc contains pDst pointer
        SUBS    subFFTSize,subFFTSize,#8
        VSHL    qT0s,qShift
        VSHL    qT1s,qShift
        VST1    {qT0s, qT1s},[pSrc:256]!
        BGT     scaleFFTData
        B       End

scaleLessFFTData:                               @ N = subFFTSize  ; dataptr = pDst  ; scale = diff
        @ One 32-bit word (two 16-bit values) per pass. The load overwrites
        @ lane 0 of D0, the low half of qShift; the shift counts are taken
        @ from D1, which stays intact.
        VLD1    {dX0[0]},[pSrc]                 @ pSrc contains pDst pointer
        SUBS    subFFTSize,subFFTSize,#1
        @ Shift with 16-bit lanes: a 32-bit lane shift would move bits of the
        @ upper halfword into the lower one and drop its sign extension.
        VRSHL   dX0s,dShifts
        VST1    {dX0[0]},[pSrc]!
        BGT     scaleLessFFTData

End:
        @ Set return value
        MOV     result, #OMX_Sts_NoErr

        @ Write function tail
        M_END
    295 
    296 
    297 
    298 
    299 
    300 
    301     .end
    302