Home | History | Annotate | Download | only in neon
      1 @//
      2 @//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
      3 @//
      4 @//  Use of this source code is governed by a BSD-style license
      5 @//  that can be found in the LICENSE file in the root of the source
      6 @//  tree. An additional intellectual property rights grant can be found
      7 @//  in the file PATENTS.  All contributing project authors may
      8 @//  be found in the AUTHORS file in the root of the source tree.
      9 @//
     10 @//  This file was originally licensed as follows. It has been
     11 @//  relicensed with permission from the copyright holders.
     12 @//
     13 
     14 @//
     15 @// File Name:  omxSP_FFTInv_CCSToR_S32_Sfs_s.s
     16 @// OpenMAX DL: v1.0.2
     17 @// Last Modified Revision:   7469
     18 @// Last Modified Date:       Thu, 20 Sep 2007
     19 @//
     20 @// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
     21 @//
     22 @//
     23 @//
     24 @// Description:
      25 @// Compute the inverse FFT of a CCS-format (conjugate-symmetric) spectrum, producing a real signal, with scaling
     26 @//
     27 
     28 
     29 
     30 @// Include standard headers
     31 
     32 #include "dl/api/arm/armCOMM_s.h"
     33 #include "dl/api/arm/omxtypes_s.h"
     34 
     35 
     36 @// Import symbols required from other files
     37 @// (For example tables)
     38 
     39         .extern  armSP_FFTInv_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe
     40         .extern  armSP_FFTInv_CToC_SC32_Radix2_fs_OutOfPlace_unsafe
     41         .extern  armSP_FFTInv_CToC_SC32_Radix4_fs_OutOfPlace_unsafe
     42         .extern  armSP_FFTInv_CToC_SC32_Radix8_fs_OutOfPlace_unsafe
     43         .extern  armSP_FFTInv_CToC_SC32_Radix4_OutOfPlace_unsafe
     44         .extern  armSP_FFTInv_CToC_SC32_Sfs_Radix4_fs_OutOfPlace_unsafe
     45         .extern  armSP_FFTInv_CToC_SC32_Sfs_Radix8_fs_OutOfPlace_unsafe
     46         .extern  armSP_FFTInv_CToC_SC32_Sfs_Radix4_OutOfPlace_unsafe
     47         .extern  armSP_FFTInv_CToC_SC32_Sfs_Radix2_OutOfPlace_unsafe
     48         .extern  armSP_FFTInv_CToC_SC32_Radix2_OutOfPlace_unsafe
     49         .extern  armSP_FFTInv_CCSToR_S32_Sfs_preTwiddleRadix2_unsafe
     50         .extern  armSP_FFTInv_CCSToR_S32_preTwiddleRadix2_unsafe
     51 
     52 
     53 @// Set debugging level
     54 @//DEBUG_ON    SETL {TRUE}
     55 
     56 
     57 
     58 @// Guarding implementation by the processor name
     59 
     60 
     61 
     62       @// Guarding implementation by the processor name
     63 
     64 @// Import symbols required from other files
     65 @// (For example tables)
     66         .extern  armSP_FFTInv_CToC_SC32_Radix4_ls_OutOfPlace_unsafe
     67         .extern  armSP_FFTInv_CToC_SC32_Radix2_ls_OutOfPlace_unsafe
     68         .extern  armSP_FFTInv_CToC_SC32_Sfs_Radix2_ls_OutOfPlace_unsafe
     69         .extern  armSP_FFTInv_CToC_SC32_Sfs_Radix4_ls_OutOfPlace_unsafe
     70 
     71 
      72 @// Input registers: names for the four arguments as the code reads them on entry (r0-r3)
      73 @// NOTE: keep comments on their own lines in this section so that each define body is only a register name
      74 #define pSrc            r0
      75 #define pDst            r1
      76 #define pFFTSpec        r2
      77 #define scale           r3
      78 
      79 
      80 @// Output register: the status code is written to r0 at End, so result reuses the register of pSrc
      81 #define result          r0
     82 
      83 @// Local scratch registers
      84 @// Several names share one register (r4: argScale, tmpOrder, pTwiddle, x0r; r6: subFFTNum, N; r14: order, zero)
      85 #define argTwiddle      r1
      86 #define argDst          r2
      87 #define argScale        r4
      88 #define tmpOrder        r4
      89 #define pTwiddle        r4
      90 #define pOut            r5
      91 #define subFFTSize      r7
      92 #define subFFTNum       r6
      93 #define N               r6
      94 #define order           r14
      95 #define diff            r9
      96 @// Total number of radix stages required to complete the FFT
      97 #define count           r8
      98 #define x0r             r4
      99 #define x0i             r5
     100 #define diffMinusOne    r2
     101 #define round           r3
     102 @// The names defined below this line are not referenced by any instruction in this file
     103 #define pOut1           r2
     104 #define size            r7
     105 #define step            r8
     106 #define step1           r9
     107 #define twStep          r10
     108 #define pTwiddleTmp     r11
     109 #define argTwiddle1     r12
     110 #define zero            r14
    111 
     112 @// Neon registers
     113 @// Only dX0 and dShift are used by the instructions in this file; dShift, dX1 and dX0i all name D1
     114 #define dX0     D0.S32
     115 #define dShift  D1.S32
     116 #define dX1     D1.S32
     117 #define dY0     D2.S32
     118 #define dY1     D3.S32
     119 #define dX0r    D0.S32
     120 #define dX0i    D1.S32
     121 #define dX1r    D2.S32
     122 #define dX1i    D3.S32
     123 #define dW0r    D4.S32
     124 #define dW0i    D5.S32
     125 #define dW1r    D6.S32
     126 #define dW1i    D7.S32
     127 #define dT0     D8.S32
     128 #define dT1     D9.S32
     129 #define dT2     D10.S32
     130 #define dT3     D11.S32
     131 #define qT0     Q6.S64
     132 #define qT1     Q7.S64
     133 #define qT2     Q8.S64
     134 #define qT3     Q9.S64
     135 #define dY0r    D4.S32
     136 #define dY0i    D5.S32
     137 #define dY1r    D6.S32
     138 #define dY1i    D7.S32
     139 #define dzero   D20.S32
     140 @// Second group of aliases; these reuse D4-D7, D10 and D11 from the list above
     141 #define dY2     D4.S32
     142 #define dY3     D5.S32
     143 #define dW0     D6.S32
     144 #define dW1     D7.S32
     145 #define dW0Tmp  D10.S32
     146 #define dW1Neg  D11.S32
    147 
    148 
    149 
     150     @// Allocate the stack slot diffOnStack; later code stores the leftover shift amount there and FFTEnd reads it back
     151         M_ALLOC4        diffOnStack, 4
     152     @// Entry: r0 = pSrc, r1 = pDst, r2 = pFFTSpec, r3 = scale (see the register defines); r0 = OMX_Sts_NoErr on exit
     153     @// Write function header (M_START comes from armCOMM_s.h; r11 and d15 presumably bound the saved registers - TODO confirm)
     154         M_START     omxSP_FFTInv_CCSToR_S32_Sfs,r11,d15
     155 
     156 @ Byte offsets of the FFTSpec structure fields read below
     157         .set    ARMsFFTSpec_N, 0
     158         .set    ARMsFFTSpec_pBitRev, 4
     159         .set    ARMsFFTSpec_pTwiddle, 8
     160         .set    ARMsFFTSpec_pBuf, 12
     161 
     162         @// Define stack arguments (none are declared here)
     163 
     164         @// Read the transform size N from the spec (its log2 is computed later, at complexIFFT)
     165         LDR     N, [pFFTSpec, #ARMsFFTSpec_N]
     166 
     167         @// Read the twiddle table pointer and the pBuf pointer (kept in pOut)
     168         LDR     pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
     169         LDR     pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]
     170 
     171         @// N <= 1 is treated separately: shift the single input word right by scale (rounding) and store it
     172         CMP     N,#1
     173         BGT     sizeGreaterThanOne
     174         VLD1    dX0[0],[pSrc]                         @// load one 32-bit element into lane 0
     175         RSB     scale,scale,#0                        @// to use VRSHL for right shift by a variable
     176         VMOV    dShift[0],scale                       @// lane 0 of the shift vector = -scale
     177         VRSHL   dX0,dShift                            @// negative shift count = rounding right shift
     178         VST1    dX0[0],[pDst]                         @// only lane 0 is written back
     179 
     180         B       End
    181 
     182 sizeGreaterThanOne:
     183 
     184         @// Call the preTwiddle Radix2 stage before doing the complex IFFT
     185         @// scale == 0 calls the unscaled helper, scale > 0 the scaled (Sfs) one; a negative scale calls neither
     186         @// The BLGT is evaluated on the flags the first callee returns with. The original note says
     187         @// evenOddButterflyLoop in that callee sets the Z flag, presumably so that GT is false on return
     188         @// NOTE(review): this flag behaviour lives in the callee and cannot be checked from this file
     189         CMP     scale,#0
     190         BLEQ    armSP_FFTInv_CCSToR_S32_preTwiddleRadix2_unsafe
     191         BLGT    armSP_FFTInv_CCSToR_S32_Sfs_preTwiddleRadix2_unsafe
    192 
    193 
    194 
     195 complexIFFT:
     196         @// Halve N, derive order = log2(N), add order to scale, then dispatch on order
     197         ASR     N,N,#1                             @// N/2 point complex IFFT
     198         ADD     pSrc,pOut,N,LSL #3                 @// pSrc = pOut + 8*N bytes, N already halved (original note: pOut1)
     199         @// order = 31 - CLZ(N), which is log2(N) when N is a power of two
     200         CLZ     order,N                             @// N = 2^order
     201         RSB     order,order,#31
     202         MOV     subFFTSize,#1
     203         @//MOV     subFFTNum,N
     204 
     205         ADD     scale,scale,order                   @// FFTInverse has a final scaling factor by N
     206 
     207         CMP     order,#3
     208         BGT     orderGreaterthan3                   @// order > 3
     209         @// Orders 1..3 go to orderGreaterthan0; order 0 copies 8 bytes from pSrc to pDst and lets FFTEnd do the shift
     210         CMP     order,#1
     211         BGE     orderGreaterthan0                   @// order > 0
     212         M_STR   scale, diffOnStack,LT               @// order = 0
     213         VLD1    dX0,[pSrc]
     214         VST1    dX0,[pDst]
     215         MOV     pSrc,pDst                           @// FFTEnd shifts the data in place through pSrc
     216         BLT     FFTEnd                              @// branch if less than (not a call); flags from CMP order,#1, provided M_STR keeps them
    217 
     218 orderGreaterthan0:
     219         @// Buffers: order == 2 gives argDst = pOut and r5 = pDst; otherwise argDst = pDst and r5 keeps the pBuf pointer
     220         CMP     order,#2
     221         MOVNE   argDst,pDst
     222         MOVEQ   argDst,pOut
     223         MOVEQ   pOut,pDst                           @// Pass the first stage destination in RN5
     224         MOV     argTwiddle,pTwiddle                 @// argTwiddle shares r1 with pDst, so pDst is no longer in r1 after this
     225         @// Store diff = scale - order; FFTEnd reads it back and applies it as a right shift at the end
     226         SUB     diff,scale,order
     227         M_STR   diff, diffOnStack
     228         BGE     orderGreaterthan1                   @// flags still from CMP order,#2 (SUB has no S suffix; M_STR presumably keeps flags)
     229         BLLT    armSP_FFTInv_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe  @// order = 1
     230         B       FFTEnd
     231 
     232 orderGreaterthan1:
     233         MOV     tmpOrder,order                          @// tmpOrder = RN 4; order is r14 (LR), which BL overwrites
     234         BL      armSP_FFTInv_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe
     235         CMP     tmpOrder,#2                             @// relies on the callee preserving r4 - TODO confirm
     236         BLGT    armSP_FFTInv_CToC_SC32_Sfs_Radix2_OutOfPlace_unsafe
     237         BL      armSP_FFTInv_CToC_SC32_Sfs_Radix2_ls_OutOfPlace_unsafe
     238         B       FFTEnd
    239 
    240 
     241 orderGreaterthan3:
     242         @// NOTE(review): scale had order added in complexIFFT, so diff is the incoming scale unless the preTwiddle helper changed r3; for scale >= 0 only the BGE path runs
     243         SUBS    diff, scale, order                 @// diff = scale - order, sets the flags
     244         MOVGT   scale,order                        @// diff > 0: clamp scale to order
     245         BGE     specialScaleCase                   @// diff >= 0 (scaleEqualsOrder is then chosen there)
     246         CMP     scale,#0
     247         BEQ     specialScaleCase                   @// scale == 0 with diff < 0: the unscaled stages
     248         B       generalScaleCase                   @// everything else
    249 
     250 specialScaleCase:                                           @//  scale = 0 or scale = order  and order >= 2
     251         @// Buffers from bit 1 of order: set gives argDst = pDst; clear gives argDst = pOut and r5 = pDst
     252         TST     order, #2                           @// Set input args to fft stages
     253         MOVNE   argDst,pDst
     254         MOVEQ   argDst,pOut
     255         MOVEQ   pOut,pDst                           @// Pass the first stage destination in RN5
     256         MOV     argTwiddle,pTwiddle
     257         @// diff (from orderGreaterthan3) >= 0 selects the scaled stages; a negative diff falls through to the unscaled ones
     258         CMP      diff,#0
     259         M_STR    diff, diffOnStack
     260         BGE      scaleEqualsOrder
     261 
     262         @// Even order starts with the radix-4 first stage, odd order with the radix-8 first stage
     263         @// NOTE: The following combination of BLs is said to work even though the first
     264         @// BL would corrupt the flags. This is because the end of the grpZeroSetLoop loop inside
     265         @// armSP_FFTInv_CToC_SC32_Radix4_fs_OutOfPlace_unsafe sets the Z flag to EQ (not checkable from this file)
     266 
     267         TST     order,#0x00000001
     268         BLEQ    armSP_FFTInv_CToC_SC32_Radix4_fs_OutOfPlace_unsafe
     269         BLNE    armSP_FFTInv_CToC_SC32_Radix8_fs_OutOfPlace_unsafe
     270         @// subFFTNum shares r6 with N and is presumably updated by each stage routine - TODO confirm
     271         CMP        subFFTNum,#4
     272         BLT     FFTEnd
     273 
     274         @// Loop: BEQ tests the latest CMP subFFTNum,#4; equal runs the last-stage (ls) routine, otherwise one more stage
     275 unscaledRadix4Loop:
     276         BEQ        lastStageUnscaledRadix4
     277          BL        armSP_FFTInv_CToC_SC32_Radix4_OutOfPlace_unsafe
     278          CMP        subFFTNum,#4
     279          B        unscaledRadix4Loop
     280 
     281 lastStageUnscaledRadix4:
     282         BL      armSP_FFTInv_CToC_SC32_Radix4_ls_OutOfPlace_unsafe
     283         B        FFTEnd
    284 
    285 
     286 scaleEqualsOrder:
     287         @// Scaled (Sfs) stages: even order starts with the radix-4 first stage, odd order with the radix-8 first stage
     288         @// NOTE: The following combination of BLs is said to work even though the first
     289         @// BL would corrupt the flags. The original note credits the end of the grpZeroSetLoop loop inside the non-Sfs
     290         @// armSP_FFTInv_CToC_SC32_Radix4_fs_OutOfPlace_unsafe setting Z to EQ; the Sfs routine called here is presumed alike - TODO confirm
     291 
     292         TST     order,#0x00000001
     293         BLEQ    armSP_FFTInv_CToC_SC32_Sfs_Radix4_fs_OutOfPlace_unsafe
     294         BLNE    armSP_FFTInv_CToC_SC32_Sfs_Radix8_fs_OutOfPlace_unsafe
     295         @// subFFTNum shares r6 with N and is presumably updated by each stage routine - TODO confirm
     296         CMP        subFFTNum,#4
     297         BLT     FFTEnd
     298 
     299         @// Loop: BEQ tests the latest CMP subFFTNum,#4; equal runs the last-stage (ls) routine, otherwise one more stage
     300 scaledRadix4Loop:
     301         BEQ        lastStageScaledRadix4
     302          BL        armSP_FFTInv_CToC_SC32_Sfs_Radix4_OutOfPlace_unsafe
     303          CMP        subFFTNum,#4
     304          B        scaledRadix4Loop
     305 
     306 lastStageScaledRadix4:
     307         BL      armSP_FFTInv_CToC_SC32_Sfs_Radix4_ls_OutOfPlace_unsafe
     308         B        FFTEnd
    309 
     310 generalScaleCase:                                               @// 0 < scale < order and order >= 2
     311         @// Determine the correct destination buffer; note that diff here is order - scale (opposite sign to the other paths)
     312         SUB     diff,order,scale
     313         TST     diff,#0x01
     314         ADDEQ   count,scale,diff,LSR #1         @// count = scale + (order - scale)/2
     315         MOVNE   count,order
     316         TST     count,#0x01                     @// Is count even or odd ?
     317         @// Odd count gives argDst = pDst; even count gives argDst = pOut and r5 = pDst
     318         MOVNE   argDst,pDst                     @// Set input args to fft stages
     319         MOVEQ   argDst,pOut
     320         MOVEQ   pOut,pDst                       @// Pass the first stage destination in RN5
     321         MOV     argTwiddle,pTwiddle
     322 
     323         M_STR   diff, diffOnStack
     324         @// Run scale scaled radix-2 stages: the first-stage routine once, then the generic stage while the counter stays positive
     325         MOV     argScale,scale                  @// Put scale in RN4 so as to save and restore
     326         BL      armSP_FFTInv_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe     @// scaled first stage
     327         SUBS    argScale,argScale,#1
     328 
     329 scaledRadix2Loop:
     330         BLGT    armSP_FFTInv_CToC_SC32_Sfs_Radix2_OutOfPlace_unsafe
     331         SUBS    argScale,argScale,#1            @// save and restore scale (RN4) in the scaled stages
     332         BGT     scaledRadix2Loop
     333 
     334 
     335         M_LDR   diff, diffOnStack
     336         @// Parity of diff (order - scale): even continues with unscaled radix-4 stages, odd with unscaled radix-2 stages
     337         TST     diff,#0x00000001
     338         BEQ     generalUnscaledRadix4Loop
     339         B       unscaledRadix2Loop
     340 
     341 generalUnscaledRadix4Loop:
     342         CMP        subFFTNum,#4
     343          BEQ        generalLastStageUnscaledRadix4
     344          BL        armSP_FFTInv_CToC_SC32_Radix4_OutOfPlace_unsafe
     345          B        generalUnscaledRadix4Loop
     346 
     347 generalLastStageUnscaledRadix4:
     348         BL      armSP_FFTInv_CToC_SC32_Radix4_ls_OutOfPlace_unsafe
     349         B        End
     350 
     351         @// Both exits of this path branch to End, so the shift pass at FFTEnd is not run
     352 unscaledRadix2Loop:
     353         CMP        subFFTNum,#2
     354          BEQ        generalLastStageUnscaledRadix2
     355          BL        armSP_FFTInv_CToC_SC32_Radix2_OutOfPlace_unsafe
     356          B        unscaledRadix2Loop
     357 
     358 generalLastStageUnscaledRadix2:
     359         BL      armSP_FFTInv_CToC_SC32_Radix2_ls_OutOfPlace_unsafe
     360         B        End
    361 
    362 
     363 FFTEnd:                                               @// Does only the scaling
     364         @// Reload the shift amount saved in diffOnStack; nothing is shifted when it is zero or negative
     365         M_LDR   diff, diffOnStack
     366         CMP     diff,#0
     367         BLE     End                                 @// branch if less or equal (not a call)
     368 
     369         RSB     diff,diff,#0                        @// to use VRSHL for right shift by a variable
     370         VDUP    dShift,diff                         @// both 32-bit lanes of the shift vector = -diff
     371         @// Each pass shifts one D register (two 32-bit lanes) in place and advances pSrc by 8 bytes
     372 scaleFFTData:                                           @// N = subFFTSize  ; dataptr = pDst  ; scale = diff
     373         VLD1    {dX0},[pSrc]            @// pSrc contains pDst pointer
     374         SUBS    subFFTSize,subFFTSize,#1        @// loop counter; these flags feed the BGT below
     375         VRSHL   dX0,dShift                      @// negative shift count = rounding right shift
     376         VST1    {dX0},[pSrc]!
     377 
     378         BGT     scaleFFTData
     379 
     380 
     381 End:
     382         @// Set return value: every path through this function returns OMX_Sts_NoErr
     383         MOV     result, #OMX_Sts_NoErr
     384 
     385         @// Write function tail (M_END pairs with the M_START above)
     386         M_END
    387 
    388 
    389 
    390         .end
    391