Home | History | Annotate | Download | only in neon
      1 @//
      2 @//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
      3 @//
      4 @//  Use of this source code is governed by a BSD-style license
      5 @//  that can be found in the LICENSE file in the root of the source
      6 @//  tree. An additional intellectual property rights grant can be found
      7 @//  in the file PATENTS.  All contributing project authors may
      8 @//  be found in the AUTHORS file in the root of the source tree.
      9 @//
     10 @//  This file was originally licensed as follows. It has been
     11 @//  relicensed with permission from the copyright holders.
     12 @//
     13 
     14 @//
     15 @// File Name:  omxSP_FFTInv_CToC_SC32_Sfs_s.s
     16 @// OpenMAX DL: v1.0.2
     17 @// Last Modified Revision:   6675
     18 @// Last Modified Date:       Fri, 06 Jul 2007
     19 @//
     20 @// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
     21 @//
     22 @//
     23 @//
     24 @// Description:
     25 @// Compute an inverse FFT for a complex signal
     26 @//
     27 
     28 
     29 @// Include standard headers
     30 
     31 #include "dl/api/arm/armCOMM_s.h"
     32 #include "dl/api/arm/omxtypes_s.h"
     33 
     34 @// Import symbols required from other files
     35 @// (For example tables)
     36 
     37         .extern  armSP_FFTInv_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe
     38         .extern  armSP_FFTInv_CToC_SC32_Radix2_fs_OutOfPlace_unsafe
     39         .extern  armSP_FFTInv_CToC_SC32_Radix4_fs_OutOfPlace_unsafe
     40         .extern  armSP_FFTInv_CToC_SC32_Radix8_fs_OutOfPlace_unsafe
     41         .extern  armSP_FFTInv_CToC_SC32_Radix4_OutOfPlace_unsafe
     42         .extern  armSP_FFTInv_CToC_SC32_Sfs_Radix4_fs_OutOfPlace_unsafe
     43         .extern  armSP_FFTInv_CToC_SC32_Sfs_Radix8_fs_OutOfPlace_unsafe
     44         .extern  armSP_FFTInv_CToC_SC32_Sfs_Radix4_OutOfPlace_unsafe
     45         .extern  armSP_FFTInv_CToC_SC32_Sfs_Radix2_OutOfPlace_unsafe
     46         .extern  armSP_FFTInv_CToC_SC32_Radix2_OutOfPlace_unsafe
     47 
     48 @// Set debugging level
     49 @//DEBUG_ON    SETL {TRUE}
     50 
     51 
     52 
     53 @// Guarding implementation by the processor name
     54 
     55 
     56 
     57       @// Guarding implementation by the processor name
     58 
     59 @// Import symbols required from other files
     60 @// (For example tables)
     61         .extern  armSP_FFTInv_CToC_SC32_Radix4_ls_OutOfPlace_unsafe
     62         .extern  armSP_FFTInv_CToC_SC32_Radix2_ls_OutOfPlace_unsafe
     63         .extern  armSP_FFTInv_CToC_SC32_Sfs_Radix2_ls_OutOfPlace_unsafe
     64         .extern  armSP_FFTInv_CToC_SC32_Sfs_Radix4_ls_OutOfPlace_unsafe
     65 
     66 
     67 @//Input Registers
        @// Symbolic names for the core and NEON registers used by
        @// omxSP_FFTInv_CToC_SC32_Sfs. Several names deliberately share one
        @// physical register; each alias is only valid during the phase of the
        @// function that uses it, so check for overlap before adding new uses.
     68 
     69 #define pSrc	r0
     70 #define pDst	r1
     71 #define pFFTSpec	r2
     72 #define scale	r3
     73 
     74 
     75 @// Output registers
        @// result shares r0 with pSrc; it is only written at the very end.
     76 #define result	r0
     77 
     78 @//Local Scratch Registers
     79 
        @// argTwiddle (r1) overwrites pDst and argDst (r2) overwrites pFFTSpec,
        @// so the incoming arguments must be consumed before these are set up.
     80 #define argTwiddle	r1
     81 #define argDst	r2
        @// r4 is shared by argScale, tmpOrder, pTwiddle and x0r.
     82 #define argScale	r4
     83 #define tmpOrder	r4
     84 #define pTwiddle	r4
        @// r5 is shared by pOut and x0i.
     85 #define pOut	r5
     86 #define subFFTSize	r7
        @// subFFTNum and N are the same register (r6).
     87 #define subFFTNum	r6
     88 #define N	r6
        @// order lives in r14, the ARM link register, so its value is lost at
        @// the first BL; copy it elsewhere if it is needed after a call.
     89 #define order	r14
     90 #define diff	r9
     91 @// Total num of radix stages required to complete the FFT
     92 #define count	r8
        @// NOTE(review): x0r, x0i, diffMinusOne and round are not referenced by
        @// the function body in this file - possibly leftovers; confirm before
        @// removing.
     93 #define x0r	r4
     94 #define x0i	r5
     95 #define diffMinusOne	r2
     96 #define round	r3
     97 
     98 @// Neon registers
     99 
        @// dX0 holds the data being copied or scaled; dShift holds the per-lane
        @// shift count used by the final scaling loop. Both are viewed as
        @// 2 x signed 32-bit lanes.
    100 #define dX0	D0.S32
    101 #define dShift	D1.S32
    102 
    103 
    104 
    105     @// Allocate stack memory required by the function
        @//
        @// ---------------------------------------------------------------------
        @// omxSP_FFTInv_CToC_SC32_Sfs
        @//
        @// Driver for the inverse complex FFT. It does no butterfly arithmetic
        @// itself: it reads N, the twiddle table and the scratch buffer from the
        @// FFT spec, computes order = log2(N), and then chains the external
        @// radix-2 / radix-4 / radix-8 stage routines (first stage "fs", middle
        @// stages, last stage "ls"; "Sfs" variants are the scaled ones).
        @// Most paths finish at FFTEnd, which applies an extra rounding right
        @// shift to the output when the stored shift amount is positive.
        @//
        @// In:   r0 = pSrc, r1 = pDst, r2 = pFFTSpec, r3 = scale
        @// Out:  r0 = OMX_Sts_NoErr (the only value this routine returns)
        @// Stack: one 4-byte slot, diffOnStack, holding the shift for FFTEnd
        @//
        @// No argument validation is performed here.
        @//
        @// NOTE(review): M_ALLOC4, M_START, M_STR, M_LDR and M_END are macros
        @// from armCOMM_s.h. The "r11,d15" arguments presumably select the
        @// callee-saved registers to preserve - confirm against the macro.
        @// NOTE(review): the register contract of the stage routines (which
        @// registers they read, update and preserve) is not visible in this
        @// file; the comments below only state what this code relies on.
        @// ---------------------------------------------------------------------
    106         M_ALLOC4        diffOnStack, 4
    107 
    108     @// Write function header
    109         M_START     omxSP_FFTInv_CToC_SC32_Sfs,r11,d15
    110 
    111 @ Structure offsets for the FFTSpec
    112 	.set	ARMsFFTSpec_N, 0
    113 	.set	ARMsFFTSpec_pBitRev, 4
    114 	.set	ARMsFFTSpec_pTwiddle, 8
    115 	.set	ARMsFFTSpec_pBuf, 12
    116 
    117         @// Define stack arguments
    118 
    119         @// Read the size from structure and take log
    120         LDR     N, [pFFTSpec, #ARMsFFTSpec_N]
    121 
    122         @// Read other structure parameters
        @// pFFTSpec (r2) is not needed after these loads; r2 is reused as
        @// argDst further down.
    123         LDR     pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
    124         LDR     pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]
    125 
        @// order = 31 - clz(N), which is log2(N) when N is a power of two.
    126         CLZ     order,N                             @// N = 2^order
    127         RSB     order,order,#31
        @// subFFTSize starts at 1. subFFTNum shares r6 with N, so it already
        @// holds N and the MOV below would be a no-op.
    128         MOV     subFFTSize,#1
    129         @//MOV     subFFTNum,N
    130 
        @// From here on: scale = caller scale + order.
    131         ADD     scale,scale,order                   @// FFTInverse has a final scaling factor by N
    132 
    133         CMP     order,#3
    134         BGT     orderGreaterthan3                   @// order > 3
    135 
    136         CMP     order,#1
    137         BGE     orderGreaterthan0                   @// order > 0
        @// order == 0 (N == 1): nothing to transform. The LT condition from
        @// CMP order,#1 still holds for the conditional store and the BLT
        @// below, since the instructions in between do not write the flags.
        @// One D register (two 32-bit lanes) is copied from pSrc to pDst, then
        @// pSrc is pointed at the output so FFTEnd can scale it in place
        @// (subFFTSize is still 1, so exactly one D register is scaled).
    138         M_STR   scale, diffOnStack,LT               @// order = 0
    139         VLD1    dX0,[pSrc]
    140         VST1    dX0,[pDst]
    141         MOV     pSrc,pDst
    142         BLT     FFTEnd
    143 
        @// ---- order 1..3: scaled radix-2 stages only -------------------------
    144 orderGreaterthan0:
    145         @// set the buffers appropriately for various orders
        @// order == 2: first stage gets pOut (scratch) as argDst and pDst in r5.
        @// order 1 or 3: first stage gets pDst as argDst.
        @// pDst (r1) must be consumed before argTwiddle, which is also r1, is
        @// written.
    146         CMP     order,#2
    147         MOVNE   argDst,pDst
    148         MOVEQ   argDst,pOut
    149         MOVEQ   pOut,pDst                           @// Pass the first stage destination in RN5
    150         MOV     argTwiddle,pTwiddle
    151         @// Store the scale factor and scale at the end
        @// diff = scale - order undoes the ADD above, so the stack slot holds
        @// the scale value passed by the caller; FFTEnd shifts by it.
        @// The BGE/BLLT below still test the flags of CMP order,#2: the MOVs
        @// and SUB have no S suffix, and the code relies on M_STR not touching
        @// the flags.
    152         SUB     diff,scale,order
    153         M_STR   diff, diffOnStack
    154         BGE     orderGreaterthan1
    155         BLLT    armSP_FFTInv_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe  @// order = 1
    156         B       FFTEnd
    157 
        @// order 2 or 3: first stage, one middle stage (order 3 only), last
        @// stage. order is in r14 and is destroyed by BL, so a copy is kept in
        @// r4 (pTwiddle, also r4, has already been moved to argTwiddle).
        @// NOTE(review): this relies on the first-stage routine preserving r4.
    158 orderGreaterthan1:
    159         MOV     tmpOrder,order                          @// tmpOrder = RN 4
    160         BL      armSP_FFTInv_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe
    161         CMP     tmpOrder,#2
    162         BLGT    armSP_FFTInv_CToC_SC32_Sfs_Radix2_OutOfPlace_unsafe
    163         BL      armSP_FFTInv_CToC_SC32_Sfs_Radix2_ls_OutOfPlace_unsafe
    164         B       FFTEnd
    165 
    166 
        @// ---- order > 3: choose between the special and general scale paths --
        @// diff = scale - order, i.e. the scale value passed by the caller.
        @//   diff >  0 : scale is clamped to order, then specialScaleCase
        @//   diff == 0 : specialScaleCase
        @//   diff <  0 : specialScaleCase if scale == 0, else generalScaleCase
        @// NOTE(review): because order was added to scale above, diff >= 0 for
        @// every non-negative caller scale (ignoring 32-bit overflow), so such
        @// calls always end up in scaleEqualsOrder. The unscaled special path
        @// and generalScaleCase are only reachable with a negative caller scale.
    167 orderGreaterthan3:
    168         @// check scale = 0 or scale = order
    169         SUBS    diff, scale, order                 @// scale > order
    170         MOVGT   scale,order
    171         BGE     specialScaleCase                   @// scale = 0 or scale = order
    172         CMP     scale,#0
    173         BEQ     specialScaleCase
    174         B       generalScaleCase
    175 
        @// This label is only reached from orderGreaterthan3, so order > 3 here.
    176 specialScaleCase:	                                    @//  scale = 0 or scale = order  and order >= 2
    177 
        @// Bit 1 of order selects which buffer the first stage writes to:
        @// set -> pDst, clear -> pOut (and pDst is handed over in r5).
        @// For order > 3 with one radix-4 or radix-8 first stage followed by
        @// radix-4 stages, bit 1 of order is the parity of the stage count.
    178         TST     order, #2                           @// Set input args to fft stages
    179         MOVNE   argDst,pDst
    180         MOVEQ   argDst,pOut
    181         MOVEQ   pOut,pDst                           @// Pass the first stage destination in RN5
    182         MOV     argTwiddle,pTwiddle
    183 
        @// Save diff for FFTEnd. diff >= 0 selects the scaled stage routines;
        @// the BGE uses the flags of the CMP (the store in between is relied
        @// upon not to change them).
    184         CMP      diff,#0
    185         M_STR    diff, diffOnStack
    186         BGE      scaleEqualsOrder
    187 
        @// Unscaled path (diff < 0, so FFTEnd will not shift).
    188         @//check for even or odd order
    189         @// NOTE: The following combination of BL's would work fine eventhough the first
    190         @// BL would corrupt the flags. This is because the end of the "grpZeroSetLoop" loop inside
    191         @// armSP_FFTInv_CToC_SC32_Radix4_fs_OutOfPlace_unsafe sets the Z flag to EQ
    192 
        @// Even order: radix-4 first stage. Odd order: radix-8 first stage.
        @// order (r14) is tested before the first BL overwrites it.
    193         TST     order,#0x00000001
    194         BLEQ    armSP_FFTInv_CToC_SC32_Radix4_fs_OutOfPlace_unsafe
    195         BLNE    armSP_FFTInv_CToC_SC32_Radix8_fs_OutOfPlace_unsafe
    196 
        @// subFFTNum is never written in this file after the initial load of
        @// N; the stage routines are presumably updating it - TODO confirm.
        @//   subFFTNum <  4 : all stages done, go scale
        @//   subFFTNum == 4 : one last radix-4 stage remains
        @//   subFFTNum >  4 : run a middle radix-4 stage and test again
    197         CMP        subFFTNum,#4
    198         BLT     FFTEnd
    199 
    200 
        @// The BEQ at the loop top consumes the flags of the preceding
        @// CMP subFFTNum,#4 (either the one above or the one after the BL).
    201 unscaledRadix4Loop:
    202         BEQ        lastStageUnscaledRadix4
    203          BL        armSP_FFTInv_CToC_SC32_Radix4_OutOfPlace_unsafe
    204          CMP        subFFTNum,#4
    205          B        unscaledRadix4Loop
    206 
    207 lastStageUnscaledRadix4:
    208         BL      armSP_FFTInv_CToC_SC32_Radix4_ls_OutOfPlace_unsafe
    209         B        FFTEnd
    210 
    211 
        @// Scaled path (diff >= 0): same structure as the unscaled path above
        @// but using the Sfs stage routines. FFTEnd then applies the extra
        @// shift stored in diffOnStack when it is positive.
    212 scaleEqualsOrder:
    213         @//check for even or odd order
    214         @// NOTE: The following combination of BL's would work fine eventhough the first
    215         @// BL would corrupt the flags. This is because the end of the "grpZeroSetLoop" loop inside
    216         @// armSP_FFTInv_CToC_SC32_Radix4_fs_OutOfPlace_unsafe sets the Z flag to EQ
    217 
    218         TST     order,#0x00000001
    219         BLEQ    armSP_FFTInv_CToC_SC32_Sfs_Radix4_fs_OutOfPlace_unsafe
    220         BLNE    armSP_FFTInv_CToC_SC32_Sfs_Radix8_fs_OutOfPlace_unsafe
    221 
    222         CMP        subFFTNum,#4
    223         BLT     FFTEnd
    224 
    225 
    226 scaledRadix4Loop:
    227         BEQ        lastStageScaledRadix4
    228          BL        armSP_FFTInv_CToC_SC32_Sfs_Radix4_OutOfPlace_unsafe
    229          CMP        subFFTNum,#4
    230          B        scaledRadix4Loop
    231 
    232 lastStageScaledRadix4:
    233         BL      armSP_FFTInv_CToC_SC32_Sfs_Radix4_ls_OutOfPlace_unsafe
    234         B        FFTEnd
    235 
        @// ---- general case: scaled radix-2 stages, then unscaled stages ------
        @// Reached only from orderGreaterthan3 (so order > 3) with
        @// scale < order and scale != 0. No BL has run yet, so order (r14) is
        @// still valid here.
    236 generalScaleCase:	                                        @// 0 < scale < order and order >= 2
    237         @// Determine the correct destination buffer
        @// diff = order - scale = number of unscaled radix-2 levels left after
        @// the scaled radix-2 stages.
        @//   diff even: they are done as diff/2 radix-4 stages
        @//   diff odd : everything is radix-2, so count = order
    238         SUB     diff,order,scale
    239         TST     diff,#0x01
    240         ADDEQ   count,scale,diff,LSR #1         @// count = scale + (order - scale)/2
    241         MOVNE   count,order
    242         TST     count,#0x01                     @// Is count even or odd ?
    243 
    244         MOVNE   argDst,pDst                     @// Set input args to fft stages
    245         MOVEQ   argDst,pOut
    246         MOVEQ   pOut,pDst                       @// Pass the first stage destination in RN5
    247         MOV     argTwiddle,pTwiddle
    248 
        @// diff is spilled because it is needed again after the calls below.
    249         M_STR   diff, diffOnStack
    250 
        @// argScale (r4) counts the scaled radix-2 stages still to run. pTwiddle,
        @// also r4, was already copied to argTwiddle above.
        @// NOTE(review): relies on the stage routines preserving r4.
    251         MOV     argScale,scale                  @// Put scale in RN4 so as to save and restore
    252         BL      armSP_FFTInv_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe     @// scaled first stage
    253         SUBS    argScale,argScale,#1
    254 
        @// Each pass: run one scaled stage if the previous SUBS left the
        @// counter > 0, then decrement again and loop while it stays > 0.
    255 scaledRadix2Loop:
    256         BLGT    armSP_FFTInv_CToC_SC32_Sfs_Radix2_OutOfPlace_unsafe
    257         SUBS    argScale,argScale,#1            @// save and restore scale (RN4) in the scaled stages
    258         BGT     scaledRadix2Loop
    259 
    260 
    261         M_LDR   diff, diffOnStack
    262         @//check for even or odd order
        @// (the value tested is diff, the number of unscaled levels, not order)
    263         TST     diff,#0x00000001
    264         BEQ     generalUnscaledRadix4Loop
    265         B       unscaledRadix2Loop
    266 
        @// Unscaled radix-4 stages until subFFTNum reaches 4, then the last
        @// radix-4 stage.
    267 generalUnscaledRadix4Loop:
    268         CMP        subFFTNum,#4
    269          BEQ        generalLastStageUnscaledRadix4
    270          BL        armSP_FFTInv_CToC_SC32_Radix4_OutOfPlace_unsafe
    271          B        generalUnscaledRadix4Loop
    272 
        @// The general path branches straight to End: no final shift is applied.
    273 generalLastStageUnscaledRadix4:
    274         BL      armSP_FFTInv_CToC_SC32_Radix4_ls_OutOfPlace_unsafe
    275         B        End
    276 
    277 
        @// Unscaled radix-2 stages until subFFTNum reaches 2, then the last
        @// radix-2 stage.
    278 unscaledRadix2Loop:
    279         CMP        subFFTNum,#2
    280          BEQ        generalLastStageUnscaledRadix2
    281          BL        armSP_FFTInv_CToC_SC32_Radix2_OutOfPlace_unsafe
    282          B        unscaledRadix2Loop
    283 
    284 generalLastStageUnscaledRadix2:
    285         BL      armSP_FFTInv_CToC_SC32_Radix2_ls_OutOfPlace_unsafe
    286         B        End
    287 
    288 
        @// ---- final scaling -------------------------------------------------
        @// Reload the shift amount saved earlier; nothing to do if it is <= 0.
    289 FFTEnd:	                                              @// Does only the scaling
    290 
    291         M_LDR   diff, diffOnStack
    292         CMP     diff,#0
    293         BLE     End
    294 
        @// VRSHL shifts left by a per-lane signed count; a negative count gives
        @// a rounding right shift, hence the negation before the VDUP.
    295         RSB     diff,diff,#0                        @// to use VRSHL for right shift by a variable
    296         VDUP    dShift,diff
    297 
        @// Scale subFFTSize D registers in place starting at pSrc, one D
        @// register (two 32-bit lanes) per iteration. The BGT uses the flags of
        @// the SUBS; the NEON instructions in between do not change them.
        @// NOTE(review): for order > 0 this assumes the stage routines leave
        @// pSrc pointing at the output and subFFTSize equal to N - confirm.
    298 scaleFFTData:	                                        @// N = subFFTSize  ; dataptr = pDst  ; scale = diff
    299         VLD1    {dX0},[pSrc]            @// pSrc contains pDst pointer
    300         SUBS    subFFTSize,subFFTSize,#1
    301         VRSHL   dX0,dShift
    302         VST1    {dX0},[pSrc]!
    303 
    304         BGT     scaleFFTData
    305 
    306 
    307 End:
    308         @// Set return value
    309         MOV     result, #OMX_Sts_NoErr
    310 
    311         @// Write function tail
    312         M_END
    313 
    314 	.end
    315