Home | History | Annotate | Download | only in neon
      1 @//
      2 @//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
      3 @//
      4 @//  Use of this source code is governed by a BSD-style license
      5 @//  that can be found in the LICENSE file in the root of the source
      6 @//  tree. An additional intellectual property rights grant can be found
      7 @//  in the file PATENTS.  All contributing project authors may
      8 @//  be found in the AUTHORS file in the root of the source tree.
      9 @//
     10 @//  This file was originally licensed as follows. It has been
     11 @//  relicensed with permission from the copyright holders.
     12 @//
     13 @//
     14 @// File Name:  omxSP_FFTFwd_CToC_SC32_Sfs_s.s
     15 @// OpenMAX DL: v1.0.2
     16 @// Last Modified Revision:   6684
     17 @// Last Modified Date:       Mon, 09 Jul 2007
     18 @//
     19 @// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
     20 @//
     21 @//
     22 @//
     23 @// Description:
     24 @// Compute a forward FFT for a complex signal
     25 @//
     26 
     27 
     28 @// Include standard headers
     29 
     30 #include "dl/api/arm/armCOMM_s.h"
     31 #include "dl/api/arm/omxtypes_s.h"
     32 
     33 @// Import symbols required from other files
     34 @// (For example tables)
     35 
     36         .extern  armSP_FFTFwd_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe
     37         .extern  armSP_FFTFwd_CToC_SC32_Radix2_fs_OutOfPlace_unsafe
     38         .extern  armSP_FFTFwd_CToC_SC32_Radix4_fs_OutOfPlace_unsafe
     39         .extern  armSP_FFTFwd_CToC_SC32_Radix8_fs_OutOfPlace_unsafe
     40         .extern  armSP_FFTFwd_CToC_SC32_Radix4_OutOfPlace_unsafe
     41         .extern  armSP_FFTFwd_CToC_SC32_Sfs_Radix4_fs_OutOfPlace_unsafe
     42         .extern  armSP_FFTFwd_CToC_SC32_Sfs_Radix8_fs_OutOfPlace_unsafe
     43         .extern  armSP_FFTFwd_CToC_SC32_Sfs_Radix4_OutOfPlace_unsafe
     44         .extern  armSP_FFTFwd_CToC_SC32_Sfs_Radix2_OutOfPlace_unsafe
     45         .extern  armSP_FFTFwd_CToC_SC32_Radix2_OutOfPlace_unsafe
     46 
     47 @// Set debugging level
     48 @//DEBUG_ON    SETL {TRUE}
     49 
     50 
     51 
     52 @// Guarding implementation by the processor name
     53 
     54 
     55 
     56     @// Guarding implementation by the processor name
     57 
     58 @// Import symbols required from other files
     59 @// (For example tables)
     60         .extern  armSP_FFTFwd_CToC_SC32_Radix4_ls_OutOfPlace_unsafe
     61         .extern  armSP_FFTFwd_CToC_SC32_Radix2_ls_OutOfPlace_unsafe
     62         .extern  armSP_FFTFwd_CToC_SC32_Sfs_Radix4_ls_OutOfPlace_unsafe
     63         .extern  armSP_FFTFwd_CToC_SC32_Sfs_Radix2_ls_OutOfPlace_unsafe
     64 
     65 
     66 @//Input Registers
     67 @// pSrc, pDst, pFFTSpec and scale name r0-r3 as they are used at function entry.
     68 #define pSrc		r0
     69 #define pDst		r1
     70 #define pFFTSpec	r2
     71 #define scale		r3
     72 @// NOTE: several names in this list share one physical register (for example pDst and
     73 @// argTwiddle are both r1), so writing through one name overwrites the others.
     74 @// Output registers
     75 #define result		r0
     76 
     77 @//Local Scratch Registers
     78 @// order is r14, which every BL overwrites with its return address.
     79 #define argTwiddle	r1
     80 #define argDst		r2
     81 #define argScale	r4
     82 #define tmpOrder	r4
     83 #define pTwiddle	r4
     84 #define pOut		r5
     85 #define subFFTSize	r7
     86 #define subFFTNum	r6
     87 #define N		r6
     88 #define order		r14
     89 #define diff		r9
     90 @// Total number of radix stages required to complete the FFT
     91 #define count		r8
     92 #define x0r		r4
     93 #define x0i		r5
     94 #define diffMinusOne	r2
     95 #define round		r3
     96 @// tmpOrder, x0r, x0i, diffMinusOne and round are not referenced by any instruction in this file.
     97 @// Neon registers
     98 @// D0 and D1, each given an S32 element type.
     99 #define dX0	D0.S32
    100 #define dShift	D1.S32
    101 @// omxSP_FFTFwd_CToC_SC32_Sfs: entry point. It derives order from the N field of the
    102 @// FFTSpec, then branches on order and scale to a sequence of armSP_FFTFwd_CToC_SC32_*
    103 @// stage routines; every path ends at End, which puts OMX_Sts_NoErr in r0.
    104     @// Allocate stack memory required by the function
    105         M_ALLOC4        diffOnStack, 4
    106 @// diffOnStack is the slot written with M_STR below and read back with M_LDR (for example at FFTEnd).
    107     @// Write function header
    108         M_START     omxSP_FFTFwd_CToC_SC32_Sfs,r11,d15
    109 
    110 @ Structure offsets for the FFTSpec
    111 	.set	ARMsFFTSpec_N, 0
    112 	.set	ARMsFFTSpec_pBitRev, 4
    113 	.set	ARMsFFTSpec_pTwiddle, 8
    114 	.set	ARMsFFTSpec_pBuf, 12
    115 @// NOTE(review): these offsets presumably mirror the C-side FFTSpec struct layout - confirm.
    116         @// Define stack arguments
    117 
    118         @// Read the size from structure and take log
    119         LDR     N, [pFFTSpec, #ARMsFFTSpec_N]
    120 
    121         @// Read other structure parameters
    122         LDR     pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
    123         LDR     pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]
    124 
    125         CLZ     order,N                             @// N = 2^order
    126         RSB     order,order,#31                     @// order = 31 - clz(N)
    127         MOV     subFFTSize,#1
    128         @//MOV     subFFTNum,N                     @// would be a no-op: subFFTNum and N are both r6
    129 @// Dispatch on order: above 3, between 1 and 3, or 0 (handled inline below).
    130         CMP     order,#3
    131         BGT     orderGreaterthan3                   @// order > 3
    132 
    133         CMP     order,#1
    134         BGE     orderGreaterthan0                   @// order > 0
    135         M_STR   scale, diffOnStack,LT               @// order = 0: the whole scale is left for FFTEnd to apply
    136         VLD1    dX0,[pSrc]                          @// copy one D register (two 32-bit lanes) from pSrc ...
    137         VST1    dX0,[pDst]                          @// ... to pDst
    138         MOV     pSrc,pDst                           @// FFTEnd addresses the data through pSrc
    139         BLT     FFTEnd                              @// flags still hold the CMP order,#1 result
    140 
    141 orderGreaterthan0:                                  @// order is 1, 2 or 3 on this path
    142         @// set the buffers appropriately for various orders
    143         CMP     order,#2
    144         MOVNE   argDst,pDst
    145         MOVEQ   argDst,pOut
    146         MOVEQ   pOut,pDst                           @// Pass the first stage destination in RN5
    147         MOV     argTwiddle,pTwiddle                 @// overwrites r1 (pDst); argDst/pOut were set from it above
    148 @// diff = scale - order; the stored value is what FFTEnd later tests and shifts by.
    149         SUBS     diff,scale,order
    150         M_STR   diff,diffOnStack
    151         MOVGT   scale,order
    152         @// Now scale <= order
    153 @// order = 1: one first-stage call, the scaled (Sfs) variant when the clipped scale is 1.
    154         CMP     order,#1
    155         BGT     orderGreaterthan1
    156         SUBS    scale,scale,#1
    157         BLEQ    armSP_FFTFwd_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe  @// order = 1
    158         BLLT    armSP_FFTFwd_CToC_SC32_Radix2_fs_OutOfPlace_unsafe      @// order = 1
    159         B       FFTEnd
    160 @// NOTE(review): when the first BL of a BLxx pair is taken, the second condition is evaluated on the flags the callee returns with - confirm the radix-2 routines return with flags that keep the second BL from firing.
    161 orderGreaterthan1:
    162         CMP     order,#2
    163         MOV     argScale,scale                      @// argScale is r4 (held pTwiddle; argTwiddle already has that pointer)
    164         BGT     orderGreaterthan2
    165         SUBS    argScale,argScale,#1                @// first stage is scaled if scale >= 1
    166         BLGE    armSP_FFTFwd_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe      @// order =2
    167         BLLT    armSP_FFTFwd_CToC_SC32_Radix2_fs_OutOfPlace_unsafe
    168         SUBS    argScale,argScale,#1                @// last stage is scaled only if scale = 2
    169         BLEQ    armSP_FFTFwd_CToC_SC32_Sfs_Radix2_ls_OutOfPlace_unsafe
    170         BLLT    armSP_FFTFwd_CToC_SC32_Radix2_ls_OutOfPlace_unsafe
    171         B       FFTEnd
    172 @// order = 3: three radix-2 stages (first, middle, last), each scaled while argScale lasts.
    173 orderGreaterthan2:	                                                               @// order =3
    174         SUBS    argScale,argScale,#1                @// stage 1 is scaled if scale >= 1
    175         BLGE    armSP_FFTFwd_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe // "fs" means first stage
    176         BLLT    armSP_FFTFwd_CToC_SC32_Radix2_fs_OutOfPlace_unsafe
    177         SUBS    argScale,argScale,#1                @// stage 2 is scaled if scale >= 2
    178         BLGE    armSP_FFTFwd_CToC_SC32_Sfs_Radix2_OutOfPlace_unsafe
    179         BLLT    armSP_FFTFwd_CToC_SC32_Radix2_OutOfPlace_unsafe
    180         SUBS    argScale,argScale,#1                @// stage 3 is scaled only if scale = 3
    181         BLEQ    armSP_FFTFwd_CToC_SC32_Sfs_Radix2_ls_OutOfPlace_unsafe // "ls" means last stage
    182         BLLT    armSP_FFTFwd_CToC_SC32_Radix2_ls_OutOfPlace_unsafe
    183         B       FFTEnd
    184 
    185 
    186 
    187 orderGreaterthan3:
    188         @// check scale = 0 or scale >= order
    189         SUBS    diff, scale, order                 @// diff = scale - order
    190         MOVGT   scale,order                        @// clip scale to order
    191         BGE     specialScaleCase                   @// scale >= order
    192         CMP     scale,#0
    193         BEQ     specialScaleCase
    194         B       generalScaleCase
    195 
    196 specialScaleCase:	                                    @//  scale = 0, or scale clipped to order; only reached with order > 3
    197 @// Bit 1 of order selects the buffer the first stage writes to (presumably the parity of the radix-4/8 stage count - confirm).
    198         TST     order, #2                           @// Set input args to fft stages
    199         MOVNE   argDst,pDst
    200         MOVEQ   argDst,pOut
    201         MOVEQ   pOut,pDst                           @// Pass the first stage destination in RN5
    202         MOV     argTwiddle,pTwiddle
    203 @// diff >= 0 means scale was at least order: take the scaled (Sfs) stages; otherwise scale is 0.
    204         CMP      diff,#0
    205         M_STR    diff, diffOnStack
    206         BGE      scaleEqualsOrder
    207 
    208         @//check for even or odd order
    209         @// NOTE: The following combination of BL's would work fine eventhough the first
    210         @// BL would corrupt the flags. This is because the end of the "grpZeroSetLoop" loop inside
    211         @// armSP_FFTFwd_CToC_SC32_Radix4_fs_OutOfPlace_unsafe sets the Z flag to EQ
    212 
    213         TST     order,#0x00000001                   @// even order: radix-4 first stage; odd order: radix-8 first stage
    214         BLEQ    armSP_FFTFwd_CToC_SC32_Radix4_fs_OutOfPlace_unsafe
    215         BLNE    armSP_FFTFwd_CToC_SC32_Radix8_fs_OutOfPlace_unsafe
    216 @// NOTE(review): this file never writes subFFTNum (r6) after loading N; the stage routines presumably update it - confirm.
    217         CMP        subFFTNum,#4
    218         BLT     FFTEnd
    219 
    220 @// The first pass uses the flags of the CMP above; later passes use the CMP inside the loop.
    221 unscaledRadix4Loop:
    222         BEQ        lastStageUnscaledRadix4
    223          BL        armSP_FFTFwd_CToC_SC32_Radix4_OutOfPlace_unsafe
    224          CMP        subFFTNum,#4
    225          B        unscaledRadix4Loop
    226 
    227 lastStageUnscaledRadix4:
    228         BL      armSP_FFTFwd_CToC_SC32_Radix4_ls_OutOfPlace_unsafe
    229         B        FFTEnd
    230 
    231 @// Same stage sequence as above, using the scaled (Sfs) stage routines.
    232 scaleEqualsOrder:
    233         @//check for even or odd order
    234         @// NOTE: The following combination of BL's would work fine eventhough the first
    235         @// BL would corrupt the flags. This is because the end of the "grpZeroSetLoop" loop inside
    236         @// armSP_FFTFwd_CToC_SC32_Radix4_fs_OutOfPlace_unsafe sets the Z flag to EQ
    237 
    238         TST     order,#0x00000001                   @// even order: radix-4 first stage; odd order: radix-8 first stage
    239         BLEQ    armSP_FFTFwd_CToC_SC32_Sfs_Radix4_fs_OutOfPlace_unsafe
    240         BLNE    armSP_FFTFwd_CToC_SC32_Sfs_Radix8_fs_OutOfPlace_unsafe
    241 
    242         CMP        subFFTNum,#4
    243         BLT     FFTEnd
    244 
    245 @// The first pass uses the flags of the CMP above; later passes use the CMP inside the loop.
    246 scaledRadix4Loop:
    247         BEQ        lastStageScaledRadix4
    248          BL        armSP_FFTFwd_CToC_SC32_Sfs_Radix4_OutOfPlace_unsafe
    249          CMP        subFFTNum,#4
    250          B        scaledRadix4Loop
    251 
    252 lastStageScaledRadix4:
    253         BL      armSP_FFTFwd_CToC_SC32_Sfs_Radix4_ls_OutOfPlace_unsafe
    254         B        FFTEnd                             @// FFTEnd applies any scale left over beyond order
    255 
    256 generalScaleCase:	                                        @// 0 < scale < order; only reached with order > 3
    257         @// Determine the correct destination buffer
    258         SUB     diff,order,scale                @// here diff = order - scale (opposite sign to the other paths)
    259         TST     diff,#0x01
    260         ADDEQ   count,scale,diff,LSR #1         @// count = scale + (order - scale)/2
    261         MOVNE   count,order                     @// odd diff: count = order
    262         TST     count,#0x01                     @// Is count even or odd ?
    263 
    264         MOVNE   argDst,pDst                     @// Set input args to fft stages
    265         MOVEQ   argDst,pOut
    266         MOVEQ   pOut,pDst                       @// Pass the first stage destination in RN5
    267         MOV     argTwiddle,pTwiddle
    268 
    269         M_STR   diff, diffOnStack
    270 @// Run scale scaled radix-2 stages: the first-stage routine, then the loop below.
    271         MOV     argScale,scale                  @// Put scale in RN4 so as to save and restore
    272         BL      armSP_FFTFwd_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe     @// scaled first stage
    273         SUBS    argScale,argScale,#1
    274 
    275 scaledRadix2Loop:
    276         BLGT    armSP_FFTFwd_CToC_SC32_Sfs_Radix2_OutOfPlace_unsafe
    277         SUBS    argScale,argScale,#1            @// save and restore scale (RN4) in the scaled stages
    278         BGT     scaledRadix2Loop
    279 
    280 @// Reload diff from the stack slot; r9 is not assumed to survive the stage calls.
    281         M_LDR   diff, diffOnStack
    282         @//check whether diff = order - scale is even or odd
    283         TST     diff,#0x00000001
    284         BEQ     generalUnscaledRadix4Loop       @// even diff: finish with unscaled radix-4 stages
    285         B       unscaledRadix2Loop              @// odd diff: finish with unscaled radix-2 stages
    286 
    287 generalUnscaledRadix4Loop:
    288         CMP        subFFTNum,#4
    289          BEQ        generalLastStageUnscaledRadix4
    290          BL        armSP_FFTFwd_CToC_SC32_Radix4_OutOfPlace_unsafe
    291          B        generalUnscaledRadix4Loop
    292 
    293 generalLastStageUnscaledRadix4:
    294         BL      armSP_FFTFwd_CToC_SC32_Radix4_ls_OutOfPlace_unsafe
    295         B        End                            @// skip FFTEnd: the slot holds order - scale here, not a leftover scale
    296 
    297 
    298 unscaledRadix2Loop:
    299         CMP        subFFTNum,#2
    300          BEQ        generalLastStageUnscaledRadix2
    301          BL        armSP_FFTFwd_CToC_SC32_Radix2_OutOfPlace_unsafe
    302          B        unscaledRadix2Loop
    303 
    304 generalLastStageUnscaledRadix2:
    305         BL      armSP_FFTFwd_CToC_SC32_Radix2_ls_OutOfPlace_unsafe
    306         B        End                            @// skip FFTEnd: the slot holds order - scale here, not a leftover scale
    307 
    308 
    309 FFTEnd:	                                              @// Does only the scaling
    310 @// A stored diff > 0 is scale that the stages have not applied; anything else skips straight to End.
    311         M_LDR   diff, diffOnStack
    312         CMP     diff,#0
    313         BLE     End
    314 
    315         RSB     diff,diff,#0                        @// to use VRSHL for right shift by a variable
    316         VDUP    dShift,diff                         @// broadcast -diff into the lanes of dShift
    317 
    318 scaleFFTData:	                                        @// N = subFFTSize  ; dataptr = pDst  ; scale = diff
    319         VLD1    {dX0},[pSrc]            @// pSrc contains pDst pointer
    320         SUBS    subFFTSize,subFFTSize,#1            @// one D register (two 32-bit lanes) per pass
    321         VRSHL   dX0,dShift
    322         VST1    {dX0},[pSrc]!                       @// store in place, then advance pSrc
    323 
    324         BGT     scaleFFTData                        @// flags come from the SUBS above
    325 
    326 
    327 
    328 End:
    329         @// Set return value
    330         MOV     result, #OMX_Sts_NoErr
    331 
    332         @// Write function tail
    333         M_END
    334 
    335 	.end
    336