Home | History | Annotate | Download | only in neon
      1 @//
      2 @//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
      3 @//
      4 @//  Use of this source code is governed by a BSD-style license
      5 @//  that can be found in the LICENSE file in the root of the source
      6 @//  tree. An additional intellectual property rights grant can be found
      7 @//  in the file PATENTS.  All contributing project authors may
      8 @//  be found in the AUTHORS file in the root of the source tree.
      9 @//
     10 @//  This file was originally licensed as follows. It has been
     11 @//  relicensed with permission from the copyright holders.
     12 
     13 @//
     14 @//
     15 @// File Name:  omxSP_FFTFwd_CToC_SC16_Sfs_s.s
     16 @// OpenMAX DL: v1.0.2
     17 @// Last Modified Revision:   6729
     18 @// Last Modified Date:       Tue, 17 Jul 2007
     19 @//
     20 @// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
     21 @//
     22 @//
     23 @//
     24 @// Description:
     25 @// Compute a forward FFT for a complex signal
     26 @//
     27 @//
     28 
     29 
     30 @// Include standard headers
     31 
     32 #include "dl/api/arm/armCOMM_s.h"
     33 #include "dl/api/arm/omxtypes_s.h"
     34 
     35 
     36 @// Import symbols required from other files
     37 @// (For example tables)
     38 
     39         .extern  armSP_FFTFwd_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe
     40         .extern  armSP_FFTFwd_CToC_SC16_Radix2_fs_OutOfPlace_unsafe
     41         .extern  armSP_FFTFwd_CToC_SC16_Radix4_fs_OutOfPlace_unsafe
     42         .extern  armSP_FFTFwd_CToC_SC16_Radix4_ls_OutOfPlace_unsafe
     43         .extern  armSP_FFTFwd_CToC_SC16_Radix8_fs_OutOfPlace_unsafe
     44         .extern  armSP_FFTFwd_CToC_SC16_Radix4_OutOfPlace_unsafe
     45         .extern  armSP_FFTFwd_CToC_SC16_Sfs_Radix4_fs_OutOfPlace_unsafe
     46         .extern  armSP_FFTFwd_CToC_SC16_Sfs_Radix4_ls_OutOfPlace_unsafe
     47         .extern  armSP_FFTFwd_CToC_SC16_Sfs_Radix8_fs_OutOfPlace_unsafe
     48         .extern  armSP_FFTFwd_CToC_SC16_Sfs_Radix4_OutOfPlace_unsafe
     49         .extern  armSP_FFTFwd_CToC_SC16_Sfs_Radix2_OutOfPlace_unsafe
     50         .extern  armSP_FFTFwd_CToC_SC16_Radix2_OutOfPlace_unsafe
     51         .extern  armSP_FFTFwd_CToC_SC16_Sfs_Radix2_ls_OutOfPlace_unsafe
     52         .extern  armSP_FFTFwd_CToC_SC16_Radix2_ls_OutOfPlace_unsafe
     53 
     54 @// Set debugging level
     55 @//DEBUG_ON    SETL {TRUE}
     56 
     57 
     58 
     59 @// Guarding implementation by the processor name
     60 
     61 
     62 
     63 @// Guarding implementation by the processor name
     64 
     65 
     66     .extern  armSP_FFTFwd_CToC_SC16_Radix2_ps_OutOfPlace_unsafe
     67     .extern  armSP_FFTFwd_CToC_SC16_Sfs_Radix2_ps_OutOfPlace_unsafe
     68 
     69 @//Input Registers
        @// Several aliases below name the same physical register. Each alias is
        @// only meaningful while the value it names is live, so check the
        @// sharing notes before adding new uses.
     70 
     71 #define pSrc            r0
     72 #define pDst            r1
     73 #define pFFTSpec                r2
     74 #define scale           r3
     75 
     76 
     77 @// Output registers
        @// result shares r0 with pSrc.
     78 #define result          r0
     79 
     80 @//Local Scratch Registers
        @// r1: pDst on entry, later overwritten with the twiddle pointer (argTwiddle).
        @// r2: pFFTSpec on entry, later overwritten with the stage destination (argDst).
     81 #define argTwiddle              r1
     82 #define argDst          r2
        @// r4 is shared by argScale, pTwiddle, tmpOrder and x0r.
        @// tmpOrder is not referenced in the code visible in this file.
     83 #define argScale                r4
     84 #define pTwiddle                r4
     85 #define tmpOrder                r4
     86 #define pOut            r5
     87 #define subFFTSize              r7
        @// r6 is shared by subFFTNum and N.
     88 #define subFFTNum               r6
     89 #define N               r6
        @// order lives in r14, which is the link register: every BL overwrites it,
        @// so order may only be read before the first BL on any path.
     90 #define order           r14
     91 #define diff            r9
     92 @// Total num of radix stages required to complete the FFT
     93 #define count           r8
        @// x0r/x0i alias r4/r5. x0i, diffMinusOne and round are not referenced in
        @// the code visible in this file.
     94 #define x0r             r4
     95 #define x0i             r5
     96 #define diffMinusOne            r2
     97 #define round           r3
     98 
     99 @// Neon registers
        @// dX0 and dX0S32 are two typed views (S16 and S32) of the same register D0.
    100 
    101 #define dX0     D0.S16
    102 #define dShift  D1.S16
    103 #define dX0S32  D0.S32
    104 
    105 
    106 
    107     @// Allocate stack memory required by the function
                @// diffOnStack is the stack slot written with M_STR and read back
                @// with M_LDR below; it carries diff across the BL calls.
    108         M_ALLOC4        diffOnStack, 4
    109 
    110     @// Write function header
                @// omxSP_FFTFwd_CToC_SC16_Sfs
                @//   Entry: r0 = pSrc, r1 = pDst, r2 = pFFTSpec, r3 = scale
                @//   Exit:  r0 = result, always OMX_Sts_NoErr
                @//   Flow:  order (derived from N in the spec) selects the sequence
                @//          of stage routines; scale selects, stage by stage, the
                @//          scaled (_Sfs_) or the unscaled variant.
                @//   NOTE(review): the r11,d15 arguments presumably tell M_START which
                @//   callee-saved registers to preserve - confirm in armCOMM_s.h.
    111         M_START     omxSP_FFTFwd_CToC_SC16_Sfs,r11,d15
    112 
    113 @ Structure offsets for the FFTSpec
                @// ARMsFFTSpec_pBitRev is defined here but not read in this function.
    114         .set    ARMsFFTSpec_N, 0
    115         .set    ARMsFFTSpec_pBitRev, 4
    116         .set    ARMsFFTSpec_pTwiddle, 8
    117         .set    ARMsFFTSpec_pBuf, 12
    118 
    119         @// Define stack arguments
    120 
    121         @// Read the size from structure and take log
    122         LDR     N, [pFFTSpec, #ARMsFFTSpec_N]
    123 
    124         @// Read other structure parameters
    125         LDR     pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
    126         LDR     pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]
    127 
                @// order = 31 - CLZ(N), i.e. the bit index of the highest set bit of N.
    128         CLZ     order,N                             @// N = 2^order
    129         RSB     order,order,#31
                @// subFFTSize = 1 is the element count FFTEnd uses on the order < 1 path.
    130         MOV     subFFTSize,#1
    131         @//MOV     subFFTNum,N
    132 
    133         CMP     order,#3
    134         BGT     orderGreaterthan3                   @// order > 3
    135 
                @// order < 1: no stage routine is called. scale is stored as diff, one
                @// 32-bit word is copied from pSrc to pDst, and pSrc is pointed at pDst
                @// so that FFTEnd shifts the copied word in place.
    136         CMP     order,#1
    137         BGE     orderGreaterthan0                   @// order > 0
    138         M_STR   scale, diffOnStack,LT               @// order = 0
    139         LDRLT   x0r,[pSrc]
    140         STRLT   x0r,[pDst]
    141         MOVLT   pSrc,pDst
    142         BLT     FFTEnd
    143 
    144 orderGreaterthan0:
                @// Reached with order = 1, 2 or 3.
    145         @// set the buffers appropriately for various orders
                @// order == 2: the first stage writes to pOut and r5 is handed pDst.
                @// Otherwise the first stage writes straight to pDst.
    146         CMP     order,#2
    147         MOVNE   argDst,pDst
    148         MOVEQ   argDst,pOut
    149         MOVEQ   pOut,pDst                           @// Pass the first stage destination in RN5
                @// r1 held pDst until here; from now on it holds the twiddle pointer.
    150         MOV     argTwiddle,pTwiddle
    151 
                @// diff = scale - order is saved for FFTEnd, which applies a right
                @// shift by diff when diff > 0. MOVGT uses the flags from SUBS.
                @// NOTE(review): this assumes M_STR leaves the flags untouched - confirm.
    152         SUBS     diff,scale,order
    153         M_STR   diff,diffOnStack
    154         MOVGT   scale,order
    155         @// Now scale <= order
    156 
                @// order == 1: a single first-stage call. scale - 1 == 0 selects the
                @// scaled routine, scale - 1 < 0 the unscaled one.
                @// NOTE(review): the BLLT after a taken BLEQ/BLGE is evaluated on the
                @// flags the callee returns with; this pattern is used throughout the
                @// order 1..3 paths - confirm the callees return with Z set.
    157         CMP     order,#1
    158         BGT     orderGreaterthan1
    159         SUBS    scale,scale,#1
    160         BLEQ    armSP_FFTFwd_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe  @// order = 1
    161         BLLT    armSP_FFTFwd_CToC_SC16_Radix2_fs_OutOfPlace_unsafe      @// order = 1
    162         B       FFTEnd
    163 
    164 orderGreaterthan1:
                @// order == 2: first stage (fs) then last stage (ls). argScale (r4)
                @// is decremented before each stage; a stage is scaled while the
                @// decremented value is still non-negative.
    165         CMP     order,#2
    166         MOV     argScale,scale
    167         BGT     orderGreaterthan2
    168         SUBS    argScale,argScale,#1
    169         BLGE    armSP_FFTFwd_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe      @// order =2
    170         BLLT    armSP_FFTFwd_CToC_SC16_Radix2_fs_OutOfPlace_unsafe
    171         SUBS    argScale,argScale,#1
    172         BLEQ    armSP_FFTFwd_CToC_SC16_Sfs_Radix2_ls_OutOfPlace_unsafe
    173         BLLT    armSP_FFTFwd_CToC_SC16_Radix2_ls_OutOfPlace_unsafe
    174         B       FFTEnd
    175 
    176 orderGreaterthan2:                                                                     @// order =3
                @// order == 3: three radix-2 calls in the order fs, ps, ls, each
                @// picked scaled or unscaled from the running argScale.
    177         SUBS    argScale,argScale,#1
    178         BLGE    armSP_FFTFwd_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe
    179         BLLT    armSP_FFTFwd_CToC_SC16_Radix2_fs_OutOfPlace_unsafe
    180         SUBS    argScale,argScale,#1
    181         BLGE    armSP_FFTFwd_CToC_SC16_Sfs_Radix2_ps_OutOfPlace_unsafe
    182         BLLT    armSP_FFTFwd_CToC_SC16_Radix2_ps_OutOfPlace_unsafe
    183         SUBS    argScale,argScale,#1
    184         BLEQ    armSP_FFTFwd_CToC_SC16_Sfs_Radix2_ls_OutOfPlace_unsafe
    185         BLLT    armSP_FFTFwd_CToC_SC16_Radix2_ls_OutOfPlace_unsafe
    186         B       FFTEnd
    187 
    188 
    189 orderGreaterthan3:
    190         @// check scale = 0 or scale = order
                @// diff = scale - order. GE means scale >= order (scale is clamped to
                @// order when it is larger); scale == 0 also takes the special case.
    191         SUBS    diff, scale, order                 @// scale > order
    192         MOVGT   scale,order
    193         BGE     specialScaleCase                   @// scale = 0 or scale = order
    194         CMP     scale,#0
    195         BEQ     specialScaleCase
    196         B       generalScaleCase
    197 
    198 specialScaleCase:                                           @//  scale = 0 or scale = order  and order > 3
    199 
                @// Bit 1 of order picks the first-stage destination.
                @// NOTE(review): presumably this tracks whether the number of stages
                @// is odd or even so that the final stage lands in pDst - confirm.
    200         TST     order, #2                           @// Set input args to fft stages
    201         MOVNE   argDst,pDst
    202         MOVEQ   argDst,pOut
    203         MOVEQ   pOut,pDst                           @// Pass the first stage destination in RN5
    204         MOV     argTwiddle,pTwiddle
    205 
                @// diff (scale - order) is saved for FFTEnd. diff >= 0 takes the fully
                @// scaled path; diff < 0 here means scale == 0, the unscaled path.
    206         CMP      diff,#0
    207         M_STR    diff, diffOnStack
    208         BGE      scaleEqualsOrder
    209 
    210         @//check for even or odd order
    211         @// NOTE: The following combination of BL's would work fine eventhough the first
    212         @// BL would corrupt the flags. This is because the end of the "grpZeroSetLoop" loop inside
    213         @// armSP_FFTFwd_CToC_SC16_Radix4_fs_OutOfPlace_unsafe sets the Z flag to EQ
    214 
                @// Even order starts with a radix-4 first stage, odd order with radix-8.
                @// This is the last read of order (r14) before BL overwrites it.
    215         TST     order,#0x00000001
    216         BLEQ    armSP_FFTFwd_CToC_SC16_Radix4_fs_OutOfPlace_unsafe
    217         BLNE    armSP_FFTFwd_CToC_SC16_Radix8_fs_OutOfPlace_unsafe
    218 
                @// subFFTNum (r6) is never written in this file after N is loaded; the
                @// loop below relies on the stage routines updating it.
    219         CMP        subFFTNum,#4
    220         BLT     FFTEnd
    221 
    222 unscaledRadix4Loop:
                @// Flags at the loop head come from the CMP just before the loop or
                @// from the CMP at its tail. subFFTNum == 4 selects the last stage.
    223         BEQ        lastStageUnscaledRadix4
    224         BL        armSP_FFTFwd_CToC_SC16_Radix4_OutOfPlace_unsafe
    225          CMP        subFFTNum,#4
    226          B        unscaledRadix4Loop
    227 
    228 lastStageUnscaledRadix4:
    229         BL      armSP_FFTFwd_CToC_SC16_Radix4_ls_OutOfPlace_unsafe
    230         B        FFTEnd
    231 
    232 scaleEqualsOrder:
    233         @//check for even or odd order
    234         @// NOTE: The following combination of BL's would work fine eventhough the first
    235         @// BL would corrupt the flags. This is because the end of the "grpZeroSetLoop" loop inside
    236         @// armSP_FFTFwd_CToC_SC16_Sfs_Radix4_fs_OutOfPlace_unsafe sets the Z flag to EQ
                @// NOTE(review): this comment used to name the SC32 routine; the routine
                @// called below is the SC16_Sfs one - confirm it also returns with Z set.
    237 
    238         TST     order,#0x00000001
    239         BLEQ    armSP_FFTFwd_CToC_SC16_Sfs_Radix4_fs_OutOfPlace_unsafe
    240         BLNE    armSP_FFTFwd_CToC_SC16_Sfs_Radix8_fs_OutOfPlace_unsafe
    241 
    242         CMP        subFFTNum,#4
    243         BLT     FFTEnd
    244 
    245 scaledRadix4Loop:
                @// Same loop shape as unscaledRadix4Loop, using the scaled routines.
    246         BEQ        lastStageScaledRadix4
    247         BL        armSP_FFTFwd_CToC_SC16_Sfs_Radix4_OutOfPlace_unsafe
    248          CMP        subFFTNum,#4
    249          B        scaledRadix4Loop
    250 
    251 lastStageScaledRadix4:
    252         BL      armSP_FFTFwd_CToC_SC16_Sfs_Radix4_ls_OutOfPlace_unsafe
    253         B        FFTEnd
    254 
    255 
    256 
    257 generalScaleCase:                                               @// 0 < scale < order and order > 3
    258         @// Determine the correct destination buffer
                @// diff is redefined here as order - scale, which is positive on this
                @// path. Even diff: count = scale + diff/2. Odd diff: count = order.
    259         SUB     diff,order,scale
    260         TST     diff,#0x01
    261         ADDEQ   count,scale,diff,LSR #1         @// count = scale + (order - scale)/2
    262         MOVNE   count,order
    263         TST     count,#0x01                     @// Is count even or odd ?
    264 
    265         MOVNE   argDst,pDst                     @// Set input args to fft stages
    266         MOVEQ   argDst,pOut
    267         MOVEQ   pOut,pDst                       @// Pass the first stage destination in RN5
    268         MOV     argTwiddle,pTwiddle
    269 
                @// The positive diff is saved for outScale. Because it is positive,
                @// every exit from this case branches to End, not FFTEnd, so the
                @// extra shift in FFTEnd is not applied.
    270         CMP     diff,#1
    271         M_STR   diff, diffOnStack
    272         BEQ     scaleps                         @// scaling including a radix2_ps stage
    273 
    274         MOV     argScale,scale                  @// Put scale in RN4 so as to save and restore
    275         BL      armSP_FFTFwd_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe     @// scaled first stage
    276         SUBS    argScale,argScale,#1
    277 
    278 scaledRadix2Loop:
                @// One scaled radix-2 call per remaining unit of argScale; together with
                @// the first stage above this issues scale scaled calls in total.
    279         BLGT    armSP_FFTFwd_CToC_SC16_Sfs_Radix2_OutOfPlace_unsafe
    280         SUBS    argScale,argScale,#1            @// save and restore scale (RN4) in the scaled stages
    281         BGT     scaledRadix2Loop
    282         B       outScale
    283 
    284 scaleps:
                @// diff == 1, i.e. scale == order - 1: scaled fs, then scaled radix-2
                @// calls while argScale > 0, then the scaled ps stage, then the
                @// unscaled ls stage.
    285         SUB     argScale,scale,#1                   @// order>3 and diff=1 => scale >= 3
    286         BL      armSP_FFTFwd_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe     @// scaled first stage
    287         SUBS    argScale,argScale,#1
    288 
    289 scaledRadix2psLoop:
                @// argScale == 0 leaves the loop for the scaled ps stage.
    290         BEQ     scaledRadix2psStage
    291         BLGT    armSP_FFTFwd_CToC_SC16_Sfs_Radix2_OutOfPlace_unsafe
    292         SUBS    argScale,argScale,#1            @// save and restore scale (RN4) in the scaled stages
    293         BGE     scaledRadix2psLoop
    294 
    295 scaledRadix2psStage:
    296         BL      armSP_FFTFwd_CToC_SC16_Sfs_Radix2_ps_OutOfPlace_unsafe
    297         B       generalLastStageUnscaledRadix2
    298 
    299 
    300 outScale:
                @// Reload diff (order - scale). Even diff finishes with unscaled
                @// radix-4 calls, odd diff with unscaled radix-2 calls.
    301         M_LDR   diff, diffOnStack
    302         @//check for even or odd order
    303         TST     diff,#0x00000001
    304         BEQ     generalUnscaledRadix4Loop
    305         B       unscaledRadix2Loop
    306 
    307 generalUnscaledRadix4Loop:
                @// Call the unscaled radix-4 routine until subFFTNum == 4, then the
                @// radix-4 last stage.
    308         CMP        subFFTNum,#4
    309          BEQ        generalLastStageUnscaledRadix4
    310          BL        armSP_FFTFwd_CToC_SC16_Radix4_OutOfPlace_unsafe
    311          B        generalUnscaledRadix4Loop
    312 
    313 generalLastStageUnscaledRadix4:
    314         BL      armSP_FFTFwd_CToC_SC16_Radix4_ls_OutOfPlace_unsafe
    315         B        End
    316 
    317 unscaledRadix2Loop:
                @// Call the unscaled radix-2 routine until subFFTNum == 4, then the
                @// ps and ls stages.
    318         CMP        subFFTNum,#4
    319          BEQ        generalLastTwoStagesUnscaledRadix2
    320          BL        armSP_FFTFwd_CToC_SC16_Radix2_OutOfPlace_unsafe
    321          B        unscaledRadix2Loop
    322 
    323 generalLastTwoStagesUnscaledRadix2:
    324         BL      armSP_FFTFwd_CToC_SC16_Radix2_ps_OutOfPlace_unsafe
    325 generalLastStageUnscaledRadix2:
    326         BL      armSP_FFTFwd_CToC_SC16_Radix2_ls_OutOfPlace_unsafe
    327         B        End
    328 
    329 
    330 FFTEnd:                                               @// Does only the scaling
    331 
                @// diff <= 0: nothing left to shift. diff > 0: shift the data right by
                @// diff, implemented as VRSHL by the negated amount.
    332         M_LDR   diff, diffOnStack
    333         CMP     diff,#0
    334         BLE     End
    335 
    336         RSB     diff,diff,#0                        @// to use VRSHL for right shift by a variable
    337         VDUP    dShift,diff
    338 
    339 scaleFFTData:                                           @// N = subFFTSize  ; dataptr = pDst  ; scale = diff
                @// Each iteration loads one 32-bit lane, shifts it as 16-bit lanes,
                @// stores it back to the same address and advances pSrc; the loop runs
                @// subFFTSize times.
                @// NOTE(review): the 32-bit lane is presumably one complex SC16 element
                @// (16-bit real and imaginary parts), and on the paths that called
                @// stage routines pSrc and subFFTSize are presumably left by those
                @// routines as the output pointer and element count - confirm.
    340         VLD1    {dX0S32[0]},[pSrc]                        @// pSrc contains pDst pointer
    341         SUBS    subFFTSize,subFFTSize,#1
    342         VRSHL   dX0,dShift
    343         VST1    {dX0S32[0]},[pSrc]!
    344 
    345         BGT     scaleFFTData
    346 
    347 
    348 
    349 End:
    350         @// Set return value
    351         MOV     result, #OMX_Sts_NoErr
    352 
    353         @// Write function tail
    354         M_END
    355 
    356     .end
    357