Home | History | Annotate | Download | only in neon
      1 @//
      2 @//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
      3 @//
      4 @//  Use of this source code is governed by a BSD-style license
      5 @//  that can be found in the LICENSE file in the root of the source
      6 @//  tree. An additional intellectual property rights grant can be found
      7 @//  in the file PATENTS.  All contributing project authors may
      8 @//  be found in the AUTHORS file in the root of the source tree.
      9 @//
     10 @//  This file was originally licensed as follows. It has been
     11 @//  relicensed with permission from the copyright holders.
     12 
     13 @//
     14 @//
     15 @// File Name:  omxSP_FFTInv_CToC_SC16_Sfs_s.s
     16 @// OpenMAX DL: v1.0.2
     17 @// Last Modified Revision:   6729
     18 @// Last Modified Date:       Tue, 17 Jul 2007
     19 @//
     20 @// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
     21 @//
     22 @//
     23 @//
     24 @// Description:
     25 @// Compute an inverse FFT for a complex signal
     26 @//
     27 @//
     28 
     29 
     30 @// Include standard headers
     31 
     32 #include "dl/api/arm/armCOMM_s.h"
     33 #include "dl/api/arm/omxtypes_s.h"
     34 
     35 @// Import symbols required from other files
     36 @// (For example tables)
     37 
     38         .extern  armSP_FFTInv_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe
     39         .extern  armSP_FFTInv_CToC_SC16_Radix2_fs_OutOfPlace_unsafe
     40         .extern  armSP_FFTInv_CToC_SC16_Radix4_fs_OutOfPlace_unsafe
     41         .extern  armSP_FFTInv_CToC_SC16_Radix4_ls_OutOfPlace_unsafe
     42         .extern  armSP_FFTInv_CToC_SC16_Radix8_fs_OutOfPlace_unsafe
     43         .extern  armSP_FFTInv_CToC_SC16_Radix4_OutOfPlace_unsafe
     44         .extern  armSP_FFTInv_CToC_SC16_Sfs_Radix4_fs_OutOfPlace_unsafe
     45         .extern  armSP_FFTInv_CToC_SC16_Sfs_Radix4_ls_OutOfPlace_unsafe
     46         .extern  armSP_FFTInv_CToC_SC16_Sfs_Radix8_fs_OutOfPlace_unsafe
     47         .extern  armSP_FFTInv_CToC_SC16_Sfs_Radix4_OutOfPlace_unsafe
     48         .extern  armSP_FFTInv_CToC_SC16_Sfs_Radix2_OutOfPlace_unsafe
     49         .extern  armSP_FFTInv_CToC_SC16_Radix2_OutOfPlace_unsafe
     50         .extern  armSP_FFTInv_CToC_SC16_Sfs_Radix2_ls_OutOfPlace_unsafe
     51         .extern  armSP_FFTInv_CToC_SC16_Radix2_ls_OutOfPlace_unsafe
     52 
     53 @// Set debugging level
     54 @//DEBUG_ON    SETL {TRUE}
     55 
     56 
     57 
     58 @// Guarding implementation by the processor name
     59 
     60 
     61 
     62 @// Guarding implementation by the processor name
     63 
     64 
     65     .extern  armSP_FFTInv_CToC_SC16_Radix2_ps_OutOfPlace_unsafe
     66     .extern  armSP_FFTInv_CToC_SC16_Sfs_Radix2_ps_OutOfPlace_unsafe
     67 
      68 @//Input Registers
         @// All names in this section are preprocessor aliases for fixed registers.
         @// Several aliases share one physical register, so an alias is only
         @// meaningful in the part of the function that last wrote that register:
         @//   r0 : pSrc, result
         @//   r1 : pDst, argTwiddle
         @//   r2 : pFFTSpec, argDst, diffMinusOne
         @//   r3 : scale, round
         @//   r4 : argScale, pTwiddle, tmpOrder, x0r
         @//   r5 : pOut, x0i
         @//   r6 : subFFTNum, N
         @//   D0 : dX0 (S16 lanes), dX0S32 (S32 lanes)
      69 
      70 #define pSrc    r0
      71 #define pDst    r1
      72 #define pFFTSpec        r2
      73 #define scale   r3
      74 
      75 
      76 @// Output registers
         @// result shares r0 with pSrc; it is only written at the very end (End:).
      77 #define result  r0
      78 
      79 @//Local Scratch Registers
      80 
         @// argTwiddle / argDst overwrite the pDst / pFFTSpec argument registers
         @// once those arguments have been consumed.
      81 #define argTwiddle      r1
      82 #define argDst  r2
         @// r4 is reused four ways: pTwiddle (loaded from the spec), then argScale
         @// or tmpOrder as a value kept across BL calls, and x0r in the order-0 copy.
      83 #define argScale        r4
      84 #define pTwiddle        r4
      85 #define tmpOrder        r4
      86 #define pOut    r5
      87 #define subFFTSize      r7
         @// N is only live until order has been derived from it; afterwards r6 is
         @// read as subFFTNum.
      88 #define subFFTNum       r6
      89 #define N       r6
         @// order lives in r14, which is the link register: every BL overwrites
         @// it, so order must be consumed or copied before the first call.
      90 #define order   r14
      91 #define diff    r9
      92 @// Total num of radix stages required to complete the FFT
      93 #define count   r8
      94 #define x0r     r4
         @// x0i, diffMinusOne and round are not referenced by the function body
         @// visible in this file.
      95 #define x0i     r5
      96 #define diffMinusOne    r2
      97 #define round   r3
      98 
      99 @// Neon registers
     100 
         @// dX0 and dX0S32 are two views of the same D0 register (16-bit and
         @// 32-bit lanes); dShift is D1 viewed as 16-bit lanes.
     101 #define dX0  D0.S16
     102 #define dShift  D1.S16
     103 #define dX0S32  D0.S32
    104 
    105 
     106     @// Allocate stack memory required by the function
             @// diffOnStack holds the shift amount that is read back at outScale and
             @// FFTEnd (presumably one 4-byte slot - see M_ALLOC4 in armCOMM_s.h).
     107         M_ALLOC4        diffOnStack, 4
     108 
             @//------------------------------------------------------------------
             @// omxSP_FFTInv_CToC_SC16_Sfs
             @//
             @// Driver for the inverse complex FFT named in the file header: it
             @// reads the FFT spec, derives order from N and chains the external
             @// armSP_FFTInv_CToC_SC16_* stage routines.
             @//
             @// In:   r0 pSrc      source pointer
             @//       r1 pDst      destination pointer
             @//       r2 pFFTSpec  spec structure; N, pTwiddle and pBuf are read
             @//       r3 scale     scale factor; order is added to it before use
             @// Out:  r0 result    always OMX_Sts_NoErr
             @//
             @// Stage selection as coded below:
             @//   order == 0 : copy one 32-bit word from pSrc to pDst
             @//   order == 1 : Sfs_Radix2_fs
             @//   order == 2 : Sfs_Radix2_fs, Sfs_Radix2_ls
             @//   order == 3 : Sfs_Radix2_fs, Sfs_Radix2_ps, Sfs_Radix2_ls
             @//   order  > 3 : radix-8 (odd order) or radix-4 (even order) first
             @//                stage followed by radix-4 stages, or the radix-2
             @//                based chain in generalScaleCase
             @// Paths that end at FFTEnd then apply a rounding right shift by the
             @// value in diffOnStack when that value is positive.
             @//
             @// The stage routines are not visible in this file. This code relies
             @// on them maintaining r4, subFFTNum (r6), subFFTSize (r7), pSrc and,
             @// in two places, the Z flag - TODO confirm against their sources.
             @//------------------------------------------------------------------
     109     @// Write function header
             @// r11,d15 are arguments to the M_START macro (presumably the highest
             @// core / NEON registers the prologue must preserve - confirm in
             @// armCOMM_s.h).
     110         M_START     omxSP_FFTInv_CToC_SC16_Sfs,r11,d15
     111 
     112 @ Structure offsets for the FFTSpec
     113         .set    ARMsFFTSpec_N, 0
     114         .set    ARMsFFTSpec_pBitRev, 4
     115         .set    ARMsFFTSpec_pTwiddle, 8
     116         .set    ARMsFFTSpec_pBuf, 12
     117 
     118         @// Define stack arguments
     119 
     120         @// Read the size from structure and take log
     121         LDR     N, [pFFTSpec, #ARMsFFTSpec_N]
     122 
     123         @// Read other structure parameters
     124         LDR     pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
     125         LDR     pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]
     126 
                 @// order = 31 - clz(N): the index of the highest set bit of N,
                 @// which is log2(N) when N is a power of two.
     127         CLZ     order,N                             @// N = 2^order
     128         RSB     order,order,#31
                 @// subFFTSize starts at 1; it is the element count used by the
                 @// scaling loop at FFTEnd (presumably grown by the stage routines).
     129         MOV     subFFTSize,#1
     130         @//MOV     subFFTNum,N
     131 
     132         ADD     scale,scale,order                   @// FFTInverse has a final scaling factor by N
     133 
     134         CMP     order,#3
     135         BGT     orderGreaterthan3                   @// order > 3
     136 
     137         CMP     order,#1
     138         BGE     orderGreaterthan0                   @// order > 0
                 @// order == 0: the BGE above was not taken, so every LT-conditional
                 @// instruction below executes. One 32-bit word is copied to pDst
                 @// and pSrc is pointed at pDst so that FFTEnd scales the copy in
                 @// place. scale equals the caller's value here (order is 0).
     139         M_STR   scale, diffOnStack,LT               @// order = 0
     140         LDRLT   x0r,[pSrc]
     141         STRLT   x0r,[pDst]
     142         MOVLT   pSrc,pDst
     143         BLT     FFTEnd
     144 
     145 orderGreaterthan0:
     146         @// set the buffers appropriately for various orders
                 @// order 1..3. The flags set by this CMP are consumed by the
                 @// MOVNE/MOVEQ lines and again by BGE/BLLT further down: nothing
                 @// in between is a flag-setting form (M_STR is assumed to expand
                 @// to a plain store).
     147         CMP     order,#2
     148         MOVNE   argDst,pDst
     149         MOVEQ   argDst,pOut
     150         MOVEQ   pOut,pDst                           @// Pass the first stage destination in RN5
                 @// pTwiddle (r4) is copied out before r4 is reused as tmpOrder.
     151         MOV     argTwiddle,pTwiddle
     152         @// Store the scale factor and scale at the end
                 @// order was added to scale above, so diff is the caller's scale.
     153         SUB     diff,scale,order
     154         M_STR   diff, diffOnStack
     155         BGE     orderGreaterthan1
     156         BLLT    armSP_FFTInv_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe  @// order = 1
     157         B       FFTEnd
     158 
     159 
     160 orderGreaterthan1:
                 @// order is r14 (LR) and is destroyed by the BL below, so keep a
                 @// copy in r4; the CMP after the call assumes the callee leaves
                 @// r4 intact. order == 3 adds the ps stage between fs and ls.
     161         MOV     tmpOrder,order                          @// tmpOrder = RN 4
     162         BL      armSP_FFTInv_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe
     163         CMP     tmpOrder,#2
     164         BLGT    armSP_FFTInv_CToC_SC16_Sfs_Radix2_ps_OutOfPlace_unsafe
     165         BL      armSP_FFTInv_CToC_SC16_Sfs_Radix2_ls_OutOfPlace_unsafe
     166         B       FFTEnd
     167 
     168 
     169 
     170 
     171 orderGreaterthan3:
     172         @// check scale = 0 or scale = order
                 @// NOTE(review): scale already includes +order, so diff is the
                 @// caller's scale. For any non-negative caller scale the BGE
                 @// below is taken and, in specialScaleCase, so is the branch to
                 @// scaleEqualsOrder. The unscaled path and generalScaleCase are
                 @// reached only for a negative caller scale - confirm whether
                 @// that input is meant to be supported.
     173         SUBS    diff, scale, order                 @// diff = scale - order
     174         MOVGT   scale,order
     175         BGE     specialScaleCase                   @// diff >= 0: scale (clamped above) = order
     176         CMP     scale,#0
     177         BEQ     specialScaleCase
     178         B       generalScaleCase
     179 
     180 specialScaleCase:                                           @//  scale = 0 or scale = order  and order > 3
     181 
                 @// Bit 1 of order picks the destination of the first stage:
                 @// set -> pDst, clear -> pOut with pOut then replaced by pDst.
                 @// Presumably the stages alternate between the two buffers and
                 @// this makes the last stage write to pDst - TODO confirm.
     182         TST     order, #2                           @// Set input args to fft stages
     183         MOVNE   argDst,pDst
     184         MOVEQ   argDst,pOut
     185         MOVEQ   pOut,pDst                           @// Pass the first stage destination in RN5
     186         MOV     argTwiddle,pTwiddle
     187 
                 @// diff is saved for FFTEnd; BGE still tests the CMP result
                 @// (M_STR is assumed not to alter flags).
     188         CMP      diff,#0
     189         M_STR    diff, diffOnStack
     190         BGE      scaleEqualsOrder
     191 
     192         @//check for even or odd order
     193         @// NOTE: The following combination of BL's would work fine eventhough the first
     194         @// BL would corrupt the flags. This is because the end of the "grpZeroSetLoop" loop inside
     195         @// armSP_FFTInv_CToC_SC16_Radix4_fs_OutOfPlace_unsafe sets the Z flag to EQ
     196 
                 @// Even order -> radix-4 first stage, odd order -> radix-8.
     197         TST     order,#0x00000001
     198         BLEQ    armSP_FFTInv_CToC_SC16_Radix4_fs_OutOfPlace_unsafe
     199         BLNE    armSP_FFTInv_CToC_SC16_Radix8_fs_OutOfPlace_unsafe
     200 
                 @// subFFTNum (r6) is not written in this file after N was read;
                 @// it is presumably maintained by the stage routines.
     201         CMP        subFFTNum,#4
     202         BLT     FFTEnd
     203 
     204 unscaledRadix4Loop:
                 @// Flags come from the CMP on subFFTNum: equal to 4 selects the
                 @// last-stage routine, otherwise run another radix-4 stage.
     205         BEQ        lastStageUnscaledRadix4
     206         BL        armSP_FFTInv_CToC_SC16_Radix4_OutOfPlace_unsafe
     207          CMP        subFFTNum,#4
     208          B        unscaledRadix4Loop
     209 
     210 lastStageUnscaledRadix4:
     211         BL      armSP_FFTInv_CToC_SC16_Radix4_ls_OutOfPlace_unsafe
     212         B        FFTEnd
     213 
     214 scaleEqualsOrder:
     215         @//check for even or odd order
     216         @// NOTE: The following combination of BL's would work fine eventhough the first
     217         @// BL would corrupt the flags. This is because the end of the "grpZeroSetLoop" loop inside
     218         @// armSP_FFTInv_CToC_SC16_Sfs_Radix4_fs_OutOfPlace_unsafe sets the Z flag to EQ
     219 
                 @// Same structure as the unscaled path above, using the Sfs
                 @// variants of the stage routines.
     220         TST     order,#0x00000001
     221         BLEQ    armSP_FFTInv_CToC_SC16_Sfs_Radix4_fs_OutOfPlace_unsafe
     222         BLNE    armSP_FFTInv_CToC_SC16_Sfs_Radix8_fs_OutOfPlace_unsafe
     223 
     224         CMP        subFFTNum,#4
     225         BLT     FFTEnd
     226 
     227 scaledRadix4Loop:
     228         BEQ        lastStageScaledRadix4
     229         BL        armSP_FFTInv_CToC_SC16_Sfs_Radix4_OutOfPlace_unsafe
     230          CMP        subFFTNum,#4
     231          B        scaledRadix4Loop
     232 
     233 lastStageScaledRadix4:
     234         BL      armSP_FFTInv_CToC_SC16_Sfs_Radix4_ls_OutOfPlace_unsafe
     235         B        FFTEnd
     236 
     237 
     238 
     239 generalScaleCase:                                        @// 0 < scale < order and order > 3
     240         @// Determine the correct destination buffer
                 @// From here on diff = order - scale (the opposite sign to the
                 @// diff used on the other paths). Every exit from this case
                 @// branches to End, so the FFTEnd shift is never applied here.
     241         SUB     diff,order,scale
     242         TST     diff,#0x01
     243         ADDEQ   count,scale,diff,LSR #1         @// count = scale + (order - scale)/2
     244         MOVNE   count,order
     245         TST     count,#0x01                     @// Is count even or odd ?
     246 
     247         MOVNE   argDst,pDst                     @// Set input args to fft stages
     248         MOVEQ   argDst,pOut
     249         MOVEQ   pOut,pDst                       @// Pass the first stage destination in RN5
     250         MOV     argTwiddle,pTwiddle
     251 
     252         CMP     diff,#1
     253         M_STR   diff, diffOnStack
     254         BEQ     scaleps                         @// scaling including a radix2_ps stage
     255 
                 @// diff != 1: run `scale` scaled radix-2 stages in total (the fs
                 @// stage plus scale-1 passes of the loop), counting down in r4.
     256         MOV     argScale,scale                  @// Put scale in RN4 so as to save and restore
     257         BL      armSP_FFTInv_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe     @// scaled first stage
     258         SUBS    argScale,argScale,#1
     259 
     260 scaledRadix2Loop:
                 @// BLGT tests the preceding SUBS; the SUBS after the call then
                 @// refreshes the flags, so the callee's flag state is irrelevant.
     261         BLGT    armSP_FFTInv_CToC_SC16_Sfs_Radix2_OutOfPlace_unsafe
     262         SUBS    argScale,argScale,#1            @// save and restore scale (RN4) in the scaled stages
     263         BGT     scaledRadix2Loop
     264         B       outScale
     265 
     266 scaleps:
                 @// diff == 1: the scaled stages are fs, then scale-2 plain
                 @// radix-2 stages, then the ps stage; a single unscaled ls
                 @// stage follows at generalLastStageUnscaledRadix2.
     267         SUB     argScale,scale,#1                   @// order>3 and diff=1 => scale >= 3
     268         BL      armSP_FFTInv_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe     @// scaled first stage
     269         SUBS    argScale,argScale,#1
     270 
     271 scaledRadix2psLoop:
     272         BEQ     scaledRadix2psStage
     273         BLGT    armSP_FFTInv_CToC_SC16_Sfs_Radix2_OutOfPlace_unsafe
     274         SUBS    argScale,argScale,#1            @// save and restore scale (RN4) in the scaled stages
     275         BGE     scaledRadix2psLoop
     276 
     277 scaledRadix2psStage:
     278         BL      armSP_FFTInv_CToC_SC16_Sfs_Radix2_ps_OutOfPlace_unsafe
     279         B       generalLastStageUnscaledRadix2
     280 
     281 
     282 outScale:
                 @// Reload diff (order - scale) saved in generalScaleCase. An even
                 @// value finishes with unscaled radix-4 stages, an odd value with
                 @// unscaled radix-2 stages (diff == 1 went through scaleps).
     283         M_LDR   diff, diffOnStack
     284         @//check for even or odd order
     285         TST     diff,#0x00000001
     286         BEQ     generalUnscaledRadix4Loop
     287         B       unscaledRadix2Loop
     288 
     289 generalUnscaledRadix4Loop:
                 @// Loop exit depends on subFFTNum reaching exactly 4, which is
                 @// presumably driven by the stage routine.
     290         CMP        subFFTNum,#4
     291          BEQ        generalLastStageUnscaledRadix4
     292          BL        armSP_FFTInv_CToC_SC16_Radix4_OutOfPlace_unsafe
     293          B        generalUnscaledRadix4Loop
     294 
     295 generalLastStageUnscaledRadix4:
     296         BL      armSP_FFTInv_CToC_SC16_Radix4_ls_OutOfPlace_unsafe
     297         B        End
     298 
     299 unscaledRadix2Loop:
                 @// Plain radix-2 stages until subFFTNum is 4, then the ps and ls
                 @// stages close the transform.
     300         CMP        subFFTNum,#4
     301          BEQ        generalLastTwoStagesUnscaledRadix2
     302          BL        armSP_FFTInv_CToC_SC16_Radix2_OutOfPlace_unsafe
     303          B        unscaledRadix2Loop
     304 
     305 generalLastTwoStagesUnscaledRadix2:
     306         BL      armSP_FFTInv_CToC_SC16_Radix2_ps_OutOfPlace_unsafe
     307 generalLastStageUnscaledRadix2:
     308         BL      armSP_FFTInv_CToC_SC16_Radix2_ls_OutOfPlace_unsafe
     309         B        End
     310 
     311 
     312 FFTEnd:                                              @// Does only the scaling
     313 
                 @// Nothing to do unless the saved shift is positive.
     314         M_LDR   diff, diffOnStack
     315         CMP     diff,#0
     316         BLE     End
     317 
                 @// The negated count is broadcast to the 16-bit lanes of dShift.
     318         RSB     diff,diff,#0                        @// to use VRSHL for right shift by a variable
     319         VDUP    dShift,diff
     320 
     321 scaleFFTData:                                        @// N = subFFTSize  ; dataptr = pDst  ; scale = diff
                 @// Each pass loads one 32-bit element (two 16-bit lanes) into
                 @// lane 0 of D0, shifts D0 as 16-bit lanes and writes the same
                 @// 32-bit lane back in place, post-incrementing pSrc. BGT tests
                 @// the SUBS on subFFTSize, so the body runs at least once; on
                 @// the order-0 path subFFTSize is still 1.
     322         VLD1    {dX0S32[0]},[pSrc]                        @// pSrc contains pDst pointer
     323         SUBS    subFFTSize,subFFTSize,#1
     324         VRSHL   dX0,dShift
     325         VST1    {dX0S32[0]},[pSrc]!
     326 
     327         BGT     scaleFFTData
     328 
     329 
     330 End:
     331         @// Set return value
                 @// result is r0; every path returns OMX_Sts_NoErr.
     332         MOV     result, #OMX_Sts_NoErr
     333 
     334         @// Write function tail
     335         M_END
    336 
    337 
    338 
    339 
    340 
    341 
    342     .END
    343