Home | History | Annotate | Download | only in neon
      1 @//
      2 @//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
      3 @//
      4 @//  Use of this source code is governed by a BSD-style license
      5 @//  that can be found in the LICENSE file in the root of the source
      6 @//  tree. An additional intellectual property rights grant can be found
      7 @//  in the file PATENTS.  All contributing project authors may
      8 @//  be found in the AUTHORS file in the root of the source tree.
      9 @//
     10 @//  This file was originally licensed as follows. It has been
     11 @//  relicensed with permission from the copyright holders.
     12 @//
     13 
     14 @//
     15 @// File Name:  armSP_FFT_CToC_SC32_Radix4_fs_unsafe_s.s
     16 @// OpenMAX DL: v1.0.2
     17 @// Last Modified Revision:   7767
     18 @// Last Modified Date:       Thu, 27 Sep 2007
     19 @//
     20 @// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
     21 @//
     22 @//
     23 @//
     24 @// Description:
     25 @// Compute the first radix-4 stage of an N-point complex FFT
     26 @//
     27 
     28 
     29 
     30 @// Include standard headers
     31 
     32 #include "dl/api/arm/armCOMM_s.h"
     33 #include "dl/api/arm/omxtypes_s.h"
     34 
     35 @// Import symbols required from other files
     36 @// (For example tables)
     37 
     38 
     39 
     40 
     41 @// Set debugging level
     42 @//DEBUG_ON    SETL {TRUE}
     43 
     44 
     45 
     46 @// Guarding implementation by the processor name
     47 
     48 
     49 
     50 @// Guarding implementation by the processor name
     51 
     52 
     53 @//Input Registers
        @// Preprocessor aliases for the ARM core registers that FFTSTAGE expects to
        @// be set up on entry. FFTSTAGE reads pSrc, pDst, pPingPongBuf and subFFTNum,
        @// and overwrites pSrc, pDst, subFFTNum and subFFTSize before it finishes.
        @// pTwiddle is defined here but is not referenced anywhere in this file.
        @// Keep comments off the define lines themselves so that nothing can leak
        @// into the macro expansion.
     54 
     55 #define pSrc		r0
     56 #define pDst		r2
     57 #define pTwiddle	r1
     58 #define pPingPongBuf	r5
     59 #define subFFTNum	r6
     60 #define subFFTSize	r7
     61 
     62 
     63 @//Output Registers
     64 
     65 
     66 @//Local Scratch Registers
        @// Core registers used as temporaries by FFTSTAGE. Two pairs of names share
        @// a register: grpSize and setCount are both r3, and pointStep and
        @// outPointStep are both r4, so writing one name of a pair changes the other.
     67 
     68 #define grpSize		r3
     69 @// Reuse grpSize as setCount
     70 #define setCount	r3
     71 #define pointStep	r4
     72 #define outPointStep	r4
     73 #define setStep		r8
     74 #define step1		r9
     75 #define step3		r10
     76 
     77 @// Neon Registers
        @// All aliases carry the .S32 data type (signed 32-bit lanes).
        @//   dXrN / dXiN : real / imaginary lanes of butterfly input N   (D0-D7)
        @//   dYrN / dYiN : real / imaginary lanes of intermediate N      (D8-D15)
        @//   dZrN / dZiN : real / imaginary lanes of butterfly output N  (D16-D23)
        @// Each q alias names the Q register that overlays the matching d pair
        @// (Q0 = D0:D1, Q4 = D8:D9, Q8 = D16:D17, ...), so one operation on qXN,
        @// qYN or qZN processes the real and imaginary halves together.
     78 
     79 #define dXr0	D0.S32
     80 #define dXi0	D1.S32
     81 #define dXr1	D2.S32
     82 #define dXi1	D3.S32
     83 #define dXr2	D4.S32
     84 #define dXi2	D5.S32
     85 #define dXr3	D6.S32
     86 #define dXi3	D7.S32
     87 #define dYr0	D8.S32
     88 #define dYi0	D9.S32
     89 #define dYr1	D10.S32
     90 #define dYi1	D11.S32
     91 #define dYr2	D12.S32
     92 #define dYi2	D13.S32
     93 #define dYr3	D14.S32
     94 #define dYi3	D15.S32
     95 #define qX0	Q0.S32
     96 #define qX1	Q1.S32
     97 #define qX2	Q2.S32
     98 #define qX3	Q3.S32
     99 #define qY0	Q4.S32
    100 #define qY1	Q5.S32
    101 #define qY2	Q6.S32
    102 #define qY3	Q7.S32
    103 #define dZr0	D16.S32
    104 #define dZi0	D17.S32
    105 #define dZr1	D18.S32
    106 #define dZi1	D19.S32
    107 #define dZr2	D20.S32
    108 #define dZi2	D21.S32
    109 #define dZr3	D22.S32
    110 #define dZi3	D23.S32
    111 #define qZ0	Q8.S32
    112 #define qZ1	Q9.S32
    113 #define qZ2	Q10.S32
    114 #define qZ3	Q11.S32
    115 
    116 
    117         .MACRO FFTSTAGE scaled, inverse, name
                @// FFTSTAGE: body of the first radix-4 pass. Every twiddle factor is 1 in
                @// this pass, so each butterfly needs only adds and subtracts. Data is
                @// loaded and stored with VLD2/VST2, which split 16 bytes into one D
                @// register of real parts and one of imaginary parts (two complex points),
                @// so every loop iteration produces two butterflies.
                @//
                @// Macro arguments:
                @//   scaled  - TRUE selects the halving forms VHADD/VHSUB for every add and
                @//             subtract; any other value selects VADD/VSUB.
                @//   inverse - TRUE swaps which rotated result is stored second and which is
                @//             stored fourth (see the store order notes in each variant).
                @//   name    - suffix appended to the grpZeroSetLoop label so that every
                @//             expansion gets its own label.
                @//
                @// Reads on entry : pSrc, pDst, pPingPongBuf, subFFTNum.
                @// Writes         : pSrc, pDst, subFFTNum, subFFTSize, grpSize/setCount,
                @//                  pointStep, setStep, step1, step3, D0-D23, flags.
                @// Both pSrc and pDst are accessed with a :128 alignment qualifier.
                @//
                @// NOTE(review): D8-D15 are callee-saved under the AAPCS and are written
                @// here without being saved inside the macro - confirm that M_START/M_END
                @// or the callers of these unsafe routines preserve them.
    118 
    119         @// No stack arguments are referenced inside this macro
    120 
    121         @// One complex point is a (re, im) pair of 32-bit values = 8 bytes
    122         @// pointStep is a byte offset of 2*subFFTNum, i.e. subFFTNum/4 complex points
    123         @// Note: outPointStep is the same register as pointStep for the first stage
    124 
    125         MOV     pointStep,subFFTNum,LSL #1
    126 
    127 
    128         @// Update the subFFTSize and subFFTNum regs, interleaved with the loads for the first two sets
    129         VLD2    {dXr0,dXi0},[pSrc :128],pointStep          @//  data[0]
    130         MOV     subFFTSize,#4                                 @// subFFTSize = 4
    131 
    132         @// Note: setCount = subFFTNum/4 (reuse the grpSize reg for setCount)
    133         LSR     grpSize,subFFTNum,#2
    134         VLD2    {dXr1,dXi1},[pSrc :128],pointStep          @//  data[1]
    135         MOV     subFFTNum,grpSize
    136 
    137 
    138         @// Calculate the step of input data for the next set
                @// 16*grpSize equals 2*pointStep when the entry subFFTNum is a multiple of 4
    139         @//MOV     setStep,pointStep,LSL #1
    140         MOV     setStep,grpSize,LSL #4
    141         VLD2    {dXr2,dXi2},[pSrc :128],pointStep          @//  data[2]
    142         ADD     setStep,setStep,pointStep                   @// setStep = 3*pointStep
    143         RSB     setStep,setStep,#16                         @// setStep = - 3*pointStep+16
    144 
    145         VLD2    {dXr3,dXi3},[pSrc :128],setStep            @//  data[3] & update pSrc for the next set
    146         MOV     step1,pointStep,LSL #1                      @// step1 = 2*pointStep
    147 
                @// Start the first butterfly ahead of the loop: Y0 = X0 + X2 (halved when scaled)
    148         .ifeqs "\scaled", "TRUE"
    149             VHADD    qY0,qX0,qX2
    150         .else
    151             VADD    qY0,qX0,qX2
    152         .endif
    153 
    154         RSB     step3,pointStep,#0                          @// step3 = -pointStep
    155 
    156         @// grp = 0 is a special case since all the twiddle factors are 1
    157         @// Loop on the sets : 2 sets at a time
                @// The loop is software pipelined: each iteration finishes the butterflies
                @// for the data already held in X0-X3 and Y0, and loads X0-X3 for the
                @// following pair of sets in the order data[0], data[2], data[1], data[3]
                @// using the post-increments step1, step3, step1, setStep.
                @// NOTE(review): the loads are also issued on the final iteration. If the
                @// input holds exactly 4*pointStep bytes, that last data[3] load appears to
                @// read the 16 bytes immediately after the input - confirm callers allow it.
    158 
    159 grpZeroSetLoop\name :
    160 
    161 
    162 
    163         @// Decrement setcount
                @// The flags set here are the ones tested by the BGT at the bottom of the loop
    164         SUBS    setCount,setCount,#2                    @// decrement the set loop counter
    165 
    166         .ifeqs "\scaled", "TRUE"
    167 
    168             @// finish first stage of 4 point FFT
                    @// Y2 = X0 - X2, Y1 = X1 + X3, Y3 = X1 - X3, each halved.
                    @// Every X register is reloaded only after its last use below.
    169 
    170             VHSUB    qY2,qX0,qX2
    171 
    172             VLD2    {dXr0,dXi0},[pSrc :128],step1          @//  data[0]
    173             VHADD    qY1,qX1,qX3
    174             VLD2    {dXr2,dXi2},[pSrc :128],step3          @//  data[2]
    175             VHSUB    qY3,qX1,qX3
    176 
    177 
    178             @// finish second stage of 4 point FFT
                    @// Z0 = Y0 + Y1, Z1 = Y0 - Y1,
                    @// Z2 = (Y2.re + Y3.im, Y2.im - Y3.re), Z3 = (Y2.re - Y3.im, Y2.im + Y3.re),
                    @// each halved a second time.
    179 
    180             .ifeqs "\inverse", "TRUE"
    181 
                        @// Inverse store order: Z0, Z3, Z1, Z2. The last store uses setStep to
                        @// move pDst on to the next pair of sets (net advance of 16 bytes).
    182                 VLD2    {dXr1,dXi1},[pSrc :128],step1          @//  data[1]
    183                 VHADD    qZ0,qY0,qY1
    184 
    185                 VLD2    {dXr3,dXi3},[pSrc :128],setStep            @//  data[3] & update pSrc for the next set
    186                 VHSUB    dZr3,dYr2,dYi3
    187 
    188                 VST2    {dZr0,dZi0},[pDst :128],outPointStep
    189                 VHADD    dZi3,dYi2,dYr3
    190 
    191                 VHSUB    qZ1,qY0,qY1
    192                 VST2    {dZr3,dZi3},[pDst :128],outPointStep
    193 
    194                 VHADD    dZr2,dYr2,dYi3
    195                 VST2    {dZr1,dZi1},[pDst :128],outPointStep
    196                 VHSUB    dZi2,dYi2,dYr3
    197 
    198                 VHADD    qY0,qX0,qX2                     @// u0 for next iteration
    199                 VST2    {dZr2,dZi2},[pDst :128],setStep
    200 
    201 
    202             .else
    203 
                        @// Forward store order: Z0, Z2, Z1, Z3. The last store uses setStep to
                        @// move pDst on to the next pair of sets (net advance of 16 bytes).
    204                 VLD2    {dXr1,dXi1},[pSrc :128],step1          @//  data[1]
    205                 VHADD    qZ0,qY0,qY1
    206 
    207                 VLD2    {dXr3,dXi3},[pSrc :128],setStep            @//  data[3] & update pSrc for the next set
    208                 VHADD    dZr2,dYr2,dYi3
    209 
    210                 VST2    {dZr0,dZi0},[pDst :128],outPointStep
    211                 VHSUB    dZi2,dYi2,dYr3
    212 
    213                 VHSUB    qZ1,qY0,qY1
    214                 VST2    {dZr2,dZi2},[pDst :128],outPointStep
    215 
    216                 VHSUB    dZr3,dYr2,dYi3
    217                 VST2    {dZr1,dZi1},[pDst :128],outPointStep
    218                 VHADD    dZi3,dYi2,dYr3
    219 
    220                 VHADD    qY0,qX0,qX2                     @// u0 for next iteration
    221                 VST2    {dZr3,dZi3},[pDst :128],setStep
    222 
    223             .endif
    224 
    225 
    226 
    227         .else
    228 
    229             @// finish first stage of 4 point FFT
                    @// Y2 = X0 - X2, Y1 = X1 + X3, Y3 = X1 - X3 (no scaling).
                    @// Every X register is reloaded only after its last use below.
    230 
    231 
    232             VSUB    qY2,qX0,qX2
    233 
    234             VLD2    {dXr0,dXi0},[pSrc :128],step1          @//  data[0]
    235             VADD    qY1,qX1,qX3
    236             VLD2    {dXr2,dXi2},[pSrc :128],step3          @//  data[2]
    237             VSUB    qY3,qX1,qX3
    238 
    239 
    240             @// finish second stage of 4 point FFT
                    @// Z0 = Y0 + Y1, Z1 = Y0 - Y1,
                    @// Z2 = (Y2.re + Y3.im, Y2.im - Y3.re), Z3 = (Y2.re - Y3.im, Y2.im + Y3.re).
    241 
    242             .ifeqs "\inverse", "TRUE"
    243 
                        @// Inverse store order: Z0, Z3, Z1, Z2. The last store uses setStep to
                        @// move pDst on to the next pair of sets (net advance of 16 bytes).
    244                 VLD2    {dXr1,dXi1},[pSrc :128],step1          @//  data[1]
    245                 VADD    qZ0,qY0,qY1
    246 
    247                 VLD2    {dXr3,dXi3},[pSrc :128],setStep            @//  data[3] & update pSrc for the next set
    248                 VSUB    dZr3,dYr2,dYi3
    249 
    250                 VST2    {dZr0,dZi0},[pDst :128],outPointStep
    251                 VADD    dZi3,dYi2,dYr3
    252 
    253                 VSUB    qZ1,qY0,qY1
    254                 VST2    {dZr3,dZi3},[pDst :128],outPointStep
    255 
    256                 VADD    dZr2,dYr2,dYi3
    257                 VST2    {dZr1,dZi1},[pDst :128],outPointStep
    258                 VSUB    dZi2,dYi2,dYr3
    259 
    260                 VADD    qY0,qX0,qX2                     @// u0 for next iteration
    261                 VST2    {dZr2,dZi2},[pDst :128],setStep
    262 
    263 
    264             .else
    265 
                        @// Forward store order: Z0, Z2, Z1, Z3. The last store uses setStep to
                        @// move pDst on to the next pair of sets (net advance of 16 bytes).
    266                 VLD2    {dXr1,dXi1},[pSrc :128],step1          @//  data[1]
    267                 VADD    qZ0,qY0,qY1
    268 
    269                 VLD2    {dXr3,dXi3},[pSrc :128],setStep            @//  data[3] & update pSrc for the next set
    270                 VADD    dZr2,dYr2,dYi3
    271 
    272                 VST2    {dZr0,dZi0},[pDst :128],outPointStep
    273                 VSUB    dZi2,dYi2,dYr3
    274 
    275                 VSUB    qZ1,qY0,qY1
    276                 VST2    {dZr2,dZi2},[pDst :128],outPointStep
    277 
    278                 VSUB    dZr3,dYr2,dYi3
    279                 VST2    {dZr1,dZi1},[pDst :128],outPointStep
    280                 VADD    dZi3,dYi2,dYr3
    281 
    282                 VADD    qY0,qX0,qX2                     @// u0 for next iteration
    283                 VST2    {dZr3,dZi3},[pDst :128],setStep
    284 
    285             .endif
    286 
    287         .endif
    288 
                @// Repeat while setCount, as decremented by the SUBS at the loop top, is above zero
    289         BGT     grpZeroSetLoop\name
    290 
    291         @// reset pSrc to the start of this stage output for the next stage
                @// pDst advanced by 16 bytes per iteration; assuming setCount was even on
                @// loop entry that totals pointStep bytes, which is subtracted again here
    292         SUB     pSrc,pDst,pointStep                     @// pSrc = pDst - pointStep
    293         MOV     pDst,pPingPongBuf
    294 
    295 
    296         .endm
    297 
    298 
    299 
    300         M_START armSP_FFTFwd_CToC_SC32_Radix4_fs_OutOfPlace_unsafe,r4
                @// Forward, unscaled variant: expands FFTSTAGE with scaled = FALSE and
                @// inverse = FALSE, using fwd as the loop label suffix.
                @// M_START and M_END are not defined in this file.
                @// NOTE(review): presumably they come from armCOMM_s.h and the r4 argument
                @// selects the core registers to save - confirm against that header.
    301         FFTSTAGE "FALSE","FALSE",fwd
    302         M_END
    303 
    304 
    305 
    306         M_START armSP_FFTInv_CToC_SC32_Radix4_fs_OutOfPlace_unsafe,r4
                @// Inverse, unscaled variant: expands FFTSTAGE with scaled = FALSE and
                @// inverse = TRUE, using inv as the loop label suffix.
                @// M_START and M_END are not defined in this file.
                @// NOTE(review): presumably they come from armCOMM_s.h and the r4 argument
                @// selects the core registers to save - confirm against that header.
    307         FFTSTAGE "FALSE","TRUE",inv
    308         M_END
    309 
    310 
    311         M_START armSP_FFTFwd_CToC_SC32_Sfs_Radix4_fs_OutOfPlace_unsafe,r4
                @// Forward, scaled variant: expands FFTSTAGE with scaled = TRUE and
                @// inverse = FALSE, using fwdsfs as the loop label suffix, so every add
                @// and subtract in the stage is the halving form.
                @// M_START and M_END are not defined in this file.
                @// NOTE(review): presumably they come from armCOMM_s.h and the r4 argument
                @// selects the core registers to save - confirm against that header.
    312         FFTSTAGE "TRUE","FALSE",fwdsfs
    313         M_END
    314 
    315 
    316         M_START armSP_FFTInv_CToC_SC32_Sfs_Radix4_fs_OutOfPlace_unsafe,r4
                @// Inverse, scaled variant: expands FFTSTAGE with scaled = TRUE and
                @// inverse = TRUE, using invsfs as the loop label suffix, so every add
                @// and subtract in the stage is the halving form.
                @// M_START and M_END are not defined in this file.
                @// NOTE(review): presumably they come from armCOMM_s.h and the r4 argument
                @// selects the core registers to save - confirm against that header.
    317         FFTSTAGE "TRUE","TRUE",invsfs
    318         M_END
    319 
    320 	.end
    321