Home | History | Annotate | Download | only in neon
      1 @//
      2 @//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
      3 @//
      4 @//  Use of this source code is governed by a BSD-style license
      5 @//  that can be found in the LICENSE file in the root of the source
      6 @//  tree. An additional intellectual property rights grant can be found
      7 @//  in the file PATENTS.  All contributing project authors may
      8 @//  be found in the AUTHORS file in the root of the source tree.
      9 @//
     10 @//  This file was originally licensed as follows. It has been
     11 @//  relicensed with permission from the copyright holders.
     12 @//
     13 
     14 @//
     15 @// File Name:  armSP_FFT_CToC_SC32_Radix2_ls_unsafe_s.s
     16 @// OpenMAX DL: v1.0.2
     17 @// Last Modified Revision:   7493
     18 @// Last Modified Date:       Mon, 24 Sep 2007
     19 @//
     20 @// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
     21 @//
     22 @//
     23 @//
     24 @// Description:
     25 @// Compute the last stage of a Radix 2 DIT in-order out-of-place FFT
     26 @// stage for a N point complex signal.
     27 @//
     28 
     29 
     30 
     31 @// Include standard headers
     32 
     33 #include "dl/api/arm/armCOMM_s.h"
     34 #include "dl/api/arm/omxtypes_s.h"
     35 
     36 
     37 @// Import symbols required from other files
     38 @// (For example tables)
     39 
     40 
     41 
     42 
     43 @// Set debugging level
     44 @//DEBUG_ON    SETL {TRUE}
     45 
     46 
     47 @// Guarding implementation by the processor name
     48 
     49 
     50 @// Input Registers
        @// Core-register aliases that FFTSTAGE uses on entry:
        @//   pSrc       (r0) source pointer, read with post-increment (VLD4)
        @//   pTwiddle   (r1) twiddle pointer, read with post-increment (VLD2)
        @//   pDst       (r2) destination pointer, written with VST2
        @//   subFFTNum  (r6) not read by FFTSTAGE; it is overwritten with 1
        @//   subFFTSize (r7) entry value sizes the stage; FFTSTAGE doubles it
     51 
     52 #define pSrc		r0
     53 #define pDst		r2
     54 #define pTwiddle	r1
     55 #define subFFTNum	r6
     56 #define subFFTSize	r7
     57 
     58 
     59 @//Output Registers
     60 
     61 
     62 @// Local Scratch Registers
        @//   outPointStep (r3) entry subFFTSize * 8, used as a byte stride
        @//   grpCount     (r4) loop counter, decremented by 4 per iteration
        @//   dstStep      (r5) 16 - outPointStep, second store post-increment
        @//   pTmp         (r4) shares r4 with grpCount; FFTSTAGE only uses it
        @//                     after the loop, when grpCount is no longer needed
     63 
     64 
     65 #define outPointStep	r3
     66 #define grpCount	r4
     67 #define dstStep		r5
     68 #define pTmp		r4
     69 
     70 @// Neon Registers
        @// Typed NEON aliases used by FFTSTAGE (all spelled in lower case):
        @//   dWr, dWi     twiddle words loaded by VLD2 (even words / odd words)
        @//   dXr0, dXi0   first input of each pair, loaded by VLD4
        @//   dXr1, dXi1   second input of each pair; later overwritten with the
        @//                narrowed twiddle product
        @//   dYr0, dYi0   difference outputs (X0 - T)
        @//   dYr1, dYi1   sum outputs (X0 + T)
        @//   qT0, qT1     64-bit product accumulators (q5 = d10/d11, q6 = d12/d13)
     71 
     72 #define dWr	d0.s32
     73 #define dWi	d1.s32
     74 #define dXr0	d2.s32
     75 #define dXi0	d3.s32
     76 #define dXr1	d4.s32
     77 #define dXi1	d5.s32
     78 #define dYr0	d6.s32
     79 #define dYi0	d7.s32
     80 #define dYr1	d8.s32
     81 #define dYi1	d9.s32
     82 #define qT0	q5.s64
     83 #define qT1	q6.s64
     84 
     85         .macro FFTSTAGE scaled, inverse, name
                @// FFTSTAGE: loop body of the last radix-2 stage. Each loop pass
                @// consumes 4 complex 32-bit points from pSrc (two X0/X1 pairs) and
                @// 2 twiddles from pTwiddle, and writes 4 complex points to pDst.
                @//
                @// Macro arguments (compared as strings by .ifeqs):
                @//   scaled  - "TRUE" selects halving VHADD/VHSUB for the final
                @//             add/subtract; anything else selects VADD/VSUB.
                @//   inverse - "TRUE" forms T = conj(W) * X1; anything else forms
                @//             T = W * X1.
                @//   name    - suffix pasted onto the grpLoop label so that each
                @//             expansion defines a distinct label.
                @//
                @// Read on entry: pSrc, pTwiddle, pDst, subFFTSize.
                @// On exit: subFFTNum = 1, subFFTSize = 2 * entry value,
                @//          pSrc and pDst exchanged and rewound, pTwiddle rewound.
                @// Also written: outPointStep (r3), grpCount/pTmp (r4), dstStep (r5),
                @//          d0-d13 and the condition flags.
                @// The :64 and :128 qualifiers on the loads are alignment
                @// specifiers, so pTwiddle and pSrc must be aligned accordingly.
     86 
     87 
     88         MOV     outPointStep,subFFTSize,LSL #3          @// outPointStep = subFFTSize * 8 (bytes)
     89         @// Update grpCount and subFFTNum right away
     90 
     91         MOV     subFFTNum,#1                            @// subFFTNum is 1 after the last stage
     92         LSL     grpCount,subFFTSize,#1                  @// grpCount = 2 * subFFTSize
     93 
     94         @// update subFFTSize for the next stage (doubled)
     95         MOV     subFFTSize,grpCount
     96 
     97         RSB      dstStep,outPointStep,#16               @// dstStep = 16 - outPointStep
     98 
     99 
    100         @// Loop on 2 grps at a time for the last stage
                @// (one grp = one X0/X1 pair = one lane of the d registers)
    101 
    102 grpLoop\name :
                @// Two twiddles: even words go to dWr, odd words to dWi; pTwiddle += 16
    103         VLD2    {dWr,dWi},[pTwiddle :64]!
    104 
                @// Eight words w0..w7: dXr0={w0,w4} dXi0={w1,w5} dXr1={w2,w6} dXi1={w3,w7};
                @// pSrc += 32
    105         VLD4    {dXr0,dXi0,dXr1,dXi1},[pSrc :128]!
    106         SUBS    grpCount,grpCount,#4                   @// 4 points consumed; flags feed the bgt below
    107 
    108         .ifeqs  "\inverse", "TRUE"
                    @// T = conj(W) * X1, kept as 64-bit products
    109             VMULL   qT0,dWr,dXr1
    110             VMLAL   qT0,dWi,dXi1                       @// real part: Wr*Xr1 + Wi*Xi1
    111             VMULL   qT1,dWr,dXi1
    112             VMLSL   qT1,dWi,dXr1                       @// imag part: Wr*Xi1 - Wi*Xr1
    113 
    114         .else
    115 
                    @// T = W * X1, kept as 64-bit products
    116             VMULL   qT0,dWr,dXr1
    117             VMLSL   qT0,dWi,dXi1                       @// real part: Wr*Xr1 - Wi*Xi1
    118             VMULL   qT1,dWr,dXi1
    119             VMLAL   qT1,dWi,dXr1                       @// imag part: Wr*Xi1 + Wi*Xr1
    120 
    121         .endif
    122 
                @// Rounding shift right by 31 and narrow back to 32 bits; T replaces X1.
                @// NOTE(review): the shift of 31 suggests Q31 twiddles - confirm.
    123         VRSHRN  dXr1,qT0,#31
    124         VRSHRN  dXi1,qT1,#31
    125 
    126 
    127         .ifeqs "\scaled", "TRUE"
    128 
                    @// Halving forms: Y0 = (X0 - T) / 2, Y1 = (X0 + T) / 2
    129             VHSUB    dYr0,dXr0,dXr1
    130             VHSUB    dYi0,dXi0,dXi1
    131             VHADD    dYr1,dXr0,dXr1
    132             VHADD    dYi1,dXi0,dXi1
    133 
    134         .else
    135 
                    @// Y0 = X0 - T, Y1 = X0 + T
    136             VSUB    dYr0,dXr0,dXr1
    137             VSUB    dYi0,dXi0,dXi1
    138             VADD    dYr1,dXr0,dXr1
    139             VADD    dYi1,dXi0,dXi1
    140 
    141 
    142         .endif
    143 
                @// Y0 goes to pDst and Y1 to pDst + outPointStep (16 bytes each,
                @// re/im interleaved); the two post-increments add up to +16.
    144         VST2    {dYr0,dYi0},[pDst],outPointStep
    145         VST2    {dYr1,dYi1},[pDst],dstStep                  @// dstStep = -outPointStep + 16
    146 
    147         bgt     grpLoop\name                        @// repeat while grpCount > 0 (flags from SUBS)
    148 
    149 
    150         @// Reset and Swap pSrc and pDst for the next stage
                @// Per pass pSrc advanced 32 bytes and pDst a net 16 bytes. If the
                @// loop ran subFFTSize/2 times (entry subFFTSize even), the
                @// subtractions below give pDst = entry pSrc and pSrc = entry pDst.
    151         MOV     pTmp,pDst                           @// pTmp reuses r4; grpCount is finished
    152         SUB     pDst,pSrc,outPointStep,LSL #1       @// pDst = pSrc - 2*outPointStep
    153         SUB     pSrc,pTmp,outPointStep              @// pSrc = old pDst - outPointStep
    154 
    155         @// Reset pTwiddle for the next stage
    156         SUB     pTwiddle,pTwiddle,outPointStep      @// undo the 16 bytes per pass read by VLD2
    157 
    158         .endm
    159 
    160 
    161 
    162         M_START armSP_FFTFwd_CToC_SC32_Radix2_ls_OutOfPlace_unsafe,r4,""
                @// Forward variant without scaling: expands FFTSTAGE with
                @// scaled="FALSE", inverse="FALSE" and label suffix fwd.
                @// M_START and M_END are not defined in this file (presumably they
                @// come from armCOMM_s.h).
                @// NOTE(review): only this entry passes a third argument ("") to
                @// M_START - confirm it is equivalent to omitting it.
                @// NOTE(review): FFTSTAGE also writes r3, r5-r7 and d0-d13; whether
                @// M_START with r4 preserves them is not visible here - presumably
                @// callers of the _unsafe routines account for this; confirm.
    163         FFTSTAGE "FALSE","FALSE",fwd
    164         M_END
    165 
    166 
    167 
    168         M_START armSP_FFTInv_CToC_SC32_Radix2_ls_OutOfPlace_unsafe,r4
                @// Inverse variant without scaling: expands FFTSTAGE with
                @// scaled="FALSE", inverse="TRUE" and label suffix inv.
                @// M_START and M_END are not defined in this file (presumably they
                @// come from armCOMM_s.h).
                @// NOTE(review): FFTSTAGE also writes r3, r5-r7 and d0-d13; whether
                @// M_START with r4 preserves them is not visible here - confirm.
    169         FFTSTAGE "FALSE","TRUE",inv
    170         M_END
    171 
    172 
    173 
    174         M_START armSP_FFTFwd_CToC_SC32_Sfs_Radix2_ls_OutOfPlace_unsafe,r4
                @// Forward variant with scaling: expands FFTSTAGE with
                @// scaled="TRUE" (halving add/subtract), inverse="FALSE" and label
                @// suffix fwdsfs.
                @// M_START and M_END are not defined in this file (presumably they
                @// come from armCOMM_s.h).
                @// NOTE(review): FFTSTAGE also writes r3, r5-r7 and d0-d13; whether
                @// M_START with r4 preserves them is not visible here - confirm.
    175         FFTSTAGE "TRUE","FALSE",fwdsfs
    176         M_END
    177 
    178 
    179 
    180         M_START armSP_FFTInv_CToC_SC32_Sfs_Radix2_ls_OutOfPlace_unsafe,r4
                @// Inverse variant with scaling: expands FFTSTAGE with
                @// scaled="TRUE" (halving add/subtract), inverse="TRUE" and label
                @// suffix invsfs.
                @// M_START and M_END are not defined in this file (presumably they
                @// come from armCOMM_s.h).
                @// NOTE(review): FFTSTAGE also writes r3, r5-r7 and d0-d13; whether
                @// M_START with r4 preserves them is not visible here - confirm.
    181         FFTSTAGE "TRUE","TRUE",invsfs
    182         M_END
    183 
    184 	.end
    185