Home | History | Annotate | Download | only in neon
      1 @//
      2 @//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
      3 @//
      4 @//  Use of this source code is governed by a BSD-style license
      5 @//  that can be found in the LICENSE file in the root of the source
      6 @//  tree. An additional intellectual property rights grant can be found
      7 @//  in the file PATENTS.  All contributing project authors may
      8 @//  be found in the AUTHORS file in the root of the source tree.
      9 @//
     10 @//  This file was originally licensed as follows. It has been
     11 @//  relicensed with permission from the copyright holders.
     12 
     13 @//
     14 @//
     15 @// File Name:  armSP_FFT_CToC_SC16_Radix2_unsafe_s.s
     16 @// OpenMAX DL: v1.0.2
     17 @// Last Modified Revision:   5892
     18 @// Last Modified Date:       Thu, 07 Jun 2007
     19 @//
     20 @// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
     21 @//
     22 @//
     23 @//
     24 @// Description:
     25 @// Compute one radix-2 FFT stage for an N-point complex signal
     26 @//
     27 @//
     28 
     29 
     30 @// Include standard headers
     31 
     32 #include "dl/api/arm/armCOMM_s.h"
     33 #include "dl/api/arm/omxtypes_s.h"
     34 
     35 
     36 @// Import symbols required from other files
     37 @// (For example tables)
     38 
     39 
     40 
     41 @// Set debugging level
     42 @//DEBUG_ON    SETL {TRUE}
     43 
     44 
     45 
     46 @// Guarding implementation by the processor name
     47 
     48 
     49 
     50 
     51     @// Guarding implementation by the processor name
     52 
     53 
     54 @// Input Registers
        @//
        @// Core-register aliases whose values FFTSTAGE reads on entry:
        @//   pSrc       (r0) - base address of the two VLD2 loads in the set loop
        @//   pDst       (r2) - base address of the two VST2 stores in the set loop
        @//   pTwiddle   (r1) - base address of the VLD1 that fetches dW once per group
        @//   subFFTNum  (r6) - shifted right by one at the top of FFTSTAGE
        @//   subFFTSize (r7) - replaced by twice its entry value inside FFTSTAGE
        @// FFTSTAGE writes all five of them before it finishes.
        @//
        @// NOTE(review): keep comments off the #define lines themselves; text that
        @// follows the register name would presumably become part of the macro
        @// replacement and end up inside every instruction that uses the alias.
     55 
     56 #define pSrc                            r0
     57 #define pDst                            r2
     58 #define pTwiddle                        r1
     59 #define subFFTNum                       r6
     60 #define subFFTSize                      r7
     61 
     62 
     63 @//Output Registers
     64 
     65 
     66 @// Local Scratch Registers
        @//
        @// Core registers that FFTSTAGE overwrites without reading their entry values:
        @//   outPointStep (r3)  - SMULBB product of grpCount and pointStep; post-increment
        @//                        of the first VST2 and the amount used to rewind the
        @//                        pointers at the end of the stage
        @//   pointStep    (r4)  - post-increment of the twiddle VLD1 and of the first VLD2;
        @//                        also added to pSrc after every group
        @//   grpCount     (r5)  - group-loop counter: starts at 2*subFFTSize, minus 2 per group
        @//   setCount     (r8)  - set-loop counter: starts at pointStep/4, minus 4 per pass
        @//   step         (r10) - 16 - pointStep, post-increment of the second VLD2
        @//   dstStep      (r11) - 16 - outPointStep, post-increment of the second VST2
        @//   pTmp         (r9)  - holds pDst while pSrc and pDst are exchanged
     67 
     68 #define outPointStep                    r3
     69 #define pointStep                       r4
     70 #define grpCount                        r5
     71 #define setCount                        r8
     72 #define step                            r10
     73 #define dstStep                         r11
     74 #define pTmp                            r9
     75 
     76 @// Neon Registers
        @//
        @//   dW        - twiddle factor; FFTSTAGE uses lanes dW[0] and dW[1] as scalars
        @//   dX0, dX1  - destination of the first VLD2 (point0: real, imaginary)
        @//   dX2, dX3  - destination of the second VLD2 (point1: real, imaginary); later
        @//               overwritten with the narrowed twiddle product
        @//   qT0, qT1  - 32-bit accumulators for the VMULL/VMLAL/VMLSL products
        @//   dY0..dY3  - butterfly results written out by the two VST2 stores
        @//
        @// Q3 is the register pair D6/D7 and Q4 is D8/D9, so qT0 shares storage with
        @// dY0/dY1 and qT1 with dY2/dY3. FFTSTAGE narrows qT0/qT1 into dX2/dX3 before
        @// it writes any dY register, which is what makes the overlap harmless.
        @//
        @// NOTE(review): D8/D9 are callee-saved under the AAPCS and the M_START lines
        @// in this file name only r4 - confirm that the callers of these "unsafe"
        @// entry points preserve D8/D9 (and r5-r11) themselves.
     77 
     78 #define dW                              D0.S16
     79 #define dX0                             D2.S16
     80 #define dX1                             D3.S16
     81 #define dX2                             D4.S16
     82 #define dX3                             D5.S16
     83 #define dY0                             D6.S16
     84 #define dY1                             D7.S16
     85 #define dY2                             D8.S16
     86 #define dY3                             D9.S16
     87 #define qT0                             Q3.S32
     88 #define qT1                             Q4.S32
     89 
     90 
     91 @// FFTSTAGE scaled, inverse, name
        @//
        @// Expands to the body of one radix-2 butterfly stage. For every group it
        @// loads one twiddle dW and, four complex 16-bit points at a time, forms
        @//     T  = W * X1        (or conj(W) * X1 when inverse is "TRUE"),
        @//                         with W = dW[0] + j*dW[1]
        @//     Y0 = X0 - T        stored at pDst
        @//     Y1 = X0 + T        stored at pDst + outPointStep
        @//
        @// Macro arguments:
        @//   scaled  - "TRUE" selects the halving forms VHSUB/VHADD; any other string
        @//             selects plain VSUB/VADD
        @//   inverse - "TRUE" selects the conjugate twiddle multiply
        @//   name    - suffix appended to the grpLoop/setLoop labels so that every
        @//             expansion in this file gets distinct labels
        @//
        @// Register inputs: pSrc, pDst, pTwiddle, subFFTNum, subFFTSize.
        @// On exit: subFFTNum is halved, subFFTSize is doubled, pTwiddle is rewound
        @// by outPointStep, and pSrc/pDst are rewound and exchanged (the stage's
        @// output buffer becomes pSrc, its input buffer becomes pDst).
        @// Overwrites: outPointStep, pointStep, grpCount, setCount, step, dstStep,
        @// pTmp, D0, D2-D9 and the condition flags.
     92         .MACRO FFTSTAGE scaled, inverse, name
     93 
     94         @// Define stack arguments
     95 
     96 
     97         @// Update grpCount and grpSize right away in order to reuse pGrpCount and pGrpSize regs
     98 
     99         LSR     subFFTNum,subFFTNum,#1                      @// grpSize = subFFTNum / 2
    100         LSL     grpCount,subFFTSize,#1                      @// grpCount = 2 * subFFTSize
    101 
    102 
    103         @// pointStep = 2 * grpSize for the multiply below; it is doubled again
    104         @// afterwards to 4 * grpSize, the value used as load post-increment
    105         MOV     pointStep,subFFTNum,LSL #1
    106 
    107         @// update subFFTSize for the next stage
    108         MOV     subFFTSize,grpCount
    109 
    110         @// outPointStep = grpCount * pointStep (SMULBB: signed product of the low halfwords),
    111         @// i.e. 2 * subFFTNum * subFFTSize in entry values when subFFTNum is even
    112         SMULBB  outPointStep,grpCount,pointStep
    113         LSL     pointStep,pointStep,#1                      @// pointStep = 4 * grpSize
    114 
    115 
    116         RSB      step,pointStep,#16                         @// step = 16 - pointStep
    117         RSB      dstStep,outPointStep,#16                   @// dstStep = 16 - outPointStep
    118 
    119         @// Loop on the groups
    120 
    121 grpLoop\name:
    122 
    123         VLD1     dW,[pTwiddle],pointStep                @// [wi | wr]; pTwiddle += pointStep
    124         MOV      setCount,pointStep,LSR #2              @// setCount = pointStep / 4 = grpSize
    125 
    126 
    127         @// Loop on the sets: 4 at a time
    128 
    129 
    130 setLoop\name:
    131 
    132         @// The two post-increments add up to 16, so pSrc ends 16 bytes further on
    133         VLD2    {dX0,dX1},[pSrc],pointStep            @// point0: dX0-real part dX1-img part
    134         VLD2    {dX2,dX3},[pSrc],step                 @// point1: dX2-real part dX3-img part
    135 
    136         SUBS    setCount,setCount,#4                  @// flags are consumed by the BGT that closes the set loop
    137 
    138         .ifeqs  "\inverse", "TRUE"
    139             VMULL   qT0,dX2,dW[0]
    140             VMLAL   qT0,dX3,dW[1]                       @// real part: re*dW[0] + im*dW[1]
    141             VMULL   qT1,dX3,dW[0]
    142             VMLSL   qT1,dX2,dW[1]                       @// imag part: im*dW[0] - re*dW[1]
    143 
    144         .ELSE
    145 
    146             VMULL   qT0,dX2,dW[0]
    147             VMLSL   qT0,dX3,dW[1]                       @// real part: re*dW[0] - im*dW[1]
    148             VMULL   qT1,dX3,dW[0]
    149             VMLAL   qT1,dX2,dW[1]                       @// imag part: im*dW[0] + re*dW[1]
    150 
    151         .ENDIF
    152         @// Round, shift right by 15 and narrow the products back to 16 bits
    153         VRSHRN  dX2,qT0,#15
    154         VRSHRN  dX3,qT1,#15
    155         @// qT0/qT1 have been consumed, so the aliased dY0..dY3 may now be written
    156         .ifeqs "\scaled", "TRUE"
    157             VHSUB    dY0,dX0,dX2
    158             VHSUB    dY1,dX1,dX3
    159             VHADD    dY2,dX0,dX2
    160             VHADD    dY3,dX1,dX3
    161 
    162         .ELSE
    163             VSUB    dY0,dX0,dX2
    164             VSUB    dY1,dX1,dX3
    165             VADD    dY2,dX0,dX2
    166             VADD    dY3,dX1,dX3
    167 
    168         .ENDIF
    169         @// Differences go to pDst, sums to pDst + outPointStep; net advance is 16 bytes
    170         VST2    {dY0,dY1},[pDst],outPointStep
    171         VST2    {dY2,dY3},[pDst],dstStep              @// dstStep = -outPointStep + 16
    172 
    173         BGT     setLoop\name
    174 
    175         SUBS    grpCount,grpCount,#2
    176         ADD     pSrc,pSrc,pointStep                   @// step over the half already read as point1
    177         BGT     grpLoop\name
    178 
    179 
    180         @// Reset and Swap pSrc and pDst for the next stage
    181         MOV     pTmp,pDst
    182         SUB     pDst,pSrc,outPointStep,LSL #1       @// new pDst = pSrc - 2*outPointStep
    183         SUB     pSrc,pTmp,outPointStep              @// new pSrc = old pDst - outPointStep
    184 
    185         @// Reset pTwiddle for the next stage
    186         SUB     pTwiddle,pTwiddle,outPointStep      @// pTwiddle -= outPointStep
    187         @// NOTE(review): the three rewinds match the loop strides only when grpSize is a multiple of 4
    188 
    189         .endm
    190 
    191 
    192         @// Entry point whose body is FFTSTAGE with scaled="FALSE" (VSUB/VADD) and
        @// inverse="FALSE" (multiply by W). M_START/M_END are defined outside this
        @// file; presumably they emit the prologue and epilogue - confirm in armCOMM_s.h.
    193         M_START armSP_FFTFwd_CToC_SC16_Radix2_OutOfPlace_unsafe,r4
    194         FFTSTAGE "FALSE","FALSE",FWD
    195         M_END
    196 
    197 
    198         @// Entry point whose body is FFTSTAGE with scaled="FALSE" (VSUB/VADD) and
        @// inverse="TRUE" (multiply by conj(W)). M_START/M_END are defined outside
        @// this file; presumably they emit the prologue and epilogue - confirm in armCOMM_s.h.
    199         M_START armSP_FFTInv_CToC_SC16_Radix2_OutOfPlace_unsafe,r4
    200         FFTSTAGE "FALSE","TRUE",INV
    201         M_END
    202 
    203 
    204         @// Entry point whose body is FFTSTAGE with scaled="TRUE" (halving VHSUB/VHADD)
        @// and inverse="FALSE" (multiply by W). M_START/M_END are defined outside
        @// this file; presumably they emit the prologue and epilogue - confirm in armCOMM_s.h.
    205         M_START armSP_FFTFwd_CToC_SC16_Sfs_Radix2_OutOfPlace_unsafe,r4
    206         FFTSTAGE "TRUE","FALSE",FWDSFS
    207         M_END
    208 
    209 
    210         @// Entry point whose body is FFTSTAGE with scaled="TRUE" (halving VHSUB/VHADD)
        @// and inverse="TRUE" (multiply by conj(W)). M_START/M_END are defined outside
        @// this file; presumably they emit the prologue and epilogue - confirm in armCOMM_s.h.
    211         M_START armSP_FFTInv_CToC_SC16_Sfs_Radix2_OutOfPlace_unsafe,r4
    212         FFTSTAGE "TRUE","TRUE",INVSFS
    213         M_END
    214 
    215 
    216 
    217 
    218 
    219     .END                                            @// end of the assembly source
    220