Home | History | Annotate | Download | only in mips
      1 /*Copyright (c) 2013, Xiph.Org Foundation and contributors.
      2 
      3   All rights reserved.
      4 
      5   Redistribution and use in source and binary forms, with or without
      6    modification, are permitted provided that the following conditions are met:
      7 
      8     * Redistributions of source code must retain the above copyright notice,
      9        this list of conditions and the following disclaimer.
     10     * Redistributions in binary form must reproduce the above copyright notice,
     11        this list of conditions and the following disclaimer in the
     12        documentation and/or other materials provided with the distribution.
     13 
     14   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     15   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     16   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     17   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     18   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     19   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     20   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     21   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     22   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     23   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     24   POSSIBILITY OF SUCH DAMAGE.*/
     25 
     26 #ifndef KISS_FFT_MIPSR1_H
     27 #define KISS_FFT_MIPSR1_H
     28 
     29 #if !defined(KISS_FFT_GUTS_H)
     30 #error "This file should only be included from _kiss_fft_guts.h"
     31 #endif
     32 
     33 #ifdef FIXED_POINT
     34 
     35 #define S_MUL_ADD(a, b, c, d) (S_MUL(a,b)+S_MUL(c,d))
     36 #define S_MUL_SUB(a, b, c, d) (S_MUL(a,b)-S_MUL(c,d))
     37 
     38 #undef S_MUL_ADD
     39 static inline int S_MUL_ADD(int a, int b, int c, int d) {
     40     int m;
     41     asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a), "r" ((int)b));
     42     asm volatile("madd $ac1, %0, %1" : : "r" ((int)c), "r" ((int)d));
     43     asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m): "i" (15));
     44     return m;
     45 }
     46 
     47 #undef S_MUL_SUB
     48 static inline int S_MUL_SUB(int a, int b, int c, int d) {
     49     int m;
     50     asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a), "r" ((int)b));
     51     asm volatile("msub $ac1, %0, %1" : : "r" ((int)c), "r" ((int)d));
     52     asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m): "i" (15));
     53     return m;
     54 }
     55 
     56 #undef C_MUL
     57 #   define C_MUL(m,a,b) (m=C_MUL_fun(a,b))
     58 static inline kiss_fft_cpx C_MUL_fun(kiss_fft_cpx a, kiss_twiddle_cpx b) {
     59     kiss_fft_cpx m;
     60 
     61     asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a.r), "r" ((int)b.r));
     62     asm volatile("msub $ac1, %0, %1" : : "r" ((int)a.i), "r" ((int)b.i));
     63     asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m.r): "i" (15));
     64     asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a.r), "r" ((int)b.i));
     65     asm volatile("madd $ac1, %0, %1" : : "r" ((int)a.i), "r" ((int)b.r));
     66     asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m.i): "i" (15));
     67 
     68     return m;
     69 }
     70 #undef C_MULC
     71 #   define C_MULC(m,a,b) (m=C_MULC_fun(a,b))
     72 static inline kiss_fft_cpx C_MULC_fun(kiss_fft_cpx a, kiss_twiddle_cpx b) {
     73     kiss_fft_cpx m;
     74 
     75     asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a.r), "r" ((int)b.r));
     76     asm volatile("madd $ac1, %0, %1" : : "r" ((int)a.i), "r" ((int)b.i));
     77     asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m.r): "i" (15));
     78     asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a.i), "r" ((int)b.r));
     79     asm volatile("msub $ac1, %0, %1" : : "r" ((int)a.r), "r" ((int)b.i));
     80     asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m.i): "i" (15));
     81 
     82     return m;
     83 }
     84 
     85 #endif /* FIXED_POINT */
     86 
     87 #define OVERRIDE_kf_bfly5
     88 static void kf_bfly5(
     89                      kiss_fft_cpx * Fout,
     90                      const size_t fstride,
     91                      const kiss_fft_state *st,
     92                      int m,
     93                      int N,
     94                      int mm
     95                     )
     96 {
     97    kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
     98    int i, u;
     99    kiss_fft_cpx scratch[13];
    100 
    101    const kiss_twiddle_cpx *tw;
    102    kiss_twiddle_cpx ya,yb;
    103    kiss_fft_cpx * Fout_beg = Fout;
    104 
    105 #ifdef FIXED_POINT
    106    ya.r = 10126;
    107    ya.i = -31164;
    108    yb.r = -26510;
    109    yb.i = -19261;
    110 #else
    111    ya = st->twiddles[fstride*m];
    112    yb = st->twiddles[fstride*2*m];
    113 #endif
    114 
    115    tw=st->twiddles;
    116 
    117    for (i=0;i<N;i++)
    118    {
    119       Fout = Fout_beg + i*mm;
    120       Fout0=Fout;
    121       Fout1=Fout0+m;
    122       Fout2=Fout0+2*m;
    123       Fout3=Fout0+3*m;
    124       Fout4=Fout0+4*m;
    125 
    126       /* For non-custom modes, m is guaranteed to be a multiple of 4. */
    127       for ( u=0; u<m; ++u ) {
    128          scratch[0] = *Fout0;
    129 
    130 
    131          C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
    132          C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
    133          C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
    134          C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
    135 
    136          C_ADD( scratch[7],scratch[1],scratch[4]);
    137          C_SUB( scratch[10],scratch[1],scratch[4]);
    138          C_ADD( scratch[8],scratch[2],scratch[3]);
    139          C_SUB( scratch[9],scratch[2],scratch[3]);
    140 
    141          Fout0->r += scratch[7].r + scratch[8].r;
    142          Fout0->i += scratch[7].i + scratch[8].i;
    143          scratch[5].r = scratch[0].r + S_MUL_ADD(scratch[7].r,ya.r,scratch[8].r,yb.r);
    144          scratch[5].i = scratch[0].i + S_MUL_ADD(scratch[7].i,ya.r,scratch[8].i,yb.r);
    145 
    146          scratch[6].r =  S_MUL_ADD(scratch[10].i,ya.i,scratch[9].i,yb.i);
    147          scratch[6].i =  -S_MUL_ADD(scratch[10].r,ya.i,scratch[9].r,yb.i);
    148 
    149          C_SUB(*Fout1,scratch[5],scratch[6]);
    150          C_ADD(*Fout4,scratch[5],scratch[6]);
    151 
    152          scratch[11].r = scratch[0].r + S_MUL_ADD(scratch[7].r,yb.r,scratch[8].r,ya.r);
    153          scratch[11].i = scratch[0].i + S_MUL_ADD(scratch[7].i,yb.r,scratch[8].i,ya.r);
    154 
    155          scratch[12].r =  S_MUL_SUB(scratch[9].i,ya.i,scratch[10].i,yb.i);
    156          scratch[12].i =  S_MUL_SUB(scratch[10].r,yb.i,scratch[9].r,ya.i);
    157 
    158          C_ADD(*Fout2,scratch[11],scratch[12]);
    159          C_SUB(*Fout3,scratch[11],scratch[12]);
    160 
    161          ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
    162       }
    163    }
    164 }
    165 
    166 
    167 #endif /* KISS_FFT_MIPSR1_H */
    168