Home | History | Annotate | Download | only in aec
      1 /*
      2  *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #include "webrtc/modules/audio_processing/aec/aec_rdft.h"
     12 #include "webrtc/typedefs.h"
     13 
     14 static void bitrv2_128_mips(float* a) {
     15   // n is 128
     16   float xr, xi, yr, yi;
     17 
     18   xr = a[8];
     19   xi = a[9];
     20   yr = a[16];
     21   yi = a[17];
     22   a[8] = yr;
     23   a[9] = yi;
     24   a[16] = xr;
     25   a[17] = xi;
     26 
     27   xr = a[64];
     28   xi = a[65];
     29   yr = a[2];
     30   yi = a[3];
     31   a[64] = yr;
     32   a[65] = yi;
     33   a[2] = xr;
     34   a[3] = xi;
     35 
     36   xr = a[72];
     37   xi = a[73];
     38   yr = a[18];
     39   yi = a[19];
     40   a[72] = yr;
     41   a[73] = yi;
     42   a[18] = xr;
     43   a[19] = xi;
     44 
     45   xr = a[80];
     46   xi = a[81];
     47   yr = a[10];
     48   yi = a[11];
     49   a[80] = yr;
     50   a[81] = yi;
     51   a[10] = xr;
     52   a[11] = xi;
     53 
     54   xr = a[88];
     55   xi = a[89];
     56   yr = a[26];
     57   yi = a[27];
     58   a[88] = yr;
     59   a[89] = yi;
     60   a[26] = xr;
     61   a[27] = xi;
     62 
     63   xr = a[74];
     64   xi = a[75];
     65   yr = a[82];
     66   yi = a[83];
     67   a[74] = yr;
     68   a[75] = yi;
     69   a[82] = xr;
     70   a[83] = xi;
     71 
     72   xr = a[32];
     73   xi = a[33];
     74   yr = a[4];
     75   yi = a[5];
     76   a[32] = yr;
     77   a[33] = yi;
     78   a[4] = xr;
     79   a[5] = xi;
     80 
     81   xr = a[40];
     82   xi = a[41];
     83   yr = a[20];
     84   yi = a[21];
     85   a[40] = yr;
     86   a[41] = yi;
     87   a[20] = xr;
     88   a[21] = xi;
     89 
     90   xr = a[48];
     91   xi = a[49];
     92   yr = a[12];
     93   yi = a[13];
     94   a[48] = yr;
     95   a[49] = yi;
     96   a[12] = xr;
     97   a[13] = xi;
     98 
     99   xr = a[56];
    100   xi = a[57];
    101   yr = a[28];
    102   yi = a[29];
    103   a[56] = yr;
    104   a[57] = yi;
    105   a[28] = xr;
    106   a[29] = xi;
    107 
    108   xr = a[34];
    109   xi = a[35];
    110   yr = a[68];
    111   yi = a[69];
    112   a[34] = yr;
    113   a[35] = yi;
    114   a[68] = xr;
    115   a[69] = xi;
    116 
    117   xr = a[42];
    118   xi = a[43];
    119   yr = a[84];
    120   yi = a[85];
    121   a[42] = yr;
    122   a[43] = yi;
    123   a[84] = xr;
    124   a[85] = xi;
    125 
    126   xr = a[50];
    127   xi = a[51];
    128   yr = a[76];
    129   yi = a[77];
    130   a[50] = yr;
    131   a[51] = yi;
    132   a[76] = xr;
    133   a[77] = xi;
    134 
    135   xr = a[58];
    136   xi = a[59];
    137   yr = a[92];
    138   yi = a[93];
    139   a[58] = yr;
    140   a[59] = yi;
    141   a[92] = xr;
    142   a[93] = xi;
    143 
    144   xr = a[44];
    145   xi = a[45];
    146   yr = a[52];
    147   yi = a[53];
    148   a[44] = yr;
    149   a[45] = yi;
    150   a[52] = xr;
    151   a[53] = xi;
    152 
    153   xr = a[96];
    154   xi = a[97];
    155   yr = a[6];
    156   yi = a[7];
    157   a[96] = yr;
    158   a[97] = yi;
    159   a[6] = xr;
    160   a[7] = xi;
    161 
    162   xr = a[104];
    163   xi = a[105];
    164   yr = a[22];
    165   yi = a[23];
    166   a[104] = yr;
    167   a[105] = yi;
    168   a[22] = xr;
    169   a[23] = xi;
    170 
    171   xr = a[112];
    172   xi = a[113];
    173   yr = a[14];
    174   yi = a[15];
    175   a[112] = yr;
    176   a[113] = yi;
    177   a[14] = xr;
    178   a[15] = xi;
    179 
    180   xr = a[120];
    181   xi = a[121];
    182   yr = a[30];
    183   yi = a[31];
    184   a[120] = yr;
    185   a[121] = yi;
    186   a[30] = xr;
    187   a[31] = xi;
    188 
    189   xr = a[98];
    190   xi = a[99];
    191   yr = a[70];
    192   yi = a[71];
    193   a[98] = yr;
    194   a[99] = yi;
    195   a[70] = xr;
    196   a[71] = xi;
    197 
    198   xr = a[106];
    199   xi = a[107];
    200   yr = a[86];
    201   yi = a[87];
    202   a[106] = yr;
    203   a[107] = yi;
    204   a[86] = xr;
    205   a[87] = xi;
    206 
    207   xr = a[114];
    208   xi = a[115];
    209   yr = a[78];
    210   yi = a[79];
    211   a[114] = yr;
    212   a[115] = yi;
    213   a[78] = xr;
    214   a[79] = xi;
    215 
    216   xr = a[122];
    217   xi = a[123];
    218   yr = a[94];
    219   yi = a[95];
    220   a[122] = yr;
    221   a[123] = yi;
    222   a[94] = xr;
    223   a[95] = xi;
    224 
    225   xr = a[100];
    226   xi = a[101];
    227   yr = a[38];
    228   yi = a[39];
    229   a[100] = yr;
    230   a[101] = yi;
    231   a[38] = xr;
    232   a[39] = xi;
    233 
    234   xr = a[108];
    235   xi = a[109];
    236   yr = a[54];
    237   yi = a[55];
    238   a[108] = yr;
    239   a[109] = yi;
    240   a[54] = xr;
    241   a[55] = xi;
    242 
    243   xr = a[116];
    244   xi = a[117];
    245   yr = a[46];
    246   yi = a[47];
    247   a[116] = yr;
    248   a[117] = yi;
    249   a[46] = xr;
    250   a[47] = xi;
    251 
    252   xr = a[124];
    253   xi = a[125];
    254   yr = a[62];
    255   yi = a[63];
    256   a[124] = yr;
    257   a[125] = yi;
    258   a[62] = xr;
    259   a[63] = xi;
    260 
    261   xr = a[110];
    262   xi = a[111];
    263   yr = a[118];
    264   yi = a[119];
    265   a[110] = yr;
    266   a[111] = yi;
    267   a[118] = xr;
    268   a[119] = xi;
    269 }
    270 
    271 static void cft1st_128_mips(float* a) {
          // Runs one in-place add/sub/multiply pass over the 128 floats at |a| using
          // MIPS FPU inline assembly. The work is split into three parts: floats
          // 0..7, floats 8..15, and a loop that runs 7 times over 16 floats each
          // (16 + 7 * 16 = 128). Multipliers come from the file-level tables
          // rdft_w, rdft_wk3ri_first and rdft_wk3ri_second.
          // NOTE(review): by its name this is presumably the first butterfly stage
          // of the 128-point FFT -- confirm against the portable C implementation.
          // All offsets inside the assembly are byte offsets (lwc1/swc1 move one
          // 32-bit single, so offset / 4 is the float index).
    272   float f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14;
          // Addresses and the loop counter live in int temporaries and are advanced
          // with addiu. NOTE(review): this presumes 32-bit pointers -- confirm for
          // every ABI this file is built for.
    273   int a_ptr, p1_rdft, p2_rdft, count;
          // Cursors into the two wk3ri tables; the loop moves each forward by
          // 8 bytes per iteration.
    274   float* first = rdft_wk3ri_first;
    275   float* second = rdft_wk3ri_second;
    276 
    277   __asm __volatile (
    278     ".set       push                                                    \n\t"
            // noreorder: the assembler keeps the written instruction order, so the
            // instruction placed after a branch occupies its delay slot.
    279     ".set       noreorder                                               \n\t"
    280     // first 8
            // a[0..7]: adds and subtracts only, no multiplies.
    281     "lwc1       %[f0],        0(%[a])                                   \n\t"
    282     "lwc1       %[f1],        4(%[a])                                   \n\t"
    283     "lwc1       %[f2],        8(%[a])                                   \n\t"
    284     "lwc1       %[f3],        12(%[a])                                  \n\t"
    285     "lwc1       %[f4],        16(%[a])                                  \n\t"
    286     "lwc1       %[f5],        20(%[a])                                  \n\t"
    287     "lwc1       %[f6],        24(%[a])                                  \n\t"
    288     "lwc1       %[f7],        28(%[a])                                  \n\t"
    289     "add.s      %[f8],        %[f0],        %[f2]                       \n\t"
    290     "sub.s      %[f0],        %[f0],        %[f2]                       \n\t"
    291     "add.s      %[f2],        %[f4],        %[f6]                       \n\t"
    292     "sub.s      %[f4],        %[f4],        %[f6]                       \n\t"
    293     "add.s      %[f6],        %[f1],        %[f3]                       \n\t"
    294     "sub.s      %[f1],        %[f1],        %[f3]                       \n\t"
    295     "add.s      %[f3],        %[f5],        %[f7]                       \n\t"
    296     "sub.s      %[f5],        %[f5],        %[f7]                       \n\t"
    297     "add.s      %[f7],        %[f8],        %[f2]                       \n\t"
    298     "sub.s      %[f8],        %[f8],        %[f2]                       \n\t"
    299     "sub.s      %[f2],        %[f1],        %[f4]                       \n\t"
    300     "add.s      %[f1],        %[f1],        %[f4]                       \n\t"
    301     "add.s      %[f4],        %[f6],        %[f3]                       \n\t"
    302     "sub.s      %[f6],        %[f6],        %[f3]                       \n\t"
    303     "sub.s      %[f3],        %[f0],        %[f5]                       \n\t"
    304     "add.s      %[f0],        %[f0],        %[f5]                       \n\t"
    305     "swc1       %[f7],        0(%[a])                                   \n\t"
    306     "swc1       %[f8],        16(%[a])                                  \n\t"
    307     "swc1       %[f2],        28(%[a])                                  \n\t"
    308     "swc1       %[f1],        12(%[a])                                  \n\t"
    309     "swc1       %[f4],        4(%[a])                                   \n\t"
    310     "swc1       %[f6],        20(%[a])                                  \n\t"
    311     "swc1       %[f3],        8(%[a])                                   \n\t"
    312     "swc1       %[f0],        24(%[a])                                  \n\t"
    313     // second 8
            // a[8..15]: adds and subtracts, then four of the results are scaled by
            // f9 before being stored.
    314     "lwc1       %[f0],        32(%[a])                                  \n\t"
    315     "lwc1       %[f1],        36(%[a])                                  \n\t"
    316     "lwc1       %[f2],        40(%[a])                                  \n\t"
    317     "lwc1       %[f3],        44(%[a])                                  \n\t"
    318     "lwc1       %[f4],        48(%[a])                                  \n\t"
    319     "lwc1       %[f5],        52(%[a])                                  \n\t"
    320     "lwc1       %[f6],        56(%[a])                                  \n\t"
    321     "lwc1       %[f7],        60(%[a])                                  \n\t"
    322     "add.s      %[f8],        %[f4],        %[f6]                       \n\t"
    323     "sub.s      %[f4],        %[f4],        %[f6]                       \n\t"
    324     "add.s      %[f6],        %[f1],        %[f3]                       \n\t"
    325     "sub.s      %[f1],        %[f1],        %[f3]                       \n\t"
    326     "add.s      %[f3],        %[f0],        %[f2]                       \n\t"
    327     "sub.s      %[f0],        %[f0],        %[f2]                       \n\t"
    328     "add.s      %[f2],        %[f5],        %[f7]                       \n\t"
    329     "sub.s      %[f5],        %[f5],        %[f7]                       \n\t"
    330     "add.s      %[f7],        %[f4],        %[f1]                       \n\t"
    331     "sub.s      %[f4],        %[f4],        %[f1]                       \n\t"
    332     "add.s      %[f1],        %[f3],        %[f8]                       \n\t"
    333     "sub.s      %[f3],        %[f3],        %[f8]                       \n\t"
    334     "sub.s      %[f8],        %[f0],        %[f5]                       \n\t"
    335     "add.s      %[f0],        %[f0],        %[f5]                       \n\t"
    336     "add.s      %[f5],        %[f6],        %[f2]                       \n\t"
    337     "sub.s      %[f6],        %[f2],        %[f6]                       \n\t"
            // f9 = rdft_w[2] (byte offset 8). It is used below and is still live
            // when the loop starts.
    338     "lwc1       %[f9],        8(%[rdft_w])                              \n\t"
    339     "sub.s      %[f2],        %[f8],        %[f7]                       \n\t"
    340     "add.s      %[f8],        %[f8],        %[f7]                       \n\t"
    341     "sub.s      %[f7],        %[f4],        %[f0]                       \n\t"
    342     "add.s      %[f4],        %[f4],        %[f0]                       \n\t"
    343     // prepare for loop
            // a_ptr = &a[16], p1_rdft = &rdft_w[2], p2_rdft = &rdft_w[4], count = 7.
            // The setup is interleaved with the tail of the second block.
    344     "addiu      %[a_ptr],     %[a],         64                          \n\t"
    345     "addiu      %[p1_rdft],   %[rdft_w],    8                           \n\t"
    346     "addiu      %[p2_rdft],   %[rdft_w],    16                          \n\t"
    347     "addiu      %[count],     $zero,        7                           \n\t"
    348     // finish second 8
    349     "mul.s      %[f2],        %[f9],        %[f2]                       \n\t"
    350     "mul.s      %[f8],        %[f9],        %[f8]                       \n\t"
    351     "mul.s      %[f7],        %[f9],        %[f7]                       \n\t"
    352     "mul.s      %[f4],        %[f9],        %[f4]                       \n\t"
    353     "swc1       %[f1],        32(%[a])                                  \n\t"
    354     "swc1       %[f3],        52(%[a])                                  \n\t"
    355     "swc1       %[f5],        36(%[a])                                  \n\t"
    356     "swc1       %[f6],        48(%[a])                                  \n\t"
    357     "swc1       %[f2],        40(%[a])                                  \n\t"
    358     "swc1       %[f8],        44(%[a])                                  \n\t"
    359     "swc1       %[f7],        56(%[a])                                  \n\t"
    360     "swc1       %[f4],        60(%[a])                                  \n\t"
    361     // loop
            // Each pass handles the 16 floats at a_ptr as two halves of 8.
            // First half: byte offsets 0..28 from a_ptr.
    362    "1:                                                                  \n\t"
    363     "lwc1       %[f0],        0(%[a_ptr])                               \n\t"
    364     "lwc1       %[f1],        4(%[a_ptr])                               \n\t"
    365     "lwc1       %[f2],        8(%[a_ptr])                               \n\t"
    366     "lwc1       %[f3],        12(%[a_ptr])                              \n\t"
    367     "lwc1       %[f4],        16(%[a_ptr])                              \n\t"
    368     "lwc1       %[f5],        20(%[a_ptr])                              \n\t"
    369     "lwc1       %[f6],        24(%[a_ptr])                              \n\t"
    370     "lwc1       %[f7],        28(%[a_ptr])                              \n\t"
    371     "add.s      %[f8],        %[f0],        %[f2]                       \n\t"
    372     "sub.s      %[f0],        %[f0],        %[f2]                       \n\t"
    373     "add.s      %[f2],        %[f4],        %[f6]                       \n\t"
    374     "sub.s      %[f4],        %[f4],        %[f6]                       \n\t"
    375     "add.s      %[f6],        %[f1],        %[f3]                       \n\t"
    376     "sub.s      %[f1],        %[f1],        %[f3]                       \n\t"
    377     "add.s      %[f3],        %[f5],        %[f7]                       \n\t"
    378     "sub.s      %[f5],        %[f5],        %[f7]                       \n\t"
            // Multipliers for this half, as float indices from each cursor:
            // f10 = p1_rdft[1], f11/f12 = p2_rdft[0..1], f13/f14 = first[2..3].
            // f9 was loaded before the loop or at the end of the previous pass.
    379     "lwc1       %[f10],       4(%[p1_rdft])                             \n\t"
    380     "lwc1       %[f11],       0(%[p2_rdft])                             \n\t"
    381     "lwc1       %[f12],       4(%[p2_rdft])                             \n\t"
    382     "lwc1       %[f13],       8(%[first])                               \n\t"
    383     "lwc1       %[f14],       12(%[first])                              \n\t"
    384     "add.s      %[f7],        %[f8],        %[f2]                       \n\t"
    385     "sub.s      %[f8],        %[f8],        %[f2]                       \n\t"
    386     "add.s      %[f2],        %[f6],        %[f3]                       \n\t"
    387     "sub.s      %[f6],        %[f6],        %[f3]                       \n\t"
    388     "add.s      %[f3],        %[f0],        %[f5]                       \n\t"
    389     "sub.s      %[f0],        %[f0],        %[f5]                       \n\t"
    390     "add.s      %[f5],        %[f1],        %[f4]                       \n\t"
    391     "sub.s      %[f1],        %[f1],        %[f4]                       \n\t"
    392     "swc1       %[f7],        0(%[a_ptr])                               \n\t"
    393     "swc1       %[f2],        4(%[a_ptr])                               \n\t"
    394     "mul.s      %[f4],        %[f9],        %[f8]                       \n\t"
            // Both paths below form the same three pairs of two-term products.
            // R2 path: one mul.s per result, completed with madd.s (s*t + r) or
            // nmsub.s (r - s*t).
    395 #if defined(MIPS32_R2_LE)
    396     "mul.s      %[f8],        %[f10],       %[f8]                       \n\t"
    397     "mul.s      %[f7],        %[f11],       %[f0]                       \n\t"
    398     "mul.s      %[f0],        %[f12],       %[f0]                       \n\t"
    399     "mul.s      %[f2],        %[f13],       %[f3]                       \n\t"
    400     "mul.s      %[f3],        %[f14],       %[f3]                       \n\t"
    401     "nmsub.s    %[f4],        %[f4],        %[f10],       %[f6]         \n\t"
    402     "madd.s     %[f8],        %[f8],        %[f9],        %[f6]         \n\t"
    403     "nmsub.s    %[f7],        %[f7],        %[f12],       %[f5]         \n\t"
    404     "madd.s     %[f0],        %[f0],        %[f11],       %[f5]         \n\t"
    405     "nmsub.s    %[f2],        %[f2],        %[f14],       %[f1]         \n\t"
    406     "madd.s     %[f3],        %[f3],        %[f13],       %[f1]         \n\t"
    407 #else
            // Fallback path: separate mul.s followed by add.s / sub.s. It overwrites
            // f11..f13, which are reloaded before the second half uses them.
    408     "mul.s      %[f7],        %[f10],       %[f6]                       \n\t"
    409     "mul.s      %[f6],        %[f9],        %[f6]                       \n\t"
    410     "mul.s      %[f8],        %[f10],       %[f8]                       \n\t"
    411     "mul.s      %[f2],        %[f11],       %[f0]                       \n\t"
    412     "mul.s      %[f11],       %[f11],       %[f5]                       \n\t"
    413     "mul.s      %[f5],        %[f12],       %[f5]                       \n\t"
    414     "mul.s      %[f0],        %[f12],       %[f0]                       \n\t"
    415     "mul.s      %[f12],       %[f13],       %[f3]                       \n\t"
    416     "mul.s      %[f13],       %[f13],       %[f1]                       \n\t"
    417     "mul.s      %[f1],        %[f14],       %[f1]                       \n\t"
    418     "mul.s      %[f3],        %[f14],       %[f3]                       \n\t"
    419     "sub.s      %[f4],        %[f4],        %[f7]                       \n\t"
    420     "add.s      %[f8],        %[f6],        %[f8]                       \n\t"
    421     "sub.s      %[f7],        %[f2],        %[f5]                       \n\t"
    422     "add.s      %[f0],        %[f11],       %[f0]                       \n\t"
    423     "sub.s      %[f2],        %[f12],       %[f1]                       \n\t"
    424     "add.s      %[f3],        %[f13],       %[f3]                       \n\t"
    425 #endif
    426     "swc1       %[f4],        16(%[a_ptr])                              \n\t"
    427     "swc1       %[f8],        20(%[a_ptr])                              \n\t"
    428     "swc1       %[f7],        8(%[a_ptr])                               \n\t"
    429     "swc1       %[f0],        12(%[a_ptr])                              \n\t"
    430     "swc1       %[f2],        24(%[a_ptr])                              \n\t"
    431     "swc1       %[f3],        28(%[a_ptr])                              \n\t"
            // Second half: byte offsets 32..60 from a_ptr.
    432     "lwc1       %[f0],        32(%[a_ptr])                              \n\t"
    433     "lwc1       %[f1],        36(%[a_ptr])                              \n\t"
    434     "lwc1       %[f2],        40(%[a_ptr])                              \n\t"
    435     "lwc1       %[f3],        44(%[a_ptr])                              \n\t"
    436     "lwc1       %[f4],        48(%[a_ptr])                              \n\t"
    437     "lwc1       %[f5],        52(%[a_ptr])                              \n\t"
    438     "lwc1       %[f6],        56(%[a_ptr])                              \n\t"
    439     "lwc1       %[f7],        60(%[a_ptr])                              \n\t"
    440     "add.s      %[f8],        %[f0],        %[f2]                       \n\t"
    441     "sub.s      %[f0],        %[f0],        %[f2]                       \n\t"
    442     "add.s      %[f2],        %[f4],        %[f6]                       \n\t"
    443     "sub.s      %[f4],        %[f4],        %[f6]                       \n\t"
    444     "add.s      %[f6],        %[f1],        %[f3]                       \n\t"
    445     "sub.s      %[f1],        %[f1],        %[f3]                       \n\t"
    446     "add.s      %[f3],        %[f5],        %[f7]                       \n\t"
    447     "sub.s      %[f5],        %[f5],        %[f7]                       \n\t"
            // f11/f12 = p2_rdft[2..3], f13/f14 = second[2..3]; f9 and f10 carry over
            // from the first half.
    448     "lwc1       %[f11],       8(%[p2_rdft])                             \n\t"
    449     "lwc1       %[f12],       12(%[p2_rdft])                            \n\t"
    450     "lwc1       %[f13],       8(%[second])                              \n\t"
    451     "lwc1       %[f14],       12(%[second])                             \n\t"
    452     "add.s      %[f7],        %[f8],        %[f2]                       \n\t"
            // Operand order is swapped here (and for f6 below) compared with the
            // first half.
    453     "sub.s      %[f8],        %[f2],        %[f8]                       \n\t"
    454     "add.s      %[f2],        %[f6],        %[f3]                       \n\t"
    455     "sub.s      %[f6],        %[f3],        %[f6]                       \n\t"
    456     "add.s      %[f3],        %[f0],        %[f5]                       \n\t"
    457     "sub.s      %[f0],        %[f0],        %[f5]                       \n\t"
    458     "add.s      %[f5],        %[f1],        %[f4]                       \n\t"
    459     "sub.s      %[f1],        %[f1],        %[f4]                       \n\t"
    460     "swc1       %[f7],        32(%[a_ptr])                              \n\t"
    461     "swc1       %[f2],        36(%[a_ptr])                              \n\t"
    462     "mul.s      %[f4],        %[f10],       %[f8]                       \n\t"
    463 #if defined(MIPS32_R2_LE)
    464     "mul.s      %[f10],       %[f10],       %[f6]                       \n\t"
    465     "mul.s      %[f7],        %[f11],       %[f0]                       \n\t"
    466     "mul.s      %[f11],       %[f11],       %[f5]                       \n\t"
    467     "mul.s      %[f2],        %[f13],       %[f3]                       \n\t"
    468     "mul.s      %[f13],       %[f13],       %[f1]                       \n\t"
    469     "madd.s     %[f4],        %[f4],        %[f9],        %[f6]         \n\t"
    470     "nmsub.s    %[f10],       %[f10],       %[f9],        %[f8]         \n\t"
    471     "nmsub.s    %[f7],        %[f7],        %[f12],       %[f5]         \n\t"
    472     "madd.s     %[f11],       %[f11],       %[f12],       %[f0]         \n\t"
    473     "nmsub.s    %[f2],        %[f2],        %[f14],       %[f1]         \n\t"
    474     "madd.s     %[f13],       %[f13],       %[f14],       %[f3]         \n\t"
    475 #else
            // This path also overwrites f9; it is reloaded before the next pass.
    476     "mul.s      %[f2],        %[f9],        %[f6]                       \n\t"
    477     "mul.s      %[f10],       %[f10],       %[f6]                       \n\t"
    478     "mul.s      %[f9],        %[f9],        %[f8]                       \n\t"
    479     "mul.s      %[f7],        %[f11],       %[f0]                       \n\t"
    480     "mul.s      %[f8],        %[f12],       %[f5]                       \n\t"
    481     "mul.s      %[f11],       %[f11],       %[f5]                       \n\t"
    482     "mul.s      %[f12],       %[f12],       %[f0]                       \n\t"
    483     "mul.s      %[f5],        %[f13],       %[f3]                       \n\t"
    484     "mul.s      %[f0],        %[f14],       %[f1]                       \n\t"
    485     "mul.s      %[f13],       %[f13],       %[f1]                       \n\t"
    486     "mul.s      %[f14],       %[f14],       %[f3]                       \n\t"
    487     "add.s      %[f4],        %[f4],        %[f2]                       \n\t"
    488     "sub.s      %[f10],       %[f10],       %[f9]                       \n\t"
    489     "sub.s      %[f7],        %[f7],        %[f8]                       \n\t"
    490     "add.s      %[f11],       %[f11],       %[f12]                      \n\t"
    491     "sub.s      %[f2],        %[f5],        %[f0]                       \n\t"
    492     "add.s      %[f13],       %[f13],       %[f14]                      \n\t"
    493 #endif
    494     "swc1       %[f4],        48(%[a_ptr])                              \n\t"
    495     "swc1       %[f10],       52(%[a_ptr])                              \n\t"
    496     "swc1       %[f7],        40(%[a_ptr])                              \n\t"
    497     "swc1       %[f11],       44(%[a_ptr])                              \n\t"
    498     "swc1       %[f2],        56(%[a_ptr])                              \n\t"
    499     "swc1       %[f13],       60(%[a_ptr])                              \n\t"
    500     "addiu      %[count],     %[count],     -1                          \n\t"
            // Fetch the next pass's f9 (p1_rdft[2]) before p1_rdft is advanced.
    501     "lwc1       %[f9],        8(%[p1_rdft])                             \n\t"
            // Advance the cursors: 64 bytes in a, 8 in p1_rdft, 16 in p2_rdft and
            // 8 in each of first/second.
    502     "addiu      %[a_ptr],     %[a_ptr],     64                          \n\t"
    503     "addiu      %[p1_rdft],   %[p1_rdft],   8                           \n\t"
    504     "addiu      %[p2_rdft],   %[p2_rdft],   16                          \n\t"
    505     "addiu      %[first],     %[first],     8                           \n\t"
            // Loop while count > 0. The addiu on the following line sits in the
            // branch delay slot, so it runs on every pass including the last one.
    506     "bgtz       %[count],     1b                                        \n\t"
    507     " addiu     %[second],    %[second],    8                           \n\t"
    508     ".set       pop                                                     \n\t"
            // Outputs: every scratch register is early-clobber ("=&") so it is never
            // assigned the same register as an input; first/second are read-write
            // ("+r") because the loop advances them.
    509     : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3),
    510       [f4] "=&f" (f4), [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7),
    511       [f8] "=&f" (f8), [f9] "=&f" (f9), [f10] "=&f" (f10), [f11] "=&f" (f11),
    512       [f12] "=&f" (f12), [f13] "=&f" (f13), [f14] "=&f" (f14),
    513       [a_ptr] "=&r" (a_ptr), [p1_rdft] "=&r" (p1_rdft), [first] "+r" (first),
    514       [p2_rdft] "=&r" (p2_rdft), [count] "=&r" (count), [second] "+r" (second)
    515     : [a] "r" (a), [rdft_w] "r" (rdft_w)
            // "memory": the asm reads the tables and writes |a| through pointers the
            // compiler cannot see in the operand list.
    516     : "memory"
    517   );
    518 }
    519 
    520 static void cftmdl_128_mips(float* a) {
    521   float f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14;
    522   int tmp_a, count;
    523   __asm __volatile (
    524     ".set       push                                      \n\t"
    525     ".set       noreorder                                 \n\t"
    526     "addiu      %[tmp_a],   %[a],         0               \n\t"
    527     "addiu      %[count],   $zero,        4               \n\t"
    528    "1:                                                    \n\t"
    529     "addiu      %[count],   %[count],     -1              \n\t"
    530     "lwc1       %[f0],      0(%[tmp_a])                   \n\t"
    531     "lwc1       %[f2],      32(%[tmp_a])                  \n\t"
    532     "lwc1       %[f4],      64(%[tmp_a])                  \n\t"
    533     "lwc1       %[f6],      96(%[tmp_a])                  \n\t"
    534     "lwc1       %[f1],      4(%[tmp_a])                   \n\t"
    535     "lwc1       %[f3],      36(%[tmp_a])                  \n\t"
    536     "lwc1       %[f5],      68(%[tmp_a])                  \n\t"
    537     "lwc1       %[f7],      100(%[tmp_a])                 \n\t"
    538     "add.s      %[f8],      %[f0],        %[f2]           \n\t"
    539     "sub.s      %[f0],      %[f0],        %[f2]           \n\t"
    540     "add.s      %[f2],      %[f4],        %[f6]           \n\t"
    541     "sub.s      %[f4],      %[f4],        %[f6]           \n\t"
    542     "add.s      %[f6],      %[f1],        %[f3]           \n\t"
    543     "sub.s      %[f1],      %[f1],        %[f3]           \n\t"
    544     "add.s      %[f3],      %[f5],        %[f7]           \n\t"
    545     "sub.s      %[f5],      %[f5],        %[f7]           \n\t"
    546     "add.s      %[f7],      %[f8],        %[f2]           \n\t"
    547     "sub.s      %[f8],      %[f8],        %[f2]           \n\t"
    548     "add.s      %[f2],      %[f1],        %[f4]           \n\t"
    549     "sub.s      %[f1],      %[f1],        %[f4]           \n\t"
    550     "add.s      %[f4],      %[f6],        %[f3]           \n\t"
    551     "sub.s      %[f6],      %[f6],        %[f3]           \n\t"
    552     "sub.s      %[f3],      %[f0],        %[f5]           \n\t"
    553     "add.s      %[f0],      %[f0],        %[f5]           \n\t"
    554     "swc1       %[f7],      0(%[tmp_a])                   \n\t"
    555     "swc1       %[f8],      64(%[tmp_a])                  \n\t"
    556     "swc1       %[f2],      36(%[tmp_a])                  \n\t"
    557     "swc1       %[f1],      100(%[tmp_a])                 \n\t"
    558     "swc1       %[f4],      4(%[tmp_a])                   \n\t"
    559     "swc1       %[f6],      68(%[tmp_a])                  \n\t"
    560     "swc1       %[f3],      32(%[tmp_a])                  \n\t"
    561     "swc1       %[f0],      96(%[tmp_a])                  \n\t"
    562     "bgtz       %[count],   1b                            \n\t"
    563     " addiu     %[tmp_a],   %[tmp_a],     8               \n\t"
    564     ".set       pop                                       \n\t"
    565     : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3),
    566       [f4] "=&f" (f4), [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7),
    567       [f8] "=&f" (f8), [tmp_a] "=&r" (tmp_a), [count] "=&r" (count)
    568     : [a] "r" (a)
    569     : "memory"
    570   );
    571   f9 = rdft_w[2];
    572   __asm __volatile (
    573     ".set       push                                      \n\t"
    574     ".set       noreorder                                 \n\t"
    575     "addiu      %[tmp_a],   %[a],         128             \n\t"
    576     "addiu      %[count],   $zero,        4               \n\t"
    577    "1:                                                    \n\t"
    578     "addiu      %[count],   %[count],     -1              \n\t"
    579     "lwc1       %[f0],      0(%[tmp_a])                   \n\t"
    580     "lwc1       %[f2],      32(%[tmp_a])                  \n\t"
    581     "lwc1       %[f5],      68(%[tmp_a])                  \n\t"
    582     "lwc1       %[f7],      100(%[tmp_a])                 \n\t"
    583     "lwc1       %[f1],      4(%[tmp_a])                   \n\t"
    584     "lwc1       %[f3],      36(%[tmp_a])                  \n\t"
    585     "lwc1       %[f4],      64(%[tmp_a])                  \n\t"
    586     "lwc1       %[f6],      96(%[tmp_a])                  \n\t"
    587     "sub.s      %[f8],      %[f0],        %[f2]           \n\t"
    588     "add.s      %[f0],      %[f0],        %[f2]           \n\t"
    589     "sub.s      %[f2],      %[f5],        %[f7]           \n\t"
    590     "add.s      %[f5],      %[f5],        %[f7]           \n\t"
    591     "sub.s      %[f7],      %[f1],        %[f3]           \n\t"
    592     "add.s      %[f1],      %[f1],        %[f3]           \n\t"
    593     "sub.s      %[f3],      %[f4],        %[f6]           \n\t"
    594     "add.s      %[f4],      %[f4],        %[f6]           \n\t"
    595     "sub.s      %[f6],      %[f8],        %[f2]           \n\t"
    596     "add.s      %[f8],      %[f8],        %[f2]           \n\t"
    597     "add.s      %[f2],      %[f5],        %[f1]           \n\t"
    598     "sub.s      %[f5],      %[f5],        %[f1]           \n\t"
    599     "add.s      %[f1],      %[f3],        %[f7]           \n\t"
    600     "sub.s      %[f3],      %[f3],        %[f7]           \n\t"
    601     "add.s      %[f7],      %[f0],        %[f4]           \n\t"
    602     "sub.s      %[f0],      %[f0],        %[f4]           \n\t"
    603     "sub.s      %[f4],      %[f6],        %[f1]           \n\t"
    604     "add.s      %[f6],      %[f6],        %[f1]           \n\t"
    605     "sub.s      %[f1],      %[f3],        %[f8]           \n\t"
    606     "add.s      %[f3],      %[f3],        %[f8]           \n\t"
    607     "mul.s      %[f4],      %[f4],        %[f9]           \n\t"
    608     "mul.s      %[f6],      %[f6],        %[f9]           \n\t"
    609     "mul.s      %[f1],      %[f1],        %[f9]           \n\t"
    610     "mul.s      %[f3],      %[f3],        %[f9]           \n\t"
    611     "swc1       %[f7],      0(%[tmp_a])                   \n\t"
    612     "swc1       %[f2],      4(%[tmp_a])                   \n\t"
    613     "swc1       %[f5],      64(%[tmp_a])                  \n\t"
    614     "swc1       %[f0],      68(%[tmp_a])                  \n\t"
    615     "swc1       %[f4],      32(%[tmp_a])                  \n\t"
    616     "swc1       %[f6],      36(%[tmp_a])                  \n\t"
    617     "swc1       %[f1],      96(%[tmp_a])                  \n\t"
    618     "swc1       %[f3],      100(%[tmp_a])                 \n\t"
    619     "bgtz       %[count],   1b                            \n\t"
    620     " addiu     %[tmp_a],   %[tmp_a],     8               \n\t"
    621     ".set       pop                                       \n\t"
    622     : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3),
    623       [f4] "=&f" (f4), [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7),
    624       [f8] "=&f" (f8), [tmp_a] "=&r" (tmp_a), [count] "=&r" (count)
    625     : [a] "r" (a), [f9] "f" (f9)
    626     : "memory"
    627   );
    628   f10 = rdft_w[3];
    629   f11 = rdft_w[4];
    630   f12 = rdft_w[5];
    631   f13 = rdft_wk3ri_first[2];
    632   f14 = rdft_wk3ri_first[3];
    633 
    634   __asm __volatile (
    635     ".set       push                                                    \n\t"
    636     ".set       noreorder                                               \n\t"
    637     "addiu      %[tmp_a],     %[a],         256                         \n\t"
    638     "addiu      %[count],     $zero,        4                           \n\t"
    639    "1:                                                                  \n\t"
    640     "addiu      %[count],     %[count],     -1                          \n\t"
    641     "lwc1       %[f0],        0(%[tmp_a])                               \n\t"
    642     "lwc1       %[f2],        32(%[tmp_a])                              \n\t"
    643     "lwc1       %[f4],        64(%[tmp_a])                              \n\t"
    644     "lwc1       %[f6],        96(%[tmp_a])                              \n\t"
    645     "lwc1       %[f1],        4(%[tmp_a])                               \n\t"
    646     "lwc1       %[f3],        36(%[tmp_a])                              \n\t"
    647     "lwc1       %[f5],        68(%[tmp_a])                              \n\t"
    648     "lwc1       %[f7],        100(%[tmp_a])                             \n\t"
    649     "add.s      %[f8],        %[f0],        %[f2]                       \n\t"
    650     "sub.s      %[f0],        %[f0],        %[f2]                       \n\t"
    651     "add.s      %[f2],        %[f4],        %[f6]                       \n\t"
    652     "sub.s      %[f4],        %[f4],        %[f6]                       \n\t"
    653     "add.s      %[f6],        %[f1],        %[f3]                       \n\t"
    654     "sub.s      %[f1],        %[f1],        %[f3]                       \n\t"
    655     "add.s      %[f3],        %[f5],        %[f7]                       \n\t"
    656     "sub.s      %[f5],        %[f5],        %[f7]                       \n\t"
    657     "sub.s      %[f7],        %[f8],        %[f2]                       \n\t"
    658     "add.s      %[f8],        %[f8],        %[f2]                       \n\t"
    659     "add.s      %[f2],        %[f1],        %[f4]                       \n\t"
    660     "sub.s      %[f1],        %[f1],        %[f4]                       \n\t"
    661     "sub.s      %[f4],        %[f6],        %[f3]                       \n\t"
    662     "add.s      %[f6],        %[f6],        %[f3]                       \n\t"
    663     "sub.s      %[f3],        %[f0],        %[f5]                       \n\t"
    664     "add.s      %[f0],        %[f0],        %[f5]                       \n\t"
    665     "swc1       %[f8],        0(%[tmp_a])                               \n\t"
    666     "swc1       %[f6],        4(%[tmp_a])                               \n\t"
    667     "mul.s      %[f5],        %[f9],        %[f7]                       \n\t"
    668 #if defined(MIPS32_R2_LE)
    669     "mul.s      %[f7],        %[f10],       %[f7]                       \n\t"
    670     "mul.s      %[f8],        %[f11],       %[f3]                       \n\t"
    671     "mul.s      %[f3],        %[f12],       %[f3]                       \n\t"
    672     "mul.s      %[f6],        %[f13],       %[f0]                       \n\t"
    673     "mul.s      %[f0],        %[f14],       %[f0]                       \n\t"
    674     "nmsub.s    %[f5],        %[f5],        %[f10],       %[f4]         \n\t"
    675     "madd.s     %[f7],        %[f7],        %[f9],        %[f4]         \n\t"
    676     "nmsub.s    %[f8],        %[f8],        %[f12],       %[f2]         \n\t"
    677     "madd.s     %[f3],        %[f3],        %[f11],       %[f2]         \n\t"
    678     "nmsub.s    %[f6],        %[f6],        %[f14],       %[f1]         \n\t"
    679     "madd.s     %[f0],        %[f0],        %[f13],       %[f1]         \n\t"
    680     "swc1       %[f5],        64(%[tmp_a])                              \n\t"
    681     "swc1       %[f7],        68(%[tmp_a])                              \n\t"
    682 #else
    683     "mul.s      %[f8],        %[f10],       %[f4]                       \n\t"
    684     "mul.s      %[f4],        %[f9],        %[f4]                       \n\t"
    685     "mul.s      %[f7],        %[f10],       %[f7]                       \n\t"
    686     "mul.s      %[f6],        %[f11],       %[f3]                       \n\t"
    687     "mul.s      %[f3],        %[f12],       %[f3]                       \n\t"
    688     "sub.s      %[f5],        %[f5],        %[f8]                       \n\t"
    689     "mul.s      %[f8],        %[f12],       %[f2]                       \n\t"
    690     "mul.s      %[f2],        %[f11],       %[f2]                       \n\t"
    691     "add.s      %[f7],        %[f4],        %[f7]                       \n\t"
    692     "mul.s      %[f4],        %[f13],       %[f0]                       \n\t"
    693     "mul.s      %[f0],        %[f14],       %[f0]                       \n\t"
    694     "sub.s      %[f8],        %[f6],        %[f8]                       \n\t"
    695     "mul.s      %[f6],        %[f14],       %[f1]                       \n\t"
    696     "mul.s      %[f1],        %[f13],       %[f1]                       \n\t"
    697     "add.s      %[f3],        %[f2],        %[f3]                       \n\t"
    698     "swc1       %[f5],        64(%[tmp_a])                              \n\t"
    699     "swc1       %[f7],        68(%[tmp_a])                              \n\t"
    700     "sub.s      %[f6],        %[f4],        %[f6]                       \n\t"
    701     "add.s      %[f0],        %[f1],        %[f0]                       \n\t"
    702 #endif
    703     "swc1       %[f8],        32(%[tmp_a])                              \n\t"
    704     "swc1       %[f3],        36(%[tmp_a])                              \n\t"
    705     "swc1       %[f6],        96(%[tmp_a])                              \n\t"
    706     "swc1       %[f0],        100(%[tmp_a])                             \n\t"
    707     "bgtz       %[count],     1b                                        \n\t"
    708     " addiu     %[tmp_a],     %[tmp_a],     8                           \n\t"
    709     ".set       pop                                                     \n\t"
    710     : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3),
    711       [f4] "=&f" (f4), [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7),
    712       [f8] "=&f" (f8), [tmp_a] "=&r" (tmp_a), [count] "=&r" (count)
    713     : [a] "r" (a),  [f9] "f" (f9), [f10] "f" (f10), [f11] "f" (f11),
    714       [f12] "f" (f12), [f13] "f" (f13), [f14] "f" (f14)
    715     : "memory"
    716   );
    717   f11 = rdft_w[6];
    718   f12 = rdft_w[7];
    719   f13 = rdft_wk3ri_second[2];
    720   f14 = rdft_wk3ri_second[3];
    721   __asm __volatile (
    722     ".set       push                                                       \n\t"
    723     ".set       noreorder                                                  \n\t"
    724     "addiu      %[tmp_a],       %[a],           384                        \n\t"
    725     "addiu      %[count],       $zero,          4                          \n\t"
    726    "1:                                                                     \n\t"
    727     "addiu      %[count],       %[count],       -1                         \n\t"
    728     "lwc1       %[f0],          0(%[tmp_a])                                \n\t"
    729     "lwc1       %[f1],          4(%[tmp_a])                                \n\t"
    730     "lwc1       %[f2],          32(%[tmp_a])                               \n\t"
    731     "lwc1       %[f3],          36(%[tmp_a])                               \n\t"
    732     "lwc1       %[f4],          64(%[tmp_a])                               \n\t"
    733     "lwc1       %[f5],          68(%[tmp_a])                               \n\t"
    734     "lwc1       %[f6],          96(%[tmp_a])                               \n\t"
    735     "lwc1       %[f7],          100(%[tmp_a])                              \n\t"
    736     "add.s      %[f8],          %[f0],          %[f2]                      \n\t"
    737     "sub.s      %[f0],          %[f0],          %[f2]                      \n\t"
    738     "add.s      %[f2],          %[f4],          %[f6]                      \n\t"
    739     "sub.s      %[f4],          %[f4],          %[f6]                      \n\t"
    740     "add.s      %[f6],          %[f1],          %[f3]                      \n\t"
    741     "sub.s      %[f1],          %[f1],          %[f3]                      \n\t"
    742     "add.s      %[f3],          %[f5],          %[f7]                      \n\t"
    743     "sub.s      %[f5],          %[f5],          %[f7]                      \n\t"
    744     "sub.s      %[f7],          %[f2],          %[f8]                      \n\t"
    745     "add.s      %[f2],          %[f2],          %[f8]                      \n\t"
    746     "add.s      %[f8],          %[f1],          %[f4]                      \n\t"
    747     "sub.s      %[f1],          %[f1],          %[f4]                      \n\t"
    748     "sub.s      %[f4],          %[f3],          %[f6]                      \n\t"
    749     "add.s      %[f3],          %[f3],          %[f6]                      \n\t"
    750     "sub.s      %[f6],          %[f0],          %[f5]                      \n\t"
    751     "add.s      %[f0],          %[f0],          %[f5]                      \n\t"
    752     "swc1       %[f2],          0(%[tmp_a])                                \n\t"
    753     "swc1       %[f3],          4(%[tmp_a])                                \n\t"
    754     "mul.s      %[f5],          %[f10],         %[f7]                      \n\t"
    755 #if defined(MIPS32_R2_LE)
    756     "mul.s      %[f7],          %[f9],          %[f7]                      \n\t"
    757     "mul.s      %[f2],          %[f12],         %[f8]                      \n\t"
    758     "mul.s      %[f8],          %[f11],         %[f8]                      \n\t"
    759     "mul.s      %[f3],          %[f14],         %[f1]                      \n\t"
    760     "mul.s      %[f1],          %[f13],         %[f1]                      \n\t"
    761     "madd.s     %[f5],          %[f5],          %[f9],       %[f4]         \n\t"
    762     "msub.s     %[f7],          %[f7],          %[f10],      %[f4]         \n\t"
    763     "msub.s     %[f2],          %[f2],          %[f11],      %[f6]         \n\t"
    764     "madd.s     %[f8],          %[f8],          %[f12],      %[f6]         \n\t"
    765     "msub.s     %[f3],          %[f3],          %[f13],      %[f0]         \n\t"
    766     "madd.s     %[f1],          %[f1],          %[f14],      %[f0]         \n\t"
    767     "swc1       %[f5],          64(%[tmp_a])                               \n\t"
    768     "swc1       %[f7],          68(%[tmp_a])                               \n\t"
    769 #else
    770     "mul.s      %[f2],          %[f9],          %[f4]                      \n\t"
    771     "mul.s      %[f4],          %[f10],         %[f4]                      \n\t"
    772     "mul.s      %[f7],          %[f9],          %[f7]                      \n\t"
    773     "mul.s      %[f3],          %[f11],         %[f6]                      \n\t"
    774     "mul.s      %[f6],          %[f12],         %[f6]                      \n\t"
    775     "add.s      %[f5],          %[f5],          %[f2]                      \n\t"
    776     "sub.s      %[f7],          %[f4],          %[f7]                      \n\t"
    777     "mul.s      %[f2],          %[f12],         %[f8]                      \n\t"
    778     "mul.s      %[f8],          %[f11],         %[f8]                      \n\t"
    779     "mul.s      %[f4],          %[f14],         %[f1]                      \n\t"
    780     "mul.s      %[f1],          %[f13],         %[f1]                      \n\t"
    781     "sub.s      %[f2],          %[f3],          %[f2]                      \n\t"
    782     "mul.s      %[f3],          %[f13],         %[f0]                      \n\t"
    783     "mul.s      %[f0],          %[f14],         %[f0]                      \n\t"
    784     "add.s      %[f8],          %[f8],          %[f6]                      \n\t"
    785     "swc1       %[f5],          64(%[tmp_a])                               \n\t"
    786     "swc1       %[f7],          68(%[tmp_a])                               \n\t"
    787     "sub.s      %[f3],          %[f3],          %[f4]                      \n\t"
    788     "add.s      %[f1],          %[f1],          %[f0]                      \n\t"
    789 #endif
    790     "swc1       %[f2],          32(%[tmp_a])                               \n\t"
    791     "swc1       %[f8],          36(%[tmp_a])                               \n\t"
    792     "swc1       %[f3],          96(%[tmp_a])                               \n\t"
    793     "swc1       %[f1],          100(%[tmp_a])                              \n\t"
    794     "bgtz       %[count],       1b                                         \n\t"
    795     " addiu     %[tmp_a],       %[tmp_a],       8                          \n\t"
    796     ".set       pop                                                        \n\t"
    797     : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3),
    798       [f4] "=&f" (f4), [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7),
    799       [f8] "=&f" (f8), [tmp_a] "=&r" (tmp_a), [count] "=&r" (count)
    800     : [a] "r" (a), [f9] "f" (f9), [f10] "f" (f10), [f11] "f" (f11),
    801       [f12] "f" (f12), [f13] "f" (f13), [f14] "f" (f14)
    802     : "memory"
    803   );
    804 }
    805 
    806 static void cftfsub_128_mips(float* a) {
          // Calls cft1st_128() and cftmdl_128() on |a|, then runs one further
          // in-place pass over |a| written in MIPS FPU inline assembly.
          //
          // With p the running pointer viewed as float*, each of the 16 loop
          // iterations loads
          //   r0 = p[0]   i0 = p[1]     (byte offsets   0 /   4)
          //   r1 = p[32]  i1 = p[33]    (byte offsets 128 / 132)
          //   r2 = p[64]  i2 = p[65]    (byte offsets 256 / 260)
          //   r3 = p[96]  i3 = p[97]    (byte offsets 384 / 388)
          // and stores back, using only add.s / sub.s:
          //   p[0]  = (r0 + r1) + (r2 + r3)    p[1]  = (i0 + i1) + (i2 + i3)
          //   p[64] = (r0 + r1) - (r2 + r3)    p[65] = (i0 + i1) - (i2 + i3)
          //   p[32] = (r0 - r1) - (i2 - i3)    p[33] = (i0 - i1) + (r2 - r3)
          //   p[96] = (r0 - r1) + (i2 - i3)    p[97] = (i0 - i1) - (r2 - r3)
          // p starts at |a| and advances by 8 bytes (two floats) per iteration,
          // so the 16 iterations read and write a[0] .. a[127].
          //
          // f0..f8 are scratch FPU registers for the assembly block.
    807   float f0, f1, f2, f3, f4, f5, f6, f7, f8;
          // tmp_a holds the running address derived from |a| and count is the
          // loop counter. NOTE(review): an address is kept in an int here, which
          // presumably relies on 32-bit pointers on the target - confirm.
    808   int tmp_a, count;
    809 
    810   cft1st_128(a);
    811   cftmdl_128(a);
    812 
    813   __asm __volatile (
    814     ".set       push                                      \n\t"
            // noreorder: the instruction written after bgtz below is placed in
            // the branch delay slot by hand.
    815     ".set       noreorder                                 \n\t"
            // tmp_a = a, count = 16.
    816     "addiu      %[tmp_a],       %[a],         0           \n\t"
    817     "addiu      %[count],       $zero,        16          \n\t"
    818    "1:                                                    \n\t"
            // The counter is decremented first, so bgtz at the bottom sees
            // 15, 14, ..., 0 and the body executes 16 times.
    819     "addiu      %[count],       %[count],     -1          \n\t"
            // Loads: f0/f1 = r0/i0, f2/f3 = r1/i1, f4/f5 = r2/i2, f6/f7 = r3/i3.
    820     "lwc1       %[f0],          0(%[tmp_a])               \n\t"
    821     "lwc1       %[f2],          128(%[tmp_a])             \n\t"
    822     "lwc1       %[f4],          256(%[tmp_a])             \n\t"
    823     "lwc1       %[f6],          384(%[tmp_a])             \n\t"
    824     "lwc1       %[f1],          4(%[tmp_a])               \n\t"
    825     "lwc1       %[f3],          132(%[tmp_a])             \n\t"
    826     "lwc1       %[f5],          260(%[tmp_a])             \n\t"
    827     "lwc1       %[f7],          388(%[tmp_a])             \n\t"
            // Stage 1: f8 = r0 + r1, f0 = r0 - r1, f2 = r2 + r3, f4 = r2 - r3,
            //          f6 = i0 + i1, f1 = i0 - i1, f3 = i2 + i3, f5 = i2 - i3.
    828     "add.s      %[f8],          %[f0],        %[f2]       \n\t"
    829     "sub.s      %[f0],          %[f0],        %[f2]       \n\t"
    830     "add.s      %[f2],          %[f4],        %[f6]       \n\t"
    831     "sub.s      %[f4],          %[f4],        %[f6]       \n\t"
    832     "add.s      %[f6],          %[f1],        %[f3]       \n\t"
    833     "sub.s      %[f1],          %[f1],        %[f3]       \n\t"
    834     "add.s      %[f3],          %[f5],        %[f7]       \n\t"
    835     "sub.s      %[f5],          %[f5],        %[f7]       \n\t"
            // Stage 2: combine the stage-1 sums and differences into the eight
            // outputs; each destination register is stored exactly once below.
    836     "add.s      %[f7],          %[f8],        %[f2]       \n\t"
    837     "sub.s      %[f8],          %[f8],        %[f2]       \n\t"
    838     "add.s      %[f2],          %[f1],        %[f4]       \n\t"
    839     "sub.s      %[f1],          %[f1],        %[f4]       \n\t"
    840     "add.s      %[f4],          %[f6],        %[f3]       \n\t"
    841     "sub.s      %[f6],          %[f6],        %[f3]       \n\t"
    842     "sub.s      %[f3],          %[f0],        %[f5]       \n\t"
    843     "add.s      %[f0],          %[f0],        %[f5]       \n\t"
            // Stores go back to the same eight locations that were loaded.
    844     "swc1       %[f7],          0(%[tmp_a])               \n\t"
    845     "swc1       %[f8],          256(%[tmp_a])             \n\t"
    846     "swc1       %[f2],          132(%[tmp_a])             \n\t"
    847     "swc1       %[f1],          388(%[tmp_a])             \n\t"
    848     "swc1       %[f4],          4(%[tmp_a])               \n\t"
    849     "swc1       %[f6],          260(%[tmp_a])             \n\t"
    850     "swc1       %[f3],          128(%[tmp_a])             \n\t"
    851     "swc1       %[f0],          384(%[tmp_a])             \n\t"
    852     "bgtz       %[count],       1b                        \n\t"
            // Branch delay slot: advance to the next pair of floats.
    853     " addiu     %[tmp_a],       %[tmp_a],   8             \n\t"
    854     ".set       pop                                       \n\t"
            // Every scratch operand is an early-clobber output ("=&"); |a| is the
            // only input. The "memory" clobber covers the stores made through
            // tmp_a, which the operand list does not describe.
    855     : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3),
    856       [f4] "=&f" (f4), [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7),
    857       [f8] "=&f" (f8), [tmp_a] "=&r" (tmp_a),
    858       [count] "=&r" (count)
    859     : [a] "r" (a)
    860     : "memory"
    861   );
    862 }
    863 
    864 static void cftbsub_128_mips(float* a) {
          // Calls cft1st_128() and cftmdl_128() on |a|, then runs one further
          // in-place pass over |a| written in MIPS FPU inline assembly.
          //
          // With p the running pointer viewed as float*, each of the 16 loop
          // iterations loads
          //   r0 = p[0]   i0 = p[1]     (byte offsets   0 /   4)
          //   r1 = p[32]  i1 = p[33]    (byte offsets 128 / 132)
          //   r2 = p[64]  i2 = p[65]    (byte offsets 256 / 260)
          //   r3 = p[96]  i3 = p[97]    (byte offsets 384 / 388)
          // and stores back:
          //   p[0]  = (r0 + r1) + (r2 + r3)    p[1]  = -((i0 + i1) + (i2 + i3))
          //   p[64] = (r0 + r1) - (r2 + r3)    p[65] = (i2 + i3) - (i0 + i1)
          //   p[32] = (r0 - r1) - (i2 - i3)    p[33] = (i1 - i0) - (r2 - r3)
          //   p[96] = (r0 - r1) + (i2 - i3)    p[97] = (i1 - i0) + (r2 - r3)
          // p starts at |a| and advances by 8 bytes (two floats) per iteration,
          // so the 16 iterations read and write a[0] .. a[127].
          //
          // f0..f8 are scratch FPU registers for the assembly block.
    865   float f0, f1, f2, f3, f4, f5, f6, f7, f8;
          // tmp_a holds the running address derived from |a| and count is the
          // loop counter. NOTE(review): an address is kept in an int here, which
          // presumably relies on 32-bit pointers on the target - confirm.
    866   int tmp_a, count;
    867 
    868   cft1st_128(a);
    869   cftmdl_128(a);
    870 
    871   __asm __volatile (
    872     ".set       push                                        \n\t"
            // noreorder: the instruction written after bgtz below is placed in
            // the branch delay slot by hand.
    873     ".set       noreorder                                   \n\t"
            // tmp_a = a, count = 16.
    874     "addiu      %[tmp_a],   %[a],           0               \n\t"
    875     "addiu      %[count],   $zero,          16              \n\t"
    876    "1:                                                      \n\t"
            // The counter is decremented first, so bgtz at the bottom sees
            // 15, 14, ..., 0 and the body executes 16 times.
    877     "addiu      %[count],   %[count],       -1              \n\t"
            // Loads: f0/f1 = r0/i0, f2/f3 = r1/i1, f4/f5 = r2/i2, f6/f7 = r3/i3.
    878     "lwc1       %[f0],      0(%[tmp_a])                     \n\t"
    879     "lwc1       %[f2],      128(%[tmp_a])                   \n\t"
    880     "lwc1       %[f4],      256(%[tmp_a])                   \n\t"
    881     "lwc1       %[f6],      384(%[tmp_a])                   \n\t"
    882     "lwc1       %[f1],      4(%[tmp_a])                     \n\t"
    883     "lwc1       %[f3],      132(%[tmp_a])                   \n\t"
    884     "lwc1       %[f5],      260(%[tmp_a])                   \n\t"
    885     "lwc1       %[f7],      388(%[tmp_a])                   \n\t"
            // Stage 1: f8 = r0 + r1, f0 = r0 - r1, f2 = r2 + r3, f4 = r2 - r3,
            //          f6 = i0 + i1, f1 = i1 - i0, f3 = i2 + i3, f5 = i2 - i3.
            // Note the operand order of the f1 subtraction: it is f3 - f1.
    886     "add.s      %[f8],      %[f0],          %[f2]           \n\t"
    887     "sub.s      %[f0],      %[f0],          %[f2]           \n\t"
    888     "add.s      %[f2],      %[f4],          %[f6]           \n\t"
    889     "sub.s      %[f4],      %[f4],          %[f6]           \n\t"
    890     "add.s      %[f6],      %[f1],          %[f3]           \n\t"
    891     "sub.s      %[f1],      %[f3],          %[f1]           \n\t"
    892     "add.s      %[f3],      %[f5],          %[f7]           \n\t"
    893     "sub.s      %[f5],      %[f5],          %[f7]           \n\t"
            // Stage 2: combine the stage-1 sums and differences into the eight
            // outputs; each destination register is stored exactly once below.
    894     "add.s      %[f7],      %[f8],          %[f2]           \n\t"
    895     "sub.s      %[f8],      %[f8],          %[f2]           \n\t"
    896     "sub.s      %[f2],      %[f1],          %[f4]           \n\t"
    897     "add.s      %[f1],      %[f1],          %[f4]           \n\t"
    898     "add.s      %[f4],      %[f3],          %[f6]           \n\t"
    899     "sub.s      %[f6],      %[f3],          %[f6]           \n\t"
    900     "sub.s      %[f3],      %[f0],          %[f5]           \n\t"
    901     "add.s      %[f0],      %[f0],          %[f5]           \n\t"
            // f4 currently holds (i2 + i3) + (i0 + i1); negate it before it is
            // stored to offset 4.
    902     "neg.s      %[f4],      %[f4]                           \n\t"
            // Stores go back to the same eight locations that were loaded.
    903     "swc1       %[f7],      0(%[tmp_a])                     \n\t"
    904     "swc1       %[f8],      256(%[tmp_a])                   \n\t"
    905     "swc1       %[f2],      132(%[tmp_a])                   \n\t"
    906     "swc1       %[f1],      388(%[tmp_a])                   \n\t"
    907     "swc1       %[f6],      260(%[tmp_a])                   \n\t"
    908     "swc1       %[f3],      128(%[tmp_a])                   \n\t"
    909     "swc1       %[f0],      384(%[tmp_a])                   \n\t"
    910     "swc1       %[f4],       4(%[tmp_a])                     \n\t"
    911     "bgtz       %[count],   1b                              \n\t"
            // Branch delay slot: advance to the next pair of floats.
    912     " addiu     %[tmp_a],   %[tmp_a],       8               \n\t"
    913     ".set       pop                                         \n\t"
            // Every scratch operand is an early-clobber output ("=&"); |a| is the
            // only input. The "memory" clobber covers the stores made through
            // tmp_a, which the operand list does not describe.
    914     : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3),
    915       [f4] "=&f" (f4), [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7),
    916       [f8] "=&f" (f8), [tmp_a] "=&r" (tmp_a), [count] "=&r" (count)
    917     : [a] "r" (a)
    918     : "memory"
    919   );
    920 }
    921 
    922 static void rftfsub_128_mips(float* a) {
          // Updates |a| in place in MIPS FPU inline assembly, using coefficients
          // read from c = rdft_w + 32.
          //
          // Four pointers walk the data: a1 upward from &a[2] and a2 downward
          // from &a[126] (two floats per step), c1 upward from &c[1] and c2
          // downward from &c[31] (one float per step). Each step computes, with
          // names chosen here for readability:
          //   wkr = 0.5f - *c2            wki = *c1
          //   xr  = a1[0] - a2[0]         xi  = a1[1] + a2[1]
          //   yr  = wkr * xr - wki * xi
          //   yi  = wkr * xi + wki * xr
          //   a1[0] -= yr;  a1[1] -= yi;  a2[0] += yr;  a2[1] -= yi;
          // One step is executed before the loop and the loop performs two steps
          // per iteration for 15 iterations, i.e. 31 steps in total.
    923   const float* c = rdft_w + 32;
          // f0 is the constant 0.5f, passed to the assembly as an input operand.
    924   const float f0 = 0.5f;
    925   float* a1 = &a[2];
    926   float* a2 = &a[126];
    927   const float* c1 = &c[1];
    928   const float* c2 = &c[31];
          // f1..f15 are scratch FPU registers for the assembly block.
    929   float f1, f2, f3 ,f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14, f15;
    930   int count;
    931 
    932   __asm __volatile (
    933     ".set      push                                             \n\t"
            // noreorder: the instruction written after bgtz at the bottom is
            // placed in the branch delay slot by hand.
    934     ".set      noreorder                                        \n\t"
            // ---- Step executed once before the loop ----
            // f6 = *c2, f1 = a1[0], f2 = a2[0], f3 = a1[1], f4 = a2[1], f5 = *c1.
    935     "lwc1      %[f6],       0(%[c2])                            \n\t"
    936     "lwc1      %[f1],       0(%[a1])                            \n\t"
    937     "lwc1      %[f2],       0(%[a2])                            \n\t"
    938     "lwc1      %[f3],       4(%[a1])                            \n\t"
    939     "lwc1      %[f4],       4(%[a2])                            \n\t"
    940     "lwc1      %[f5],       0(%[c1])                            \n\t"
            // f6 = wkr, f7 = xr, f8 = xi.
    941     "sub.s     %[f6],       %[f0],        %[f6]                 \n\t"
    942     "sub.s     %[f7],       %[f1],        %[f2]                 \n\t"
    943     "add.s     %[f8],       %[f3],        %[f4]                 \n\t"
            // Loop counter for the 15 double-step iterations below.
    944     "addiu     %[count],    $zero,        15                    \n\t"
            // f9 = wkr * xr, f6 = wkr * xi.
    945     "mul.s     %[f9],       %[f6],        %[f7]                 \n\t"
    946     "mul.s     %[f6],       %[f6],        %[f8]                 \n\t"
    947 #if !defined(MIPS32_R2_LE)
            // f9 = yr = wkr * xr - wki * xi, f6 = yi = wkr * xi + wki * xr.
    948     "mul.s     %[f8],       %[f5],        %[f8]                 \n\t"
    949     "mul.s     %[f5],       %[f5],        %[f7]                 \n\t"
    950     "sub.s     %[f9],       %[f9],        %[f8]                 \n\t"
    951     "add.s     %[f6],       %[f6],        %[f5]                 \n\t"
    952 #else
            // Same two results formed with nmsub.s (f9 - f5 * f8) and
            // madd.s (f6 + f5 * f7) instead of separate mul/sub/add.
    953     "nmsub.s   %[f9],       %[f9],        %[f5],      %[f8]     \n\t"
    954     "madd.s    %[f6],       %[f6],        %[f5],      %[f7]     \n\t"
    955 #endif
            // a1[0] -= yr, a2[0] += yr, a1[1] -= yi, a2[1] -= yi.
    956     "sub.s     %[f1],       %[f1],        %[f9]                 \n\t"
    957     "add.s     %[f2],       %[f2],        %[f9]                 \n\t"
    958     "sub.s     %[f3],       %[f3],        %[f6]                 \n\t"
    959     "sub.s     %[f4],       %[f4],        %[f6]                 \n\t"
    960     "swc1      %[f1],       0(%[a1])                            \n\t"
    961     "swc1      %[f2],       0(%[a2])                            \n\t"
    962     "swc1      %[f3],       4(%[a1])                            \n\t"
    963     "swc1      %[f4],       4(%[a2])                            \n\t"
            // Advance all four pointers by one step (byte increments).
    964     "addiu     %[a1],       %[a1],        8                     \n\t"
    965     "addiu     %[a2],       %[a2],        -8                    \n\t"
    966     "addiu     %[c1],       %[c1],        4                     \n\t"
    967     "addiu     %[c2],       %[c2],        -4                    \n\t"
            // ---- Loop: two steps per iteration ----
            // The first step uses f1..f9 exactly as above; the second step uses
            // f10..f15 for its loads and reuses f5, f7, f8 (and f9 in the
            // non-R2 path) as temporaries once the first step is done with them.
            // The two steps are interleaved instruction by instruction.
    968    "1:                                                          \n\t"
            // First step: loads and wkr / xr / xi.
    969     "lwc1      %[f6],       0(%[c2])                            \n\t"
    970     "lwc1      %[f1],       0(%[a1])                            \n\t"
    971     "lwc1      %[f2],       0(%[a2])                            \n\t"
    972     "lwc1      %[f3],       4(%[a1])                            \n\t"
    973     "lwc1      %[f4],       4(%[a2])                            \n\t"
    974     "lwc1      %[f5],       0(%[c1])                            \n\t"
    975     "sub.s     %[f6],       %[f0],        %[f6]                 \n\t"
    976     "sub.s     %[f7],       %[f1],        %[f2]                 \n\t"
    977     "add.s     %[f8],       %[f3],        %[f4]                 \n\t"
            // Second step: f10 = c2[-1], f11 = a1[2], f12 = a2[-2].
    978     "lwc1      %[f10],      -4(%[c2])                           \n\t"
    979     "lwc1      %[f11],      8(%[a1])                            \n\t"
    980     "lwc1      %[f12],      -8(%[a2])                           \n\t"
            // First step: f9 = wkr * xr, f6 = wkr * xi.
    981     "mul.s     %[f9],       %[f6],        %[f7]                 \n\t"
    982     "mul.s     %[f6],       %[f6],        %[f8]                 \n\t"
    983 #if !defined(MIPS32_R2_LE)
            // First step: finish yr (f9) and yi (f6); in between, load the second
            // step's f13 = a1[3], f14 = a2[-1], f15 = c1[1].
    984     "mul.s     %[f8],       %[f5],        %[f8]                 \n\t"
    985     "mul.s     %[f5],       %[f5],        %[f7]                 \n\t"
    986     "lwc1      %[f13],      12(%[a1])                           \n\t"
    987     "lwc1      %[f14],      -4(%[a2])                           \n\t"
    988     "lwc1      %[f15],      4(%[c1])                            \n\t"
    989     "sub.s     %[f9],       %[f9],        %[f8]                 \n\t"
    990     "add.s     %[f6],       %[f6],        %[f5]                 \n\t"
    991 #else
            // Same loads, with yr / yi finished by nmsub.s / madd.s.
    992     "lwc1      %[f13],      12(%[a1])                           \n\t"
    993     "lwc1      %[f14],      -4(%[a2])                           \n\t"
    994     "lwc1      %[f15],      4(%[c1])                            \n\t"
    995     "nmsub.s   %[f9],       %[f9],        %[f5],      %[f8]     \n\t"
    996     "madd.s    %[f6],       %[f6],        %[f5],      %[f7]     \n\t"
    997 #endif
            // Second step: f10 = wkr, f5 = xr, f7 = xi.
    998     "sub.s     %[f10],      %[f0],        %[f10]                \n\t"
    999     "sub.s     %[f5],       %[f11],       %[f12]                \n\t"
   1000     "add.s     %[f7],       %[f13],       %[f14]                \n\t"
            // First step: apply yr to a1[0] / a2[0] and yi to a1[1].
   1001     "sub.s     %[f1],       %[f1],        %[f9]                 \n\t"
   1002     "add.s     %[f2],       %[f2],        %[f9]                 \n\t"
   1003     "sub.s     %[f3],       %[f3],        %[f6]                 \n\t"
            // Second step: f8 = wkr * xr, f10 = wkr * xi.
   1004     "mul.s     %[f8],       %[f10],       %[f5]                 \n\t"
   1005     "mul.s     %[f10],      %[f10],       %[f7]                 \n\t"
   1006 #if !defined(MIPS32_R2_LE)
            // Second step: f8 = yr, f10 = yi (f9 is free again because the first
            // step's yr was consumed above). The first step's a2[1] update and
            // its first two stores are interleaved here.
   1007     "mul.s     %[f9],       %[f15],       %[f7]                 \n\t"
   1008     "mul.s     %[f15],      %[f15],       %[f5]                 \n\t"
   1009     "sub.s     %[f4],       %[f4],        %[f6]                 \n\t"
   1010     "swc1      %[f1],       0(%[a1])                            \n\t"
   1011     "swc1      %[f2],       0(%[a2])                            \n\t"
   1012     "sub.s     %[f8],       %[f8],        %[f9]                 \n\t"
   1013     "add.s     %[f10],      %[f10],       %[f15]                \n\t"
   1014 #else
            // Same work with the second step's yr / yi formed by
            // nmsub.s / madd.s.
   1015     "swc1      %[f1],       0(%[a1])                            \n\t"
   1016     "swc1      %[f2],       0(%[a2])                            \n\t"
   1017     "sub.s     %[f4],       %[f4],        %[f6]                 \n\t"
   1018     "nmsub.s   %[f8],       %[f8],        %[f15],     %[f7]     \n\t"
   1019     "madd.s    %[f10],      %[f10],       %[f15],     %[f5]     \n\t"
   1020 #endif
            // First step: remaining two stores.
   1021     "swc1      %[f3],       4(%[a1])                            \n\t"
   1022     "swc1      %[f4],       4(%[a2])                            \n\t"
            // Second step: apply yr (f8) and yi (f10) to the four loaded values.
   1023     "sub.s     %[f11],      %[f11],       %[f8]                 \n\t"
   1024     "add.s     %[f12],      %[f12],       %[f8]                 \n\t"
   1025     "sub.s     %[f13],      %[f13],       %[f10]                \n\t"
   1026     "sub.s     %[f14],      %[f14],       %[f10]                \n\t"
            // Both coefficient pointers move by two floats per iteration.
   1027     "addiu     %[c2],       %[c2],        -8                    \n\t"
   1028     "addiu     %[c1],       %[c1],        8                     \n\t"
            // Second step: stores, still relative to the un-advanced a1 / a2.
   1029     "swc1      %[f11],      8(%[a1])                            \n\t"
   1030     "swc1      %[f12],      -8(%[a2])                           \n\t"
   1031     "swc1      %[f13],      12(%[a1])                           \n\t"
   1032     "swc1      %[f14],      -4(%[a2])                           \n\t"
            // a1 moves forward by four floats; count is decremented before the
            // test, so the body executes 15 times.
   1033     "addiu     %[a1],       %[a1],        16                    \n\t"
   1034     "addiu     %[count],    %[count],     -1                    \n\t"
   1035     "bgtz      %[count],    1b                                  \n\t"
            // Branch delay slot: a2 moves back by four floats.
   1036     " addiu    %[a2],       %[a2],        -16                   \n\t"
   1037     ".set      pop                                              \n\t"
            // The four walking pointers are read-write operands ("+r"); the FPU
            // scratch registers and count are early-clobber outputs; 0.5f is the
            // only pure input. The "memory" clobber covers the loads and stores
            // made through the pointers.
   1038     : [a1] "+r" (a1), [a2] "+r" (a2), [c1] "+r" (c1), [c2] "+r" (c2),
   1039       [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3), [f4] "=&f" (f4),
   1040       [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7), [f8] "=&f" (f8),
   1041       [f9] "=&f" (f9), [f10] "=&f" (f10), [f11] "=&f" (f11), [f12] "=&f" (f12),
   1042       [f13] "=&f" (f13), [f14] "=&f" (f14), [f15] "=&f" (f15),
   1043       [count] "=&r" (count)
   1044     : [f0] "f" (f0)
   1045     : "memory"
   1046   );
   1047 }
   1048 
   1049 static void rftbsub_128_mips(float* a) {
          // Hand-scheduled MIPS FPU routine that rewrites a[] in place. It touches
          // a[1]..a[63] and a[65]..a[127] (a[0] and a[64] are never accessed), so
          // `a` must hold at least 128 floats.
          //
          // a[1] and a[65] are only sign-flipped. Every other complex pair is
          // processed together with its mirror: a1 ("lo") walks up from a[2], a2
          // ("hi") walks down from a[126], c1 walks up from c[1] and c2 walks down
          // from c[31]. For each lo/hi pair the assembly computes
          //   wkr = 0.5f - *c2;             wki = *c1;
          //   xr  = lo[0] - hi[0];          xi  = lo[1] + hi[1];
          //   yr  = wkr * xr + wki * xi;    yi  = wkr * xi - wki * xr;
          //   lo[0] = lo[0] - yr;           hi[0] = hi[0] + yr;
          //   lo[1] = yi - lo[1];           hi[1] = yi - hi[1];
          // One pair is handled before the loop; the loop then runs 15 times and
          // handles two pairs per iteration (31 pairs in total).
          //
          // NOTE(review): presumably the MIPS replacement for the generic
          // rftbsub_128 hook (aec_rdft_init_mips installs it there) -- confirm the
          // math against the portable implementation.

          // c starts 32 floats into the rdft_w table (declared outside this
          // function).
   1050   const float *c = rdft_w + 32;
          // The 0.5 in "wkr = 0.5 - *c2"; passed to the asm as input operand f0.
   1051   const float f0 = 0.5f;
   1052   float* a1 = &a[2];
   1053   float* a2 = &a[126];
   1054   const float* c1 = &c[1];
   1055   const float* c2 = &c[31];
          // f1..f15 are scratch FPU registers for the asm; count is the loop counter.
   1056   float f1, f2, f3 ,f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14, f15;
   1057   int count;
   1058 
   1059   a[1] = -a[1];
   1060   a[65] = -a[65];
   1061 
   1062   __asm __volatile (
   1063     ".set      push                                             \n\t"
            // noreorder: the assembler keeps this exact instruction order, so the
            // instruction written after the final branch runs in its delay slot.
   1064     ".set      noreorder                                        \n\t"
            // ---- Peeled first pair: lo = a[2..3], hi = a[126..127]. ----
            // Memory offsets are in bytes (one float = 4 bytes).
   1065     "lwc1      %[f6],       0(%[c2])                            \n\t"
   1066     "lwc1      %[f1],       0(%[a1])                            \n\t"
   1067     "lwc1      %[f2],       0(%[a2])                            \n\t"
   1068     "lwc1      %[f3],       4(%[a1])                            \n\t"
   1069     "lwc1      %[f4],       4(%[a2])                            \n\t"
   1070     "lwc1      %[f5],       0(%[c1])                            \n\t"
            // f6 = wkr, f7 = xr, f8 = xi (f5 already holds wki).
   1071     "sub.s     %[f6],       %[f0],        %[f6]                 \n\t"
   1072     "sub.s     %[f7],       %[f1],        %[f2]                 \n\t"
   1073     "add.s     %[f8],       %[f3],        %[f4]                 \n\t"
            // 15 loop iterations remain after this peeled pair.
   1074     "addiu     %[count],    $zero,        15                    \n\t"
            // f9 = wkr * xr, f6 = wkr * xi; the wki terms are folded in next.
   1075     "mul.s     %[f9],       %[f6],        %[f7]                 \n\t"
   1076     "mul.s     %[f6],       %[f6],        %[f8]                 \n\t"
   1077 #if !defined(MIPS32_R2_LE)
            // Separate multiplies, then f9 = yr and f6 = yi.
   1078     "mul.s     %[f8],       %[f5],        %[f8]                 \n\t"
   1079     "mul.s     %[f5],       %[f5],        %[f7]                 \n\t"
   1080     "add.s     %[f9],       %[f9],        %[f8]                 \n\t"
   1081     "sub.s     %[f6],       %[f6],        %[f5]                 \n\t"
   1082 #else
            // Multiply-add forms: f9 = f9 + f5 * f8 (yr), f6 = f6 - f5 * f7 (yi).
   1083     "madd.s    %[f9],       %[f9],        %[f5],      %[f8]     \n\t"
   1084     "nmsub.s   %[f6],       %[f6],        %[f5],      %[f7]     \n\t"
   1085 #endif
            // lo[0] -= yr, hi[0] += yr, lo[1] = yi - lo[1], hi[1] = yi - hi[1].
   1086     "sub.s     %[f1],       %[f1],        %[f9]                 \n\t"
   1087     "add.s     %[f2],       %[f2],        %[f9]                 \n\t"
   1088     "sub.s     %[f3],       %[f6],        %[f3]                 \n\t"
   1089     "sub.s     %[f4],       %[f6],        %[f4]                 \n\t"
   1090     "swc1      %[f1],       0(%[a1])                            \n\t"
   1091     "swc1      %[f2],       0(%[a2])                            \n\t"
   1092     "swc1      %[f3],       4(%[a1])                            \n\t"
   1093     "swc1      %[f4],       4(%[a2])                            \n\t"
            // Step every pointer by one pair / one coefficient.
   1094     "addiu     %[a1],       %[a1],        8                     \n\t"
   1095     "addiu     %[a2],       %[a2],        -8                    \n\t"
   1096     "addiu     %[c1],       %[c1],        4                     \n\t"
   1097     "addiu     %[c2],       %[c2],        -4                    \n\t"
            // ---- Main loop: two pairs per iteration. ----
            // Pair A uses lo = a1[0..1], hi = a2[0..1], c1[0], c2[0].
            // Pair B uses lo = a1[2..3], hi = a2[-2..-1], c1[1], c2[-1].
            // Loads for pair B are interleaved with the arithmetic of pair A.
   1098    "1:                                                          \n\t"
   1099     "lwc1      %[f6],       0(%[c2])                            \n\t"
   1100     "lwc1      %[f1],       0(%[a1])                            \n\t"
   1101     "lwc1      %[f2],       0(%[a2])                            \n\t"
   1102     "lwc1      %[f3],       4(%[a1])                            \n\t"
   1103     "lwc1      %[f4],       4(%[a2])                            \n\t"
   1104     "lwc1      %[f5],       0(%[c1])                            \n\t"
            // Pair A: f6 = wkr, f7 = xr, f8 = xi.
   1105     "sub.s     %[f6],       %[f0],        %[f6]                 \n\t"
   1106     "sub.s     %[f7],       %[f1],        %[f2]                 \n\t"
   1107     "add.s     %[f8],       %[f3],        %[f4]                 \n\t"
            // Pair B loads: f10 = c2[-1], f11 = lo[0], f12 = hi[0].
   1108     "lwc1      %[f10],      -4(%[c2])                           \n\t"
   1109     "lwc1      %[f11],      8(%[a1])                            \n\t"
   1110     "lwc1      %[f12],      -8(%[a2])                           \n\t"
   1111     "mul.s     %[f9],       %[f6],        %[f7]                 \n\t"
   1112     "mul.s     %[f6],       %[f6],        %[f8]                 \n\t"
            // Both variants finish pair A (f9 = yr, f6 = yi) and load the rest of
            // pair B: f13 = lo[1], f14 = hi[1], f15 = wki.
   1113 #if !defined(MIPS32_R2_LE)
   1114     "mul.s     %[f8],       %[f5],        %[f8]                 \n\t"
   1115     "mul.s     %[f5],       %[f5],        %[f7]                 \n\t"
   1116     "lwc1      %[f13],      12(%[a1])                           \n\t"
   1117     "lwc1      %[f14],      -4(%[a2])                           \n\t"
   1118     "lwc1      %[f15],      4(%[c1])                            \n\t"
   1119     "add.s     %[f9],       %[f9],        %[f8]                 \n\t"
   1120     "sub.s     %[f6],       %[f6],        %[f5]                 \n\t"
   1121 #else
   1122     "lwc1      %[f13],      12(%[a1])                           \n\t"
   1123     "lwc1      %[f14],      -4(%[a2])                           \n\t"
   1124     "lwc1      %[f15],      4(%[c1])                            \n\t"
   1125     "madd.s    %[f9],       %[f9],        %[f5],      %[f8]     \n\t"
   1126     "nmsub.s   %[f6],       %[f6],        %[f5],      %[f7]     \n\t"
   1127 #endif
            // Pair B: f10 = wkr, f5 = xr, f7 = xi (f5/f7 are free again here).
   1128     "sub.s     %[f10],      %[f0],        %[f10]                \n\t"
   1129     "sub.s     %[f5],       %[f11],       %[f12]                \n\t"
   1130     "add.s     %[f7],       %[f13],       %[f14]                \n\t"
            // Pair A results: lo[0] - yr, hi[0] + yr, yi - lo[1].
   1131     "sub.s     %[f1],       %[f1],        %[f9]                 \n\t"
   1132     "add.s     %[f2],       %[f2],        %[f9]                 \n\t"
   1133     "sub.s     %[f3],       %[f6],        %[f3]                 \n\t"
            // Pair B: f8 = wkr * xr, f10 = wkr * xi.
   1134     "mul.s     %[f8],       %[f10],       %[f5]                 \n\t"
   1135     "mul.s     %[f10],      %[f10],       %[f7]                 \n\t"
            // Both variants compute the last pair-A value (f4 = yi - hi[1]), store
            // pair A's first elements and finish pair B (f8 = yr, f10 = yi).
   1136 #if !defined(MIPS32_R2_LE)
   1137     "mul.s     %[f9],       %[f15],       %[f7]                 \n\t"
   1138     "mul.s     %[f15],      %[f15],       %[f5]                 \n\t"
   1139     "sub.s     %[f4],       %[f6],        %[f4]                 \n\t"
   1140     "swc1      %[f1],       0(%[a1])                            \n\t"
   1141     "swc1      %[f2],       0(%[a2])                            \n\t"
   1142     "add.s     %[f8],       %[f8],        %[f9]                 \n\t"
   1143     "sub.s     %[f10],      %[f10],       %[f15]                \n\t"
   1144 #else
   1145     "swc1      %[f1],       0(%[a1])                            \n\t"
   1146     "swc1      %[f2],       0(%[a2])                            \n\t"
   1147     "sub.s     %[f4],       %[f6],        %[f4]                 \n\t"
   1148     "madd.s    %[f8],       %[f8],        %[f15],     %[f7]     \n\t"
   1149     "nmsub.s   %[f10],      %[f10],       %[f15],     %[f5]     \n\t"
   1150 #endif
   1151     "swc1      %[f3],       4(%[a1])                            \n\t"
   1152     "swc1      %[f4],       4(%[a2])                            \n\t"
            // Pair B results, same update rule as pair A.
   1153     "sub.s     %[f11],      %[f11],       %[f8]                 \n\t"
   1154     "add.s     %[f12],      %[f12],       %[f8]                 \n\t"
   1155     "sub.s     %[f13],      %[f10],       %[f13]                \n\t"
   1156     "sub.s     %[f14],      %[f10],       %[f14]                \n\t"
            // Two coefficients were consumed from each end of c[].
   1157     "addiu     %[c2],       %[c2],        -8                    \n\t"
   1158     "addiu     %[c1],       %[c1],        8                     \n\t"
   1159     "swc1      %[f11],      8(%[a1])                            \n\t"
   1160     "swc1      %[f12],      -8(%[a2])                           \n\t"
   1161     "swc1      %[f13],      12(%[a1])                           \n\t"
   1162     "swc1      %[f14],      -4(%[a2])                           \n\t"
            // Advance lo by two pairs (16 bytes), count down and loop while
            // count > 0.
   1163     "addiu     %[a1],       %[a1],        16                    \n\t"
   1164     "addiu     %[count],    %[count],     -1                    \n\t"
   1165     "bgtz      %[count],    1b                                  \n\t"
            // Branch delay slot: moves hi back by two pairs on every iteration,
            // including the last one.
   1166     " addiu    %[a2],       %[a2],        -16                   \n\t"
   1167     ".set      pop                                              \n\t"
            // "+r": the four pointers are read and advanced by the asm.
            // "=&f": early-clobber FP scratch outputs, so none may be allocated to
            //        the same register as the input f0.
            // count is write-only from the compiler's view; the asm sets it to 15.
   1168     : [a1] "+r" (a1), [a2] "+r" (a2), [c1] "+r" (c1), [c2] "+r" (c2),
   1169       [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3), [f4] "=&f" (f4),
   1170       [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7), [f8] "=&f" (f8),
   1171       [f9] "=&f" (f9), [f10] "=&f" (f10), [f11] "=&f" (f11), [f12] "=&f" (f12),
   1172       [f13] "=&f" (f13), [f14] "=&f" (f14), [f15] "=&f" (f15),
   1173       [count] "=&r" (count)
   1174     : [f0] "f" (f0)
            // The asm loads and stores through the pointers rather than through
            // listed memory operands, hence the "memory" clobber.
   1175     : "memory"
   1176   );
   1177 }
   1178 
   1179 void aec_rdft_init_mips(void) {
          // Points each of the seven *_128 function-pointer hooks at its *_mips
          // routine. The assignments are independent of one another.
          // NOTE(review): the hooks themselves are declared outside this chunk,
          // presumably in aec_rdft.h -- confirm.

          // Bit-reversal permutation.
   1180   bitrv2_128 = bitrv2_128_mips;

          // Complex FFT stages and drivers.
   1181   cft1st_128 = cft1st_128_mips;
   1182   cftmdl_128 = cftmdl_128_mips;
   1183   cftfsub_128 = cftfsub_128_mips;
   1184   cftbsub_128 = cftbsub_128_mips;

          // Real-FFT forward / backward sub-steps.
   1185   rftfsub_128 = rftfsub_128_mips;
   1186   rftbsub_128 = rftbsub_128_mips;
   1187 }
   1188