/*
 *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
     10 
#include "webrtc/modules/audio_coding/codecs/isac/fix/source/settings.h"
#include "webrtc/typedefs.h"
     13 
     14 // Filter ar_g_Q0[] and ar_f_Q0[] through an AR filter with coefficients
     15 // cth_Q15[] and sth_Q15[].
     16 void WebRtcIsacfix_FilterArLoop(int16_t* ar_g_Q0,     // Input samples
     17                                 int16_t* ar_f_Q0,     // Input samples
     18                                 int16_t* cth_Q15,     // Filter coefficients
     19                                 int16_t* sth_Q15,     // Filter coefficients
     20                                 int16_t order_coef) { // order of the filter
     21   int n = 0;
     22 
     23   for (n = 0; n < HALF_SUBFRAMELEN - 1; n++) {
     24     int count = order_coef - 1;
     25     int offset;
     26 #if !defined(MIPS_DSP_R1_LE)
     27     int16_t* tmp_cth;
     28     int16_t* tmp_sth;
     29     int16_t* tmp_arg;
     30     int32_t max_q16 = 0x7fff;
     31     int32_t min_q16 = 0xffff8000;
     32 #endif
     33     // Declare variables used as temporary registers.
     34     int32_t r0, r1, r2, t0, t1, t2, t_ar;
     35 
     36     __asm __volatile (
     37       ".set          push                                                \n\t"
     38       ".set          noreorder                                           \n\t"
     39       "bltz          %[count],     2f                                    \n\t"
     40       " lh           %[t_ar],      0(%[tmp])                             \n\t"
     41       // Inner loop
     42      "1:                                                                 \n\t"
     43       "sll           %[offset],    %[count],               1             \n\t"
     44 #if defined(MIPS_DSP_R1_LE)
     45       "lhx           %[r0],        %[offset](%[cth_Q15])                 \n\t"
     46       "lhx           %[r1],        %[offset](%[sth_Q15])                 \n\t"
     47       "lhx           %[r2],        %[offset](%[ar_g_Q0])                 \n\t"
     48 #else
     49       "addu          %[tmp_cth],   %[cth_Q15],             %[offset]     \n\t"
     50       "addu          %[tmp_sth],   %[sth_Q15],             %[offset]     \n\t"
     51       "addu          %[tmp_arg],   %[ar_g_Q0],             %[offset]     \n\t"
     52       "lh            %[r0],        0(%[tmp_cth])                         \n\t"
     53       "lh            %[r1],        0(%[tmp_sth])                         \n\t"
     54       "lh            %[r2],        0(%[tmp_arg])                         \n\t"
     55 #endif
     56       "mul           %[t0],        %[r0],                  %[t_ar]       \n\t"
     57       "mul           %[t1],        %[r1],                  %[t_ar]       \n\t"
     58       "mul           %[t2],        %[r1],                  %[r2]         \n\t"
     59       "mul           %[r0],        %[r0],                  %[r2]         \n\t"
     60       "subu          %[t0],        %[t0],                  %[t2]         \n\t"
     61       "addu          %[t1],        %[t1],                  %[r0]         \n\t"
     62 #if defined(MIPS_DSP_R1_LE)
     63       "shra_r.w      %[t1],        %[t1],                  15            \n\t"
     64       "shra_r.w      %[t0],        %[t0],                  15            \n\t"
     65 #else
     66       "addiu         %[t1],        %[t1],                  0x4000        \n\t"
     67       "sra           %[t1],        %[t1],                  15            \n\t"
     68       "addiu         %[t0],        %[t0],                  0x4000        \n\t"
     69       "sra           %[t0],        %[t0],                  15            \n\t"
     70 #endif
     71       "addiu         %[offset],    %[offset],              2             \n\t"
     72 #if defined(MIPS_DSP_R1_LE)
     73       "shll_s.w      %[t1],        %[t1],                  16            \n\t"
     74       "shll_s.w      %[t_ar],      %[t0],                  16            \n\t"
     75 #else
     76       "slt           %[r0],        %[t1],                  %[max_q16]    \n\t"
     77       "slt           %[r1],        %[t0],                  %[max_q16]    \n\t"
     78       "movz          %[t1],        %[max_q16],             %[r0]         \n\t"
     79       "movz          %[t0],        %[max_q16],             %[r1]         \n\t"
     80 #endif
     81       "addu          %[offset],    %[offset],              %[ar_g_Q0]    \n\t"
     82 #if defined(MIPS_DSP_R1_LE)
     83       "sra           %[t1],        %[t1],                  16            \n\t"
     84       "sra           %[t_ar],      %[t_ar],                16            \n\t"
     85 #else
     86       "slt           %[r0],        %[t1],                  %[min_q16]    \n\t"
     87       "slt           %[r1],        %[t0],                  %[min_q16]    \n\t"
     88       "movn          %[t1],        %[min_q16],             %[r0]         \n\t"
     89       "movn          %[t0],        %[min_q16],             %[r1]         \n\t"
     90       "addu          %[t_ar],      $zero,                  %[t0]         \n\t"
     91 #endif
     92       "sh            %[t1],        0(%[offset])                          \n\t"
     93       "bgtz          %[count],     1b                                    \n\t"
     94       " addiu        %[count],     %[count],               -1            \n\t"
     95      "2:                                                                 \n\t"
     96       "sh            %[t_ar],      0(%[tmp])                             \n\t"
     97       "sh            %[t_ar],      0(%[ar_g_Q0])                         \n\t"
     98       ".set          pop                                                 \n\t"
     99       : [t_ar] "=&r" (t_ar), [count] "+r" (count), [offset] "=&r" (offset),
    100         [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2), [t0] "=&r" (t0),
    101 #if !defined(MIPS_DSP_R1_LE)
    102         [tmp_cth] "=&r" (tmp_cth), [tmp_sth] "=&r" (tmp_sth),
    103         [tmp_arg] "=&r" (tmp_arg),
    104 #endif
    105         [t1] "=&r" (t1), [t2] "=&r" (t2)
    106       : [tmp] "r" (&ar_f_Q0[n+1]), [cth_Q15] "r" (cth_Q15),
    107 #if !defined(MIPS_DSP_R1_LE)
    108         [max_q16] "r" (max_q16), [min_q16] "r" (min_q16),
    109 #endif
    110         [sth_Q15] "r" (sth_Q15), [ar_g_Q0] "r" (ar_g_Q0)
    111       : "memory", "hi", "lo"
    112     );
    113   }
    114 }
    115 
    116 // MIPS optimization of the inner loop used for function
    117 // WebRtcIsacfix_NormLatticeFilterMa(). It does:
    118 //
    119 // for 0 <= n < HALF_SUBFRAMELEN - 1:
    120 //   *ptr2 = input2 * (*ptr2) + input0 * (*ptr0));
    121 //   *ptr1 = input1 * (*ptr0) + input0 * (*ptr2);
    122 //
    123 // Note, function WebRtcIsacfix_FilterMaLoopMIPS and WebRtcIsacfix_FilterMaLoopC
    124 // are not bit-exact. The accuracy of the MIPS function is same or better.
    125 void WebRtcIsacfix_FilterMaLoopMIPS(int16_t input0,  // Filter coefficient
    126                                     int16_t input1,  // Filter coefficient
    127                                     int32_t input2,  // Inverse coeff (1/input1)
    128                                     int32_t* ptr0,   // Sample buffer
    129                                     int32_t* ptr1,   // Sample buffer
    130                                     int32_t* ptr2) { // Sample buffer
    131 #if defined(MIPS_DSP_R2_LE)
    132   // MIPS DSPR2 version. 4 available accumulators allows loop unrolling 4 times.
    133   // This variant is not bit-exact with WebRtcIsacfix_FilterMaLoopC, since we
    134   // are exploiting 64-bit accumulators. The accuracy of the MIPS DSPR2 function
    135   // is same or better.
    136   int n = (HALF_SUBFRAMELEN - 1) >> 2;
    137   int m = (HALF_SUBFRAMELEN - 1) & 3;
    138 
    139   int r0, r1, r2, r3;
    140   int t0, t1, t2, t3;
    141   int s0, s1, s2, s3;
    142 
    143   __asm __volatile (
    144     ".set          push                                      \n\t"
    145     ".set          noreorder                                 \n\t"
    146    "1:                                                       \n\t"
    147     "lw            %[r0],        0(%[ptr0])                  \n\t"
    148     "lw            %[r1],        4(%[ptr0])                  \n\t"
    149     "lw            %[r2],        8(%[ptr0])                  \n\t"
    150     "lw            %[r3],        12(%[ptr0])                 \n\t"
    151     "mult          $ac0,         %[r0],        %[input0]     \n\t"
    152     "mult          $ac1,         %[r1],        %[input0]     \n\t"
    153     "mult          $ac2,         %[r2],        %[input0]     \n\t"
    154     "mult          $ac3,         %[r3],        %[input0]     \n\t"
    155     "lw            %[t0],        0(%[ptr2])                  \n\t"
    156     "extr_rs.w     %[s0],        $ac0,         15            \n\t"
    157     "extr_rs.w     %[s1],        $ac1,         15            \n\t"
    158     "extr_rs.w     %[s2],        $ac2,         15            \n\t"
    159     "extr_rs.w     %[s3],        $ac3,         15            \n\t"
    160     "lw            %[t1],        4(%[ptr2])                  \n\t"
    161     "lw            %[t2],        8(%[ptr2])                  \n\t"
    162     "lw            %[t3],        12(%[ptr2])                 \n\t"
    163     "addu          %[t0],        %[t0],        %[s0]         \n\t"
    164     "addu          %[t1],        %[t1],        %[s1]         \n\t"
    165     "addu          %[t2],        %[t2],        %[s2]         \n\t"
    166     "addu          %[t3],        %[t3],        %[s3]         \n\t"
    167     "mult          $ac0,         %[t0],        %[input2]     \n\t"
    168     "mult          $ac1,         %[t1],        %[input2]     \n\t"
    169     "mult          $ac2,         %[t2],        %[input2]     \n\t"
    170     "mult          $ac3,         %[t3],        %[input2]     \n\t"
    171     "addiu         %[ptr0],      %[ptr0],      16            \n\t"
    172     "extr_rs.w     %[t0],        $ac0,         16            \n\t"
    173     "extr_rs.w     %[t1],        $ac1,         16            \n\t"
    174     "extr_rs.w     %[t2],        $ac2,         16            \n\t"
    175     "extr_rs.w     %[t3],        $ac3,         16            \n\t"
    176     "addiu         %[n],         %[n],         -1            \n\t"
    177     "mult          $ac0,         %[r0],        %[input1]     \n\t"
    178     "mult          $ac1,         %[r1],        %[input1]     \n\t"
    179     "mult          $ac2,         %[r2],        %[input1]     \n\t"
    180     "mult          $ac3,         %[r3],        %[input1]     \n\t"
    181     "sw            %[t0],        0(%[ptr2])                  \n\t"
    182     "extr_rs.w     %[s0],        $ac0,         15            \n\t"
    183     "extr_rs.w     %[s1],        $ac1,         15            \n\t"
    184     "extr_rs.w     %[s2],        $ac2,         15            \n\t"
    185     "extr_rs.w     %[s3],        $ac3,         15            \n\t"
    186     "sw            %[t1],        4(%[ptr2])                  \n\t"
    187     "sw            %[t2],        8(%[ptr2])                  \n\t"
    188     "sw            %[t3],        12(%[ptr2])                 \n\t"
    189     "mult          $ac0,         %[t0],        %[input0]     \n\t"
    190     "mult          $ac1,         %[t1],        %[input0]     \n\t"
    191     "mult          $ac2,         %[t2],        %[input0]     \n\t"
    192     "mult          $ac3,         %[t3],        %[input0]     \n\t"
    193     "addiu         %[ptr2],      %[ptr2],      16            \n\t"
    194     "extr_rs.w     %[t0],        $ac0,         15            \n\t"
    195     "extr_rs.w     %[t1],        $ac1,         15            \n\t"
    196     "extr_rs.w     %[t2],        $ac2,         15            \n\t"
    197     "extr_rs.w     %[t3],        $ac3,         15            \n\t"
    198     "addu          %[t0],        %[t0],        %[s0]         \n\t"
    199     "addu          %[t1],        %[t1],        %[s1]         \n\t"
    200     "addu          %[t2],        %[t2],        %[s2]         \n\t"
    201     "addu          %[t3],        %[t3],        %[s3]         \n\t"
    202     "sw            %[t0],        0(%[ptr1])                  \n\t"
    203     "sw            %[t1],        4(%[ptr1])                  \n\t"
    204     "sw            %[t2],        8(%[ptr1])                  \n\t"
    205     "sw            %[t3],        12(%[ptr1])                 \n\t"
    206     "bgtz          %[n],         1b                          \n\t"
    207     " addiu        %[ptr1],      %[ptr1],      16            \n\t"
    208     "beq           %[m],         %0,           3f            \n\t"
    209     " nop                                                    \n\t"
    210    "2:                                                       \n\t"
    211     "lw            %[r0],        0(%[ptr0])                  \n\t"
    212     "lw            %[t0],        0(%[ptr2])                  \n\t"
    213     "addiu         %[ptr0],      %[ptr0],      4             \n\t"
    214     "mult          $ac0,         %[r0],        %[input0]     \n\t"
    215     "mult          $ac1,         %[r0],        %[input1]     \n\t"
    216     "extr_rs.w     %[r1],        $ac0,         15            \n\t"
    217     "extr_rs.w     %[t1],        $ac1,         15            \n\t"
    218     "addu          %[t0],        %[t0],        %[r1]         \n\t"
    219     "mult          $ac0,         %[t0],        %[input2]     \n\t"
    220     "extr_rs.w     %[t0],        $ac0,         16            \n\t"
    221     "sw            %[t0],        0(%[ptr2])                  \n\t"
    222     "mult          $ac0,         %[t0],        %[input0]     \n\t"
    223     "addiu         %[ptr2],      %[ptr2],      4             \n\t"
    224     "addiu         %[m],         %[m],         -1            \n\t"
    225     "extr_rs.w     %[t0],        $ac0,         15            \n\t"
    226     "addu          %[t0],        %[t0],        %[t1]         \n\t"
    227     "sw            %[t0],        0(%[ptr1])                  \n\t"
    228     "bgtz          %[m],         2b                          \n\t"
    229     " addiu        %[ptr1],      %[ptr1],      4             \n\t"
    230    "3:                                                       \n\t"
    231     ".set          pop                                       \n\t"
    232     : [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2),
    233       [r3] "=&r" (r3), [t0] "=&r" (t0), [t1] "=&r" (t1),
    234       [t2] "=&r" (t2), [t3] "=&r" (t3), [s0] "=&r" (s0),
    235       [s1] "=&r" (s1), [s2] "=&r" (s2), [s3] "=&r" (s3),
    236       [ptr0] "+r" (ptr0), [ptr1] "+r" (ptr1), [m] "+r" (m),
    237       [ptr2] "+r" (ptr2), [n] "+r" (n)
    238     : [input0] "r" (input0), [input1] "r" (input1),
    239       [input2] "r" (input2)
    240     : "memory", "hi", "lo", "$ac1hi", "$ac1lo", "$ac2hi",
    241       "$ac2lo", "$ac3hi", "$ac3lo"
    242   );
    243 #else
    244   // Non-DSPR2 version of the function. Avoiding the accumulator usage due to
    245   // large latencies. This variant is bit-exact with C code.
    246   int n = HALF_SUBFRAMELEN - 1;
    247   int32_t t16a, t16b;
    248   int32_t r0, r1, r2, r3, r4;
    249 
    250   __asm __volatile (
    251     ".set          push                                      \n\t"
    252     ".set          noreorder                                 \n\t"
    253     "sra           %[t16a],      %[input2],     16           \n\t"
    254     "andi          %[t16b],      %[input2],     0xFFFF       \n\t"
    255 #if defined(MIPS32R2_LE)
    256     "seh           %[t16b],      %[t16b]                     \n\t"
    257     "seh           %[input0],    %[input0]                   \n\t"
    258     "seh           %[input1],    %[input1]                   \n\t"
    259 #else
    260     "sll           %[t16b],      %[t16b],       16           \n\t"
    261     "sra           %[t16b],      %[t16b],       16           \n\t"
    262     "sll           %[input0],    %[input0],     16           \n\t"
    263     "sra           %[input0],    %[input0],     16           \n\t"
    264     "sll           %[input1],    %[input1],     16           \n\t"
    265     "sra           %[input1],    %[input1],     16           \n\t"
    266 #endif
    267     "addiu         %[r0],        %[t16a],       1            \n\t"
    268     "slt           %[r1],        %[t16b],       $zero        \n\t"
    269     "movn          %[t16a],      %[r0],         %[r1]        \n\t"
    270    "1:                                                       \n\t"
    271     "lw            %[r0],        0(%[ptr0])                  \n\t"
    272     "lw            %[r1],        0(%[ptr2])                  \n\t"
    273     "addiu         %[ptr0],      %[ptr0],       4            \n\t"
    274     "sra           %[r2],        %[r0],         16           \n\t"
    275     "andi          %[r0],        %[r0],         0xFFFF       \n\t"
    276     "mul           %[r3],        %[r2],         %[input0]    \n\t"
    277     "mul           %[r4],        %[r0],         %[input0]    \n\t"
    278     "mul           %[r2],        %[r2],         %[input1]    \n\t"
    279     "mul           %[r0],        %[r0],         %[input1]    \n\t"
    280     "addiu         %[ptr2],      %[ptr2],       4            \n\t"
    281     "sll           %[r3],        %[r3],         1            \n\t"
    282     "sra           %[r4],        %[r4],         1            \n\t"
    283     "addiu         %[r4],        %[r4],         0x2000       \n\t"
    284     "sra           %[r4],        %[r4],         14           \n\t"
    285     "addu          %[r3],        %[r3],         %[r4]        \n\t"
    286     "addu          %[r1],        %[r1],         %[r3]        \n\t"
    287     "sra           %[r3],        %[r1],         16           \n\t"
    288     "andi          %[r4],        %[r1],         0xFFFF       \n\t"
    289     "sra           %[r4],        %[r4],         1            \n\t"
    290     "mul           %[r1],        %[r1],         %[t16a]      \n\t"
    291     "mul           %[r3],        %[r3],         %[t16b]      \n\t"
    292     "mul           %[r4],        %[r4],         %[t16b]      \n\t"
    293     "sll           %[r2],        %[r2],         1            \n\t"
    294     "sra           %[r0],        %[r0],         1            \n\t"
    295     "addiu         %[r0],        %[r0],         0x2000       \n\t"
    296     "sra           %[r0],        %[r0],         14           \n\t"
    297     "addu          %[r0],        %[r0],         %[r2]        \n\t"
    298     "addiu         %[n],         %[n],          -1           \n\t"
    299     "addu          %[r1],        %[r1],         %[r3]        \n\t"
    300     "addiu         %[r4],        %[r4],         0x4000       \n\t"
    301     "sra           %[r4],        %[r4],         15           \n\t"
    302     "addu          %[r1],        %[r1],         %[r4]        \n\t"
    303     "sra           %[r2],        %[r1],         16           \n\t"
    304     "andi          %[r3],        %[r1],         0xFFFF       \n\t"
    305     "mul           %[r3],        %[r3],         %[input0]    \n\t"
    306     "mul           %[r2],        %[r2],         %[input0]    \n\t"
    307     "sw            %[r1],        -4(%[ptr2])                 \n\t"
    308     "sra           %[r3],        %[r3],         1            \n\t"
    309     "addiu         %[r3],        %[r3],         0x2000       \n\t"
    310     "sra           %[r3],        %[r3],         14           \n\t"
    311     "addu          %[r0],        %[r0],         %[r3]        \n\t"
    312     "sll           %[r2],        %[r2],         1            \n\t"
    313     "addu          %[r0],        %[r0],         %[r2]        \n\t"
    314     "sw            %[r0],        0(%[ptr1])                  \n\t"
    315     "bgtz          %[n],         1b                          \n\t"
    316     " addiu        %[ptr1],      %[ptr1],       4            \n\t"
    317     ".set          pop                                       \n\t"
    318     : [t16a] "=&r" (t16a), [t16b] "=&r" (t16b), [r0] "=&r" (r0),
    319       [r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3),
    320       [r4] "=&r" (r4), [ptr0] "+r" (ptr0), [ptr1] "+r" (ptr1),
    321       [ptr2] "+r" (ptr2), [n] "+r" (n)
    322     : [input0] "r" (input0), [input1] "r" (input1),
    323       [input2] "r" (input2)
    324     : "hi", "lo", "memory"
    325   );
    326 #endif
    327 }
    328