Home | History | Annotate | Download | only in source
      1 /*
      2  *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #include <stddef.h>
     12 
     13 #include "webrtc/modules/audio_coding/codecs/isac/fix/source/settings.h"
     14 #include "webrtc/typedefs.h"
     15 
     16 // Filter ar_g_Q0[] and ar_f_Q0[] through an AR filter with coefficients
     17 // cth_Q15[] and sth_Q15[].
        //
        // For every n in [0, HALF_SUBFRAMELEN - 1) the inline assembly starts with
        // t_ar = ar_f_Q0[n + 1] and walks k from order_coef - 1 down to 0:
        //   f            = sat16((cth_Q15[k] * t_ar - sth_Q15[k] * ar_g_Q0[k] + rnd) >> 15)
        //   ar_g_Q0[k+1] = sat16((sth_Q15[k] * t_ar + cth_Q15[k] * ar_g_Q0[k] + rnd) >> 15)
        //   t_ar         = f
        // and finally writes t_ar to both ar_f_Q0[n + 1] and ar_g_Q0[0].
        // "rnd" is 0x4000 (explicit addiu, or the rounding of shra_r.w on DSP R1),
        // and sat16 clamps to [-32768, 32767].
        //
        // Two code paths are selected at compile time: with MIPS_DSP_R1_LE the DSP
        // instructions lhx / shra_r.w / shll_s.w are used, otherwise plain MIPS32
        // instructions with explicit address arithmetic and slt/movz/movn clamping.
     18 void WebRtcIsacfix_FilterArLoop(int16_t* ar_g_Q0,     // In/out: read at [k], written at [k+1] and [0]
     19                                 int16_t* ar_f_Q0,     // In/out: element [n+1] is read and rewritten
     20                                 int16_t* cth_Q15,     // Filter coefficients (only read)
     21                                 int16_t* sth_Q15,     // Filter coefficients (only read)
     22                                 size_t order_coef) { // order of the filter (number of taps)
     23   int n = 0;
     24 
     25   for (n = 0; n < HALF_SUBFRAMELEN - 1; n++) {
          // Index of the highest tap. It is -1 when order_coef is 0, in which case
          // the "bltz" below skips the tap loop entirely.
     26     int count = (int)(order_coef - 1);
          // Scratch: byte offset of the current tap, later the store address.
     27     int offset;
     28 #if !defined(MIPS_DSP_R1_LE)
          // Per-tap element addresses (the DSP path uses indexed loads instead).
     29     int16_t* tmp_cth;
     30     int16_t* tmp_sth;
     31     int16_t* tmp_arg;
          // 16-bit saturation bounds kept in registers for slt/movz/movn.
          // 0xffff8000 is the 32-bit two's-complement pattern of -32768.
     32     int32_t max_q16 = 0x7fff;
     33     int32_t min_q16 = 0xffff8000;
     34 #endif
     35     // Declare variables used as temporary registers.
     36     int32_t r0, r1, r2, t0, t1, t2, t_ar;
     37 
     38     __asm __volatile (
     39       ".set          push                                                \n\t"
     40       ".set          noreorder                                           \n\t"
          // Skip the tap loop when count < 0. The load in the branch delay slot
          // (t_ar = ar_f_Q0[n + 1]) executes whether or not the branch is taken.
     41       "bltz          %[count],     2f                                    \n\t"
     42       " lh           %[t_ar],      0(%[tmp])                             \n\t"
     43       // Inner loop
     44      "1:                                                                 \n\t"
          // offset = count * sizeof(int16_t)
     45       "sll           %[offset],    %[count],               1             \n\t"
          // r0 = cth_Q15[count], r1 = sth_Q15[count], r2 = ar_g_Q0[count]
     46 #if defined(MIPS_DSP_R1_LE)
     47       "lhx           %[r0],        %[offset](%[cth_Q15])                 \n\t"
     48       "lhx           %[r1],        %[offset](%[sth_Q15])                 \n\t"
     49       "lhx           %[r2],        %[offset](%[ar_g_Q0])                 \n\t"
     50 #else
     51       "addu          %[tmp_cth],   %[cth_Q15],             %[offset]     \n\t"
     52       "addu          %[tmp_sth],   %[sth_Q15],             %[offset]     \n\t"
     53       "addu          %[tmp_arg],   %[ar_g_Q0],             %[offset]     \n\t"
     54       "lh            %[r0],        0(%[tmp_cth])                         \n\t"
     55       "lh            %[r1],        0(%[tmp_sth])                         \n\t"
     56       "lh            %[r2],        0(%[tmp_arg])                         \n\t"
     57 #endif
          // t0 = cth * t_ar - sth * g,  t1 = sth * t_ar + cth * g
     58       "mul           %[t0],        %[r0],                  %[t_ar]       \n\t"
     59       "mul           %[t1],        %[r1],                  %[t_ar]       \n\t"
     60       "mul           %[t2],        %[r1],                  %[r2]         \n\t"
     61       "mul           %[r0],        %[r0],                  %[r2]         \n\t"
     62       "subu          %[t0],        %[t0],                  %[t2]         \n\t"
     63       "addu          %[t1],        %[t1],                  %[r0]         \n\t"
          // Rounded arithmetic shift right by 15 of both results.
     64 #if defined(MIPS_DSP_R1_LE)
     65       "shra_r.w      %[t1],        %[t1],                  15            \n\t"
     66       "shra_r.w      %[t0],        %[t0],                  15            \n\t"
     67 #else
     68       "addiu         %[t1],        %[t1],                  0x4000        \n\t"
     69       "sra           %[t1],        %[t1],                  15            \n\t"
     70       "addiu         %[t0],        %[t0],                  0x4000        \n\t"
     71       "sra           %[t0],        %[t0],                  15            \n\t"
     72 #endif
          // offset now addresses element count + 1 (still relative to ar_g_Q0).
     73       "addiu         %[offset],    %[offset],              2             \n\t"
          // Saturation, first half. DSP: saturating shift left by 16 (undone by
          // the "sra 16" below). Otherwise: clamp values >= max_q16 to max_q16.
     74 #if defined(MIPS_DSP_R1_LE)
     75       "shll_s.w      %[t1],        %[t1],                  16            \n\t"
     76       "shll_s.w      %[t_ar],      %[t0],                  16            \n\t"
     77 #else
     78       "slt           %[r0],        %[t1],                  %[max_q16]    \n\t"
     79       "slt           %[r1],        %[t0],                  %[max_q16]    \n\t"
     80       "movz          %[t1],        %[max_q16],             %[r0]         \n\t"
     81       "movz          %[t0],        %[max_q16],             %[r1]         \n\t"
     82 #endif
          // offset = &ar_g_Q0[count + 1]
     83       "addu          %[offset],    %[offset],              %[ar_g_Q0]    \n\t"
          // Saturation, second half. DSP: shift back down by 16. Otherwise: clamp
          // values below min_q16 to min_q16 and copy t0 into t_ar for the next tap.
     84 #if defined(MIPS_DSP_R1_LE)
     85       "sra           %[t1],        %[t1],                  16            \n\t"
     86       "sra           %[t_ar],      %[t_ar],                16            \n\t"
     87 #else
     88       "slt           %[r0],        %[t1],                  %[min_q16]    \n\t"
     89       "slt           %[r1],        %[t0],                  %[min_q16]    \n\t"
     90       "movn          %[t1],        %[min_q16],             %[r0]         \n\t"
     91       "movn          %[t0],        %[min_q16],             %[r1]         \n\t"
     92       "addu          %[t_ar],      $zero,                  %[t0]         \n\t"
     93 #endif
          // ar_g_Q0[count + 1] = t1
     94       "sh            %[t1],        0(%[offset])                          \n\t"
          // Repeat while count > 0; the decrement sits in the branch delay slot,
          // so the last iteration processed is the one with count == 0.
     95       "bgtz          %[count],     1b                                    \n\t"
     96       " addiu        %[count],     %[count],               -1            \n\t"
     97      "2:                                                                 \n\t"
          // ar_f_Q0[n + 1] = ar_g_Q0[0] = t_ar
     98       "sh            %[t_ar],      0(%[tmp])                             \n\t"
     99       "sh            %[t_ar],      0(%[ar_g_Q0])                         \n\t"
    100       ".set          pop                                                 \n\t"
          // Outputs: scratch registers are early-clobber ("=&r") so they never
          // share a register with an input; count is read and modified ("+r").
    101       : [t_ar] "=&r" (t_ar), [count] "+r" (count), [offset] "=&r" (offset),
    102         [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2), [t0] "=&r" (t0),
    103 #if !defined(MIPS_DSP_R1_LE)
    104         [tmp_cth] "=&r" (tmp_cth), [tmp_sth] "=&r" (tmp_sth),
    105         [tmp_arg] "=&r" (tmp_arg),
    106 #endif
    107         [t1] "=&r" (t1), [t2] "=&r" (t2)
          // Inputs: tmp points at ar_f_Q0[n + 1], the sample filtered in this pass.
    108       : [tmp] "r" (&ar_f_Q0[n+1]), [cth_Q15] "r" (cth_Q15),
    109 #if !defined(MIPS_DSP_R1_LE)
    110         [max_q16] "r" (max_q16), [min_q16] "r" (min_q16),
    111 #endif
    112         [sth_Q15] "r" (sth_Q15), [ar_g_Q0] "r" (ar_g_Q0)
          // "memory": the asm stores through pointers; "hi"/"lo": listed for mul.
    113       : "memory", "hi", "lo"
    114     );
    115   }
    116 }
    117 
    118 // MIPS optimization of the inner loop used for function
    119 // WebRtcIsacfix_NormLatticeFilterMa(). It does:
    120 //
    121 // for 0 <= n < HALF_SUBFRAMELEN - 1:
    122 //   *ptr2 = input2 * ((*ptr2) + input0 * (*ptr0));
    123 //   *ptr1 = input1 * (*ptr0) + input0 * (*ptr2);
    124 //
        // where the second line uses the value just stored to *ptr2, products with
        // input0 / input1 are scaled down by 15 bits and the product with input2 by
        // 16 bits (with rounding), and ptr0, ptr1, ptr2 each advance by one element
        // per step.
        //
    125 // Note, function WebRtcIsacfix_FilterMaLoopMIPS and WebRtcIsacfix_FilterMaLoopC
    126 // are not bit-exact. The accuracy of the MIPS function is same or better.
    127 void WebRtcIsacfix_FilterMaLoopMIPS(int16_t input0,  // Filter coefficient
    128                                     int16_t input1,  // Filter coefficient
    129                                     int32_t input2,  // Inverse coeff (1/input1)
    130                                     int32_t* ptr0,   // Sample buffer
    131                                     int32_t* ptr1,   // Sample buffer
    132                                     int32_t* ptr2) { // Sample buffer
    133 #if defined(MIPS_DSP_R2_LE)
    134   // MIPS DSPR2 version. 4 available accumulators allows loop unrolling 4 times.
    135   // This variant is not bit-exact with WebRtcIsacfix_FilterMaLoopC, since we
    136   // are exploiting 64-bit accumulators. The accuracy of the MIPS DSPR2 function
    137   // is same or better.
          // n counts the 4-sample unrolled iterations, m the leftover samples.
          // NOTE(review): loop "1" has no entry check, so it assumes
          // HALF_SUBFRAMELEN - 1 >= 4 -- confirm against settings.h.
    138   int n = (HALF_SUBFRAMELEN - 1) >> 2;
    139   int m = (HALF_SUBFRAMELEN - 1) & 3;
    140 
    141   int r0, r1, r2, r3;
    142   int t0, t1, t2, t3;
    143   int s0, s1, s2, s3;
    144 
    145   __asm __volatile (
    146     ".set          push                                      \n\t"
    147     ".set          noreorder                                 \n\t"
    148    "1:                                                       \n\t"
            // r0..r3 = ptr0[0..3]; s = round_sat((r * input0) >> 15)
    149     "lw            %[r0],        0(%[ptr0])                  \n\t"
    150     "lw            %[r1],        4(%[ptr0])                  \n\t"
    151     "lw            %[r2],        8(%[ptr0])                  \n\t"
    152     "lw            %[r3],        12(%[ptr0])                 \n\t"
    153     "mult          $ac0,         %[r0],        %[input0]     \n\t"
    154     "mult          $ac1,         %[r1],        %[input0]     \n\t"
    155     "mult          $ac2,         %[r2],        %[input0]     \n\t"
    156     "mult          $ac3,         %[r3],        %[input0]     \n\t"
    157     "lw            %[t0],        0(%[ptr2])                  \n\t"
    158     "extr_rs.w     %[s0],        $ac0,         15            \n\t"
    159     "extr_rs.w     %[s1],        $ac1,         15            \n\t"
    160     "extr_rs.w     %[s2],        $ac2,         15            \n\t"
    161     "extr_rs.w     %[s3],        $ac3,         15            \n\t"
    162     "lw            %[t1],        4(%[ptr2])                  \n\t"
    163     "lw            %[t2],        8(%[ptr2])                  \n\t"
    164     "lw            %[t3],        12(%[ptr2])                 \n\t"
            // t = ptr2[i] + s; then t = round_sat((t * input2) >> 16)
    165     "addu          %[t0],        %[t0],        %[s0]         \n\t"
    166     "addu          %[t1],        %[t1],        %[s1]         \n\t"
    167     "addu          %[t2],        %[t2],        %[s2]         \n\t"
    168     "addu          %[t3],        %[t3],        %[s3]         \n\t"
    169     "mult          $ac0,         %[t0],        %[input2]     \n\t"
    170     "mult          $ac1,         %[t1],        %[input2]     \n\t"
    171     "mult          $ac2,         %[t2],        %[input2]     \n\t"
    172     "mult          $ac3,         %[t3],        %[input2]     \n\t"
    173     "addiu         %[ptr0],      %[ptr0],      16            \n\t"
    174     "extr_rs.w     %[t0],        $ac0,         16            \n\t"
    175     "extr_rs.w     %[t1],        $ac1,         16            \n\t"
    176     "extr_rs.w     %[t2],        $ac2,         16            \n\t"
    177     "extr_rs.w     %[t3],        $ac3,         16            \n\t"
    178     "addiu         %[n],         %[n],         -1            \n\t"
            // s = round_sat((r * input1) >> 15); ptr2[0..3] = t
    179     "mult          $ac0,         %[r0],        %[input1]     \n\t"
    180     "mult          $ac1,         %[r1],        %[input1]     \n\t"
    181     "mult          $ac2,         %[r2],        %[input1]     \n\t"
    182     "mult          $ac3,         %[r3],        %[input1]     \n\t"
    183     "sw            %[t0],        0(%[ptr2])                  \n\t"
    184     "extr_rs.w     %[s0],        $ac0,         15            \n\t"
    185     "extr_rs.w     %[s1],        $ac1,         15            \n\t"
    186     "extr_rs.w     %[s2],        $ac2,         15            \n\t"
    187     "extr_rs.w     %[s3],        $ac3,         15            \n\t"
    188     "sw            %[t1],        4(%[ptr2])                  \n\t"
    189     "sw            %[t2],        8(%[ptr2])                  \n\t"
    190     "sw            %[t3],        12(%[ptr2])                 \n\t"
            // ptr1[0..3] = round_sat((t * input0) >> 15) + s
    191     "mult          $ac0,         %[t0],        %[input0]     \n\t"
    192     "mult          $ac1,         %[t1],        %[input0]     \n\t"
    193     "mult          $ac2,         %[t2],        %[input0]     \n\t"
    194     "mult          $ac3,         %[t3],        %[input0]     \n\t"
    195     "addiu         %[ptr2],      %[ptr2],      16            \n\t"
    196     "extr_rs.w     %[t0],        $ac0,         15            \n\t"
    197     "extr_rs.w     %[t1],        $ac1,         15            \n\t"
    198     "extr_rs.w     %[t2],        $ac2,         15            \n\t"
    199     "extr_rs.w     %[t3],        $ac3,         15            \n\t"
    200     "addu          %[t0],        %[t0],        %[s0]         \n\t"
    201     "addu          %[t1],        %[t1],        %[s1]         \n\t"
    202     "addu          %[t2],        %[t2],        %[s2]         \n\t"
    203     "addu          %[t3],        %[t3],        %[s3]         \n\t"
    204     "sw            %[t0],        0(%[ptr1])                  \n\t"
    205     "sw            %[t1],        4(%[ptr1])                  \n\t"
    206     "sw            %[t2],        8(%[ptr1])                  \n\t"
    207     "sw            %[t3],        12(%[ptr1])                 \n\t"
    208     "bgtz          %[n],         1b                          \n\t"
    209     " addiu        %[ptr1],      %[ptr1],      16            \n\t"
            // Skip the tail loop when there are no leftover samples. This must
            // compare against the hardware zero register: "%0" would expand to
            // asm operand 0 (the register holding r0, i.e. the last sample
            // loaded), making the branch depend on the data.
    210     "beq           %[m],         $zero,        3f            \n\t"
    211     " nop                                                    \n\t"
            // Tail: same computation, one sample per iteration, m times.
    212    "2:                                                       \n\t"
    213     "lw            %[r0],        0(%[ptr0])                  \n\t"
    214     "lw            %[t0],        0(%[ptr2])                  \n\t"
    215     "addiu         %[ptr0],      %[ptr0],      4             \n\t"
    216     "mult          $ac0,         %[r0],        %[input0]     \n\t"
    217     "mult          $ac1,         %[r0],        %[input1]     \n\t"
    218     "extr_rs.w     %[r1],        $ac0,         15            \n\t"
    219     "extr_rs.w     %[t1],        $ac1,         15            \n\t"
    220     "addu          %[t0],        %[t0],        %[r1]         \n\t"
    221     "mult          $ac0,         %[t0],        %[input2]     \n\t"
    222     "extr_rs.w     %[t0],        $ac0,         16            \n\t"
    223     "sw            %[t0],        0(%[ptr2])                  \n\t"
    224     "mult          $ac0,         %[t0],        %[input0]     \n\t"
    225     "addiu         %[ptr2],      %[ptr2],      4             \n\t"
    226     "addiu         %[m],         %[m],         -1            \n\t"
    227     "extr_rs.w     %[t0],        $ac0,         15            \n\t"
    228     "addu          %[t0],        %[t0],        %[t1]         \n\t"
    229     "sw            %[t0],        0(%[ptr1])                  \n\t"
    230     "bgtz          %[m],         2b                          \n\t"
    231     " addiu        %[ptr1],      %[ptr1],      4             \n\t"
    232    "3:                                                       \n\t"
    233     ".set          pop                                       \n\t"
    234     : [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2),
    235       [r3] "=&r" (r3), [t0] "=&r" (t0), [t1] "=&r" (t1),
    236       [t2] "=&r" (t2), [t3] "=&r" (t3), [s0] "=&r" (s0),
    237       [s1] "=&r" (s1), [s2] "=&r" (s2), [s3] "=&r" (s3),
    238       [ptr0] "+r" (ptr0), [ptr1] "+r" (ptr1), [m] "+r" (m),
    239       [ptr2] "+r" (ptr2), [n] "+r" (n)
    240     : [input0] "r" (input0), [input1] "r" (input1),
    241       [input2] "r" (input2)
            // "hi"/"lo" cover $ac0; the remaining accumulators are listed by name.
    242     : "memory", "hi", "lo", "$ac1hi", "$ac1lo", "$ac2hi",
    243       "$ac2lo", "$ac3hi", "$ac3lo"
    244   );
    245 #else
    246   // Non-DSPR2 version of the function. Avoiding the accumulator usage due to
    247   // large latencies. This variant is bit-exact with C code.
    248   int n = HALF_SUBFRAMELEN - 1;
    249   int32_t t16a, t16b;
    250   int32_t r0, r1, r2, r3, r4;
          // The asm sign-extends %[input0] / %[input1] in place. An asm statement
          // must not modify input-only operands, so they are bound to local
          // copies as read-write ("+r") operands instead of to the parameters.
          int32_t coef0 = input0;
          int32_t coef1 = input1;
    251 
    252   __asm __volatile (
    253     ".set          push                                      \n\t"
    254     ".set          noreorder                                 \n\t"
            // Split input2 into a high part t16a and a signed low part t16b such
            // that input2 == t16a * 65536 + t16b (t16a is bumped by one below
            // when the low half is negative).
    255     "sra           %[t16a],      %[input2],     16           \n\t"
    256     "andi          %[t16b],      %[input2],     0xFFFF       \n\t"
            // Sign-extend t16b and both coefficients from 16 bits.
    257 #if defined(MIPS32R2_LE)
    258     "seh           %[t16b],      %[t16b]                     \n\t"
    259     "seh           %[input0],    %[input0]                   \n\t"
    260     "seh           %[input1],    %[input1]                   \n\t"
    261 #else
    262     "sll           %[t16b],      %[t16b],       16           \n\t"
    263     "sra           %[t16b],      %[t16b],       16           \n\t"
    264     "sll           %[input0],    %[input0],     16           \n\t"
    265     "sra           %[input0],    %[input0],     16           \n\t"
    266     "sll           %[input1],    %[input1],     16           \n\t"
    267     "sra           %[input1],    %[input1],     16           \n\t"
    268 #endif
            // if (t16b < 0) t16a += 1
    269     "addiu         %[r0],        %[t16a],       1            \n\t"
    270     "slt           %[r1],        %[t16b],       $zero        \n\t"
    271     "movn          %[t16a],      %[r0],         %[r1]        \n\t"
    272    "1:                                                       \n\t"
            // r0 = *ptr0 split into high half r2 and low half r0; r1 = *ptr2.
    273     "lw            %[r0],        0(%[ptr0])                  \n\t"
    274     "lw            %[r1],        0(%[ptr2])                  \n\t"
    275     "addiu         %[ptr0],      %[ptr0],       4            \n\t"
    276     "sra           %[r2],        %[r0],         16           \n\t"
    277     "andi          %[r0],        %[r0],         0xFFFF       \n\t"
            // Partial products of both halves with input0 and input1.
    278     "mul           %[r3],        %[r2],         %[input0]    \n\t"
    279     "mul           %[r4],        %[r0],         %[input0]    \n\t"
    280     "mul           %[r2],        %[r2],         %[input1]    \n\t"
    281     "mul           %[r0],        %[r0],         %[input1]    \n\t"
    282     "addiu         %[ptr2],      %[ptr2],       4            \n\t"
            // r3 = (hi * input0 << 1) + (((lo * input0 >> 1) + 0x2000) >> 14),
            // i.e. the input0 * (*ptr0) term scaled down by 15 bits.
    283     "sll           %[r3],        %[r3],         1            \n\t"
    284     "sra           %[r4],        %[r4],         1            \n\t"
    285     "addiu         %[r4],        %[r4],         0x2000       \n\t"
    286     "sra           %[r4],        %[r4],         14           \n\t"
    287     "addu          %[r3],        %[r3],         %[r4]        \n\t"
            // r1 = *ptr2 + that term, then multiplied by input2 via t16a / t16b:
            // r1 * t16a + (r1 >> 16) * t16b + rounded low-half contribution.
    288     "addu          %[r1],        %[r1],         %[r3]        \n\t"
    289     "sra           %[r3],        %[r1],         16           \n\t"
    290     "andi          %[r4],        %[r1],         0xFFFF       \n\t"
    291     "sra           %[r4],        %[r4],         1            \n\t"
    292     "mul           %[r1],        %[r1],         %[t16a]      \n\t"
    293     "mul           %[r3],        %[r3],         %[t16b]      \n\t"
    294     "mul           %[r4],        %[r4],         %[t16b]      \n\t"
            // Meanwhile finish the input1 * (*ptr0) term in r0 (same scaling).
    295     "sll           %[r2],        %[r2],         1            \n\t"
    296     "sra           %[r0],        %[r0],         1            \n\t"
    297     "addiu         %[r0],        %[r0],         0x2000       \n\t"
    298     "sra           %[r0],        %[r0],         14           \n\t"
    299     "addu          %[r0],        %[r0],         %[r2]        \n\t"
    300     "addiu         %[n],         %[n],          -1           \n\t"
    301     "addu          %[r1],        %[r1],         %[r3]        \n\t"
    302     "addiu         %[r4],        %[r4],         0x4000       \n\t"
    303     "sra           %[r4],        %[r4],         15           \n\t"
    304     "addu          %[r1],        %[r1],         %[r4]        \n\t"
            // r1 is the new *ptr2 value; multiply it by input0 (again split into
            // halves) and add the result to r0 to form the *ptr1 value.
    305     "sra           %[r2],        %[r1],         16           \n\t"
    306     "andi          %[r3],        %[r1],         0xFFFF       \n\t"
    307     "mul           %[r3],        %[r3],         %[input0]    \n\t"
    308     "mul           %[r2],        %[r2],         %[input0]    \n\t"
            // ptr2 was already advanced, hence the -4 displacement.
    309     "sw            %[r1],        -4(%[ptr2])                 \n\t"
    310     "sra           %[r3],        %[r3],         1            \n\t"
    311     "addiu         %[r3],        %[r3],         0x2000       \n\t"
    312     "sra           %[r3],        %[r3],         14           \n\t"
    313     "addu          %[r0],        %[r0],         %[r3]        \n\t"
    314     "sll           %[r2],        %[r2],         1            \n\t"
    315     "addu          %[r0],        %[r0],         %[r2]        \n\t"
    316     "sw            %[r0],        0(%[ptr1])                  \n\t"
            // Loop while n > 0; ptr1 advances in the branch delay slot.
    317     "bgtz          %[n],         1b                          \n\t"
    318     " addiu        %[ptr1],      %[ptr1],       4            \n\t"
    319     ".set          pop                                       \n\t"
    320     : [t16a] "=&r" (t16a), [t16b] "=&r" (t16b), [r0] "=&r" (r0),
    321       [r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3),
    322       [r4] "=&r" (r4), [ptr0] "+r" (ptr0), [ptr1] "+r" (ptr1),
    323       [ptr2] "+r" (ptr2), [n] "+r" (n),
    324       [input0] "+r" (coef0), [input1] "+r" (coef1)
    325     : [input2] "r" (input2)
    326     : "hi", "lo", "memory"
    327   );
    328 #endif
    329 }
    330