/*
 *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "webrtc/modules/audio_coding/codecs/isac/fix/source/lpc_masking_model.h"

// MIPS DSPR2 optimization for function WebRtcIsacfix_CalculateResidualEnergy
// Bit-exact with WebRtcIsacfix_CalculateResidualEnergyC from file
// lpc_masking_model.c
// Computes residual_energy = sum over i, j of
//   a_polynomial[j] * corr_coeffs[i] * a_polynomial[j - i]
// (the i > 0 cross terms are counted twice), accumulating into a 64-bit
// software accumulator held in the register pair (sum64_hi:sum64_lo).
// Whenever an addition into the accumulator would overflow, both the
// accumulator and the new term are halved and the extra shift is recorded
// in |shift_internal|.  The sum is finally normalized into a positive
// 31-bit mantissa (the return value); the Q-domain of that mantissa is
// written to |*q_val_residual_energy|.
// NOTE(review): relies on MIPS DSP-R2 instructions (mult $ac0, shilov,
// prepend, addsc/addwc) — only correct on cores with the DSP ASE rev 2.
int32_t WebRtcIsacfix_CalculateResidualEnergyMIPS(int lpc_order,
                                                  int32_t q_val_corr,
                                                  int q_val_polynomial,
                                                  int16_t* a_polynomial,
                                                  int32_t* corr_coeffs,
                                                  int* q_val_residual_energy) {

  int i = 0, j = 0;
  int shift_internal = 0, shift_norm = 0;
  int32_t tmp32 = 0, word32_high = 0, word32_low = 0, residual_energy = 0;
  int32_t tmp_corr_c = corr_coeffs[0];
  int16_t* tmp_a_poly = &a_polynomial[0];
  // 64-bit accumulator, kept as two 32-bit halves and added with the DSP
  // carry chain (addsc/addwc) below.
  int32_t sum64_hi = 0;
  int32_t sum64_lo = 0;

  // First pass: the diagonal (i == 0) terms, corr_coeffs[0] * a_polynomial[j]^2.
  for (j = 0; j <= lpc_order; j++) {
    // For the case of i == 0:
    //   residual_energy +=
    //     a_polynomial[j] * corr_coeffs[i] * a_polynomial[j - i];

    int32_t tmp2, tmp3;  // High and low word of the 64-bit product term.
    int16_t sign_1;      // Sign bit (0 or -1) of the product's high word.
    int16_t sign_2;      // Sign bit of the accumulator BEFORE adding the term.
    int16_t sign_3;      // Nonzero iff the term and accumulator signs differ.

    // Form a_polynomial[j]^2 (32-bit 'mul'), widen-multiply by corr_coeffs[0]
    // into accumulator $ac0 ('mult'), and shift $ac0 right by the current
    // |shift_internal| ('shilov' = variable accumulator shift) so the new
    // term is in the same scale as (sum64_hi:sum64_lo).  The hi/lo words of
    // the shifted product come back in tmp2/tmp3.
    __asm __volatile (
      ".set      push                                                \n\t"
      ".set      noreorder                                           \n\t"
      "lh        %[tmp2],         0(%[tmp_a_poly])                   \n\t"
      "mul       %[tmp32],        %[tmp2],            %[tmp2]        \n\t"
      "addiu     %[tmp_a_poly],   %[tmp_a_poly],      2              \n\t"
      "sra       %[sign_2],       %[sum64_hi],        31             \n\t"
      "mult      $ac0,            %[tmp32],           %[tmp_corr_c]  \n\t"
      "shilov    $ac0,            %[shift_internal]                  \n\t"
      "mfhi      %[tmp2],         $ac0                               \n\t"
      "mflo      %[tmp3],         $ac0                               \n\t"
      "sra       %[sign_1],       %[tmp2],            31             \n\t"
      "xor       %[sign_3],       %[sign_1],          %[sign_2]      \n\t"
      ".set      pop                                                 \n\t"
      : [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3), [tmp32] "=&r" (tmp32),
        [tmp_a_poly] "+r" (tmp_a_poly), [sign_1] "=&r" (sign_1),
        [sign_3] "=&r" (sign_3), [sign_2] "=&r" (sign_2),
        [sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo)
      : [tmp_corr_c] "r" (tmp_corr_c), [shift_internal] "r" (shift_internal)
      : "hi", "lo", "memory"
    );

    if (sign_3 != 0) {
      // Opposite signs: the 64-bit addition cannot overflow, add directly.
      // 'addsc' adds the low words and latches the carry; 'addwc' adds the
      // high words plus that carry.
      __asm __volatile (
        ".set      push                                      \n\t"
        ".set      noreorder                                 \n\t"
        "addsc     %[sum64_lo],   %[sum64_lo],    %[tmp3]    \n\t"
        "addwc     %[sum64_hi],   %[sum64_hi],    %[tmp2]    \n\t"
        ".set      pop                                       \n\t"
        : [sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo)
        : [tmp2] "r" (tmp2), [tmp3] "r" (tmp3)
        : "hi", "lo", "memory"
      );
    } else {
      // Same signs: overflow is possible.  Positive + positive overflows if
      // tmp2 exceeds the headroom below 0x7FFFFFFF; negative + negative
      // overflows if the (wrapping) sum of the high words turns positive.
      if (((!(sign_1 || sign_2)) && (0x7FFFFFFF - sum64_hi < tmp2)) ||
          ((sign_1 && sign_2) && (sum64_hi + tmp2 > 0))) {
        // Shift right for overflow.
        // Halve both the accumulator and the new term before adding:
        // 'prepend rt, rs, 1' funnels the LSB of the high word into the top
        // of the low word, and 'sra' arithmetic-shifts the high word, i.e. a
        // 64-bit arithmetic shift right by one.  Record the scale change.
        __asm __volatile (
          ".set      push                                             \n\t"
          ".set      noreorder                                        \n\t"
          "addiu     %[shift_internal], %[shift_internal],  1         \n\t"
          "prepend   %[sum64_lo],       %[sum64_hi],        1         \n\t"
          "sra       %[sum64_hi],       %[sum64_hi],        1         \n\t"
          "prepend   %[tmp3],           %[tmp2],            1         \n\t"
          "sra       %[tmp2],           %[tmp2],            1         \n\t"
          "addsc     %[sum64_lo],       %[sum64_lo],        %[tmp3]   \n\t"
          "addwc     %[sum64_hi],       %[sum64_hi],        %[tmp2]   \n\t"
          ".set      pop                                              \n\t"
          : [tmp2] "+r" (tmp2), [tmp3] "+r" (tmp3),
            [shift_internal] "+r" (shift_internal),
            [sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo)
          :
          : "hi", "lo", "memory"
        );
      } else {
        // Same signs but enough headroom: plain carry-chained 64-bit add.
        __asm __volatile (
          ".set      push                                      \n\t"
          ".set      noreorder                                 \n\t"
          "addsc     %[sum64_lo],   %[sum64_lo],    %[tmp3]    \n\t"
          "addwc     %[sum64_hi],   %[sum64_hi],    %[tmp2]    \n\t"
          ".set      pop                                       \n\t"
          : [sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo)
          : [tmp2] "r" (tmp2), [tmp3] "r" (tmp3)
          : "hi", "lo", "memory"
        );
      }
    }
  }

  // Second pass: the off-diagonal (i >= 1) terms.  Each product is doubled
  // (the 'sll ... 1' below) because a_polynomial[j] * a_polynomial[j - i]
  // appears symmetrically for (j, j - i) and (j - i, j).
  for (i = 1; i <= lpc_order; i++) {
    tmp_corr_c = corr_coeffs[i];
    int16_t* tmp_a_poly_j = &a_polynomial[i];    // Walks a_polynomial[j].
    int16_t* tmp_a_poly_j_i = &a_polynomial[0];  // Walks a_polynomial[j - i].
    for (j = i; j <= lpc_order; j++) {
      // For the case of i = 1 .. lpc_order:
      //   residual_energy +=
      //     a_polynomial[j] * corr_coeffs[i] * a_polynomial[j - i] * 2;

      int32_t tmp2, tmp3;  // High and low word of the 64-bit product term.
      int16_t sign_1;      // Sign bit of the product's high word.
      int16_t sign_2;      // Sign bit of the accumulator before the add.
      int16_t sign_3;      // Nonzero iff the two signs differ.

      // Same structure as the first pass, except the 16x16 product is
      // a_polynomial[j] * a_polynomial[j - i], doubled before the widening
      // multiply by corr_coeffs[i].
      __asm __volatile (
        ".set      push                                                   \n\t"
        ".set      noreorder                                              \n\t"
        "lh        %[tmp3],           0(%[tmp_a_poly_j])                  \n\t"
        "lh        %[tmp2],           0(%[tmp_a_poly_j_i])                \n\t"
        "addiu     %[tmp_a_poly_j],   %[tmp_a_poly_j],    2               \n\t"
        "addiu     %[tmp_a_poly_j_i], %[tmp_a_poly_j_i],  2               \n\t"
        "mul       %[tmp32],          %[tmp3],            %[tmp2]         \n\t"
        "sll       %[tmp32],          %[tmp32],           1               \n\t"
        "mult      $ac0,              %[tmp32],           %[tmp_corr_c]   \n\t"
        "shilov    $ac0,              %[shift_internal]                   \n\t"
        "mfhi      %[tmp2],           $ac0                                \n\t"
        "mflo      %[tmp3],           $ac0                                \n\t"
        "sra       %[sign_1],         %[tmp2],            31              \n\t"
        "sra       %[sign_2],         %[sum64_hi],        31              \n\t"
        "xor       %[sign_3],         %[sign_1],          %[sign_2]       \n\t"
        ".set      pop                                                    \n\t"
        : [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3), [tmp32] "=&r" (tmp32),
          [tmp_a_poly_j] "+r" (tmp_a_poly_j), [sign_1] "=&r" (sign_1),
          [tmp_a_poly_j_i] "+r" (tmp_a_poly_j_i), [sign_2] "=&r" (sign_2),
          [sign_3] "=&r" (sign_3), [sum64_hi] "+r" (sum64_hi),
          [sum64_lo] "+r" (sum64_lo)
        : [tmp_corr_c] "r" (tmp_corr_c), [shift_internal] "r" (shift_internal)
        : "hi", "lo", "memory"
      );
      if (sign_3 != 0) {
        // Opposite signs: no overflow possible, carry-chained 64-bit add.
        __asm __volatile (
          ".set      push                                     \n\t"
          ".set      noreorder                                \n\t"
          "addsc     %[sum64_lo],   %[sum64_lo],   %[tmp3]    \n\t"
          "addwc     %[sum64_hi],   %[sum64_hi],   %[tmp2]    \n\t"
          ".set      pop                                      \n\t"
          : [tmp2] "+r" (tmp2), [tmp3] "+r" (tmp3), [sum64_hi] "+r" (sum64_hi),
            [sum64_lo] "+r" (sum64_lo)
          :
          :"memory"
        );
      } else {
        // Test overflow and sum the result.
        if (((!(sign_1 || sign_2)) && (0x7FFFFFFF - sum64_hi < tmp2)) ||
            ((sign_1 && sign_2) && (sum64_hi + tmp2 > 0))) {
          // Shift right for overflow.
          // 64-bit halving of accumulator and term (see first pass), then add.
          __asm __volatile (
            ".set      push                                              \n\t"
            ".set      noreorder                                         \n\t"
            "addiu     %[shift_internal],  %[shift_internal],  1         \n\t"
            "prepend   %[sum64_lo],        %[sum64_hi],        1         \n\t"
            "sra       %[sum64_hi],        %[sum64_hi],        1         \n\t"
            "prepend   %[tmp3],            %[tmp2],            1         \n\t"
            "sra       %[tmp2],            %[tmp2],            1         \n\t"
            "addsc     %[sum64_lo],        %[sum64_lo],        %[tmp3]   \n\t"
            "addwc     %[sum64_hi],        %[sum64_hi],        %[tmp2]   \n\t"
            ".set      pop                                               \n\t"
            : [tmp2] "+r" (tmp2), [tmp3] "+r" (tmp3),
              [shift_internal] "+r" (shift_internal),
              [sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo)
            :
            : "hi", "lo", "memory"
          );
        } else {
          // Same signs with headroom: plain carry-chained 64-bit add.
          __asm __volatile (
            ".set      push                                      \n\t"
            ".set      noreorder                                 \n\t"
            "addsc     %[sum64_lo],    %[sum64_lo],   %[tmp3]    \n\t"
            "addwc     %[sum64_hi],    %[sum64_hi],   %[tmp2]    \n\t"
            ".set      pop                                       \n\t"
            : [tmp2] "+r" (tmp2), [tmp3] "+r" (tmp3),
              [sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo)
            :
            : "hi", "lo", "memory"
          );
        }
      }
    }
  }
  word32_high = sum64_hi;
  word32_low = sum64_lo;

  // Calculate the value of shifting (shift_norm) for the 64-bit sum.
  if (word32_high != 0) {
    // High word is nonzero: shift the 64-bit value right so it fits in
    // 32 bits.  residual_energy = (sum64 >> shift_norm), assembled from the
    // low word's top bits and the high word's remaining bits.
    shift_norm = 32 - WebRtcSpl_NormW32(word32_high);
    int tmp1;
    __asm __volatile (
      ".set    push                                                     \n\t"
      ".set    noreorder                                                \n\t"
      "srl     %[residual_energy],  %[sum64_lo],         %[shift_norm]  \n\t"
      "li      %[tmp1],             32                                  \n\t"
      "subu    %[tmp1],             %[tmp1],             %[shift_norm]  \n\t"
      "sll     %[tmp1],             %[sum64_hi],         %[tmp1]        \n\t"
      "or      %[residual_energy],  %[residual_energy],  %[tmp1]        \n\t"
      ".set    pop                                                      \n\t"
      : [residual_energy] "=&r" (residual_energy), [tmp1]"=&r"(tmp1),
        [sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo)
      : [shift_norm] "r" (shift_norm)
      : "memory"
    );
  } else {
    if ((word32_low & 0x80000000) != 0) {
      // Low word alone, but its sign bit is set: shift right once so the
      // result stays a positive 31-bit value.
      shift_norm = 1;
      residual_energy = (uint32_t)word32_low >> 1;
    } else {
      // Low word alone: shift left to normalize; a left shift is a negative
      // net right-shift, hence the negation.
      shift_norm = WebRtcSpl_NormW32(word32_low);
      residual_energy = word32_low << shift_norm;
      shift_norm = -shift_norm;
    }
  }

  // Q(q_val_polynomial * 2) * Q(q_val_corr) >> shift_internal >> shift_norm
  //   = Q(q_val_corr - shift_internal - shift_norm + q_val_polynomial * 2)
  *q_val_residual_energy =
      q_val_corr - shift_internal - shift_norm + q_val_polynomial * 2;

  return residual_energy;
}
    238