/*
 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include "webrtc/modules/audio_coding/codecs/isac/fix/source/lpc_masking_model.h"

// MIPS DSPR2 optimization for function WebRtcIsacfix_CalculateResidualEnergy
// Bit-exact with WebRtcIsacfix_CalculateResidualEnergyC from file
// lpc_masking_model.c
int32_t WebRtcIsacfix_CalculateResidualEnergyMIPS(int lpc_order,
                                                  int32_t q_val_corr,
                                                  int q_val_polynomial,
                                                  int16_t* a_polynomial,
                                                  int32_t* corr_coeffs,
                                                  int* q_val_residual_energy) {
  int i = 0, j = 0;
  int shift_internal = 0, shift_norm = 0;
  int32_t tmp32 = 0, word32_high = 0, word32_low = 0, residual_energy = 0;
  int32_t tmp_corr_c = corr_coeffs[0];
  int16_t* tmp_a_poly = &a_polynomial[0];
  int32_t sum64_hi = 0;
  int32_t sum64_lo = 0;

  for (j = 0; j <= lpc_order; j++) {
    // For the case of i == 0:
    // residual_energy +=
    //     a_polynomial[j] * corr_coeffs[i] * a_polynomial[j - i];

    int32_t tmp2, tmp3;
    int16_t sign_1;
    int16_t sign_2;
    int16_t sign_3;

    __asm __volatile (
      ".set    push                                          \n\t"
      ".set    noreorder                                     \n\t"
      "lh      %[tmp2],           0(%[tmp_a_poly])           \n\t"
      "mul     %[tmp32],          %[tmp2],      %[tmp2]      \n\t"
      "addiu   %[tmp_a_poly],     %[tmp_a_poly],  2          \n\t"
      "sra     %[sign_2],         %[sum64_hi],    31         \n\t"
      "mult    $ac0,              %[tmp32],     %[tmp_corr_c] \n\t"
      "shilov  $ac0,              %[shift_internal]          \n\t"
      "mfhi    %[tmp2],           $ac0                       \n\t"
      "mflo    %[tmp3],           $ac0                       \n\t"
      "sra     %[sign_1],         %[tmp2],        31         \n\t"
      "xor     %[sign_3],         %[sign_1],    %[sign_2]    \n\t"
      ".set    pop                                           \n\t"
      : [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3), [tmp32] "=&r" (tmp32),
        [tmp_a_poly] "+r" (tmp_a_poly), [sign_1] "=&r" (sign_1),
        [sign_3] "=&r" (sign_3), [sign_2] "=&r" (sign_2),
        [sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo)
      : [tmp_corr_c] "r" (tmp_corr_c), [shift_internal] "r" (shift_internal)
      : "hi", "lo", "memory"
    );

    if (sign_3 != 0) {
      __asm __volatile (
        ".set    push                                        \n\t"
        ".set    noreorder                                   \n\t"
        "addsc   %[sum64_lo],     %[sum64_lo],  %[tmp3]      \n\t"
        "addwc   %[sum64_hi],     %[sum64_hi],  %[tmp2]      \n\t"
        ".set    pop                                         \n\t"
        : [sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo)
        : [tmp2] "r" (tmp2), [tmp3] "r" (tmp3)
        : "hi", "lo", "memory"
      );
    } else {
      if (((!(sign_1 || sign_2)) && (0x7FFFFFFF - sum64_hi < tmp2)) ||
          ((sign_1 && sign_2) && (sum64_hi + tmp2 > 0))) {
        // Shift right for overflow.
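        // DSPR2 idiom used below: "prepend rt, rs, 1" shifts rt right by one
        // and fills its emptied MSB from the low bit of rs, so the
        // (prepend, sra) pair arithmetically halves a 64-bit pair such as
        // {sum64_hi, sum64_lo}; "addsc"/"addwc" then add two 64-bit values
        // through the DSPControl carry bit. shift_internal counts these
        // renormalizations so the final Q-value can be corrected.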
        __asm __volatile (
          ".set    push                                      \n\t"
          ".set    noreorder                                 \n\t"
          "addiu   %[shift_internal], %[shift_internal],  1  \n\t"
          "prepend %[sum64_lo],   %[sum64_hi],    1          \n\t"
          "sra     %[sum64_hi],   %[sum64_hi],    1          \n\t"
          "prepend %[tmp3],       %[tmp2],        1          \n\t"
          "sra     %[tmp2],       %[tmp2],        1          \n\t"
          "addsc   %[sum64_lo],   %[sum64_lo],    %[tmp3]    \n\t"
          "addwc   %[sum64_hi],   %[sum64_hi],    %[tmp2]    \n\t"
          ".set    pop                                       \n\t"
          : [tmp2] "+r" (tmp2), [tmp3] "+r" (tmp3),
            [shift_internal] "+r" (shift_internal),
            [sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo)
          :
          : "hi", "lo", "memory"
        );
      } else {
        __asm __volatile (
          ".set    push                                      \n\t"
          ".set    noreorder                                 \n\t"
          "addsc   %[sum64_lo],   %[sum64_lo],    %[tmp3]    \n\t"
          "addwc   %[sum64_hi],   %[sum64_hi],    %[tmp2]    \n\t"
          ".set    pop                                       \n\t"
          : [sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo)
          : [tmp2] "r" (tmp2), [tmp3] "r" (tmp3)
          : "hi", "lo", "memory"
        );
      }
    }
  }

  for (i = 1; i <= lpc_order; i++) {
    tmp_corr_c = corr_coeffs[i];
    int16_t* tmp_a_poly_j = &a_polynomial[i];
    int16_t* tmp_a_poly_j_i = &a_polynomial[0];
    for (j = i; j <= lpc_order; j++) {
      // For the case of i = 1 .. lpc_order:
      // residual_energy +=
      //     a_polynomial[j] * corr_coeffs[i] * a_polynomial[j - i] * 2;

      int32_t tmp2, tmp3;
      int16_t sign_1;
      int16_t sign_2;
      int16_t sign_3;

      __asm __volatile (
        ".set    push                                        \n\t"
        ".set    noreorder                                   \n\t"
        "lh      %[tmp3],         0(%[tmp_a_poly_j])         \n\t"
        "lh      %[tmp2],         0(%[tmp_a_poly_j_i])       \n\t"
        "addiu   %[tmp_a_poly_j],   %[tmp_a_poly_j],    2    \n\t"
        "addiu   %[tmp_a_poly_j_i], %[tmp_a_poly_j_i],  2    \n\t"
        "mul     %[tmp32],        %[tmp3],        %[tmp2]    \n\t"
        "sll     %[tmp32],        %[tmp32],       1          \n\t"
        "mult    $ac0,            %[tmp32],     %[tmp_corr_c] \n\t"
        "shilov  $ac0,            %[shift_internal]          \n\t"
        "mfhi    %[tmp2],         $ac0                       \n\t"
        "mflo    %[tmp3],         $ac0                       \n\t"
        "sra     %[sign_1],       %[tmp2],        31         \n\t"
        "sra     %[sign_2],       %[sum64_hi],    31         \n\t"
        "xor     %[sign_3],       %[sign_1],    %[sign_2]    \n\t"
        ".set    pop                                         \n\t"
        : [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3), [tmp32] "=&r" (tmp32),
          [tmp_a_poly_j] "+r" (tmp_a_poly_j), [sign_1] "=&r" (sign_1),
          [tmp_a_poly_j_i] "+r" (tmp_a_poly_j_i), [sign_2] "=&r" (sign_2),
          [sign_3] "=&r" (sign_3), [sum64_hi] "+r" (sum64_hi),
          [sum64_lo] "+r" (sum64_lo)
        : [tmp_corr_c] "r" (tmp_corr_c), [shift_internal] "r" (shift_internal)
        : "hi", "lo", "memory"
      );
      if (sign_3 != 0) {
        __asm __volatile (
          ".set    push                                      \n\t"
          ".set    noreorder                                 \n\t"
          "addsc   %[sum64_lo],   %[sum64_lo],    %[tmp3]    \n\t"
          "addwc   %[sum64_hi],   %[sum64_hi],    %[tmp2]    \n\t"
          ".set    pop                                       \n\t"
          : [tmp2] "+r" (tmp2), [tmp3] "+r" (tmp3), [sum64_hi] "+r" (sum64_hi),
            [sum64_lo] "+r" (sum64_lo)
          :
          : "memory"
        );
      } else {
        // Test overflow and sum the result.
        if (((!(sign_1 || sign_2)) && (0x7FFFFFFF - sum64_hi < tmp2)) ||
            ((sign_1 && sign_2) && (sum64_hi + tmp2 > 0))) {
          // Shift right for overflow.
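          // Same renormalization as in the i == 0 loop above: halve both
          // 64-bit values, bump shift_internal, then add with carry.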
          __asm __volatile (
            ".set    push                                    \n\t"
            ".set    noreorder                               \n\t"
            "addiu   %[shift_internal], %[shift_internal], 1 \n\t"
            "prepend %[sum64_lo],  %[sum64_hi],    1         \n\t"
            "sra     %[sum64_hi],  %[sum64_hi],    1         \n\t"
            "prepend %[tmp3],      %[tmp2],        1         \n\t"
            "sra     %[tmp2],      %[tmp2],        1         \n\t"
            "addsc   %[sum64_lo],  %[sum64_lo],    %[tmp3]   \n\t"
            "addwc   %[sum64_hi],  %[sum64_hi],    %[tmp2]   \n\t"
            ".set    pop                                     \n\t"
            : [tmp2] "+r" (tmp2), [tmp3] "+r" (tmp3),
              [shift_internal] "+r" (shift_internal),
              [sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo)
            :
            : "hi", "lo", "memory"
          );
        } else {
          __asm __volatile (
            ".set    push                                    \n\t"
            ".set    noreorder                               \n\t"
            "addsc   %[sum64_lo],  %[sum64_lo],    %[tmp3]   \n\t"
            "addwc   %[sum64_hi],  %[sum64_hi],    %[tmp2]   \n\t"
            ".set    pop                                     \n\t"
            : [tmp2] "+r" (tmp2), [tmp3] "+r" (tmp3),
              [sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo)
            :
            : "hi", "lo", "memory"
          );
        }
      }
    }
  }
  word32_high = sum64_hi;
  word32_low = sum64_lo;

  // Calculate the value of shifting (shift_norm) for the 64-bit sum.
  if (word32_high != 0) {
    shift_norm = 32 - WebRtcSpl_NormW32(word32_high);
    int tmp1;
    __asm __volatile (
      ".set    push                                          \n\t"
      ".set    noreorder                                     \n\t"
      "srl     %[residual_energy], %[sum64_lo], %[shift_norm] \n\t"
      "li      %[tmp1],           32                         \n\t"
      "subu    %[tmp1],           %[tmp1],     %[shift_norm] \n\t"
      "sll     %[tmp1],           %[sum64_hi], %[tmp1]       \n\t"
      "or      %[residual_energy], %[residual_energy], %[tmp1] \n\t"
      ".set    pop                                           \n\t"
      : [residual_energy] "=&r" (residual_energy), [tmp1] "=&r" (tmp1),
        [sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo)
      : [shift_norm] "r" (shift_norm)
      : "memory"
    );
  } else {
    if ((word32_low & 0x80000000) != 0) {
      shift_norm = 1;
      residual_energy = (uint32_t)word32_low >> 1;
    } else {
      shift_norm = WebRtcSpl_NormW32(word32_low);
      residual_energy = word32_low << shift_norm;
      shift_norm = -shift_norm;
    }
  }

  // Q(q_val_polynomial * 2) * Q(q_val_corr) >> shift_internal >> shift_norm
  //   = Q(q_val_corr - shift_internal - shift_norm + q_val_polynomial * 2)
  *q_val_residual_energy =
      q_val_corr - shift_internal - shift_norm + q_val_polynomial * 2;

  return residual_energy;
}
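
// A rough scalar sketch of the double sum computed above, for readability.
// This is only an illustration (it uses a native 64-bit accumulator and
// ignores the incremental renormalization tracked by shift_internal as well
// as the final normalization into residual_energy/shift_norm); the
// authoritative bit-exact reference is WebRtcIsacfix_CalculateResidualEnergyC
// in lpc_masking_model.c:
//
//   int64_t sum = 0;
//   for (i = 0; i <= lpc_order; i++) {
//     for (j = i; j <= lpc_order; j++) {
//       int64_t term =
//           (int64_t)a_polynomial[j] * a_polynomial[j - i] * corr_coeffs[i];
//       sum += (i == 0) ? term : 2 * term;
//     }
//   }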