1 /* 2 * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include <assert.h> 12 #include <string.h> 13 14 #include "webrtc/modules/audio_processing/ns/noise_suppression_x.h" 15 #include "webrtc/modules/audio_processing/ns/nsx_core.h" 16 17 static const int16_t kIndicatorTable[17] = { 18 0, 2017, 3809, 5227, 6258, 6963, 7424, 7718, 19 7901, 8014, 8084, 8126, 8152, 8168, 8177, 8183, 8187 20 }; 21 22 // Compute speech/noise probability 23 // speech/noise probability is returned in: probSpeechFinal 24 //snrLocPrior is the prior SNR for each frequency (in Q11) 25 //snrLocPost is the post SNR for each frequency (in Q11) 26 void WebRtcNsx_SpeechNoiseProb(NoiseSuppressionFixedC* inst, 27 uint16_t* nonSpeechProbFinal, 28 uint32_t* priorLocSnr, 29 uint32_t* postLocSnr) { 30 uint32_t tmpU32no1, tmpU32no2, tmpU32no3; 31 int32_t indPriorFX, tmp32no1; 32 int32_t logLrtTimeAvgKsumFX; 33 int16_t indPriorFX16; 34 int16_t tmp16, tmp16no1, tmp16no2, tmpIndFX, tableIndex, frac; 35 size_t i; 36 int normTmp, nShifts; 37 38 int32_t r0, r1, r2, r3, r4, r5, r6, r7, r8, r9; 39 int32_t const_max = 0x7fffffff; 40 int32_t const_neg43 = -43; 41 int32_t const_5412 = 5412; 42 int32_t const_11rsh12 = (11 << 12); 43 int32_t const_178 = 178; 44 45 46 // compute feature based on average LR factor 47 // this is the average over all frequencies of the smooth log LRT 48 logLrtTimeAvgKsumFX = 0; 49 for (i = 0; i < inst->magnLen; i++) { 50 r0 = postLocSnr[i]; // Q11 51 r1 = priorLocSnr[i]; 52 r2 = inst->logLrtTimeAvgW32[i]; 53 54 __asm __volatile( 55 ".set push \n\t" 56 ".set noreorder \n\t" 57 "clz %[r3], %[r0] \n\t" 58 "clz %[r5], %[r1] \n\t" 59 "slti %[r4], %[r3], 32 \n\t" 60 "slti %[r6], %[r5], 32 \n\t" 61 "movz %[r3], $0, %[r4] \n\t" 62 "movz %[r5], $0, %[r6] \n\t" 63 "slti %[r4], %[r3], 11 \n\t" 64 "addiu %[r6], %[r3], -11 \n\t" 65 "neg %[r7], %[r6] \n\t" 66 "sllv %[r6], %[r1], %[r6] \n\t" 67 "srav %[r7], %[r1], %[r7] \n\t" 68 "movn %[r6], %[r7], %[r4] \n\t" 69 "sllv %[r1], %[r1], %[r5] \n\t" 70 "and %[r1], %[r1], %[const_max] \n\t" 71 "sra %[r1], %[r1], 19 \n\t" 72 "mul %[r7], %[r1], %[r1] \n\t" 73 "sllv %[r3], %[r0], %[r3] \n\t" 74 "divu %[r8], %[r3], %[r6] \n\t" 75 "slti %[r6], %[r6], 1 \n\t" 76 "mul %[r7], %[r7], %[const_neg43] \n\t" 77 "sra %[r7], %[r7], 19 \n\t" 78 "movz %[r3], %[r8], %[r6] \n\t" 79 "subu %[r0], %[r0], %[r3] \n\t" 80 "movn %[r0], $0, %[r6] \n\t" 81 "mul %[r1], %[r1], %[const_5412] \n\t" 82 "sra %[r1], %[r1], 12 \n\t" 83 "addu %[r7], %[r7], %[r1] \n\t" 84 "addiu %[r1], %[r7], 37 \n\t" 85 "addiu %[r5], %[r5], -31 \n\t" 86 "neg %[r5], %[r5] \n\t" 87 "sll %[r5], %[r5], 12 \n\t" 88 "addu %[r5], %[r5], %[r1] \n\t" 89 "subu %[r7], %[r5], %[const_11rsh12] \n\t" 90 "mul %[r7], %[r7], %[const_178] \n\t" 91 "sra %[r7], %[r7], 8 \n\t" 92 "addu %[r7], %[r7], %[r2] \n\t" 93 "sra %[r7], %[r7], 1 \n\t" 94 "subu %[r2], %[r2], %[r7] \n\t" 95 "addu %[r2], %[r2], %[r0] \n\t" 96 ".set pop \n\t" 97 : [r0] "+r" (r0), [r1] "+r" (r1), [r2] "+r" (r2), 98 [r3] "=&r" (r3), [r4] "=&r" (r4), [r5] "=&r" (r5), 99 [r6] "=&r" (r6), [r7] "=&r" (r7), [r8] "=&r" (r8) 100 : [const_max] "r" (const_max), [const_neg43] "r" (const_neg43), 101 [const_5412] "r" (const_5412), [const_11rsh12] "r" (const_11rsh12), 102 [const_178] "r" (const_178) 103 : "hi", "lo" 104 ); 105 inst->logLrtTimeAvgW32[i] = r2; 106 logLrtTimeAvgKsumFX += r2; 107 } 108 109 inst->featureLogLrt = (logLrtTimeAvgKsumFX * BIN_SIZE_LRT) >> 110 (inst->stages + 11); 111 112 // done with computation of LR factor 113 114 // 115 // compute the indicator functions 116 // 117 118 // average LRT feature 119 // FLOAT code 120 // indicator0 = 0.5 * (tanh(widthPrior * 121 // (logLrtTimeAvgKsum - threshPrior0)) + 1.0); 122 tmpIndFX = 16384; // Q14(1.0) 123 tmp32no1 = logLrtTimeAvgKsumFX - inst->thresholdLogLrt; // Q12 124 nShifts = 7 - inst->stages; // WIDTH_PR_MAP_SHIFT - inst->stages + 5; 125 //use larger width in tanh map for pause regions 126 if (tmp32no1 < 0) { 127 tmpIndFX = 0; 128 tmp32no1 = -tmp32no1; 129 //widthPrior = widthPrior * 2.0; 130 nShifts++; 131 } 132 tmp32no1 = WEBRTC_SPL_SHIFT_W32(tmp32no1, nShifts); // Q14 133 // compute indicator function: sigmoid map 134 tableIndex = (int16_t)(tmp32no1 >> 14); 135 if ((tableIndex < 16) && (tableIndex >= 0)) { 136 tmp16no2 = kIndicatorTable[tableIndex]; 137 tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex]; 138 frac = (int16_t)(tmp32no1 & 0x00003fff); // Q14 139 tmp16no2 += (int16_t)((tmp16no1 * frac) >> 14); 140 if (tmpIndFX == 0) { 141 tmpIndFX = 8192 - tmp16no2; // Q14 142 } else { 143 tmpIndFX = 8192 + tmp16no2; // Q14 144 } 145 } 146 indPriorFX = inst->weightLogLrt * tmpIndFX; // 6*Q14 147 148 //spectral flatness feature 149 if (inst->weightSpecFlat) { 150 tmpU32no1 = WEBRTC_SPL_UMUL(inst->featureSpecFlat, 400); // Q10 151 tmpIndFX = 16384; // Q14(1.0) 152 //use larger width in tanh map for pause regions 153 tmpU32no2 = inst->thresholdSpecFlat - tmpU32no1; //Q10 154 nShifts = 4; 155 if (inst->thresholdSpecFlat < tmpU32no1) { 156 tmpIndFX = 0; 157 tmpU32no2 = tmpU32no1 - inst->thresholdSpecFlat; 158 //widthPrior = widthPrior * 2.0; 159 nShifts++; 160 } 161 tmpU32no1 = WebRtcSpl_DivU32U16(tmpU32no2 << nShifts, 25); //Q14 162 // compute indicator function: sigmoid map 163 // FLOAT code 164 // indicator1 = 0.5 * (tanh(sgnMap * widthPrior * 165 // (threshPrior1 - tmpFloat1)) + 1.0); 166 tableIndex = (int16_t)(tmpU32no1 >> 14); 167 if (tableIndex < 16) { 168 tmp16no2 = kIndicatorTable[tableIndex]; 169 tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex]; 170 frac = (int16_t)(tmpU32no1 & 0x00003fff); // Q14 171 tmp16no2 += (int16_t)((tmp16no1 * frac) >> 14); 172 if (tmpIndFX) { 173 tmpIndFX = 8192 + tmp16no2; // Q14 174 } else { 175 tmpIndFX = 8192 - tmp16no2; // Q14 176 } 177 } 178 indPriorFX += inst->weightSpecFlat * tmpIndFX; // 6*Q14 179 } 180 181 //for template spectral-difference 182 if (inst->weightSpecDiff) { 183 tmpU32no1 = 0; 184 if (inst->featureSpecDiff) { 185 normTmp = WEBRTC_SPL_MIN(20 - inst->stages, 186 WebRtcSpl_NormU32(inst->featureSpecDiff)); 187 assert(normTmp >= 0); 188 tmpU32no1 = inst->featureSpecDiff << normTmp; // Q(normTmp-2*stages) 189 tmpU32no2 = inst->timeAvgMagnEnergy >> (20 - inst->stages - normTmp); 190 if (tmpU32no2 > 0) { 191 // Q(20 - inst->stages) 192 tmpU32no1 /= tmpU32no2; 193 } else { 194 tmpU32no1 = (uint32_t)(0x7fffffff); 195 } 196 } 197 tmpU32no3 = (inst->thresholdSpecDiff << 17) / 25; 198 tmpU32no2 = tmpU32no1 - tmpU32no3; 199 nShifts = 1; 200 tmpIndFX = 16384; // Q14(1.0) 201 //use larger width in tanh map for pause regions 202 if (tmpU32no2 & 0x80000000) { 203 tmpIndFX = 0; 204 tmpU32no2 = tmpU32no3 - tmpU32no1; 205 //widthPrior = widthPrior * 2.0; 206 nShifts--; 207 } 208 tmpU32no1 = tmpU32no2 >> nShifts; 209 // compute indicator function: sigmoid map 210 /* FLOAT code 211 indicator2 = 0.5 * (tanh(widthPrior * (tmpFloat1 - threshPrior2)) + 1.0); 212 */ 213 tableIndex = (int16_t)(tmpU32no1 >> 14); 214 if (tableIndex < 16) { 215 tmp16no2 = kIndicatorTable[tableIndex]; 216 tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex]; 217 frac = (int16_t)(tmpU32no1 & 0x00003fff); // Q14 218 tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( 219 tmp16no1, frac, 14); 220 if (tmpIndFX) { 221 tmpIndFX = 8192 + tmp16no2; 222 } else { 223 tmpIndFX = 8192 - tmp16no2; 224 } 225 } 226 indPriorFX += inst->weightSpecDiff * tmpIndFX; // 6*Q14 227 } 228 229 //combine the indicator function with the feature weights 230 // FLOAT code 231 // indPrior = 1 - (weightIndPrior0 * indicator0 + weightIndPrior1 * 232 // indicator1 + weightIndPrior2 * indicator2); 233 indPriorFX16 = WebRtcSpl_DivW32W16ResW16(98307 - indPriorFX, 6); // Q14 234 // done with computing indicator function 235 236 //compute the prior probability 237 // FLOAT code 238 // inst->priorNonSpeechProb += PRIOR_UPDATE * 239 // (indPriorNonSpeech - inst->priorNonSpeechProb); 240 tmp16 = indPriorFX16 - inst->priorNonSpeechProb; // Q14 241 inst->priorNonSpeechProb += (int16_t)((PRIOR_UPDATE_Q14 * tmp16) >> 14); 242 243 //final speech probability: combine prior model with LR factor: 244 245 memset(nonSpeechProbFinal, 0, sizeof(uint16_t) * inst->magnLen); 246 247 if (inst->priorNonSpeechProb > 0) { 248 r0 = inst->priorNonSpeechProb; 249 r1 = 16384 - r0; 250 int32_t const_23637 = 23637; 251 int32_t const_44 = 44; 252 int32_t const_84 = 84; 253 int32_t const_1 = 1; 254 int32_t const_neg8 = -8; 255 for (i = 0; i < inst->magnLen; i++) { 256 r2 = inst->logLrtTimeAvgW32[i]; 257 if (r2 < 65300) { 258 __asm __volatile( 259 ".set push \n\t" 260 ".set noreorder \n\t" 261 "mul %[r2], %[r2], %[const_23637] \n\t" 262 "sll %[r6], %[r1], 16 \n\t" 263 "clz %[r7], %[r6] \n\t" 264 "clo %[r8], %[r6] \n\t" 265 "slt %[r9], %[r6], $0 \n\t" 266 "movn %[r7], %[r8], %[r9] \n\t" 267 "sra %[r2], %[r2], 14 \n\t" 268 "andi %[r3], %[r2], 0xfff \n\t" 269 "mul %[r4], %[r3], %[r3] \n\t" 270 "mul %[r3], %[r3], %[const_84] \n\t" 271 "sra %[r2], %[r2], 12 \n\t" 272 "slt %[r5], %[r2], %[const_neg8] \n\t" 273 "movn %[r2], %[const_neg8], %[r5] \n\t" 274 "mul %[r4], %[r4], %[const_44] \n\t" 275 "sra %[r3], %[r3], 7 \n\t" 276 "addiu %[r7], %[r7], -1 \n\t" 277 "slti %[r9], %[r7], 31 \n\t" 278 "movz %[r7], $0, %[r9] \n\t" 279 "sra %[r4], %[r4], 19 \n\t" 280 "addu %[r4], %[r4], %[r3] \n\t" 281 "addiu %[r3], %[r2], 8 \n\t" 282 "addiu %[r2], %[r2], -4 \n\t" 283 "neg %[r5], %[r2] \n\t" 284 "sllv %[r6], %[r4], %[r2] \n\t" 285 "srav %[r5], %[r4], %[r5] \n\t" 286 "slt %[r2], %[r2], $0 \n\t" 287 "movn %[r6], %[r5], %[r2] \n\t" 288 "sllv %[r3], %[const_1], %[r3] \n\t" 289 "addu %[r2], %[r3], %[r6] \n\t" 290 "clz %[r4], %[r2] \n\t" 291 "clo %[r5], %[r2] \n\t" 292 "slt %[r8], %[r2], $0 \n\t" 293 "movn %[r4], %[r5], %[r8] \n\t" 294 "addiu %[r4], %[r4], -1 \n\t" 295 "slt %[r5], $0, %[r2] \n\t" 296 "or %[r5], %[r5], %[r7] \n\t" 297 "movz %[r4], $0, %[r5] \n\t" 298 "addiu %[r6], %[r7], -7 \n\t" 299 "addu %[r6], %[r6], %[r4] \n\t" 300 "bltz %[r6], 1f \n\t" 301 " nop \n\t" 302 "addiu %[r4], %[r6], -8 \n\t" 303 "neg %[r3], %[r4] \n\t" 304 "srav %[r5], %[r2], %[r3] \n\t" 305 "mul %[r5], %[r5], %[r1] \n\t" 306 "mul %[r2], %[r2], %[r1] \n\t" 307 "slt %[r4], %[r4], $0 \n\t" 308 "srav %[r5], %[r5], %[r6] \n\t" 309 "sra %[r2], %[r2], 8 \n\t" 310 "movn %[r2], %[r5], %[r4] \n\t" 311 "sll %[r3], %[r0], 8 \n\t" 312 "addu %[r2], %[r0], %[r2] \n\t" 313 "divu %[r3], %[r3], %[r2] \n\t" 314 "1: \n\t" 315 ".set pop \n\t" 316 : [r2] "+r" (r2), [r3] "=&r" (r3), [r4] "=&r" (r4), 317 [r5] "=&r" (r5), [r6] "=&r" (r6), [r7] "=&r" (r7), 318 [r8] "=&r" (r8), [r9] "=&r" (r9) 319 : [r0] "r" (r0), [r1] "r" (r1), [const_23637] "r" (const_23637), 320 [const_neg8] "r" (const_neg8), [const_84] "r" (const_84), 321 [const_1] "r" (const_1), [const_44] "r" (const_44) 322 : "hi", "lo" 323 ); 324 nonSpeechProbFinal[i] = r3; 325 } 326 } 327 } 328 } 329 330 // Update analysis buffer for lower band, and window data before FFT. 331 void WebRtcNsx_AnalysisUpdate_mips(NoiseSuppressionFixedC* inst, 332 int16_t* out, 333 int16_t* new_speech) { 334 int iters, after; 335 int anaLen = (int)inst->anaLen; 336 int *window = (int*)inst->window; 337 int *anaBuf = (int*)inst->analysisBuffer; 338 int *outBuf = (int*)out; 339 int r0, r1, r2, r3, r4, r5, r6, r7; 340 #if defined(MIPS_DSP_R1_LE) 341 int r8; 342 #endif 343 344 // For lower band update analysis buffer. 345 memcpy(inst->analysisBuffer, inst->analysisBuffer + inst->blockLen10ms, 346 (inst->anaLen - inst->blockLen10ms) * sizeof(*inst->analysisBuffer)); 347 memcpy(inst->analysisBuffer + inst->anaLen - inst->blockLen10ms, new_speech, 348 inst->blockLen10ms * sizeof(*inst->analysisBuffer)); 349 350 // Window data before FFT. 351 #if defined(MIPS_DSP_R1_LE) 352 __asm __volatile( 353 ".set push \n\t" 354 ".set noreorder \n\t" 355 "sra %[iters], %[anaLen], 3 \n\t" 356 "1: \n\t" 357 "blez %[iters], 2f \n\t" 358 " nop \n\t" 359 "lw %[r0], 0(%[window]) \n\t" 360 "lw %[r1], 0(%[anaBuf]) \n\t" 361 "lw %[r2], 4(%[window]) \n\t" 362 "lw %[r3], 4(%[anaBuf]) \n\t" 363 "lw %[r4], 8(%[window]) \n\t" 364 "lw %[r5], 8(%[anaBuf]) \n\t" 365 "lw %[r6], 12(%[window]) \n\t" 366 "lw %[r7], 12(%[anaBuf]) \n\t" 367 "muleq_s.w.phl %[r8], %[r0], %[r1] \n\t" 368 "muleq_s.w.phr %[r0], %[r0], %[r1] \n\t" 369 "muleq_s.w.phl %[r1], %[r2], %[r3] \n\t" 370 "muleq_s.w.phr %[r2], %[r2], %[r3] \n\t" 371 "muleq_s.w.phl %[r3], %[r4], %[r5] \n\t" 372 "muleq_s.w.phr %[r4], %[r4], %[r5] \n\t" 373 "muleq_s.w.phl %[r5], %[r6], %[r7] \n\t" 374 "muleq_s.w.phr %[r6], %[r6], %[r7] \n\t" 375 #if defined(MIPS_DSP_R2_LE) 376 "precr_sra_r.ph.w %[r8], %[r0], 15 \n\t" 377 "precr_sra_r.ph.w %[r1], %[r2], 15 \n\t" 378 "precr_sra_r.ph.w %[r3], %[r4], 15 \n\t" 379 "precr_sra_r.ph.w %[r5], %[r6], 15 \n\t" 380 "sw %[r8], 0(%[outBuf]) \n\t" 381 "sw %[r1], 4(%[outBuf]) \n\t" 382 "sw %[r3], 8(%[outBuf]) \n\t" 383 "sw %[r5], 12(%[outBuf]) \n\t" 384 #else 385 "shra_r.w %[r8], %[r8], 15 \n\t" 386 "shra_r.w %[r0], %[r0], 15 \n\t" 387 "shra_r.w %[r1], %[r1], 15 \n\t" 388 "shra_r.w %[r2], %[r2], 15 \n\t" 389 "shra_r.w %[r3], %[r3], 15 \n\t" 390 "shra_r.w %[r4], %[r4], 15 \n\t" 391 "shra_r.w %[r5], %[r5], 15 \n\t" 392 "shra_r.w %[r6], %[r6], 15 \n\t" 393 "sll %[r0], %[r0], 16 \n\t" 394 "sll %[r2], %[r2], 16 \n\t" 395 "sll %[r4], %[r4], 16 \n\t" 396 "sll %[r6], %[r6], 16 \n\t" 397 "packrl.ph %[r0], %[r8], %[r0] \n\t" 398 "packrl.ph %[r2], %[r1], %[r2] \n\t" 399 "packrl.ph %[r4], %[r3], %[r4] \n\t" 400 "packrl.ph %[r6], %[r5], %[r6] \n\t" 401 "sw %[r0], 0(%[outBuf]) \n\t" 402 "sw %[r2], 4(%[outBuf]) \n\t" 403 "sw %[r4], 8(%[outBuf]) \n\t" 404 "sw %[r6], 12(%[outBuf]) \n\t" 405 #endif 406 "addiu %[window], %[window], 16 \n\t" 407 "addiu %[anaBuf], %[anaBuf], 16 \n\t" 408 "addiu %[outBuf], %[outBuf], 16 \n\t" 409 "b 1b \n\t" 410 " addiu %[iters], %[iters], -1 \n\t" 411 "2: \n\t" 412 "andi %[after], %[anaLen], 7 \n\t" 413 "3: \n\t" 414 "blez %[after], 4f \n\t" 415 " nop \n\t" 416 "lh %[r0], 0(%[window]) \n\t" 417 "lh %[r1], 0(%[anaBuf]) \n\t" 418 "mul %[r0], %[r0], %[r1] \n\t" 419 "addiu %[window], %[window], 2 \n\t" 420 "addiu %[anaBuf], %[anaBuf], 2 \n\t" 421 "addiu %[outBuf], %[outBuf], 2 \n\t" 422 "shra_r.w %[r0], %[r0], 14 \n\t" 423 "sh %[r0], -2(%[outBuf]) \n\t" 424 "b 3b \n\t" 425 " addiu %[after], %[after], -1 \n\t" 426 "4: \n\t" 427 ".set pop \n\t" 428 : [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2), 429 [r3] "=&r" (r3), [r4] "=&r" (r4), [r5] "=&r" (r5), 430 [r6] "=&r" (r6), [r7] "=&r" (r7), [r8] "=&r" (r8), 431 [iters] "=&r" (iters), [after] "=&r" (after), 432 [window] "+r" (window),[anaBuf] "+r" (anaBuf), 433 [outBuf] "+r" (outBuf) 434 : [anaLen] "r" (anaLen) 435 : "memory", "hi", "lo" 436 ); 437 #else 438 __asm __volatile( 439 ".set push \n\t" 440 ".set noreorder \n\t" 441 "sra %[iters], %[anaLen], 2 \n\t" 442 "1: \n\t" 443 "blez %[iters], 2f \n\t" 444 " nop \n\t" 445 "lh %[r0], 0(%[window]) \n\t" 446 "lh %[r1], 0(%[anaBuf]) \n\t" 447 "lh %[r2], 2(%[window]) \n\t" 448 "lh %[r3], 2(%[anaBuf]) \n\t" 449 "lh %[r4], 4(%[window]) \n\t" 450 "lh %[r5], 4(%[anaBuf]) \n\t" 451 "lh %[r6], 6(%[window]) \n\t" 452 "lh %[r7], 6(%[anaBuf]) \n\t" 453 "mul %[r0], %[r0], %[r1] \n\t" 454 "mul %[r2], %[r2], %[r3] \n\t" 455 "mul %[r4], %[r4], %[r5] \n\t" 456 "mul %[r6], %[r6], %[r7] \n\t" 457 "addiu %[window], %[window], 8 \n\t" 458 "addiu %[anaBuf], %[anaBuf], 8 \n\t" 459 "addiu %[r0], %[r0], 0x2000 \n\t" 460 "addiu %[r2], %[r2], 0x2000 \n\t" 461 "addiu %[r4], %[r4], 0x2000 \n\t" 462 "addiu %[r6], %[r6], 0x2000 \n\t" 463 "sra %[r0], %[r0], 14 \n\t" 464 "sra %[r2], %[r2], 14 \n\t" 465 "sra %[r4], %[r4], 14 \n\t" 466 "sra %[r6], %[r6], 14 \n\t" 467 "sh %[r0], 0(%[outBuf]) \n\t" 468 "sh %[r2], 2(%[outBuf]) \n\t" 469 "sh %[r4], 4(%[outBuf]) \n\t" 470 "sh %[r6], 6(%[outBuf]) \n\t" 471 "addiu %[outBuf], %[outBuf], 8 \n\t" 472 "b 1b \n\t" 473 " addiu %[iters], %[iters], -1 \n\t" 474 "2: \n\t" 475 "andi %[after], %[anaLen], 3 \n\t" 476 "3: \n\t" 477 "blez %[after], 4f \n\t" 478 " nop \n\t" 479 "lh %[r0], 0(%[window]) \n\t" 480 "lh %[r1], 0(%[anaBuf]) \n\t" 481 "mul %[r0], %[r0], %[r1] \n\t" 482 "addiu %[window], %[window], 2 \n\t" 483 "addiu %[anaBuf], %[anaBuf], 2 \n\t" 484 "addiu %[outBuf], %[outBuf], 2 \n\t" 485 "addiu %[r0], %[r0], 0x2000 \n\t" 486 "sra %[r0], %[r0], 14 \n\t" 487 "sh %[r0], -2(%[outBuf]) \n\t" 488 "b 3b \n\t" 489 " addiu %[after], %[after], -1 \n\t" 490 "4: \n\t" 491 ".set pop \n\t" 492 : [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2), 493 [r3] "=&r" (r3), [r4] "=&r" (r4), [r5] "=&r" (r5), 494 [r6] "=&r" (r6), [r7] "=&r" (r7), [iters] "=&r" (iters), 495 [after] "=&r" (after), [window] "+r" (window), 496 [anaBuf] "+r" (anaBuf), [outBuf] "+r" (outBuf) 497 : [anaLen] "r" (anaLen) 498 : "memory", "hi", "lo" 499 ); 500 #endif 501 } 502 503 // For the noise supression process, synthesis, read out fully processed 504 // segment, and update synthesis buffer. 505 void WebRtcNsx_SynthesisUpdate_mips(NoiseSuppressionFixedC* inst, 506 int16_t* out_frame, 507 int16_t gain_factor) { 508 int iters = (int)inst->blockLen10ms >> 2; 509 int after = inst->blockLen10ms & 3; 510 int r0, r1, r2, r3, r4, r5, r6, r7; 511 int16_t *window = (int16_t*)inst->window; 512 int16_t *real = inst->real; 513 int16_t *synthBuf = inst->synthesisBuffer; 514 int16_t *out = out_frame; 515 int sat_pos = 0x7fff; 516 int sat_neg = 0xffff8000; 517 int block10 = (int)inst->blockLen10ms; 518 int anaLen = (int)inst->anaLen; 519 520 __asm __volatile( 521 ".set push \n\t" 522 ".set noreorder \n\t" 523 "1: \n\t" 524 "blez %[iters], 2f \n\t" 525 " nop \n\t" 526 "lh %[r0], 0(%[window]) \n\t" 527 "lh %[r1], 0(%[real]) \n\t" 528 "lh %[r2], 2(%[window]) \n\t" 529 "lh %[r3], 2(%[real]) \n\t" 530 "lh %[r4], 4(%[window]) \n\t" 531 "lh %[r5], 4(%[real]) \n\t" 532 "lh %[r6], 6(%[window]) \n\t" 533 "lh %[r7], 6(%[real]) \n\t" 534 "mul %[r0], %[r0], %[r1] \n\t" 535 "mul %[r2], %[r2], %[r3] \n\t" 536 "mul %[r4], %[r4], %[r5] \n\t" 537 "mul %[r6], %[r6], %[r7] \n\t" 538 "addiu %[r0], %[r0], 0x2000 \n\t" 539 "addiu %[r2], %[r2], 0x2000 \n\t" 540 "addiu %[r4], %[r4], 0x2000 \n\t" 541 "addiu %[r6], %[r6], 0x2000 \n\t" 542 "sra %[r0], %[r0], 14 \n\t" 543 "sra %[r2], %[r2], 14 \n\t" 544 "sra %[r4], %[r4], 14 \n\t" 545 "sra %[r6], %[r6], 14 \n\t" 546 "mul %[r0], %[r0], %[gain_factor] \n\t" 547 "mul %[r2], %[r2], %[gain_factor] \n\t" 548 "mul %[r4], %[r4], %[gain_factor] \n\t" 549 "mul %[r6], %[r6], %[gain_factor] \n\t" 550 "addiu %[r0], %[r0], 0x1000 \n\t" 551 "addiu %[r2], %[r2], 0x1000 \n\t" 552 "addiu %[r4], %[r4], 0x1000 \n\t" 553 "addiu %[r6], %[r6], 0x1000 \n\t" 554 "sra %[r0], %[r0], 13 \n\t" 555 "sra %[r2], %[r2], 13 \n\t" 556 "sra %[r4], %[r4], 13 \n\t" 557 "sra %[r6], %[r6], 13 \n\t" 558 "slt %[r1], %[r0], %[sat_pos] \n\t" 559 "slt %[r3], %[r2], %[sat_pos] \n\t" 560 "slt %[r5], %[r4], %[sat_pos] \n\t" 561 "slt %[r7], %[r6], %[sat_pos] \n\t" 562 "movz %[r0], %[sat_pos], %[r1] \n\t" 563 "movz %[r2], %[sat_pos], %[r3] \n\t" 564 "movz %[r4], %[sat_pos], %[r5] \n\t" 565 "movz %[r6], %[sat_pos], %[r7] \n\t" 566 "lh %[r1], 0(%[synthBuf]) \n\t" 567 "lh %[r3], 2(%[synthBuf]) \n\t" 568 "lh %[r5], 4(%[synthBuf]) \n\t" 569 "lh %[r7], 6(%[synthBuf]) \n\t" 570 "addu %[r0], %[r0], %[r1] \n\t" 571 "addu %[r2], %[r2], %[r3] \n\t" 572 "addu %[r4], %[r4], %[r5] \n\t" 573 "addu %[r6], %[r6], %[r7] \n\t" 574 "slt %[r1], %[r0], %[sat_pos] \n\t" 575 "slt %[r3], %[r2], %[sat_pos] \n\t" 576 "slt %[r5], %[r4], %[sat_pos] \n\t" 577 "slt %[r7], %[r6], %[sat_pos] \n\t" 578 "movz %[r0], %[sat_pos], %[r1] \n\t" 579 "movz %[r2], %[sat_pos], %[r3] \n\t" 580 "movz %[r4], %[sat_pos], %[r5] \n\t" 581 "movz %[r6], %[sat_pos], %[r7] \n\t" 582 "slt %[r1], %[r0], %[sat_neg] \n\t" 583 "slt %[r3], %[r2], %[sat_neg] \n\t" 584 "slt %[r5], %[r4], %[sat_neg] \n\t" 585 "slt %[r7], %[r6], %[sat_neg] \n\t" 586 "movn %[r0], %[sat_neg], %[r1] \n\t" 587 "movn %[r2], %[sat_neg], %[r3] \n\t" 588 "movn %[r4], %[sat_neg], %[r5] \n\t" 589 "movn %[r6], %[sat_neg], %[r7] \n\t" 590 "sh %[r0], 0(%[synthBuf]) \n\t" 591 "sh %[r2], 2(%[synthBuf]) \n\t" 592 "sh %[r4], 4(%[synthBuf]) \n\t" 593 "sh %[r6], 6(%[synthBuf]) \n\t" 594 "sh %[r0], 0(%[out]) \n\t" 595 "sh %[r2], 2(%[out]) \n\t" 596 "sh %[r4], 4(%[out]) \n\t" 597 "sh %[r6], 6(%[out]) \n\t" 598 "addiu %[window], %[window], 8 \n\t" 599 "addiu %[real], %[real], 8 \n\t" 600 "addiu %[synthBuf],%[synthBuf], 8 \n\t" 601 "addiu %[out], %[out], 8 \n\t" 602 "b 1b \n\t" 603 " addiu %[iters], %[iters], -1 \n\t" 604 "2: \n\t" 605 "blez %[after], 3f \n\t" 606 " subu %[block10], %[anaLen], %[block10] \n\t" 607 "lh %[r0], 0(%[window]) \n\t" 608 "lh %[r1], 0(%[real]) \n\t" 609 "mul %[r0], %[r0], %[r1] \n\t" 610 "addiu %[window], %[window], 2 \n\t" 611 "addiu %[real], %[real], 2 \n\t" 612 "addiu %[r0], %[r0], 0x2000 \n\t" 613 "sra %[r0], %[r0], 14 \n\t" 614 "mul %[r0], %[r0], %[gain_factor] \n\t" 615 "addiu %[r0], %[r0], 0x1000 \n\t" 616 "sra %[r0], %[r0], 13 \n\t" 617 "slt %[r1], %[r0], %[sat_pos] \n\t" 618 "movz %[r0], %[sat_pos], %[r1] \n\t" 619 "lh %[r1], 0(%[synthBuf]) \n\t" 620 "addu %[r0], %[r0], %[r1] \n\t" 621 "slt %[r1], %[r0], %[sat_pos] \n\t" 622 "movz %[r0], %[sat_pos], %[r1] \n\t" 623 "slt %[r1], %[r0], %[sat_neg] \n\t" 624 "movn %[r0], %[sat_neg], %[r1] \n\t" 625 "sh %[r0], 0(%[synthBuf]) \n\t" 626 "sh %[r0], 0(%[out]) \n\t" 627 "addiu %[synthBuf],%[synthBuf], 2 \n\t" 628 "addiu %[out], %[out], 2 \n\t" 629 "b 2b \n\t" 630 " addiu %[after], %[after], -1 \n\t" 631 "3: \n\t" 632 "sra %[iters], %[block10], 2 \n\t" 633 "4: \n\t" 634 "blez %[iters], 5f \n\t" 635 " andi %[after], %[block10], 3 \n\t" 636 "lh %[r0], 0(%[window]) \n\t" 637 "lh %[r1], 0(%[real]) \n\t" 638 "lh %[r2], 2(%[window]) \n\t" 639 "lh %[r3], 2(%[real]) \n\t" 640 "lh %[r4], 4(%[window]) \n\t" 641 "lh %[r5], 4(%[real]) \n\t" 642 "lh %[r6], 6(%[window]) \n\t" 643 "lh %[r7], 6(%[real]) \n\t" 644 "mul %[r0], %[r0], %[r1] \n\t" 645 "mul %[r2], %[r2], %[r3] \n\t" 646 "mul %[r4], %[r4], %[r5] \n\t" 647 "mul %[r6], %[r6], %[r7] \n\t" 648 "addiu %[r0], %[r0], 0x2000 \n\t" 649 "addiu %[r2], %[r2], 0x2000 \n\t" 650 "addiu %[r4], %[r4], 0x2000 \n\t" 651 "addiu %[r6], %[r6], 0x2000 \n\t" 652 "sra %[r0], %[r0], 14 \n\t" 653 "sra %[r2], %[r2], 14 \n\t" 654 "sra %[r4], %[r4], 14 \n\t" 655 "sra %[r6], %[r6], 14 \n\t" 656 "mul %[r0], %[r0], %[gain_factor] \n\t" 657 "mul %[r2], %[r2], %[gain_factor] \n\t" 658 "mul %[r4], %[r4], %[gain_factor] \n\t" 659 "mul %[r6], %[r6], %[gain_factor] \n\t" 660 "addiu %[r0], %[r0], 0x1000 \n\t" 661 "addiu %[r2], %[r2], 0x1000 \n\t" 662 "addiu %[r4], %[r4], 0x1000 \n\t" 663 "addiu %[r6], %[r6], 0x1000 \n\t" 664 "sra %[r0], %[r0], 13 \n\t" 665 "sra %[r2], %[r2], 13 \n\t" 666 "sra %[r4], %[r4], 13 \n\t" 667 "sra %[r6], %[r6], 13 \n\t" 668 "slt %[r1], %[r0], %[sat_pos] \n\t" 669 "slt %[r3], %[r2], %[sat_pos] \n\t" 670 "slt %[r5], %[r4], %[sat_pos] \n\t" 671 "slt %[r7], %[r6], %[sat_pos] \n\t" 672 "movz %[r0], %[sat_pos], %[r1] \n\t" 673 "movz %[r2], %[sat_pos], %[r3] \n\t" 674 "movz %[r4], %[sat_pos], %[r5] \n\t" 675 "movz %[r6], %[sat_pos], %[r7] \n\t" 676 "lh %[r1], 0(%[synthBuf]) \n\t" 677 "lh %[r3], 2(%[synthBuf]) \n\t" 678 "lh %[r5], 4(%[synthBuf]) \n\t" 679 "lh %[r7], 6(%[synthBuf]) \n\t" 680 "addu %[r0], %[r0], %[r1] \n\t" 681 "addu %[r2], %[r2], %[r3] \n\t" 682 "addu %[r4], %[r4], %[r5] \n\t" 683 "addu %[r6], %[r6], %[r7] \n\t" 684 "slt %[r1], %[r0], %[sat_pos] \n\t" 685 "slt %[r3], %[r2], %[sat_pos] \n\t" 686 "slt %[r5], %[r4], %[sat_pos] \n\t" 687 "slt %[r7], %[r6], %[sat_pos] \n\t" 688 "movz %[r0], %[sat_pos], %[r1] \n\t" 689 "movz %[r2], %[sat_pos], %[r3] \n\t" 690 "movz %[r4], %[sat_pos], %[r5] \n\t" 691 "movz %[r6], %[sat_pos], %[r7] \n\t" 692 "slt %[r1], %[r0], %[sat_neg] \n\t" 693 "slt %[r3], %[r2], %[sat_neg] \n\t" 694 "slt %[r5], %[r4], %[sat_neg] \n\t" 695 "slt %[r7], %[r6], %[sat_neg] \n\t" 696 "movn %[r0], %[sat_neg], %[r1] \n\t" 697 "movn %[r2], %[sat_neg], %[r3] \n\t" 698 "movn %[r4], %[sat_neg], %[r5] \n\t" 699 "movn %[r6], %[sat_neg], %[r7] \n\t" 700 "sh %[r0], 0(%[synthBuf]) \n\t" 701 "sh %[r2], 2(%[synthBuf]) \n\t" 702 "sh %[r4], 4(%[synthBuf]) \n\t" 703 "sh %[r6], 6(%[synthBuf]) \n\t" 704 "addiu %[window], %[window], 8 \n\t" 705 "addiu %[real], %[real], 8 \n\t" 706 "addiu %[synthBuf],%[synthBuf], 8 \n\t" 707 "b 4b \n\t" 708 " addiu %[iters], %[iters], -1 \n\t" 709 "5: \n\t" 710 "blez %[after], 6f \n\t" 711 " nop \n\t" 712 "lh %[r0], 0(%[window]) \n\t" 713 "lh %[r1], 0(%[real]) \n\t" 714 "mul %[r0], %[r0], %[r1] \n\t" 715 "addiu %[window], %[window], 2 \n\t" 716 "addiu %[real], %[real], 2 \n\t" 717 "addiu %[r0], %[r0], 0x2000 \n\t" 718 "sra %[r0], %[r0], 14 \n\t" 719 "mul %[r0], %[r0], %[gain_factor] \n\t" 720 "addiu %[r0], %[r0], 0x1000 \n\t" 721 "sra %[r0], %[r0], 13 \n\t" 722 "slt %[r1], %[r0], %[sat_pos] \n\t" 723 "movz %[r0], %[sat_pos], %[r1] \n\t" 724 "lh %[r1], 0(%[synthBuf]) \n\t" 725 "addu %[r0], %[r0], %[r1] \n\t" 726 "slt %[r1], %[r0], %[sat_pos] \n\t" 727 "movz %[r0], %[sat_pos], %[r1] \n\t" 728 "slt %[r1], %[r0], %[sat_neg] \n\t" 729 "movn %[r0], %[sat_neg], %[r1] \n\t" 730 "sh %[r0], 0(%[synthBuf]) \n\t" 731 "addiu %[synthBuf],%[synthBuf], 2 \n\t" 732 "b 2b \n\t" 733 " addiu %[after], %[after], -1 \n\t" 734 "6: \n\t" 735 ".set pop \n\t" 736 : [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2), 737 [r3] "=&r" (r3), [r4] "=&r" (r4), [r5] "=&r" (r5), 738 [r6] "=&r" (r6), [r7] "=&r" (r7), [iters] "+r" (iters), 739 [after] "+r" (after), [block10] "+r" (block10), 740 [window] "+r" (window), [real] "+r" (real), 741 [synthBuf] "+r" (synthBuf), [out] "+r" (out) 742 : [gain_factor] "r" (gain_factor), [sat_pos] "r" (sat_pos), 743 [sat_neg] "r" (sat_neg), [anaLen] "r" (anaLen) 744 : "memory", "hi", "lo" 745 ); 746 747 // update synthesis buffer 748 memcpy(inst->synthesisBuffer, inst->synthesisBuffer + inst->blockLen10ms, 749 (inst->anaLen - inst->blockLen10ms) * sizeof(*inst->synthesisBuffer)); 750 WebRtcSpl_ZerosArrayW16(inst->synthesisBuffer 751 + inst->anaLen - inst->blockLen10ms, inst->blockLen10ms); 752 } 753 754 // Filter the data in the frequency domain, and create spectrum. 755 void WebRtcNsx_PrepareSpectrum_mips(NoiseSuppressionFixedC* inst, 756 int16_t* freq_buf) { 757 uint16_t *noiseSupFilter = inst->noiseSupFilter; 758 int16_t *real = inst->real; 759 int16_t *imag = inst->imag; 760 int32_t loop_count = 2; 761 int16_t tmp_1, tmp_2, tmp_3, tmp_4, tmp_5, tmp_6; 762 int16_t tmp16 = (int16_t)(inst->anaLen << 1) - 4; 763 int16_t* freq_buf_f = freq_buf; 764 int16_t* freq_buf_s = &freq_buf[tmp16]; 765 766 __asm __volatile ( 767 ".set push \n\t" 768 ".set noreorder \n\t" 769 //first sample 770 "lh %[tmp_1], 0(%[noiseSupFilter]) \n\t" 771 "lh %[tmp_2], 0(%[real]) \n\t" 772 "lh %[tmp_3], 0(%[imag]) \n\t" 773 "mul %[tmp_2], %[tmp_2], %[tmp_1] \n\t" 774 "mul %[tmp_3], %[tmp_3], %[tmp_1] \n\t" 775 "sra %[tmp_2], %[tmp_2], 14 \n\t" 776 "sra %[tmp_3], %[tmp_3], 14 \n\t" 777 "sh %[tmp_2], 0(%[real]) \n\t" 778 "sh %[tmp_3], 0(%[imag]) \n\t" 779 "negu %[tmp_3], %[tmp_3] \n\t" 780 "sh %[tmp_2], 0(%[freq_buf_f]) \n\t" 781 "sh %[tmp_3], 2(%[freq_buf_f]) \n\t" 782 "addiu %[real], %[real], 2 \n\t" 783 "addiu %[imag], %[imag], 2 \n\t" 784 "addiu %[noiseSupFilter], %[noiseSupFilter], 2 \n\t" 785 "addiu %[freq_buf_f], %[freq_buf_f], 4 \n\t" 786 "1: \n\t" 787 "lh %[tmp_1], 0(%[noiseSupFilter]) \n\t" 788 "lh %[tmp_2], 0(%[real]) \n\t" 789 "lh %[tmp_3], 0(%[imag]) \n\t" 790 "lh %[tmp_4], 2(%[noiseSupFilter]) \n\t" 791 "lh %[tmp_5], 2(%[real]) \n\t" 792 "lh %[tmp_6], 2(%[imag]) \n\t" 793 "mul %[tmp_2], %[tmp_2], %[tmp_1] \n\t" 794 "mul %[tmp_3], %[tmp_3], %[tmp_1] \n\t" 795 "mul %[tmp_5], %[tmp_5], %[tmp_4] \n\t" 796 "mul %[tmp_6], %[tmp_6], %[tmp_4] \n\t" 797 "addiu %[loop_count], %[loop_count], 2 \n\t" 798 "sra %[tmp_2], %[tmp_2], 14 \n\t" 799 "sra %[tmp_3], %[tmp_3], 14 \n\t" 800 "sra %[tmp_5], %[tmp_5], 14 \n\t" 801 "sra %[tmp_6], %[tmp_6], 14 \n\t" 802 "addiu %[noiseSupFilter], %[noiseSupFilter], 4 \n\t" 803 "sh %[tmp_2], 0(%[real]) \n\t" 804 "sh %[tmp_2], 4(%[freq_buf_s]) \n\t" 805 "sh %[tmp_3], 0(%[imag]) \n\t" 806 "sh %[tmp_3], 6(%[freq_buf_s]) \n\t" 807 "negu %[tmp_3], %[tmp_3] \n\t" 808 "sh %[tmp_5], 2(%[real]) \n\t" 809 "sh %[tmp_5], 0(%[freq_buf_s]) \n\t" 810 "sh %[tmp_6], 2(%[imag]) \n\t" 811 "sh %[tmp_6], 2(%[freq_buf_s]) \n\t" 812 "negu %[tmp_6], %[tmp_6] \n\t" 813 "addiu %[freq_buf_s], %[freq_buf_s], -8 \n\t" 814 "addiu %[real], %[real], 4 \n\t" 815 "addiu %[imag], %[imag], 4 \n\t" 816 "sh %[tmp_2], 0(%[freq_buf_f]) \n\t" 817 "sh %[tmp_3], 2(%[freq_buf_f]) \n\t" 818 "sh %[tmp_5], 4(%[freq_buf_f]) \n\t" 819 "sh %[tmp_6], 6(%[freq_buf_f]) \n\t" 820 "blt %[loop_count], %[loop_size], 1b \n\t" 821 " addiu %[freq_buf_f], %[freq_buf_f], 8 \n\t" 822 //last two samples: 823 "lh %[tmp_1], 0(%[noiseSupFilter]) \n\t" 824 "lh %[tmp_2], 0(%[real]) \n\t" 825 "lh %[tmp_3], 0(%[imag]) \n\t" 826 "lh %[tmp_4], 2(%[noiseSupFilter]) \n\t" 827 "lh %[tmp_5], 2(%[real]) \n\t" 828 "lh %[tmp_6], 2(%[imag]) \n\t" 829 "mul %[tmp_2], %[tmp_2], %[tmp_1] \n\t" 830 "mul %[tmp_3], %[tmp_3], %[tmp_1] \n\t" 831 "mul %[tmp_5], %[tmp_5], %[tmp_4] \n\t" 832 "mul %[tmp_6], %[tmp_6], %[tmp_4] \n\t" 833 "sra %[tmp_2], %[tmp_2], 14 \n\t" 834 "sra %[tmp_3], %[tmp_3], 14 \n\t" 835 "sra %[tmp_5], %[tmp_5], 14 \n\t" 836 "sra %[tmp_6], %[tmp_6], 14 \n\t" 837 "sh %[tmp_2], 0(%[real]) \n\t" 838 "sh %[tmp_2], 4(%[freq_buf_s]) \n\t" 839 "sh %[tmp_3], 0(%[imag]) \n\t" 840 "sh %[tmp_3], 6(%[freq_buf_s]) \n\t" 841 "negu %[tmp_3], %[tmp_3] \n\t" 842 "sh %[tmp_2], 0(%[freq_buf_f]) \n\t" 843 "sh %[tmp_3], 2(%[freq_buf_f]) \n\t" 844 "sh %[tmp_5], 4(%[freq_buf_f]) \n\t" 845 "sh %[tmp_6], 6(%[freq_buf_f]) \n\t" 846 "sh %[tmp_5], 2(%[real]) \n\t" 847 "sh %[tmp_6], 2(%[imag]) \n\t" 848 ".set pop \n\t" 849 : [real] "+r" (real), [imag] "+r" (imag), 850 [freq_buf_f] "+r" (freq_buf_f), [freq_buf_s] "+r" (freq_buf_s), 851 [loop_count] "+r" (loop_count), [noiseSupFilter] "+r" (noiseSupFilter), 852 [tmp_1] "=&r" (tmp_1), [tmp_2] "=&r" (tmp_2), [tmp_3] "=&r" (tmp_3), 853 [tmp_4] "=&r" (tmp_4), [tmp_5] "=&r" (tmp_5), [tmp_6] "=&r" (tmp_6) 854 : [loop_size] "r" (inst->anaLen2) 855 : "memory", "hi", "lo" 856 ); 857 } 858 859 #if defined(MIPS_DSP_R1_LE) 860 // Denormalize the real-valued signal |in|, the output from inverse FFT. 861 void WebRtcNsx_Denormalize_mips(NoiseSuppressionFixedC* inst, 862 int16_t* in, 863 int factor) { 864 int32_t r0, r1, r2, r3, t0; 865 int len = (int)inst->anaLen; 866 int16_t *out = &inst->real[0]; 867 int shift = factor - inst->normData; 868 869 __asm __volatile ( 870 ".set push \n\t" 871 ".set noreorder \n\t" 872 "beqz %[len], 8f \n\t" 873 " nop \n\t" 874 "bltz %[shift], 4f \n\t" 875 " sra %[t0], %[len], 2 \n\t" 876 "beqz %[t0], 2f \n\t" 877 " andi %[len], %[len], 3 \n\t" 878 "1: \n\t" 879 "lh %[r0], 0(%[in]) \n\t" 880 "lh %[r1], 2(%[in]) \n\t" 881 "lh %[r2], 4(%[in]) \n\t" 882 "lh %[r3], 6(%[in]) \n\t" 883 "shllv_s.ph %[r0], %[r0], %[shift] \n\t" 884 "shllv_s.ph %[r1], %[r1], %[shift] \n\t" 885 "shllv_s.ph %[r2], %[r2], %[shift] \n\t" 886 "shllv_s.ph %[r3], %[r3], %[shift] \n\t" 887 "addiu %[in], %[in], 8 \n\t" 888 "addiu %[t0], %[t0], -1 \n\t" 889 "sh %[r0], 0(%[out]) \n\t" 890 "sh %[r1], 2(%[out]) \n\t" 891 "sh %[r2], 4(%[out]) \n\t" 892 "sh %[r3], 6(%[out]) \n\t" 893 "bgtz %[t0], 1b \n\t" 894 " addiu %[out], %[out], 8 \n\t" 895 "2: \n\t" 896 "beqz %[len], 8f \n\t" 897 " nop \n\t" 898 "3: \n\t" 899 "lh %[r0], 0(%[in]) \n\t" 900 "addiu %[in], %[in], 2 \n\t" 901 "addiu %[len], %[len], -1 \n\t" 902 "shllv_s.ph %[r0], %[r0], %[shift] \n\t" 903 "addiu %[out], %[out], 2 \n\t" 904 "bgtz %[len], 3b \n\t" 905 " sh %[r0], -2(%[out]) \n\t" 906 "b 8f \n\t" 907 "4: \n\t" 908 "negu %[shift], %[shift] \n\t" 909 "beqz %[t0], 6f \n\t" 910 " andi %[len], %[len], 3 \n\t" 911 "5: \n\t" 912 "lh %[r0], 0(%[in]) \n\t" 913 "lh %[r1], 2(%[in]) \n\t" 914 "lh %[r2], 4(%[in]) \n\t" 915 "lh %[r3], 6(%[in]) \n\t" 916 "srav %[r0], %[r0], %[shift] \n\t" 917 "srav %[r1], %[r1], %[shift] \n\t" 918 "srav %[r2], %[r2], %[shift] \n\t" 919 "srav %[r3], %[r3], %[shift] \n\t" 920 "addiu %[in], %[in], 8 \n\t" 921 "addiu %[t0], %[t0], -1 \n\t" 922 "sh %[r0], 0(%[out]) \n\t" 923 "sh %[r1], 2(%[out]) \n\t" 924 "sh %[r2], 4(%[out]) \n\t" 925 "sh %[r3], 6(%[out]) \n\t" 926 "bgtz %[t0], 5b \n\t" 927 " addiu %[out], %[out], 8 \n\t" 928 "6: \n\t" 929 "beqz %[len], 8f \n\t" 930 " nop \n\t" 931 "7: \n\t" 932 "lh %[r0], 0(%[in]) \n\t" 933 "addiu %[in], %[in], 2 \n\t" 934 "addiu %[len], %[len], -1 \n\t" 935 "srav %[r0], %[r0], %[shift] \n\t" 936 "addiu %[out], %[out], 2 \n\t" 937 "bgtz %[len], 7b \n\t" 938 " sh %[r0], -2(%[out]) \n\t" 939 "8: \n\t" 940 ".set pop \n\t" 941 : [t0] "=&r" (t0), [r0] "=&r" (r0), [r1] "=&r" (r1), 942 [r2] "=&r" (r2), [r3] "=&r" (r3) 943 : [len] "r" (len), [shift] "r" (shift), [in] "r" (in), 944 [out] "r" (out) 945 : "memory" 946 ); 947 } 948 #endif 949 950 // Normalize the real-valued signal |in|, the input to forward FFT. 951 void WebRtcNsx_NormalizeRealBuffer_mips(NoiseSuppressionFixedC* inst, 952 const int16_t* in, 953 int16_t* out) { 954 int32_t r0, r1, r2, r3, t0; 955 int len = (int)inst->anaLen; 956 int shift = inst->normData; 957 958 __asm __volatile ( 959 ".set push \n\t" 960 ".set noreorder \n\t" 961 "beqz %[len], 4f \n\t" 962 " sra %[t0], %[len], 2 \n\t" 963 "beqz %[t0], 2f \n\t" 964 " andi %[len], %[len], 3 \n\t" 965 "1: \n\t" 966 "lh %[r0], 0(%[in]) \n\t" 967 "lh %[r1], 2(%[in]) \n\t" 968 "lh %[r2], 4(%[in]) \n\t" 969 "lh %[r3], 6(%[in]) \n\t" 970 "sllv %[r0], %[r0], %[shift] \n\t" 971 "sllv %[r1], %[r1], %[shift] \n\t" 972 "sllv %[r2], %[r2], %[shift] \n\t" 973 "sllv %[r3], %[r3], %[shift] \n\t" 974 "addiu %[in], %[in], 8 \n\t" 975 "addiu %[t0], %[t0], -1 \n\t" 976 "sh %[r0], 0(%[out]) \n\t" 977 "sh %[r1], 2(%[out]) \n\t" 978 "sh %[r2], 4(%[out]) \n\t" 979 "sh %[r3], 6(%[out]) \n\t" 980 "bgtz %[t0], 1b \n\t" 981 " addiu %[out], %[out], 8 \n\t" 982 "2: \n\t" 983 "beqz %[len], 4f \n\t" 984 " nop \n\t" 985 "3: \n\t" 986 "lh %[r0], 0(%[in]) \n\t" 987 "addiu %[in], %[in], 2 \n\t" 988 "addiu %[len], %[len], -1 \n\t" 989 "sllv %[r0], %[r0], %[shift] \n\t" 990 "addiu %[out], %[out], 2 \n\t" 991 "bgtz %[len], 3b \n\t" 992 " sh %[r0], -2(%[out]) \n\t" 993 "4: \n\t" 994 ".set pop \n\t" 995 : [t0] "=&r" (t0), [r0] "=&r" (r0), [r1] "=&r" (r1), 996 [r2] "=&r" (r2), [r3] "=&r" (r3) 997 : [len] "r" (len), [shift] "r" (shift), [in] "r" (in), 998 [out] "r" (out) 999 : "memory" 1000 ); 1001 } 1002 1003