android-7.1.1_r1.0/s

/*
 *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <assert.h>
#include <string.h>

#include "webrtc/modules/audio_processing/ns/noise_suppression_x.h"
#include "webrtc/modules/audio_processing/ns/nsx_core.h"

static const int16_t kIndicatorTable[17] = {
  0, 2017, 3809, 5227, 6258, 6963, 7424, 7718,
  7901, 8014, 8084, 8126, 8152, 8168, 8177, 8183, 8187
};

// Compute speech/noise probability
// speech/noise probability is returned in: probSpeechFinal
//snrLocPrior is the prior SNR for each frequency (in Q11)
//snrLocPost is the post SNR for each frequency (in Q11)
void WebRtcNsx_SpeechNoiseProb(NoiseSuppressionFixedC* inst,
                               uint16_t* nonSpeechProbFinal,
                               uint32_t* priorLocSnr,
                               uint32_t* postLocSnr) {
  uint32_t tmpU32no1, tmpU32no2, tmpU32no3;
  int32_t indPriorFX, tmp32no1;
  int32_t logLrtTimeAvgKsumFX;
  int16_t indPriorFX16;
  int16_t tmp16, tmp16no1, tmp16no2, tmpIndFX, tableIndex, frac;
  size_t i;
  int normTmp, nShifts;

  int32_t r0, r1, r2, r3, r4, r5, r6, r7, r8, r9;
  int32_t const_max = 0x7fffffff;
  int32_t const_neg43 = -43;
  int32_t const_5412 = 5412;
  int32_t const_11rsh12 = (11 << 12);
  int32_t const_178 = 178;


  // compute feature based on average LR factor
  // this is the average over all frequencies of the smooth log LRT
  logLrtTimeAvgKsumFX = 0;
  for (i = 0; i < inst->magnLen; i++) {
    r0 = postLocSnr[i]; // Q11
    r1 = priorLocSnr[i];
    r2 = inst->logLrtTimeAvgW32[i];

    __asm __volatile(
      ".set       push                                    \n\t"
      ".set       noreorder                               \n\t"
      "clz        %[r3],    %[r0]                         \n\t"
      "clz        %[r5],    %[r1]                         \n\t"
      "slti       %[r4],    %[r3],    32                  \n\t"
      "slti       %[r6],    %[r5],    32                  \n\t"
      "movz       %[r3],    $0,       %[r4]               \n\t"
      "movz       %[r5],    $0,       %[r6]               \n\t"
      "slti       %[r4],    %[r3],    11                  \n\t"
      "addiu      %[r6],    %[r3],    -11                 \n\t"
      "neg        %[r7],    %[r6]                         \n\t"
      "sllv       %[r6],    %[r1],    %[r6]               \n\t"
      "srav       %[r7],    %[r1],    %[r7]               \n\t"
      "movn       %[r6],    %[r7],    %[r4]               \n\t"
      "sllv       %[r1],    %[r1],    %[r5]               \n\t"
      "and        %[r1],    %[r1],    %[const_max]        \n\t"
      "sra        %[r1],    %[r1],    19                  \n\t"
      "mul        %[r7],    %[r1],    %[r1]               \n\t"
      "sllv       %[r3],    %[r0],    %[r3]               \n\t"
      "divu       %[r8],    %[r3],    %[r6]               \n\t"
      "slti       %[r6],    %[r6],    1                   \n\t"
      "mul        %[r7],    %[r7],    %[const_neg43]      \n\t"
      "sra        %[r7],    %[r7],    19                  \n\t"
      "movz       %[r3],    %[r8],    %[r6]               \n\t"
      "subu       %[r0],    %[r0],    %[r3]               \n\t"
      "movn       %[r0],    $0,       %[r6]               \n\t"
      "mul        %[r1],    %[r1],    %[const_5412]       \n\t"
      "sra        %[r1],    %[r1],    12                  \n\t"
      "addu       %[r7],    %[r7],    %[r1]               \n\t"
      "addiu      %[r1],    %[r7],    37                  \n\t"
      "addiu      %[r5],    %[r5],    -31                 \n\t"
      "neg        %[r5],    %[r5]                         \n\t"
      "sll        %[r5],    %[r5],    12                  \n\t"
      "addu       %[r5],    %[r5],    %[r1]               \n\t"
      "subu       %[r7],    %[r5],    %[const_11rsh12]    \n\t"
      "mul        %[r7],    %[r7],    %[const_178]        \n\t"
      "sra        %[r7],    %[r7],    8                   \n\t"
      "addu       %[r7],    %[r7],    %[r2]               \n\t"
      "sra        %[r7],    %[r7],    1                   \n\t"
      "subu       %[r2],    %[r2],    %[r7]               \n\t"
      "addu       %[r2],    %[r2],    %[r0]               \n\t"
      ".set       pop                                     \n\t"
      : [r0] "+r" (r0), [r1] "+r" (r1), [r2] "+r" (r2),
        [r3] "=&r" (r3), [r4] "=&r" (r4), [r5] "=&r" (r5),
        [r6] "=&r" (r6), [r7] "=&r" (r7), [r8] "=&r" (r8)
      : [const_max] "r" (const_max), [const_neg43] "r" (const_neg43),
        [const_5412] "r" (const_5412), [const_11rsh12] "r" (const_11rsh12),
        [const_178] "r" (const_178)
      : "hi", "lo"
    );
    inst->logLrtTimeAvgW32[i] = r2;
    logLrtTimeAvgKsumFX += r2;
  }

  inst->featureLogLrt = (logLrtTimeAvgKsumFX * BIN_SIZE_LRT) >>
      (inst->stages + 11);

  // done with computation of LR factor

  //
  // compute the indicator functions
  //

  // average LRT feature
  // FLOAT code
  // indicator0 = 0.5 * (tanh(widthPrior *
  //                      (logLrtTimeAvgKsum - threshPrior0)) + 1.0);
  tmpIndFX = 16384; // Q14(1.0)
  tmp32no1 = logLrtTimeAvgKsumFX - inst->thresholdLogLrt; // Q12
  nShifts = 7 - inst->stages; // WIDTH_PR_MAP_SHIFT - inst->stages + 5;
  //use larger width in tanh map for pause regions
  if (tmp32no1 < 0) {
    tmpIndFX = 0;
    tmp32no1 = -tmp32no1;
    //widthPrior = widthPrior * 2.0;
    nShifts++;
  }
  tmp32no1 = WEBRTC_SPL_SHIFT_W32(tmp32no1, nShifts); // Q14
  // compute indicator function: sigmoid map
  tableIndex = (int16_t)(tmp32no1 >> 14);
  if ((tableIndex < 16) && (tableIndex >= 0)) {
    tmp16no2 = kIndicatorTable[tableIndex];
    tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
    frac = (int16_t)(tmp32no1 & 0x00003fff); // Q14
    tmp16no2 += (int16_t)((tmp16no1 * frac) >> 14);
    if (tmpIndFX == 0) {
      tmpIndFX = 8192 - tmp16no2; // Q14
    } else {
      tmpIndFX = 8192 + tmp16no2; // Q14
    }
  }
  indPriorFX = inst->weightLogLrt * tmpIndFX;  // 6*Q14

  //spectral flatness feature
  if (inst->weightSpecFlat) {
    tmpU32no1 = WEBRTC_SPL_UMUL(inst->featureSpecFlat, 400); // Q10
    tmpIndFX = 16384; // Q14(1.0)
    //use larger width in tanh map for pause regions
    tmpU32no2 = inst->thresholdSpecFlat - tmpU32no1; //Q10
    nShifts = 4;
    if (inst->thresholdSpecFlat < tmpU32no1) {
      tmpIndFX = 0;
      tmpU32no2 = tmpU32no1 - inst->thresholdSpecFlat;
      //widthPrior = widthPrior * 2.0;
      nShifts++;
    }
    tmpU32no1 = WebRtcSpl_DivU32U16(tmpU32no2 << nShifts, 25);  //Q14
    // compute indicator function: sigmoid map
    // FLOAT code
    // indicator1 = 0.5 * (tanh(sgnMap * widthPrior *
    //                          (threshPrior1 - tmpFloat1)) + 1.0);
    tableIndex = (int16_t)(tmpU32no1 >> 14);
    if (tableIndex < 16) {
      tmp16no2 = kIndicatorTable[tableIndex];
      tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
      frac = (int16_t)(tmpU32no1 & 0x00003fff); // Q14
      tmp16no2 += (int16_t)((tmp16no1 * frac) >> 14);
      if (tmpIndFX) {
        tmpIndFX = 8192 + tmp16no2; // Q14
      } else {
        tmpIndFX = 8192 - tmp16no2; // Q14
      }
    }
    indPriorFX += inst->weightSpecFlat * tmpIndFX;  // 6*Q14
  }

  //for template spectral-difference
  if (inst->weightSpecDiff) {
    tmpU32no1 = 0;
    if (inst->featureSpecDiff) {
      normTmp = WEBRTC_SPL_MIN(20 - inst->stages,
                               WebRtcSpl_NormU32(inst->featureSpecDiff));
      assert(normTmp >= 0);
      tmpU32no1 = inst->featureSpecDiff << normTmp;  // Q(normTmp-2*stages)
      tmpU32no2 = inst->timeAvgMagnEnergy >> (20 - inst->stages - normTmp);
      if (tmpU32no2 > 0) {
        // Q(20 - inst->stages)
        tmpU32no1 /= tmpU32no2;
      } else {
        tmpU32no1 = (uint32_t)(0x7fffffff);
      }
    }
    tmpU32no3 = (inst->thresholdSpecDiff << 17) / 25;
    tmpU32no2 = tmpU32no1 - tmpU32no3;
    nShifts = 1;
    tmpIndFX = 16384; // Q14(1.0)
    //use larger width in tanh map for pause regions
    if (tmpU32no2 & 0x80000000) {
      tmpIndFX = 0;
      tmpU32no2 = tmpU32no3 - tmpU32no1;
      //widthPrior = widthPrior * 2.0;
      nShifts--;
    }
    tmpU32no1 = tmpU32no2 >> nShifts;
    // compute indicator function: sigmoid map
    /* FLOAT code
     indicator2 = 0.5 * (tanh(widthPrior * (tmpFloat1 - threshPrior2)) + 1.0);
     */
    tableIndex = (int16_t)(tmpU32no1 >> 14);
    if (tableIndex < 16) {
      tmp16no2 = kIndicatorTable[tableIndex];
      tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
      frac = (int16_t)(tmpU32no1 & 0x00003fff); // Q14
      tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
                    tmp16no1, frac, 14);
      if (tmpIndFX) {
        tmpIndFX = 8192 + tmp16no2;
      } else {
        tmpIndFX = 8192 - tmp16no2;
      }
    }
    indPriorFX += inst->weightSpecDiff * tmpIndFX;  // 6*Q14
  }

  //combine the indicator function with the feature weights
  // FLOAT code
  // indPrior = 1 - (weightIndPrior0 * indicator0 + weightIndPrior1 *
  //                 indicator1 + weightIndPrior2 * indicator2);
  indPriorFX16 = WebRtcSpl_DivW32W16ResW16(98307 - indPriorFX, 6); // Q14
  // done with computing indicator function

  //compute the prior probability
  // FLOAT code
  // inst->priorNonSpeechProb += PRIOR_UPDATE *
  //                             (indPriorNonSpeech - inst->priorNonSpeechProb);
  tmp16 = indPriorFX16 - inst->priorNonSpeechProb; // Q14
  inst->priorNonSpeechProb += (int16_t)((PRIOR_UPDATE_Q14 * tmp16) >> 14);

  //final speech probability: combine prior model with LR factor:

  memset(nonSpeechProbFinal, 0, sizeof(uint16_t) * inst->magnLen);

  if (inst->priorNonSpeechProb > 0) {
    r0 = inst->priorNonSpeechProb;
    r1 = 16384 - r0;
    int32_t const_23637 = 23637;
    int32_t const_44 = 44;
    int32_t const_84 = 84;
    int32_t const_1 = 1;
    int32_t const_neg8 = -8;
    for (i = 0; i < inst->magnLen; i++) {
      r2 = inst->logLrtTimeAvgW32[i];
      if (r2 < 65300) {
        __asm __volatile(
          ".set         push                                      \n\t"
          ".set         noreorder                                 \n\t"
          "mul          %[r2],    %[r2],          %[const_23637]  \n\t"
          "sll          %[r6],    %[r1],          16              \n\t"
          "clz          %[r7],    %[r6]                           \n\t"
          "clo          %[r8],    %[r6]                           \n\t"
          "slt          %[r9],    %[r6],          $0              \n\t"
          "movn         %[r7],    %[r8],          %[r9]           \n\t"
          "sra          %[r2],    %[r2],          14              \n\t"
          "andi         %[r3],    %[r2],          0xfff           \n\t"
          "mul          %[r4],    %[r3],          %[r3]           \n\t"
          "mul          %[r3],    %[r3],          %[const_84]     \n\t"
          "sra          %[r2],    %[r2],          12              \n\t"
          "slt          %[r5],    %[r2],          %[const_neg8]   \n\t"
          "movn         %[r2],    %[const_neg8],  %[r5]           \n\t"
          "mul          %[r4],    %[r4],          %[const_44]     \n\t"
          "sra          %[r3],    %[r3],          7               \n\t"
          "addiu        %[r7],    %[r7],          -1              \n\t"
          "slti         %[r9],    %[r7],          31              \n\t"
          "movz         %[r7],    $0,             %[r9]           \n\t"
          "sra          %[r4],    %[r4],          19              \n\t"
          "addu         %[r4],    %[r4],          %[r3]           \n\t"
          "addiu        %[r3],    %[r2],          8               \n\t"
          "addiu        %[r2],    %[r2],          -4              \n\t"
          "neg          %[r5],    %[r2]                           \n\t"
          "sllv         %[r6],    %[r4],          %[r2]           \n\t"
          "srav         %[r5],    %[r4],          %[r5]           \n\t"
          "slt          %[r2],    %[r2],          $0              \n\t"
          "movn         %[r6],    %[r5],          %[r2]           \n\t"
          "sllv         %[r3],    %[const_1],     %[r3]           \n\t"
          "addu         %[r2],    %[r3],          %[r6]           \n\t"
          "clz          %[r4],    %[r2]                           \n\t"
          "clo          %[r5],    %[r2]                           \n\t"
          "slt          %[r8],    %[r2],          $0              \n\t"
          "movn         %[r4],    %[r5],          %[r8]           \n\t"
          "addiu        %[r4],    %[r4],          -1              \n\t"
          "slt          %[r5],    $0,             %[r2]           \n\t"
          "or           %[r5],    %[r5],          %[r7]           \n\t"
          "movz         %[r4],    $0,             %[r5]           \n\t"
          "addiu        %[r6],    %[r7],          -7              \n\t"
          "addu         %[r6],    %[r6],          %[r4]           \n\t"
          "bltz         %[r6],    1f                              \n\t"
          " nop                                                   \n\t"
          "addiu        %[r4],    %[r6],          -8              \n\t"
          "neg          %[r3],    %[r4]                           \n\t"
          "srav         %[r5],    %[r2],          %[r3]           \n\t"
          "mul          %[r5],    %[r5],          %[r1]           \n\t"
          "mul          %[r2],    %[r2],          %[r1]           \n\t"
          "slt          %[r4],    %[r4],          $0              \n\t"
          "srav         %[r5],    %[r5],          %[r6]           \n\t"
          "sra          %[r2],    %[r2],          8               \n\t"
          "movn         %[r2],    %[r5],          %[r4]           \n\t"
          "sll          %[r3],    %[r0],          8               \n\t"
          "addu         %[r2],    %[r0],          %[r2]           \n\t"
          "divu         %[r3],    %[r3],          %[r2]           \n\t"
         "1:                                                      \n\t"
          ".set         pop                                       \n\t"
          : [r2] "+r" (r2), [r3] "=&r" (r3), [r4] "=&r" (r4),
            [r5] "=&r" (r5), [r6] "=&r" (r6), [r7] "=&r" (r7),
            [r8] "=&r" (r8), [r9] "=&r" (r9)
          : [r0] "r" (r0), [r1] "r" (r1), [const_23637] "r" (const_23637),
            [const_neg8] "r" (const_neg8), [const_84] "r" (const_84),
            [const_1] "r" (const_1), [const_44] "r" (const_44)
          : "hi", "lo"
        );
        nonSpeechProbFinal[i] = r3;
      }
    }
  }
}

// Update analysis buffer for lower band, and window data before FFT.
void WebRtcNsx_AnalysisUpdate_mips(NoiseSuppressionFixedC* inst,
                                   int16_t* out,
                                   int16_t* new_speech) {
  int iters, after;
  int anaLen = (int)inst->anaLen;
  int *window = (int*)inst->window;
  int *anaBuf = (int*)inst->analysisBuffer;
  int *outBuf = (int*)out;
  int r0, r1, r2, r3, r4, r5, r6, r7;
#if defined(MIPS_DSP_R1_LE)
  int r8;
#endif

  // For lower band update analysis buffer.
  memcpy(inst->analysisBuffer, inst->analysisBuffer + inst->blockLen10ms,
      (inst->anaLen - inst->blockLen10ms) * sizeof(*inst->analysisBuffer));
  memcpy(inst->analysisBuffer + inst->anaLen - inst->blockLen10ms, new_speech,
      inst->blockLen10ms * sizeof(*inst->analysisBuffer));

  // Window data before FFT.
#if defined(MIPS_DSP_R1_LE)
  __asm __volatile(
    ".set              push                                \n\t"
    ".set              noreorder                           \n\t"
    "sra               %[iters],   %[anaLen],    3         \n\t"
   "1:                                                     \n\t"
    "blez              %[iters],   2f                      \n\t"
    " nop                                                  \n\t"
    "lw                %[r0],      0(%[window])            \n\t"
    "lw                %[r1],      0(%[anaBuf])            \n\t"
    "lw                %[r2],      4(%[window])            \n\t"
    "lw                %[r3],      4(%[anaBuf])            \n\t"
    "lw                %[r4],      8(%[window])            \n\t"
    "lw                %[r5],      8(%[anaBuf])            \n\t"
    "lw                %[r6],      12(%[window])           \n\t"
    "lw                %[r7],      12(%[anaBuf])           \n\t"
    "muleq_s.w.phl     %[r8],      %[r0],        %[r1]     \n\t"
    "muleq_s.w.phr     %[r0],      %[r0],        %[r1]     \n\t"
    "muleq_s.w.phl     %[r1],      %[r2],        %[r3]     \n\t"
    "muleq_s.w.phr     %[r2],      %[r2],        %[r3]     \n\t"
    "muleq_s.w.phl     %[r3],      %[r4],        %[r5]     \n\t"
    "muleq_s.w.phr     %[r4],      %[r4],        %[r5]     \n\t"
    "muleq_s.w.phl     %[r5],      %[r6],        %[r7]     \n\t"
    "muleq_s.w.phr     %[r6],      %[r6],        %[r7]     \n\t"
#if defined(MIPS_DSP_R2_LE)
    "precr_sra_r.ph.w  %[r8],      %[r0],        15        \n\t"
    "precr_sra_r.ph.w  %[r1],      %[r2],        15        \n\t"
    "precr_sra_r.ph.w  %[r3],      %[r4],        15        \n\t"
    "precr_sra_r.ph.w  %[r5],      %[r6],        15        \n\t"
    "sw                %[r8],      0(%[outBuf])            \n\t"
    "sw                %[r1],      4(%[outBuf])            \n\t"
    "sw                %[r3],      8(%[outBuf])            \n\t"
    "sw                %[r5],      12(%[outBuf])           \n\t"
#else
    "shra_r.w          %[r8],      %[r8],        15        \n\t"
    "shra_r.w          %[r0],      %[r0],        15        \n\t"
    "shra_r.w          %[r1],      %[r1],        15        \n\t"
    "shra_r.w          %[r2],      %[r2],        15        \n\t"
    "shra_r.w          %[r3],      %[r3],        15        \n\t"
    "shra_r.w          %[r4],      %[r4],        15        \n\t"
    "shra_r.w          %[r5],      %[r5],        15        \n\t"
    "shra_r.w          %[r6],      %[r6],        15        \n\t"
    "sll               %[r0],      %[r0],        16        \n\t"
    "sll               %[r2],      %[r2],        16        \n\t"
    "sll               %[r4],      %[r4],        16        \n\t"
    "sll               %[r6],      %[r6],        16        \n\t"
    "packrl.ph         %[r0],      %[r8],        %[r0]     \n\t"
    "packrl.ph         %[r2],      %[r1],        %[r2]     \n\t"
    "packrl.ph         %[r4],      %[r3],        %[r4]     \n\t"
    "packrl.ph         %[r6],      %[r5],        %[r6]     \n\t"
    "sw                %[r0],      0(%[outBuf])            \n\t"
    "sw                %[r2],      4(%[outBuf])            \n\t"
    "sw                %[r4],      8(%[outBuf])            \n\t"
    "sw                %[r6],      12(%[outBuf])           \n\t"
#endif
    "addiu             %[window],  %[window],    16        \n\t"
    "addiu             %[anaBuf],  %[anaBuf],    16        \n\t"
    "addiu             %[outBuf],  %[outBuf],    16        \n\t"
    "b                 1b                                  \n\t"
    " addiu            %[iters],   %[iters],     -1        \n\t"
   "2:                                                     \n\t"
    "andi              %[after],   %[anaLen],    7         \n\t"
   "3:                                                     \n\t"
    "blez              %[after],   4f                      \n\t"
    " nop                                                  \n\t"
    "lh                %[r0],      0(%[window])            \n\t"
    "lh                %[r1],      0(%[anaBuf])            \n\t"
    "mul               %[r0],      %[r0],        %[r1]     \n\t"
    "addiu             %[window],  %[window],    2         \n\t"
    "addiu             %[anaBuf],  %[anaBuf],    2         \n\t"
    "addiu             %[outBuf],  %[outBuf],    2         \n\t"
    "shra_r.w          %[r0],      %[r0],        14        \n\t"
    "sh                %[r0],      -2(%[outBuf])           \n\t"
    "b                 3b                                  \n\t"
    " addiu            %[after],   %[after],     -1        \n\t"
   "4:                                                     \n\t"
    ".set              pop                                 \n\t"
    : [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2),
      [r3] "=&r" (r3), [r4] "=&r" (r4), [r5] "=&r" (r5),
      [r6] "=&r" (r6), [r7] "=&r" (r7), [r8] "=&r" (r8),
      [iters] "=&r" (iters), [after] "=&r" (after),
      [window] "+r" (window),[anaBuf] "+r" (anaBuf),
      [outBuf] "+r" (outBuf)
    : [anaLen] "r" (anaLen)
    : "memory", "hi", "lo"
  );
#else
  __asm  __volatile(
    ".set           push                                    \n\t"
    ".set           noreorder                               \n\t"
    "sra            %[iters],   %[anaLen],      2           \n\t"
   "1:                                                      \n\t"
    "blez           %[iters],   2f                          \n\t"
    " nop                                                   \n\t"
    "lh             %[r0],      0(%[window])                \n\t"
    "lh             %[r1],      0(%[anaBuf])                \n\t"
    "lh             %[r2],      2(%[window])                \n\t"
    "lh             %[r3],      2(%[anaBuf])                \n\t"
    "lh             %[r4],      4(%[window])                \n\t"
    "lh             %[r5],      4(%[anaBuf])                \n\t"
    "lh             %[r6],      6(%[window])                \n\t"
    "lh             %[r7],      6(%[anaBuf])                \n\t"
    "mul            %[r0],      %[r0],          %[r1]       \n\t"
    "mul            %[r2],      %[r2],          %[r3]       \n\t"
    "mul            %[r4],      %[r4],          %[r5]       \n\t"
    "mul            %[r6],      %[r6],          %[r7]       \n\t"
    "addiu          %[window],  %[window],      8           \n\t"
    "addiu          %[anaBuf],  %[anaBuf],      8           \n\t"
    "addiu          %[r0],      %[r0],          0x2000      \n\t"
    "addiu          %[r2],      %[r2],          0x2000      \n\t"
    "addiu          %[r4],      %[r4],          0x2000      \n\t"
    "addiu          %[r6],      %[r6],          0x2000      \n\t"
    "sra            %[r0],      %[r0],          14          \n\t"
    "sra            %[r2],      %[r2],          14          \n\t"
    "sra            %[r4],      %[r4],          14          \n\t"
    "sra            %[r6],      %[r6],          14          \n\t"
    "sh             %[r0],      0(%[outBuf])                \n\t"
    "sh             %[r2],      2(%[outBuf])                \n\t"
    "sh             %[r4],      4(%[outBuf])                \n\t"
    "sh             %[r6],      6(%[outBuf])                \n\t"
    "addiu          %[outBuf],  %[outBuf],      8           \n\t"
    "b              1b                                      \n\t"
    " addiu         %[iters],   %[iters],       -1          \n\t"
   "2:                                                      \n\t"
    "andi           %[after],   %[anaLen],      3           \n\t"
   "3:                                                      \n\t"
    "blez           %[after],   4f                          \n\t"
    " nop                                                   \n\t"
    "lh             %[r0],      0(%[window])                \n\t"
    "lh             %[r1],      0(%[anaBuf])                \n\t"
    "mul            %[r0],      %[r0],          %[r1]       \n\t"
    "addiu          %[window],  %[window],      2           \n\t"
    "addiu          %[anaBuf],  %[anaBuf],      2           \n\t"
    "addiu          %[outBuf],  %[outBuf],      2           \n\t"
    "addiu          %[r0],      %[r0],          0x2000      \n\t"
    "sra            %[r0],      %[r0],          14          \n\t"
    "sh             %[r0],      -2(%[outBuf])               \n\t"
    "b              3b                                      \n\t"
    " addiu         %[after],   %[after],       -1          \n\t"
   "4:                                                      \n\t"
    ".set           pop                                     \n\t"
    : [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2),
      [r3] "=&r" (r3), [r4] "=&r" (r4), [r5] "=&r" (r5),
      [r6] "=&r" (r6), [r7] "=&r" (r7), [iters] "=&r" (iters),
      [after] "=&r" (after), [window] "+r" (window),
      [anaBuf] "+r" (anaBuf), [outBuf] "+r" (outBuf)
    : [anaLen] "r" (anaLen)
    : "memory", "hi", "lo"
  );
#endif
}

// For the noise supression process, synthesis, read out fully processed
// segment, and update synthesis buffer.
void WebRtcNsx_SynthesisUpdate_mips(NoiseSuppressionFixedC* inst,
                                    int16_t* out_frame,
                                    int16_t gain_factor) {
  int iters = (int)inst->blockLen10ms >> 2;
  int after = inst->blockLen10ms & 3;
  int r0, r1, r2, r3, r4, r5, r6, r7;
  int16_t *window = (int16_t*)inst->window;
  int16_t *real = inst->real;
  int16_t *synthBuf = inst->synthesisBuffer;
  int16_t *out = out_frame;
  int sat_pos = 0x7fff;
  int sat_neg = 0xffff8000;
  int block10 = (int)inst->blockLen10ms;
  int anaLen = (int)inst->anaLen;

  __asm __volatile(
    ".set       push                                        \n\t"
    ".set       noreorder                                   \n\t"
   "1:                                                      \n\t"
    "blez       %[iters],   2f                              \n\t"
    " nop                                                   \n\t"
    "lh         %[r0],      0(%[window])                    \n\t"
    "lh         %[r1],      0(%[real])                      \n\t"
    "lh         %[r2],      2(%[window])                    \n\t"
    "lh         %[r3],      2(%[real])                      \n\t"
    "lh         %[r4],      4(%[window])                    \n\t"
    "lh         %[r5],      4(%[real])                      \n\t"
    "lh         %[r6],      6(%[window])                    \n\t"
    "lh         %[r7],      6(%[real])                      \n\t"
    "mul        %[r0],      %[r0],          %[r1]           \n\t"
    "mul        %[r2],      %[r2],          %[r3]           \n\t"
    "mul        %[r4],      %[r4],          %[r5]           \n\t"
    "mul        %[r6],      %[r6],          %[r7]           \n\t"
    "addiu      %[r0],      %[r0],          0x2000          \n\t"
    "addiu      %[r2],      %[r2],          0x2000          \n\t"
    "addiu      %[r4],      %[r4],          0x2000          \n\t"
    "addiu      %[r6],      %[r6],          0x2000          \n\t"
    "sra        %[r0],      %[r0],          14              \n\t"
    "sra        %[r2],      %[r2],          14              \n\t"
    "sra        %[r4],      %[r4],          14              \n\t"
    "sra        %[r6],      %[r6],          14              \n\t"
    "mul        %[r0],      %[r0],          %[gain_factor]  \n\t"
    "mul        %[r2],      %[r2],          %[gain_factor]  \n\t"
    "mul        %[r4],      %[r4],          %[gain_factor]  \n\t"
    "mul        %[r6],      %[r6],          %[gain_factor]  \n\t"
    "addiu      %[r0],      %[r0],          0x1000          \n\t"
    "addiu      %[r2],      %[r2],          0x1000          \n\t"
    "addiu      %[r4],      %[r4],          0x1000          \n\t"
    "addiu      %[r6],      %[r6],          0x1000          \n\t"
    "sra        %[r0],      %[r0],          13              \n\t"
    "sra        %[r2],      %[r2],          13              \n\t"
    "sra        %[r4],      %[r4],          13              \n\t"
    "sra        %[r6],      %[r6],          13              \n\t"
    "slt        %[r1],      %[r0],          %[sat_pos]      \n\t"
    "slt        %[r3],      %[r2],          %[sat_pos]      \n\t"
    "slt        %[r5],      %[r4],          %[sat_pos]      \n\t"
    "slt        %[r7],      %[r6],          %[sat_pos]      \n\t"
    "movz       %[r0],      %[sat_pos],     %[r1]           \n\t"
    "movz       %[r2],      %[sat_pos],     %[r3]           \n\t"
    "movz       %[r4],      %[sat_pos],     %[r5]           \n\t"
    "movz       %[r6],      %[sat_pos],     %[r7]           \n\t"
    "lh         %[r1],      0(%[synthBuf])                  \n\t"
    "lh         %[r3],      2(%[synthBuf])                  \n\t"
    "lh         %[r5],      4(%[synthBuf])                  \n\t"
    "lh         %[r7],      6(%[synthBuf])                  \n\t"
    "addu       %[r0],      %[r0],          %[r1]           \n\t"
    "addu       %[r2],      %[r2],          %[r3]           \n\t"
    "addu       %[r4],      %[r4],          %[r5]           \n\t"
    "addu       %[r6],      %[r6],          %[r7]           \n\t"
    "slt        %[r1],      %[r0],          %[sat_pos]      \n\t"
    "slt        %[r3],      %[r2],          %[sat_pos]      \n\t"
    "slt        %[r5],      %[r4],          %[sat_pos]      \n\t"
    "slt        %[r7],      %[r6],          %[sat_pos]      \n\t"
    "movz       %[r0],      %[sat_pos],     %[r1]           \n\t"
    "movz       %[r2],      %[sat_pos],     %[r3]           \n\t"
    "movz       %[r4],      %[sat_pos],     %[r5]           \n\t"
    "movz       %[r6],      %[sat_pos],     %[r7]           \n\t"
    "slt        %[r1],      %[r0],          %[sat_neg]      \n\t"
    "slt        %[r3],      %[r2],          %[sat_neg]      \n\t"
    "slt        %[r5],      %[r4],          %[sat_neg]      \n\t"
    "slt        %[r7],      %[r6],          %[sat_neg]      \n\t"
    "movn       %[r0],      %[sat_neg],     %[r1]           \n\t"
    "movn       %[r2],      %[sat_neg],     %[r3]           \n\t"
    "movn       %[r4],      %[sat_neg],     %[r5]           \n\t"
    "movn       %[r6],      %[sat_neg],     %[r7]           \n\t"
    "sh         %[r0],      0(%[synthBuf])                  \n\t"
    "sh         %[r2],      2(%[synthBuf])                  \n\t"
    "sh         %[r4],      4(%[synthBuf])                  \n\t"
    "sh         %[r6],      6(%[synthBuf])                  \n\t"
    "sh         %[r0],      0(%[out])                       \n\t"
    "sh         %[r2],      2(%[out])                       \n\t"
    "sh         %[r4],      4(%[out])                       \n\t"
    "sh         %[r6],      6(%[out])                       \n\t"
    "addiu      %[window],  %[window],      8               \n\t"
    "addiu      %[real],    %[real],        8               \n\t"
    "addiu      %[synthBuf],%[synthBuf],    8               \n\t"
    "addiu      %[out],     %[out],         8               \n\t"
    "b          1b                                          \n\t"
    " addiu     %[iters],   %[iters],       -1              \n\t"
   "2:                                                      \n\t"
    "blez       %[after],   3f                              \n\t"
    " subu      %[block10], %[anaLen],      %[block10]      \n\t"
    "lh         %[r0],      0(%[window])                    \n\t"
    "lh         %[r1],      0(%[real])                      \n\t"
    "mul        %[r0],      %[r0],          %[r1]           \n\t"
    "addiu      %[window],  %[window],      2               \n\t"
    "addiu      %[real],    %[real],        2               \n\t"
    "addiu      %[r0],      %[r0],          0x2000          \n\t"
    "sra        %[r0],      %[r0],          14              \n\t"
    "mul        %[r0],      %[r0],          %[gain_factor]  \n\t"
    "addiu      %[r0],      %[r0],          0x1000          \n\t"
    "sra        %[r0],      %[r0],          13              \n\t"
    "slt        %[r1],      %[r0],          %[sat_pos]      \n\t"
    "movz       %[r0],      %[sat_pos],     %[r1]           \n\t"
    "lh         %[r1],      0(%[synthBuf])                  \n\t"
    "addu       %[r0],      %[r0],          %[r1]           \n\t"
    "slt        %[r1],      %[r0],          %[sat_pos]      \n\t"
    "movz       %[r0],      %[sat_pos],     %[r1]           \n\t"
    "slt        %[r1],      %[r0],          %[sat_neg]      \n\t"
    "movn       %[r0],      %[sat_neg],     %[r1]           \n\t"
    "sh         %[r0],      0(%[synthBuf])                  \n\t"
    "sh         %[r0],      0(%[out])                       \n\t"
    "addiu      %[synthBuf],%[synthBuf],    2               \n\t"
    "addiu      %[out],     %[out],         2               \n\t"
    "b          2b                                          \n\t"
    " addiu     %[after],   %[after],       -1              \n\t"
   "3:                                                      \n\t"
    "sra        %[iters],   %[block10],     2               \n\t"
   "4:                                                      \n\t"
    "blez       %[iters],   5f                              \n\t"
    " andi      %[after],   %[block10],     3               \n\t"
    "lh         %[r0],      0(%[window])                    \n\t"
    "lh         %[r1],      0(%[real])                      \n\t"
    "lh         %[r2],      2(%[window])                    \n\t"
    "lh         %[r3],      2(%[real])                      \n\t"
    "lh         %[r4],      4(%[window])                    \n\t"
    "lh         %[r5],      4(%[real])                      \n\t"
    "lh         %[r6],      6(%[window])                    \n\t"
    "lh         %[r7],      6(%[real])                      \n\t"
    "mul        %[r0],      %[r0],          %[r1]           \n\t"
    "mul        %[r2],      %[r2],          %[r3]           \n\t"
    "mul        %[r4],      %[r4],          %[r5]           \n\t"
    "mul        %[r6],      %[r6],          %[r7]           \n\t"
    "addiu      %[r0],      %[r0],          0x2000          \n\t"
    "addiu      %[r2],      %[r2],          0x2000          \n\t"
    "addiu      %[r4],      %[r4],          0x2000          \n\t"
    "addiu      %[r6],      %[r6],          0x2000          \n\t"
    "sra        %[r0],      %[r0],          14              \n\t"
    "sra        %[r2],      %[r2],          14              \n\t"
    "sra        %[r4],      %[r4],          14              \n\t"
    "sra        %[r6],      %[r6],          14              \n\t"
    "mul        %[r0],      %[r0],          %[gain_factor]  \n\t"
    "mul        %[r2],      %[r2],          %[gain_factor]  \n\t"
    "mul        %[r4],      %[r4],          %[gain_factor]  \n\t"
    "mul        %[r6],      %[r6],          %[gain_factor]  \n\t"
    "addiu      %[r0],      %[r0],          0x1000          \n\t"
    "addiu      %[r2],      %[r2],          0x1000          \n\t"
    "addiu      %[r4],      %[r4],          0x1000          \n\t"
    "addiu      %[r6],      %[r6],          0x1000          \n\t"
    "sra        %[r0],      %[r0],          13              \n\t"
    "sra        %[r2],      %[r2],          13              \n\t"
    "sra        %[r4],      %[r4],          13              \n\t"
    "sra        %[r6],      %[r6],          13              \n\t"
    "slt        %[r1],      %[r0],          %[sat_pos]      \n\t"
    "slt        %[r3],      %[r2],          %[sat_pos]      \n\t"
    "slt        %[r5],      %[r4],          %[sat_pos]      \n\t"
    "slt        %[r7],      %[r6],          %[sat_pos]      \n\t"
    "movz       %[r0],      %[sat_pos],     %[r1]           \n\t"
    "movz       %[r2],      %[sat_pos],     %[r3]           \n\t"
    "movz       %[r4],      %[sat_pos],     %[r5]           \n\t"
    "movz       %[r6],      %[sat_pos],     %[r7]           \n\t"
    "lh         %[r1],      0(%[synthBuf])                  \n\t"
    "lh         %[r3],      2(%[synthBuf])                  \n\t"
    "lh         %[r5],      4(%[synthBuf])                  \n\t"
    "lh         %[r7],      6(%[synthBuf])                  \n\t"
    "addu       %[r0],      %[r0],          %[r1]           \n\t"
    "addu       %[r2],      %[r2],          %[r3]           \n\t"
    "addu       %[r4],      %[r4],          %[r5]           \n\t"
    "addu       %[r6],      %[r6],          %[r7]           \n\t"
    "slt        %[r1],      %[r0],          %[sat_pos]      \n\t"
    "slt        %[r3],      %[r2],          %[sat_pos]      \n\t"
    "slt        %[r5],      %[r4],          %[sat_pos]      \n\t"
    "slt        %[r7],      %[r6],          %[sat_pos]      \n\t"
    "movz       %[r0],      %[sat_pos],     %[r1]           \n\t"
    "movz       %[r2],      %[sat_pos],     %[r3]           \n\t"
    "movz       %[r4],      %[sat_pos],     %[r5]           \n\t"
    "movz       %[r6],      %[sat_pos],     %[r7]           \n\t"
    "slt        %[r1],      %[r0],          %[sat_neg]      \n\t"
    "slt        %[r3],      %[r2],          %[sat_neg]      \n\t"
    "slt        %[r5],      %[r4],          %[sat_neg]      \n\t"
    "slt        %[r7],      %[r6],          %[sat_neg]      \n\t"
    "movn       %[r0],      %[sat_neg],     %[r1]           \n\t"
    "movn       %[r2],      %[sat_neg],     %[r3]           \n\t"
    "movn       %[r4],      %[sat_neg],     %[r5]           \n\t"
    "movn       %[r6],      %[sat_neg],     %[r7]           \n\t"
    "sh         %[r0],      0(%[synthBuf])                  \n\t"
    "sh         %[r2],      2(%[synthBuf])                  \n\t"
    "sh         %[r4],      4(%[synthBuf])                  \n\t"
    "sh         %[r6],      6(%[synthBuf])                  \n\t"
    "addiu      %[window],  %[window],      8               \n\t"
    "addiu      %[real],    %[real],        8               \n\t"
    "addiu      %[synthBuf],%[synthBuf],    8               \n\t"
    "b          4b                                          \n\t"
    " addiu     %[iters],   %[iters],       -1              \n\t"
   "5:                                                      \n\t"
    "blez       %[after],   6f                              \n\t"
    " nop                                                   \n\t"
    "lh         %[r0],      0(%[window])                    \n\t"
    "lh         %[r1],      0(%[real])                      \n\t"
    "mul        %[r0],      %[r0],          %[r1]           \n\t"
    "addiu      %[window],  %[window],      2               \n\t"
    "addiu      %[real],    %[real],        2               \n\t"
    "addiu      %[r0],      %[r0],          0x2000          \n\t"
    "sra        %[r0],      %[r0],          14              \n\t"
    "mul        %[r0],      %[r0],          %[gain_factor]  \n\t"
    "addiu      %[r0],      %[r0],          0x1000          \n\t"
    "sra        %[r0],      %[r0],          13              \n\t"
    "slt        %[r1],      %[r0],          %[sat_pos]      \n\t"
    "movz       %[r0],      %[sat_pos],     %[r1]           \n\t"
    "lh         %[r1],      0(%[synthBuf])                  \n\t"
    "addu       %[r0],      %[r0],          %[r1]           \n\t"
    "slt        %[r1],      %[r0],          %[sat_pos]      \n\t"
    "movz       %[r0],      %[sat_pos],     %[r1]           \n\t"
    "slt        %[r1],      %[r0],          %[sat_neg]      \n\t"
    "movn       %[r0],      %[sat_neg],     %[r1]           \n\t"
    "sh         %[r0],      0(%[synthBuf])                  \n\t"
    "addiu      %[synthBuf],%[synthBuf],    2               \n\t"
    "b          2b                                          \n\t"
    " addiu     %[after],   %[after],       -1              \n\t"
   "6:                                                      \n\t"
    ".set       pop                                         \n\t"
    : [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2),
      [r3] "=&r" (r3), [r4] "=&r" (r4), [r5] "=&r" (r5),
      [r6] "=&r" (r6), [r7] "=&r" (r7), [iters] "+r" (iters),
      [after] "+r" (after), [block10] "+r" (block10),
      [window] "+r" (window), [real] "+r" (real),
      [synthBuf] "+r" (synthBuf), [out] "+r" (out)
    : [gain_factor] "r" (gain_factor), [sat_pos] "r" (sat_pos),
      [sat_neg] "r" (sat_neg), [anaLen] "r" (anaLen)
    : "memory", "hi", "lo"
  );

  // update synthesis buffer
  memcpy(inst->synthesisBuffer, inst->synthesisBuffer + inst->blockLen10ms,
      (inst->anaLen - inst->blockLen10ms) * sizeof(*inst->synthesisBuffer));
  WebRtcSpl_ZerosArrayW16(inst->synthesisBuffer
      + inst->anaLen - inst->blockLen10ms, inst->blockLen10ms);
}

// Filter the data in the frequency domain, and create spectrum.
void WebRtcNsx_PrepareSpectrum_mips(NoiseSuppressionFixedC* inst,
                                    int16_t* freq_buf) {
  uint16_t *noiseSupFilter = inst->noiseSupFilter;
  int16_t *real = inst->real;
  int16_t *imag = inst->imag;
  int32_t loop_count = 2;
  int16_t tmp_1, tmp_2, tmp_3, tmp_4, tmp_5, tmp_6;
  int16_t tmp16 = (int16_t)(inst->anaLen << 1) - 4;
  int16_t* freq_buf_f = freq_buf;
  int16_t* freq_buf_s = &freq_buf[tmp16];

  __asm __volatile (
    ".set       push                                                 \n\t"
    ".set       noreorder                                            \n\t"
    //first sample
    "lh         %[tmp_1],           0(%[noiseSupFilter])             \n\t"
    "lh         %[tmp_2],           0(%[real])                       \n\t"
    "lh         %[tmp_3],           0(%[imag])                       \n\t"
    "mul        %[tmp_2],           %[tmp_2],             %[tmp_1]   \n\t"
    "mul        %[tmp_3],           %[tmp_3],             %[tmp_1]   \n\t"
    "sra        %[tmp_2],           %[tmp_2],             14         \n\t"
    "sra        %[tmp_3],           %[tmp_3],             14         \n\t"
    "sh         %[tmp_2],           0(%[real])                       \n\t"
    "sh         %[tmp_3],           0(%[imag])                       \n\t"
    "negu       %[tmp_3],           %[tmp_3]                         \n\t"
    "sh         %[tmp_2],           0(%[freq_buf_f])                 \n\t"
    "sh         %[tmp_3],           2(%[freq_buf_f])                 \n\t"
    "addiu      %[real],            %[real],              2          \n\t"
    "addiu      %[imag],            %[imag],              2          \n\t"
    "addiu      %[noiseSupFilter],  %[noiseSupFilter],    2          \n\t"
    "addiu      %[freq_buf_f],      %[freq_buf_f],        4          \n\t"
   "1:                                                               \n\t"
    "lh         %[tmp_1],           0(%[noiseSupFilter])             \n\t"
    "lh         %[tmp_2],           0(%[real])                       \n\t"
    "lh         %[tmp_3],           0(%[imag])                       \n\t"
    "lh         %[tmp_4],           2(%[noiseSupFilter])             \n\t"
    "lh         %[tmp_5],           2(%[real])                       \n\t"
    "lh         %[tmp_6],           2(%[imag])                       \n\t"
    "mul        %[tmp_2],           %[tmp_2],             %[tmp_1]   \n\t"
    "mul        %[tmp_3],           %[tmp_3],             %[tmp_1]   \n\t"
    "mul        %[tmp_5],           %[tmp_5],             %[tmp_4]   \n\t"
    "mul        %[tmp_6],           %[tmp_6],             %[tmp_4]   \n\t"
    "addiu      %[loop_count],      %[loop_count],        2          \n\t"
    "sra        %[tmp_2],           %[tmp_2],             14         \n\t"
    "sra        %[tmp_3],           %[tmp_3],             14         \n\t"
    "sra        %[tmp_5],           %[tmp_5],             14         \n\t"
    "sra        %[tmp_6],           %[tmp_6],             14         \n\t"
    "addiu      %[noiseSupFilter],  %[noiseSupFilter],    4          \n\t"
    "sh         %[tmp_2],           0(%[real])                       \n\t"
    "sh         %[tmp_2],           4(%[freq_buf_s])                 \n\t"
    "sh         %[tmp_3],           0(%[imag])                       \n\t"
    "sh         %[tmp_3],           6(%[freq_buf_s])                 \n\t"
    "negu       %[tmp_3],           %[tmp_3]                         \n\t"
    "sh         %[tmp_5],           2(%[real])                       \n\t"
    "sh         %[tmp_5],           0(%[freq_buf_s])                 \n\t"
    "sh         %[tmp_6],           2(%[imag])                       \n\t"
    "sh         %[tmp_6],           2(%[freq_buf_s])                 \n\t"
    "negu       %[tmp_6],           %[tmp_6]                         \n\t"
    "addiu      %[freq_buf_s],      %[freq_buf_s],        -8         \n\t"
    "addiu      %[real],            %[real],              4          \n\t"
    "addiu      %[imag],            %[imag],              4          \n\t"
    "sh         %[tmp_2],           0(%[freq_buf_f])                 \n\t"
    "sh         %[tmp_3],           2(%[freq_buf_f])                 \n\t"
    "sh         %[tmp_5],           4(%[freq_buf_f])                 \n\t"
    "sh         %[tmp_6],           6(%[freq_buf_f])                 \n\t"
    "blt        %[loop_count],      %[loop_size],         1b         \n\t"
    " addiu     %[freq_buf_f],      %[freq_buf_f],        8          \n\t"
    //last two samples:
    "lh         %[tmp_1],           0(%[noiseSupFilter])             \n\t"
    "lh         %[tmp_2],           0(%[real])                       \n\t"
    "lh         %[tmp_3],           0(%[imag])                       \n\t"
    "lh         %[tmp_4],           2(%[noiseSupFilter])             \n\t"
    "lh         %[tmp_5],           2(%[real])                       \n\t"
    "lh         %[tmp_6],           2(%[imag])                       \n\t"
    "mul        %[tmp_2],           %[tmp_2],             %[tmp_1]   \n\t"
    "mul        %[tmp_3],           %[tmp_3],             %[tmp_1]   \n\t"
    "mul        %[tmp_5],           %[tmp_5],             %[tmp_4]   \n\t"
    "mul        %[tmp_6],           %[tmp_6],             %[tmp_4]   \n\t"
    "sra        %[tmp_2],           %[tmp_2],             14         \n\t"
    "sra        %[tmp_3],           %[tmp_3],             14         \n\t"
    "sra        %[tmp_5],           %[tmp_5],             14         \n\t"
    "sra        %[tmp_6],           %[tmp_6],             14         \n\t"
    "sh         %[tmp_2],           0(%[real])                       \n\t"
    "sh         %[tmp_2],           4(%[freq_buf_s])                 \n\t"
    "sh         %[tmp_3],           0(%[imag])                       \n\t"
    "sh         %[tmp_3],           6(%[freq_buf_s])                 \n\t"
    "negu       %[tmp_3],           %[tmp_3]                         \n\t"
    "sh         %[tmp_2],           0(%[freq_buf_f])                 \n\t"
    "sh         %[tmp_3],           2(%[freq_buf_f])                 \n\t"
    "sh         %[tmp_5],           4(%[freq_buf_f])                 \n\t"
    "sh         %[tmp_6],           6(%[freq_buf_f])                 \n\t"
    "sh         %[tmp_5],           2(%[real])                       \n\t"
    "sh         %[tmp_6],           2(%[imag])                       \n\t"
    ".set       pop                                                  \n\t"
    : [real] "+r" (real), [imag] "+r" (imag),
      [freq_buf_f] "+r" (freq_buf_f), [freq_buf_s] "+r" (freq_buf_s),
      [loop_count] "+r" (loop_count), [noiseSupFilter] "+r" (noiseSupFilter),
      [tmp_1] "=&r" (tmp_1), [tmp_2] "=&r" (tmp_2), [tmp_3] "=&r" (tmp_3),
      [tmp_4] "=&r" (tmp_4), [tmp_5] "=&r" (tmp_5), [tmp_6] "=&r" (tmp_6)
    : [loop_size] "r" (inst->anaLen2)
    : "memory", "hi", "lo"
  );
}

#if defined(MIPS_DSP_R1_LE)
// Denormalize the real-valued signal |in|, the output from inverse FFT.
void WebRtcNsx_Denormalize_mips(NoiseSuppressionFixedC* inst,
                                int16_t* in,
                                int factor) {
  int32_t r0, r1, r2, r3, t0;
  int len = (int)inst->anaLen;
  int16_t *out = &inst->real[0];
  int shift = factor - inst->normData;

  __asm __volatile (
    ".set          push                                \n\t"
    ".set          noreorder                           \n\t"
    "beqz          %[len],     8f                      \n\t"
    " nop                                              \n\t"
    "bltz          %[shift],   4f                      \n\t"
    " sra          %[t0],      %[len],      2          \n\t"
    "beqz          %[t0],      2f                      \n\t"
    " andi         %[len],     %[len],      3          \n\t"
   "1:                                                 \n\t"
    "lh            %[r0],      0(%[in])                \n\t"
    "lh            %[r1],      2(%[in])                \n\t"
    "lh            %[r2],      4(%[in])                \n\t"
    "lh            %[r3],      6(%[in])                \n\t"
    "shllv_s.ph    %[r0],      %[r0],       %[shift]   \n\t"
    "shllv_s.ph    %[r1],      %[r1],       %[shift]   \n\t"
    "shllv_s.ph    %[r2],      %[r2],       %[shift]   \n\t"
    "shllv_s.ph    %[r3],      %[r3],       %[shift]   \n\t"
    "addiu         %[in],      %[in],       8          \n\t"
    "addiu         %[t0],      %[t0],       -1         \n\t"
    "sh            %[r0],      0(%[out])               \n\t"
    "sh            %[r1],      2(%[out])               \n\t"
    "sh            %[r2],      4(%[out])               \n\t"
    "sh            %[r3],      6(%[out])               \n\t"
    "bgtz          %[t0],      1b                      \n\t"
    " addiu        %[out],     %[out],      8          \n\t"
   "2:                                                 \n\t"
    "beqz          %[len],     8f                      \n\t"
    " nop                                              \n\t"
   "3:                                                 \n\t"
    "lh            %[r0],      0(%[in])                \n\t"
    "addiu         %[in],      %[in],       2          \n\t"
    "addiu         %[len],     %[len],      -1         \n\t"
    "shllv_s.ph    %[r0],      %[r0],       %[shift]   \n\t"
    "addiu         %[out],     %[out],      2          \n\t"
    "bgtz          %[len],     3b                      \n\t"
    " sh           %[r0],      -2(%[out])              \n\t"
    "b             8f                                  \n\t"
   "4:                                                 \n\t"
    "negu          %[shift],   %[shift]                \n\t"
    "beqz          %[t0],      6f                      \n\t"
    " andi         %[len],     %[len],      3          \n\t"
   "5:                                                 \n\t"
    "lh            %[r0],      0(%[in])                \n\t"
    "lh            %[r1],      2(%[in])                \n\t"
    "lh            %[r2],      4(%[in])                \n\t"
    "lh            %[r3],      6(%[in])                \n\t"
    "srav          %[r0],      %[r0],       %[shift]   \n\t"
    "srav          %[r1],      %[r1],       %[shift]   \n\t"
    "srav          %[r2],      %[r2],       %[shift]   \n\t"
    "srav          %[r3],      %[r3],       %[shift]   \n\t"
    "addiu         %[in],      %[in],       8          \n\t"
    "addiu         %[t0],      %[t0],       -1         \n\t"
    "sh            %[r0],      0(%[out])               \n\t"
    "sh            %[r1],      2(%[out])               \n\t"
    "sh            %[r2],      4(%[out])               \n\t"
    "sh            %[r3],      6(%[out])               \n\t"
    "bgtz          %[t0],      5b                      \n\t"
    " addiu        %[out],     %[out],      8          \n\t"
   "6:                                                 \n\t"
    "beqz          %[len],     8f                      \n\t"
    " nop                                              \n\t"
   "7:                                                 \n\t"
    "lh            %[r0],      0(%[in])                \n\t"
    "addiu         %[in],      %[in],       2          \n\t"
    "addiu         %[len],     %[len],      -1         \n\t"
    "srav          %[r0],      %[r0],       %[shift]   \n\t"
    "addiu         %[out],     %[out],      2          \n\t"
    "bgtz          %[len],     7b                      \n\t"
    " sh           %[r0],      -2(%[out])              \n\t"
   "8:                                                 \n\t"
    ".set          pop                                 \n\t"
    : [t0] "=&r" (t0), [r0] "=&r" (r0), [r1] "=&r" (r1),
      [r2] "=&r" (r2), [r3] "=&r" (r3)
    : [len] "r" (len), [shift] "r" (shift), [in] "r" (in),
      [out] "r" (out)
    : "memory"
  );
}
#endif

// Normalize the real-valued signal |in|, the input to forward FFT.
void WebRtcNsx_NormalizeRealBuffer_mips(NoiseSuppressionFixedC* inst,
                                        const int16_t* in,
                                        int16_t* out) {
  int32_t r0, r1, r2, r3, t0;
  int len = (int)inst->anaLen;
  int shift = inst->normData;

  __asm __volatile (
    ".set          push                                \n\t"
    ".set          noreorder                           \n\t"
    "beqz          %[len],     4f                      \n\t"
    " sra          %[t0],      %[len],      2          \n\t"
    "beqz          %[t0],      2f                      \n\t"
    " andi         %[len],     %[len],      3          \n\t"
   "1:                                                 \n\t"
    "lh            %[r0],      0(%[in])                \n\t"
    "lh            %[r1],      2(%[in])                \n\t"
    "lh            %[r2],      4(%[in])                \n\t"
    "lh            %[r3],      6(%[in])                \n\t"
    "sllv          %[r0],      %[r0],       %[shift]   \n\t"
    "sllv          %[r1],      %[r1],       %[shift]   \n\t"
    "sllv          %[r2],      %[r2],       %[shift]   \n\t"
    "sllv          %[r3],      %[r3],       %[shift]   \n\t"
    "addiu         %[in],      %[in],       8          \n\t"
    "addiu         %[t0],      %[t0],       -1         \n\t"
    "sh            %[r0],      0(%[out])               \n\t"
    "sh            %[r1],      2(%[out])               \n\t"
    "sh            %[r2],      4(%[out])               \n\t"
    "sh            %[r3],      6(%[out])               \n\t"
    "bgtz          %[t0],      1b                      \n\t"
    " addiu        %[out],     %[out],      8          \n\t"
   "2:                                                 \n\t"
    "beqz          %[len],     4f                      \n\t"
    " nop                                              \n\t"
   "3:                                                 \n\t"
    "lh            %[r0],      0(%[in])                \n\t"
    "addiu         %[in],      %[in],       2          \n\t"
    "addiu         %[len],     %[len],      -1         \n\t"
    "sllv          %[r0],      %[r0],       %[shift]   \n\t"
    "addiu         %[out],     %[out],      2          \n\t"
    "bgtz          %[len],     3b                      \n\t"
    " sh           %[r0],      -2(%[out])              \n\t"
   "4:                                                 \n\t"
    ".set          pop                                 \n\t"
    : [t0] "=&r" (t0), [r0] "=&r" (r0), [r1] "=&r" (r1),
      [r2] "=&r" (r2), [r3] "=&r" (r3)
    : [len] "r" (len), [shift] "r" (shift), [in] "r" (in),
      [out] "r" (out)
    : "memory"
  );
}