Home | History | Annotate | Download | only in source
      1 /*
      2  *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #include <arm_neon.h>
     12 #include <assert.h>
     13 
     14 #include "webrtc/modules/audio_coding/codecs/isac/fix/source/codec.h"
     15 
     16 // Autocorrelation function in fixed point.
     17 // NOTE! Different from SPLIB-version in how it scales the signal.
     18 int WebRtcIsacfix_AutocorrNeon(int32_t* __restrict r,
     19                                const int16_t* x,
     20                                int16_t n,
     21                                int16_t order,
     22                                int16_t* __restrict scale) {
     23   int i = 0;
     24   int16_t scaling = 0;
     25   uint32_t temp = 0;
     26   int64_t prod = 0;
     27   int64_t prod_tail = 0;
     28 
     29   assert(n % 4 == 0);
     30   assert(n >= 8);
     31 
     32   // Calculate r[0].
     33   int16x4_t x0_v;
     34   int32x4_t tmpa0_v;
     35   int64x2_t tmpb_v;
     36 
     37   tmpb_v = vdupq_n_s64(0);
     38   const int16_t* x_start = x;
     39   const int16_t* x_end0 = x_start + n;
     40   while (x_start < x_end0) {
     41     x0_v = vld1_s16(x_start);
     42     tmpa0_v = vmull_s16(x0_v, x0_v);
     43     tmpb_v = vpadalq_s32(tmpb_v, tmpa0_v);
     44     x_start += 4;
     45   }
     46 
     47 #ifdef WEBRTC_ARCH_ARM64
     48   prod = vaddvq_s64(tmpb_v);
     49 #else
     50   prod = vget_lane_s64(vadd_s64(vget_low_s64(tmpb_v), vget_high_s64(tmpb_v)),
     51                        0);
     52 #endif
     53   // Calculate scaling (the value of shifting).
     54   temp = (uint32_t)(prod >> 31);
     55 
     56   scaling = temp ? 32 - WebRtcSpl_NormU32(temp) : 0;
     57   r[0] = (int32_t)(prod >> scaling);
     58 
     59   int16x8_t x1_v;
     60   int16x8_t y_v;
     61   int32x4_t tmpa1_v;
     62   // Perform the actual correlation calculation.
     63   for (i = 1; i < order + 1; i++) {
     64     tmpb_v = vdupq_n_s64(0);
     65     int rest = (n - i) % 8;
     66     x_start = x;
     67     x_end0 = x_start + n - i - rest;
     68     const int16_t* y_start = x_start + i;
     69     while (x_start < x_end0) {
     70       x1_v = vld1q_s16(x_start);
     71       y_v = vld1q_s16(y_start);
     72       tmpa0_v = vmull_s16(vget_low_s16(x1_v), vget_low_s16(y_v));
     73 #ifdef WEBRTC_ARCH_ARM64
     74       tmpa1_v = vmull_high_s16(x1_v, y_v);
     75 #else
     76       tmpa1_v = vmull_s16(vget_high_s16(x1_v), vget_high_s16(y_v));
     77 #endif
     78       tmpb_v = vpadalq_s32(tmpb_v, tmpa0_v);
     79       tmpb_v = vpadalq_s32(tmpb_v, tmpa1_v);
     80       x_start += 8;
     81       y_start += 8;
     82     }
     83     // The remaining calculation.
     84     const int16_t* x_end1 = x + n - i;
     85     if (rest >= 4) {
     86         int16x4_t x2_v = vld1_s16(x_start);
     87         int16x4_t y2_v = vld1_s16(y_start);
     88         tmpa0_v = vmull_s16(x2_v, y2_v);
     89         tmpb_v = vpadalq_s32(tmpb_v, tmpa0_v);
     90         x_start += 4;
     91         y_start += 4;
     92     }
     93 #ifdef WEBRTC_ARCH_ARM64
     94     prod = vaddvq_s64(tmpb_v);
     95 #else
     96     prod = vget_lane_s64(vadd_s64(vget_low_s64(tmpb_v), vget_high_s64(tmpb_v)),
     97                          0);
     98 #endif
     99 
    100     prod_tail = 0;
    101     while (x_start < x_end1) {
    102       prod_tail += *x_start * *y_start;
    103       ++x_start;
    104       ++y_start;
    105     }
    106 
    107     r[i] = (int32_t)((prod + prod_tail) >> scaling);
    108   }
    109 
    110   *scale = scaling;
    111 
    112   return order + 1;
    113 }
    114 
    115