1 /* 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include <arm_neon.h> 12 #include <assert.h> 13 14 #include "webrtc/modules/audio_coding/codecs/isac/fix/source/codec.h" 15 16 // Autocorrelation function in fixed point. 17 // NOTE! Different from SPLIB-version in how it scales the signal. 18 int WebRtcIsacfix_AutocorrNeon(int32_t* __restrict r, 19 const int16_t* x, 20 int16_t n, 21 int16_t order, 22 int16_t* __restrict scale) { 23 int i = 0; 24 int16_t scaling = 0; 25 uint32_t temp = 0; 26 int64_t prod = 0; 27 int64_t prod_tail = 0; 28 29 assert(n % 4 == 0); 30 assert(n >= 8); 31 32 // Calculate r[0]. 33 int16x4_t x0_v; 34 int32x4_t tmpa0_v; 35 int64x2_t tmpb_v; 36 37 tmpb_v = vdupq_n_s64(0); 38 const int16_t* x_start = x; 39 const int16_t* x_end0 = x_start + n; 40 while (x_start < x_end0) { 41 x0_v = vld1_s16(x_start); 42 tmpa0_v = vmull_s16(x0_v, x0_v); 43 tmpb_v = vpadalq_s32(tmpb_v, tmpa0_v); 44 x_start += 4; 45 } 46 47 #ifdef WEBRTC_ARCH_ARM64 48 prod = vaddvq_s64(tmpb_v); 49 #else 50 prod = vget_lane_s64(vadd_s64(vget_low_s64(tmpb_v), vget_high_s64(tmpb_v)), 51 0); 52 #endif 53 // Calculate scaling (the value of shifting). 54 temp = (uint32_t)(prod >> 31); 55 56 scaling = temp ? 32 - WebRtcSpl_NormU32(temp) : 0; 57 r[0] = (int32_t)(prod >> scaling); 58 59 int16x8_t x1_v; 60 int16x8_t y_v; 61 int32x4_t tmpa1_v; 62 // Perform the actual correlation calculation. 63 for (i = 1; i < order + 1; i++) { 64 tmpb_v = vdupq_n_s64(0); 65 int rest = (n - i) % 8; 66 x_start = x; 67 x_end0 = x_start + n - i - rest; 68 const int16_t* y_start = x_start + i; 69 while (x_start < x_end0) { 70 x1_v = vld1q_s16(x_start); 71 y_v = vld1q_s16(y_start); 72 tmpa0_v = vmull_s16(vget_low_s16(x1_v), vget_low_s16(y_v)); 73 #ifdef WEBRTC_ARCH_ARM64 74 tmpa1_v = vmull_high_s16(x1_v, y_v); 75 #else 76 tmpa1_v = vmull_s16(vget_high_s16(x1_v), vget_high_s16(y_v)); 77 #endif 78 tmpb_v = vpadalq_s32(tmpb_v, tmpa0_v); 79 tmpb_v = vpadalq_s32(tmpb_v, tmpa1_v); 80 x_start += 8; 81 y_start += 8; 82 } 83 // The remaining calculation. 84 const int16_t* x_end1 = x + n - i; 85 if (rest >= 4) { 86 int16x4_t x2_v = vld1_s16(x_start); 87 int16x4_t y2_v = vld1_s16(y_start); 88 tmpa0_v = vmull_s16(x2_v, y2_v); 89 tmpb_v = vpadalq_s32(tmpb_v, tmpa0_v); 90 x_start += 4; 91 y_start += 4; 92 } 93 #ifdef WEBRTC_ARCH_ARM64 94 prod = vaddvq_s64(tmpb_v); 95 #else 96 prod = vget_lane_s64(vadd_s64(vget_low_s64(tmpb_v), vget_high_s64(tmpb_v)), 97 0); 98 #endif 99 100 prod_tail = 0; 101 while (x_start < x_end1) { 102 prod_tail += *x_start * *y_start; 103 ++x_start; 104 ++y_start; 105 } 106 107 r[i] = (int32_t)((prod + prod_tail) >> scaling); 108 } 109 110 *scale = scaling; 111 112 return order + 1; 113 } 114 115