1 /* 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" 12 13 #include <arm_neon.h> 14 15 static inline void DotProductWithScaleNeon(int32_t* cross_correlation, 16 const int16_t* vector1, 17 const int16_t* vector2, 18 size_t length, 19 int scaling) { 20 size_t i = 0; 21 size_t len1 = length >> 3; 22 size_t len2 = length & 7; 23 int64x2_t sum0 = vdupq_n_s64(0); 24 int64x2_t sum1 = vdupq_n_s64(0); 25 26 for (i = len1; i > 0; i -= 1) { 27 int16x8_t seq1_16x8 = vld1q_s16(vector1); 28 int16x8_t seq2_16x8 = vld1q_s16(vector2); 29 #if defined(WEBRTC_ARCH_ARM64) 30 int32x4_t tmp0 = vmull_s16(vget_low_s16(seq1_16x8), 31 vget_low_s16(seq2_16x8)); 32 int32x4_t tmp1 = vmull_high_s16(seq1_16x8, seq2_16x8); 33 #else 34 int32x4_t tmp0 = vmull_s16(vget_low_s16(seq1_16x8), 35 vget_low_s16(seq2_16x8)); 36 int32x4_t tmp1 = vmull_s16(vget_high_s16(seq1_16x8), 37 vget_high_s16(seq2_16x8)); 38 #endif 39 sum0 = vpadalq_s32(sum0, tmp0); 40 sum1 = vpadalq_s32(sum1, tmp1); 41 vector1 += 8; 42 vector2 += 8; 43 } 44 45 // Calculate the rest of the samples. 46 int64_t sum_res = 0; 47 for (i = len2; i > 0; i -= 1) { 48 sum_res += WEBRTC_SPL_MUL_16_16(*vector1, *vector2); 49 vector1++; 50 vector2++; 51 } 52 53 sum0 = vaddq_s64(sum0, sum1); 54 #if defined(WEBRTC_ARCH_ARM64) 55 int64_t sum2 = vaddvq_s64(sum0); 56 *cross_correlation = (int32_t)((sum2 + sum_res) >> scaling); 57 #else 58 int64x1_t shift = vdup_n_s64(-scaling); 59 int64x1_t sum2 = vadd_s64(vget_low_s64(sum0), vget_high_s64(sum0)); 60 sum2 = vadd_s64(sum2, vdup_n_s64(sum_res)); 61 sum2 = vshl_s64(sum2, shift); 62 vst1_lane_s32(cross_correlation, vreinterpret_s32_s64(sum2), 0); 63 #endif 64 } 65 66 /* NEON version of WebRtcSpl_CrossCorrelation() for ARM32/64 platforms. */ 67 void WebRtcSpl_CrossCorrelationNeon(int32_t* cross_correlation, 68 const int16_t* seq1, 69 const int16_t* seq2, 70 size_t dim_seq, 71 size_t dim_cross_correlation, 72 int right_shifts, 73 int step_seq2) { 74 size_t i = 0; 75 76 for (i = 0; i < dim_cross_correlation; i++) { 77 const int16_t* seq1_ptr = seq1; 78 const int16_t* seq2_ptr = seq2 + (step_seq2 * i); 79 80 DotProductWithScaleNeon(cross_correlation, 81 seq1_ptr, 82 seq2_ptr, 83 dim_seq, 84 right_shifts); 85 cross_correlation++; 86 } 87 } 88