1 /* 2 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 12 /* 13 * This file contains the function WebRtcSpl_CrossCorrelation(). 14 * The description header can be found in signal_processing_library.h 15 * 16 */ 17 18 #include "signal_processing_library.h" 19 20 void WebRtcSpl_CrossCorrelation(WebRtc_Word32* cross_correlation, WebRtc_Word16* seq1, 21 WebRtc_Word16* seq2, WebRtc_Word16 dim_seq, 22 WebRtc_Word16 dim_cross_correlation, 23 WebRtc_Word16 right_shifts, 24 WebRtc_Word16 step_seq2) 25 { 26 int i, j; 27 WebRtc_Word16* seq1Ptr; 28 WebRtc_Word16* seq2Ptr; 29 WebRtc_Word32* CrossCorrPtr; 30 31 #ifdef _XSCALE_OPT_ 32 33 #ifdef _WIN32 34 #pragma message("NOTE: _XSCALE_OPT_ optimizations are used (overrides _ARM_OPT_ and requires /QRxscale compiler flag)") 35 #endif 36 37 __int64 macc40; 38 39 int iseq1[250]; 40 int iseq2[250]; 41 int iseq3[250]; 42 int * iseq1Ptr; 43 int * iseq2Ptr; 44 int * iseq3Ptr; 45 int len, i_len; 46 47 seq1Ptr = seq1; 48 iseq1Ptr = iseq1; 49 for(i = 0; i < ((dim_seq + 1) >> 1); i++) 50 { 51 *iseq1Ptr = (unsigned short)*seq1Ptr++; 52 *iseq1Ptr++ |= (WebRtc_Word32)*seq1Ptr++ << 16; 53 54 } 55 56 if(dim_seq%2) 57 { 58 *(iseq1Ptr-1) &= 0x0000ffff; 59 } 60 *iseq1Ptr = 0; 61 iseq1Ptr++; 62 *iseq1Ptr = 0; 63 iseq1Ptr++; 64 *iseq1Ptr = 0; 65 66 if(step_seq2 < 0) 67 { 68 seq2Ptr = seq2 - dim_cross_correlation + 1; 69 CrossCorrPtr = &cross_correlation[dim_cross_correlation - 1]; 70 } 71 else 72 { 73 seq2Ptr = seq2; 74 CrossCorrPtr = cross_correlation; 75 } 76 77 len = dim_seq + dim_cross_correlation - 1; 78 i_len = (len + 1) >> 1; 79 iseq2Ptr = iseq2; 80 81 iseq3Ptr = iseq3; 82 for(i = 0; i < i_len; i++) 83 { 84 *iseq2Ptr = (unsigned short)*seq2Ptr++; 85 *iseq3Ptr = (unsigned short)*seq2Ptr; 86 *iseq2Ptr++ |= (WebRtc_Word32)*seq2Ptr++ << 16; 87 *iseq3Ptr++ |= (WebRtc_Word32)*seq2Ptr << 16; 88 } 89 90 if(len % 2) 91 { 92 iseq2[i_len - 1] &= 0x0000ffff; 93 iseq3[i_len - 1] = 0; 94 } 95 else 96 iseq3[i_len - 1] &= 0x0000ffff; 97 98 iseq2[i_len] = 0; 99 iseq3[i_len] = 0; 100 iseq2[i_len + 1] = 0; 101 iseq3[i_len + 1] = 0; 102 iseq2[i_len + 2] = 0; 103 iseq3[i_len + 2] = 0; 104 105 // Set pointer to start value 106 iseq2Ptr = iseq2; 107 iseq3Ptr = iseq3; 108 109 i_len = (dim_seq + 7) >> 3; 110 for (i = 0; i < dim_cross_correlation; i++) 111 { 112 113 iseq1Ptr = iseq1; 114 115 macc40 = 0; 116 117 _WriteCoProcessor(macc40, 0); 118 119 if((i & 1)) 120 { 121 iseq3Ptr = iseq3 + (i >> 1); 122 for (j = i_len; j > 0; j--) 123 { 124 _SmulAddPack_2SW_ACC(*iseq1Ptr++, *iseq3Ptr++); 125 _SmulAddPack_2SW_ACC(*iseq1Ptr++, *iseq3Ptr++); 126 _SmulAddPack_2SW_ACC(*iseq1Ptr++, *iseq3Ptr++); 127 _SmulAddPack_2SW_ACC(*iseq1Ptr++, *iseq3Ptr++); 128 } 129 } 130 else 131 { 132 iseq2Ptr = iseq2 + (i >> 1); 133 for (j = i_len; j > 0; j--) 134 { 135 _SmulAddPack_2SW_ACC(*iseq1Ptr++, *iseq2Ptr++); 136 _SmulAddPack_2SW_ACC(*iseq1Ptr++, *iseq2Ptr++); 137 _SmulAddPack_2SW_ACC(*iseq1Ptr++, *iseq2Ptr++); 138 _SmulAddPack_2SW_ACC(*iseq1Ptr++, *iseq2Ptr++); 139 } 140 141 } 142 143 macc40 = _ReadCoProcessor(0); 144 *CrossCorrPtr = (WebRtc_Word32)(macc40 >> right_shifts); 145 CrossCorrPtr += step_seq2; 146 } 147 #else // #ifdef _XSCALE_OPT_ 148 #ifdef _ARM_OPT_ 149 WebRtc_Word16 dim_seq8 = (dim_seq >> 3) << 3; 150 #endif 151 152 CrossCorrPtr = cross_correlation; 153 154 for (i = 0; i < dim_cross_correlation; i++) 155 { 156 // Set the pointer to the static vector, set the pointer to the sliding vector 157 // and initialize cross_correlation 158 seq1Ptr = seq1; 159 seq2Ptr = seq2 + (step_seq2 * i); 160 (*CrossCorrPtr) = 0; 161 162 #ifndef _ARM_OPT_ 163 #ifdef _WIN32 164 #pragma message("NOTE: default implementation is used") 165 #endif 166 // Perform the cross correlation 167 for (j = 0; j < dim_seq; j++) 168 { 169 (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16_RSFT((*seq1Ptr), (*seq2Ptr), right_shifts); 170 seq1Ptr++; 171 seq2Ptr++; 172 } 173 #else 174 #ifdef _WIN32 175 #pragma message("NOTE: _ARM_OPT_ optimizations are used") 176 #endif 177 if (right_shifts == 0) 178 { 179 // Perform the optimized cross correlation 180 for (j = 0; j < dim_seq8; j = j + 8) 181 { 182 (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16((*seq1Ptr), (*seq2Ptr)); 183 seq1Ptr++; 184 seq2Ptr++; 185 (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16((*seq1Ptr), (*seq2Ptr)); 186 seq1Ptr++; 187 seq2Ptr++; 188 (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16((*seq1Ptr), (*seq2Ptr)); 189 seq1Ptr++; 190 seq2Ptr++; 191 (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16((*seq1Ptr), (*seq2Ptr)); 192 seq1Ptr++; 193 seq2Ptr++; 194 (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16((*seq1Ptr), (*seq2Ptr)); 195 seq1Ptr++; 196 seq2Ptr++; 197 (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16((*seq1Ptr), (*seq2Ptr)); 198 seq1Ptr++; 199 seq2Ptr++; 200 (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16((*seq1Ptr), (*seq2Ptr)); 201 seq1Ptr++; 202 seq2Ptr++; 203 (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16((*seq1Ptr), (*seq2Ptr)); 204 seq1Ptr++; 205 seq2Ptr++; 206 } 207 208 for (j = dim_seq8; j < dim_seq; j++) 209 { 210 (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16((*seq1Ptr), (*seq2Ptr)); 211 seq1Ptr++; 212 seq2Ptr++; 213 } 214 } 215 else // right_shifts != 0 216 217 { 218 // Perform the optimized cross correlation 219 for (j = 0; j < dim_seq8; j = j + 8) 220 { 221 (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16_RSFT((*seq1Ptr), (*seq2Ptr), 222 right_shifts); 223 seq1Ptr++; 224 seq2Ptr++; 225 (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16_RSFT((*seq1Ptr), (*seq2Ptr), 226 right_shifts); 227 seq1Ptr++; 228 seq2Ptr++; 229 (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16_RSFT((*seq1Ptr), (*seq2Ptr), 230 right_shifts); 231 seq1Ptr++; 232 seq2Ptr++; 233 (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16_RSFT((*seq1Ptr), (*seq2Ptr), 234 right_shifts); 235 seq1Ptr++; 236 seq2Ptr++; 237 (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16_RSFT((*seq1Ptr), (*seq2Ptr), 238 right_shifts); 239 seq1Ptr++; 240 seq2Ptr++; 241 (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16_RSFT((*seq1Ptr), (*seq2Ptr), 242 right_shifts); 243 seq1Ptr++; 244 seq2Ptr++; 245 (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16_RSFT((*seq1Ptr), (*seq2Ptr), 246 right_shifts); 247 seq1Ptr++; 248 seq2Ptr++; 249 (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16_RSFT((*seq1Ptr), (*seq2Ptr), 250 right_shifts); 251 seq1Ptr++; 252 seq2Ptr++; 253 } 254 255 for (j = dim_seq8; j < dim_seq; j++) 256 { 257 (*CrossCorrPtr) += WEBRTC_SPL_MUL_16_16_RSFT((*seq1Ptr), (*seq2Ptr), 258 right_shifts); 259 seq1Ptr++; 260 seq2Ptr++; 261 } 262 } 263 #endif 264 CrossCorrPtr++; 265 } 266 #endif 267 } 268