1 /* ------------------------------------------------------------------ 2 * Copyright (C) 1998-2009 PacketVideo 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 13 * express or implied. 14 * See the License for the specific language governing permissions 15 * and limitations under the License. 16 * ------------------------------------------------------------------- 17 */ 18 /**************************************************************************************** 19 Portions of this file are derived from the following 3GPP standard: 20 21 3GPP TS 26.073 22 ANSI-C code for the Adaptive Multi-Rate (AMR) speech codec 23 Available from http://www.3gpp.org 24 25 (C) 2004, 3GPP Organizational Partners (ARIB, ATIS, CCSA, ETSI, TTA, TTC) 26 Permission to distribute, modify and use this file under the standard license 27 terms listed above has been obtained from the copyright holder. 28 ****************************************************************************************/ 29 /* 30 ------------------------------------------------------------------------------ 31 32 33 34 Pathname: ./audio/gsm-amr/c/src/cor_h.c 35 36 Date: 06/12/2000 37 38 ------------------------------------------------------------------------------ 39 REVISION HISTORY 40 41 Description: Updated template used to PV coding template. First attempt at 42 optimizing C code. 43 44 Description: Used MAX_16 and MIN_16 when checking the result of Inv_sqrt. 45 Synced up to the new template. 46 47 Description: Added setting of Overflow flag in inlined code. 48 49 Description: Took out cor_h_x function and put it in its own file. 
Sync'ed 50 up with the single_func_template.c template. Delete version 51 ID variable. 52 53 Description: Synchronized file with UMTS version 3.2.0. Updated coding 54 template. Removed unnecessary include files. 55 56 Description: Fixed portion of the code that builds the rr[] matrix. There 57 was an error in the original inlining of code that caused 58 the code to be not bit-exact with UMTS version 3.2.0. 59 60 Description: Added calls to L_add() and mult() in the code to handle overflow 61 scenario. Moved cor_h.h after cnst.h in the Include section. 62 Doing this allows the unit test to build using the cnst.h in the 63 /test/include directory. Fixed initialization of the accumulator 64 in the first calculation of the sum of squares. 65 66 Description: Made the following changes per comments from Phase 2/3 review: 67 1. Used #define value instead of hard-coded numbers in the code. 68 2. Fixed typecasting issue with TI C compiler. 69 3. Removed typecasting of 0x00008000L in the call to L_add. 70 71 Description: Changed pOverflow from a global variable into a function 72 parameter. 73 74 Description: 75 1. Added pointer to avoid adding offsets in every pass 76 2. Eliminate variables defined as registers 77 3. Removed extra check for overflow by doing scaling right 78 after overflow is detected. 79 4. Eliminated calls to basic operations (like extract) not 80 needed because of the nature of the number (all bounded) 81 5. Eliminated duplicate loop accessing same data 82 6. Simplified matrix addressing by use of pointers 83 84 Description: 85 1. Eliminated unused include files. 86 2. Access twice the number of points when dealing with matrices 87 and in the process only 3 pointers (instead of 4) are needed 88 3. Replaced array addressing (array sign[]) by pointers 89 90 Description: Changed round function name to pv_round to avoid conflict with 91 round function in C standard library. 92 93 Description: Using inlines from fxp_arithmetic.h.
94 95 Description: Replacing fxp_arithmetic.h with basic_op.h. 96 97 Description: 98 99 ------------------------------------------------------------------------------ 100 */ 101 102 /*---------------------------------------------------------------------------- 103 ; INCLUDES 104 ----------------------------------------------------------------------------*/ 105 #include "cnst.h" 106 #include "cor_h.h" 107 #include "basicop_malloc.h" 108 #include "inv_sqrt.h" 109 #include "basic_op.h" 110 111 /*---------------------------------------------------------------------------- 112 ; MACROS 113 ; Define module specific macros here 114 ----------------------------------------------------------------------------*/ 115 116 /*---------------------------------------------------------------------------- 117 ; DEFINES 118 ; Include all pre-processor statements here. Include conditional 119 ; compile variables also. 120 ----------------------------------------------------------------------------*/ 121 122 /*---------------------------------------------------------------------------- 123 ; LOCAL FUNCTION DEFINITIONS 124 ; Function Prototype declaration 125 ----------------------------------------------------------------------------*/ 126 127 /*---------------------------------------------------------------------------- 128 ; LOCAL STORE/BUFFER/POINTER DEFINITIONS 129 ; Variable declaration - defined here and used outside this module 130 ----------------------------------------------------------------------------*/ 131 132 /* 133 ------------------------------------------------------------------------------ 134 FUNCTION NAME: cor_h 135 ------------------------------------------------------------------------------ 136 INPUT AND OUTPUT DEFINITIONS 137 138 Inputs: 139 h = vector containing the impulse response of the weighted synthesis 140 filter; vector contents are of type Word16; vector length is 141 2 * L_SUBFR 142 sign = vector containing the sign information for the correlation 143 
values; vector contents are of type Word16; vector length is 144 L_CODE 145 rr = autocorrelation matrix; matrix contents are of type Word16; 146 matrix dimension is L_CODE by L_CODE 147 148 Outputs: 149 rr contents are the newly calculated autocorrelation values 150 151 Returns: 152 None 153 154 Global Variables Used: 155 None 156 157 Local Variables Needed: 158 None 159 160 ------------------------------------------------------------------------------ 161 FUNCTION DESCRIPTION 162 163 This function computes correlations of the impulse response (h) needed for 164 the codebook search, and includes the sign information into the correlations. 165 166 The correlations are given by: 167 rr[i][j] = sum_{n=i}^{L-1} h[n-i] h[n-j]; i>=j; i,j=0,...,L-1 168 169 The sign information is included by: 170 rr[i][j] = rr[i][j]*sign[i]*sign[j] 171 172 ------------------------------------------------------------------------------ 173 REQUIREMENTS 174 175 None 176 177 ------------------------------------------------------------------------------ 178 REFERENCES 179 180 cor_h.c, UMTS GSM AMR speech codec, R99 - Version 3.2.0, March 2, 2001 181 182 ------------------------------------------------------------------------------ 183 PSEUDO-CODE 184 185 void cor_h ( 186 Word16 h[], // (i) : impulse response of weighted synthesis 187 filter 188 Word16 sign[], // (i) : sign of d[n] 189 Word16 rr[][L_CODE] // (o) : matrix of autocorrelation 190 ) 191 { 192 Word16 i, j, k, dec, h2[L_CODE]; 193 Word32 s; 194 195 // Scaling for maximum precision 196 197 s = 2; 198 for (i = 0; i < L_CODE; i++) 199 s = L_mac (s, h[i], h[i]); 200 201 j = sub (extract_h (s), 32767); 202 if (j == 0) 203 { 204 for (i = 0; i < L_CODE; i++) 205 { 206 h2[i] = shr (h[i], 1); 207 } 208 } 209 else 210 { 211 s = L_shr (s, 1); 212 k = extract_h (L_shl (Inv_sqrt (s), 7)); 213 k = mult (k, 32440); // k = 0.99*k 214 215 for (i = 0; i < L_CODE; i++) 216 { 217 h2[i] = pv_round (L_shl (L_mult (h[i], k), 9)); 218 } 219 } 220 221 // 
build matrix rr[] 222 s = 0; 223 i = L_CODE - 1; 224 for (k = 0; k < L_CODE; k++, i--) 225 { 226 s = L_mac (s, h2[k], h2[k]); 227 rr[i][i] = pv_round (s); 228 } 229 230 for (dec = 1; dec < L_CODE; dec++) 231 { 232 s = 0; 233 j = L_CODE - 1; 234 i = sub (j, dec); 235 for (k = 0; k < (L_CODE - dec); k++, i--, j--) 236 { 237 s = L_mac (s, h2[k], h2[k + dec]); 238 rr[j][i] = mult (pv_round (s), mult (sign[i], sign[j])); 239 rr[i][j] = rr[j][i]; 240 } 241 } 242 } 243 244 --------------------------------------------------------------------------- 245 RESOURCES USED [optional] 246 247 When the code is written for a specific target processor the 248 the resources used should be documented below. 249 250 HEAP MEMORY USED: x bytes 251 252 STACK MEMORY USED: x bytes 253 254 CLOCK CYCLES: (cycle count equation for this function) + (variable 255 used to represent cycle count for each subroutine 256 called) 257 where: (cycle count variable) = cycle count for [subroutine 258 name] 259 260 ------------------------------------------------------------------------------ 261 CAUTION [optional] 262 [State any special notes, constraints or cautions for users of this function] 263 264 ------------------------------------------------------------------------------ 265 */ 266 267 void cor_h( 268 Word16 h[], /* (i) : impulse response of weighted synthesis 269 filter */ 270 Word16 sign[], /* (i) : sign of d[n] */ 271 Word16 rr[][L_CODE], /* (o) : matrix of autocorrelation */ 272 Flag *pOverflow 273 ) 274 { 275 Word16 i; 276 Word16 dec; 277 278 Word16 h2[L_CODE]; 279 Word32 s; 280 Word32 s2; 281 Word16 tmp1; 282 Word16 tmp2; 283 Word16 tmp11; 284 Word16 tmp22; 285 286 Word16 *p_h; 287 Word16 *p_h2; 288 Word16 *rr1; 289 Word16 *rr2; 290 Word16 *rr3; 291 Word16 *p_rr_ref1; 292 Word16 *p_sign1; 293 Word16 *p_sign2; 294 295 /* Scaling for maximum precision */ 296 297 /* Initialize accumulator to 1 since left shift happens */ 298 /* after the accumulation of the sum of squares (original */ 299 /* 
code initialized s to 2) */ 300 s = 1; 301 p_h = h; 302 303 for (i = (L_CODE >> 1); i != 0 ; i--) 304 { 305 tmp1 = *(p_h++); 306 s = amrnb_fxp_mac_16_by_16bb((Word32) tmp1, (Word32) tmp1, s); 307 tmp1 = *(p_h++); 308 s = amrnb_fxp_mac_16_by_16bb((Word32) tmp1, (Word32) tmp1, s); 309 310 } 311 312 s <<= 1; 313 314 if (s & MIN_32) 315 { 316 p_h2 = h2; 317 p_h = h; 318 319 for (i = (L_CODE >> 1); i != 0; i--) 320 { 321 *(p_h2++) = *(p_h++) >> 1; 322 *(p_h2++) = *(p_h++) >> 1; 323 } 324 } 325 else 326 { 327 328 s >>= 1; 329 330 s = Inv_sqrt(s, pOverflow); 331 332 if (s < (Word32) 0x00ffffffL) 333 { 334 /* k = 0.99*k */ 335 dec = (Word16)(((s >> 9) * 32440) >> 15); 336 } 337 else 338 { 339 dec = 32440; /* 0.99 */ 340 } 341 342 p_h = h; 343 p_h2 = h2; 344 345 for (i = (L_CODE >> 1); i != 0; i--) 346 { 347 *(p_h2++) = (Word16)((amrnb_fxp_mac_16_by_16bb((Word32) * (p_h++), (Word32) dec, 0x020L)) >> 6); 348 *(p_h2++) = (Word16)((amrnb_fxp_mac_16_by_16bb((Word32) * (p_h++), (Word32) dec, 0x020L)) >> 6); 349 } 350 } 351 /* build matrix rr[] */ 352 353 s = 0; 354 355 p_h2 = h2; 356 357 rr1 = &rr[L_CODE-1][L_CODE-1]; 358 359 for (i = L_CODE >> 1; i != 0 ; i--) 360 { 361 tmp1 = *(p_h2++); 362 s = amrnb_fxp_mac_16_by_16bb((Word32) tmp1, (Word32) tmp1, s); 363 *rr1 = (Word16)((s + 0x00004000L) >> 15); 364 rr1 -= (L_CODE + 1); 365 tmp1 = *(p_h2++); 366 s = amrnb_fxp_mac_16_by_16bb((Word32) tmp1, (Word32) tmp1, s); 367 *rr1 = (Word16)((s + 0x00004000L) >> 15); 368 rr1 -= (L_CODE + 1); 369 } 370 371 372 p_rr_ref1 = rr[L_CODE-1]; 373 374 for (dec = 1; dec < L_CODE; dec += 2) 375 { 376 rr1 = &p_rr_ref1[L_CODE-1-dec]; 377 378 rr2 = &rr[L_CODE-1-dec][L_CODE-1]; 379 rr3 = &rr[L_CODE-1-(dec+1)][L_CODE-1]; 380 381 s = 0; 382 s2 = 0; 383 384 p_sign1 = &sign[L_CODE - 1]; 385 p_sign2 = &sign[L_CODE - 1 - dec]; 386 387 p_h2 = h2; 388 p_h = &h2[dec]; 389 390 for (i = (L_CODE - dec - 1); i != 0 ; i--) 391 { 392 s = amrnb_fxp_mac_16_by_16bb((Word32) * (p_h2), (Word32) * (p_h++), s); 393 s2 = 
amrnb_fxp_mac_16_by_16bb((Word32) * (p_h2++), (Word32) * (p_h), s2); 394 395 tmp1 = (Word16)((s + 0x00004000L) >> 15); 396 tmp11 = (Word16)((s2 + 0x00004000L) >> 15); 397 398 tmp2 = ((Word32) * (p_sign1) * *(p_sign2--)) >> 15; 399 tmp22 = ((Word32) * (p_sign1--) * *(p_sign2)) >> 15; 400 401 *rr2 = ((Word32) tmp1 * tmp2) >> 15; 402 *(rr1--) = *rr2; 403 *rr1 = ((Word32) tmp11 * tmp22) >> 15; 404 *rr3 = *rr1; 405 406 rr1 -= (L_CODE); 407 rr2 -= (L_CODE + 1); 408 rr3 -= (L_CODE + 1); 409 410 } 411 412 s = amrnb_fxp_mac_16_by_16bb((Word32) * (p_h2), (Word32) * (p_h), s); 413 414 tmp1 = (Word16)((s + 0x00004000L) >> 15); 415 416 tmp2 = ((Word32) * (p_sign1) * *(p_sign2)) >> 15; 417 *rr1 = ((Word32) tmp1 * tmp2) >> 15; 418 419 *rr2 = *rr1; 420 421 rr1 -= (L_CODE + 1); 422 rr2 -= (L_CODE + 1); 423 424 } 425 426 return; 427 428 } 429 430