1 /* ------------------------------------------------------------------ 2 * Copyright (C) 1998-2009 PacketVideo 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 13 * express or implied. 14 * See the License for the specific language governing permissions 15 * and limitations under the License. 16 * ------------------------------------------------------------------- 17 */ 18 /**************************************************************************************** 19 Portions of this file are derived from the following 3GPP standard: 20 21 3GPP TS 26.073 22 ANSI-C code for the Adaptive Multi-Rate (AMR) speech codec 23 Available from http://www.3gpp.org 24 25 (C) 2004, 3GPP Organizational Partners (ARIB, ATIS, CCSA, ETSI, TTA, TTC) 26 Permission to distribute, modify and use this file under the standard license 27 terms listed above has been obtained from the copyright holder. 
28 ****************************************************************************************/ 29 /* 30 ------------------------------------------------------------------------------ 31 32 Pathname: ./audio/gsm-amr/c/src/bgnscd.c 33 Functions: 34 Bgn_scd_reset 35 Bgn_scd 36 37 ------------------------------------------------------------------------------ 38 MODULE DESCRIPTION 39 40 Background noise source characteristic detector (SCD) 41 42 ------------------------------------------------------------------------------ 43 */ 44 45 46 /*---------------------------------------------------------------------------- 47 ; INCLUDES 48 ----------------------------------------------------------------------------*/ 49 #include <string.h> 50 51 #include "bgnscd.h" 52 #include "typedef.h" 53 #include "basic_op.h" 54 #include "cnst.h" 55 #include "copy.h" 56 #include "gmed_n.h" 57 #include "sqrt_l.h" 58 59 /*---------------------------------------------------------------------------- 60 ; MACROS 61 ; Define module specific macros here 62 ----------------------------------------------------------------------------*/ 63 64 65 /*---------------------------------------------------------------------------- 66 ; DEFINES 67 ; Include all pre-processor statements here. Include conditional 68 ; compile variables also. 
69 ----------------------------------------------------------------------------*/ 70 #define TRUE 1 71 #define FALSE 0 72 73 /*---------------------------------------------------------------------------- 74 ; LOCAL FUNCTION DEFINITIONS 75 ; Function Prototype declaration 76 ----------------------------------------------------------------------------*/ 77 78 /*---------------------------------------------------------------------------- 79 ; LOCAL VARIABLE DEFINITIONS 80 ; Variable declaration - defined here and used outside this module 81 ----------------------------------------------------------------------------*/ 82 83 84 /* 85 ------------------------------------------------------------------------------ 86 FUNCTION NAME: Bgn_scd_reset 87 ------------------------------------------------------------------------------ 88 INPUT AND OUTPUT DEFINITIONS 89 90 Inputs: 91 state = points to memory of type Bgn_scdState. 92 93 Outputs: 94 The memory of type Bgn_scdState pointed to by state is set to all 95 zeros. 96 97 Returns: 98 Returns 0 if memory was successfully initialized, 99 otherwise returns -1. 100 101 Global Variables Used: 102 None. 103 104 Local Variables Needed: 105 None. 106 107 ------------------------------------------------------------------------------ 108 FUNCTION DESCRIPTION 109 110 Resets state memory. 
111 112 ------------------------------------------------------------------------------ 113 REQUIREMENTS 114 115 None 116 117 ------------------------------------------------------------------------------ 118 REFERENCES 119 120 bgnscd.c, UMTS GSM AMR speech codec, R99 - Version 3.2.0, March 2, 2001 121 122 ------------------------------------------------------------------------------ 123 PSEUDO-CODE 124 125 Word16 Bgn_scd_reset (Bgn_scdState *state) 126 { 127 if (state == (Bgn_scdState *) NULL){ 128 fprintf(stderr, "Bgn_scd_reset: invalid parameter\n"); 129 return -1; 130 } 131 132 // Static vectors to zero 133 Set_zero (state->frameEnergyHist, L_ENERGYHIST); 134 135 // Initialize hangover handling 136 state->bgHangover = 0; 137 138 return 0; 139 } 140 141 ------------------------------------------------------------------------------ 142 RESOURCES USED [optional] 143 144 When the code is written for a specific target processor the 145 the resources used should be documented below. 146 147 HEAP MEMORY USED: x bytes 148 149 STACK MEMORY USED: x bytes 150 151 CLOCK CYCLES: (cycle count equation for this function) + (variable 152 used to represent cycle count for each subroutine 153 called) 154 where: (cycle count variable) = cycle count for [subroutine 155 name] 156 157 ------------------------------------------------------------------------------ 158 CAUTION [optional] 159 [State any special notes, constraints or cautions for users of this function] 160 161 ------------------------------------------------------------------------------ 162 */ 163 164 Word16 Bgn_scd_reset(Bgn_scdState *state) 165 { 166 if (state == (Bgn_scdState *) NULL) 167 { 168 /* fprintf(stderr, "Bgn_scd_reset: invalid parameter\n"); */ 169 return(-1); 170 } 171 172 /* Static vectors to zero */ 173 memset(state->frameEnergyHist, 0, L_ENERGYHIST*sizeof(Word16)); 174 175 /* Initialize hangover handling */ 176 state->bgHangover = 0; 177 178 return(0); 179 } 180 181 
/****************************************************************************/ 182 183 /* 184 ------------------------------------------------------------------------------ 185 FUNCTION NAME: Bgn_scd 186 ------------------------------------------------------------------------------ 187 INPUT AND OUTPUT DEFINITIONS 188 189 Inputs: 190 st = pointer to state variables of type Bgn_scdState 191 ltpGainHist[] = LTP gain history (Word16) 192 speech[] = synthesis speech frame (Word16) 193 voicedHangover = pointer to # of frames after last voiced frame (Word16) 194 pOverflow = pointer to overflow indicator (Flag) 195 196 Outputs: 197 st = function updates the state variables of type Bgn_scdState 198 pointed to by st. 199 voicedHangover = function updates the # of frames after last voiced 200 frame pointed to by voicedHangover. 201 pOverflow = 1 if the basic math function L_add() results in saturation. 202 else pOverflow is zero. 203 204 Returns: 205 inbgNoise = flag if background noise is present (Word16) 206 207 Global Variables Used: 208 None. 209 210 Local Variables Needed: 211 None. 212 213 ------------------------------------------------------------------------------ 214 FUNCTION DESCRIPTION 215 216 Characterize synthesis speech and detect background noise. 
217 218 ------------------------------------------------------------------------------ 219 REQUIREMENTS 220 221 None 222 223 ------------------------------------------------------------------------------ 224 REFERENCES 225 226 bgnscd.c, UMTS GSM AMR speech codec, R99 - Version 3.2.0, March 2, 2001 227 228 ------------------------------------------------------------------------------ 229 PSEUDO-CODE 230 231 Word16 Bgn_scd (Bgn_scdState *st, // i : State variables for bgn SCD 232 Word16 ltpGainHist[], // i : LTP gain history 233 Word16 speech[], // o : synthesis speech frame 234 Word16 *voicedHangover // o : # of frames after last 235 voiced frame 236 ) 237 { 238 Word16 i; 239 Word16 prevVoiced, inbgNoise; 240 Word16 temp; 241 Word16 ltpLimit, frameEnergyMin; 242 Word16 currEnergy, noiseFloor, maxEnergy, maxEnergyLastPart; 243 Word32 s; 244 245 // Update the inBackgroundNoise flag (valid for use in next frame if BFI) 246 // it now works as a energy detector floating on top 247 // not as good as a VAD. 
248 249 currEnergy = 0; 250 s = (Word32) 0; 251 252 for (i = 0; i < L_FRAME; i++) 253 { 254 s = L_mac (s, speech[i], speech[i]); 255 } 256 257 s = L_shl(s, 2); 258 259 currEnergy = extract_h (s); 260 261 frameEnergyMin = 32767; 262 263 for (i = 0; i < L_ENERGYHIST; i++) 264 { 265 if (sub(st->frameEnergyHist[i], frameEnergyMin) < 0) 266 frameEnergyMin = st->frameEnergyHist[i]; 267 } 268 269 noiseFloor = shl (frameEnergyMin, 4); // Frame Energy Margin of 16 270 271 maxEnergy = st->frameEnergyHist[0]; 272 for (i = 1; i < L_ENERGYHIST-4; i++) 273 { 274 if ( sub (maxEnergy, st->frameEnergyHist[i]) < 0) 275 { 276 maxEnergy = st->frameEnergyHist[i]; 277 } 278 } 279 280 maxEnergyLastPart = st->frameEnergyHist[2*L_ENERGYHIST/3]; 281 for (i = 2*L_ENERGYHIST/3+1; i < L_ENERGYHIST; i++) 282 { 283 if ( sub (maxEnergyLastPart, st->frameEnergyHist[i] ) < 0) 284 { 285 maxEnergyLastPart = st->frameEnergyHist[i]; 286 } 287 } 288 289 inbgNoise = 0; // false 290 291 // Do not consider silence as noise 292 // Do not consider continuous high volume as noise 293 // Or if the current noise level is very low 294 // Mark as noise if under current noise limit 295 // OR if the maximum energy is below the upper limit 296 297 if ( (sub(maxEnergy, LOWERNOISELIMIT) > 0) && 298 (sub(currEnergy, FRAMEENERGYLIMIT) < 0) && 299 (sub(currEnergy, LOWERNOISELIMIT) > 0) && 300 ( (sub(currEnergy, noiseFloor) < 0) || 301 (sub(maxEnergyLastPart, UPPERNOISELIMIT) < 0))) 302 { 303 if (sub(add(st->bgHangover, 1), 30) > 0) 304 { 305 st->bgHangover = 30; 306 } else 307 { 308 st->bgHangover = add(st->bgHangover, 1); 309 } 310 } 311 else 312 { 313 st->bgHangover = 0; 314 } 315 316 // make final decision about frame state , act somewhat cautiosly 317 if (sub(st->bgHangover,1) > 0) 318 inbgNoise = 1; // true 319 320 for (i = 0; i < L_ENERGYHIST-1; i++) 321 { 322 st->frameEnergyHist[i] = st->frameEnergyHist[i+1]; 323 } 324 st->frameEnergyHist[L_ENERGYHIST-1] = currEnergy; 325 326 // prepare for voicing decision; 
tighten the threshold after some
       time in noise
    ltpLimit = 13926;            // 0.85  Q14
    if (sub(st->bgHangover, 8) > 0)
    {
       ltpLimit = 15565;         // 0.95  Q14
    }
    if (sub(st->bgHangover, 15) > 0)
    {
       ltpLimit = 16383;         // 1.00  Q14
    }

    // weak sort of voicing indication.
    prevVoiced = 0;        // false

    if (sub(gmed_n(&ltpGainHist[4], 5), ltpLimit) > 0)
    {
       prevVoiced = 1;     // true
    }
    if (sub(st->bgHangover, 20) > 0) {
       if (sub(gmed_n(ltpGainHist, 9), ltpLimit) > 0)
       {
          prevVoiced = 1;  // true
       }
       else
       {
          prevVoiced = 0;  // false
       }
    }

    if (prevVoiced)
    {
       *voicedHangover = 0;
    }
    else
    {
       temp = add(*voicedHangover, 1);
       if (sub(temp, 10) > 0)
       {
          *voicedHangover = 10;
       }
       else
       {
          *voicedHangover = temp;
       }
    }

    return inbgNoise;
}

------------------------------------------------------------------------------
 RESOURCES USED [optional]

 When the code is written for a specific target processor,
 the resources used should be documented below.
381 382 HEAP MEMORY USED: x bytes 383 384 STACK MEMORY USED: x bytes 385 386 CLOCK CYCLES: (cycle count equation for this function) + (variable 387 used to represent cycle count for each subroutine 388 called) 389 where: (cycle count variable) = cycle count for [subroutine 390 name] 391 392 ------------------------------------------------------------------------------ 393 CAUTION [optional] 394 [State any special notes, constraints or cautions for users of this function] 395 396 ------------------------------------------------------------------------------ 397 */ 398 399 Word16 Bgn_scd(Bgn_scdState *st, /* i : State variables for bgn SCD */ 400 Word16 ltpGainHist[], /* i : LTP gain history */ 401 Word16 speech[], /* o : synthesis speech frame */ 402 Word16 *voicedHangover,/* o : # of frames after last 403 voiced frame */ 404 Flag *pOverflow 405 ) 406 { 407 Word16 i; 408 Word16 prevVoiced, inbgNoise; 409 Word16 temp; 410 Word16 ltpLimit, frameEnergyMin; 411 Word16 currEnergy, noiseFloor, maxEnergy, maxEnergyLastPart; 412 Word32 s, L_temp; 413 414 415 /* Update the inBackgroundNoise flag (valid for use in next frame if BFI) */ 416 /* it now works as a energy detector floating on top */ 417 /* not as good as a VAD. 
*/ 418 419 s = (Word32) 0; 420 421 for (i = L_FRAME - 1; i >= 0; i--) 422 { 423 L_temp = ((Word32) speech[i]) * speech[i]; 424 if (L_temp != (Word32) 0x40000000L) 425 { 426 L_temp = L_temp << 1; 427 } 428 else 429 { 430 L_temp = MAX_32; 431 } 432 s = L_add(s, L_temp, pOverflow); 433 } 434 435 /* s is a sum of squares, so don't need to check for neg overflow */ 436 if (s > (Word32)0x1fffffffL) 437 { 438 currEnergy = MAX_16; 439 } 440 else 441 { 442 currEnergy = (Word16)(s >> 14); 443 } 444 445 frameEnergyMin = 32767; 446 for (i = L_ENERGYHIST - 1; i >= 0; i--) 447 { 448 if (st->frameEnergyHist[i] < frameEnergyMin) 449 { 450 frameEnergyMin = st->frameEnergyHist[i]; 451 } 452 } 453 454 /* Frame Energy Margin of 16 */ 455 L_temp = (Word32)frameEnergyMin << 4; 456 if (L_temp != (Word32)((Word16) L_temp)) 457 { 458 if (L_temp > 0) 459 { 460 noiseFloor = MAX_16; 461 } 462 else 463 { 464 noiseFloor = MIN_16; 465 } 466 } 467 else 468 { 469 noiseFloor = (Word16)(L_temp); 470 } 471 472 maxEnergy = st->frameEnergyHist[0]; 473 for (i = L_ENERGYHIST - 5; i >= 1; i--) 474 { 475 if (maxEnergy < st->frameEnergyHist[i]) 476 { 477 maxEnergy = st->frameEnergyHist[i]; 478 } 479 } 480 481 maxEnergyLastPart = st->frameEnergyHist[2*L_ENERGYHIST/3]; 482 for (i = 2 * L_ENERGYHIST / 3 + 1; i < L_ENERGYHIST; i++) 483 { 484 if (maxEnergyLastPart < st->frameEnergyHist[i]) 485 { 486 maxEnergyLastPart = st->frameEnergyHist[i]; 487 } 488 } 489 490 /* Do not consider silence as noise */ 491 /* Do not consider continuous high volume as noise */ 492 /* Or if the current noise level is very low */ 493 /* Mark as noise if under current noise limit */ 494 /* OR if the maximum energy is below the upper limit */ 495 496 if ((maxEnergy > LOWERNOISELIMIT) && 497 (currEnergy < FRAMEENERGYLIMIT) && 498 (currEnergy > LOWERNOISELIMIT) && 499 ((currEnergy < noiseFloor) || 500 (maxEnergyLastPart < UPPERNOISELIMIT))) 501 { 502 if ((st->bgHangover + 1) > 30) 503 { 504 st->bgHangover = 30; 505 } 506 else 507 { 508 
st->bgHangover += 1; 509 } 510 } 511 else 512 { 513 st->bgHangover = 0; 514 } 515 516 /* make final decision about frame state , act somewhat cautiosly */ 517 518 if (st->bgHangover > 1) 519 { 520 inbgNoise = TRUE; 521 } 522 else 523 { 524 inbgNoise = FALSE; 525 } 526 527 for (i = 0; i < L_ENERGYHIST - 1; i++) 528 { 529 st->frameEnergyHist[i] = st->frameEnergyHist[i+1]; 530 } 531 st->frameEnergyHist[L_ENERGYHIST-1] = currEnergy; 532 533 /* prepare for voicing decision; tighten the threshold after some 534 time in noise */ 535 536 if (st->bgHangover > 15) 537 { 538 ltpLimit = 16383; /* 1.00 Q14 */ 539 } 540 else if (st->bgHangover > 8) 541 { 542 ltpLimit = 15565; /* 0.95 Q14 */ 543 } 544 else 545 { 546 ltpLimit = 13926; /* 0.85 Q14 */ 547 } 548 549 /* weak sort of voicing indication. */ 550 prevVoiced = FALSE; 551 552 if (gmed_n(<pGainHist[4], 5) > ltpLimit) 553 { 554 prevVoiced = TRUE; 555 } 556 557 if (st->bgHangover > 20) 558 { 559 if (gmed_n(ltpGainHist, 9) > ltpLimit) 560 { 561 prevVoiced = TRUE; 562 } 563 else 564 { 565 prevVoiced = FALSE; 566 } 567 } 568 569 570 if (prevVoiced) 571 { 572 *voicedHangover = 0; 573 } 574 else 575 { 576 temp = *voicedHangover + 1; 577 578 if (temp > 10) 579 { 580 *voicedHangover = 10; 581 } 582 else 583 { 584 *voicedHangover = temp; 585 } 586 } 587 588 return(inbgNoise); 589 } 590