Home | History | Annotate | Download | only in src
      1 /*
      2  ** Copyright 2003-2010, VisualOn, Inc.
      3  **
      4  ** Licensed under the Apache License, Version 2.0 (the "License");
      5  ** you may not use this file except in compliance with the License.
      6  ** You may obtain a copy of the License at
      7  **
      8  **     http://www.apache.org/licenses/LICENSE-2.0
      9  **
     10  ** Unless required by applicable law or agreed to in writing, software
     11  ** distributed under the License is distributed on an "AS IS" BASIS,
     12  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  ** See the License for the specific language governing permissions and
     14  ** limitations under the License.
     15  */
     16 
     17 /***********************************************************************
     18 *       File: dtx.c                                                    *
     19 *                                                                      *
     20 *	    Description:DTX functions                                  *
     21 *                                                                      *
     22 ************************************************************************/
     23 
     24 #include <stdio.h>
     25 #include <stdlib.h>
     26 #include "typedef.h"
     27 #include "basic_op.h"
     28 #include "oper_32b.h"
     29 #include "math_op.h"
     30 #include "cnst.h"
     31 #include "acelp.h"                         /* prototype of functions    */
     32 #include "bits.h"
     33 #include "dtx.h"
     34 #include "log2.h"
     35 #include "mem_align.h"
     36 
     37 static void aver_isf_history(
     38 		Word16 isf_old[],
     39 		Word16 indices[],
     40 		Word32 isf_aver[]
     41 		);
     42 
     43 static void find_frame_indices(
     44 		Word16 isf_old_tx[],
     45 		Word16 indices[],
     46 		dtx_encState * st
     47 		);
     48 
     49 static Word16 dithering_control(
     50 		dtx_encState * st
     51 		);
     52 
     53 /* excitation energy adjustment depending on speech coder mode used, Q7 */
     54 static Word16 en_adjust[9] =
     55 {
     56 	230,                                   /* mode0 = 7k  :  -5.4dB  */
     57 	179,                                   /* mode1 = 9k  :  -4.2dB  */
     58 	141,                                   /* mode2 = 12k :  -3.3dB  */
     59 	128,                                   /* mode3 = 14k :  -3.0dB  */
     60 	122,                                   /* mode4 = 16k :  -2.85dB */
     61 	115,                                   /* mode5 = 18k :  -2.7dB  */
     62 	115,                                   /* mode6 = 20k :  -2.7dB  */
     63 	115,                                   /* mode7 = 23k :  -2.7dB  */
     64 	115                                    /* mode8 = 24k :  -2.7dB  */
     65 };
     66 
     67 /**************************************************************************
     68 *
     69 * Function    : dtx_enc_init
     70 *
     71 **************************************************************************/
     72 Word16 dtx_enc_init(dtx_encState ** st, Word16 isf_init[], VO_MEM_OPERATOR *pMemOP)
     73 {
     74 	dtx_encState *s;
     75 
     76 	if (st == (dtx_encState **) NULL)
     77 	{
     78 		fprintf(stderr, "dtx_enc_init: invalid parameter\n");
     79 		return -1;
     80 	}
     81 	*st = NULL;
     82 
     83 	/* allocate memory */
     84 	if ((s = (dtx_encState *)mem_malloc(pMemOP, sizeof(dtx_encState), 32, VO_INDEX_ENC_AMRWB)) == NULL)
     85 	{
     86 		fprintf(stderr, "dtx_enc_init: can not malloc state structure\n");
     87 		return -1;
     88 	}
     89 	dtx_enc_reset(s, isf_init);
     90 	*st = s;
     91 	return 0;
     92 }
     93 
     94 /**************************************************************************
     95 *
     96 * Function    : dtx_enc_reset
     97 *
     98 **************************************************************************/
     99 Word16 dtx_enc_reset(dtx_encState * st, Word16 isf_init[])
    100 {
    101 	Word32 i;
    102 
    103 	if (st == (dtx_encState *) NULL)
    104 	{
    105 		fprintf(stderr, "dtx_enc_reset: invalid parameter\n");
    106 		return -1;
    107 	}
    108 	st->hist_ptr = 0;
    109 	st->log_en_index = 0;
    110 
    111 	/* Init isf_hist[] */
    112 	for (i = 0; i < DTX_HIST_SIZE; i++)
    113 	{
    114 		Copy(isf_init, &st->isf_hist[i * M], M);
    115 	}
    116 	st->cng_seed = RANDOM_INITSEED;
    117 
    118 	/* Reset energy history */
    119 	Set_zero(st->log_en_hist, DTX_HIST_SIZE);
    120 
    121 	st->dtxHangoverCount = DTX_HANG_CONST;
    122 	st->decAnaElapsedCount = 32767;
    123 
    124 	for (i = 0; i < 28; i++)
    125 	{
    126 		st->D[i] = 0;
    127 	}
    128 
    129 	for (i = 0; i < DTX_HIST_SIZE - 1; i++)
    130 	{
    131 		st->sumD[i] = 0;
    132 	}
    133 
    134 	return 1;
    135 }
    136 
    137 /**************************************************************************
    138 *
    139 * Function    : dtx_enc_exit
    140 *
    141 **************************************************************************/
    142 void dtx_enc_exit(dtx_encState ** st, VO_MEM_OPERATOR *pMemOP)
    143 {
    144 	if (st == NULL || *st == NULL)
    145 		return;
    146 	/* deallocate memory */
    147 	mem_free(pMemOP, *st, VO_INDEX_ENC_AMRWB);
    148 	*st = NULL;
    149 	return;
    150 }
    151 
    152 
    153 /**************************************************************************
    154 *
    155 * Function    : dtx_enc
    156 *
    157 **************************************************************************/
    158 Word16 dtx_enc(
    159 		dtx_encState * st,                    /* i/o : State struct                                         */
    160 		Word16 isf[M],                        /* o   : CN ISF vector                                        */
    161 		Word16 * exc2,                        /* o   : CN excitation                                        */
    162 		Word16 ** prms
    163 	      )
    164 {
    165 	Word32 i, j;
    166 	Word16 indice[7];
    167 	Word16 log_en, gain, level, exp, exp0, tmp;
    168 	Word16 log_en_int_e, log_en_int_m;
    169 	Word32 L_isf[M], ener32, level32;
    170 	Word16 isf_order[3];
    171 	Word16 CN_dith;
    172 
    173 	/* VOX mode computation of SID parameters */
    174 	log_en = 0;
    175 	for (i = 0; i < M; i++)
    176 	{
    177 		L_isf[i] = 0;
    178 	}
    179 	/* average energy and isf */
    180 	for (i = 0; i < DTX_HIST_SIZE; i++)
    181 	{
    182 		/* Division by DTX_HIST_SIZE = 8 has been done in dtx_buffer. log_en is in Q10 */
    183 		log_en = add(log_en, st->log_en_hist[i]);
    184 
    185 	}
    186 	find_frame_indices(st->isf_hist, isf_order, st);
    187 	aver_isf_history(st->isf_hist, isf_order, L_isf);
    188 
    189 	for (j = 0; j < M; j++)
    190 	{
    191 		isf[j] = (Word16)(L_isf[j] >> 3);  /* divide by 8 */
    192 	}
    193 
    194 	/* quantize logarithmic energy to 6 bits (-6 : 66 dB) which corresponds to -2:22 in log2(E).  */
    195 	/* st->log_en_index = (short)( (log_en + 2.0) * 2.625 ); */
    196 
    197 	/* increase dynamics to 7 bits (Q8) */
    198 	log_en = (log_en >> 2);
    199 
    200 	/* Add 2 in Q8 = 512 to get log2(E) between 0:24 */
    201 	log_en = add(log_en, 512);
    202 
    203 	/* Multiply by 2.625 to get full 6 bit range. 2.625 = 21504 in Q13. The result is in Q6 */
    204 	log_en = mult(log_en, 21504);
    205 
    206 	/* Quantize Energy */
    207 	st->log_en_index = shr(log_en, 6);
    208 
    209 	if(st->log_en_index > 63)
    210 	{
    211 		st->log_en_index = 63;
    212 	}
    213 	if (st->log_en_index < 0)
    214 	{
    215 		st->log_en_index = 0;
    216 	}
    217 	/* Quantize ISFs */
    218 	Qisf_ns(isf, isf, indice);
    219 
    220 
    221 	Parm_serial(indice[0], 6, prms);
    222 	Parm_serial(indice[1], 6, prms);
    223 	Parm_serial(indice[2], 6, prms);
    224 	Parm_serial(indice[3], 5, prms);
    225 	Parm_serial(indice[4], 5, prms);
    226 
    227 	Parm_serial((st->log_en_index), 6, prms);
    228 
    229 	CN_dith = dithering_control(st);
    230 	Parm_serial(CN_dith, 1, prms);
    231 
    232 	/* level = (float)( pow( 2.0f, (float)st->log_en_index / 2.625 - 2.0 ) );    */
    233 	/* log2(E) in Q9 (log2(E) lies in between -2:22) */
    234 	log_en = shl(st->log_en_index, 15 - 6);
    235 
    236 	/* Divide by 2.625; log_en will be between 0:24  */
    237 	log_en = mult(log_en, 12483);
    238 	/* the result corresponds to log2(gain) in Q10 */
    239 
    240 	/* Find integer part  */
    241 	log_en_int_e = (log_en >> 10);
    242 
    243 	/* Find fractional part */
    244 	log_en_int_m = (Word16) (log_en & 0x3ff);
    245 	log_en_int_m = shl(log_en_int_m, 5);
    246 
    247 	/* Subtract 2 from log_en in Q9, i.e divide the gain by 2 (energy by 4) */
    248 	/* Add 16 in order to have the result of pow2 in Q16 */
    249 	log_en_int_e = add(log_en_int_e, 16 - 1);
    250 
    251 	level32 = Pow2(log_en_int_e, log_en_int_m); /* Q16 */
    252 	exp0 = norm_l(level32);
    253 	level32 = (level32 << exp0);        /* level in Q31 */
    254 	exp0 = (15 - exp0);
    255 	level = extract_h(level32);            /* level in Q15 */
    256 
    257 	/* generate white noise vector */
    258 	for (i = 0; i < L_FRAME; i++)
    259 	{
    260 		exc2[i] = (Random(&(st->cng_seed)) >> 4);
    261 	}
    262 
    263 	/* gain = level / sqrt(ener) * sqrt(L_FRAME) */
    264 
    265 	/* energy of generated excitation */
    266 	ener32 = Dot_product12(exc2, exc2, L_FRAME, &exp);
    267 
    268 	Isqrt_n(&ener32, &exp);
    269 
    270 	gain = extract_h(ener32);
    271 
    272 	gain = mult(level, gain);              /* gain in Q15 */
    273 
    274 	exp = add(exp0, exp);
    275 
    276 	/* Multiply by sqrt(L_FRAME)=16, i.e. shift left by 4 */
    277 	exp += 4;
    278 
    279 	for (i = 0; i < L_FRAME; i++)
    280 	{
    281 		tmp = mult(exc2[i], gain);         /* Q0 * Q15 */
    282 		exc2[i] = shl(tmp, exp);
    283 	}
    284 
    285 	return 0;
    286 }
    287 
    288 /**************************************************************************
    289 *
    290 * Function    : dtx_buffer Purpose     : handles the DTX buffer
    291 *
    292 **************************************************************************/
    293 Word16 dtx_buffer(
    294 		dtx_encState * st,                    /* i/o : State struct                    */
    295 		Word16 isf_new[],                     /* i   : isf vector                      */
    296 		Word32 enr,                           /* i   : residual energy (in L_FRAME)    */
    297 		Word16 codec_mode
    298 		)
    299 {
    300 	Word16 log_en;
    301 
    302 	Word16 log_en_e;
    303 	Word16 log_en_m;
    304 	st->hist_ptr = add(st->hist_ptr, 1);
    305 	if(st->hist_ptr == DTX_HIST_SIZE)
    306 	{
    307 		st->hist_ptr = 0;
    308 	}
    309 	/* copy lsp vector into buffer */
    310 	Copy(isf_new, &st->isf_hist[st->hist_ptr * M], M);
    311 
    312 	/* log_en = (float)log10(enr*0.0059322)/(float)log10(2.0f);  */
    313 	Log2(enr, &log_en_e, &log_en_m);
    314 
    315 	/* convert exponent and mantissa to Word16 Q7. Q7 is used to simplify averaging in dtx_enc */
    316 	log_en = shl(log_en_e, 7);             /* Q7 */
    317 	log_en = add(log_en, shr(log_en_m, 15 - 7));
    318 
    319 	/* Find energy per sample by multiplying with 0.0059322, i.e subtract log2(1/0.0059322) = 7.39722 The
    320 	 * constant 0.0059322 takes into account windowings and analysis length from autocorrelation
    321 	 * computations; 7.39722 in Q7 = 947  */
    322 	/* Subtract 3 dB = 0.99658 in log2(E) = 127 in Q7. */
    323 	/* log_en = sub( log_en, 947 + en_adjust[codec_mode] ); */
    324 
    325 	/* Find energy per sample (divide by L_FRAME=256), i.e subtract log2(256) = 8.0  (1024 in Q7) */
    326 	/* Subtract 3 dB = 0.99658 in log2(E) = 127 in Q7. */
    327 
    328 	log_en = sub(log_en, add(1024, en_adjust[codec_mode]));
    329 
    330 	/* Insert into the buffer */
    331 	st->log_en_hist[st->hist_ptr] = log_en;
    332 	return 0;
    333 }
    334 
    335 /**************************************************************************
    336 *
    337 * Function    : tx_dtx_handler Purpose     : adds extra speech hangover
    338 *                                            to analyze speech on
    339 *                                            the decoding side.
    340 **************************************************************************/
    341 void tx_dtx_handler(dtx_encState * st,     /* i/o : State struct           */
    342 		Word16 vad_flag,                      /* i   : vad decision           */
    343 		Word16 * usedMode                     /* i/o : mode changed or not    */
    344 		)
    345 {
    346 
    347 	/* this state machine is in synch with the GSMEFR txDtx machine      */
    348 	st->decAnaElapsedCount = add(st->decAnaElapsedCount, 1);
    349 
    350 	if (vad_flag != 0)
    351 	{
    352 		st->dtxHangoverCount = DTX_HANG_CONST;
    353 	} else
    354 	{                                      /* non-speech */
    355 		if (st->dtxHangoverCount == 0)
    356 		{                                  /* out of decoder analysis hangover  */
    357 			st->decAnaElapsedCount = 0;
    358 			*usedMode = MRDTX;
    359 		} else
    360 		{                                  /* in possible analysis hangover */
    361 			st->dtxHangoverCount = sub(st->dtxHangoverCount, 1);
    362 
    363 			/* decAnaElapsedCount + dtxHangoverCount < DTX_ELAPSED_FRAMES_THRESH */
    364 			if (sub(add(st->decAnaElapsedCount, st->dtxHangoverCount),
    365 						DTX_ELAPSED_FRAMES_THRESH) < 0)
    366 			{
    367 				*usedMode = MRDTX;
    368 				/* if short time since decoder update, do not add extra HO */
    369 			}
    370 			/* else override VAD and stay in speech mode *usedMode and add extra hangover */
    371 		}
    372 	}
    373 
    374 	return;
    375 }
    376 
    377 
    378 
    379 static void aver_isf_history(
    380 		Word16 isf_old[],
    381 		Word16 indices[],
    382 		Word32 isf_aver[]
    383 		)
    384 {
    385 	Word32 i, j, k;
    386 	Word16 isf_tmp[2 * M];
    387 	Word32 L_tmp;
    388 
    389 	/* Memorize in isf_tmp[][] the ISF vectors to be replaced by */
    390 	/* the median ISF vector prior to the averaging               */
    391 	for (k = 0; k < 2; k++)
    392 	{
    393 		if ((indices[k] + 1) != 0)
    394 		{
    395 			for (i = 0; i < M; i++)
    396 			{
    397 				isf_tmp[k * M + i] = isf_old[indices[k] * M + i];
    398 				isf_old[indices[k] * M + i] = isf_old[indices[2] * M + i];
    399 			}
    400 		}
    401 	}
    402 
    403 	/* Perform the ISF averaging */
    404 	for (j = 0; j < M; j++)
    405 	{
    406 		L_tmp = 0;
    407 
    408 		for (i = 0; i < DTX_HIST_SIZE; i++)
    409 		{
    410 			L_tmp = L_add(L_tmp, L_deposit_l(isf_old[i * M + j]));
    411 		}
    412 		isf_aver[j] = L_tmp;
    413 	}
    414 
    415 	/* Retrieve from isf_tmp[][] the ISF vectors saved prior to averaging */
    416 	for (k = 0; k < 2; k++)
    417 	{
    418 		if ((indices[k] + 1) != 0)
    419 		{
    420 			for (i = 0; i < M; i++)
    421 			{
    422 				isf_old[indices[k] * M + i] = isf_tmp[k * M + i];
    423 			}
    424 		}
    425 	}
    426 
    427 	return;
    428 }
    429 
    430 static void find_frame_indices(
    431 		Word16 isf_old_tx[],
    432 		Word16 indices[],
    433 		dtx_encState * st
    434 		)
    435 {
    436 	Word32 L_tmp, summin, summax, summax2nd;
    437 	Word16 i, j, tmp;
    438 	Word16 ptr;
    439 
    440 	/* Remove the effect of the oldest frame from the column */
    441 	/* sum sumD[0..DTX_HIST_SIZE-1]. sumD[DTX_HIST_SIZE] is    */
    442 	/* not updated since it will be removed later.           */
    443 
    444 	tmp = DTX_HIST_SIZE_MIN_ONE;
    445 	j = -1;
    446 	for (i = 0; i < DTX_HIST_SIZE_MIN_ONE; i++)
    447 	{
    448 		j = add(j, tmp);
    449 		st->sumD[i] = L_sub(st->sumD[i], st->D[j]);
    450 		tmp = sub(tmp, 1);
    451 	}
    452 
    453 	/* Shift the column sum sumD. The element sumD[DTX_HIST_SIZE-1]    */
    454 	/* corresponding to the oldest frame is removed. The sum of     */
    455 	/* the distances between the latest isf and other isfs, */
    456 	/* i.e. the element sumD[0], will be computed during this call. */
    457 	/* Hence this element is initialized to zero.                   */
    458 
    459 	for (i = DTX_HIST_SIZE_MIN_ONE; i > 0; i--)
    460 	{
    461 		st->sumD[i] = st->sumD[i - 1];
    462 	}
    463 	st->sumD[0] = 0;
    464 
    465 	/* Remove the oldest frame from the distance matrix.           */
    466 	/* Note that the distance matrix is replaced by a one-         */
    467 	/* dimensional array to save static memory.                    */
    468 
    469 	tmp = 0;
    470 	for (i = 27; i >= 12; i = (Word16) (i - tmp))
    471 	{
    472 		tmp = add(tmp, 1);
    473 		for (j = tmp; j > 0; j--)
    474 		{
    475 			st->D[i - j + 1] = st->D[i - j - tmp];
    476 		}
    477 	}
    478 
    479 	/* Compute the first column of the distance matrix D            */
    480 	/* (squared Euclidean distances from isf1[] to isf_old_tx[][]). */
    481 
    482 	ptr = st->hist_ptr;
    483 	for (i = 1; i < DTX_HIST_SIZE; i++)
    484 	{
    485 		/* Compute the distance between the latest isf and the other isfs. */
    486 		ptr = sub(ptr, 1);
    487 		if (ptr < 0)
    488 		{
    489 			ptr = DTX_HIST_SIZE_MIN_ONE;
    490 		}
    491 		L_tmp = 0;
    492 		for (j = 0; j < M; j++)
    493 		{
    494 			tmp = sub(isf_old_tx[st->hist_ptr * M + j], isf_old_tx[ptr * M + j]);
    495 			L_tmp = L_mac(L_tmp, tmp, tmp);
    496 		}
    497 		st->D[i - 1] = L_tmp;
    498 
    499 		/* Update also the column sums. */
    500 		st->sumD[0] = L_add(st->sumD[0], st->D[i - 1]);
    501 		st->sumD[i] = L_add(st->sumD[i], st->D[i - 1]);
    502 	}
    503 
    504 	/* Find the minimum and maximum distances */
    505 	summax = st->sumD[0];
    506 	summin = st->sumD[0];
    507 	indices[0] = 0;
    508 	indices[2] = 0;
    509 	for (i = 1; i < DTX_HIST_SIZE; i++)
    510 	{
    511 		if (L_sub(st->sumD[i], summax) > 0)
    512 		{
    513 			indices[0] = i;
    514 			summax = st->sumD[i];
    515 		}
    516 		if (L_sub(st->sumD[i], summin) < 0)
    517 		{
    518 			indices[2] = i;
    519 			summin = st->sumD[i];
    520 		}
    521 	}
    522 
    523 	/* Find the second largest distance */
    524 	summax2nd = -2147483647L;
    525 	indices[1] = -1;
    526 	for (i = 0; i < DTX_HIST_SIZE; i++)
    527 	{
    528 		if ((L_sub(st->sumD[i], summax2nd) > 0) && (sub(i, indices[0]) != 0))
    529 		{
    530 			indices[1] = i;
    531 			summax2nd = st->sumD[i];
    532 		}
    533 	}
    534 
    535 	for (i = 0; i < 3; i++)
    536 	{
    537 		indices[i] = sub(st->hist_ptr, indices[i]);
    538 		if (indices[i] < 0)
    539 		{
    540 			indices[i] = add(indices[i], DTX_HIST_SIZE);
    541 		}
    542 	}
    543 
    544 	/* If maximum distance/MED_THRESH is smaller than minimum distance */
    545 	/* then the median ISF vector replacement is not performed         */
    546 	tmp = norm_l(summax);
    547 	summax = (summax << tmp);
    548 	summin = (summin << tmp);
    549 	L_tmp = L_mult(voround(summax), INV_MED_THRESH);
    550 	if(L_tmp <= summin)
    551 	{
    552 		indices[0] = -1;
    553 	}
    554 	/* If second largest distance/MED_THRESH is smaller than     */
    555 	/* minimum distance then the median ISF vector replacement is    */
    556 	/* not performed                                                 */
    557 	summax2nd = L_shl(summax2nd, tmp);
    558 	L_tmp = L_mult(voround(summax2nd), INV_MED_THRESH);
    559 	if(L_tmp <= summin)
    560 	{
    561 		indices[1] = -1;
    562 	}
    563 	return;
    564 }
    565 
    566 static Word16 dithering_control(
    567 		dtx_encState * st
    568 		)
    569 {
    570 	Word16 tmp, mean, CN_dith, gain_diff;
    571 	Word32 i, ISF_diff;
    572 
    573 	/* determine how stationary the spectrum of background noise is */
    574 	ISF_diff = 0;
    575 	for (i = 0; i < 8; i++)
    576 	{
    577 		ISF_diff = L_add(ISF_diff, st->sumD[i]);
    578 	}
    579 	if ((ISF_diff >> 26) > 0)
    580 	{
    581 		CN_dith = 1;
    582 	} else
    583 	{
    584 		CN_dith = 0;
    585 	}
    586 
    587 	/* determine how stationary the energy of background noise is */
    588 	mean = 0;
    589 	for (i = 0; i < DTX_HIST_SIZE; i++)
    590 	{
    591 		mean = add(mean, st->log_en_hist[i]);
    592 	}
    593 	mean = (mean >> 3);
    594 	gain_diff = 0;
    595 	for (i = 0; i < DTX_HIST_SIZE; i++)
    596 	{
    597 		tmp = abs_s(sub(st->log_en_hist[i], mean));
    598 		gain_diff = add(gain_diff, tmp);
    599 	}
    600 	if (gain_diff > GAIN_THR)
    601 	{
    602 		CN_dith = 1;
    603 	}
    604 	return CN_dith;
    605 }
    606