Home | History | Annotate | Download | only in src
      1 /*
      2  ** Copyright 2003-2010, VisualOn, Inc.
      3  **
      4  ** Licensed under the Apache License, Version 2.0 (the "License");
      5  ** you may not use this file except in compliance with the License.
      6  ** You may obtain a copy of the License at
      7  **
      8  **     http://www.apache.org/licenses/LICENSE-2.0
      9  **
     10  ** Unless required by applicable law or agreed to in writing, software
     11  ** distributed under the License is distributed on an "AS IS" BASIS,
     12  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  ** See the License for the specific language governing permissions and
     14  ** limitations under the License.
     15  */
     16 
     17 /***********************************************************************
     18 *       File: dtx.c                                                    *
     19 *                                                                      *
     20 *       Description:DTX functions                                  *
     21 *                                                                      *
     22 ************************************************************************/
     23 
     24 #include <stdio.h>
     25 #include <stdlib.h>
     26 #include "typedef.h"
     27 #include "basic_op.h"
     28 #include "oper_32b.h"
     29 #include "math_op.h"
     30 #include "cnst.h"
     31 #include "acelp.h"                         /* prototype of functions    */
     32 #include "bits.h"
     33 #include "dtx.h"
     34 #include "log2.h"
     35 #include "mem_align.h"
     36 
     37 static void aver_isf_history(
     38         Word16 isf_old[],
     39         Word16 indices[],
     40         Word32 isf_aver[]
     41         );
     42 
     43 static void find_frame_indices(
     44         Word16 isf_old_tx[],
     45         Word16 indices[],
     46         dtx_encState * st
     47         );
     48 
     49 static Word16 dithering_control(
     50         dtx_encState * st
     51         );
     52 
     53 /* excitation energy adjustment depending on speech coder mode used, Q7 */
     54 static Word16 en_adjust[9] =
     55 {
     56     230,                                   /* mode0 = 7k  :  -5.4dB  */
     57     179,                                   /* mode1 = 9k  :  -4.2dB  */
     58     141,                                   /* mode2 = 12k :  -3.3dB  */
     59     128,                                   /* mode3 = 14k :  -3.0dB  */
     60     122,                                   /* mode4 = 16k :  -2.85dB */
     61     115,                                   /* mode5 = 18k :  -2.7dB  */
     62     115,                                   /* mode6 = 20k :  -2.7dB  */
     63     115,                                   /* mode7 = 23k :  -2.7dB  */
     64     115                                    /* mode8 = 24k :  -2.7dB  */
     65 };
     66 
     67 /**************************************************************************
     68 *
     69 * Function    : dtx_enc_init
     70 *
     71 **************************************************************************/
     72 Word16 dtx_enc_init(dtx_encState ** st, Word16 isf_init[], VO_MEM_OPERATOR *pMemOP)
     73 {
     74     dtx_encState *s;
     75 
     76     if (st == (dtx_encState **) NULL)
     77     {
     78         fprintf(stderr, "dtx_enc_init: invalid parameter\n");
     79         return -1;
     80     }
     81     *st = NULL;
     82 
     83     /* allocate memory */
     84     if ((s = (dtx_encState *)mem_malloc(pMemOP, sizeof(dtx_encState), 32, VO_INDEX_ENC_AMRWB)) == NULL)
     85     {
     86         fprintf(stderr, "dtx_enc_init: can not malloc state structure\n");
     87         return -1;
     88     }
     89     dtx_enc_reset(s, isf_init);
     90     *st = s;
     91     return 0;
     92 }
     93 
     94 /**************************************************************************
     95 *
     96 * Function    : dtx_enc_reset
     97 *
     98 **************************************************************************/
     99 Word16 dtx_enc_reset(dtx_encState * st, Word16 isf_init[])
    100 {
    101     Word32 i;
    102 
    103     if (st == (dtx_encState *) NULL)
    104     {
    105         fprintf(stderr, "dtx_enc_reset: invalid parameter\n");
    106         return -1;
    107     }
    108     st->hist_ptr = 0;
    109     st->log_en_index = 0;
    110 
    111     /* Init isf_hist[] */
    112     for (i = 0; i < DTX_HIST_SIZE; i++)
    113     {
    114         Copy(isf_init, &st->isf_hist[i * M], M);
    115     }
    116     st->cng_seed = RANDOM_INITSEED;
    117 
    118     /* Reset energy history */
    119     Set_zero(st->log_en_hist, DTX_HIST_SIZE);
    120 
    121     st->dtxHangoverCount = DTX_HANG_CONST;
    122     st->decAnaElapsedCount = 32767;
    123 
    124     for (i = 0; i < 28; i++)
    125     {
    126         st->D[i] = 0;
    127     }
    128 
    129     for (i = 0; i < DTX_HIST_SIZE - 1; i++)
    130     {
    131         st->sumD[i] = 0;
    132     }
    133 
    134     return 1;
    135 }
    136 
    137 /**************************************************************************
    138 *
    139 * Function    : dtx_enc_exit
    140 *
    141 **************************************************************************/
    142 void dtx_enc_exit(dtx_encState ** st, VO_MEM_OPERATOR *pMemOP)
    143 {
    144     if (st == NULL || *st == NULL)
    145         return;
    146     /* deallocate memory */
    147     mem_free(pMemOP, *st, VO_INDEX_ENC_AMRWB);
    148     *st = NULL;
    149     return;
    150 }
    151 
    152 
    153 /**************************************************************************
    154 *
    155 * Function    : dtx_enc
    156 *
    157 **************************************************************************/
    158 Word16 dtx_enc(
    159         dtx_encState * st,                    /* i/o : State struct                                         */
    160         Word16 isf[M],                        /* o   : CN ISF vector                                        */
    161         Word16 * exc2,                        /* o   : CN excitation                                        */
    162         Word16 ** prms
    163           )
    164 {
    165     Word32 i, j;
    166     Word16 indice[7];
    167     Word16 log_en, gain, level, exp, exp0, tmp;
    168     Word16 log_en_int_e, log_en_int_m;
    169     Word32 L_isf[M], ener32, level32;
    170     Word16 isf_order[3];
    171     Word16 CN_dith;
    172 
    173     /* VOX mode computation of SID parameters */
    174     log_en = 0;
    175     for (i = 0; i < M; i++)
    176     {
    177         L_isf[i] = 0;
    178     }
    179     /* average energy and isf */
    180     for (i = 0; i < DTX_HIST_SIZE; i++)
    181     {
    182         /* Division by DTX_HIST_SIZE = 8 has been done in dtx_buffer. log_en is in Q10 */
    183         log_en = add(log_en, st->log_en_hist[i]);
    184 
    185     }
    186     find_frame_indices(st->isf_hist, isf_order, st);
    187     aver_isf_history(st->isf_hist, isf_order, L_isf);
    188 
    189     for (j = 0; j < M; j++)
    190     {
    191         isf[j] = (Word16)(L_isf[j] >> 3);  /* divide by 8 */
    192     }
    193 
    194     /* quantize logarithmic energy to 6 bits (-6 : 66 dB) which corresponds to -2:22 in log2(E).  */
    195     /* st->log_en_index = (short)( (log_en + 2.0) * 2.625 ); */
    196 
    197     /* increase dynamics to 7 bits (Q8) */
    198     log_en = (log_en >> 2);
    199 
    200     /* Add 2 in Q8 = 512 to get log2(E) between 0:24 */
    201     log_en = add(log_en, 512);
    202 
    203     /* Multiply by 2.625 to get full 6 bit range. 2.625 = 21504 in Q13. The result is in Q6 */
    204     log_en = mult(log_en, 21504);
    205 
    206     /* Quantize Energy */
    207     st->log_en_index = shr(log_en, 6);
    208 
    209     if(st->log_en_index > 63)
    210     {
    211         st->log_en_index = 63;
    212     }
    213     if (st->log_en_index < 0)
    214     {
    215         st->log_en_index = 0;
    216     }
    217     /* Quantize ISFs */
    218     Qisf_ns(isf, isf, indice);
    219 
    220 
    221     Parm_serial(indice[0], 6, prms);
    222     Parm_serial(indice[1], 6, prms);
    223     Parm_serial(indice[2], 6, prms);
    224     Parm_serial(indice[3], 5, prms);
    225     Parm_serial(indice[4], 5, prms);
    226 
    227     Parm_serial((st->log_en_index), 6, prms);
    228 
    229     CN_dith = dithering_control(st);
    230     Parm_serial(CN_dith, 1, prms);
    231 
    232     /* level = (float)( pow( 2.0f, (float)st->log_en_index / 2.625 - 2.0 ) );    */
    233     /* log2(E) in Q9 (log2(E) lies in between -2:22) */
    234     log_en = shl(st->log_en_index, 15 - 6);
    235 
    236     /* Divide by 2.625; log_en will be between 0:24  */
    237     log_en = mult(log_en, 12483);
    238     /* the result corresponds to log2(gain) in Q10 */
    239 
    240     /* Find integer part  */
    241     log_en_int_e = (log_en >> 10);
    242 
    243     /* Find fractional part */
    244     log_en_int_m = (Word16) (log_en & 0x3ff);
    245     log_en_int_m = shl(log_en_int_m, 5);
    246 
    247     /* Subtract 2 from log_en in Q9, i.e divide the gain by 2 (energy by 4) */
    248     /* Add 16 in order to have the result of pow2 in Q16 */
    249     log_en_int_e = add(log_en_int_e, 16 - 1);
    250 
    251     level32 = Pow2(log_en_int_e, log_en_int_m); /* Q16 */
    252     exp0 = norm_l(level32);
    253     level32 = (level32 << exp0);        /* level in Q31 */
    254     exp0 = (15 - exp0);
    255     level = extract_h(level32);            /* level in Q15 */
    256 
    257     /* generate white noise vector */
    258     for (i = 0; i < L_FRAME; i++)
    259     {
    260         exc2[i] = (Random(&(st->cng_seed)) >> 4);
    261     }
    262 
    263     /* gain = level / sqrt(ener) * sqrt(L_FRAME) */
    264 
    265     /* energy of generated excitation */
    266     ener32 = Dot_product12(exc2, exc2, L_FRAME, &exp);
    267 
    268     Isqrt_n(&ener32, &exp);
    269 
    270     gain = extract_h(ener32);
    271 
    272     gain = mult(level, gain);              /* gain in Q15 */
    273 
    274     exp = add(exp0, exp);
    275 
    276     /* Multiply by sqrt(L_FRAME)=16, i.e. shift left by 4 */
    277     exp += 4;
    278 
    279     for (i = 0; i < L_FRAME; i++)
    280     {
    281         tmp = mult(exc2[i], gain);         /* Q0 * Q15 */
    282         exc2[i] = shl(tmp, exp);
    283     }
    284 
    285     return 0;
    286 }
    287 
    288 /**************************************************************************
    289 *
    290 * Function    : dtx_buffer Purpose     : handles the DTX buffer
    291 *
    292 **************************************************************************/
    293 Word16 dtx_buffer(
    294         dtx_encState * st,                    /* i/o : State struct                    */
    295         Word16 isf_new[],                     /* i   : isf vector                      */
    296         Word32 enr,                           /* i   : residual energy (in L_FRAME)    */
    297         Word16 codec_mode
    298         )
    299 {
    300     Word16 log_en;
    301 
    302     Word16 log_en_e;
    303     Word16 log_en_m;
    304     st->hist_ptr = add(st->hist_ptr, 1);
    305     if(st->hist_ptr == DTX_HIST_SIZE)
    306     {
    307         st->hist_ptr = 0;
    308     }
    309     /* copy lsp vector into buffer */
    310     Copy(isf_new, &st->isf_hist[st->hist_ptr * M], M);
    311 
    312     /* log_en = (float)log10(enr*0.0059322)/(float)log10(2.0f);  */
    313     Log2(enr, &log_en_e, &log_en_m);
    314 
    315     /* convert exponent and mantissa to Word16 Q7. Q7 is used to simplify averaging in dtx_enc */
    316     log_en = shl(log_en_e, 7);             /* Q7 */
    317     log_en = add(log_en, shr(log_en_m, 15 - 7));
    318 
    319     /* Find energy per sample by multiplying with 0.0059322, i.e subtract log2(1/0.0059322) = 7.39722 The
    320      * constant 0.0059322 takes into account windowings and analysis length from autocorrelation
    321      * computations; 7.39722 in Q7 = 947  */
    322     /* Subtract 3 dB = 0.99658 in log2(E) = 127 in Q7. */
    323     /* log_en = sub( log_en, 947 + en_adjust[codec_mode] ); */
    324 
    325     /* Find energy per sample (divide by L_FRAME=256), i.e subtract log2(256) = 8.0  (1024 in Q7) */
    326     /* Subtract 3 dB = 0.99658 in log2(E) = 127 in Q7. */
    327 
    328     log_en = sub(log_en, add(1024, en_adjust[codec_mode]));
    329 
    330     /* Insert into the buffer */
    331     st->log_en_hist[st->hist_ptr] = log_en;
    332     return 0;
    333 }
    334 
    335 /**************************************************************************
    336 *
    337 * Function    : tx_dtx_handler Purpose     : adds extra speech hangover
    338 *                                            to analyze speech on
    339 *                                            the decoding side.
    340 **************************************************************************/
    341 void tx_dtx_handler(dtx_encState * st,     /* i/o : State struct           */
    342         Word16 vad_flag,                      /* i   : vad decision           */
    343         Word16 * usedMode                     /* i/o : mode changed or not    */
    344         )
    345 {
    346 
    347     /* this state machine is in synch with the GSMEFR txDtx machine      */
    348     st->decAnaElapsedCount = add(st->decAnaElapsedCount, 1);
    349 
    350     if (vad_flag != 0)
    351     {
    352         st->dtxHangoverCount = DTX_HANG_CONST;
    353     } else
    354     {                                      /* non-speech */
    355         if (st->dtxHangoverCount == 0)
    356         {                                  /* out of decoder analysis hangover  */
    357             st->decAnaElapsedCount = 0;
    358             *usedMode = MRDTX;
    359         } else
    360         {                                  /* in possible analysis hangover */
    361             st->dtxHangoverCount = sub(st->dtxHangoverCount, 1);
    362 
    363             /* decAnaElapsedCount + dtxHangoverCount < DTX_ELAPSED_FRAMES_THRESH */
    364             if (sub(add(st->decAnaElapsedCount, st->dtxHangoverCount),
    365                         DTX_ELAPSED_FRAMES_THRESH) < 0)
    366             {
    367                 *usedMode = MRDTX;
    368                 /* if short time since decoder update, do not add extra HO */
    369             }
    370             /* else override VAD and stay in speech mode *usedMode and add extra hangover */
    371         }
    372     }
    373 
    374     return;
    375 }
    376 
    377 
    378 
    379 static void aver_isf_history(
    380         Word16 isf_old[],
    381         Word16 indices[],
    382         Word32 isf_aver[]
    383         )
    384 {
    385     Word32 i, j, k;
    386     Word16 isf_tmp[2 * M];
    387     Word32 L_tmp;
    388 
    389     /* Memorize in isf_tmp[][] the ISF vectors to be replaced by */
    390     /* the median ISF vector prior to the averaging               */
    391     for (k = 0; k < 2; k++)
    392     {
    393         if ((indices[k] + 1) != 0)
    394         {
    395             for (i = 0; i < M; i++)
    396             {
    397                 isf_tmp[k * M + i] = isf_old[indices[k] * M + i];
    398                 isf_old[indices[k] * M + i] = isf_old[indices[2] * M + i];
    399             }
    400         }
    401     }
    402 
    403     /* Perform the ISF averaging */
    404     for (j = 0; j < M; j++)
    405     {
    406         L_tmp = 0;
    407 
    408         for (i = 0; i < DTX_HIST_SIZE; i++)
    409         {
    410             L_tmp = L_add(L_tmp, L_deposit_l(isf_old[i * M + j]));
    411         }
    412         isf_aver[j] = L_tmp;
    413     }
    414 
    415     /* Retrieve from isf_tmp[][] the ISF vectors saved prior to averaging */
    416     for (k = 0; k < 2; k++)
    417     {
    418         if ((indices[k] + 1) != 0)
    419         {
    420             for (i = 0; i < M; i++)
    421             {
    422                 isf_old[indices[k] * M + i] = isf_tmp[k * M + i];
    423             }
    424         }
    425     }
    426 
    427     return;
    428 }
    429 
    430 static void find_frame_indices(
    431         Word16 isf_old_tx[],
    432         Word16 indices[],
    433         dtx_encState * st
    434         )
    435 {
    436     Word32 L_tmp, summin, summax, summax2nd;
    437     Word16 i, j, tmp;
    438     Word16 ptr;
    439 
    440     /* Remove the effect of the oldest frame from the column */
    441     /* sum sumD[0..DTX_HIST_SIZE-1]. sumD[DTX_HIST_SIZE] is    */
    442     /* not updated since it will be removed later.           */
    443 
    444     tmp = DTX_HIST_SIZE_MIN_ONE;
    445     j = -1;
    446     for (i = 0; i < DTX_HIST_SIZE_MIN_ONE; i++)
    447     {
    448         j = add(j, tmp);
    449         st->sumD[i] = L_sub(st->sumD[i], st->D[j]);
    450         tmp = sub(tmp, 1);
    451     }
    452 
    453     /* Shift the column sum sumD. The element sumD[DTX_HIST_SIZE-1]    */
    454     /* corresponding to the oldest frame is removed. The sum of     */
    455     /* the distances between the latest isf and other isfs, */
    456     /* i.e. the element sumD[0], will be computed during this call. */
    457     /* Hence this element is initialized to zero.                   */
    458 
    459     for (i = DTX_HIST_SIZE_MIN_ONE; i > 0; i--)
    460     {
    461         st->sumD[i] = st->sumD[i - 1];
    462     }
    463     st->sumD[0] = 0;
    464 
    465     /* Remove the oldest frame from the distance matrix.           */
    466     /* Note that the distance matrix is replaced by a one-         */
    467     /* dimensional array to save static memory.                    */
    468 
    469     tmp = 0;
    470     for (i = 27; i >= 12; i = (Word16) (i - tmp))
    471     {
    472         tmp = add(tmp, 1);
    473         for (j = tmp; j > 0; j--)
    474         {
    475             st->D[i - j + 1] = st->D[i - j - tmp];
    476         }
    477     }
    478 
    479     /* Compute the first column of the distance matrix D            */
    480     /* (squared Euclidean distances from isf1[] to isf_old_tx[][]). */
    481 
    482     ptr = st->hist_ptr;
    483     for (i = 1; i < DTX_HIST_SIZE; i++)
    484     {
    485         /* Compute the distance between the latest isf and the other isfs. */
    486         ptr = sub(ptr, 1);
    487         if (ptr < 0)
    488         {
    489             ptr = DTX_HIST_SIZE_MIN_ONE;
    490         }
    491         L_tmp = 0;
    492         for (j = 0; j < M; j++)
    493         {
    494             tmp = sub(isf_old_tx[st->hist_ptr * M + j], isf_old_tx[ptr * M + j]);
    495             L_tmp = L_mac(L_tmp, tmp, tmp);
    496         }
    497         st->D[i - 1] = L_tmp;
    498 
    499         /* Update also the column sums. */
    500         st->sumD[0] = L_add(st->sumD[0], st->D[i - 1]);
    501         st->sumD[i] = L_add(st->sumD[i], st->D[i - 1]);
    502     }
    503 
    504     /* Find the minimum and maximum distances */
    505     summax = st->sumD[0];
    506     summin = st->sumD[0];
    507     indices[0] = 0;
    508     indices[2] = 0;
    509     for (i = 1; i < DTX_HIST_SIZE; i++)
    510     {
    511         if (L_sub(st->sumD[i], summax) > 0)
    512         {
    513             indices[0] = i;
    514             summax = st->sumD[i];
    515         }
    516         if (L_sub(st->sumD[i], summin) < 0)
    517         {
    518             indices[2] = i;
    519             summin = st->sumD[i];
    520         }
    521     }
    522 
    523     /* Find the second largest distance */
    524     summax2nd = -2147483647L;
    525     indices[1] = -1;
    526     for (i = 0; i < DTX_HIST_SIZE; i++)
    527     {
    528         if ((L_sub(st->sumD[i], summax2nd) > 0) && (sub(i, indices[0]) != 0))
    529         {
    530             indices[1] = i;
    531             summax2nd = st->sumD[i];
    532         }
    533     }
    534 
    535     for (i = 0; i < 3; i++)
    536     {
    537         indices[i] = sub(st->hist_ptr, indices[i]);
    538         if (indices[i] < 0)
    539         {
    540             indices[i] = add(indices[i], DTX_HIST_SIZE);
    541         }
    542     }
    543 
    544     /* If maximum distance/MED_THRESH is smaller than minimum distance */
    545     /* then the median ISF vector replacement is not performed         */
    546     tmp = norm_l(summax);
    547     summax = (summax << tmp);
    548     summin = (summin << tmp);
    549     L_tmp = L_mult(voround(summax), INV_MED_THRESH);
    550     if(L_tmp <= summin)
    551     {
    552         indices[0] = -1;
    553     }
    554     /* If second largest distance/MED_THRESH is smaller than     */
    555     /* minimum distance then the median ISF vector replacement is    */
    556     /* not performed                                                 */
    557     summax2nd = L_shl(summax2nd, tmp);
    558     L_tmp = L_mult(voround(summax2nd), INV_MED_THRESH);
    559     if(L_tmp <= summin)
    560     {
    561         indices[1] = -1;
    562     }
    563     return;
    564 }
    565 
    566 static Word16 dithering_control(
    567         dtx_encState * st
    568         )
    569 {
    570     Word16 tmp, mean, CN_dith, gain_diff;
    571     Word32 i, ISF_diff;
    572 
    573     /* determine how stationary the spectrum of background noise is */
    574     ISF_diff = 0;
    575     for (i = 0; i < 8; i++)
    576     {
    577         ISF_diff = L_add(ISF_diff, st->sumD[i]);
    578     }
    579     if ((ISF_diff >> 26) > 0)
    580     {
    581         CN_dith = 1;
    582     } else
    583     {
    584         CN_dith = 0;
    585     }
    586 
    587     /* determine how stationary the energy of background noise is */
    588     mean = 0;
    589     for (i = 0; i < DTX_HIST_SIZE; i++)
    590     {
    591         mean = add(mean, st->log_en_hist[i]);
    592     }
    593     mean = (mean >> 3);
    594     gain_diff = 0;
    595     for (i = 0; i < DTX_HIST_SIZE; i++)
    596     {
    597         tmp = abs_s(sub(st->log_en_hist[i], mean));
    598         gain_diff = add(gain_diff, tmp);
    599     }
    600     if (gain_diff > GAIN_THR)
    601     {
    602         CN_dith = 1;
    603     }
    604     return CN_dith;
    605 }
    606