Home | History | Annotate | Download | only in libspeex
      1 /* Copyright (C) 2002 Jean-Marc Valin
      2    File: vbr.c
      3 
      4    VBR-related routines
      5 
      6    Redistribution and use in source and binary forms, with or without
      7    modification, are permitted provided that the following conditions
      8    are met:
      9 
     10    - Redistributions of source code must retain the above copyright
     11    notice, this list of conditions and the following disclaimer.
     12 
     13    - Redistributions in binary form must reproduce the above copyright
     14    notice, this list of conditions and the following disclaimer in the
     15    documentation and/or other materials provided with the distribution.
     16 
     17    - Neither the name of the Xiph.org Foundation nor the names of its
     18    contributors may be used to endorse or promote products derived from
     19    this software without specific prior written permission.
     20 
     21    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     22    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     23    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     24    A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
     25    CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     26    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     27    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     28    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
     29    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
     30    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     31    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     32 
     33 */
     34 
     35 #ifdef HAVE_CONFIG_H
     36 #include "config.h"
     37 #endif
     38 
     39 #include "vbr.h"
     40 #include <math.h>
     41 
     42 
/* Square of x. The argument is expanded twice, so pass only expressions
   without side effects. */
#define sqr(x) ((x)*(x))

/* Energy floor: added to the frame energy before taking its log, used to seed
   the log-energy history and the noise estimate, and used as the threshold a
   frame's energy must exceed before it is allowed to update the noise estimate. */
#define MIN_ENERGY 6000
/* Exponent applied to the frame energy (pow(ener, NOISE_POW)) before it is
   compared against, or folded into, the running noise level. */
#define NOISE_POW .3
     47 
     48 #ifndef DISABLE_VBR
     49 
     50 const float vbr_nb_thresh[9][11]={
     51    {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f}, /*   CNG   */
     52    { 4.0f,  2.5f,  2.0f,  1.2f,  0.5f,  0.0f, -0.5f, -0.7f, -0.8f, -0.9f, -1.0f}, /*  2 kbps */
     53    {10.0f,  6.5f,  5.2f,  4.5f,  3.9f,  3.5f,  3.0f,  2.5f,  2.3f,  1.8f,  1.0f}, /*  6 kbps */
     54    {11.0f,  8.8f,  7.5f,  6.5f,  5.0f,  3.9f,  3.9f,  3.9f,  3.5f,  3.0f,  1.0f}, /*  8 kbps */
     55    {11.0f, 11.0f,  9.9f,  8.5f,  7.0f,  6.0f,  4.5f,  4.0f,  4.0f,  4.0f,  2.0f}, /* 11 kbps */
     56    {11.0f, 11.0f, 11.0f, 11.0f,  9.5f,  8.5f,  8.0f,  7.0f,  6.0f,  5.0f,  3.0f}, /* 15 kbps */
     57    {11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f,  9.5f,  8.5f,  7.0f,  6.0f,  5.0f}, /* 18 kbps */
     58    {11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f,  9.8f,  9.5f,  7.5f}, /* 24 kbps */
     59    { 7.0f,  4.5f,  3.7f,  3.0f,  2.5f,  2.0f,  1.8f,  1.5f,  1.0f,  0.0f,  0.0f}  /*  4 kbps */
     60 };
     61 
     62 
     63 const float vbr_hb_thresh[5][11]={
     64    {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f}, /* silence */
     65    {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f}, /*  2 kbps */
     66    {11.0f, 11.0f,  9.5f,  8.5f,  7.5f,  6.0f,  5.0f,  3.9f,  3.0f,  2.0f,  1.0f}, /*  6 kbps */
     67    {11.0f, 11.0f, 11.0f, 11.0f, 11.0f,  9.5f,  8.7f,  7.8f,  7.0f,  6.5f,  4.0f}, /* 10 kbps */
     68    {11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f,  9.8f,  7.5f,  5.5f}  /* 18 kbps */
     69 };
     70 
     71 const float vbr_uhb_thresh[2][11]={
     72    {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f}, /* silence */
     73    { 3.9f,  2.5f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f, -1.0f}  /*  2 kbps */
     74 };
     75 
     76 void vbr_init(VBRState *vbr)
     77 {
     78    int i;
     79 
     80    vbr->average_energy=0;
     81    vbr->last_energy=1;
     82    vbr->accum_sum=0;
     83    vbr->energy_alpha=.1;
     84    vbr->soft_pitch=0;
     85    vbr->last_pitch_coef=0;
     86    vbr->last_quality=0;
     87 
     88    vbr->noise_accum = .05*pow(MIN_ENERGY, NOISE_POW);
     89    vbr->noise_accum_count=.05;
     90    vbr->noise_level=vbr->noise_accum/vbr->noise_accum_count;
     91    vbr->consec_noise=0;
     92 
     93 
     94    for (i=0;i<VBR_MEMORY_SIZE;i++)
     95       vbr->last_log_energy[i] = log(MIN_ENERGY);
     96 }
     97 
     98 
/*
  This function should analyse the signal and decide how critical the
  coding error will be perceptually. The following factors should be
  taken into account:

  - Attacks (positive energy derivative) should be coded with more bits.

  - Stationary voiced segments should receive more bits.

  - Segments with (very) low absolute energy should receive fewer bits
    (maybe only shaped noise?).

  - DTX for near-zero energy?

  - Stationary fricative segments should receive fewer bits.

  - Temporal masking: when the energy slope is decreasing, decrease the
    bit-rate.

  - Decrease the bit-rate for males (low pitch)?

  - (Wideband only) fewer bits in the high band when the signal is very
    non-stationary (high-frequency noise is harder to notice)?

*/
    123 
    124 float vbr_analysis(VBRState *vbr, spx_word16_t *sig, int len, int pitch, float pitch_coef)
    125 {
    126    int i;
    127    float ener=0, ener1=0, ener2=0;
    128    float qual=7;
    129    int va;
    130    float log_energy;
    131    float non_st=0;
    132    float voicing;
    133    float pow_ener;
    134 
    135    for (i=0;i<len>>1;i++)
    136       ener1 += ((float)sig[i])*sig[i];
    137 
    138    for (i=len>>1;i<len;i++)
    139       ener2 += ((float)sig[i])*sig[i];
    140    ener=ener1+ener2;
    141 
    142    log_energy = log(ener+MIN_ENERGY);
    143    for (i=0;i<VBR_MEMORY_SIZE;i++)
    144       non_st += sqr(log_energy-vbr->last_log_energy[i]);
    145    non_st =  non_st/(30*VBR_MEMORY_SIZE);
    146    if (non_st>1)
    147       non_st=1;
    148 
    149    voicing = 3*(pitch_coef-.4)*fabs(pitch_coef-.4);
    150    vbr->average_energy = (1-vbr->energy_alpha)*vbr->average_energy + vbr->energy_alpha*ener;
    151    vbr->noise_level=vbr->noise_accum/vbr->noise_accum_count;
    152    pow_ener = pow(ener,NOISE_POW);
    153    if (vbr->noise_accum_count<.06 && ener>MIN_ENERGY)
    154       vbr->noise_accum = .05*pow_ener;
    155 
    156    if ((voicing<.3 && non_st < .2 && pow_ener < 1.2*vbr->noise_level)
    157        || (voicing<.3 && non_st < .05 && pow_ener < 1.5*vbr->noise_level)
    158        || (voicing<.4 && non_st < .05 && pow_ener < 1.2*vbr->noise_level)
    159        || (voicing<0 && non_st < .05))
    160    {
    161       float tmp;
    162       va = 0;
    163       vbr->consec_noise++;
    164       if (pow_ener > 3*vbr->noise_level)
    165          tmp = 3*vbr->noise_level;
    166       else
    167          tmp = pow_ener;
    168       if (vbr->consec_noise>=4)
    169       {
    170          vbr->noise_accum = .95*vbr->noise_accum + .05*tmp;
    171          vbr->noise_accum_count = .95*vbr->noise_accum_count + .05;
    172       }
    173    } else {
    174       va = 1;
    175       vbr->consec_noise=0;
    176    }
    177 
    178    if (pow_ener < vbr->noise_level && ener>MIN_ENERGY)
    179    {
    180       vbr->noise_accum = .95*vbr->noise_accum + .05*pow_ener;
    181       vbr->noise_accum_count = .95*vbr->noise_accum_count + .05;
    182    }
    183 
    184    /* Checking for very low absolute energy */
    185    if (ener < 30000)
    186    {
    187       qual -= .7;
    188       if (ener < 10000)
    189          qual-=.7;
    190       if (ener < 3000)
    191          qual-=.7;
    192    } else {
    193       float short_diff, long_diff;
    194       short_diff = log((ener+1)/(1+vbr->last_energy));
    195       long_diff = log((ener+1)/(1+vbr->average_energy));
    196       /*fprintf (stderr, "%f %f\n", short_diff, long_diff);*/
    197 
    198       if (long_diff<-5)
    199          long_diff=-5;
    200       if (long_diff>2)
    201          long_diff=2;
    202 
    203       if (long_diff>0)
    204          qual += .6*long_diff;
    205       if (long_diff<0)
    206          qual += .5*long_diff;
    207       if (short_diff>0)
    208       {
    209          if (short_diff>5)
    210             short_diff=5;
    211          qual += .5*short_diff;
    212       }
    213       /* Checking for energy increases */
    214       if (ener2 > 1.6*ener1)
    215          qual += .5;
    216    }
    217    vbr->last_energy = ener;
    218    vbr->soft_pitch = .6*vbr->soft_pitch + .4*pitch_coef;
    219    qual += 2.2*((pitch_coef-.4) + (vbr->soft_pitch-.4));
    220 
    221    if (qual < vbr->last_quality)
    222       qual = .5*qual + .5*vbr->last_quality;
    223    if (qual<4)
    224       qual=4;
    225    if (qual>10)
    226       qual=10;
    227 
    228    /*
    229    if (vbr->consec_noise>=2)
    230       qual-=1.3;
    231    if (vbr->consec_noise>=5)
    232       qual-=1.3;
    233    if (vbr->consec_noise>=12)
    234       qual-=1.3;
    235    */
    236    if (vbr->consec_noise>=3)
    237       qual=4;
    238 
    239    if (vbr->consec_noise)
    240       qual -= 1.0 * (log(3.0 + vbr->consec_noise)-log(3));
    241    if (qual<0)
    242       qual=0;
    243 
    244    if (ener<60000)
    245    {
    246       if (vbr->consec_noise>2)
    247          qual-=0.5*(log(3.0 + vbr->consec_noise)-log(3));
    248       if (ener<10000&&vbr->consec_noise>2)
    249          qual-=0.5*(log(3.0 + vbr->consec_noise)-log(3));
    250       if (qual<0)
    251          qual=0;
    252       qual += .3*log(.0001+ener/60000.0);
    253    }
    254    if (qual<-1)
    255       qual=-1;
    256 
    257    /*printf ("%f %f %f %f %d\n", qual, voicing, non_st, pow_ener/(.01+vbr->noise_level), va);*/
    258 
    259    vbr->last_pitch_coef = pitch_coef;
    260    vbr->last_quality = qual;
    261 
    262    for (i=VBR_MEMORY_SIZE-1;i>0;i--)
    263       vbr->last_log_energy[i] = vbr->last_log_energy[i-1];
    264    vbr->last_log_energy[0] = log_energy;
    265 
    266    /*printf ("VBR: %f %f %f %d %f\n", (float)(log_energy-log(vbr->average_energy+MIN_ENERGY)), non_st, voicing, va, vbr->noise_level);*/
    267 
    268    return qual;
    269 }
    270 
    271 void vbr_destroy(VBRState *vbr)
    272 {
    273 }
    274 
    275 #endif /* #ifndef DISABLE_VBR */
    276