1 /* Copyright (C) 2002 Jean-Marc Valin 2 File: vbr.c 3 4 VBR-related routines 5 6 Redistribution and use in source and binary forms, with or without 7 modification, are permitted provided that the following conditions 8 are met: 9 10 - Redistributions of source code must retain the above copyright 11 notice, this list of conditions and the following disclaimer. 12 13 - Redistributions in binary form must reproduce the above copyright 14 notice, this list of conditions and the following disclaimer in the 15 documentation and/or other materials provided with the distribution. 16 17 - Neither the name of the Xiph.org Foundation nor the names of its 18 contributors may be used to endorse or promote products derived from 19 this software without specific prior written permission. 20 21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR 25 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 26 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 27 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 28 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 29 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 30 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 31 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 33 */ 34 35 #ifdef HAVE_CONFIG_H 36 #include "config.h" 37 #endif 38 39 #include "vbr.h" 40 #include <math.h> 41 42 43 #define sqr(x) ((x)*(x)) 44 45 #define MIN_ENERGY 6000 46 #define NOISE_POW .3 47 48 #ifndef DISABLE_VBR 49 50 const float vbr_nb_thresh[9][11]={ 51 {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f}, /* CNG */ 52 { 4.0f, 2.5f, 2.0f, 1.2f, 0.5f, 0.0f, -0.5f, -0.7f, -0.8f, -0.9f, -1.0f}, /* 2 kbps */ 53 {10.0f, 6.5f, 5.2f, 4.5f, 3.9f, 3.5f, 3.0f, 2.5f, 2.3f, 1.8f, 1.0f}, /* 6 kbps */ 54 {11.0f, 8.8f, 7.5f, 6.5f, 5.0f, 3.9f, 3.9f, 3.9f, 3.5f, 3.0f, 1.0f}, /* 8 kbps */ 55 {11.0f, 11.0f, 9.9f, 8.5f, 7.0f, 6.0f, 4.5f, 4.0f, 4.0f, 4.0f, 2.0f}, /* 11 kbps */ 56 {11.0f, 11.0f, 11.0f, 11.0f, 9.5f, 8.5f, 8.0f, 7.0f, 6.0f, 5.0f, 3.0f}, /* 15 kbps */ 57 {11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 9.5f, 8.5f, 7.0f, 6.0f, 5.0f}, /* 18 kbps */ 58 {11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 9.8f, 9.5f, 7.5f}, /* 24 kbps */ 59 { 7.0f, 4.5f, 3.7f, 3.0f, 2.5f, 2.0f, 1.8f, 1.5f, 1.0f, 0.0f, 0.0f} /* 4 kbps */ 60 }; 61 62 63 const float vbr_hb_thresh[5][11]={ 64 {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f}, /* silence */ 65 {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f}, /* 2 kbps */ 66 {11.0f, 11.0f, 9.5f, 8.5f, 7.5f, 6.0f, 5.0f, 3.9f, 3.0f, 2.0f, 1.0f}, /* 6 kbps */ 67 {11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 9.5f, 8.7f, 7.8f, 7.0f, 6.5f, 4.0f}, /* 10 kbps */ 68 {11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 9.8f, 7.5f, 5.5f} /* 18 kbps */ 69 }; 70 71 const float vbr_uhb_thresh[2][11]={ 72 {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f}, /* silence */ 73 { 3.9f, 2.5f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, -1.0f} /* 2 kbps */ 74 }; 75 76 void vbr_init(VBRState *vbr) 77 { 78 int i; 79 80 vbr->average_energy=0; 81 vbr->last_energy=1; 82 vbr->accum_sum=0; 83 vbr->energy_alpha=.1; 84 vbr->soft_pitch=0; 85 vbr->last_pitch_coef=0; 86 vbr->last_quality=0; 87 88 vbr->noise_accum = .05*pow(MIN_ENERGY, NOISE_POW); 89 vbr->noise_accum_count=.05; 90 vbr->noise_level=vbr->noise_accum/vbr->noise_accum_count; 91 vbr->consec_noise=0; 92 93 94 for (i=0;i<VBR_MEMORY_SIZE;i++) 95 vbr->last_log_energy[i] = log(MIN_ENERGY); 96 } 97 98 99 /* 100 This function should analyse the signal and decide how critical the 101 coding error will be perceptually. The following factors should be 102 taken into account: 103 104 -Attacks (positive energy derivative) should be coded with more bits 105 106 -Stationary voiced segments should receive more bits 107 108 -Segments with (very) low absolute energy should receive less bits (maybe 109 only shaped noise?) 110 111 -DTX for near-zero energy? 112 113 -Stationary fricative segments should have less bits 114 115 -Temporal masking: when energy slope is decreasing, decrease the bit-rate 116 117 -Decrease bit-rate for males (low pitch)? 118 119 -(wideband only) less bits in the high-band when signal is very 120 non-stationary (harder to notice high-frequency noise)??? 121 122 */ 123 124 float vbr_analysis(VBRState *vbr, spx_word16_t *sig, int len, int pitch, float pitch_coef) 125 { 126 int i; 127 float ener=0, ener1=0, ener2=0; 128 float qual=7; 129 int va; 130 float log_energy; 131 float non_st=0; 132 float voicing; 133 float pow_ener; 134 135 for (i=0;i<len>>1;i++) 136 ener1 += ((float)sig[i])*sig[i]; 137 138 for (i=len>>1;i<len;i++) 139 ener2 += ((float)sig[i])*sig[i]; 140 ener=ener1+ener2; 141 142 log_energy = log(ener+MIN_ENERGY); 143 for (i=0;i<VBR_MEMORY_SIZE;i++) 144 non_st += sqr(log_energy-vbr->last_log_energy[i]); 145 non_st = non_st/(30*VBR_MEMORY_SIZE); 146 if (non_st>1) 147 non_st=1; 148 149 voicing = 3*(pitch_coef-.4)*fabs(pitch_coef-.4); 150 vbr->average_energy = (1-vbr->energy_alpha)*vbr->average_energy + vbr->energy_alpha*ener; 151 vbr->noise_level=vbr->noise_accum/vbr->noise_accum_count; 152 pow_ener = pow(ener,NOISE_POW); 153 if (vbr->noise_accum_count<.06 && ener>MIN_ENERGY) 154 vbr->noise_accum = .05*pow_ener; 155 156 if ((voicing<.3 && non_st < .2 && pow_ener < 1.2*vbr->noise_level) 157 || (voicing<.3 && non_st < .05 && pow_ener < 1.5*vbr->noise_level) 158 || (voicing<.4 && non_st < .05 && pow_ener < 1.2*vbr->noise_level) 159 || (voicing<0 && non_st < .05)) 160 { 161 float tmp; 162 va = 0; 163 vbr->consec_noise++; 164 if (pow_ener > 3*vbr->noise_level) 165 tmp = 3*vbr->noise_level; 166 else 167 tmp = pow_ener; 168 if (vbr->consec_noise>=4) 169 { 170 vbr->noise_accum = .95*vbr->noise_accum + .05*tmp; 171 vbr->noise_accum_count = .95*vbr->noise_accum_count + .05; 172 } 173 } else { 174 va = 1; 175 vbr->consec_noise=0; 176 } 177 178 if (pow_ener < vbr->noise_level && ener>MIN_ENERGY) 179 { 180 vbr->noise_accum = .95*vbr->noise_accum + .05*pow_ener; 181 vbr->noise_accum_count = .95*vbr->noise_accum_count + .05; 182 } 183 184 /* Checking for very low absolute energy */ 185 if (ener < 30000) 186 { 187 qual -= .7; 188 if (ener < 10000) 189 qual-=.7; 190 if (ener < 3000) 191 qual-=.7; 192 } else { 193 float short_diff, long_diff; 194 short_diff = log((ener+1)/(1+vbr->last_energy)); 195 long_diff = log((ener+1)/(1+vbr->average_energy)); 196 /*fprintf (stderr, "%f %f\n", short_diff, long_diff);*/ 197 198 if (long_diff<-5) 199 long_diff=-5; 200 if (long_diff>2) 201 long_diff=2; 202 203 if (long_diff>0) 204 qual += .6*long_diff; 205 if (long_diff<0) 206 qual += .5*long_diff; 207 if (short_diff>0) 208 { 209 if (short_diff>5) 210 short_diff=5; 211 qual += .5*short_diff; 212 } 213 /* Checking for energy increases */ 214 if (ener2 > 1.6*ener1) 215 qual += .5; 216 } 217 vbr->last_energy = ener; 218 vbr->soft_pitch = .6*vbr->soft_pitch + .4*pitch_coef; 219 qual += 2.2*((pitch_coef-.4) + (vbr->soft_pitch-.4)); 220 221 if (qual < vbr->last_quality) 222 qual = .5*qual + .5*vbr->last_quality; 223 if (qual<4) 224 qual=4; 225 if (qual>10) 226 qual=10; 227 228 /* 229 if (vbr->consec_noise>=2) 230 qual-=1.3; 231 if (vbr->consec_noise>=5) 232 qual-=1.3; 233 if (vbr->consec_noise>=12) 234 qual-=1.3; 235 */ 236 if (vbr->consec_noise>=3) 237 qual=4; 238 239 if (vbr->consec_noise) 240 qual -= 1.0 * (log(3.0 + vbr->consec_noise)-log(3)); 241 if (qual<0) 242 qual=0; 243 244 if (ener<60000) 245 { 246 if (vbr->consec_noise>2) 247 qual-=0.5*(log(3.0 + vbr->consec_noise)-log(3)); 248 if (ener<10000&&vbr->consec_noise>2) 249 qual-=0.5*(log(3.0 + vbr->consec_noise)-log(3)); 250 if (qual<0) 251 qual=0; 252 qual += .3*log(.0001+ener/60000.0); 253 } 254 if (qual<-1) 255 qual=-1; 256 257 /*printf ("%f %f %f %f %d\n", qual, voicing, non_st, pow_ener/(.01+vbr->noise_level), va);*/ 258 259 vbr->last_pitch_coef = pitch_coef; 260 vbr->last_quality = qual; 261 262 for (i=VBR_MEMORY_SIZE-1;i>0;i--) 263 vbr->last_log_energy[i] = vbr->last_log_energy[i-1]; 264 vbr->last_log_energy[0] = log_energy; 265 266 /*printf ("VBR: %f %f %f %d %f\n", (float)(log_energy-log(vbr->average_energy+MIN_ENERGY)), non_st, voicing, va, vbr->noise_level);*/ 267 268 return qual; 269 } 270 271 void vbr_destroy(VBRState *vbr) 272 { 273 } 274 275 #endif /* #ifndef DISABLE_VBR */ 276