1 /* Copyright (c) 2007-2008 CSIRO 2 Copyright (c) 2007-2010 Xiph.Org Foundation 3 Copyright (c) 2008 Gregory Maxwell 4 Written by Jean-Marc Valin and Gregory Maxwell */ 5 /* 6 Redistribution and use in source and binary forms, with or without 7 modification, are permitted provided that the following conditions 8 are met: 9 10 - Redistributions of source code must retain the above copyright 11 notice, this list of conditions and the following disclaimer. 12 13 - Redistributions in binary form must reproduce the above copyright 14 notice, this list of conditions and the following disclaimer in the 15 documentation and/or other materials provided with the distribution. 16 17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER 21 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 22 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 23 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 24 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 25 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 26 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 27 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 */ 29 30 #ifdef HAVE_CONFIG_H 31 #include "config.h" 32 #endif 33 34 #define CELT_ENCODER_C 35 36 #include "cpu_support.h" 37 #include "os_support.h" 38 #include "mdct.h" 39 #include <math.h> 40 #include "celt.h" 41 #include "pitch.h" 42 #include "bands.h" 43 #include "modes.h" 44 #include "entcode.h" 45 #include "quant_bands.h" 46 #include "rate.h" 47 #include "stack_alloc.h" 48 #include "mathops.h" 49 #include "float_cast.h" 50 #include <stdarg.h> 51 #include "celt_lpc.h" 52 #include "vq.h" 53 54 55 /** Encoder state 56 @brief Encoder state 57 */ 58 struct OpusCustomEncoder { 59 const OpusCustomMode *mode; /**< Mode used by the encoder */ 60 int channels; 61 int stream_channels; 62 63 int force_intra; 64 int clip; 65 int disable_pf; 66 int complexity; 67 int upsample; 68 int start, end; 69 70 opus_int32 bitrate; 71 int vbr; 72 int signalling; 73 int constrained_vbr; /* If zero, VBR can do whatever it likes with the rate */ 74 int loss_rate; 75 int lsb_depth; 76 int lfe; 77 int disable_inv; 78 int arch; 79 80 /* Everything beyond this point gets cleared on a reset */ 81 #define ENCODER_RESET_START rng 82 83 opus_uint32 rng; 84 int spread_decision; 85 opus_val32 delayedIntra; 86 int tonal_average; 87 int lastCodedBands; 88 int hf_average; 89 int tapset_decision; 90 91 int prefilter_period; 92 opus_val16 prefilter_gain; 93 int prefilter_tapset; 94 #ifdef RESYNTH 95 int prefilter_period_old; 96 opus_val16 prefilter_gain_old; 97 int prefilter_tapset_old; 98 #endif 99 int consec_transient; 100 AnalysisInfo analysis; 101 SILKInfo silk_info; 102 103 opus_val32 preemph_memE[2]; 104 opus_val32 preemph_memD[2]; 105 106 /* VBR-related parameters */ 107 opus_int32 vbr_reservoir; 108 opus_int32 vbr_drift; 109 opus_int32 vbr_offset; 110 opus_int32 vbr_count; 111 opus_val32 overlap_max; 112 opus_val16 stereo_saving; 113 int intensity; 114 opus_val16 *energy_mask; 115 opus_val16 spec_avg; 116 117 #ifdef RESYNTH 118 /* +MAX_PERIOD/2 to make space for overlap */ 119 celt_sig 
syn_mem[2][2*MAX_PERIOD+MAX_PERIOD/2]; 120 #endif 121 122 celt_sig in_mem[1]; /* Size = channels*mode->overlap */ 123 /* celt_sig prefilter_mem[], Size = channels*COMBFILTER_MAXPERIOD */ 124 /* opus_val16 oldBandE[], Size = channels*mode->nbEBands */ 125 /* opus_val16 oldLogE[], Size = channels*mode->nbEBands */ 126 /* opus_val16 oldLogE2[], Size = channels*mode->nbEBands */ 127 /* opus_val16 energyError[], Size = channels*mode->nbEBands */ 128 }; 129 130 int celt_encoder_get_size(int channels) 131 { 132 CELTMode *mode = opus_custom_mode_create(48000, 960, NULL); 133 return opus_custom_encoder_get_size(mode, channels); 134 } 135 136 OPUS_CUSTOM_NOSTATIC int opus_custom_encoder_get_size(const CELTMode *mode, int channels) 137 { 138 int size = sizeof(struct CELTEncoder) 139 + (channels*mode->overlap-1)*sizeof(celt_sig) /* celt_sig in_mem[channels*mode->overlap]; */ 140 + channels*COMBFILTER_MAXPERIOD*sizeof(celt_sig) /* celt_sig prefilter_mem[channels*COMBFILTER_MAXPERIOD]; */ 141 + 4*channels*mode->nbEBands*sizeof(opus_val16); /* opus_val16 oldBandE[channels*mode->nbEBands]; */ 142 /* opus_val16 oldLogE[channels*mode->nbEBands]; */ 143 /* opus_val16 oldLogE2[channels*mode->nbEBands]; */ 144 /* opus_val16 energyError[channels*mode->nbEBands]; */ 145 return size; 146 } 147 148 #ifdef CUSTOM_MODES 149 CELTEncoder *opus_custom_encoder_create(const CELTMode *mode, int channels, int *error) 150 { 151 int ret; 152 CELTEncoder *st = (CELTEncoder *)opus_alloc(opus_custom_encoder_get_size(mode, channels)); 153 /* init will handle the NULL case */ 154 ret = opus_custom_encoder_init(st, mode, channels); 155 if (ret != OPUS_OK) 156 { 157 opus_custom_encoder_destroy(st); 158 st = NULL; 159 } 160 if (error) 161 *error = ret; 162 return st; 163 } 164 #endif /* CUSTOM_MODES */ 165 166 static int opus_custom_encoder_init_arch(CELTEncoder *st, const CELTMode *mode, 167 int channels, int arch) 168 { 169 if (channels < 0 || channels > 2) 170 return OPUS_BAD_ARG; 171 172 if (st==NULL || 
mode==NULL) 173 return OPUS_ALLOC_FAIL; 174 175 OPUS_CLEAR((char*)st, opus_custom_encoder_get_size(mode, channels)); 176 177 st->mode = mode; 178 st->stream_channels = st->channels = channels; 179 180 st->upsample = 1; 181 st->start = 0; 182 st->end = st->mode->effEBands; 183 st->signalling = 1; 184 st->arch = arch; 185 186 st->constrained_vbr = 1; 187 st->clip = 1; 188 189 st->bitrate = OPUS_BITRATE_MAX; 190 st->vbr = 0; 191 st->force_intra = 0; 192 st->complexity = 5; 193 st->lsb_depth=24; 194 195 opus_custom_encoder_ctl(st, OPUS_RESET_STATE); 196 197 return OPUS_OK; 198 } 199 200 #ifdef CUSTOM_MODES 201 int opus_custom_encoder_init(CELTEncoder *st, const CELTMode *mode, int channels) 202 { 203 return opus_custom_encoder_init_arch(st, mode, channels, opus_select_arch()); 204 } 205 #endif 206 207 int celt_encoder_init(CELTEncoder *st, opus_int32 sampling_rate, int channels, 208 int arch) 209 { 210 int ret; 211 ret = opus_custom_encoder_init_arch(st, 212 opus_custom_mode_create(48000, 960, NULL), channels, arch); 213 if (ret != OPUS_OK) 214 return ret; 215 st->upsample = resampling_factor(sampling_rate); 216 return OPUS_OK; 217 } 218 219 #ifdef CUSTOM_MODES 220 void opus_custom_encoder_destroy(CELTEncoder *st) 221 { 222 opus_free(st); 223 } 224 #endif /* CUSTOM_MODES */ 225 226 227 static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int C, 228 opus_val16 *tf_estimate, int *tf_chan, int allow_weak_transients, 229 int *weak_transient) 230 { 231 int i; 232 VARDECL(opus_val16, tmp); 233 opus_val32 mem0,mem1; 234 int is_transient = 0; 235 opus_int32 mask_metric = 0; 236 int c; 237 opus_val16 tf_max; 238 int len2; 239 /* Forward masking: 6.7 dB/ms. 
*/ 240 #ifdef FIXED_POINT 241 int forward_shift = 4; 242 #else 243 opus_val16 forward_decay = QCONST16(.0625f,15); 244 #endif 245 /* Table of 6*64/x, trained on real data to minimize the average error */ 246 static const unsigned char inv_table[128] = { 247 255,255,156,110, 86, 70, 59, 51, 45, 40, 37, 33, 31, 28, 26, 25, 248 23, 22, 21, 20, 19, 18, 17, 16, 16, 15, 15, 14, 13, 13, 12, 12, 249 12, 12, 11, 11, 11, 10, 10, 10, 9, 9, 9, 9, 9, 9, 8, 8, 250 8, 8, 8, 7, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 251 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5, 252 5, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 253 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 254 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 255 }; 256 SAVE_STACK; 257 ALLOC(tmp, len, opus_val16); 258 259 *weak_transient = 0; 260 /* For lower bitrates, let's be more conservative and have a forward masking 261 decay of 3.3 dB/ms. This avoids having to code transients at very low 262 bitrate (mostly for hybrid), which can result in unstable energy and/or 263 partial collapse. 
*/ 264 if (allow_weak_transients) 265 { 266 #ifdef FIXED_POINT 267 forward_shift = 5; 268 #else 269 forward_decay = QCONST16(.03125f,15); 270 #endif 271 } 272 len2=len/2; 273 for (c=0;c<C;c++) 274 { 275 opus_val32 mean; 276 opus_int32 unmask=0; 277 opus_val32 norm; 278 opus_val16 maxE; 279 mem0=0; 280 mem1=0; 281 /* High-pass filter: (1 - 2*z^-1 + z^-2) / (1 - z^-1 + .5*z^-2) */ 282 for (i=0;i<len;i++) 283 { 284 opus_val32 x,y; 285 x = SHR32(in[i+c*len],SIG_SHIFT); 286 y = ADD32(mem0, x); 287 #ifdef FIXED_POINT 288 mem0 = mem1 + y - SHL32(x,1); 289 mem1 = x - SHR32(y,1); 290 #else 291 mem0 = mem1 + y - 2*x; 292 mem1 = x - .5f*y; 293 #endif 294 tmp[i] = SROUND16(y, 2); 295 /*printf("%f ", tmp[i]);*/ 296 } 297 /*printf("\n");*/ 298 /* First few samples are bad because we don't propagate the memory */ 299 OPUS_CLEAR(tmp, 12); 300 301 #ifdef FIXED_POINT 302 /* Normalize tmp to max range */ 303 { 304 int shift=0; 305 shift = 14-celt_ilog2(MAX16(1, celt_maxabs16(tmp, len))); 306 if (shift!=0) 307 { 308 for (i=0;i<len;i++) 309 tmp[i] = SHL16(tmp[i], shift); 310 } 311 } 312 #endif 313 314 mean=0; 315 mem0=0; 316 /* Grouping by two to reduce complexity */ 317 /* Forward pass to compute the post-echo threshold*/ 318 for (i=0;i<len2;i++) 319 { 320 opus_val16 x2 = PSHR32(MULT16_16(tmp[2*i],tmp[2*i]) + MULT16_16(tmp[2*i+1],tmp[2*i+1]),16); 321 mean += x2; 322 #ifdef FIXED_POINT 323 /* FIXME: Use PSHR16() instead */ 324 tmp[i] = mem0 + PSHR32(x2-mem0,forward_shift); 325 #else 326 tmp[i] = mem0 + MULT16_16_P15(forward_decay,x2-mem0); 327 #endif 328 mem0 = tmp[i]; 329 } 330 331 mem0=0; 332 maxE=0; 333 /* Backward pass to compute the pre-echo threshold */ 334 for (i=len2-1;i>=0;i--) 335 { 336 /* Backward masking: 13.9 dB/ms. 
*/ 337 #ifdef FIXED_POINT 338 /* FIXME: Use PSHR16() instead */ 339 tmp[i] = mem0 + PSHR32(tmp[i]-mem0,3); 340 #else 341 tmp[i] = mem0 + MULT16_16_P15(QCONST16(0.125f,15),tmp[i]-mem0); 342 #endif 343 mem0 = tmp[i]; 344 maxE = MAX16(maxE, mem0); 345 } 346 /*for (i=0;i<len2;i++)printf("%f ", tmp[i]/mean);printf("\n");*/ 347 348 /* Compute the ratio of the "frame energy" over the harmonic mean of the energy. 349 This essentially corresponds to a bitrate-normalized temporal noise-to-mask 350 ratio */ 351 352 /* As a compromise with the old transient detector, frame energy is the 353 geometric mean of the energy and half the max */ 354 #ifdef FIXED_POINT 355 /* Costs two sqrt() to avoid overflows */ 356 mean = MULT16_16(celt_sqrt(mean), celt_sqrt(MULT16_16(maxE,len2>>1))); 357 #else 358 mean = celt_sqrt(mean * maxE*.5*len2); 359 #endif 360 /* Inverse of the mean energy in Q15+6 */ 361 norm = SHL32(EXTEND32(len2),6+14)/ADD32(EPSILON,SHR32(mean,1)); 362 /* Compute harmonic mean discarding the unreliable boundaries 363 The data is smooth, so we only take 1/4th of the samples */ 364 unmask=0; 365 /* We should never see NaNs here. If we find any, then something really bad happened and we better abort 366 before it does any damage later on. If these asserts are disabled (no hardening), then the table 367 lookup a few lines below (id = ...) is likely to crash dur to an out-of-bounds read. DO NOT FIX 368 that crash on NaN since it could result in a worse issue later on. 
*/ 369 celt_assert(!celt_isnan(tmp[0])); 370 celt_assert(!celt_isnan(norm)); 371 for (i=12;i<len2-5;i+=4) 372 { 373 int id; 374 #ifdef FIXED_POINT 375 id = MAX32(0,MIN32(127,MULT16_32_Q15(tmp[i]+EPSILON,norm))); /* Do not round to nearest */ 376 #else 377 id = (int)MAX32(0,MIN32(127,floor(64*norm*(tmp[i]+EPSILON)))); /* Do not round to nearest */ 378 #endif 379 unmask += inv_table[id]; 380 } 381 /*printf("%d\n", unmask);*/ 382 /* Normalize, compensate for the 1/4th of the sample and the factor of 6 in the inverse table */ 383 unmask = 64*unmask*4/(6*(len2-17)); 384 if (unmask>mask_metric) 385 { 386 *tf_chan = c; 387 mask_metric = unmask; 388 } 389 } 390 is_transient = mask_metric>200; 391 /* For low bitrates, define "weak transients" that need to be 392 handled differently to avoid partial collapse. */ 393 if (allow_weak_transients && is_transient && mask_metric<600) { 394 is_transient = 0; 395 *weak_transient = 1; 396 } 397 /* Arbitrary metric for VBR boost */ 398 tf_max = MAX16(0,celt_sqrt(27*mask_metric)-42); 399 /* *tf_estimate = 1 + MIN16(1, sqrt(MAX16(0, tf_max-30))/20); */ 400 *tf_estimate = celt_sqrt(MAX32(0, SHL32(MULT16_16(QCONST16(0.0069,14),MIN16(163,tf_max)),14)-QCONST32(0.139,28))); 401 /*printf("%d %f\n", tf_max, mask_metric);*/ 402 RESTORE_STACK; 403 #ifdef FUZZING 404 is_transient = rand()&0x1; 405 #endif 406 /*printf("%d %f %d\n", is_transient, (float)*tf_estimate, tf_max);*/ 407 return is_transient; 408 } 409 410 /* Looks for sudden increases of energy to decide whether we need to patch 411 the transient decision */ 412 static int patch_transient_decision(opus_val16 *newE, opus_val16 *oldE, int nbEBands, 413 int start, int end, int C) 414 { 415 int i, c; 416 opus_val32 mean_diff=0; 417 opus_val16 spread_old[26]; 418 /* Apply an aggressive (-6 dB/Bark) spreading function to the old frame to 419 avoid false detection caused by irrelevant bands */ 420 if (C==1) 421 { 422 spread_old[start] = oldE[start]; 423 for (i=start+1;i<end;i++) 424 
spread_old[i] = MAX16(spread_old[i-1]-QCONST16(1.0f, DB_SHIFT), oldE[i]); 425 } else { 426 spread_old[start] = MAX16(oldE[start],oldE[start+nbEBands]); 427 for (i=start+1;i<end;i++) 428 spread_old[i] = MAX16(spread_old[i-1]-QCONST16(1.0f, DB_SHIFT), 429 MAX16(oldE[i],oldE[i+nbEBands])); 430 } 431 for (i=end-2;i>=start;i--) 432 spread_old[i] = MAX16(spread_old[i], spread_old[i+1]-QCONST16(1.0f, DB_SHIFT)); 433 /* Compute mean increase */ 434 c=0; do { 435 for (i=IMAX(2,start);i<end-1;i++) 436 { 437 opus_val16 x1, x2; 438 x1 = MAX16(0, newE[i + c*nbEBands]); 439 x2 = MAX16(0, spread_old[i]); 440 mean_diff = ADD32(mean_diff, EXTEND32(MAX16(0, SUB16(x1, x2)))); 441 } 442 } while (++c<C); 443 mean_diff = DIV32(mean_diff, C*(end-1-IMAX(2,start))); 444 /*printf("%f %f %d\n", mean_diff, max_diff, count);*/ 445 return mean_diff > QCONST16(1.f, DB_SHIFT); 446 } 447 448 /** Apply window and compute the MDCT for all sub-frames and 449 all channels in a frame */ 450 static void compute_mdcts(const CELTMode *mode, int shortBlocks, celt_sig * OPUS_RESTRICT in, 451 celt_sig * OPUS_RESTRICT out, int C, int CC, int LM, int upsample, 452 int arch) 453 { 454 const int overlap = mode->overlap; 455 int N; 456 int B; 457 int shift; 458 int i, b, c; 459 if (shortBlocks) 460 { 461 B = shortBlocks; 462 N = mode->shortMdctSize; 463 shift = mode->maxLM; 464 } else { 465 B = 1; 466 N = mode->shortMdctSize<<LM; 467 shift = mode->maxLM-LM; 468 } 469 c=0; do { 470 for (b=0;b<B;b++) 471 { 472 /* Interleaving the sub-frames while doing the MDCTs */ 473 clt_mdct_forward(&mode->mdct, in+c*(B*N+overlap)+b*N, 474 &out[b+c*N*B], mode->window, overlap, shift, B, 475 arch); 476 } 477 } while (++c<CC); 478 if (CC==2&&C==1) 479 { 480 for (i=0;i<B*N;i++) 481 out[i] = ADD32(HALF32(out[i]), HALF32(out[B*N+i])); 482 } 483 if (upsample != 1) 484 { 485 c=0; do 486 { 487 int bound = B*N/upsample; 488 for (i=0;i<bound;i++) 489 out[c*B*N+i] *= upsample; 490 OPUS_CLEAR(&out[c*B*N+bound], B*N-bound); 491 } while 
(++c<C); 492 } 493 } 494 495 496 void celt_preemphasis(const opus_val16 * OPUS_RESTRICT pcmp, celt_sig * OPUS_RESTRICT inp, 497 int N, int CC, int upsample, const opus_val16 *coef, celt_sig *mem, int clip) 498 { 499 int i; 500 opus_val16 coef0; 501 celt_sig m; 502 int Nu; 503 504 coef0 = coef[0]; 505 m = *mem; 506 507 /* Fast path for the normal 48kHz case and no clipping */ 508 if (coef[1] == 0 && upsample == 1 && !clip) 509 { 510 for (i=0;i<N;i++) 511 { 512 opus_val16 x; 513 x = SCALEIN(pcmp[CC*i]); 514 /* Apply pre-emphasis */ 515 inp[i] = SHL32(x, SIG_SHIFT) - m; 516 m = SHR32(MULT16_16(coef0, x), 15-SIG_SHIFT); 517 } 518 *mem = m; 519 return; 520 } 521 522 Nu = N/upsample; 523 if (upsample!=1) 524 { 525 OPUS_CLEAR(inp, N); 526 } 527 for (i=0;i<Nu;i++) 528 inp[i*upsample] = SCALEIN(pcmp[CC*i]); 529 530 #ifndef FIXED_POINT 531 if (clip) 532 { 533 /* Clip input to avoid encoding non-portable files */ 534 for (i=0;i<Nu;i++) 535 inp[i*upsample] = MAX32(-65536.f, MIN32(65536.f,inp[i*upsample])); 536 } 537 #else 538 (void)clip; /* Avoids a warning about clip being unused. 
*/ 539 #endif 540 #ifdef CUSTOM_MODES 541 if (coef[1] != 0) 542 { 543 opus_val16 coef1 = coef[1]; 544 opus_val16 coef2 = coef[2]; 545 for (i=0;i<N;i++) 546 { 547 celt_sig x, tmp; 548 x = inp[i]; 549 /* Apply pre-emphasis */ 550 tmp = MULT16_16(coef2, x); 551 inp[i] = tmp + m; 552 m = MULT16_32_Q15(coef1, inp[i]) - MULT16_32_Q15(coef0, tmp); 553 } 554 } else 555 #endif 556 { 557 for (i=0;i<N;i++) 558 { 559 opus_val16 x; 560 x = inp[i]; 561 /* Apply pre-emphasis */ 562 inp[i] = SHL32(x, SIG_SHIFT) - m; 563 m = SHR32(MULT16_16(coef0, x), 15-SIG_SHIFT); 564 } 565 } 566 *mem = m; 567 } 568 569 570 571 static opus_val32 l1_metric(const celt_norm *tmp, int N, int LM, opus_val16 bias) 572 { 573 int i; 574 opus_val32 L1; 575 L1 = 0; 576 for (i=0;i<N;i++) 577 L1 += EXTEND32(ABS16(tmp[i])); 578 /* When in doubt, prefer good freq resolution */ 579 L1 = MAC16_32_Q15(L1, LM*bias, L1); 580 return L1; 581 582 } 583 584 static int tf_analysis(const CELTMode *m, int len, int isTransient, 585 int *tf_res, int lambda, celt_norm *X, int N0, int LM, 586 opus_val16 tf_estimate, int tf_chan, int *importance) 587 { 588 int i; 589 VARDECL(int, metric); 590 int cost0; 591 int cost1; 592 VARDECL(int, path0); 593 VARDECL(int, path1); 594 VARDECL(celt_norm, tmp); 595 VARDECL(celt_norm, tmp_1); 596 int sel; 597 int selcost[2]; 598 int tf_select=0; 599 opus_val16 bias; 600 601 SAVE_STACK; 602 bias = MULT16_16_Q14(QCONST16(.04f,15), MAX16(-QCONST16(.25f,14), QCONST16(.5f,14)-tf_estimate)); 603 /*printf("%f ", bias);*/ 604 605 ALLOC(metric, len, int); 606 ALLOC(tmp, (m->eBands[len]-m->eBands[len-1])<<LM, celt_norm); 607 ALLOC(tmp_1, (m->eBands[len]-m->eBands[len-1])<<LM, celt_norm); 608 ALLOC(path0, len, int); 609 ALLOC(path1, len, int); 610 611 for (i=0;i<len;i++) 612 { 613 int k, N; 614 int narrow; 615 opus_val32 L1, best_L1; 616 int best_level=0; 617 N = (m->eBands[i+1]-m->eBands[i])<<LM; 618 /* band is too narrow to be split down to LM=-1 */ 619 narrow = (m->eBands[i+1]-m->eBands[i])==1; 620 
OPUS_COPY(tmp, &X[tf_chan*N0 + (m->eBands[i]<<LM)], N); 621 /* Just add the right channel if we're in stereo */ 622 /*if (C==2) 623 for (j=0;j<N;j++) 624 tmp[j] = ADD16(SHR16(tmp[j], 1),SHR16(X[N0+j+(m->eBands[i]<<LM)], 1));*/ 625 L1 = l1_metric(tmp, N, isTransient ? LM : 0, bias); 626 best_L1 = L1; 627 /* Check the -1 case for transients */ 628 if (isTransient && !narrow) 629 { 630 OPUS_COPY(tmp_1, tmp, N); 631 haar1(tmp_1, N>>LM, 1<<LM); 632 L1 = l1_metric(tmp_1, N, LM+1, bias); 633 if (L1<best_L1) 634 { 635 best_L1 = L1; 636 best_level = -1; 637 } 638 } 639 /*printf ("%f ", L1);*/ 640 for (k=0;k<LM+!(isTransient||narrow);k++) 641 { 642 int B; 643 644 if (isTransient) 645 B = (LM-k-1); 646 else 647 B = k+1; 648 649 haar1(tmp, N>>k, 1<<k); 650 651 L1 = l1_metric(tmp, N, B, bias); 652 653 if (L1 < best_L1) 654 { 655 best_L1 = L1; 656 best_level = k+1; 657 } 658 } 659 /*printf ("%d ", isTransient ? LM-best_level : best_level);*/ 660 /* metric is in Q1 to be able to select the mid-point (-0.5) for narrower bands */ 661 if (isTransient) 662 metric[i] = 2*best_level; 663 else 664 metric[i] = -2*best_level; 665 /* For bands that can't be split to -1, set the metric to the half-way point to avoid 666 biasing the decision */ 667 if (narrow && (metric[i]==0 || metric[i]==-2*LM)) 668 metric[i]-=1; 669 /*printf("%d ", metric[i]/2 + (!isTransient)*LM);*/ 670 } 671 /*printf("\n");*/ 672 /* Search for the optimal tf resolution, including tf_select */ 673 tf_select = 0; 674 for (sel=0;sel<2;sel++) 675 { 676 cost0 = importance[0]*abs(metric[0]-2*tf_select_table[LM][4*isTransient+2*sel+0]); 677 cost1 = importance[0]*abs(metric[0]-2*tf_select_table[LM][4*isTransient+2*sel+1]) + (isTransient ? 
0 : lambda); 678 for (i=1;i<len;i++) 679 { 680 int curr0, curr1; 681 curr0 = IMIN(cost0, cost1 + lambda); 682 curr1 = IMIN(cost0 + lambda, cost1); 683 cost0 = curr0 + importance[i]*abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*sel+0]); 684 cost1 = curr1 + importance[i]*abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*sel+1]); 685 } 686 cost0 = IMIN(cost0, cost1); 687 selcost[sel]=cost0; 688 } 689 /* For now, we're conservative and only allow tf_select=1 for transients. 690 * If tests confirm it's useful for non-transients, we could allow it. */ 691 if (selcost[1]<selcost[0] && isTransient) 692 tf_select=1; 693 cost0 = importance[0]*abs(metric[0]-2*tf_select_table[LM][4*isTransient+2*tf_select+0]); 694 cost1 = importance[0]*abs(metric[0]-2*tf_select_table[LM][4*isTransient+2*tf_select+1]) + (isTransient ? 0 : lambda); 695 /* Viterbi forward pass */ 696 for (i=1;i<len;i++) 697 { 698 int curr0, curr1; 699 int from0, from1; 700 701 from0 = cost0; 702 from1 = cost1 + lambda; 703 if (from0 < from1) 704 { 705 curr0 = from0; 706 path0[i]= 0; 707 } else { 708 curr0 = from1; 709 path0[i]= 1; 710 } 711 712 from0 = cost0 + lambda; 713 from1 = cost1; 714 if (from0 < from1) 715 { 716 curr1 = from0; 717 path1[i]= 0; 718 } else { 719 curr1 = from1; 720 path1[i]= 1; 721 } 722 cost0 = curr0 + importance[i]*abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*tf_select+0]); 723 cost1 = curr1 + importance[i]*abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*tf_select+1]); 724 } 725 tf_res[len-1] = cost0 < cost1 ? 
0 : 1; 726 /* Viterbi backward pass to check the decisions */ 727 for (i=len-2;i>=0;i--) 728 { 729 if (tf_res[i+1] == 1) 730 tf_res[i] = path1[i+1]; 731 else 732 tf_res[i] = path0[i+1]; 733 } 734 /*printf("%d %f\n", *tf_sum, tf_estimate);*/ 735 RESTORE_STACK; 736 #ifdef FUZZING 737 tf_select = rand()&0x1; 738 tf_res[0] = rand()&0x1; 739 for (i=1;i<len;i++) 740 tf_res[i] = tf_res[i-1] ^ ((rand()&0xF) == 0); 741 #endif 742 return tf_select; 743 } 744 745 static void tf_encode(int start, int end, int isTransient, int *tf_res, int LM, int tf_select, ec_enc *enc) 746 { 747 int curr, i; 748 int tf_select_rsv; 749 int tf_changed; 750 int logp; 751 opus_uint32 budget; 752 opus_uint32 tell; 753 budget = enc->storage*8; 754 tell = ec_tell(enc); 755 logp = isTransient ? 2 : 4; 756 /* Reserve space to code the tf_select decision. */ 757 tf_select_rsv = LM>0 && tell+logp+1 <= budget; 758 budget -= tf_select_rsv; 759 curr = tf_changed = 0; 760 for (i=start;i<end;i++) 761 { 762 if (tell+logp<=budget) 763 { 764 ec_enc_bit_logp(enc, tf_res[i] ^ curr, logp); 765 tell = ec_tell(enc); 766 curr = tf_res[i]; 767 tf_changed |= curr; 768 } 769 else 770 tf_res[i] = curr; 771 logp = isTransient ? 4 : 5; 772 } 773 /* Only code tf_select if it would actually make a difference. */ 774 if (tf_select_rsv && 775 tf_select_table[LM][4*isTransient+0+tf_changed]!= 776 tf_select_table[LM][4*isTransient+2+tf_changed]) 777 ec_enc_bit_logp(enc, tf_select, 1); 778 else 779 tf_select = 0; 780 for (i=start;i<end;i++) 781 tf_res[i] = tf_select_table[LM][4*isTransient+2*tf_select+tf_res[i]]; 782 /*for(i=0;i<end;i++)printf("%d ", isTransient ? 
tf_res[i] : LM+tf_res[i]);printf("\n");*/ 783 } 784 785 786 static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X, 787 const opus_val16 *bandLogE, int end, int LM, int C, int N0, 788 AnalysisInfo *analysis, opus_val16 *stereo_saving, opus_val16 tf_estimate, 789 int intensity, opus_val16 surround_trim, opus_int32 equiv_rate, int arch) 790 { 791 int i; 792 opus_val32 diff=0; 793 int c; 794 int trim_index; 795 opus_val16 trim = QCONST16(5.f, 8); 796 opus_val16 logXC, logXC2; 797 /* At low bitrate, reducing the trim seems to help. At higher bitrates, it's less 798 clear what's best, so we're keeping it as it was before, at least for now. */ 799 if (equiv_rate < 64000) { 800 trim = QCONST16(4.f, 8); 801 } else if (equiv_rate < 80000) { 802 opus_int32 frac = (equiv_rate-64000) >> 10; 803 trim = QCONST16(4.f, 8) + QCONST16(1.f/16.f, 8)*frac; 804 } 805 if (C==2) 806 { 807 opus_val16 sum = 0; /* Q10 */ 808 opus_val16 minXC; /* Q10 */ 809 /* Compute inter-channel correlation for low frequencies */ 810 for (i=0;i<8;i++) 811 { 812 opus_val32 partial; 813 partial = celt_inner_prod(&X[m->eBands[i]<<LM], &X[N0+(m->eBands[i]<<LM)], 814 (m->eBands[i+1]-m->eBands[i])<<LM, arch); 815 sum = ADD16(sum, EXTRACT16(SHR32(partial, 18))); 816 } 817 sum = MULT16_16_Q15(QCONST16(1.f/8, 15), sum); 818 sum = MIN16(QCONST16(1.f, 10), ABS16(sum)); 819 minXC = sum; 820 for (i=8;i<intensity;i++) 821 { 822 opus_val32 partial; 823 partial = celt_inner_prod(&X[m->eBands[i]<<LM], &X[N0+(m->eBands[i]<<LM)], 824 (m->eBands[i+1]-m->eBands[i])<<LM, arch); 825 minXC = MIN16(minXC, ABS16(EXTRACT16(SHR32(partial, 18)))); 826 } 827 minXC = MIN16(QCONST16(1.f, 10), ABS16(minXC)); 828 /*printf ("%f\n", sum);*/ 829 /* mid-side savings estimations based on the LF average*/ 830 logXC = celt_log2(QCONST32(1.001f, 20)-MULT16_16(sum, sum)); 831 /* mid-side savings estimations based on min correlation */ 832 logXC2 = MAX16(HALF16(logXC), celt_log2(QCONST32(1.001f, 20)-MULT16_16(minXC, minXC))); 833 
#ifdef FIXED_POINT 834 /* Compensate for Q20 vs Q14 input and convert output to Q8 */ 835 logXC = PSHR32(logXC-QCONST16(6.f, DB_SHIFT),DB_SHIFT-8); 836 logXC2 = PSHR32(logXC2-QCONST16(6.f, DB_SHIFT),DB_SHIFT-8); 837 #endif 838 839 trim += MAX16(-QCONST16(4.f, 8), MULT16_16_Q15(QCONST16(.75f,15),logXC)); 840 *stereo_saving = MIN16(*stereo_saving + QCONST16(0.25f, 8), -HALF16(logXC2)); 841 } 842 843 /* Estimate spectral tilt */ 844 c=0; do { 845 for (i=0;i<end-1;i++) 846 { 847 diff += bandLogE[i+c*m->nbEBands]*(opus_int32)(2+2*i-end); 848 } 849 } while (++c<C); 850 diff /= C*(end-1); 851 /*printf("%f\n", diff);*/ 852 trim -= MAX32(-QCONST16(2.f, 8), MIN32(QCONST16(2.f, 8), SHR32(diff+QCONST16(1.f, DB_SHIFT),DB_SHIFT-8)/6 )); 853 trim -= SHR16(surround_trim, DB_SHIFT-8); 854 trim -= 2*SHR16(tf_estimate, 14-8); 855 #ifndef DISABLE_FLOAT_API 856 if (analysis->valid) 857 { 858 trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8), 859 (opus_val16)(QCONST16(2.f, 8)*(analysis->tonality_slope+.05f)))); 860 } 861 #else 862 (void)analysis; 863 #endif 864 865 #ifdef FIXED_POINT 866 trim_index = PSHR32(trim, 8); 867 #else 868 trim_index = (int)floor(.5f+trim); 869 #endif 870 trim_index = IMAX(0, IMIN(10, trim_index)); 871 /*printf("%d\n", trim_index);*/ 872 #ifdef FUZZING 873 trim_index = rand()%11; 874 #endif 875 return trim_index; 876 } 877 878 static int stereo_analysis(const CELTMode *m, const celt_norm *X, 879 int LM, int N0) 880 { 881 int i; 882 int thetas; 883 opus_val32 sumLR = EPSILON, sumMS = EPSILON; 884 885 /* Use the L1 norm to model the entropy of the L/R signal vs the M/S signal */ 886 for (i=0;i<13;i++) 887 { 888 int j; 889 for (j=m->eBands[i]<<LM;j<m->eBands[i+1]<<LM;j++) 890 { 891 opus_val32 L, R, M, S; 892 /* We cast to 32-bit first because of the -32768 case */ 893 L = EXTEND32(X[j]); 894 R = EXTEND32(X[N0+j]); 895 M = ADD32(L, R); 896 S = SUB32(L, R); 897 sumLR = ADD32(sumLR, ADD32(ABS32(L), ABS32(R))); 898 sumMS = ADD32(sumMS, ADD32(ABS32(M), ABS32(S))); 
899 } 900 } 901 sumMS = MULT16_32_Q15(QCONST16(0.707107f, 15), sumMS); 902 thetas = 13; 903 /* We don't need thetas for lower bands with LM<=1 */ 904 if (LM<=1) 905 thetas -= 8; 906 return MULT16_32_Q15((m->eBands[13]<<(LM+1))+thetas, sumMS) 907 > MULT16_32_Q15(m->eBands[13]<<(LM+1), sumLR); 908 } 909 910 #define MSWAP(a,b) do {opus_val16 tmp = a;a=b;b=tmp;} while(0) 911 static opus_val16 median_of_5(const opus_val16 *x) 912 { 913 opus_val16 t0, t1, t2, t3, t4; 914 t2 = x[2]; 915 if (x[0] > x[1]) 916 { 917 t0 = x[1]; 918 t1 = x[0]; 919 } else { 920 t0 = x[0]; 921 t1 = x[1]; 922 } 923 if (x[3] > x[4]) 924 { 925 t3 = x[4]; 926 t4 = x[3]; 927 } else { 928 t3 = x[3]; 929 t4 = x[4]; 930 } 931 if (t0 > t3) 932 { 933 MSWAP(t0, t3); 934 MSWAP(t1, t4); 935 } 936 if (t2 > t1) 937 { 938 if (t1 < t3) 939 return MIN16(t2, t3); 940 else 941 return MIN16(t4, t1); 942 } else { 943 if (t2 < t3) 944 return MIN16(t1, t3); 945 else 946 return MIN16(t2, t4); 947 } 948 } 949 950 static opus_val16 median_of_3(const opus_val16 *x) 951 { 952 opus_val16 t0, t1, t2; 953 if (x[0] > x[1]) 954 { 955 t0 = x[1]; 956 t1 = x[0]; 957 } else { 958 t0 = x[0]; 959 t1 = x[1]; 960 } 961 t2 = x[2]; 962 if (t1 < t2) 963 return t1; 964 else if (t0 < t2) 965 return t2; 966 else 967 return t0; 968 } 969 970 static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16 *bandLogE2, 971 int nbEBands, int start, int end, int C, int *offsets, int lsb_depth, const opus_int16 *logN, 972 int isTransient, int vbr, int constrained_vbr, const opus_int16 *eBands, int LM, 973 int effectiveBytes, opus_int32 *tot_boost_, int lfe, opus_val16 *surround_dynalloc, 974 AnalysisInfo *analysis, int *importance, int *spread_weight) 975 { 976 int i, c; 977 opus_int32 tot_boost=0; 978 opus_val16 maxDepth; 979 VARDECL(opus_val16, follower); 980 VARDECL(opus_val16, noise_floor); 981 SAVE_STACK; 982 ALLOC(follower, C*nbEBands, opus_val16); 983 ALLOC(noise_floor, C*nbEBands, opus_val16); 984 OPUS_CLEAR(offsets, 
nbEBands); 985 /* Dynamic allocation code */ 986 maxDepth=-QCONST16(31.9f, DB_SHIFT); 987 for (i=0;i<end;i++) 988 { 989 /* Noise floor must take into account eMeans, the depth, the width of the bands 990 and the preemphasis filter (approx. square of bark band ID) */ 991 noise_floor[i] = MULT16_16(QCONST16(0.0625f, DB_SHIFT),logN[i]) 992 +QCONST16(.5f,DB_SHIFT)+SHL16(9-lsb_depth,DB_SHIFT)-SHL16(eMeans[i],6) 993 +MULT16_16(QCONST16(.0062,DB_SHIFT),(i+5)*(i+5)); 994 } 995 c=0;do 996 { 997 for (i=0;i<end;i++) 998 maxDepth = MAX16(maxDepth, bandLogE[c*nbEBands+i]-noise_floor[i]); 999 } while (++c<C); 1000 { 1001 /* Compute a really simple masking model to avoid taking into account completely masked 1002 bands when computing the spreading decision. */ 1003 VARDECL(opus_val16, mask); 1004 VARDECL(opus_val16, sig); 1005 ALLOC(mask, nbEBands, opus_val16); 1006 ALLOC(sig, nbEBands, opus_val16); 1007 for (i=0;i<end;i++) 1008 mask[i] = bandLogE[i]-noise_floor[i]; 1009 if (C==2) 1010 { 1011 for (i=0;i<end;i++) 1012 mask[i] = MAX16(mask[i], bandLogE[nbEBands+i]-noise_floor[i]); 1013 } 1014 OPUS_COPY(sig, mask, end); 1015 for (i=1;i<end;i++) 1016 mask[i] = MAX16(mask[i], mask[i-1] - QCONST16(2.f, DB_SHIFT)); 1017 for (i=end-2;i>=0;i--) 1018 mask[i] = MAX16(mask[i], mask[i+1] - QCONST16(3.f, DB_SHIFT)); 1019 for (i=0;i<end;i++) 1020 { 1021 /* Compute SMR: Mask is never more than 72 dB below the peak and never below the noise floor.*/ 1022 opus_val16 smr = sig[i]-MAX16(MAX16(0, maxDepth-QCONST16(12.f, DB_SHIFT)), mask[i]); 1023 /* Clamp SMR to make sure we're not shifting by something negative or too large. 
*/ 1024 #ifdef FIXED_POINT 1025 /* FIXME: Use PSHR16() instead */ 1026 int shift = -PSHR32(MAX16(-QCONST16(5.f, DB_SHIFT), MIN16(0, smr)), DB_SHIFT); 1027 #else 1028 int shift = IMIN(5, IMAX(0, -(int)floor(.5f + smr))); 1029 #endif 1030 spread_weight[i] = 32 >> shift; 1031 } 1032 /*for (i=0;i<end;i++) 1033 printf("%d ", spread_weight[i]); 1034 printf("\n");*/ 1035 } 1036 /* Make sure that dynamic allocation can't make us bust the budget */ 1037 if (effectiveBytes > 50 && LM>=1 && !lfe) 1038 { 1039 int last=0; 1040 c=0;do 1041 { 1042 opus_val16 offset; 1043 opus_val16 tmp; 1044 opus_val16 *f; 1045 f = &follower[c*nbEBands]; 1046 f[0] = bandLogE2[c*nbEBands]; 1047 for (i=1;i<end;i++) 1048 { 1049 /* The last band to be at least 3 dB higher than the previous one 1050 is the last we'll consider. Otherwise, we run into problems on 1051 bandlimited signals. */ 1052 if (bandLogE2[c*nbEBands+i] > bandLogE2[c*nbEBands+i-1]+QCONST16(.5f,DB_SHIFT)) 1053 last=i; 1054 f[i] = MIN16(f[i-1]+QCONST16(1.5f,DB_SHIFT), bandLogE2[c*nbEBands+i]); 1055 } 1056 for (i=last-1;i>=0;i--) 1057 f[i] = MIN16(f[i], MIN16(f[i+1]+QCONST16(2.f,DB_SHIFT), bandLogE2[c*nbEBands+i])); 1058 1059 /* Combine with a median filter to avoid dynalloc triggering unnecessarily. 1060 The "offset" value controls how conservative we are -- a higher offset 1061 reduces the impact of the median filter and makes dynalloc use more bits. 
*/ 1062 offset = QCONST16(1.f, DB_SHIFT); 1063 for (i=2;i<end-2;i++) 1064 f[i] = MAX16(f[i], median_of_5(&bandLogE2[c*nbEBands+i-2])-offset); 1065 tmp = median_of_3(&bandLogE2[c*nbEBands])-offset; 1066 f[0] = MAX16(f[0], tmp); 1067 f[1] = MAX16(f[1], tmp); 1068 tmp = median_of_3(&bandLogE2[c*nbEBands+end-3])-offset; 1069 f[end-2] = MAX16(f[end-2], tmp); 1070 f[end-1] = MAX16(f[end-1], tmp); 1071 1072 for (i=0;i<end;i++) 1073 f[i] = MAX16(f[i], noise_floor[i]); 1074 } while (++c<C); 1075 if (C==2) 1076 { 1077 for (i=start;i<end;i++) 1078 { 1079 /* Consider 24 dB "cross-talk" */ 1080 follower[nbEBands+i] = MAX16(follower[nbEBands+i], follower[ i]-QCONST16(4.f,DB_SHIFT)); 1081 follower[ i] = MAX16(follower[ i], follower[nbEBands+i]-QCONST16(4.f,DB_SHIFT)); 1082 follower[i] = HALF16(MAX16(0, bandLogE[i]-follower[i]) + MAX16(0, bandLogE[nbEBands+i]-follower[nbEBands+i])); 1083 } 1084 } else { 1085 for (i=start;i<end;i++) 1086 { 1087 follower[i] = MAX16(0, bandLogE[i]-follower[i]); 1088 } 1089 } 1090 for (i=start;i<end;i++) 1091 follower[i] = MAX16(follower[i], surround_dynalloc[i]); 1092 for (i=start;i<end;i++) 1093 { 1094 #ifdef FIXED_POINT 1095 importance[i] = PSHR32(13*celt_exp2(MIN16(follower[i], QCONST16(4.f, DB_SHIFT))), 16); 1096 #else 1097 importance[i] = (int)floor(.5f+13*celt_exp2(MIN16(follower[i], QCONST16(4.f, DB_SHIFT)))); 1098 #endif 1099 } 1100 /* For non-transient CBR/CVBR frames, halve the dynalloc contribution */ 1101 if ((!vbr || constrained_vbr)&&!isTransient) 1102 { 1103 for (i=start;i<end;i++) 1104 follower[i] = HALF16(follower[i]); 1105 } 1106 for (i=start;i<end;i++) 1107 { 1108 if (i<8) 1109 follower[i] *= 2; 1110 if (i>=12) 1111 follower[i] = HALF16(follower[i]); 1112 } 1113 #ifdef DISABLE_FLOAT_API 1114 (void)analysis; 1115 #else 1116 if (analysis->valid) 1117 { 1118 for (i=start;i<IMIN(LEAK_BANDS, end);i++) 1119 follower[i] = follower[i] + QCONST16(1.f/64.f, DB_SHIFT)*analysis->leak_boost[i]; 1120 } 1121 #endif 1122 for (i=start;i<end;i++) 
1123 { 1124 int width; 1125 int boost; 1126 int boost_bits; 1127 1128 follower[i] = MIN16(follower[i], QCONST16(4, DB_SHIFT)); 1129 1130 width = C*(eBands[i+1]-eBands[i])<<LM; 1131 if (width<6) 1132 { 1133 boost = (int)SHR32(EXTEND32(follower[i]),DB_SHIFT); 1134 boost_bits = boost*width<<BITRES; 1135 } else if (width > 48) { 1136 boost = (int)SHR32(EXTEND32(follower[i])*8,DB_SHIFT); 1137 boost_bits = (boost*width<<BITRES)/8; 1138 } else { 1139 boost = (int)SHR32(EXTEND32(follower[i])*width/6,DB_SHIFT); 1140 boost_bits = boost*6<<BITRES; 1141 } 1142 /* For CBR and non-transient CVBR frames, limit dynalloc to 2/3 of the bits */ 1143 if ((!vbr || (constrained_vbr&&!isTransient)) 1144 && (tot_boost+boost_bits)>>BITRES>>3 > 2*effectiveBytes/3) 1145 { 1146 opus_int32 cap = ((2*effectiveBytes/3)<<BITRES<<3); 1147 offsets[i] = cap-tot_boost; 1148 tot_boost = cap; 1149 break; 1150 } else { 1151 offsets[i] = boost; 1152 tot_boost += boost_bits; 1153 } 1154 } 1155 } else { 1156 for (i=start;i<end;i++) 1157 importance[i] = 13; 1158 } 1159 *tot_boost_ = tot_boost; 1160 RESTORE_STACK; 1161 return maxDepth; 1162 } 1163 1164

/** Pitch pre-filter stage for one frame.

    For every channel a work buffer is assembled from COMBFILTER_MAXPERIOD
    samples of history (prefilter_mem) followed by the N new samples that sit
    after the overlap region of `in`. When `enabled` is set, a pitch period and
    gain are estimated from that buffer; otherwise the gain is zero and the
    period is COMBFILTER_MINPERIOD. The gain is compared against a threshold
    that depends on the previous frame and on nbAvailableBytes, then quantised
    to one of 8 steps. Finally comb_filter() is run over the frame and the
    saved overlap (st->in_mem) and the history (prefilter_mem) are refreshed.

    @param st               Encoder state. Reads mode, arch, loss_rate,
                            prefilter_period, prefilter_gain, prefilter_tapset;
                            clamps prefilter_period to at least
                            COMBFILTER_MINPERIOD; rewrites the first
                            CC*overlap entries of in_mem.
    @param in               CC channel buffers of N+overlap samples each. The
                            first `overlap` samples of every channel are
                            overwritten from st->in_mem; the rest is handed to
                            comb_filter() as its first argument (presumably the
                            output -- confirm against comb_filter()).
    @param prefilter_mem    CC history buffers of COMBFILTER_MAXPERIOD samples,
                            updated with the newest unfiltered samples.
    @param CC               Number of channels processed (loops run at least once).
    @param N                Number of new samples per channel.
    @param prefilter_tapset Tapset passed to comb_filter() as the new tapset.
    @param pitch            [out] Selected pitch period.
    @param gain             [out] Quantised gain (0 when the filter is off).
    @param qgain            [out] Gain index in [0,7] (0 when the filter is off).
    @param enabled          Non-zero to run the pitch analysis.
    @param nbAvailableBytes Byte budget; small values raise the threshold.
    @param analysis         Only read when DISABLE_FLOAT_API is not defined:
                            if valid, the gain is scaled by max_pitch_ratio.
    @return 1 if the gain reached the threshold (filter on), 0 otherwise.
*/
static int run_prefilter(CELTEncoder *st, celt_sig *in, celt_sig *prefilter_mem, int CC, int N,
      int prefilter_tapset, int *pitch, opus_val16 *gain, int *qgain, int enabled, int nbAvailableBytes, AnalysisInfo *analysis)
{
   int ch;
   int period;
   int gain_index;
   int filter_on;
   int ovl;
   int head;
   opus_val16 pf_gain;
   opus_val16 threshold;
   const CELTMode *m;
   celt_sig *hist[2];
   VARDECL(celt_sig, work);
   SAVE_STACK;

   m = st->mode;
   ovl = m->overlap;
   ALLOC(work, CC*(N+COMBFILTER_MAXPERIOD), celt_sig);

   hist[0] = work;
   hist[1] = work + (N+COMBFILTER_MAXPERIOD);

   /* History first, then the new samples located after the overlap in `in`. */
   ch = 0;
   do {
      celt_sig *dst = hist[ch];
      OPUS_COPY(dst, prefilter_mem+ch*COMBFILTER_MAXPERIOD, COMBFILTER_MAXPERIOD);
      OPUS_COPY(dst+COMBFILTER_MAXPERIOD, in+ch*(N+ovl)+ovl, N);
   } while (++ch<CC);

   if (!enabled)
   {
      pf_gain = 0;
      period = COMBFILTER_MINPERIOD;
   } else {
      VARDECL(opus_val16, pitch_buf);
      ALLOC(pitch_buf, (COMBFILTER_MAXPERIOD+N)>>1, opus_val16);

      pitch_downsample(hist, pitch_buf, COMBFILTER_MAXPERIOD+N, CC, st->arch);
      /* The search range is shortened by 3*COMBFILTER_MINPERIOD: the last
         1.5 octave of the range is not searched because there are too many
         false positives due to short-term correlation. */
      pitch_search(pitch_buf+(COMBFILTER_MAXPERIOD>>1), pitch_buf, N,
            COMBFILTER_MAXPERIOD-3*COMBFILTER_MINPERIOD, &period,
            st->arch);
      period = COMBFILTER_MAXPERIOD-period;

      pf_gain = remove_doubling(pitch_buf, COMBFILTER_MAXPERIOD, COMBFILTER_MINPERIOD,
            N, &period, st->prefilter_period, st->prefilter_gain, st->arch);
      period = IMIN(period, COMBFILTER_MAXPERIOD-2);
      pf_gain = MULT16_16_Q15(QCONST16(.7f,15),pf_gain);

      /* Back off as the expected loss rate grows: halve above 2, halve
         again above 4, and drop the gain entirely above 8. */
      if (st->loss_rate>8)
      {
         pf_gain = 0;
      } else {
         if (st->loss_rate>2)
            pf_gain = HALF32(pf_gain);
         if (st->loss_rate>4)
            pf_gain = HALF32(pf_gain);
      }
   }
#ifndef DISABLE_FLOAT_API
   if (analysis->valid)
      pf_gain = (opus_val16)(pf_gain * analysis->max_pitch_ratio);
#else
   (void)analysis;
#endif

   /* Gain threshold for enabling the prefilter/postfilter. The adjustments
      below are applied in a fixed order (rate and continuity). */
   threshold = QCONST16(.2f,15);
   /* Period moved by more than 10% relative to the previous frame. */
   if (abs(period-st->prefilter_period)*10>period)
      threshold += QCONST16(.2f,15);
   if (nbAvailableBytes<25)
      threshold += QCONST16(.1f,15);
   if (nbAvailableBytes<35)
      threshold += QCONST16(.1f,15);
   if (st->prefilter_gain > QCONST16(.4f,15))
      threshold -= QCONST16(.1f,15);
   if (st->prefilter_gain > QCONST16(.55f,15))
      threshold -= QCONST16(.1f,15);

   /* Hard threshold at 0.2 */
   threshold = MAX16(threshold, QCONST16(.2f,15));

   if (!(pf_gain<threshold))
   {
      /*This block is not gated by a total bits check only because
        of the nbAvailableBytes check above.*/
      /* Reuse the previous gain when the new one is within 0.1 of it. */
      if (ABS16(pf_gain-st->prefilter_gain)<QCONST16(.1f,15))
         pf_gain=st->prefilter_gain;

#ifdef FIXED_POINT
      gain_index = ((pf_gain+1536)>>10)/3-1;
#else
      gain_index = (int)floor(.5f+pf_gain*32/3)-1;
#endif
      gain_index = IMAX(0, IMIN(7, gain_index));
      /* Quantised gain: (index+1) steps of 0.09375. */
      pf_gain = QCONST16(0.09375f,15)*(gain_index+1);
      filter_on = 1;
   } else {
      pf_gain = 0;
      filter_on = 0;
      gain_index = 0;
   }

   /* Same for every channel: length of the leading segment that is filtered
      with the previous frame's parameters only, and the clamped old period. */
   head = m->shortMdctSize-ovl;
   st->prefilter_period = IMAX(st->prefilter_period, COMBFILTER_MINPERIOD);

   ch = 0;
   do {
      celt_sig *frame = in+ch*(N+ovl);
      celt_sig *saved = st->in_mem+ch*(ovl);
      celt_sig *past = prefilter_mem+ch*COMBFILTER_MAXPERIOD;
      celt_sig *fresh = hist[ch]+COMBFILTER_MAXPERIOD;

      /* Overlap region comes from what was saved on the previous call. */
      OPUS_COPY(frame, saved, ovl);
      if (head)
         comb_filter(frame+ovl, fresh,
               st->prefilter_period, st->prefilter_period, head, -st->prefilter_gain, -st->prefilter_gain,
               st->prefilter_tapset, st->prefilter_tapset, NULL, 0, st->arch);

      /* Remainder of the frame: old and new period/gain/tapset are both
         passed, together with the mode window and overlap length. */
      comb_filter(frame+ovl+head, fresh+head,
            st->prefilter_period, period, N-head, -st->prefilter_gain, -pf_gain,
            st->prefilter_tapset, prefilter_tapset, m->window, ovl, st->arch);
      /* Keep the last `overlap` samples of this frame for the next call. */
      OPUS_COPY(saved, frame+N, ovl);

      /* Refresh the history with the newest COMBFILTER_MAXPERIOD samples. */
      if (N>COMBFILTER_MAXPERIOD)
      {
         OPUS_COPY(past, hist[ch]+N, COMBFILTER_MAXPERIOD);
      } else {
         OPUS_MOVE(past, past+N, COMBFILTER_MAXPERIOD-N);
         OPUS_COPY(past+COMBFILTER_MAXPERIOD-N, fresh, N);
      }
   } while (++ch<CC);

   RESTORE_STACK;
   *gain = pf_gain;
   *pitch = period;
   *qgain = gain_index;
   return filter_on;
}

/** Computes the VBR target for the current frame.

    Starts from base_target and applies, in this order: an activity-based
    reduction, stereo savings, the dynalloc boost, a transient boost, a
    tonality boost, surround masking, a cap derived from maxDepth, a softening
    for constrained VBR and a temporal-VBR adjustment. The result is limited
    to at most twice base_target.

    @param mode              Mode; nbEBands and eBands are read.
    @param analysis          Only read when DISABLE_FLOAT_API is not defined
                             (valid, activity, tonality).
    @param base_target       Starting target; also bounds the result.
    @param LM                Shift applied to band edges to get bin counts.
    @param bitrate           Only used to scale the temporal-VBR adjustment.
    @param lastCodedBands    Number of coded bands; 0 selects mode->nbEBands.
    @param C                 Channel count; C==2 enables the stereo terms.
    @param intensity         Band index limiting the stereo band count.
    @param constrained_vbr   Non-zero to pull the target back towards base_target.
    @param stereo_saving     Stereo saving estimate (clamped to QCONST16(1.f, 8)).
    @param tot_boost         Dynalloc boost added to the target.
    @param tf_estimate       Transient estimate driving the transient boost.
    @param pitch_change      Non-zero adds an extra boost with the tonality term.
    @param maxDepth          Scales the cap applied to the target.
    @param lfe               Non-zero disables the tonality and surround terms.
    @param has_surround_mask Non-zero if surround masking applies.
    @param surround_masking  Masking value scaled by the coded bin count.
    @param temporal_vbr      Temporal VBR term.
    @return The adjusted target (per the original comment, in 8th bits per frame).
*/
static int compute_vbr(const CELTMode *mode, AnalysisInfo *analysis, opus_int32 base_target,
      int LM, opus_int32 bitrate, int lastCodedBands, int C, int intensity,
      int constrained_vbr, opus_val16 stereo_saving, int tot_boost,
      opus_val16 tf_estimate, int pitch_change, opus_val16 maxDepth,
      int lfe, int has_surround_mask, opus_val16 surround_masking,
      opus_val16 temporal_vbr)
{
   const int nb_bands = mode->nbEBands;
   const opus_int16 *edges = mode->eBands;
   const opus_val16 tf_bias = QCONST16(0.044f,14);
   int last_band;
   int nb_bins;
   int depth_bins;
   opus_int32 depth_cap;
   /* The target rate in 8th bits per frame */
   opus_int32 rate;

   if (lastCodedBands)
      last_band = lastCodedBands;
   else
      last_band = nb_bands;

   /* Number of coded bins; in stereo the bins up to the intensity band
      (or the last coded band) are counted a second time. */
   nb_bins = edges[last_band]<<LM;
   if (C==2)
      nb_bins += edges[IMIN(intensity, last_band)]<<LM;

   rate = base_target;

#ifndef DISABLE_FLOAT_API
   /* Low activity: spend fewer bits. */
   if (analysis->valid && analysis->activity<.4)
      rate -= (opus_int32)((nb_bins<<BITRES)*(.4f-analysis->activity));
#endif

   /* Stereo savings */
   if (C==2)
   {
      const int ms_bands = IMIN(intensity, last_band);
      const int ms_dof = (edges[ms_bands]<<LM)-ms_bands;
      opus_val16 frac_cap;
      /* Maximum fraction of the bits we can save if the signal is mono. */
      frac_cap = DIV32_16(MULT16_16(QCONST16(0.8f, 15), ms_dof), nb_bins);
      stereo_saving = MIN16(stereo_saving, QCONST16(1.f, 8));
      rate -= (opus_int32)MIN32(MULT16_32_Q15(frac_cap,rate),
                      SHR32(MULT16_16(stereo_saving-QCONST16(0.1f,8),(ms_dof<<BITRES)),8));
   }

   /* Boost the rate according to dynalloc (minus the dynalloc average for calibration). */
   rate += tot_boost-(19<<LM);
   /* Apply transient boost, compensating for average boost. */
   rate += (opus_int32)SHL32(MULT16_32_Q15(tf_estimate-tf_bias, rate),1);

#ifndef DISABLE_FLOAT_API
   /* Apply tonality boost */
   if (analysis->valid && !lfe)
   {
      /* Tonality boost (compensating for the average). */
      float tonal = MAX16(0.f,analysis->tonality-.15f)-0.12f;
      rate += (opus_int32)((nb_bins<<BITRES)*1.2f*tonal);
      if (pitch_change)
         rate += (opus_int32)((nb_bins<<BITRES)*.8f);
   }
#else
   (void)analysis;
   (void)pitch_change;
#endif

   /* Surround masking shifts the target, but never below a quarter of it. */
   if (has_surround_mask&&!lfe)
   {
      opus_int32 masked = rate + (opus_int32)SHR32(MULT16_16(surround_masking,nb_bins<<BITRES), DB_SHIFT);
      rate = IMAX(rate/4, masked);
   }

   /* Cap the target with a value derived from maxDepth; the cap itself is
      never lower than a quarter of the current target. */
   depth_bins = edges[nb_bands-2]<<LM;
   depth_cap = (opus_int32)SHR32(MULT16_16((C*depth_bins<<BITRES),maxDepth), DB_SHIFT);
   depth_cap = IMAX(depth_cap, rate>>2);
   rate = IMIN(rate, depth_cap);

   /* Make VBR less aggressive for constrained VBR because we can't keep a higher bitrate
      for long. Needs tuning. */
   if ((!has_surround_mask||lfe) && constrained_vbr)
   {
      rate = base_target + (opus_int32)MULT16_32_Q15(QCONST16(0.67f, 15), rate-base_target);
   }

   /* Temporal VBR, only without surround masking and for low tf_estimate;
      its weight is IMIN(32000, 96000-bitrate), clamped at zero. */
   if (!has_surround_mask && tf_estimate < QCONST16(.2f, 14))
   {
      opus_val16 amount;
      opus_val16 tvbr_factor;
      amount = MULT16_16_Q15(QCONST16(.0000031f, 30), IMAX(0, IMIN(32000, 96000-bitrate)));
      tvbr_factor = SHR32(MULT16_16(temporal_vbr, amount), DB_SHIFT);
      rate += (opus_int32)MULT16_32_Q15(tvbr_factor, rate);
   }

   /* Don't allow more than doubling the rate */
   rate = IMIN(2*base_target, rate);

   return rate;
}

1407 1408 int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes, ec_enc *enc) 1409 { 1410 int i, c, N; 1411 opus_int32 bits; 1412 ec_enc _enc; 1413 VARDECL(celt_sig, in); 1414 VARDECL(celt_sig, freq); 1415 VARDECL(celt_norm, X); 1416 VARDECL(celt_ener, bandE); 1417 VARDECL(opus_val16, bandLogE); 1418 VARDECL(opus_val16, bandLogE2); 1419 VARDECL(int, fine_quant); 1420 VARDECL(opus_val16, error); 1421 VARDECL(int, pulses); 1422 VARDECL(int, cap); 1423 VARDECL(int, offsets); 1424 VARDECL(int, importance); 1425 VARDECL(int, spread_weight); 1426 VARDECL(int, fine_priority); 1427 VARDECL(int, tf_res); 1428 VARDECL(unsigned char, collapse_masks); 1429 celt_sig *prefilter_mem; 1430 opus_val16 *oldBandE, *oldLogE, *oldLogE2, *energyError; 1431 int shortBlocks=0; 1432 int isTransient=0; 1433 const int CC = st->channels; 1434 const int C = st->stream_channels; 1435 int LM, M; 1436 int tf_select; 1437 int nbFilledBytes, nbAvailableBytes; 1438 int start; 1439 int end; 1440 int effEnd; 1441 int codedBands; 1442 int alloc_trim; 1443 int pitch_index=COMBFILTER_MINPERIOD; 1444 opus_val16 gain1 = 0; 1445 int dual_stereo=0; 1446 int effectiveBytes; 1447 int dynalloc_logp; 1448 opus_int32 vbr_rate; 1449 opus_int32 total_bits; 1450 opus_int32
total_boost; 1451 opus_int32 balance; 1452 opus_int32 tell; 1453 opus_int32 tell0_frac; 1454 int prefilter_tapset=0; 1455 int pf_on; 1456 int anti_collapse_rsv; 1457 int anti_collapse_on=0; 1458 int silence=0; 1459 int tf_chan = 0; 1460 opus_val16 tf_estimate; 1461 int pitch_change=0; 1462 opus_int32 tot_boost; 1463 opus_val32 sample_max; 1464 opus_val16 maxDepth; 1465 const OpusCustomMode *mode; 1466 int nbEBands; 1467 int overlap; 1468 const opus_int16 *eBands; 1469 int secondMdct; 1470 int signalBandwidth; 1471 int transient_got_disabled=0; 1472 opus_val16 surround_masking=0; 1473 opus_val16 temporal_vbr=0; 1474 opus_val16 surround_trim = 0; 1475 opus_int32 equiv_rate; 1476 int hybrid; 1477 int weak_transient = 0; 1478 int enable_tf_analysis; 1479 VARDECL(opus_val16, surround_dynalloc); 1480 ALLOC_STACK; 1481 1482 mode = st->mode; 1483 nbEBands = mode->nbEBands; 1484 overlap = mode->overlap; 1485 eBands = mode->eBands; 1486 start = st->start; 1487 end = st->end; 1488 hybrid = start != 0; 1489 tf_estimate = 0; 1490 if (nbCompressedBytes<2 || pcm==NULL) 1491 { 1492 RESTORE_STACK; 1493 return OPUS_BAD_ARG; 1494 } 1495 1496 frame_size *= st->upsample; 1497 for (LM=0;LM<=mode->maxLM;LM++) 1498 if (mode->shortMdctSize<<LM==frame_size) 1499 break; 1500 if (LM>mode->maxLM) 1501 { 1502 RESTORE_STACK; 1503 return OPUS_BAD_ARG; 1504 } 1505 M=1<<LM; 1506 N = M*mode->shortMdctSize; 1507 1508 prefilter_mem = st->in_mem+CC*(overlap); 1509 oldBandE = (opus_val16*)(st->in_mem+CC*(overlap+COMBFILTER_MAXPERIOD)); 1510 oldLogE = oldBandE + CC*nbEBands; 1511 oldLogE2 = oldLogE + CC*nbEBands; 1512 energyError = oldLogE2 + CC*nbEBands; 1513 1514 if (enc==NULL) 1515 { 1516 tell0_frac=tell=1; 1517 nbFilledBytes=0; 1518 } else { 1519 tell0_frac=ec_tell_frac(enc); 1520 tell=ec_tell(enc); 1521 nbFilledBytes=(tell+4)>>3; 1522 } 1523 1524 #ifdef CUSTOM_MODES 1525 if (st->signalling && enc==NULL) 1526 { 1527 int tmp = (mode->effEBands-end)>>1; 1528 end = st->end = IMAX(1, 
mode->effEBands-tmp); 1529 compressed[0] = tmp<<5; 1530 compressed[0] |= LM<<3; 1531 compressed[0] |= (C==2)<<2; 1532 /* Convert "standard mode" to Opus header */ 1533 if (mode->Fs==48000 && mode->shortMdctSize==120) 1534 { 1535 int c0 = toOpus(compressed[0]); 1536 if (c0<0) 1537 { 1538 RESTORE_STACK; 1539 return OPUS_BAD_ARG; 1540 } 1541 compressed[0] = c0; 1542 } 1543 compressed++; 1544 nbCompressedBytes--; 1545 } 1546 #else 1547 celt_assert(st->signalling==0); 1548 #endif 1549 1550 /* Can't produce more than 1275 output bytes */ 1551 nbCompressedBytes = IMIN(nbCompressedBytes,1275); 1552 nbAvailableBytes = nbCompressedBytes - nbFilledBytes; 1553 1554 if (st->vbr && st->bitrate!=OPUS_BITRATE_MAX) 1555 { 1556 opus_int32 den=mode->Fs>>BITRES; 1557 vbr_rate=(st->bitrate*frame_size+(den>>1))/den; 1558 #ifdef CUSTOM_MODES 1559 if (st->signalling) 1560 vbr_rate -= 8<<BITRES; 1561 #endif 1562 effectiveBytes = vbr_rate>>(3+BITRES); 1563 } else { 1564 opus_int32 tmp; 1565 vbr_rate = 0; 1566 tmp = st->bitrate*frame_size; 1567 if (tell>1) 1568 tmp += tell; 1569 if (st->bitrate!=OPUS_BITRATE_MAX) 1570 nbCompressedBytes = IMAX(2, IMIN(nbCompressedBytes, 1571 (tmp+4*mode->Fs)/(8*mode->Fs)-!!st->signalling)); 1572 effectiveBytes = nbCompressedBytes - nbFilledBytes; 1573 } 1574 equiv_rate = ((opus_int32)nbCompressedBytes*8*50 >> (3-LM)) - (40*C+20)*((400>>LM) - 50); 1575 if (st->bitrate != OPUS_BITRATE_MAX) 1576 equiv_rate = IMIN(equiv_rate, st->bitrate - (40*C+20)*((400>>LM) - 50)); 1577 1578 if (enc==NULL) 1579 { 1580 ec_enc_init(&_enc, compressed, nbCompressedBytes); 1581 enc = &_enc; 1582 } 1583 1584 if (vbr_rate>0) 1585 { 1586 /* Computes the max bit-rate allowed in VBR mode to avoid violating the 1587 target rate and buffering. 1588 We must do this up front so that bust-prevention logic triggers 1589 correctly if we don't have enough bits. 
*/ 1590 if (st->constrained_vbr) 1591 { 1592 opus_int32 vbr_bound; 1593 opus_int32 max_allowed; 1594 /* We could use any multiple of vbr_rate as bound (depending on the 1595 delay). 1596 This is clamped to ensure we use at least two bytes if the encoder 1597 was entirely empty, but to allow 0 in hybrid mode. */ 1598 vbr_bound = vbr_rate; 1599 max_allowed = IMIN(IMAX(tell==1?2:0, 1600 (vbr_rate+vbr_bound-st->vbr_reservoir)>>(BITRES+3)), 1601 nbAvailableBytes); 1602 if(max_allowed < nbAvailableBytes) 1603 { 1604 nbCompressedBytes = nbFilledBytes+max_allowed; 1605 nbAvailableBytes = max_allowed; 1606 ec_enc_shrink(enc, nbCompressedBytes); 1607 } 1608 } 1609 } 1610 total_bits = nbCompressedBytes*8; 1611 1612 effEnd = end; 1613 if (effEnd > mode->effEBands) 1614 effEnd = mode->effEBands; 1615 1616 ALLOC(in, CC*(N+overlap), celt_sig); 1617 1618 sample_max=MAX32(st->overlap_max, celt_maxabs16(pcm, C*(N-overlap)/st->upsample)); 1619 st->overlap_max=celt_maxabs16(pcm+C*(N-overlap)/st->upsample, C*overlap/st->upsample); 1620 sample_max=MAX32(sample_max, st->overlap_max); 1621 #ifdef FIXED_POINT 1622 silence = (sample_max==0); 1623 #else 1624 silence = (sample_max <= (opus_val16)1/(1<<st->lsb_depth)); 1625 #endif 1626 #ifdef FUZZING 1627 if ((rand()&0x3F)==0) 1628 silence = 1; 1629 #endif 1630 if (tell==1) 1631 ec_enc_bit_logp(enc, silence, 15); 1632 else 1633 silence=0; 1634 if (silence) 1635 { 1636 /*In VBR mode there is no need to send more than the minimum. 
*/ 1637 if (vbr_rate>0) 1638 { 1639 effectiveBytes=nbCompressedBytes=IMIN(nbCompressedBytes, nbFilledBytes+2); 1640 total_bits=nbCompressedBytes*8; 1641 nbAvailableBytes=2; 1642 ec_enc_shrink(enc, nbCompressedBytes); 1643 } 1644 /* Pretend we've filled all the remaining bits with zeros 1645 (that's what the initialiser did anyway) */ 1646 tell = nbCompressedBytes*8; 1647 enc->nbits_total+=tell-ec_tell(enc); 1648 } 1649 c=0; do { 1650 int need_clip=0; 1651 #ifndef FIXED_POINT 1652 need_clip = st->clip && sample_max>65536.f; 1653 #endif 1654 celt_preemphasis(pcm+c, in+c*(N+overlap)+overlap, N, CC, st->upsample, 1655 mode->preemph, st->preemph_memE+c, need_clip); 1656 } while (++c<CC); 1657 1658 1659 1660 /* Find pitch period and gain */ 1661 { 1662 int enabled; 1663 int qg; 1664 enabled = ((st->lfe&&nbAvailableBytes>3) || nbAvailableBytes>12*C) && !hybrid && !silence && !st->disable_pf 1665 && st->complexity >= 5; 1666 1667 prefilter_tapset = st->tapset_decision; 1668 pf_on = run_prefilter(st, in, prefilter_mem, CC, N, prefilter_tapset, &pitch_index, &gain1, &qg, enabled, nbAvailableBytes, &st->analysis); 1669 if ((gain1 > QCONST16(.4f,15) || st->prefilter_gain > QCONST16(.4f,15)) && (!st->analysis.valid || st->analysis.tonality > .3) 1670 && (pitch_index > 1.26*st->prefilter_period || pitch_index < .79*st->prefilter_period)) 1671 pitch_change = 1; 1672 if (pf_on==0) 1673 { 1674 if(!hybrid && tell+16<=total_bits) 1675 ec_enc_bit_logp(enc, 0, 1); 1676 } else { 1677 /*This block is not gated by a total bits check only because 1678 of the nbAvailableBytes check above.*/ 1679 int octave; 1680 ec_enc_bit_logp(enc, 1, 1); 1681 pitch_index += 1; 1682 octave = EC_ILOG(pitch_index)-5; 1683 ec_enc_uint(enc, octave, 6); 1684 ec_enc_bits(enc, pitch_index-(16<<octave), 4+octave); 1685 pitch_index -= 1; 1686 ec_enc_bits(enc, qg, 3); 1687 ec_enc_icdf(enc, prefilter_tapset, tapset_icdf, 2); 1688 } 1689 } 1690 1691 isTransient = 0; 1692 shortBlocks = 0; 1693 if (st->complexity >= 1 
&& !st->lfe) 1694 { 1695 /* Reduces the likelihood of energy instability on fricatives at low bitrate 1696 in hybrid mode. It seems like we still want to have real transients on vowels 1697 though (small SILK quantization offset value). */ 1698 int allow_weak_transients = hybrid && effectiveBytes<15 && st->silk_info.signalType != 2; 1699 isTransient = transient_analysis(in, N+overlap, CC, 1700 &tf_estimate, &tf_chan, allow_weak_transients, &weak_transient); 1701 } 1702 if (LM>0 && ec_tell(enc)+3<=total_bits) 1703 { 1704 if (isTransient) 1705 shortBlocks = M; 1706 } else { 1707 isTransient = 0; 1708 transient_got_disabled=1; 1709 } 1710 1711 ALLOC(freq, CC*N, celt_sig); /**< Interleaved signal MDCTs */ 1712 ALLOC(bandE,nbEBands*CC, celt_ener); 1713 ALLOC(bandLogE,nbEBands*CC, opus_val16); 1714 1715 secondMdct = shortBlocks && st->complexity>=8; 1716 ALLOC(bandLogE2, C*nbEBands, opus_val16); 1717 if (secondMdct) 1718 { 1719 compute_mdcts(mode, 0, in, freq, C, CC, LM, st->upsample, st->arch); 1720 compute_band_energies(mode, freq, bandE, effEnd, C, LM, st->arch); 1721 amp2Log2(mode, effEnd, end, bandE, bandLogE2, C); 1722 for (i=0;i<C*nbEBands;i++) 1723 bandLogE2[i] += HALF16(SHL16(LM, DB_SHIFT)); 1724 } 1725 1726 compute_mdcts(mode, shortBlocks, in, freq, C, CC, LM, st->upsample, st->arch); 1727 /* This should catch any NaN in the CELT input. Since we're not supposed to see any (they're filtered 1728 at the Opus layer), just abort. 
*/ 1729 celt_assert(!celt_isnan(freq[0]) && (C==1 || !celt_isnan(freq[N]))); 1730 if (CC==2&&C==1) 1731 tf_chan = 0; 1732 compute_band_energies(mode, freq, bandE, effEnd, C, LM, st->arch); 1733 1734 if (st->lfe) 1735 { 1736 for (i=2;i<end;i++) 1737 { 1738 bandE[i] = IMIN(bandE[i], MULT16_32_Q15(QCONST16(1e-4f,15),bandE[0])); 1739 bandE[i] = MAX32(bandE[i], EPSILON); 1740 } 1741 } 1742 amp2Log2(mode, effEnd, end, bandE, bandLogE, C); 1743 1744 ALLOC(surround_dynalloc, C*nbEBands, opus_val16); 1745 OPUS_CLEAR(surround_dynalloc, end); 1746 /* This computes how much masking takes place between surround channels */ 1747 if (!hybrid&&st->energy_mask&&!st->lfe) 1748 { 1749 int mask_end; 1750 int midband; 1751 int count_dynalloc; 1752 opus_val32 mask_avg=0; 1753 opus_val32 diff=0; 1754 int count=0; 1755 mask_end = IMAX(2,st->lastCodedBands); 1756 for (c=0;c<C;c++) 1757 { 1758 for(i=0;i<mask_end;i++) 1759 { 1760 opus_val16 mask; 1761 mask = MAX16(MIN16(st->energy_mask[nbEBands*c+i], 1762 QCONST16(.25f, DB_SHIFT)), -QCONST16(2.0f, DB_SHIFT)); 1763 if (mask > 0) 1764 mask = HALF16(mask); 1765 mask_avg += MULT16_16(mask, eBands[i+1]-eBands[i]); 1766 count += eBands[i+1]-eBands[i]; 1767 diff += MULT16_16(mask, 1+2*i-mask_end); 1768 } 1769 } 1770 celt_assert(count>0); 1771 mask_avg = DIV32_16(mask_avg,count); 1772 mask_avg += QCONST16(.2f, DB_SHIFT); 1773 diff = diff*6/(C*(mask_end-1)*(mask_end+1)*mask_end); 1774 /* Again, being conservative */ 1775 diff = HALF32(diff); 1776 diff = MAX32(MIN32(diff, QCONST32(.031f, DB_SHIFT)), -QCONST32(.031f, DB_SHIFT)); 1777 /* Find the band that's in the middle of the coded spectrum */ 1778 for (midband=0;eBands[midband+1] < eBands[mask_end]/2;midband++); 1779 count_dynalloc=0; 1780 for(i=0;i<mask_end;i++) 1781 { 1782 opus_val32 lin; 1783 opus_val16 unmask; 1784 lin = mask_avg + diff*(i-midband); 1785 if (C==2) 1786 unmask = MAX16(st->energy_mask[i], st->energy_mask[nbEBands+i]); 1787 else 1788 unmask = st->energy_mask[i]; 1789 unmask = 
MIN16(unmask, QCONST16(.0f, DB_SHIFT)); 1790 unmask -= lin; 1791 if (unmask > QCONST16(.25f, DB_SHIFT)) 1792 { 1793 surround_dynalloc[i] = unmask - QCONST16(.25f, DB_SHIFT); 1794 count_dynalloc++; 1795 } 1796 } 1797 if (count_dynalloc>=3) 1798 { 1799 /* If we need dynalloc in many bands, it's probably because our 1800 initial masking rate was too low. */ 1801 mask_avg += QCONST16(.25f, DB_SHIFT); 1802 if (mask_avg>0) 1803 { 1804 /* Something went really wrong in the original calculations, 1805 disabling masking. */ 1806 mask_avg = 0; 1807 diff = 0; 1808 OPUS_CLEAR(surround_dynalloc, mask_end); 1809 } else { 1810 for(i=0;i<mask_end;i++) 1811 surround_dynalloc[i] = MAX16(0, surround_dynalloc[i]-QCONST16(.25f, DB_SHIFT)); 1812 } 1813 } 1814 mask_avg += QCONST16(.2f, DB_SHIFT); 1815 /* Convert to 1/64th units used for the trim */ 1816 surround_trim = 64*diff; 1817 /*printf("%d %d ", mask_avg, surround_trim);*/ 1818 surround_masking = mask_avg; 1819 } 1820 /* Temporal VBR (but not for LFE) */ 1821 if (!st->lfe) 1822 { 1823 opus_val16 follow=-QCONST16(10.0f,DB_SHIFT); 1824 opus_val32 frame_avg=0; 1825 opus_val16 offset = shortBlocks?HALF16(SHL16(LM, DB_SHIFT)):0; 1826 for(i=start;i<end;i++) 1827 { 1828 follow = MAX16(follow-QCONST16(1.f, DB_SHIFT), bandLogE[i]-offset); 1829 if (C==2) 1830 follow = MAX16(follow, bandLogE[i+nbEBands]-offset); 1831 frame_avg += follow; 1832 } 1833 frame_avg /= (end-start); 1834 temporal_vbr = SUB16(frame_avg,st->spec_avg); 1835 temporal_vbr = MIN16(QCONST16(3.f, DB_SHIFT), MAX16(-QCONST16(1.5f, DB_SHIFT), temporal_vbr)); 1836 st->spec_avg += MULT16_16_Q15(QCONST16(.02f, 15), temporal_vbr); 1837 } 1838 /*for (i=0;i<21;i++) 1839 printf("%f ", bandLogE[i]); 1840 printf("\n");*/ 1841 1842 if (!secondMdct) 1843 { 1844 OPUS_COPY(bandLogE2, bandLogE, C*nbEBands); 1845 } 1846 1847 /* Last chance to catch any transient we might have missed in the 1848 time-domain analysis */ 1849 if (LM>0 && ec_tell(enc)+3<=total_bits && !isTransient && 
st->complexity>=5 && !st->lfe && !hybrid) 1850 { 1851 if (patch_transient_decision(bandLogE, oldBandE, nbEBands, start, end, C)) 1852 { 1853 isTransient = 1; 1854 shortBlocks = M; 1855 compute_mdcts(mode, shortBlocks, in, freq, C, CC, LM, st->upsample, st->arch); 1856 compute_band_energies(mode, freq, bandE, effEnd, C, LM, st->arch); 1857 amp2Log2(mode, effEnd, end, bandE, bandLogE, C); 1858 /* Compensate for the scaling of short vs long mdcts */ 1859 for (i=0;i<C*nbEBands;i++) 1860 bandLogE2[i] += HALF16(SHL16(LM, DB_SHIFT)); 1861 tf_estimate = QCONST16(.2f,14); 1862 } 1863 } 1864 1865 if (LM>0 && ec_tell(enc)+3<=total_bits) 1866 ec_enc_bit_logp(enc, isTransient, 3); 1867 1868 ALLOC(X, C*N, celt_norm); /**< Interleaved normalised MDCTs */ 1869 1870 /* Band normalisation */ 1871 normalise_bands(mode, freq, X, bandE, effEnd, C, M); 1872 1873 enable_tf_analysis = effectiveBytes>=15*C && !hybrid && st->complexity>=2 && !st->lfe; 1874 1875 ALLOC(offsets, nbEBands, int); 1876 ALLOC(importance, nbEBands, int); 1877 ALLOC(spread_weight, nbEBands, int); 1878 1879 maxDepth = dynalloc_analysis(bandLogE, bandLogE2, nbEBands, start, end, C, offsets, 1880 st->lsb_depth, mode->logN, isTransient, st->vbr, st->constrained_vbr, 1881 eBands, LM, effectiveBytes, &tot_boost, st->lfe, surround_dynalloc, &st->analysis, importance, spread_weight); 1882 1883 ALLOC(tf_res, nbEBands, int); 1884 /* Disable variable tf resolution for hybrid and at very low bitrate */ 1885 if (enable_tf_analysis) 1886 { 1887 int lambda; 1888 lambda = IMAX(80, 20480/effectiveBytes + 2); 1889 tf_select = tf_analysis(mode, effEnd, isTransient, tf_res, lambda, X, N, LM, tf_estimate, tf_chan, importance); 1890 for (i=effEnd;i<end;i++) 1891 tf_res[i] = tf_res[effEnd-1]; 1892 } else if (hybrid && weak_transient) 1893 { 1894 /* For weak transients, we rely on the fact that improving time resolution using 1895 TF on a long window is imperfect and will not result in an energy collapse at 1896 low bitrate. 
*/ 1897 for (i=0;i<end;i++) 1898 tf_res[i] = 1; 1899 tf_select=0; 1900 } else if (hybrid && effectiveBytes<15 && st->silk_info.signalType != 2) 1901 { 1902 /* For low bitrate hybrid, we force temporal resolution to 5 ms rather than 2.5 ms. */ 1903 for (i=0;i<end;i++) 1904 tf_res[i] = 0; 1905 tf_select=isTransient; 1906 } else { 1907 for (i=0;i<end;i++) 1908 tf_res[i] = isTransient; 1909 tf_select=0; 1910 } 1911 1912 ALLOC(error, C*nbEBands, opus_val16); 1913 c=0; 1914 do { 1915 for (i=start;i<end;i++) 1916 { 1917 /* When the energy is stable, slightly bias energy quantization towards 1918 the previous error to make the gain more stable (a constant offset is 1919 better than fluctuations). */ 1920 if (ABS32(SUB32(bandLogE[i+c*nbEBands], oldBandE[i+c*nbEBands])) < QCONST16(2.f, DB_SHIFT)) 1921 { 1922 bandLogE[i+c*nbEBands] -= MULT16_16_Q15(energyError[i+c*nbEBands], QCONST16(0.25f, 15)); 1923 } 1924 } 1925 } while (++c < C); 1926 quant_coarse_energy(mode, start, end, effEnd, bandLogE, 1927 oldBandE, total_bits, error, enc, 1928 C, LM, nbAvailableBytes, st->force_intra, 1929 &st->delayedIntra, st->complexity >= 4, st->loss_rate, st->lfe); 1930 1931 tf_encode(start, end, isTransient, tf_res, LM, tf_select, enc); 1932 1933 if (ec_tell(enc)+4<=total_bits) 1934 { 1935 if (st->lfe) 1936 { 1937 st->tapset_decision = 0; 1938 st->spread_decision = SPREAD_NORMAL; 1939 } else if (hybrid) 1940 { 1941 if (st->complexity == 0) 1942 st->spread_decision = SPREAD_NONE; 1943 else if (isTransient) 1944 st->spread_decision = SPREAD_NORMAL; 1945 else 1946 st->spread_decision = SPREAD_AGGRESSIVE; 1947 } else if (shortBlocks || st->complexity < 3 || nbAvailableBytes < 10*C) 1948 { 1949 if (st->complexity == 0) 1950 st->spread_decision = SPREAD_NONE; 1951 else 1952 st->spread_decision = SPREAD_NORMAL; 1953 } else { 1954 /* Disable new spreading+tapset estimator until we can show it works 1955 better than the old one. So far it seems like spreading_decision() 1956 works best. 
*/ 1957 #if 0 1958 if (st->analysis.valid) 1959 { 1960 static const opus_val16 spread_thresholds[3] = {-QCONST16(.6f, 15), -QCONST16(.2f, 15), -QCONST16(.07f, 15)}; 1961 static const opus_val16 spread_histeresis[3] = {QCONST16(.15f, 15), QCONST16(.07f, 15), QCONST16(.02f, 15)}; 1962 static const opus_val16 tapset_thresholds[2] = {QCONST16(.0f, 15), QCONST16(.15f, 15)}; 1963 static const opus_val16 tapset_histeresis[2] = {QCONST16(.1f, 15), QCONST16(.05f, 15)}; 1964 st->spread_decision = hysteresis_decision(-st->analysis.tonality, spread_thresholds, spread_histeresis, 3, st->spread_decision); 1965 st->tapset_decision = hysteresis_decision(st->analysis.tonality_slope, tapset_thresholds, tapset_histeresis, 2, st->tapset_decision); 1966 } else 1967 #endif 1968 { 1969 st->spread_decision = spreading_decision(mode, X, 1970 &st->tonal_average, st->spread_decision, &st->hf_average, 1971 &st->tapset_decision, pf_on&&!shortBlocks, effEnd, C, M, spread_weight); 1972 } 1973 /*printf("%d %d\n", st->tapset_decision, st->spread_decision);*/ 1974 /*printf("%f %d %f %d\n\n", st->analysis.tonality, st->spread_decision, st->analysis.tonality_slope, st->tapset_decision);*/ 1975 } 1976 ec_enc_icdf(enc, st->spread_decision, spread_icdf, 5); 1977 } 1978 1979 /* For LFE, everything interesting is in the first band */ 1980 if (st->lfe) 1981 offsets[0] = IMIN(8, effectiveBytes/3); 1982 ALLOC(cap, nbEBands, int); 1983 init_caps(mode,cap,LM,C); 1984 1985 dynalloc_logp = 6; 1986 total_bits<<=BITRES; 1987 total_boost = 0; 1988 tell = ec_tell_frac(enc); 1989 for (i=start;i<end;i++) 1990 { 1991 int width, quanta; 1992 int dynalloc_loop_logp; 1993 int boost; 1994 int j; 1995 width = C*(eBands[i+1]-eBands[i])<<LM; 1996 /* quanta is 6 bits, but no more than 1 bit/sample 1997 and no less than 1/8 bit/sample */ 1998 quanta = IMIN(width<<BITRES, IMAX(6<<BITRES, width)); 1999 dynalloc_loop_logp = dynalloc_logp; 2000 boost = 0; 2001 for (j = 0; tell+(dynalloc_loop_logp<<BITRES) < total_bits-total_boost 
2002 && boost < cap[i]; j++) 2003 { 2004 int flag; 2005 flag = j<offsets[i]; 2006 ec_enc_bit_logp(enc, flag, dynalloc_loop_logp); 2007 tell = ec_tell_frac(enc); 2008 if (!flag) 2009 break; 2010 boost += quanta; 2011 total_boost += quanta; 2012 dynalloc_loop_logp = 1; 2013 } 2014 /* Making dynalloc more likely */ 2015 if (j) 2016 dynalloc_logp = IMAX(2, dynalloc_logp-1); 2017 offsets[i] = boost; 2018 } 2019 2020 if (C==2) 2021 { 2022 static const opus_val16 intensity_thresholds[21]= 2023 /* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 off*/ 2024 { 1, 2, 3, 4, 5, 6, 7, 8,16,24,36,44,50,56,62,67,72,79,88,106,134}; 2025 static const opus_val16 intensity_histeresis[21]= 2026 { 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 4, 5, 6, 8, 8}; 2027 2028 /* Always use MS for 2.5 ms frames until we can do a better analysis */ 2029 if (LM!=0) 2030 dual_stereo = stereo_analysis(mode, X, LM, N); 2031 2032 st->intensity = hysteresis_decision((opus_val16)(equiv_rate/1000), 2033 intensity_thresholds, intensity_histeresis, 21, st->intensity); 2034 st->intensity = IMIN(end,IMAX(start, st->intensity)); 2035 } 2036 2037 alloc_trim = 5; 2038 if (tell+(6<<BITRES) <= total_bits - total_boost) 2039 { 2040 if (start > 0 || st->lfe) 2041 { 2042 st->stereo_saving = 0; 2043 alloc_trim = 5; 2044 } else { 2045 alloc_trim = alloc_trim_analysis(mode, X, bandLogE, 2046 end, LM, C, N, &st->analysis, &st->stereo_saving, tf_estimate, 2047 st->intensity, surround_trim, equiv_rate, st->arch); 2048 } 2049 ec_enc_icdf(enc, alloc_trim, trim_icdf, 7); 2050 tell = ec_tell_frac(enc); 2051 } 2052 2053 /* Variable bitrate */ 2054 if (vbr_rate>0) 2055 { 2056 opus_val16 alpha; 2057 opus_int32 delta; 2058 /* The target rate in 8th bits per frame */ 2059 opus_int32 target, base_target; 2060 opus_int32 min_allowed; 2061 int lm_diff = mode->maxLM - LM; 2062 2063 /* Don't attempt to use more than 510 kb/s, even for frames smaller than 20 ms. 
2064 The CELT allocator will just not be able to use more than that anyway. */ 2065 nbCompressedBytes = IMIN(nbCompressedBytes,1275>>(3-LM)); 2066 if (!hybrid) 2067 { 2068 base_target = vbr_rate - ((40*C+20)<<BITRES); 2069 } else { 2070 base_target = IMAX(0, vbr_rate - ((9*C+4)<<BITRES)); 2071 } 2072 2073 if (st->constrained_vbr) 2074 base_target += (st->vbr_offset>>lm_diff); 2075 2076 if (!hybrid) 2077 { 2078 target = compute_vbr(mode, &st->analysis, base_target, LM, equiv_rate, 2079 st->lastCodedBands, C, st->intensity, st->constrained_vbr, 2080 st->stereo_saving, tot_boost, tf_estimate, pitch_change, maxDepth, 2081 st->lfe, st->energy_mask!=NULL, surround_masking, 2082 temporal_vbr); 2083 } else { 2084 target = base_target; 2085 /* Tonal frames (offset<100) need more bits than noisy (offset>100) ones. */ 2086 if (st->silk_info.offset < 100) target += 12 << BITRES >> (3-LM); 2087 if (st->silk_info.offset > 100) target -= 18 << BITRES >> (3-LM); 2088 /* Boosting bitrate on transients and vowels with significant temporal 2089 spikes. */ 2090 target += (opus_int32)MULT16_16_Q14(tf_estimate-QCONST16(.25f,14), (50<<BITRES)); 2091 /* If we have a strong transient, let's make sure it has enough bits to code 2092 the first two bands, so that it can use folding rather than noise. */ 2093 if (tf_estimate > QCONST16(.7f,14)) 2094 target = IMAX(target, 50<<BITRES); 2095 } 2096 /* The current offset is removed from the target and the space used 2097 so far is added*/ 2098 target=target+tell; 2099 /* In VBR mode the frame size must not be reduced so much that it would 2100 result in the encoder running out of bits. 2101 The margin of 2 bytes ensures that none of the bust-prevention logic 2102 in the decoder will have triggered so far. */ 2103 min_allowed = ((tell+total_boost+(1<<(BITRES+3))-1)>>(BITRES+3)) + 2; 2104 /* Take into account the 37 bits we need to have left in the packet to 2105 signal a redundant frame in hybrid mode. 
Creating a shorter packet would 2106 create an entropy coder desync. */ 2107 if (hybrid) 2108 min_allowed = IMAX(min_allowed, (tell0_frac+(37<<BITRES)+total_boost+(1<<(BITRES+3))-1)>>(BITRES+3)); 2109 2110 nbAvailableBytes = (target+(1<<(BITRES+2)))>>(BITRES+3); 2111 nbAvailableBytes = IMAX(min_allowed,nbAvailableBytes); 2112 nbAvailableBytes = IMIN(nbCompressedBytes,nbAvailableBytes); 2113 2114 /* By how much did we "miss" the target on that frame */ 2115 delta = target - vbr_rate; 2116 2117 target=nbAvailableBytes<<(BITRES+3); 2118 2119 /*If the frame is silent we don't adjust our drift, otherwise 2120 the encoder will shoot to very high rates after hitting a 2121 span of silence, but we do allow the bitres to refill. 2122 This means that we'll undershoot our target in CVBR/VBR modes 2123 on files with lots of silence. */ 2124 if(silence) 2125 { 2126 nbAvailableBytes = 2; 2127 target = 2*8<<BITRES; 2128 delta = 0; 2129 } 2130 2131 if (st->vbr_count < 970) 2132 { 2133 st->vbr_count++; 2134 alpha = celt_rcp(SHL32(EXTEND32(st->vbr_count+20),16)); 2135 } else 2136 alpha = QCONST16(.001f,15); 2137 /* How many bits have we used in excess of what we're allowed */ 2138 if (st->constrained_vbr) 2139 st->vbr_reservoir += target - vbr_rate; 2140 /*printf ("%d\n", st->vbr_reservoir);*/ 2141 2142 /* Compute the offset we need to apply in order to reach the target */ 2143 if (st->constrained_vbr) 2144 { 2145 st->vbr_drift += (opus_int32)MULT16_32_Q15(alpha,(delta*(1<<lm_diff))-st->vbr_offset-st->vbr_drift); 2146 st->vbr_offset = -st->vbr_drift; 2147 } 2148 /*printf ("%d\n", st->vbr_drift);*/ 2149 2150 if (st->constrained_vbr && st->vbr_reservoir < 0) 2151 { 2152 /* We're under the min value -- increase rate */ 2153 int adjust = (-st->vbr_reservoir)/(8<<BITRES); 2154 /* Unless we're just coding silence */ 2155 nbAvailableBytes += silence?0:adjust; 2156 st->vbr_reservoir = 0; 2157 /*printf ("+%d\n", adjust);*/ 2158 } 2159 nbCompressedBytes = 
IMIN(nbCompressedBytes,nbAvailableBytes); 2160 /*printf("%d\n", nbCompressedBytes*50*8);*/ 2161 /* This moves the raw bits to take into account the new compressed size */ 2162 ec_enc_shrink(enc, nbCompressedBytes); 2163 } 2164 2165 /* Bit allocation */ 2166 ALLOC(fine_quant, nbEBands, int); 2167 ALLOC(pulses, nbEBands, int); 2168 ALLOC(fine_priority, nbEBands, int); 2169 2170 /* bits = packet size - where we are - safety*/ 2171 bits = (((opus_int32)nbCompressedBytes*8)<<BITRES) - ec_tell_frac(enc) - 1; 2172 anti_collapse_rsv = isTransient&&LM>=2&&bits>=((LM+2)<<BITRES) ? (1<<BITRES) : 0; 2173 bits -= anti_collapse_rsv; 2174 signalBandwidth = end-1; 2175 #ifndef DISABLE_FLOAT_API 2176 if (st->analysis.valid) 2177 { 2178 int min_bandwidth; 2179 if (equiv_rate < (opus_int32)32000*C) 2180 min_bandwidth = 13; 2181 else if (equiv_rate < (opus_int32)48000*C) 2182 min_bandwidth = 16; 2183 else if (equiv_rate < (opus_int32)60000*C) 2184 min_bandwidth = 18; 2185 else if (equiv_rate < (opus_int32)80000*C) 2186 min_bandwidth = 19; 2187 else 2188 min_bandwidth = 20; 2189 signalBandwidth = IMAX(st->analysis.bandwidth, min_bandwidth); 2190 } 2191 #endif 2192 if (st->lfe) 2193 signalBandwidth = 1; 2194 codedBands = clt_compute_allocation(mode, start, end, offsets, cap, 2195 alloc_trim, &st->intensity, &dual_stereo, bits, &balance, pulses, 2196 fine_quant, fine_priority, C, LM, enc, 1, st->lastCodedBands, signalBandwidth); 2197 if (st->lastCodedBands) 2198 st->lastCodedBands = IMIN(st->lastCodedBands+1,IMAX(st->lastCodedBands-1,codedBands)); 2199 else 2200 st->lastCodedBands = codedBands; 2201 2202 quant_fine_energy(mode, start, end, oldBandE, error, fine_quant, enc, C); 2203 2204 /* Residual quantisation */ 2205 ALLOC(collapse_masks, C*nbEBands, unsigned char); 2206 quant_all_bands(1, mode, start, end, X, C==2 ? 
X+N : NULL, collapse_masks, 2207 bandE, pulses, shortBlocks, st->spread_decision, 2208 dual_stereo, st->intensity, tf_res, nbCompressedBytes*(8<<BITRES)-anti_collapse_rsv, 2209 balance, enc, LM, codedBands, &st->rng, st->complexity, st->arch, st->disable_inv); 2210 2211 if (anti_collapse_rsv > 0) 2212 { 2213 anti_collapse_on = st->consec_transient<2; 2214 #ifdef FUZZING 2215 anti_collapse_on = rand()&0x1; 2216 #endif 2217 ec_enc_bits(enc, anti_collapse_on, 1); 2218 } 2219 quant_energy_finalise(mode, start, end, oldBandE, error, fine_quant, fine_priority, nbCompressedBytes*8-ec_tell(enc), enc, C); 2220 OPUS_CLEAR(energyError, nbEBands*CC); 2221 c=0; 2222 do { 2223 for (i=start;i<end;i++) 2224 { 2225 energyError[i+c*nbEBands] = MAX16(-QCONST16(0.5f, 15), MIN16(QCONST16(0.5f, 15), error[i+c*nbEBands])); 2226 } 2227 } while (++c < C); 2228 2229 if (silence) 2230 { 2231 for (i=0;i<C*nbEBands;i++) 2232 oldBandE[i] = -QCONST16(28.f,DB_SHIFT); 2233 } 2234 2235 #ifdef RESYNTH 2236 /* Re-synthesis of the coded audio if required */ 2237 { 2238 celt_sig *out_mem[2]; 2239 2240 if (anti_collapse_on) 2241 { 2242 anti_collapse(mode, X, collapse_masks, LM, C, N, 2243 start, end, oldBandE, oldLogE, oldLogE2, pulses, st->rng); 2244 } 2245 2246 c=0; do { 2247 OPUS_MOVE(st->syn_mem[c], st->syn_mem[c]+N, 2*MAX_PERIOD-N+overlap/2); 2248 } while (++c<CC); 2249 2250 c=0; do { 2251 out_mem[c] = st->syn_mem[c]+2*MAX_PERIOD-N; 2252 } while (++c<CC); 2253 2254 celt_synthesis(mode, X, out_mem, oldBandE, start, effEnd, 2255 C, CC, isTransient, LM, st->upsample, silence, st->arch); 2256 2257 c=0; do { 2258 st->prefilter_period=IMAX(st->prefilter_period, COMBFILTER_MINPERIOD); 2259 st->prefilter_period_old=IMAX(st->prefilter_period_old, COMBFILTER_MINPERIOD); 2260 comb_filter(out_mem[c], out_mem[c], st->prefilter_period_old, st->prefilter_period, mode->shortMdctSize, 2261 st->prefilter_gain_old, st->prefilter_gain, st->prefilter_tapset_old, st->prefilter_tapset, 2262 mode->window, overlap); 2263 
if (LM!=0) 2264 comb_filter(out_mem[c]+mode->shortMdctSize, out_mem[c]+mode->shortMdctSize, st->prefilter_period, pitch_index, N-mode->shortMdctSize, 2265 st->prefilter_gain, gain1, st->prefilter_tapset, prefilter_tapset, 2266 mode->window, overlap); 2267 } while (++c<CC); 2268 2269 /* We reuse freq[] as scratch space for the de-emphasis */ 2270 deemphasis(out_mem, (opus_val16*)pcm, N, CC, st->upsample, mode->preemph, st->preemph_memD); 2271 st->prefilter_period_old = st->prefilter_period; 2272 st->prefilter_gain_old = st->prefilter_gain; 2273 st->prefilter_tapset_old = st->prefilter_tapset; 2274 } 2275 #endif 2276 2277 st->prefilter_period = pitch_index; 2278 st->prefilter_gain = gain1; 2279 st->prefilter_tapset = prefilter_tapset; 2280 #ifdef RESYNTH 2281 if (LM!=0) 2282 { 2283 st->prefilter_period_old = st->prefilter_period; 2284 st->prefilter_gain_old = st->prefilter_gain; 2285 st->prefilter_tapset_old = st->prefilter_tapset; 2286 } 2287 #endif 2288 2289 if (CC==2&&C==1) { 2290 OPUS_COPY(&oldBandE[nbEBands], oldBandE, nbEBands); 2291 } 2292 2293 if (!isTransient) 2294 { 2295 OPUS_COPY(oldLogE2, oldLogE, CC*nbEBands); 2296 OPUS_COPY(oldLogE, oldBandE, CC*nbEBands); 2297 } else { 2298 for (i=0;i<CC*nbEBands;i++) 2299 oldLogE[i] = MIN16(oldLogE[i], oldBandE[i]); 2300 } 2301 /* In case start or end were to change */ 2302 c=0; do 2303 { 2304 for (i=0;i<start;i++) 2305 { 2306 oldBandE[c*nbEBands+i]=0; 2307 oldLogE[c*nbEBands+i]=oldLogE2[c*nbEBands+i]=-QCONST16(28.f,DB_SHIFT); 2308 } 2309 for (i=end;i<nbEBands;i++) 2310 { 2311 oldBandE[c*nbEBands+i]=0; 2312 oldLogE[c*nbEBands+i]=oldLogE2[c*nbEBands+i]=-QCONST16(28.f,DB_SHIFT); 2313 } 2314 } while (++c<CC); 2315 2316 if (isTransient || transient_got_disabled) 2317 st->consec_transient++; 2318 else 2319 st->consec_transient=0; 2320 st->rng = enc->rng; 2321 2322 /* If there's any room left (can only happen for very high rates), 2323 it's already filled with zeros */ 2324 ec_enc_done(enc); 2325 2326 #ifdef CUSTOM_MODES 
2327 if (st->signalling) 2328 nbCompressedBytes++; 2329 #endif 2330 2331 RESTORE_STACK; 2332 if (ec_get_error(enc)) 2333 return OPUS_INTERNAL_ERROR; 2334 else 2335 return nbCompressedBytes; 2336 } 2337 2338 2339 #ifdef CUSTOM_MODES 2340 2341 #ifdef FIXED_POINT 2342 int opus_custom_encode(CELTEncoder * OPUS_RESTRICT st, const opus_int16 * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes) 2343 { 2344 return celt_encode_with_ec(st, pcm, frame_size, compressed, nbCompressedBytes, NULL); 2345 } 2346 2347 #ifndef DISABLE_FLOAT_API 2348 int opus_custom_encode_float(CELTEncoder * OPUS_RESTRICT st, const float * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes) 2349 { 2350 int j, ret, C, N; 2351 VARDECL(opus_int16, in); 2352 ALLOC_STACK; 2353 2354 if (pcm==NULL) 2355 return OPUS_BAD_ARG; 2356 2357 C = st->channels; 2358 N = frame_size; 2359 ALLOC(in, C*N, opus_int16); 2360 2361 for (j=0;j<C*N;j++) 2362 in[j] = FLOAT2INT16(pcm[j]); 2363 2364 ret=celt_encode_with_ec(st,in,frame_size,compressed,nbCompressedBytes, NULL); 2365 #ifdef RESYNTH 2366 for (j=0;j<C*N;j++) 2367 ((float*)pcm)[j]=in[j]*(1.f/32768.f); 2368 #endif 2369 RESTORE_STACK; 2370 return ret; 2371 } 2372 #endif /* DISABLE_FLOAT_API */ 2373 #else 2374 2375 int opus_custom_encode(CELTEncoder * OPUS_RESTRICT st, const opus_int16 * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes) 2376 { 2377 int j, ret, C, N; 2378 VARDECL(celt_sig, in); 2379 ALLOC_STACK; 2380 2381 if (pcm==NULL) 2382 return OPUS_BAD_ARG; 2383 2384 C=st->channels; 2385 N=frame_size; 2386 ALLOC(in, C*N, celt_sig); 2387 for (j=0;j<C*N;j++) { 2388 in[j] = SCALEOUT(pcm[j]); 2389 } 2390 2391 ret = celt_encode_with_ec(st,in,frame_size,compressed,nbCompressedBytes, NULL); 2392 #ifdef RESYNTH 2393 for (j=0;j<C*N;j++) 2394 ((opus_int16*)pcm)[j] = FLOAT2INT16(in[j]); 2395 #endif 2396 RESTORE_STACK; 2397 return ret; 2398 } 2399 2400 int opus_custom_encode_float(CELTEncoder * OPUS_RESTRICT st, const 
float * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes) 2401 { 2402 return celt_encode_with_ec(st, pcm, frame_size, compressed, nbCompressedBytes, NULL); 2403 } 2404 2405 #endif 2406 2407 #endif /* CUSTOM_MODES */ 2408 2409 int opus_custom_encoder_ctl(CELTEncoder * OPUS_RESTRICT st, int request, ...) 2410 { 2411 va_list ap; 2412 2413 va_start(ap, request); 2414 switch (request) 2415 { 2416 case OPUS_SET_COMPLEXITY_REQUEST: 2417 { 2418 int value = va_arg(ap, opus_int32); 2419 if (value<0 || value>10) 2420 goto bad_arg; 2421 st->complexity = value; 2422 } 2423 break; 2424 case CELT_SET_START_BAND_REQUEST: 2425 { 2426 opus_int32 value = va_arg(ap, opus_int32); 2427 if (value<0 || value>=st->mode->nbEBands) 2428 goto bad_arg; 2429 st->start = value; 2430 } 2431 break; 2432 case CELT_SET_END_BAND_REQUEST: 2433 { 2434 opus_int32 value = va_arg(ap, opus_int32); 2435 if (value<1 || value>st->mode->nbEBands) 2436 goto bad_arg; 2437 st->end = value; 2438 } 2439 break; 2440 case CELT_SET_PREDICTION_REQUEST: 2441 { 2442 int value = va_arg(ap, opus_int32); 2443 if (value<0 || value>2) 2444 goto bad_arg; 2445 st->disable_pf = value<=1; 2446 st->force_intra = value==0; 2447 } 2448 break; 2449 case OPUS_SET_PACKET_LOSS_PERC_REQUEST: 2450 { 2451 int value = va_arg(ap, opus_int32); 2452 if (value<0 || value>100) 2453 goto bad_arg; 2454 st->loss_rate = value; 2455 } 2456 break; 2457 case OPUS_SET_VBR_CONSTRAINT_REQUEST: 2458 { 2459 opus_int32 value = va_arg(ap, opus_int32); 2460 st->constrained_vbr = value; 2461 } 2462 break; 2463 case OPUS_SET_VBR_REQUEST: 2464 { 2465 opus_int32 value = va_arg(ap, opus_int32); 2466 st->vbr = value; 2467 } 2468 break; 2469 case OPUS_SET_BITRATE_REQUEST: 2470 { 2471 opus_int32 value = va_arg(ap, opus_int32); 2472 if (value<=500 && value!=OPUS_BITRATE_MAX) 2473 goto bad_arg; 2474 value = IMIN(value, 260000*st->channels); 2475 st->bitrate = value; 2476 } 2477 break; 2478 case CELT_SET_CHANNELS_REQUEST: 2479 { 2480 opus_int32 value 
= va_arg(ap, opus_int32); 2481 if (value<1 || value>2) 2482 goto bad_arg; 2483 st->stream_channels = value; 2484 } 2485 break; 2486 case OPUS_SET_LSB_DEPTH_REQUEST: 2487 { 2488 opus_int32 value = va_arg(ap, opus_int32); 2489 if (value<8 || value>24) 2490 goto bad_arg; 2491 st->lsb_depth=value; 2492 } 2493 break; 2494 case OPUS_GET_LSB_DEPTH_REQUEST: 2495 { 2496 opus_int32 *value = va_arg(ap, opus_int32*); 2497 *value=st->lsb_depth; 2498 } 2499 break; 2500 case OPUS_SET_PHASE_INVERSION_DISABLED_REQUEST: 2501 { 2502 opus_int32 value = va_arg(ap, opus_int32); 2503 if(value<0 || value>1) 2504 { 2505 goto bad_arg; 2506 } 2507 st->disable_inv = value; 2508 } 2509 break; 2510 case OPUS_GET_PHASE_INVERSION_DISABLED_REQUEST: 2511 { 2512 opus_int32 *value = va_arg(ap, opus_int32*); 2513 if (!value) 2514 { 2515 goto bad_arg; 2516 } 2517 *value = st->disable_inv; 2518 } 2519 break; 2520 case OPUS_RESET_STATE: 2521 { 2522 int i; 2523 opus_val16 *oldBandE, *oldLogE, *oldLogE2; 2524 oldBandE = (opus_val16*)(st->in_mem+st->channels*(st->mode->overlap+COMBFILTER_MAXPERIOD)); 2525 oldLogE = oldBandE + st->channels*st->mode->nbEBands; 2526 oldLogE2 = oldLogE + st->channels*st->mode->nbEBands; 2527 OPUS_CLEAR((char*)&st->ENCODER_RESET_START, 2528 opus_custom_encoder_get_size(st->mode, st->channels)- 2529 ((char*)&st->ENCODER_RESET_START - (char*)st)); 2530 for (i=0;i<st->channels*st->mode->nbEBands;i++) 2531 oldLogE[i]=oldLogE2[i]=-QCONST16(28.f,DB_SHIFT); 2532 st->vbr_offset = 0; 2533 st->delayedIntra = 1; 2534 st->spread_decision = SPREAD_NORMAL; 2535 st->tonal_average = 256; 2536 st->hf_average = 0; 2537 st->tapset_decision = 0; 2538 } 2539 break; 2540 #ifdef CUSTOM_MODES 2541 case CELT_SET_INPUT_CLIPPING_REQUEST: 2542 { 2543 opus_int32 value = va_arg(ap, opus_int32); 2544 st->clip = value; 2545 } 2546 break; 2547 #endif 2548 case CELT_SET_SIGNALLING_REQUEST: 2549 { 2550 opus_int32 value = va_arg(ap, opus_int32); 2551 st->signalling = value; 2552 } 2553 break; 2554 case 
CELT_SET_ANALYSIS_REQUEST: 2555 { 2556 AnalysisInfo *info = va_arg(ap, AnalysisInfo *); 2557 if (info) 2558 OPUS_COPY(&st->analysis, info, 1); 2559 } 2560 break; 2561 case CELT_SET_SILK_INFO_REQUEST: 2562 { 2563 SILKInfo *info = va_arg(ap, SILKInfo *); 2564 if (info) 2565 OPUS_COPY(&st->silk_info, info, 1); 2566 } 2567 break; 2568 case CELT_GET_MODE_REQUEST: 2569 { 2570 const CELTMode ** value = va_arg(ap, const CELTMode**); 2571 if (value==0) 2572 goto bad_arg; 2573 *value=st->mode; 2574 } 2575 break; 2576 case OPUS_GET_FINAL_RANGE_REQUEST: 2577 { 2578 opus_uint32 * value = va_arg(ap, opus_uint32 *); 2579 if (value==0) 2580 goto bad_arg; 2581 *value=st->rng; 2582 } 2583 break; 2584 case OPUS_SET_LFE_REQUEST: 2585 { 2586 opus_int32 value = va_arg(ap, opus_int32); 2587 st->lfe = value; 2588 } 2589 break; 2590 case OPUS_SET_ENERGY_MASK_REQUEST: 2591 { 2592 opus_val16 *value = va_arg(ap, opus_val16*); 2593 st->energy_mask = value; 2594 } 2595 break; 2596 default: 2597 goto bad_request; 2598 } 2599 va_end(ap); 2600 return OPUS_OK; 2601 bad_arg: 2602 va_end(ap); 2603 return OPUS_BAD_ARG; 2604 bad_request: 2605 va_end(ap); 2606 return OPUS_UNIMPLEMENTED; 2607 } 2608