Home | History | Annotate | Download | only in celt
      1 /* Copyright (c) 2007-2008 CSIRO
      2    Copyright (c) 2007-2009 Xiph.Org Foundation
      3    Copyright (c) 2008-2009 Gregory Maxwell
      4    Written by Jean-Marc Valin and Gregory Maxwell */
      5 /*
      6    Redistribution and use in source and binary forms, with or without
      7    modification, are permitted provided that the following conditions
      8    are met:
      9 
     10    - Redistributions of source code must retain the above copyright
     11    notice, this list of conditions and the following disclaimer.
     12 
     13    - Redistributions in binary form must reproduce the above copyright
     14    notice, this list of conditions and the following disclaimer in the
     15    documentation and/or other materials provided with the distribution.
     16 
     17    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     18    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     19    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     20    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
     21    OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     22    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     23    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     24    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
     25    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
     26    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     27    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     28 */
     29 
     30 #ifdef HAVE_CONFIG_H
     31 #include "config.h"
     32 #endif
     33 
     34 #include <math.h>
     35 #include "bands.h"
     36 #include "modes.h"
     37 #include "vq.h"
     38 #include "cwrs.h"
     39 #include "stack_alloc.h"
     40 #include "os_support.h"
     41 #include "mathops.h"
     42 #include "rate.h"
     43 #include "quant_bands.h"
     44 #include "pitch.h"
     45 
     46 int hysteresis_decision(opus_val16 val, const opus_val16 *thresholds, const opus_val16 *hysteresis, int N, int prev)
     47 {
     48    int i;
     49    for (i=0;i<N;i++)
     50    {
     51       if (val < thresholds[i])
     52          break;
     53    }
     54    if (i>prev && val < thresholds[prev]+hysteresis[prev])
     55       i=prev;
     56    if (i<prev && val > thresholds[prev-1]-hysteresis[prev-1])
     57       i=prev;
     58    return i;
     59 }
     60 
     61 opus_uint32 celt_lcg_rand(opus_uint32 seed)
     62 {
     63    return 1664525 * seed + 1013904223;
     64 }
     65 
     66 /* This is a cos() approximation designed to be bit-exact on any platform. Bit exactness
     67    with this approximation is important because it has an impact on the bit allocation */
     68 static opus_int16 bitexact_cos(opus_int16 x)
     69 {
     70    opus_int32 tmp;
     71    opus_int16 x2;
     72    tmp = (4096+((opus_int32)(x)*(x)))>>13;
     73    celt_assert(tmp<=32767);
     74    x2 = tmp;
     75    x2 = (32767-x2) + FRAC_MUL16(x2, (-7651 + FRAC_MUL16(x2, (8277 + FRAC_MUL16(-626, x2)))));
     76    celt_assert(x2<=32766);
     77    return 1+x2;
     78 }
     79 
     80 static int bitexact_log2tan(int isin,int icos)
     81 {
     82    int lc;
     83    int ls;
     84    lc=EC_ILOG(icos);
     85    ls=EC_ILOG(isin);
     86    icos<<=15-lc;
     87    isin<<=15-ls;
     88    return (ls-lc)*(1<<11)
     89          +FRAC_MUL16(isin, FRAC_MUL16(isin, -2597) + 7932)
     90          -FRAC_MUL16(icos, FRAC_MUL16(icos, -2597) + 7932);
     91 }
     92 
     93 #ifdef FIXED_POINT
     94 /* Compute the amplitude (sqrt energy) in each of the bands */
     95 void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM)
     96 {
     97    int i, c, N;
     98    const opus_int16 *eBands = m->eBands;
     99    N = m->shortMdctSize<<LM;
    100    c=0; do {
    101       for (i=0;i<end;i++)
    102       {
    103          int j;
    104          opus_val32 maxval=0;
    105          opus_val32 sum = 0;
    106 
    107          maxval = celt_maxabs32(&X[c*N+(eBands[i]<<LM)], (eBands[i+1]-eBands[i])<<LM);
    108          if (maxval > 0)
    109          {
    110             int shift = celt_ilog2(maxval) - 14 + (((m->logN[i]>>BITRES)+LM+1)>>1);
    111             j=eBands[i]<<LM;
    112             if (shift>0)
    113             {
    114                do {
    115                   sum = MAC16_16(sum, EXTRACT16(SHR32(X[j+c*N],shift)),
    116                         EXTRACT16(SHR32(X[j+c*N],shift)));
    117                } while (++j<eBands[i+1]<<LM);
    118             } else {
    119                do {
    120                   sum = MAC16_16(sum, EXTRACT16(SHL32(X[j+c*N],-shift)),
    121                         EXTRACT16(SHL32(X[j+c*N],-shift)));
    122                } while (++j<eBands[i+1]<<LM);
    123             }
    124             /* We're adding one here to ensure the normalized band isn't larger than unity norm */
    125             bandE[i+c*m->nbEBands] = EPSILON+VSHR32(EXTEND32(celt_sqrt(sum)),-shift);
    126          } else {
    127             bandE[i+c*m->nbEBands] = EPSILON;
    128          }
    129          /*printf ("%f ", bandE[i+c*m->nbEBands]);*/
    130       }
    131    } while (++c<C);
    132    /*printf ("\n");*/
    133 }
    134 
    135 /* Normalise each band such that the energy is one. */
    136 void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, celt_norm * OPUS_RESTRICT X, const celt_ener *bandE, int end, int C, int M)
    137 {
    138    int i, c, N;
    139    const opus_int16 *eBands = m->eBands;
    140    N = M*m->shortMdctSize;
    141    c=0; do {
    142       i=0; do {
    143          opus_val16 g;
    144          int j,shift;
    145          opus_val16 E;
    146          shift = celt_zlog2(bandE[i+c*m->nbEBands])-13;
    147          E = VSHR32(bandE[i+c*m->nbEBands], shift);
    148          g = EXTRACT16(celt_rcp(SHL32(E,3)));
    149          j=M*eBands[i]; do {
    150             X[j+c*N] = MULT16_16_Q15(VSHR32(freq[j+c*N],shift-1),g);
    151          } while (++j<M*eBands[i+1]);
    152       } while (++i<end);
    153    } while (++c<C);
    154 }
    155 
    156 #else /* FIXED_POINT */
    157 /* Compute the amplitude (sqrt energy) in each of the bands */
    158 void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM)
    159 {
    160    int i, c, N;
    161    const opus_int16 *eBands = m->eBands;
    162    N = m->shortMdctSize<<LM;
    163    c=0; do {
    164       for (i=0;i<end;i++)
    165       {
    166          opus_val32 sum;
    167          sum = 1e-27f + celt_inner_prod_c(&X[c*N+(eBands[i]<<LM)], &X[c*N+(eBands[i]<<LM)], (eBands[i+1]-eBands[i])<<LM);
    168          bandE[i+c*m->nbEBands] = celt_sqrt(sum);
    169          /*printf ("%f ", bandE[i+c*m->nbEBands]);*/
    170       }
    171    } while (++c<C);
    172    /*printf ("\n");*/
    173 }
    174 
    175 /* Normalise each band such that the energy is one. */
    176 void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, celt_norm * OPUS_RESTRICT X, const celt_ener *bandE, int end, int C, int M)
    177 {
    178    int i, c, N;
    179    const opus_int16 *eBands = m->eBands;
    180    N = M*m->shortMdctSize;
    181    c=0; do {
    182       for (i=0;i<end;i++)
    183       {
    184          int j;
    185          opus_val16 g = 1.f/(1e-27f+bandE[i+c*m->nbEBands]);
    186          for (j=M*eBands[i];j<M*eBands[i+1];j++)
    187             X[j+c*N] = freq[j+c*N]*g;
    188       }
    189    } while (++c<C);
    190 }
    191 
    192 #endif /* FIXED_POINT */
    193 
    194 /* De-normalise the energy to produce the synthesis from the unit-energy bands */
    195 void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X,
    196       celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandLogE, int start,
    197       int end, int M, int downsample, int silence)
    198 {
    199    int i, N;
    200    int bound;
    201    celt_sig * OPUS_RESTRICT f;
    202    const celt_norm * OPUS_RESTRICT x;
    203    const opus_int16 *eBands = m->eBands;
    204    N = M*m->shortMdctSize;
    205    bound = M*eBands[end];
    206    if (downsample!=1)
    207       bound = IMIN(bound, N/downsample);
    208    if (silence)
    209    {
    210       bound = 0;
    211       start = end = 0;
    212    }
    213    f = freq;
    214    x = X+M*eBands[start];
    215    for (i=0;i<M*eBands[start];i++)
    216       *f++ = 0;
    217    for (i=start;i<end;i++)
    218    {
    219       int j, band_end;
    220       opus_val16 g;
    221       opus_val16 lg;
    222 #ifdef FIXED_POINT
    223       int shift;
    224 #endif
    225       j=M*eBands[i];
    226       band_end = M*eBands[i+1];
    227       lg = ADD16(bandLogE[i], SHL16((opus_val16)eMeans[i],6));
    228 #ifndef FIXED_POINT
    229       g = celt_exp2(lg);
    230 #else
    231       /* Handle the integer part of the log energy */
    232       shift = 16-(lg>>DB_SHIFT);
    233       if (shift>31)
    234       {
    235          shift=0;
    236          g=0;
    237       } else {
    238          /* Handle the fractional part. */
    239          g = celt_exp2_frac(lg&((1<<DB_SHIFT)-1));
    240       }
    241       /* Handle extreme gains with negative shift. */
    242       if (shift<0)
    243       {
    244          /* For shift < -2 we'd be likely to overflow, so we're capping
    245                the gain here. This shouldn't happen unless the bitstream is
    246                already corrupted. */
    247          if (shift < -2)
    248          {
    249             g = 32767;
    250             shift = -2;
    251          }
    252          do {
    253             *f++ = SHL32(MULT16_16(*x++, g), -shift);
    254          } while (++j<band_end);
    255       } else
    256 #endif
    257          /* Be careful of the fixed-point "else" just above when changing this code */
    258          do {
    259             *f++ = SHR32(MULT16_16(*x++, g), shift);
    260          } while (++j<band_end);
    261    }
    262    celt_assert(start <= end);
    263    OPUS_CLEAR(&freq[bound], N-bound);
    264 }
    265 
    266 /* This prevents energy collapse for transients with multiple short MDCTs */
    267 void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_masks, int LM, int C, int size,
    268       int start, int end, const opus_val16 *logE, const opus_val16 *prev1logE,
    269       const opus_val16 *prev2logE, const int *pulses, opus_uint32 seed, int arch)
    270 {
    271    int c, i, j, k;
    272    for (i=start;i<end;i++)
    273    {
    274       int N0;
    275       opus_val16 thresh, sqrt_1;
    276       int depth;
    277 #ifdef FIXED_POINT
    278       int shift;
    279       opus_val32 thresh32;
    280 #endif
    281 
    282       N0 = m->eBands[i+1]-m->eBands[i];
    283       /* depth in 1/8 bits */
    284       celt_assert(pulses[i]>=0);
    285       depth = celt_udiv(1+pulses[i], (m->eBands[i+1]-m->eBands[i]))>>LM;
    286 
    287 #ifdef FIXED_POINT
    288       thresh32 = SHR32(celt_exp2(-SHL16(depth, 10-BITRES)),1);
    289       thresh = MULT16_32_Q15(QCONST16(0.5f, 15), MIN32(32767,thresh32));
    290       {
    291          opus_val32 t;
    292          t = N0<<LM;
    293          shift = celt_ilog2(t)>>1;
    294          t = SHL32(t, (7-shift)<<1);
    295          sqrt_1 = celt_rsqrt_norm(t);
    296       }
    297 #else
    298       thresh = .5f*celt_exp2(-.125f*depth);
    299       sqrt_1 = celt_rsqrt(N0<<LM);
    300 #endif
    301 
    302       c=0; do
    303       {
    304          celt_norm *X;
    305          opus_val16 prev1;
    306          opus_val16 prev2;
    307          opus_val32 Ediff;
    308          opus_val16 r;
    309          int renormalize=0;
    310          prev1 = prev1logE[c*m->nbEBands+i];
    311          prev2 = prev2logE[c*m->nbEBands+i];
    312          if (C==1)
    313          {
    314             prev1 = MAX16(prev1,prev1logE[m->nbEBands+i]);
    315             prev2 = MAX16(prev2,prev2logE[m->nbEBands+i]);
    316          }
    317          Ediff = EXTEND32(logE[c*m->nbEBands+i])-EXTEND32(MIN16(prev1,prev2));
    318          Ediff = MAX32(0, Ediff);
    319 
    320 #ifdef FIXED_POINT
    321          if (Ediff < 16384)
    322          {
    323             opus_val32 r32 = SHR32(celt_exp2(-EXTRACT16(Ediff)),1);
    324             r = 2*MIN16(16383,r32);
    325          } else {
    326             r = 0;
    327          }
    328          if (LM==3)
    329             r = MULT16_16_Q14(23170, MIN32(23169, r));
    330          r = SHR16(MIN16(thresh, r),1);
    331          r = SHR32(MULT16_16_Q15(sqrt_1, r),shift);
    332 #else
    333          /* r needs to be multiplied by 2 or 2*sqrt(2) depending on LM because
    334             short blocks don't have the same energy as long */
    335          r = 2.f*celt_exp2(-Ediff);
    336          if (LM==3)
    337             r *= 1.41421356f;
    338          r = MIN16(thresh, r);
    339          r = r*sqrt_1;
    340 #endif
    341          X = X_+c*size+(m->eBands[i]<<LM);
    342          for (k=0;k<1<<LM;k++)
    343          {
    344             /* Detect collapse */
    345             if (!(collapse_masks[i*C+c]&1<<k))
    346             {
    347                /* Fill with noise */
    348                for (j=0;j<N0;j++)
    349                {
    350                   seed = celt_lcg_rand(seed);
    351                   X[(j<<LM)+k] = (seed&0x8000 ? r : -r);
    352                }
    353                renormalize = 1;
    354             }
    355          }
    356          /* We just added some energy, so we need to renormalise */
    357          if (renormalize)
    358             renormalise_vector(X, N0<<LM, Q15ONE, arch);
    359       } while (++c<C);
    360    }
    361 }
    362 
    363 static void intensity_stereo(const CELTMode *m, celt_norm * OPUS_RESTRICT X, const celt_norm * OPUS_RESTRICT Y, const celt_ener *bandE, int bandID, int N)
    364 {
    365    int i = bandID;
    366    int j;
    367    opus_val16 a1, a2;
    368    opus_val16 left, right;
    369    opus_val16 norm;
    370 #ifdef FIXED_POINT
    371    int shift = celt_zlog2(MAX32(bandE[i], bandE[i+m->nbEBands]))-13;
    372 #endif
    373    left = VSHR32(bandE[i],shift);
    374    right = VSHR32(bandE[i+m->nbEBands],shift);
    375    norm = EPSILON + celt_sqrt(EPSILON+MULT16_16(left,left)+MULT16_16(right,right));
    376    a1 = DIV32_16(SHL32(EXTEND32(left),14),norm);
    377    a2 = DIV32_16(SHL32(EXTEND32(right),14),norm);
    378    for (j=0;j<N;j++)
    379    {
    380       celt_norm r, l;
    381       l = X[j];
    382       r = Y[j];
    383       X[j] = EXTRACT16(SHR32(MAC16_16(MULT16_16(a1, l), a2, r), 14));
    384       /* Side is not encoded, no need to calculate */
    385    }
    386 }
    387 
    388 static void stereo_split(celt_norm * OPUS_RESTRICT X, celt_norm * OPUS_RESTRICT Y, int N)
    389 {
    390    int j;
    391    for (j=0;j<N;j++)
    392    {
    393       opus_val32 r, l;
    394       l = MULT16_16(QCONST16(.70710678f, 15), X[j]);
    395       r = MULT16_16(QCONST16(.70710678f, 15), Y[j]);
    396       X[j] = EXTRACT16(SHR32(ADD32(l, r), 15));
    397       Y[j] = EXTRACT16(SHR32(SUB32(r, l), 15));
    398    }
    399 }
    400 
    401 static void stereo_merge(celt_norm * OPUS_RESTRICT X, celt_norm * OPUS_RESTRICT Y, opus_val16 mid, int N, int arch)
    402 {
    403    int j;
    404    opus_val32 xp=0, side=0;
    405    opus_val32 El, Er;
    406    opus_val16 mid2;
    407 #ifdef FIXED_POINT
    408    int kl, kr;
    409 #endif
    410    opus_val32 t, lgain, rgain;
    411 
    412    /* Compute the norm of X+Y and X-Y as |X|^2 + |Y|^2 +/- sum(xy) */
    413    dual_inner_prod(Y, X, Y, N, &xp, &side, arch);
    414    /* Compensating for the mid normalization */
    415    xp = MULT16_32_Q15(mid, xp);
    416    /* mid and side are in Q15, not Q14 like X and Y */
    417    mid2 = SHR16(mid, 1);
    418    El = MULT16_16(mid2, mid2) + side - 2*xp;
    419    Er = MULT16_16(mid2, mid2) + side + 2*xp;
    420    if (Er < QCONST32(6e-4f, 28) || El < QCONST32(6e-4f, 28))
    421    {
    422       OPUS_COPY(Y, X, N);
    423       return;
    424    }
    425 
    426 #ifdef FIXED_POINT
    427    kl = celt_ilog2(El)>>1;
    428    kr = celt_ilog2(Er)>>1;
    429 #endif
    430    t = VSHR32(El, (kl-7)<<1);
    431    lgain = celt_rsqrt_norm(t);
    432    t = VSHR32(Er, (kr-7)<<1);
    433    rgain = celt_rsqrt_norm(t);
    434 
    435 #ifdef FIXED_POINT
    436    if (kl < 7)
    437       kl = 7;
    438    if (kr < 7)
    439       kr = 7;
    440 #endif
    441 
    442    for (j=0;j<N;j++)
    443    {
    444       celt_norm r, l;
    445       /* Apply mid scaling (side is already scaled) */
    446       l = MULT16_16_P15(mid, X[j]);
    447       r = Y[j];
    448       X[j] = EXTRACT16(PSHR32(MULT16_16(lgain, SUB16(l,r)), kl+1));
    449       Y[j] = EXTRACT16(PSHR32(MULT16_16(rgain, ADD16(l,r)), kr+1));
    450    }
    451 }
    452 
    453 /* Decide whether we should spread the pulses in the current frame */
    454 int spreading_decision(const CELTMode *m, const celt_norm *X, int *average,
    455       int last_decision, int *hf_average, int *tapset_decision, int update_hf,
    456       int end, int C, int M)
    457 {
    458    int i, c, N0;
    459    int sum = 0, nbBands=0;
    460    const opus_int16 * OPUS_RESTRICT eBands = m->eBands;
    461    int decision;
    462    int hf_sum=0;
    463 
    464    celt_assert(end>0);
    465 
    466    N0 = M*m->shortMdctSize;
    467 
    468    if (M*(eBands[end]-eBands[end-1]) <= 8)
    469       return SPREAD_NONE;
    470    c=0; do {
    471       for (i=0;i<end;i++)
    472       {
    473          int j, N, tmp=0;
    474          int tcount[3] = {0,0,0};
    475          const celt_norm * OPUS_RESTRICT x = X+M*eBands[i]+c*N0;
    476          N = M*(eBands[i+1]-eBands[i]);
    477          if (N<=8)
    478             continue;
    479          /* Compute rough CDF of |x[j]| */
    480          for (j=0;j<N;j++)
    481          {
    482             opus_val32 x2N; /* Q13 */
    483 
    484             x2N = MULT16_16(MULT16_16_Q15(x[j], x[j]), N);
    485             if (x2N < QCONST16(0.25f,13))
    486                tcount[0]++;
    487             if (x2N < QCONST16(0.0625f,13))
    488                tcount[1]++;
    489             if (x2N < QCONST16(0.015625f,13))
    490                tcount[2]++;
    491          }
    492 
    493          /* Only include four last bands (8 kHz and up) */
    494          if (i>m->nbEBands-4)
    495             hf_sum += celt_udiv(32*(tcount[1]+tcount[0]), N);
    496          tmp = (2*tcount[2] >= N) + (2*tcount[1] >= N) + (2*tcount[0] >= N);
    497          sum += tmp*256;
    498          nbBands++;
    499       }
    500    } while (++c<C);
    501 
    502    if (update_hf)
    503    {
    504       if (hf_sum)
    505          hf_sum = celt_udiv(hf_sum, C*(4-m->nbEBands+end));
    506       *hf_average = (*hf_average+hf_sum)>>1;
    507       hf_sum = *hf_average;
    508       if (*tapset_decision==2)
    509          hf_sum += 4;
    510       else if (*tapset_decision==0)
    511          hf_sum -= 4;
    512       if (hf_sum > 22)
    513          *tapset_decision=2;
    514       else if (hf_sum > 18)
    515          *tapset_decision=1;
    516       else
    517          *tapset_decision=0;
    518    }
    519    /*printf("%d %d %d\n", hf_sum, *hf_average, *tapset_decision);*/
    520    celt_assert(nbBands>0); /* end has to be non-zero */
    521    celt_assert(sum>=0);
    522    sum = celt_udiv(sum, nbBands);
    523    /* Recursive averaging */
    524    sum = (sum+*average)>>1;
    525    *average = sum;
    526    /* Hysteresis */
    527    sum = (3*sum + (((3-last_decision)<<7) + 64) + 2)>>2;
    528    if (sum < 80)
    529    {
    530       decision = SPREAD_AGGRESSIVE;
    531    } else if (sum < 256)
    532    {
    533       decision = SPREAD_NORMAL;
    534    } else if (sum < 384)
    535    {
    536       decision = SPREAD_LIGHT;
    537    } else {
    538       decision = SPREAD_NONE;
    539    }
    540 #ifdef FUZZING
    541    decision = rand()&0x3;
    542    *tapset_decision=rand()%3;
    543 #endif
    544    return decision;
    545 }
    546 
    547 /* Indexing table for converting from natural Hadamard to ordery Hadamard
    548    This is essentially a bit-reversed Gray, on top of which we've added
    549    an inversion of the order because we want the DC at the end rather than
    550    the beginning. The lines are for N=2, 4, 8, 16 */
    551 static const int ordery_table[] = {
    552        1,  0,
    553        3,  0,  2,  1,
    554        7,  0,  4,  3,  6,  1,  5,  2,
    555       15,  0,  8,  7, 12,  3, 11,  4, 14,  1,  9,  6, 13,  2, 10,  5,
    556 };
    557 
    558 static void deinterleave_hadamard(celt_norm *X, int N0, int stride, int hadamard)
    559 {
    560    int i,j;
    561    VARDECL(celt_norm, tmp);
    562    int N;
    563    SAVE_STACK;
    564    N = N0*stride;
    565    ALLOC(tmp, N, celt_norm);
    566    celt_assert(stride>0);
    567    if (hadamard)
    568    {
    569       const int *ordery = ordery_table+stride-2;
    570       for (i=0;i<stride;i++)
    571       {
    572          for (j=0;j<N0;j++)
    573             tmp[ordery[i]*N0+j] = X[j*stride+i];
    574       }
    575    } else {
    576       for (i=0;i<stride;i++)
    577          for (j=0;j<N0;j++)
    578             tmp[i*N0+j] = X[j*stride+i];
    579    }
    580    OPUS_COPY(X, tmp, N);
    581    RESTORE_STACK;
    582 }
    583 
    584 static void interleave_hadamard(celt_norm *X, int N0, int stride, int hadamard)
    585 {
    586    int i,j;
    587    VARDECL(celt_norm, tmp);
    588    int N;
    589    SAVE_STACK;
    590    N = N0*stride;
    591    ALLOC(tmp, N, celt_norm);
    592    if (hadamard)
    593    {
    594       const int *ordery = ordery_table+stride-2;
    595       for (i=0;i<stride;i++)
    596          for (j=0;j<N0;j++)
    597             tmp[j*stride+i] = X[ordery[i]*N0+j];
    598    } else {
    599       for (i=0;i<stride;i++)
    600          for (j=0;j<N0;j++)
    601             tmp[j*stride+i] = X[i*N0+j];
    602    }
    603    OPUS_COPY(X, tmp, N);
    604    RESTORE_STACK;
    605 }
    606 
    607 void haar1(celt_norm *X, int N0, int stride)
    608 {
    609    int i, j;
    610    N0 >>= 1;
    611    for (i=0;i<stride;i++)
    612       for (j=0;j<N0;j++)
    613       {
    614          opus_val32 tmp1, tmp2;
    615          tmp1 = MULT16_16(QCONST16(.70710678f,15), X[stride*2*j+i]);
    616          tmp2 = MULT16_16(QCONST16(.70710678f,15), X[stride*(2*j+1)+i]);
    617          X[stride*2*j+i] = EXTRACT16(PSHR32(ADD32(tmp1, tmp2), 15));
    618          X[stride*(2*j+1)+i] = EXTRACT16(PSHR32(SUB32(tmp1, tmp2), 15));
    619       }
    620 }
    621 
    622 static int compute_qn(int N, int b, int offset, int pulse_cap, int stereo)
    623 {
    624    static const opus_int16 exp2_table8[8] =
    625       {16384, 17866, 19483, 21247, 23170, 25267, 27554, 30048};
    626    int qn, qb;
    627    int N2 = 2*N-1;
    628    if (stereo && N==2)
    629       N2--;
    630    /* The upper limit ensures that in a stereo split with itheta==16384, we'll
    631        always have enough bits left over to code at least one pulse in the
    632        side; otherwise it would collapse, since it doesn't get folded. */
    633    qb = celt_sudiv(b+N2*offset, N2);
    634    qb = IMIN(b-pulse_cap-(4<<BITRES), qb);
    635 
    636    qb = IMIN(8<<BITRES, qb);
    637 
    638    if (qb<(1<<BITRES>>1)) {
    639       qn = 1;
    640    } else {
    641       qn = exp2_table8[qb&0x7]>>(14-(qb>>BITRES));
    642       qn = (qn+1)>>1<<1;
    643    }
    644    celt_assert(qn <= 256);
    645    return qn;
    646 }
    647 
    648 struct band_ctx {
    649    int encode;
    650    const CELTMode *m;
    651    int i;
    652    int intensity;
    653    int spread;
    654    int tf_change;
    655    ec_ctx *ec;
    656    opus_int32 remaining_bits;
    657    const celt_ener *bandE;
    658    opus_uint32 seed;
    659    int arch;
    660 };
    661 
    662 struct split_ctx {
    663    int inv;
    664    int imid;
    665    int iside;
    666    int delta;
    667    int itheta;
    668    int qalloc;
    669 };
    670 
    671 static void compute_theta(struct band_ctx *ctx, struct split_ctx *sctx,
    672       celt_norm *X, celt_norm *Y, int N, int *b, int B, int B0,
    673       int LM,
    674       int stereo, int *fill)
    675 {
    676    int qn;
    677    int itheta=0;
    678    int delta;
    679    int imid, iside;
    680    int qalloc;
    681    int pulse_cap;
    682    int offset;
    683    opus_int32 tell;
    684    int inv=0;
    685    int encode;
    686    const CELTMode *m;
    687    int i;
    688    int intensity;
    689    ec_ctx *ec;
    690    const celt_ener *bandE;
    691 
    692    encode = ctx->encode;
    693    m = ctx->m;
    694    i = ctx->i;
    695    intensity = ctx->intensity;
    696    ec = ctx->ec;
    697    bandE = ctx->bandE;
    698 
    699    /* Decide on the resolution to give to the split parameter theta */
    700    pulse_cap = m->logN[i]+LM*(1<<BITRES);
    701    offset = (pulse_cap>>1) - (stereo&&N==2 ? QTHETA_OFFSET_TWOPHASE : QTHETA_OFFSET);
    702    qn = compute_qn(N, *b, offset, pulse_cap, stereo);
    703    if (stereo && i>=intensity)
    704       qn = 1;
    705    if (encode)
    706    {
    707       /* theta is the atan() of the ratio between the (normalized)
    708          side and mid. With just that parameter, we can re-scale both
    709          mid and side because we know that 1) they have unit norm and
    710          2) they are orthogonal. */
    711       itheta = stereo_itheta(X, Y, stereo, N, ctx->arch);
    712    }
    713    tell = ec_tell_frac(ec);
    714    if (qn!=1)
    715    {
    716       if (encode)
    717          itheta = (itheta*(opus_int32)qn+8192)>>14;
    718 
    719       /* Entropy coding of the angle. We use a uniform pdf for the
    720          time split, a step for stereo, and a triangular one for the rest. */
    721       if (stereo && N>2)
    722       {
    723          int p0 = 3;
    724          int x = itheta;
    725          int x0 = qn/2;
    726          int ft = p0*(x0+1) + x0;
    727          /* Use a probability of p0 up to itheta=8192 and then use 1 after */
    728          if (encode)
    729          {
    730             ec_encode(ec,x<=x0?p0*x:(x-1-x0)+(x0+1)*p0,x<=x0?p0*(x+1):(x-x0)+(x0+1)*p0,ft);
    731          } else {
    732             int fs;
    733             fs=ec_decode(ec,ft);
    734             if (fs<(x0+1)*p0)
    735                x=fs/p0;
    736             else
    737                x=x0+1+(fs-(x0+1)*p0);
    738             ec_dec_update(ec,x<=x0?p0*x:(x-1-x0)+(x0+1)*p0,x<=x0?p0*(x+1):(x-x0)+(x0+1)*p0,ft);
    739             itheta = x;
    740          }
    741       } else if (B0>1 || stereo) {
    742          /* Uniform pdf */
    743          if (encode)
    744             ec_enc_uint(ec, itheta, qn+1);
    745          else
    746             itheta = ec_dec_uint(ec, qn+1);
    747       } else {
    748          int fs=1, ft;
    749          ft = ((qn>>1)+1)*((qn>>1)+1);
    750          if (encode)
    751          {
    752             int fl;
    753 
    754             fs = itheta <= (qn>>1) ? itheta + 1 : qn + 1 - itheta;
    755             fl = itheta <= (qn>>1) ? itheta*(itheta + 1)>>1 :
    756              ft - ((qn + 1 - itheta)*(qn + 2 - itheta)>>1);
    757 
    758             ec_encode(ec, fl, fl+fs, ft);
    759          } else {
    760             /* Triangular pdf */
    761             int fl=0;
    762             int fm;
    763             fm = ec_decode(ec, ft);
    764 
    765             if (fm < ((qn>>1)*((qn>>1) + 1)>>1))
    766             {
    767                itheta = (isqrt32(8*(opus_uint32)fm + 1) - 1)>>1;
    768                fs = itheta + 1;
    769                fl = itheta*(itheta + 1)>>1;
    770             }
    771             else
    772             {
    773                itheta = (2*(qn + 1)
    774                 - isqrt32(8*(opus_uint32)(ft - fm - 1) + 1))>>1;
    775                fs = qn + 1 - itheta;
    776                fl = ft - ((qn + 1 - itheta)*(qn + 2 - itheta)>>1);
    777             }
    778 
    779             ec_dec_update(ec, fl, fl+fs, ft);
    780          }
    781       }
    782       celt_assert(itheta>=0);
    783       itheta = celt_udiv((opus_int32)itheta*16384, qn);
    784       if (encode && stereo)
    785       {
    786          if (itheta==0)
    787             intensity_stereo(m, X, Y, bandE, i, N);
    788          else
    789             stereo_split(X, Y, N);
    790       }
    791       /* NOTE: Renormalising X and Y *may* help fixed-point a bit at very high rate.
    792                Let's do that at higher complexity */
    793    } else if (stereo) {
    794       if (encode)
    795       {
    796          inv = itheta > 8192;
    797          if (inv)
    798          {
    799             int j;
    800             for (j=0;j<N;j++)
    801                Y[j] = -Y[j];
    802          }
    803          intensity_stereo(m, X, Y, bandE, i, N);
    804       }
    805       if (*b>2<<BITRES && ctx->remaining_bits > 2<<BITRES)
    806       {
    807          if (encode)
    808             ec_enc_bit_logp(ec, inv, 2);
    809          else
    810             inv = ec_dec_bit_logp(ec, 2);
    811       } else
    812          inv = 0;
    813       itheta = 0;
    814    }
    815    qalloc = ec_tell_frac(ec) - tell;
    816    *b -= qalloc;
    817 
    818    if (itheta == 0)
    819    {
    820       imid = 32767;
    821       iside = 0;
    822       *fill &= (1<<B)-1;
    823       delta = -16384;
    824    } else if (itheta == 16384)
    825    {
    826       imid = 0;
    827       iside = 32767;
    828       *fill &= ((1<<B)-1)<<B;
    829       delta = 16384;
    830    } else {
    831       imid = bitexact_cos((opus_int16)itheta);
    832       iside = bitexact_cos((opus_int16)(16384-itheta));
    833       /* This is the mid vs side allocation that minimizes squared error
    834          in that band. */
    835       delta = FRAC_MUL16((N-1)<<7,bitexact_log2tan(iside,imid));
    836    }
    837 
    838    sctx->inv = inv;
    839    sctx->imid = imid;
    840    sctx->iside = iside;
    841    sctx->delta = delta;
    842    sctx->itheta = itheta;
    843    sctx->qalloc = qalloc;
    844 }
    845 static unsigned quant_band_n1(struct band_ctx *ctx, celt_norm *X, celt_norm *Y, int b,
    846       celt_norm *lowband_out)
    847 {
    848 #ifdef RESYNTH
    849    int resynth = 1;
    850 #else
    851    int resynth = !ctx->encode;
    852 #endif
    853    int c;
    854    int stereo;
    855    celt_norm *x = X;
    856    int encode;
    857    ec_ctx *ec;
    858 
    859    encode = ctx->encode;
    860    ec = ctx->ec;
    861 
    862    stereo = Y != NULL;
    863    c=0; do {
    864       int sign=0;
    865       if (ctx->remaining_bits>=1<<BITRES)
    866       {
    867          if (encode)
    868          {
    869             sign = x[0]<0;
    870             ec_enc_bits(ec, sign, 1);
    871          } else {
    872             sign = ec_dec_bits(ec, 1);
    873          }
    874          ctx->remaining_bits -= 1<<BITRES;
    875          b-=1<<BITRES;
    876       }
    877       if (resynth)
    878          x[0] = sign ? -NORM_SCALING : NORM_SCALING;
    879       x = Y;
    880    } while (++c<1+stereo);
    881    if (lowband_out)
    882       lowband_out[0] = SHR16(X[0],4);
    883    return 1;
    884 }
    885 
    886 /* This function is responsible for encoding and decoding a mono partition.
    887    It can split the band in two and transmit the energy difference with
    888    the two half-bands. It can be called recursively so bands can end up being
    889    split in 8 parts. */
    890 static unsigned quant_partition(struct band_ctx *ctx, celt_norm *X,
    891       int N, int b, int B, celt_norm *lowband,
    892       int LM,
    893       opus_val16 gain, int fill)
    894 {
    895    const unsigned char *cache;
    896    int q;
    897    int curr_bits;
    898    int imid=0, iside=0;
    899    int B0=B;
    900    opus_val16 mid=0, side=0;
    901    unsigned cm=0;
    902 #ifdef RESYNTH
    903    int resynth = 1;
    904 #else
    905    int resynth = !ctx->encode;
    906 #endif
    907    celt_norm *Y=NULL;
    908    int encode;
    909    const CELTMode *m;
    910    int i;
    911    int spread;
    912    ec_ctx *ec;
    913 
    914    encode = ctx->encode;
    915    m = ctx->m;
    916    i = ctx->i;
    917    spread = ctx->spread;
    918    ec = ctx->ec;
    919 
    920    /* If we need 1.5 more bit than we can produce, split the band in two. */
    921    cache = m->cache.bits + m->cache.index[(LM+1)*m->nbEBands+i];
    922    if (LM != -1 && b > cache[cache[0]]+12 && N>2)
    923    {
    924       int mbits, sbits, delta;
    925       int itheta;
    926       int qalloc;
    927       struct split_ctx sctx;
    928       celt_norm *next_lowband2=NULL;
    929       opus_int32 rebalance;
    930 
    931       N >>= 1;
    932       Y = X+N;
    933       LM -= 1;
    934       if (B==1)
    935          fill = (fill&1)|(fill<<1);
    936       B = (B+1)>>1;
    937 
    938       compute_theta(ctx, &sctx, X, Y, N, &b, B, B0,
    939             LM, 0, &fill);
    940       imid = sctx.imid;
    941       iside = sctx.iside;
    942       delta = sctx.delta;
    943       itheta = sctx.itheta;
    944       qalloc = sctx.qalloc;
    945 #ifdef FIXED_POINT
    946       mid = imid;
    947       side = iside;
    948 #else
    949       mid = (1.f/32768)*imid;
    950       side = (1.f/32768)*iside;
    951 #endif
    952 
    953       /* Give more bits to low-energy MDCTs than they would otherwise deserve */
    954       if (B0>1 && (itheta&0x3fff))
    955       {
    956          if (itheta > 8192)
    957             /* Rough approximation for pre-echo masking */
    958             delta -= delta>>(4-LM);
    959          else
    960             /* Corresponds to a forward-masking slope of 1.5 dB per 10 ms */
    961             delta = IMIN(0, delta + (N<<BITRES>>(5-LM)));
    962       }
    963       mbits = IMAX(0, IMIN(b, (b-delta)/2));
    964       sbits = b-mbits;
    965       ctx->remaining_bits -= qalloc;
    966 
    967       if (lowband)
    968          next_lowband2 = lowband+N; /* >32-bit split case */
    969 
    970       rebalance = ctx->remaining_bits;
    971       if (mbits >= sbits)
    972       {
    973          cm = quant_partition(ctx, X, N, mbits, B,
    974                lowband, LM,
    975                MULT16_16_P15(gain,mid), fill);
    976          rebalance = mbits - (rebalance-ctx->remaining_bits);
    977          if (rebalance > 3<<BITRES && itheta!=0)
    978             sbits += rebalance - (3<<BITRES);
    979          cm |= quant_partition(ctx, Y, N, sbits, B,
    980                next_lowband2, LM,
    981                MULT16_16_P15(gain,side), fill>>B)<<(B0>>1);
    982       } else {
    983          cm = quant_partition(ctx, Y, N, sbits, B,
    984                next_lowband2, LM,
    985                MULT16_16_P15(gain,side), fill>>B)<<(B0>>1);
    986          rebalance = sbits - (rebalance-ctx->remaining_bits);
    987          if (rebalance > 3<<BITRES && itheta!=16384)
    988             mbits += rebalance - (3<<BITRES);
    989          cm |= quant_partition(ctx, X, N, mbits, B,
    990                lowband, LM,
    991                MULT16_16_P15(gain,mid), fill);
    992       }
    993    } else {
    994       /* This is the basic no-split case */
    995       q = bits2pulses(m, i, LM, b);
    996       curr_bits = pulses2bits(m, i, LM, q);
    997       ctx->remaining_bits -= curr_bits;
    998 
    999       /* Ensures we can never bust the budget */
   1000       while (ctx->remaining_bits < 0 && q > 0)
   1001       {
   1002          ctx->remaining_bits += curr_bits;
   1003          q--;
   1004          curr_bits = pulses2bits(m, i, LM, q);
   1005          ctx->remaining_bits -= curr_bits;
   1006       }
   1007 
   1008       if (q!=0)
   1009       {
   1010          int K = get_pulses(q);
   1011 
   1012          /* Finally do the actual quantization */
   1013          if (encode)
   1014          {
   1015             cm = alg_quant(X, N, K, spread, B, ec
   1016 #ifdef RESYNTH
   1017                  , gain
   1018 #endif
   1019                  );
   1020          } else {
   1021             cm = alg_unquant(X, N, K, spread, B, ec, gain);
   1022          }
   1023       } else {
   1024          /* If there's no pulse, fill the band anyway */
   1025          int j;
   1026          if (resynth)
   1027          {
   1028             unsigned cm_mask;
   1029             /* B can be as large as 16, so this shift might overflow an int on a
   1030                16-bit platform; use a long to get defined behavior.*/
   1031             cm_mask = (unsigned)(1UL<<B)-1;
   1032             fill &= cm_mask;
   1033             if (!fill)
   1034             {
   1035                OPUS_CLEAR(X, N);
   1036             } else {
   1037                if (lowband == NULL)
   1038                {
   1039                   /* Noise */
   1040                   for (j=0;j<N;j++)
   1041                   {
   1042                      ctx->seed = celt_lcg_rand(ctx->seed);
   1043                      X[j] = (celt_norm)((opus_int32)ctx->seed>>20);
   1044                   }
   1045                   cm = cm_mask;
   1046                } else {
   1047                   /* Folded spectrum */
   1048                   for (j=0;j<N;j++)
   1049                   {
   1050                      opus_val16 tmp;
   1051                      ctx->seed = celt_lcg_rand(ctx->seed);
   1052                      /* About 48 dB below the "normal" folding level */
   1053                      tmp = QCONST16(1.0f/256, 10);
   1054                      tmp = (ctx->seed)&0x8000 ? tmp : -tmp;
   1055                      X[j] = lowband[j]+tmp;
   1056                   }
   1057                   cm = fill;
   1058                }
   1059                renormalise_vector(X, N, gain, ctx->arch);
   1060             }
   1061          }
   1062       }
   1063    }
   1064 
   1065    return cm;
   1066 }
   1067 
   1068 
   1069 /* This function is responsible for encoding and decoding a band for the mono case. */
   1070 static unsigned quant_band(struct band_ctx *ctx, celt_norm *X,
   1071       int N, int b, int B, celt_norm *lowband,
   1072       int LM, celt_norm *lowband_out,
   1073       opus_val16 gain, celt_norm *lowband_scratch, int fill)
   1074 {
   1075    int N0=N;
   1076    int N_B=N;
   1077    int N_B0;
   1078    int B0=B;
   1079    int time_divide=0;
   1080    int recombine=0;
   1081    int longBlocks;
   1082    unsigned cm=0;
   1083 #ifdef RESYNTH
   1084    int resynth = 1;
   1085 #else
   1086    int resynth = !ctx->encode;
   1087 #endif
   1088    int k;
   1089    int encode;
   1090    int tf_change;
   1091 
   1092    encode = ctx->encode;
   1093    tf_change = ctx->tf_change;
   1094 
   1095    longBlocks = B0==1;
   1096 
   1097    N_B = celt_udiv(N_B, B);
   1098 
   1099    /* Special case for one sample */
   1100    if (N==1)
   1101    {
   1102       return quant_band_n1(ctx, X, NULL, b, lowband_out);
   1103    }
   1104 
   1105    if (tf_change>0)
   1106       recombine = tf_change;
   1107    /* Band recombining to increase frequency resolution */
   1108 
   1109    if (lowband_scratch && lowband && (recombine || ((N_B&1) == 0 && tf_change<0) || B0>1))
   1110    {
   1111       OPUS_COPY(lowband_scratch, lowband, N);
   1112       lowband = lowband_scratch;
   1113    }
   1114 
   1115    for (k=0;k<recombine;k++)
   1116    {
   1117       static const unsigned char bit_interleave_table[16]={
   1118             0,1,1,1,2,3,3,3,2,3,3,3,2,3,3,3
   1119       };
   1120       if (encode)
   1121          haar1(X, N>>k, 1<<k);
   1122       if (lowband)
   1123          haar1(lowband, N>>k, 1<<k);
   1124       fill = bit_interleave_table[fill&0xF]|bit_interleave_table[fill>>4]<<2;
   1125    }
   1126    B>>=recombine;
   1127    N_B<<=recombine;
   1128 
   1129    /* Increasing the time resolution */
   1130    while ((N_B&1) == 0 && tf_change<0)
   1131    {
   1132       if (encode)
   1133          haar1(X, N_B, B);
   1134       if (lowband)
   1135          haar1(lowband, N_B, B);
   1136       fill |= fill<<B;
   1137       B <<= 1;
   1138       N_B >>= 1;
   1139       time_divide++;
   1140       tf_change++;
   1141    }
   1142    B0=B;
   1143    N_B0 = N_B;
   1144 
   1145    /* Reorganize the samples in time order instead of frequency order */
   1146    if (B0>1)
   1147    {
   1148       if (encode)
   1149          deinterleave_hadamard(X, N_B>>recombine, B0<<recombine, longBlocks);
   1150       if (lowband)
   1151          deinterleave_hadamard(lowband, N_B>>recombine, B0<<recombine, longBlocks);
   1152    }
   1153 
   1154    cm = quant_partition(ctx, X, N, b, B, lowband,
   1155          LM, gain, fill);
   1156 
   1157    /* This code is used by the decoder and by the resynthesis-enabled encoder */
   1158    if (resynth)
   1159    {
   1160       /* Undo the sample reorganization going from time order to frequency order */
   1161       if (B0>1)
   1162          interleave_hadamard(X, N_B>>recombine, B0<<recombine, longBlocks);
   1163 
   1164       /* Undo time-freq changes that we did earlier */
   1165       N_B = N_B0;
   1166       B = B0;
   1167       for (k=0;k<time_divide;k++)
   1168       {
   1169          B >>= 1;
   1170          N_B <<= 1;
   1171          cm |= cm>>B;
   1172          haar1(X, N_B, B);
   1173       }
   1174 
   1175       for (k=0;k<recombine;k++)
   1176       {
   1177          static const unsigned char bit_deinterleave_table[16]={
   1178                0x00,0x03,0x0C,0x0F,0x30,0x33,0x3C,0x3F,
   1179                0xC0,0xC3,0xCC,0xCF,0xF0,0xF3,0xFC,0xFF
   1180          };
   1181          cm = bit_deinterleave_table[cm];
   1182          haar1(X, N0>>k, 1<<k);
   1183       }
   1184       B<<=recombine;
   1185 
   1186       /* Scale output for later folding */
   1187       if (lowband_out)
   1188       {
   1189          int j;
   1190          opus_val16 n;
   1191          n = celt_sqrt(SHL32(EXTEND32(N0),22));
   1192          for (j=0;j<N0;j++)
   1193             lowband_out[j] = MULT16_16_Q15(n,X[j]);
   1194       }
   1195       cm &= (1<<B)-1;
   1196    }
   1197    return cm;
   1198 }
   1199 
   1200 
   1201 /* This function is responsible for encoding and decoding a band for the stereo case. */
   1202 static unsigned quant_band_stereo(struct band_ctx *ctx, celt_norm *X, celt_norm *Y,
   1203       int N, int b, int B, celt_norm *lowband,
   1204       int LM, celt_norm *lowband_out,
   1205       celt_norm *lowband_scratch, int fill)
   1206 {
   1207    int imid=0, iside=0;
   1208    int inv = 0;
   1209    opus_val16 mid=0, side=0;
   1210    unsigned cm=0;
   1211 #ifdef RESYNTH
   1212    int resynth = 1;
   1213 #else
   1214    int resynth = !ctx->encode;
   1215 #endif
   1216    int mbits, sbits, delta;
   1217    int itheta;
   1218    int qalloc;
   1219    struct split_ctx sctx;
   1220    int orig_fill;
   1221    int encode;
   1222    ec_ctx *ec;
   1223 
   1224    encode = ctx->encode;
   1225    ec = ctx->ec;
   1226 
   1227    /* Special case for one sample */
   1228    if (N==1)
   1229    {
   1230       return quant_band_n1(ctx, X, Y, b, lowband_out);
   1231    }
   1232 
   1233    orig_fill = fill;
   1234 
   1235    compute_theta(ctx, &sctx, X, Y, N, &b, B, B,
   1236          LM, 1, &fill);
   1237    inv = sctx.inv;
   1238    imid = sctx.imid;
   1239    iside = sctx.iside;
   1240    delta = sctx.delta;
   1241    itheta = sctx.itheta;
   1242    qalloc = sctx.qalloc;
   1243 #ifdef FIXED_POINT
   1244    mid = imid;
   1245    side = iside;
   1246 #else
   1247    mid = (1.f/32768)*imid;
   1248    side = (1.f/32768)*iside;
   1249 #endif
   1250 
   1251    /* This is a special case for N=2 that only works for stereo and takes
   1252       advantage of the fact that mid and side are orthogonal to encode
   1253       the side with just one bit. */
   1254    if (N==2)
   1255    {
   1256       int c;
   1257       int sign=0;
   1258       celt_norm *x2, *y2;
   1259       mbits = b;
   1260       sbits = 0;
   1261       /* Only need one bit for the side. */
   1262       if (itheta != 0 && itheta != 16384)
   1263          sbits = 1<<BITRES;
   1264       mbits -= sbits;
   1265       c = itheta > 8192;
   1266       ctx->remaining_bits -= qalloc+sbits;
   1267 
   1268       x2 = c ? Y : X;
   1269       y2 = c ? X : Y;
   1270       if (sbits)
   1271       {
   1272          if (encode)
   1273          {
   1274             /* Here we only need to encode a sign for the side. */
   1275             sign = x2[0]*y2[1] - x2[1]*y2[0] < 0;
   1276             ec_enc_bits(ec, sign, 1);
   1277          } else {
   1278             sign = ec_dec_bits(ec, 1);
   1279          }
   1280       }
   1281       sign = 1-2*sign;
   1282       /* We use orig_fill here because we want to fold the side, but if
   1283          itheta==16384, we'll have cleared the low bits of fill. */
   1284       cm = quant_band(ctx, x2, N, mbits, B, lowband,
   1285             LM, lowband_out, Q15ONE, lowband_scratch, orig_fill);
   1286       /* We don't split N=2 bands, so cm is either 1 or 0 (for a fold-collapse),
   1287          and there's no need to worry about mixing with the other channel. */
   1288       y2[0] = -sign*x2[1];
   1289       y2[1] = sign*x2[0];
   1290       if (resynth)
   1291       {
   1292          celt_norm tmp;
   1293          X[0] = MULT16_16_Q15(mid, X[0]);
   1294          X[1] = MULT16_16_Q15(mid, X[1]);
   1295          Y[0] = MULT16_16_Q15(side, Y[0]);
   1296          Y[1] = MULT16_16_Q15(side, Y[1]);
   1297          tmp = X[0];
   1298          X[0] = SUB16(tmp,Y[0]);
   1299          Y[0] = ADD16(tmp,Y[0]);
   1300          tmp = X[1];
   1301          X[1] = SUB16(tmp,Y[1]);
   1302          Y[1] = ADD16(tmp,Y[1]);
   1303       }
   1304    } else {
   1305       /* "Normal" split code */
   1306       opus_int32 rebalance;
   1307 
   1308       mbits = IMAX(0, IMIN(b, (b-delta)/2));
   1309       sbits = b-mbits;
   1310       ctx->remaining_bits -= qalloc;
   1311 
   1312       rebalance = ctx->remaining_bits;
   1313       if (mbits >= sbits)
   1314       {
   1315          /* In stereo mode, we do not apply a scaling to the mid because we need the normalized
   1316             mid for folding later. */
   1317          cm = quant_band(ctx, X, N, mbits, B,
   1318                lowband, LM, lowband_out,
   1319                Q15ONE, lowband_scratch, fill);
   1320          rebalance = mbits - (rebalance-ctx->remaining_bits);
   1321          if (rebalance > 3<<BITRES && itheta!=0)
   1322             sbits += rebalance - (3<<BITRES);
   1323 
   1324          /* For a stereo split, the high bits of fill are always zero, so no
   1325             folding will be done to the side. */
   1326          cm |= quant_band(ctx, Y, N, sbits, B,
   1327                NULL, LM, NULL,
   1328                side, NULL, fill>>B);
   1329       } else {
   1330          /* For a stereo split, the high bits of fill are always zero, so no
   1331             folding will be done to the side. */
   1332          cm = quant_band(ctx, Y, N, sbits, B,
   1333                NULL, LM, NULL,
   1334                side, NULL, fill>>B);
   1335          rebalance = sbits - (rebalance-ctx->remaining_bits);
   1336          if (rebalance > 3<<BITRES && itheta!=16384)
   1337             mbits += rebalance - (3<<BITRES);
   1338          /* In stereo mode, we do not apply a scaling to the mid because we need the normalized
   1339             mid for folding later. */
   1340          cm |= quant_band(ctx, X, N, mbits, B,
   1341                lowband, LM, lowband_out,
   1342                Q15ONE, lowband_scratch, fill);
   1343       }
   1344    }
   1345 
   1346 
   1347    /* This code is used by the decoder and by the resynthesis-enabled encoder */
   1348    if (resynth)
   1349    {
   1350       if (N!=2)
   1351          stereo_merge(X, Y, mid, N, ctx->arch);
   1352       if (inv)
   1353       {
   1354          int j;
   1355          for (j=0;j<N;j++)
   1356             Y[j] = -Y[j];
   1357       }
   1358    }
   1359    return cm;
   1360 }
   1361 
   1362 
   1363 void quant_all_bands(int encode, const CELTMode *m, int start, int end,
   1364       celt_norm *X_, celt_norm *Y_, unsigned char *collapse_masks,
   1365       const celt_ener *bandE, int *pulses, int shortBlocks, int spread,
   1366       int dual_stereo, int intensity, int *tf_res, opus_int32 total_bits,
   1367       opus_int32 balance, ec_ctx *ec, int LM, int codedBands,
   1368       opus_uint32 *seed, int arch)
   1369 {
   1370    int i;
   1371    opus_int32 remaining_bits;
   1372    const opus_int16 * OPUS_RESTRICT eBands = m->eBands;
   1373    celt_norm * OPUS_RESTRICT norm, * OPUS_RESTRICT norm2;
   1374    VARDECL(celt_norm, _norm);
   1375    celt_norm *lowband_scratch;
   1376    int B;
   1377    int M;
   1378    int lowband_offset;
   1379    int update_lowband = 1;
   1380    int C = Y_ != NULL ? 2 : 1;
   1381    int norm_offset;
   1382 #ifdef RESYNTH
   1383    int resynth = 1;
   1384 #else
   1385    int resynth = !encode;
   1386 #endif
   1387    struct band_ctx ctx;
   1388    SAVE_STACK;
   1389 
   1390    M = 1<<LM;
   1391    B = shortBlocks ? M : 1;
   1392    norm_offset = M*eBands[start];
   1393    /* No need to allocate norm for the last band because we don't need an
   1394       output in that band. */
   1395    ALLOC(_norm, C*(M*eBands[m->nbEBands-1]-norm_offset), celt_norm);
   1396    norm = _norm;
   1397    norm2 = norm + M*eBands[m->nbEBands-1]-norm_offset;
   1398    /* We can use the last band as scratch space because we don't need that
   1399       scratch space for the last band. */
   1400    lowband_scratch = X_+M*eBands[m->nbEBands-1];
   1401 
   1402    lowband_offset = 0;
   1403    ctx.bandE = bandE;
   1404    ctx.ec = ec;
   1405    ctx.encode = encode;
   1406    ctx.intensity = intensity;
   1407    ctx.m = m;
   1408    ctx.seed = *seed;
   1409    ctx.spread = spread;
   1410    ctx.arch = arch;
   1411    for (i=start;i<end;i++)
   1412    {
   1413       opus_int32 tell;
   1414       int b;
   1415       int N;
   1416       opus_int32 curr_balance;
   1417       int effective_lowband=-1;
   1418       celt_norm * OPUS_RESTRICT X, * OPUS_RESTRICT Y;
   1419       int tf_change=0;
   1420       unsigned x_cm;
   1421       unsigned y_cm;
   1422       int last;
   1423 
   1424       ctx.i = i;
   1425       last = (i==end-1);
   1426 
   1427       X = X_+M*eBands[i];
   1428       if (Y_!=NULL)
   1429          Y = Y_+M*eBands[i];
   1430       else
   1431          Y = NULL;
   1432       N = M*eBands[i+1]-M*eBands[i];
   1433       tell = ec_tell_frac(ec);
   1434 
   1435       /* Compute how many bits we want to allocate to this band */
   1436       if (i != start)
   1437          balance -= tell;
   1438       remaining_bits = total_bits-tell-1;
   1439       ctx.remaining_bits = remaining_bits;
   1440       if (i <= codedBands-1)
   1441       {
   1442          curr_balance = celt_sudiv(balance, IMIN(3, codedBands-i));
   1443          b = IMAX(0, IMIN(16383, IMIN(remaining_bits+1,pulses[i]+curr_balance)));
   1444       } else {
   1445          b = 0;
   1446       }
   1447 
   1448       if (resynth && M*eBands[i]-N >= M*eBands[start] && (update_lowband || lowband_offset==0))
   1449             lowband_offset = i;
   1450 
   1451       tf_change = tf_res[i];
   1452       ctx.tf_change = tf_change;
   1453       if (i>=m->effEBands)
   1454       {
   1455          X=norm;
   1456          if (Y_!=NULL)
   1457             Y = norm;
   1458          lowband_scratch = NULL;
   1459       }
   1460       if (i==end-1)
   1461          lowband_scratch = NULL;
   1462 
   1463       /* Get a conservative estimate of the collapse_mask's for the bands we're
   1464          going to be folding from. */
   1465       if (lowband_offset != 0 && (spread!=SPREAD_AGGRESSIVE || B>1 || tf_change<0))
   1466       {
   1467          int fold_start;
   1468          int fold_end;
   1469          int fold_i;
   1470          /* This ensures we never repeat spectral content within one band */
   1471          effective_lowband = IMAX(0, M*eBands[lowband_offset]-norm_offset-N);
   1472          fold_start = lowband_offset;
   1473          while(M*eBands[--fold_start] > effective_lowband+norm_offset);
   1474          fold_end = lowband_offset-1;
   1475          while(M*eBands[++fold_end] < effective_lowband+norm_offset+N);
   1476          x_cm = y_cm = 0;
   1477          fold_i = fold_start; do {
   1478            x_cm |= collapse_masks[fold_i*C+0];
   1479            y_cm |= collapse_masks[fold_i*C+C-1];
   1480          } while (++fold_i<fold_end);
   1481       }
   1482       /* Otherwise, we'll be using the LCG to fold, so all blocks will (almost
   1483          always) be non-zero. */
   1484       else
   1485          x_cm = y_cm = (1<<B)-1;
   1486 
   1487       if (dual_stereo && i==intensity)
   1488       {
   1489          int j;
   1490 
   1491          /* Switch off dual stereo to do intensity. */
   1492          dual_stereo = 0;
   1493          if (resynth)
   1494             for (j=0;j<M*eBands[i]-norm_offset;j++)
   1495                norm[j] = HALF32(norm[j]+norm2[j]);
   1496       }
   1497       if (dual_stereo)
   1498       {
   1499          x_cm = quant_band(&ctx, X, N, b/2, B,
   1500                effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
   1501                last?NULL:norm+M*eBands[i]-norm_offset, Q15ONE, lowband_scratch, x_cm);
   1502          y_cm = quant_band(&ctx, Y, N, b/2, B,
   1503                effective_lowband != -1 ? norm2+effective_lowband : NULL, LM,
   1504                last?NULL:norm2+M*eBands[i]-norm_offset, Q15ONE, lowband_scratch, y_cm);
   1505       } else {
   1506          if (Y!=NULL)
   1507          {
   1508             x_cm = quant_band_stereo(&ctx, X, Y, N, b, B,
   1509                   effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
   1510                         last?NULL:norm+M*eBands[i]-norm_offset, lowband_scratch, x_cm|y_cm);
   1511          } else {
   1512             x_cm = quant_band(&ctx, X, N, b, B,
   1513                   effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
   1514                         last?NULL:norm+M*eBands[i]-norm_offset, Q15ONE, lowband_scratch, x_cm|y_cm);
   1515          }
   1516          y_cm = x_cm;
   1517       }
   1518       collapse_masks[i*C+0] = (unsigned char)x_cm;
   1519       collapse_masks[i*C+C-1] = (unsigned char)y_cm;
   1520       balance += pulses[i] + tell;
   1521 
   1522       /* Update the folding position only as long as we have 1 bit/sample depth. */
   1523       update_lowband = b>(N<<BITRES);
   1524    }
   1525    *seed = ctx.seed;
   1526 
   1527    RESTORE_STACK;
   1528 }
   1529 
   1530