Home | History | Annotate | Download | only in celt
      1 /* Copyright (c) 2007-2008 CSIRO
      2    Copyright (c) 2007-2009 Xiph.Org Foundation
      3    Written by Jean-Marc Valin */
      4 /*
      5    Redistribution and use in source and binary forms, with or without
      6    modification, are permitted provided that the following conditions
      7    are met:
      8 
      9    - Redistributions of source code must retain the above copyright
     10    notice, this list of conditions and the following disclaimer.
     11 
     12    - Redistributions in binary form must reproduce the above copyright
     13    notice, this list of conditions and the following disclaimer in the
     14    documentation and/or other materials provided with the distribution.
     15 
     16    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     17    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     18    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     19    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
     20    OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     21    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     22    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     23    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
     24    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
     25    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     26    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     27 */
     28 
     29 #ifdef HAVE_CONFIG_H
     30 #include "config.h"
     31 #endif
     32 
     33 #include "mathops.h"
     34 #include "cwrs.h"
     35 #include "vq.h"
     36 #include "arch.h"
     37 #include "os_support.h"
     38 #include "bands.h"
     39 #include "rate.h"
     40 
     41 static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s)
     42 {
     43    int i;
     44    celt_norm *Xptr;
     45    Xptr = X;
     46    for (i=0;i<len-stride;i++)
     47    {
     48       celt_norm x1, x2;
     49       x1 = Xptr[0];
     50       x2 = Xptr[stride];
     51       Xptr[stride] = EXTRACT16(SHR32(MULT16_16(c,x2) + MULT16_16(s,x1), 15));
     52       *Xptr++      = EXTRACT16(SHR32(MULT16_16(c,x1) - MULT16_16(s,x2), 15));
     53    }
     54    Xptr = &X[len-2*stride-1];
     55    for (i=len-2*stride-1;i>=0;i--)
     56    {
     57       celt_norm x1, x2;
     58       x1 = Xptr[0];
     59       x2 = Xptr[stride];
     60       Xptr[stride] = EXTRACT16(SHR32(MULT16_16(c,x2) + MULT16_16(s,x1), 15));
     61       *Xptr--      = EXTRACT16(SHR32(MULT16_16(c,x1) - MULT16_16(s,x2), 15));
     62    }
     63 }
     64 
     65 static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread)
     66 {
     67    static const int SPREAD_FACTOR[3]={15,10,5};
     68    int i;
     69    opus_val16 c, s;
     70    opus_val16 gain, theta;
     71    int stride2=0;
     72    int factor;
     73 
     74    if (2*K>=len || spread==SPREAD_NONE)
     75       return;
     76    factor = SPREAD_FACTOR[spread-1];
     77 
     78    gain = celt_div((opus_val32)MULT16_16(Q15_ONE,len),(opus_val32)(len+factor*K));
     79    theta = HALF16(MULT16_16_Q15(gain,gain));
     80 
     81    c = celt_cos_norm(EXTEND32(theta));
     82    s = celt_cos_norm(EXTEND32(SUB16(Q15ONE,theta))); /*  sin(theta) */
     83 
     84    if (len>=8*stride)
     85    {
     86       stride2 = 1;
     87       /* This is just a simple (equivalent) way of computing sqrt(len/stride) with rounding.
     88          It's basically incrementing long as (stride2+0.5)^2 < len/stride. */
     89       while ((stride2*stride2+stride2)*stride + (stride>>2) < len)
     90          stride2++;
     91    }
     92    /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for
     93       extract_collapse_mask().*/
     94    len /= stride;
     95    for (i=0;i<stride;i++)
     96    {
     97       if (dir < 0)
     98       {
     99          if (stride2)
    100             exp_rotation1(X+i*len, len, stride2, s, c);
    101          exp_rotation1(X+i*len, len, 1, c, s);
    102       } else {
    103          exp_rotation1(X+i*len, len, 1, c, -s);
    104          if (stride2)
    105             exp_rotation1(X+i*len, len, stride2, s, -c);
    106       }
    107    }
    108 }
    109 
    110 /** Takes the pitch vector and the decoded residual vector, computes the gain
    111     that will give ||p+g*y||=1 and mixes the residual with the pitch. */
    112 static void normalise_residual(int * OPUS_RESTRICT iy, celt_norm * OPUS_RESTRICT X,
    113       int N, opus_val32 Ryy, opus_val16 gain)
    114 {
    115    int i;
    116 #ifdef FIXED_POINT
    117    int k;
    118 #endif
    119    opus_val32 t;
    120    opus_val16 g;
    121 
    122 #ifdef FIXED_POINT
    123    k = celt_ilog2(Ryy)>>1;
    124 #endif
    125    t = VSHR32(Ryy, 2*(k-7));
    126    g = MULT16_16_P15(celt_rsqrt_norm(t),gain);
    127 
    128    i=0;
    129    do
    130       X[i] = EXTRACT16(PSHR32(MULT16_16(g, iy[i]), k+1));
    131    while (++i < N);
    132 }
    133 
    134 static unsigned extract_collapse_mask(int *iy, int N, int B)
    135 {
    136    unsigned collapse_mask;
    137    int N0;
    138    int i;
    139    if (B<=1)
    140       return 1;
    141    /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for
    142       exp_rotation().*/
    143    N0 = N/B;
    144    collapse_mask = 0;
    145    i=0; do {
    146       int j;
    147       j=0; do {
    148          collapse_mask |= (iy[i*N0+j]!=0)<<i;
    149       } while (++j<N0);
    150    } while (++i<B);
    151    return collapse_mask;
    152 }
    153 
    154 unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc
    155 #ifdef RESYNTH
    156    , opus_val16 gain
    157 #endif
    158    )
    159 {
    160    VARDECL(celt_norm, y);
    161    VARDECL(int, iy);
    162    VARDECL(opus_val16, signx);
    163    int i, j;
    164    opus_val16 s;
    165    int pulsesLeft;
    166    opus_val32 sum;
    167    opus_val32 xy;
    168    opus_val16 yy;
    169    unsigned collapse_mask;
    170    SAVE_STACK;
    171 
    172    celt_assert2(K>0, "alg_quant() needs at least one pulse");
    173    celt_assert2(N>1, "alg_quant() needs at least two dimensions");
    174 
    175    ALLOC(y, N, celt_norm);
    176    ALLOC(iy, N, int);
    177    ALLOC(signx, N, opus_val16);
    178 
    179    exp_rotation(X, N, 1, B, K, spread);
    180 
    181    /* Get rid of the sign */
    182    sum = 0;
    183    j=0; do {
    184       if (X[j]>0)
    185          signx[j]=1;
    186       else {
    187          signx[j]=-1;
    188          X[j]=-X[j];
    189       }
    190       iy[j] = 0;
    191       y[j] = 0;
    192    } while (++j<N);
    193 
    194    xy = yy = 0;
    195 
    196    pulsesLeft = K;
    197 
    198    /* Do a pre-search by projecting on the pyramid */
    199    if (K > (N>>1))
    200    {
    201       opus_val16 rcp;
    202       j=0; do {
    203          sum += X[j];
    204       }  while (++j<N);
    205 
    206       /* If X is too small, just replace it with a pulse at 0 */
    207 #ifdef FIXED_POINT
    208       if (sum <= K)
    209 #else
    210       /* Prevents infinities and NaNs from causing too many pulses
    211          to be allocated. 64 is an approximation of infinity here. */
    212       if (!(sum > EPSILON && sum < 64))
    213 #endif
    214       {
    215          X[0] = QCONST16(1.f,14);
    216          j=1; do
    217             X[j]=0;
    218          while (++j<N);
    219          sum = QCONST16(1.f,14);
    220       }
    221       rcp = EXTRACT16(MULT16_32_Q16(K-1, celt_rcp(sum)));
    222       j=0; do {
    223 #ifdef FIXED_POINT
    224          /* It's really important to round *towards zero* here */
    225          iy[j] = MULT16_16_Q15(X[j],rcp);
    226 #else
    227          iy[j] = (int)floor(rcp*X[j]);
    228 #endif
    229          y[j] = (celt_norm)iy[j];
    230          yy = MAC16_16(yy, y[j],y[j]);
    231          xy = MAC16_16(xy, X[j],y[j]);
    232          y[j] *= 2;
    233          pulsesLeft -= iy[j];
    234       }  while (++j<N);
    235    }
    236    celt_assert2(pulsesLeft>=1, "Allocated too many pulses in the quick pass");
    237 
    238    /* This should never happen, but just in case it does (e.g. on silence)
    239       we fill the first bin with pulses. */
    240 #ifdef FIXED_POINT_DEBUG
    241    celt_assert2(pulsesLeft<=N+3, "Not enough pulses in the quick pass");
    242 #endif
    243    if (pulsesLeft > N+3)
    244    {
    245       opus_val16 tmp = (opus_val16)pulsesLeft;
    246       yy = MAC16_16(yy, tmp, tmp);
    247       yy = MAC16_16(yy, tmp, y[0]);
    248       iy[0] += pulsesLeft;
    249       pulsesLeft=0;
    250    }
    251 
    252    s = 1;
    253    for (i=0;i<pulsesLeft;i++)
    254    {
    255       int best_id;
    256       opus_val32 best_num = -VERY_LARGE16;
    257       opus_val16 best_den = 0;
    258 #ifdef FIXED_POINT
    259       int rshift;
    260 #endif
    261 #ifdef FIXED_POINT
    262       rshift = 1+celt_ilog2(K-pulsesLeft+i+1);
    263 #endif
    264       best_id = 0;
    265       /* The squared magnitude term gets added anyway, so we might as well
    266          add it outside the loop */
    267       yy = ADD32(yy, 1);
    268       j=0;
    269       do {
    270          opus_val16 Rxy, Ryy;
    271          /* Temporary sums of the new pulse(s) */
    272          Rxy = EXTRACT16(SHR32(ADD32(xy, EXTEND32(X[j])),rshift));
    273          /* We're multiplying y[j] by two so we don't have to do it here */
    274          Ryy = ADD16(yy, y[j]);
    275 
    276          /* Approximate score: we maximise Rxy/sqrt(Ryy) (we're guaranteed that
    277             Rxy is positive because the sign is pre-computed) */
    278          Rxy = MULT16_16_Q15(Rxy,Rxy);
    279          /* The idea is to check for num/den >= best_num/best_den, but that way
    280             we can do it without any division */
    281          /* OPT: Make sure to use conditional moves here */
    282          if (MULT16_16(best_den, Rxy) > MULT16_16(Ryy, best_num))
    283          {
    284             best_den = Ryy;
    285             best_num = Rxy;
    286             best_id = j;
    287          }
    288       } while (++j<N);
    289 
    290       /* Updating the sums of the new pulse(s) */
    291       xy = ADD32(xy, EXTEND32(X[best_id]));
    292       /* We're multiplying y[j] by two so we don't have to do it here */
    293       yy = ADD16(yy, y[best_id]);
    294 
    295       /* Only now that we've made the final choice, update y/iy */
    296       /* Multiplying y[j] by 2 so we don't have to do it everywhere else */
    297       y[best_id] += 2*s;
    298       iy[best_id]++;
    299    }
    300 
    301    /* Put the original sign back */
    302    j=0;
    303    do {
    304       X[j] = MULT16_16(signx[j],X[j]);
    305       if (signx[j] < 0)
    306          iy[j] = -iy[j];
    307    } while (++j<N);
    308    encode_pulses(iy, N, K, enc);
    309 
    310 #ifdef RESYNTH
    311    normalise_residual(iy, X, N, yy, gain);
    312    exp_rotation(X, N, -1, B, K, spread);
    313 #endif
    314 
    315    collapse_mask = extract_collapse_mask(iy, N, B);
    316    RESTORE_STACK;
    317    return collapse_mask;
    318 }
    319 
    320 /** Decode pulse vector and combine the result with the pitch vector to produce
    321     the final normalised signal in the current band. */
    322 unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B,
    323       ec_dec *dec, opus_val16 gain)
    324 {
    325    int i;
    326    opus_val32 Ryy;
    327    unsigned collapse_mask;
    328    VARDECL(int, iy);
    329    SAVE_STACK;
    330 
    331    celt_assert2(K>0, "alg_unquant() needs at least one pulse");
    332    celt_assert2(N>1, "alg_unquant() needs at least two dimensions");
    333    ALLOC(iy, N, int);
    334    decode_pulses(iy, N, K, dec);
    335    Ryy = 0;
    336    i=0;
    337    do {
    338       Ryy = MAC16_16(Ryy, iy[i], iy[i]);
    339    } while (++i < N);
    340    normalise_residual(iy, X, N, Ryy, gain);
    341    exp_rotation(X, N, -1, B, K, spread);
    342    collapse_mask = extract_collapse_mask(iy, N, B);
    343    RESTORE_STACK;
    344    return collapse_mask;
    345 }
    346 
    347 void renormalise_vector(celt_norm *X, int N, opus_val16 gain)
    348 {
    349    int i;
    350 #ifdef FIXED_POINT
    351    int k;
    352 #endif
    353    opus_val32 E = EPSILON;
    354    opus_val16 g;
    355    opus_val32 t;
    356    celt_norm *xptr = X;
    357    for (i=0;i<N;i++)
    358    {
    359       E = MAC16_16(E, *xptr, *xptr);
    360       xptr++;
    361    }
    362 #ifdef FIXED_POINT
    363    k = celt_ilog2(E)>>1;
    364 #endif
    365    t = VSHR32(E, 2*(k-7));
    366    g = MULT16_16_P15(celt_rsqrt_norm(t),gain);
    367 
    368    xptr = X;
    369    for (i=0;i<N;i++)
    370    {
    371       *xptr = EXTRACT16(PSHR32(MULT16_16(g, *xptr), k+1));
    372       xptr++;
    373    }
    374    /*return celt_sqrt(E);*/
    375 }
    376 
    377 int stereo_itheta(celt_norm *X, celt_norm *Y, int stereo, int N)
    378 {
    379    int i;
    380    int itheta;
    381    opus_val16 mid, side;
    382    opus_val32 Emid, Eside;
    383 
    384    Emid = Eside = EPSILON;
    385    if (stereo)
    386    {
    387       for (i=0;i<N;i++)
    388       {
    389          celt_norm m, s;
    390          m = ADD16(SHR16(X[i],1),SHR16(Y[i],1));
    391          s = SUB16(SHR16(X[i],1),SHR16(Y[i],1));
    392          Emid = MAC16_16(Emid, m, m);
    393          Eside = MAC16_16(Eside, s, s);
    394       }
    395    } else {
    396       for (i=0;i<N;i++)
    397       {
    398          celt_norm m, s;
    399          m = X[i];
    400          s = Y[i];
    401          Emid = MAC16_16(Emid, m, m);
    402          Eside = MAC16_16(Eside, s, s);
    403       }
    404    }
    405    mid = celt_sqrt(Emid);
    406    side = celt_sqrt(Eside);
    407 #ifdef FIXED_POINT
    408    /* 0.63662 = 2/pi */
    409    itheta = MULT16_16_Q15(QCONST16(0.63662f,15),celt_atan2p(side, mid));
    410 #else
    411    itheta = (int)floor(.5f+16384*0.63662f*atan2(side,mid));
    412 #endif
    413 
    414    return itheta;
    415 }
    416