/* Copyright (c) 2007-2008 CSIRO
   Copyright (c) 2007-2009 Xiph.Org Foundation
   Written by Jean-Marc Valin */
/*
   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "mathops.h"
#include "cwrs.h"
#include "vq.h"
#include "arch.h"
#include "os_support.h"
#include "bands.h"
#include "rate.h"
#include "pitch.h"

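/* Apply a sequence of 2-D rotations (with cosine c and sine s) to pairs of
   coefficients that lie `stride` positions apart: one forward pass over the
   vector followed by one backward pass. */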
#ifndef OVERRIDE_vq_exp_rotation1
static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s)
{
   int i;
   opus_val16 ms;
   celt_norm *Xptr;
   Xptr = X;
   ms = NEG16(s);
   for (i=0;i<len-stride;i++)
   {
      celt_norm x1, x2;
      x1 = Xptr[0];
      x2 = Xptr[stride];
      Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2),  s, x1), 15));
      *Xptr++      = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));
   }
   Xptr = &X[len-2*stride-1];
   for (i=len-2*stride-1;i>=0;i--)
   {
      celt_norm x1, x2;
      x1 = Xptr[0];
      x2 = Xptr[stride];
      Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2),  s, x1), 15));
      *Xptr--      = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));
   }
}
#endif /* OVERRIDE_vq_exp_rotation1 */

static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread)
{
   static const int SPREAD_FACTOR[3]={15,10,5};
   int i;
   opus_val16 c, s;
   opus_val16 gain, theta;
   int stride2=0;
   int factor;

   if (2*K>=len || spread==SPREAD_NONE)
      return;
   factor = SPREAD_FACTOR[spread-1];

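   /* The rotation angle is derived from the pulse density: gain is roughly
      len/(len+factor*K) and theta ~ gain^2/2, so bands with many pulses
      relative to their size get only a small rotation, while sparse bands
      are spread more. */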
   gain = celt_div((opus_val32)MULT16_16(Q15_ONE,len),(opus_val32)(len+factor*K));
   theta = HALF16(MULT16_16_Q15(gain,gain));

   c = celt_cos_norm(EXTEND32(theta));
   s = celt_cos_norm(EXTEND32(SUB16(Q15ONE,theta))); /*  sin(theta) */

   if (len>=8*stride)
   {
      stride2 = 1;
      /* This is just a simple (equivalent) way of computing sqrt(len/stride) with rounding.
         It's basically incrementing stride2 as long as (stride2+0.5)^2 < len/stride. */
      while ((stride2*stride2+stride2)*stride + (stride>>2) < len)
         stride2++;
   }
   /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for
      extract_collapse_mask().*/
   len = celt_udiv(len, stride);
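   /* For dir < 0 the two rotations are applied in the opposite order with the
      sign of the sine flipped, undoing the dir > 0 rotation applied on the
      encoder side. */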
   for (i=0;i<stride;i++)
   {
      if (dir < 0)
      {
         if (stride2)
            exp_rotation1(X+i*len, len, stride2, s, c);
         exp_rotation1(X+i*len, len, 1, c, s);
      } else {
         exp_rotation1(X+i*len, len, 1, c, -s);
         if (stride2)
            exp_rotation1(X+i*len, len, stride2, s, -c);
      }
   }
}

/** Takes the decoded pulse vector, computes the gain that gives it unit norm,
    and writes the result (scaled by the band gain) to X. */
static void normalise_residual(int * OPUS_RESTRICT iy, celt_norm * OPUS_RESTRICT X,
      int N, opus_val32 Ryy, opus_val16 gain)
{
   int i;
#ifdef FIXED_POINT
   int k;
#endif
   opus_val32 t;
   opus_val16 g;

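   /* Compute g = gain/sqrt(Ryy). In fixed point, Ryy is first shifted into a
      normalised range for celt_rsqrt_norm(); the shift count k is compensated
      by the PSHR32() in the loop below. */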
#ifdef FIXED_POINT
   k = celt_ilog2(Ryy)>>1;
#endif
   t = VSHR32(Ryy, 2*(k-7));
   g = MULT16_16_P15(celt_rsqrt_norm(t),gain);

   i=0;
   do
      X[i] = EXTRACT16(PSHR32(MULT16_16(g, iy[i]), k+1));
   while (++i < N);
}

static unsigned extract_collapse_mask(int *iy, int N, int B)
{
   unsigned collapse_mask;
   int N0;
   int i;
   if (B<=1)
      return 1;
   /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for
      exp_rotation().*/
   N0 = celt_udiv(N, B);
   collapse_mask = 0;
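   /* The vector is viewed as B consecutive blocks of N0 samples; bit i of the
      mask is set if block i received at least one pulse. */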
   i=0; do {
      int j;
      unsigned tmp=0;
      j=0; do {
         tmp |= iy[i*N0+j];
      } while (++j<N0);
      collapse_mask |= (tmp!=0)<<i;
   } while (++i<B);
   return collapse_mask;
}

unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc
#ifdef RESYNTH
   , opus_val16 gain
#endif
   )
{
   VARDECL(celt_norm, y);
   VARDECL(int, iy);
   VARDECL(opus_val16, signx);
   int i, j;
   opus_val16 s;
   int pulsesLeft;
   opus_val32 sum;
   opus_val32 xy;
   opus_val16 yy;
   unsigned collapse_mask;
   SAVE_STACK;

   celt_assert2(K>0, "alg_quant() needs at least one pulse");
   celt_assert2(N>1, "alg_quant() needs at least two dimensions");

   ALLOC(y, N, celt_norm);
   ALLOC(iy, N, int);
   ALLOC(signx, N, opus_val16);

   exp_rotation(X, N, 1, B, K, spread);

   /* Get rid of the sign */
   sum = 0;
   j=0; do {
      if (X[j]>0)
         signx[j]=1;
      else {
         signx[j]=-1;
         X[j]=-X[j];
      }
      iy[j] = 0;
      y[j] = 0;
   } while (++j<N);

   xy = yy = 0;

   pulsesLeft = K;

   /* Do a pre-search by projecting on the pyramid */
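   /* The projection initialises iy[j] to roughly (K-1)*X[j]/sum(X), rounding
      towards zero, so that (in exact arithmetic) at most K-1 pulses are placed
      here and at least one is left for the greedy search below. */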
   if (K > (N>>1))
   {
      opus_val16 rcp;
      j=0; do {
         sum += X[j];
      }  while (++j<N);

      /* If X is too small, just replace it with a pulse at 0 */
#ifdef FIXED_POINT
      if (sum <= K)
#else
      /* Prevents infinities and NaNs from causing too many pulses
         to be allocated. 64 is an approximation of infinity here. */
      if (!(sum > EPSILON && sum < 64))
#endif
      {
         X[0] = QCONST16(1.f,14);
         j=1; do
            X[j]=0;
         while (++j<N);
         sum = QCONST16(1.f,14);
      }
      rcp = EXTRACT16(MULT16_32_Q16(K-1, celt_rcp(sum)));
      j=0; do {
#ifdef FIXED_POINT
         /* It's really important to round *towards zero* here */
         iy[j] = MULT16_16_Q15(X[j],rcp);
#else
         iy[j] = (int)floor(rcp*X[j]);
#endif
         y[j] = (celt_norm)iy[j];
         yy = MAC16_16(yy, y[j],y[j]);
         xy = MAC16_16(xy, X[j],y[j]);
         y[j] *= 2;
         pulsesLeft -= iy[j];
      }  while (++j<N);
   }
   celt_assert2(pulsesLeft>=1, "Allocated too many pulses in the quick pass");

   /* This should never happen, but just in case it does (e.g. on silence)
      we fill the first bin with pulses. */
#ifdef FIXED_POINT_DEBUG
   celt_assert2(pulsesLeft<=N+3, "Not enough pulses in the quick pass");
#endif
   if (pulsesLeft > N+3)
   {
      opus_val16 tmp = (opus_val16)pulsesLeft;
      yy = MAC16_16(yy, tmp, tmp);
      yy = MAC16_16(yy, tmp, y[0]);
      iy[0] += pulsesLeft;
      pulsesLeft=0;
   }

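   /* Greedy search: add the remaining pulses one at a time, each time picking
      the position j that maximises Rxy^2/Ryy, i.e. the correlation with X
      relative to the energy of y once a pulse is added at j. */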
   s = 1;
   for (i=0;i<pulsesLeft;i++)
   {
      int best_id;
      opus_val32 best_num = -VERY_LARGE16;
      opus_val16 best_den = 0;
#ifdef FIXED_POINT
      int rshift;
#endif
#ifdef FIXED_POINT
      rshift = 1+celt_ilog2(K-pulsesLeft+i+1);
#endif
      best_id = 0;
      /* The squared magnitude term gets added anyway, so we might as well
         add it outside the loop */
      yy = ADD16(yy, 1);
      j=0;
      do {
         opus_val16 Rxy, Ryy;
         /* Temporary sums of the new pulse(s) */
         Rxy = EXTRACT16(SHR32(ADD32(xy, EXTEND32(X[j])),rshift));
         /* We're multiplying y[j] by two so we don't have to do it here */
         Ryy = ADD16(yy, y[j]);

         /* Approximate score: we maximise Rxy/sqrt(Ryy) (we're guaranteed that
            Rxy is positive because the sign is pre-computed) */
         Rxy = MULT16_16_Q15(Rxy,Rxy);
         /* We want to check whether Rxy/Ryy beats best_num/best_den;
            cross-multiplying lets us do that without any division */
         /* OPT: Make sure to use conditional moves here */
         if (MULT16_16(best_den, Rxy) > MULT16_16(Ryy, best_num))
         {
            best_den = Ryy;
            best_num = Rxy;
            best_id = j;
         }
      } while (++j<N);

      /* Updating the sums of the new pulse(s) */
      xy = ADD32(xy, EXTEND32(X[best_id]));
      /* We're multiplying y[j] by two so we don't have to do it here */
      yy = ADD16(yy, y[best_id]);

      /* Only now that we've made the final choice, update y/iy */
      /* Multiplying y[j] by 2 so we don't have to do it everywhere else */
      y[best_id] += 2*s;
      iy[best_id]++;
   }

   /* Put the original sign back */
   j=0;
   do {
      X[j] = MULT16_16(signx[j],X[j]);
      if (signx[j] < 0)
         iy[j] = -iy[j];
   } while (++j<N);
   encode_pulses(iy, N, K, enc);

#ifdef RESYNTH
   normalise_residual(iy, X, N, yy, gain);
   exp_rotation(X, N, -1, B, K, spread);
#endif

   collapse_mask = extract_collapse_mask(iy, N, B);
   RESTORE_STACK;
   return collapse_mask;
}

/** Decode the pulse vector and produce the final normalised signal in the
    current band. */
unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B,
      ec_dec *dec, opus_val16 gain)
{
   opus_val32 Ryy;
   unsigned collapse_mask;
   VARDECL(int, iy);
   SAVE_STACK;

   celt_assert2(K>0, "alg_unquant() needs at least one pulse");
   celt_assert2(N>1, "alg_unquant() needs at least two dimensions");
   ALLOC(iy, N, int);
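   /* decode_pulses() returns the sum of squares of the decoded pulse vector,
      which is the Ryy value normalise_residual() needs. */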
   Ryy = decode_pulses(iy, N, K, dec);
   normalise_residual(iy, X, N, Ryy, gain);
   exp_rotation(X, N, -1, B, K, spread);
   collapse_mask = extract_collapse_mask(iy, N, B);
   RESTORE_STACK;
   return collapse_mask;
}

#ifndef OVERRIDE_renormalise_vector
void renormalise_vector(celt_norm *X, int N, opus_val16 gain, int arch)
{
   int i;
#ifdef FIXED_POINT
   int k;
#endif
   opus_val32 E;
   opus_val16 g;
   opus_val32 t;
   celt_norm *xptr;
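   /* Same normalisation as normalise_residual(): g = gain/sqrt(E), with a
      small EPSILON added to E to avoid dividing by zero. */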
   E = EPSILON + celt_inner_prod(X, X, N, arch);
#ifdef FIXED_POINT
   k = celt_ilog2(E)>>1;
#endif
   t = VSHR32(E, 2*(k-7));
   g = MULT16_16_P15(celt_rsqrt_norm(t),gain);

   xptr = X;
   for (i=0;i<N;i++)
   {
      *xptr = EXTRACT16(PSHR32(MULT16_16(g, *xptr), k+1));
      xptr++;
   }
   /*return celt_sqrt(E);*/
}
#endif /* OVERRIDE_renormalise_vector */

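/* Compute the angle between the mid and side components as a Q14 value:
   itheta = round(16384*(2/pi)*atan2(||side||, ||mid||)), so 0 means all mid
   and 16384 means all side. */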
int stereo_itheta(const celt_norm *X, const celt_norm *Y, int stereo, int N, int arch)
{
   int i;
   int itheta;
   opus_val16 mid, side;
   opus_val32 Emid, Eside;

   Emid = Eside = EPSILON;
   if (stereo)
   {
      for (i=0;i<N;i++)
      {
         celt_norm m, s;
         m = ADD16(SHR16(X[i],1),SHR16(Y[i],1));
         s = SUB16(SHR16(X[i],1),SHR16(Y[i],1));
         Emid = MAC16_16(Emid, m, m);
         Eside = MAC16_16(Eside, s, s);
      }
   } else {
      Emid += celt_inner_prod(X, X, N, arch);
      Eside += celt_inner_prod(Y, Y, N, arch);
   }
   mid = celt_sqrt(Emid);
   side = celt_sqrt(Eside);
#ifdef FIXED_POINT
   /* 0.63662 = 2/pi */
   itheta = MULT16_16_Q15(QCONST16(0.63662f,15),celt_atan2p(side, mid));
#else
   itheta = (int)floor(.5f+16384*0.63662f*atan2(side,mid));
#endif

   return itheta;
}
    409