Home | History | Annotate | Download | only in celt
      1 /* Copyright (c) 2001-2011 Timothy B. Terriberry
      2 */
      3 /*
      4    Redistribution and use in source and binary forms, with or without
      5    modification, are permitted provided that the following conditions
      6    are met:
      7 
      8    - Redistributions of source code must retain the above copyright
      9    notice, this list of conditions and the following disclaimer.
     10 
     11    - Redistributions in binary form must reproduce the above copyright
     12    notice, this list of conditions and the following disclaimer in the
     13    documentation and/or other materials provided with the distribution.
     14 
     15    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     16    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     17    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     18    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
     19    OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     20    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     21    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     22    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
     23    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
     24    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     25    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     26 */
     27 
     28 #ifdef HAVE_CONFIG_H
     29 #include "config.h"
     30 #endif
     31 
     32 #include "entcode.h"
     33 #include "arch.h"
     34 
     35 #if !defined(EC_CLZ)
     36 /*This is a fallback for systems where we don't know how to access
     37    a BSR or CLZ instruction (see ecintrin.h).
     38   If you are optimizing Opus on a new platform and it has a native CLZ or
     39    BZR (e.g. cell, MIPS, x86, etc) then making it available to Opus will be
     40    an easy performance win.*/
     41 int ec_ilog(opus_uint32 _v){
     42   /*On a Pentium M, this branchless version tested as the fastest on
     43      1,000,000,000 random 32-bit integers, edging out a similar version with
     44      branches, and a 256-entry LUT version.*/
     45   int ret;
     46   int m;
     47   ret=!!_v;
     48   m=!!(_v&0xFFFF0000)<<4;
     49   _v>>=m;
     50   ret|=m;
     51   m=!!(_v&0xFF00)<<3;
     52   _v>>=m;
     53   ret|=m;
     54   m=!!(_v&0xF0)<<2;
     55   _v>>=m;
     56   ret|=m;
     57   m=!!(_v&0xC)<<1;
     58   _v>>=m;
     59   ret|=m;
     60   ret+=!!(_v&0x2);
     61   return ret;
     62 }
     63 #endif
     64 
     65 #if 1
     66 /* This is a faster version of ec_tell_frac() that takes advantage
     67    of the low (1/8 bit) resolution to use just a linear function
     68    followed by a lookup to determine the exact transition thresholds. */
     69 opus_uint32 ec_tell_frac(ec_ctx *_this){
     70   static const unsigned correction[8] =
     71     {35733, 38967, 42495, 46340,
     72      50535, 55109, 60097, 65535};
     73   opus_uint32 nbits;
     74   opus_uint32 r;
     75   int         l;
     76   unsigned    b;
     77   nbits=_this->nbits_total<<BITRES;
     78   l=EC_ILOG(_this->rng);
     79   r=_this->rng>>(l-16);
     80   b = (r>>12)-8;
     81   b += r>correction[b];
     82   l = (l<<3)+b;
     83   return nbits-l;
     84 }
     85 #else
     86 opus_uint32 ec_tell_frac(ec_ctx *_this){
     87   opus_uint32 nbits;
     88   opus_uint32 r;
     89   int         l;
     90   int         i;
     91   /*To handle the non-integral number of bits still left in the encoder/decoder
     92      state, we compute the worst-case number of bits of val that must be
     93      encoded to ensure that the value is inside the range for any possible
     94      subsequent bits.
     95     The computation here is independent of val itself (the decoder does not
     96      even track that value), even though the real number of bits used after
     97      ec_enc_done() may be 1 smaller if rng is a power of two and the
     98      corresponding trailing bits of val are all zeros.
     99     If we did try to track that special case, then coding a value with a
    100      probability of 1/(1<<n) might sometimes appear to use more than n bits.
    101     This may help explain the surprising result that a newly initialized
    102      encoder or decoder claims to have used 1 bit.*/
    103   nbits=_this->nbits_total<<BITRES;
    104   l=EC_ILOG(_this->rng);
    105   r=_this->rng>>(l-16);
    106   for(i=BITRES;i-->0;){
    107     int b;
    108     r=r*r>>15;
    109     b=(int)(r>>16);
    110     l=l<<1|b;
    111     r>>=b;
    112   }
    113   return nbits-l;
    114 }
    115 #endif
    116 
    117 #ifdef USE_SMALL_DIV_TABLE
    118 /* Result of 2^32/(2*i+1), except for i=0. */
    119 const opus_uint32 SMALL_DIV_TABLE[129] = {
    120    0xFFFFFFFF, 0x55555555, 0x33333333, 0x24924924,
    121    0x1C71C71C, 0x1745D174, 0x13B13B13, 0x11111111,
    122    0x0F0F0F0F, 0x0D79435E, 0x0C30C30C, 0x0B21642C,
    123    0x0A3D70A3, 0x097B425E, 0x08D3DCB0, 0x08421084,
    124    0x07C1F07C, 0x07507507, 0x06EB3E45, 0x06906906,
    125    0x063E7063, 0x05F417D0, 0x05B05B05, 0x0572620A,
    126    0x05397829, 0x05050505, 0x04D4873E, 0x04A7904A,
    127    0x047DC11F, 0x0456C797, 0x04325C53, 0x04104104,
    128    0x03F03F03, 0x03D22635, 0x03B5CC0E, 0x039B0AD1,
    129    0x0381C0E0, 0x0369D036, 0x03531DEC, 0x033D91D2,
    130    0x0329161F, 0x03159721, 0x03030303, 0x02F14990,
    131    0x02E05C0B, 0x02D02D02, 0x02C0B02C, 0x02B1DA46,
    132    0x02A3A0FD, 0x0295FAD4, 0x0288DF0C, 0x027C4597,
    133    0x02702702, 0x02647C69, 0x02593F69, 0x024E6A17,
    134    0x0243F6F0, 0x0239E0D5, 0x02302302, 0x0226B902,
    135    0x021D9EAD, 0x0214D021, 0x020C49BA, 0x02040810,
    136    0x01FC07F0, 0x01F44659, 0x01ECC07B, 0x01E573AC,
    137    0x01DE5D6E, 0x01D77B65, 0x01D0CB58, 0x01CA4B30,
    138    0x01C3F8F0, 0x01BDD2B8, 0x01B7D6C3, 0x01B20364,
    139    0x01AC5701, 0x01A6D01A, 0x01A16D3F, 0x019C2D14,
    140    0x01970E4F, 0x01920FB4, 0x018D3018, 0x01886E5F,
    141    0x0183C977, 0x017F405F, 0x017AD220, 0x01767DCE,
    142    0x01724287, 0x016E1F76, 0x016A13CD, 0x01661EC6,
    143    0x01623FA7, 0x015E75BB, 0x015AC056, 0x01571ED3,
    144    0x01539094, 0x01501501, 0x014CAB88, 0x0149539E,
    145    0x01460CBC, 0x0142D662, 0x013FB013, 0x013C995A,
    146    0x013991C2, 0x013698DF, 0x0133AE45, 0x0130D190,
    147    0x012E025C, 0x012B404A, 0x01288B01, 0x0125E227,
    148    0x01234567, 0x0120B470, 0x011E2EF3, 0x011BB4A4,
    149    0x01194538, 0x0116E068, 0x011485F0, 0x0112358E,
    150    0x010FEF01, 0x010DB20A, 0x010B7E6E, 0x010953F3,
    151    0x01073260, 0x0105197F, 0x0103091B, 0x01010101
    152 };
    153 #endif
    154