Home | History | Annotate | Download | only in srce
      1 /******************************************************************************
      2  *
      3  *  Copyright (C) 2014 The Android Open Source Project
      4  *  Copyright 2003 - 2004 Open Interface North America, Inc. All rights
      5  *                        reserved.
      6  *
      7  *  Licensed under the Apache License, Version 2.0 (the "License");
      8  *  you may not use this file except in compliance with the License.
      9  *  You may obtain a copy of the License at:
     10  *
     11  *  http://www.apache.org/licenses/LICENSE-2.0
     12  *
     13  *  Unless required by applicable law or agreed to in writing, software
     14  *  distributed under the License is distributed on an "AS IS" BASIS,
     15  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     16  *  See the License for the specific language governing permissions and
     17  *  limitations under the License.
     18  *
     19  ******************************************************************************/
     20 
     21 /*******************************************************************************
     22   $Revision: #1 $
     23  ******************************************************************************/
     24 
     25 /** @file
     26 @ingroup codec_internal
     27 */
     28 
     29 /**@addgroup codec_internal*/
     30 /**@{*/
     31 
     32 /*
     33  * Performs an 8-point Type-II scaled DCT using the Arai-Agui-Nakajima
     34  * factorization. The scaling factors are folded into the windowing
     35  * constants. 29 adds and 5 16x32 multiplies per 8 samples.
     36  */
     37 
     38 #include "oi_codec_sbc_private.h"
     39 
/*
 * AAN multiplier constants, stored as signed S1.30 fixed-point integers
 * (i.e. the real value multiplied by 2^30). The last number in each trailing
 * comment is the real value the integer represents.
 */
#define AAN_C4_FIX (759250125) /* S1.30  759250125   0.707107*/

#define AAN_C6_FIX (410903207) /* S1.30  410903207   0.382683*/

#define AAN_Q0_FIX (581104888) /* S1.30  581104888   0.541196*/

#define AAN_Q1_FIX (1402911301) /* S1.30 1402911301   1.306563*/

/** Scales x by y bits to the right, adding a rounding factor.
 *
 * The rounding factor is 1 << (y - 1), i.e. half of the divisor 2^y, added
 * before the shift. Because (y - 1) is used as a shift count, y must be at
 * least 1. The argument y is expanded twice, so it must be free of side
 * effects.
 *
 * Only defined here when no earlier definition of SCALE is in scope.
 */
#ifndef SCALE
#define SCALE(x, y) (((x) + (1 << ((y)-1))) >> (y))
#endif
     53 
     54 /**
     55  * Default C language implementation of a 32x32->32 multiply. This function may
     56  * be replaced by a platform-specific version for speed.
     57  *
     58  * @param u A signed 32-bit multiplicand
     59  * @param v A signed 32-bit multiplier
     60 
     61  * @return  A signed 32-bit value corresponding to the 32 most significant bits
     62  * of the 64-bit product of u and v.
     63  */
     64 INLINE int32_t default_mul_32s_32s_hi(int32_t u, int32_t v) {
     65   uint32_t u0, v0;
     66   int32_t u1, v1, w1, w2, t;
     67 
     68   u0 = u & 0xFFFF;
     69   u1 = u >> 16;
     70   v0 = v & 0xFFFF;
     71   v1 = v >> 16;
     72   t = u0 * v0;
     73   t = u1 * v0 + ((uint32_t)t >> 16);
     74   w1 = t & 0xFFFF;
     75   w2 = t >> 16;
     76   w1 = u0 * v1 + w1;
     77   return u1 * v1 + w2 + (w1 >> 16);
     78 }
     79 
     80 #define MUL_32S_32S_HI(_x, _y) default_mul_32s_32s_hi(_x, _y)
     81 
     82 #ifdef DEBUG_DCT
     83 PRIVATE void float_dct2_8(float* RESTRICT out, int32_t const* RESTRICT in) {
     84 #define FIX(x, bits) \
     85   (((int)floor(0.5f + ((x) * ((float)(1 << bits))))) / ((float)(1 << bits)))
     86 #define FLOAT_BUTTERFLY(x, y) \
     87   x += y;                     \
     88   y = x - (y * 2);            \
     89   OI_ASSERT(VALID_INT32(x));  \
     90   OI_ASSERT(VALID_INT32(y));
     91 #define FLOAT_MULT_DCT(K, sample) (FIX(K, 20) * sample)
     92 #define FLOAT_SCALE(x, y) (((x) / (double)(1 << (y))))
     93 
     94   double L00, L01, L02, L03, L04, L05, L06, L07;
     95   double L25;
     96 
     97   double in0, in1, in2, in3;
     98   double in4, in5, in6, in7;
     99 
    100   in0 = FLOAT_SCALE(in[0], DCTII_8_SHIFT_IN);
    101   OI_ASSERT(VALID_INT32(in0));
    102   in1 = FLOAT_SCALE(in[1], DCTII_8_SHIFT_IN);
    103   OI_ASSERT(VALID_INT32(in1));
    104   in2 = FLOAT_SCALE(in[2], DCTII_8_SHIFT_IN);
    105   OI_ASSERT(VALID_INT32(in2));
    106   in3 = FLOAT_SCALE(in[3], DCTII_8_SHIFT_IN);
    107   OI_ASSERT(VALID_INT32(in3));
    108   in4 = FLOAT_SCALE(in[4], DCTII_8_SHIFT_IN);
    109   OI_ASSERT(VALID_INT32(in4));
    110   in5 = FLOAT_SCALE(in[5], DCTII_8_SHIFT_IN);
    111   OI_ASSERT(VALID_INT32(in5));
    112   in6 = FLOAT_SCALE(in[6], DCTII_8_SHIFT_IN);
    113   OI_ASSERT(VALID_INT32(in6));
    114   in7 = FLOAT_SCALE(in[7], DCTII_8_SHIFT_IN);
    115   OI_ASSERT(VALID_INT32(in7));
    116 
    117   L00 = (in0 + in7);
    118   OI_ASSERT(VALID_INT32(L00));
    119   L01 = (in1 + in6);
    120   OI_ASSERT(VALID_INT32(L01));
    121   L02 = (in2 + in5);
    122   OI_ASSERT(VALID_INT32(L02));
    123   L03 = (in3 + in4);
    124   OI_ASSERT(VALID_INT32(L03));
    125 
    126   L04 = (in3 - in4);
    127   OI_ASSERT(VALID_INT32(L04));
    128   L05 = (in2 - in5);
    129   OI_ASSERT(VALID_INT32(L05));
    130   L06 = (in1 - in6);
    131   OI_ASSERT(VALID_INT32(L06));
    132   L07 = (in0 - in7);
    133   OI_ASSERT(VALID_INT32(L07));
    134 
    135   FLOAT_BUTTERFLY(L00, L03);
    136   FLOAT_BUTTERFLY(L01, L02);
    137 
    138   L02 += L03;
    139   OI_ASSERT(VALID_INT32(L02));
    140 
    141   L02 = FLOAT_MULT_DCT(AAN_C4_FLOAT, L02);
    142   OI_ASSERT(VALID_INT32(L02));
    143 
    144   FLOAT_BUTTERFLY(L00, L01);
    145 
    146   out[0] = (float)FLOAT_SCALE(L00, DCTII_8_SHIFT_0);
    147   OI_ASSERT(VALID_INT16(out[0]));
    148   out[4] = (float)FLOAT_SCALE(L01, DCTII_8_SHIFT_4);
    149   OI_ASSERT(VALID_INT16(out[4]));
    150 
    151   FLOAT_BUTTERFLY(L03, L02);
    152   out[6] = (float)FLOAT_SCALE(L02, DCTII_8_SHIFT_6);
    153   OI_ASSERT(VALID_INT16(out[6]));
    154   out[2] = (float)FLOAT_SCALE(L03, DCTII_8_SHIFT_2);
    155   OI_ASSERT(VALID_INT16(out[2]));
    156 
    157   L04 += L05;
    158   OI_ASSERT(VALID_INT32(L04));
    159   L05 += L06;
    160   OI_ASSERT(VALID_INT32(L05));
    161   L06 += L07;
    162   OI_ASSERT(VALID_INT32(L06));
    163 
    164   L04 /= 2;
    165   L05 /= 2;
    166   L06 /= 2;
    167   L07 /= 2;
    168 
    169   L05 = FLOAT_MULT_DCT(AAN_C4_FLOAT, L05);
    170   OI_ASSERT(VALID_INT32(L05));
    171 
    172   L25 = L06 - L04;
    173   OI_ASSERT(VALID_INT32(L25));
    174   L25 = FLOAT_MULT_DCT(AAN_C6_FLOAT, L25);
    175   OI_ASSERT(VALID_INT32(L25));
    176 
    177   L04 = FLOAT_MULT_DCT(AAN_Q0_FLOAT, L04);
    178   OI_ASSERT(VALID_INT32(L04));
    179   L04 -= L25;
    180   OI_ASSERT(VALID_INT32(L04));
    181 
    182   L06 = FLOAT_MULT_DCT(AAN_Q1_FLOAT, L06);
    183   OI_ASSERT(VALID_INT32(L06));
    184   L06 -= L25;
    185   OI_ASSERT(VALID_INT32(L25));
    186 
    187   FLOAT_BUTTERFLY(L07, L05);
    188 
    189   FLOAT_BUTTERFLY(L05, L04);
    190   out[3] = (float)(FLOAT_SCALE(L04, DCTII_8_SHIFT_3 - 1));
    191   OI_ASSERT(VALID_INT16(out[3]));
    192   out[5] = (float)(FLOAT_SCALE(L05, DCTII_8_SHIFT_5 - 1));
    193   OI_ASSERT(VALID_INT16(out[5]));
    194 
    195   FLOAT_BUTTERFLY(L07, L06);
    196   out[7] = (float)(FLOAT_SCALE(L06, DCTII_8_SHIFT_7 - 1));
    197   OI_ASSERT(VALID_INT16(out[7]));
    198   out[1] = (float)(FLOAT_SCALE(L07, DCTII_8_SHIFT_1 - 1));
    199   OI_ASSERT(VALID_INT16(out[1]));
    200 }
    201 #undef BUTTERFLY
    202 #endif
    203 
    204 /*
    205  * This function calculates the AAN DCT. Its inputs are in S16.15 format, as
    206  * returned by OI_SBC_Dequant. In practice, abs(in[x]) < 52429.0 / 1.38
    207  * (1244918057 integer). The function it computes is an approximation to the
    208  * array defined by:
    209  *
    210  * diag(aan_s) * AAN= C2
    211  *
    212  *   or
    213  *
    214  * AAN = diag(1/aan_s) * C2
    215  *
    216  * where C2 is as it is defined in the comment at the head of this file, and
    217  *
    218  * aan_s[i] = aan_s = 1/(2*cos(i*pi/16)) with i = 1..7, aan_s[0] = 1;
    219  *
    220  * aan_s[i] = [ 1.000  0.510  0.541  0.601  0.707  0.900  1.307  2.563 ]
    221  *
    222  * The output ranges are shown as follows:
    223  *
    224  * Let Y[0..7] = AAN * X[0..7]
    225  *
    226  * Without loss of generality, assume the input vector X consists of elements
    227  * between -1 and 1. The maximum possible value of a given output element occurs
    228  * with some particular combination of input vector elements each of which is -1
    229  * or 1. Consider the computation of Y[i]. Y[i] = sum t=0..7 of AAN[t,i]*X[i]. Y
    230  * is maximized if the sign of X[i] matches the sign of AAN[t,i], ensuring a
    231  * positive contribution to the sum. Equivalently, one may simply sum
    232  * abs(AAN)[t,i] over t to get the maximum possible value of Y[i].
    233  *
    234  * This yields approximately:
    235  *  [8.00  10.05   9.66   8.52   8.00   5.70   4.00   2.00]
    236  *
    237  * Given the maximum magnitude sensible input value of +/-37992, this yields the
    238  * following vector of maximum output magnitudes:
    239  *
    240  * [ 303936  381820  367003  323692  303936  216555  151968   75984 ]
    241  *
    242  * Ultimately, these values must fit into 16 bit signed integers, so they must
    243  * be scaled. A non-uniform scaling helps maximize the kept precision. The
    244  * relative number of extra bits of precision maintainable with respect to the
    245  * largest value is given here:
    246  *
    247  * [ 0  0  0  0  0  0  1  2 ]
    248  *
    249  */
    250 PRIVATE void dct2_8(SBC_BUFFER_T* RESTRICT out, int32_t const* RESTRICT in) {
    251 #define BUTTERFLY(x, y) \
    252   x += (y);             \
    253   (y) = (x) - ((y) << 1);
    254 #define FIX_MULT_DCT(K, x) (MUL_32S_32S_HI(K, x) << 2)
    255 
    256   int32_t L00, L01, L02, L03, L04, L05, L06, L07;
    257   int32_t L25;
    258 
    259   int32_t in0, in1, in2, in3;
    260   int32_t in4, in5, in6, in7;
    261 
    262 #if DCTII_8_SHIFT_IN != 0
    263   in0 = SCALE(in[0], DCTII_8_SHIFT_IN);
    264   in1 = SCALE(in[1], DCTII_8_SHIFT_IN);
    265   in2 = SCALE(in[2], DCTII_8_SHIFT_IN);
    266   in3 = SCALE(in[3], DCTII_8_SHIFT_IN);
    267   in4 = SCALE(in[4], DCTII_8_SHIFT_IN);
    268   in5 = SCALE(in[5], DCTII_8_SHIFT_IN);
    269   in6 = SCALE(in[6], DCTII_8_SHIFT_IN);
    270   in7 = SCALE(in[7], DCTII_8_SHIFT_IN);
    271 #else
    272   in0 = in[0];
    273   in1 = in[1];
    274   in2 = in[2];
    275   in3 = in[3];
    276   in4 = in[4];
    277   in5 = in[5];
    278   in6 = in[6];
    279   in7 = in[7];
    280 #endif
    281 
    282   L00 = in0 + in7;
    283   L01 = in1 + in6;
    284   L02 = in2 + in5;
    285   L03 = in3 + in4;
    286 
    287   L04 = in3 - in4;
    288   L05 = in2 - in5;
    289   L06 = in1 - in6;
    290   L07 = in0 - in7;
    291 
    292   BUTTERFLY(L00, L03);
    293   BUTTERFLY(L01, L02);
    294 
    295   L02 += L03;
    296 
    297   L02 = FIX_MULT_DCT(AAN_C4_FIX, L02);
    298 
    299   BUTTERFLY(L00, L01);
    300 
    301   out[0] = (int16_t)SCALE(L00, DCTII_8_SHIFT_0);
    302   out[4] = (int16_t)SCALE(L01, DCTII_8_SHIFT_4);
    303 
    304   BUTTERFLY(L03, L02);
    305   out[6] = (int16_t)SCALE(L02, DCTII_8_SHIFT_6);
    306   out[2] = (int16_t)SCALE(L03, DCTII_8_SHIFT_2);
    307 
    308   L04 += L05;
    309   L05 += L06;
    310   L06 += L07;
    311 
    312   L04 /= 2;
    313   L05 /= 2;
    314   L06 /= 2;
    315   L07 /= 2;
    316 
    317   L05 = FIX_MULT_DCT(AAN_C4_FIX, L05);
    318 
    319   L25 = L06 - L04;
    320   L25 = FIX_MULT_DCT(AAN_C6_FIX, L25);
    321 
    322   L04 = FIX_MULT_DCT(AAN_Q0_FIX, L04);
    323   L04 -= L25;
    324 
    325   L06 = FIX_MULT_DCT(AAN_Q1_FIX, L06);
    326   L06 -= L25;
    327 
    328   BUTTERFLY(L07, L05);
    329 
    330   BUTTERFLY(L05, L04);
    331   out[3] = (int16_t)SCALE(L04, DCTII_8_SHIFT_3 - 1);
    332   out[5] = (int16_t)SCALE(L05, DCTII_8_SHIFT_5 - 1);
    333 
    334   BUTTERFLY(L07, L06);
    335   out[7] = (int16_t)SCALE(L06, DCTII_8_SHIFT_7 - 1);
    336   out[1] = (int16_t)SCALE(L07, DCTII_8_SHIFT_1 - 1);
    337 #undef BUTTERFLY
    338 
    339 #ifdef DEBUG_DCT
    340   {
    341     float float_out[8];
    342     float_dct2_8(float_out, in);
    343   }
    344 #endif
    345 }
    346 
    347 /**@}*/
    348