Home | History | Annotate | Download | only in srce
      1 /******************************************************************************
      2  *
      3  *  Copyright (C) 2014 The Android Open Source Project
      4  *  Copyright 2003 - 2004 Open Interface North America, Inc. All rights reserved.
      5  *
      6  *  Licensed under the Apache License, Version 2.0 (the "License");
      7  *  you may not use this file except in compliance with the License.
      8  *  You may obtain a copy of the License at:
      9  *
     10  *  http://www.apache.org/licenses/LICENSE-2.0
     11  *
     12  *  Unless required by applicable law or agreed to in writing, software
     13  *  distributed under the License is distributed on an "AS IS" BASIS,
     14  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     15  *  See the License for the specific language governing permissions and
     16  *  limitations under the License.
     17  *
     18  ******************************************************************************/
     19 
     20 /**********************************************************************************
     21   $Revision: #1 $
     22 ***********************************************************************************/
     23 
     24 /** @file
     25 @ingroup codec_internal
     26 */
     27 
     28 /**@addgroup codec_internal*/
     29 /**@{*/
     30 
     31 /*
     32  * Performs an 8-point Type-II scaled DCT using the Arai-Agui-Nakajima
     33  * factorization. The scaling factors are folded into the windowing
     34  * constants. 29 adds and 5 16x32 multiplies per 8 samples.
     35  */
     36 
     37 #include "oi_codec_sbc_private.h"
     38 
/*
 * AAN rotation constants in signed S1.30 fixed point: each integer is the
 * real value shown in its trailing comment multiplied by 2^30 and rounded.
 * dct2_8() applies them through FIX_MULT_DCT.
 */
#define AAN_C4_FIX (759250125)/* S1.30  759250125   0.707107*/

#define AAN_C6_FIX (410903207)/* S1.30  410903207   0.382683*/

#define AAN_Q0_FIX (581104888)/* S1.30  581104888   0.541196*/

#define AAN_Q1_FIX (1402911301)/* S1.30 1402911301   1.306563*/
     46 
     47 /** Scales x by y bits to the right, adding a rounding factor.
     48  */
     49 #ifndef SCALE
     50 #define SCALE(x, y) (((x) + (1 <<((y)-1))) >> (y))
     51 #endif
     52 
     53 /**
     54  * Default C language implementation of a 32x32->32 multiply. This function may
     55  * be replaced by a platform-specific version for speed.
     56  *
     57  * @param u A signed 32-bit multiplicand
     58  * @param v A signed 32-bit multiplier
     59 
     60  * @return  A signed 32-bit value corresponding to the 32 most significant bits
     61  * of the 64-bit product of u and v.
     62  */
     63 INLINE OI_INT32 default_mul_32s_32s_hi(OI_INT32 u, OI_INT32 v)
     64 {
     65     OI_UINT32 u0, v0;
     66     OI_INT32 u1, v1, w1, w2, t;
     67 
     68     u0 = u & 0xFFFF; u1 = u >> 16;
     69     v0 = v & 0xFFFF; v1 = v >> 16;
     70     t = u0*v0;
     71     t = u1*v0 + ((OI_UINT32)t >> 16);
     72     w1 = t & 0xFFFF;
     73     w2 = t >> 16;
     74     w1 = u0*v1 + w1;
     75     return u1*v1 + w2 + (w1 >> 16);
     76 }
     77 
     78 #define MUL_32S_32S_HI(_x, _y) default_mul_32s_32s_hi(_x, _y)
     79 
     80 
     81 #ifdef DEBUG_DCT
     82 PRIVATE void float_dct2_8(float * RESTRICT out, OI_INT32 const *RESTRICT in)
     83 {
     84 #define FIX(x,bits) (((int)floor(0.5f+((x)*((float)(1<<bits)))))/((float)(1<<bits)))
     85 #define FLOAT_BUTTERFLY(x,y) x += y; y = x - (y*2); OI_ASSERT(VALID_INT32(x)); OI_ASSERT(VALID_INT32(y));
     86 #define FLOAT_MULT_DCT(K, sample) (FIX(K,20) * sample)
     87 #define FLOAT_SCALE(x, y) (((x) / (double)(1 << (y))))
     88 
     89     double L00,L01,L02,L03,L04,L05,L06,L07;
     90     double L25;
     91 
     92     double in0,in1,in2,in3;
     93     double in4,in5,in6,in7;
     94 
     95     in0 = FLOAT_SCALE(in[0], DCTII_8_SHIFT_IN); OI_ASSERT(VALID_INT32(in0));
     96     in1 = FLOAT_SCALE(in[1], DCTII_8_SHIFT_IN); OI_ASSERT(VALID_INT32(in1));
     97     in2 = FLOAT_SCALE(in[2], DCTII_8_SHIFT_IN); OI_ASSERT(VALID_INT32(in2));
     98     in3 = FLOAT_SCALE(in[3], DCTII_8_SHIFT_IN); OI_ASSERT(VALID_INT32(in3));
     99     in4 = FLOAT_SCALE(in[4], DCTII_8_SHIFT_IN); OI_ASSERT(VALID_INT32(in4));
    100     in5 = FLOAT_SCALE(in[5], DCTII_8_SHIFT_IN); OI_ASSERT(VALID_INT32(in5));
    101     in6 = FLOAT_SCALE(in[6], DCTII_8_SHIFT_IN); OI_ASSERT(VALID_INT32(in6));
    102     in7 = FLOAT_SCALE(in[7], DCTII_8_SHIFT_IN); OI_ASSERT(VALID_INT32(in7));
    103 
    104     L00 = (in0 + in7); OI_ASSERT(VALID_INT32(L00));
    105     L01 = (in1 + in6); OI_ASSERT(VALID_INT32(L01));
    106     L02 = (in2 + in5); OI_ASSERT(VALID_INT32(L02));
    107     L03 = (in3 + in4); OI_ASSERT(VALID_INT32(L03));
    108 
    109     L04 = (in3 - in4); OI_ASSERT(VALID_INT32(L04));
    110     L05 = (in2 - in5); OI_ASSERT(VALID_INT32(L05));
    111     L06 = (in1 - in6); OI_ASSERT(VALID_INT32(L06));
    112     L07 = (in0 - in7); OI_ASSERT(VALID_INT32(L07));
    113 
    114     FLOAT_BUTTERFLY(L00, L03);
    115     FLOAT_BUTTERFLY(L01, L02);
    116 
    117     L02 += L03; OI_ASSERT(VALID_INT32(L02));
    118 
    119     L02 = FLOAT_MULT_DCT(AAN_C4_FLOAT, L02); OI_ASSERT(VALID_INT32(L02));
    120 
    121     FLOAT_BUTTERFLY(L00, L01);
    122 
    123     out[0] = (float)FLOAT_SCALE(L00, DCTII_8_SHIFT_0); OI_ASSERT(VALID_INT16(out[0]));
    124     out[4] = (float)FLOAT_SCALE(L01, DCTII_8_SHIFT_4); OI_ASSERT(VALID_INT16(out[4]));
    125 
    126     FLOAT_BUTTERFLY(L03, L02);
    127     out[6] = (float)FLOAT_SCALE(L02, DCTII_8_SHIFT_6); OI_ASSERT(VALID_INT16(out[6]));
    128     out[2] = (float)FLOAT_SCALE(L03, DCTII_8_SHIFT_2); OI_ASSERT(VALID_INT16(out[2]));
    129 
    130     L04 += L05; OI_ASSERT(VALID_INT32(L04));
    131     L05 += L06; OI_ASSERT(VALID_INT32(L05));
    132     L06 += L07; OI_ASSERT(VALID_INT32(L06));
    133 
    134     L04/=2;
    135     L05/=2;
    136     L06/=2;
    137     L07/=2;
    138 
    139     L05 = FLOAT_MULT_DCT(AAN_C4_FLOAT, L05); OI_ASSERT(VALID_INT32(L05));
    140 
    141     L25 = L06 - L04; OI_ASSERT(VALID_INT32(L25));
    142     L25 = FLOAT_MULT_DCT(AAN_C6_FLOAT, L25); OI_ASSERT(VALID_INT32(L25));
    143 
    144     L04 = FLOAT_MULT_DCT(AAN_Q0_FLOAT, L04); OI_ASSERT(VALID_INT32(L04));
    145     L04 -= L25; OI_ASSERT(VALID_INT32(L04));
    146 
    147     L06 = FLOAT_MULT_DCT(AAN_Q1_FLOAT, L06); OI_ASSERT(VALID_INT32(L06));
    148     L06 -= L25; OI_ASSERT(VALID_INT32(L25));
    149 
    150     FLOAT_BUTTERFLY(L07, L05);
    151 
    152     FLOAT_BUTTERFLY(L05, L04);
    153     out[3] = (float)(FLOAT_SCALE(L04, DCTII_8_SHIFT_3-1)); OI_ASSERT(VALID_INT16(out[3]));
    154     out[5] = (float)(FLOAT_SCALE(L05, DCTII_8_SHIFT_5-1)); OI_ASSERT(VALID_INT16(out[5]));
    155 
    156     FLOAT_BUTTERFLY(L07, L06);
    157     out[7] = (float)(FLOAT_SCALE(L06, DCTII_8_SHIFT_7-1)); OI_ASSERT(VALID_INT16(out[7]));
    158     out[1] = (float)(FLOAT_SCALE(L07, DCTII_8_SHIFT_1-1)); OI_ASSERT(VALID_INT16(out[1]));
    159 }
    160 #undef BUTTERFLY
    161 #endif
    162 
    163 
    164 /*
    165  * This function calculates the AAN DCT. Its inputs are in S16.15 format, as
    166  * returned by OI_SBC_Dequant. In practice, abs(in[x]) < 52429.0 / 1.38
    167  * (1244918057 integer). The function it computes is an approximation to the array defined
    168  * by:
    169  *
    170  * diag(aan_s) * AAN= C2
    171  *
    172  *   or
    173  *
    174  * AAN = diag(1/aan_s) * C2
    175  *
    176  * where C2 is as it is defined in the comment at the head of this file, and
    177  *
 * aan_s[i] = 1/(2*cos(i*pi/16)) with i = 1..7, aan_s[0] = 1;
    179  *
    180  * aan_s[i] = [ 1.000  0.510  0.541  0.601  0.707  0.900  1.307  2.563 ]
    181  *
    182  * The output ranges are shown as follows:
    183  *
    184  * Let Y[0..7] = AAN * X[0..7]
    185  *
    186  * Without loss of generality, assume the input vector X consists of elements
    187  * between -1 and 1. The maximum possible value of a given output element occurs
    188  * with some particular combination of input vector elements each of which is -1
 * or 1. Consider the computation of Y[i]. Y[i] = sum t=0..7 of AAN[t,i]*X[t]. Y[i] is
 * maximized if the sign of X[t] matches the sign of AAN[t,i], ensuring a
    191  * positive contribution to the sum. Equivalently, one may simply sum
    192  * abs(AAN)[t,i] over t to get the maximum possible value of Y[i].
    193  *
    194  * This yields approximately [8.00  10.05   9.66   8.52   8.00   5.70   4.00   2.00]
    195  *
    196  * Given the maximum magnitude sensible input value of +/-37992, this yields the
    197  * following vector of maximum output magnitudes:
    198  *
    199  * [ 303936  381820  367003  323692  303936  216555  151968   75984 ]
    200  *
    201  * Ultimately, these values must fit into 16 bit signed integers, so they must
    202  * be scaled. A non-uniform scaling helps maximize the kept precision. The
    203  * relative number of extra bits of precision maintainable with respect to the
    204  * largest value is given here:
    205  *
    206  * [ 0  0  0  0  0  0  1  2 ]
    207  *
    208  */
    209 PRIVATE void dct2_8(SBC_BUFFER_T * RESTRICT out, OI_INT32 const *RESTRICT in)
    210 {
    211 #define BUTTERFLY(x,y) x += y; y = x - (y<<1);
    212 #define FIX_MULT_DCT(K, x) (MUL_32S_32S_HI(K,x)<<2)
    213 
    214     OI_INT32 L00,L01,L02,L03,L04,L05,L06,L07;
    215     OI_INT32 L25;
    216 
    217     OI_INT32 in0,in1,in2,in3;
    218     OI_INT32 in4,in5,in6,in7;
    219 
    220 #if DCTII_8_SHIFT_IN != 0
    221     in0 = SCALE(in[0], DCTII_8_SHIFT_IN);
    222     in1 = SCALE(in[1], DCTII_8_SHIFT_IN);
    223     in2 = SCALE(in[2], DCTII_8_SHIFT_IN);
    224     in3 = SCALE(in[3], DCTII_8_SHIFT_IN);
    225     in4 = SCALE(in[4], DCTII_8_SHIFT_IN);
    226     in5 = SCALE(in[5], DCTII_8_SHIFT_IN);
    227     in6 = SCALE(in[6], DCTII_8_SHIFT_IN);
    228     in7 = SCALE(in[7], DCTII_8_SHIFT_IN);
    229 #else
    230     in0 = in[0];
    231     in1 = in[1];
    232     in2 = in[2];
    233     in3 = in[3];
    234     in4 = in[4];
    235     in5 = in[5];
    236     in6 = in[6];
    237     in7 = in[7];
    238 #endif
    239 
    240     L00 = in0 + in7;
    241     L01 = in1 + in6;
    242     L02 = in2 + in5;
    243     L03 = in3 + in4;
    244 
    245     L04 = in3 - in4;
    246     L05 = in2 - in5;
    247     L06 = in1 - in6;
    248     L07 = in0 - in7;
    249 
    250     BUTTERFLY(L00, L03);
    251     BUTTERFLY(L01, L02);
    252 
    253     L02 += L03;
    254 
    255     L02 = FIX_MULT_DCT(AAN_C4_FIX, L02);
    256 
    257     BUTTERFLY(L00, L01);
    258 
    259     out[0] = (OI_INT16)SCALE(L00, DCTII_8_SHIFT_0);
    260     out[4] = (OI_INT16)SCALE(L01, DCTII_8_SHIFT_4);
    261 
    262     BUTTERFLY(L03, L02);
    263     out[6] = (OI_INT16)SCALE(L02, DCTII_8_SHIFT_6);
    264     out[2] = (OI_INT16)SCALE(L03, DCTII_8_SHIFT_2);
    265 
    266     L04 += L05;
    267     L05 += L06;
    268     L06 += L07;
    269 
    270     L04/=2;
    271     L05/=2;
    272     L06/=2;
    273     L07/=2;
    274 
    275     L05 = FIX_MULT_DCT(AAN_C4_FIX, L05);
    276 
    277     L25 = L06 - L04;
    278     L25 = FIX_MULT_DCT(AAN_C6_FIX, L25);
    279 
    280     L04 = FIX_MULT_DCT(AAN_Q0_FIX, L04);
    281     L04 -= L25;
    282 
    283     L06 = FIX_MULT_DCT(AAN_Q1_FIX, L06);
    284     L06 -= L25;
    285 
    286     BUTTERFLY(L07, L05);
    287 
    288     BUTTERFLY(L05, L04);
    289     out[3] = (OI_INT16)SCALE(L04, DCTII_8_SHIFT_3-1);
    290     out[5] = (OI_INT16)SCALE(L05, DCTII_8_SHIFT_5-1);
    291 
    292     BUTTERFLY(L07, L06);
    293     out[7] = (OI_INT16)SCALE(L06, DCTII_8_SHIFT_7-1);
    294     out[1] = (OI_INT16)SCALE(L07, DCTII_8_SHIFT_1-1);
    295 #undef BUTTERFLY
    296 
    297 #ifdef DEBUG_DCT
    298     {
    299         float float_out[8];
    300         float_dct2_8(float_out, in);
    301     }
    302 #endif
    303 }
    304 
    305 /**@}*/
    306