Home | History | Annotate | Download | only in src
      1 /* ------------------------------------------------------------------
      2  * Copyright (C) 1998-2009 PacketVideo
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
     13  * express or implied.
     14  * See the License for the specific language governing permissions
     15  * and limitations under the License.
     16  * -------------------------------------------------------------------
     17  */
     18 /*
     19 ------------------------------------------------------------------------------
     20  MODULE DESCRIPTION
     21 
     22  This file contains the functions that transform an 8r8 image block from
     23  dequantized DCT coefficients to spatial domain pirel values by calculating
     24  inverse discrete cosine transform (IDCT).
     25 
     26 ------------------------------------------------------------------------------
     27 */
     28 /*----------------------------------------------------------------------------
     29 ; INCLUDES
     30 ----------------------------------------------------------------------------*/
     31 #include "mp4dec_lib.h"
     32 #include "idct.h"
     33 #include "motion_comp.h"
     34 #ifndef FAST_IDCT
     35 
     36 /*
     37 ------------------------------------------------------------------------------
     38  FUNCTION NAME: idct
     39 ------------------------------------------------------------------------------
     40  INPUT AND OUTPUT DEFINITIONS FOR idct
     41 
     42  Inputs:
     43     blk = pointer to the buffer containing the dequantized DCT
     44           coefficients of type int for an 8r8 image block;
     45           values range from (-2048, 2047) which defined as standard.
     46 
     47  Local Stores/Buffers/Pointers Needed:
     48     None
     49 
     50  Global Stores/Buffers/Pointers Needed:
     51     None
     52 
     53  Outputs:
     54     None
     55 
     56  Pointers and Buffers Modified:
     57     blk points to the found IDCT values for an 8r8 image block.
     58 
     59  Local Stores Modified:
     60     None
     61 
     62  Global Stores Modified:
     63     None
     64 
     65 ------------------------------------------------------------------------------
     66  FUNCTION DESCRIPTION FOR idct
     67 
     68  This function transforms an 8r8 image block from dequantized DCT coefficients
     69  (F(u,v)) to spatial domain pirel values (f(r,y)) by performing the two
     70  dimensional inverse discrete cosine transform (IDCT).
     71 
     72          _7_ _7_      C(u) C(v)
     73     f(r,y) = \   \  F(u,v)---- ----cos[(2r+1)*u*pi/16]cos[(2y+1)*v*pi/16]
     74          /__ /__    2    2
     75          u=0 v=0
     76 
     77     where   C(i) = 1/sqrt(2)    if i=0
     78         C(i) = 1        otherwise
     79 
     80  2-D IDCT can be separated as horizontal(row-wise) and vertical(column-wise)
     81  1-D IDCTs. Therefore, 2-D IDCT values are found by the following two steps:
     82  1. Find horizontal 1-D IDCT values for each row from 8r8 dequantized DCT
     83     coefficients by row IDCT operation.
     84 
     85           _7_        C(u)
     86     g(r,v) =  \   F(u,v) ---- cos[(2r+1)*u*pi/16]
     87           /__         2
     88           u=0
     89 
     90  2. Find vertical 1-D IDCT values for each column from the results of 1
     91     by column IDCT operation.
     92 
     93               _7_        C(v)
     94     f(r,y) =  \   g(r,v) ---- cos[(2y+1)*v*pi/16]
     95           /__         2
     96           v=0
     97 
     98 ------------------------------------------------------------------------------
     99  REQUIREMENTS FOR idct
    100 
    101  None
    102 
    103 ------------------------------------------------------------------------------
    104 */
    105 /*  REFERENCES FOR idct */
    106 /* idct.c, inverse fast discrete cosine transform
    107  inverse two dimensional DCT, Chen-Wang algorithm
    108  (cf. IEEE ASSP-32, pp. 803-816, Aug. 1984)
    109  32-bit integer arithmetic (8 bit coefficients)
    110  11 mults, 29 adds per DCT
    111  sE, 18.8.91
    112 
    113  coefficients ertended to 12 bit for IEEE1180-1990
    114  compliance                           sE,  2.1.94
    115 */
    116 
    117 
    118 /*----------------------------------------------------------------------------
    119 ; Function Code FOR idct
    120 ----------------------------------------------------------------------------*/
    121 void idct_intra(
    122     int *blk, uint8 *comp, int width
    123 )
    124 {
    125     /*----------------------------------------------------------------------------
    126     ; Define all local variables
    127     ----------------------------------------------------------------------------*/
    128     int i;
    129     int32   tmpBLK[64];
    130     int32   *tmpBLK32 = &tmpBLK[0];
    131     int32   r0, r1, r2, r3, r4, r5, r6, r7, r8; /* butterfly nodes */
    132     int32   a;
    133     int offset = width - 8;
    134     /*----------------------------------------------------------------------------
    135     ; Function body here
    136     ----------------------------------------------------------------------------*/
    137     /* two dimensional inverse discrete cosine transform */
    138 
    139 
    140     /* column (vertical) IDCT */
    141     for (i = B_SIZE - 1; i >= 0; i--)
    142     {
    143         /* initialize butterfly nodes at first stage */
    144 
    145         r1 = blk[B_SIZE * 4 + i] << 11;
    146         /* since row IDCT results have net left shift by 3 */
    147         /* this left shift by 8 gives net left shift by 11 */
    148         /* in order to maintain the same scale as that of  */
    149         /* coefficients Wi */
    150 
    151         r2 = blk[B_SIZE * 6 + i];
    152         r3 = blk[B_SIZE * 2 + i];
    153         r4 = blk[B_SIZE * 1 + i];
    154         r5 = blk[B_SIZE * 7 + i];
    155         r6 = blk[B_SIZE * 5 + i];
    156         r7 = blk[B_SIZE * 3 + i];
    157 
    158         if (!(r1 | r2 | r3 | r4 | r5 | r6 | r7))
    159         {
    160             /* shortcut */
    161             /* execute if values of g(r,1) to g(r,7) in a column*/
    162             /* are all zeros */
    163 
    164             /* make output of IDCT >>3 or scaled by 1/8 and */
    165             /* with the proper rounding */
    166             a = (blk[B_SIZE * 0 + i]) << 3;
    167             tmpBLK32[B_SIZE * 0 + i] = a;
    168             tmpBLK32[B_SIZE * 1 + i] = a;
    169             tmpBLK32[B_SIZE * 2 + i] = a;
    170             tmpBLK32[B_SIZE * 3 + i] = a;
    171             tmpBLK32[B_SIZE * 4 + i] = a;
    172             tmpBLK32[B_SIZE * 5 + i] = a;
    173             tmpBLK32[B_SIZE * 6 + i] = a;
    174             tmpBLK32[B_SIZE * 7 + i] = a;
    175         }
    176         else
    177         {
    178             r0 = (blk[8 * 0 + i] << 11) + 128;
    179 
    180             /* first stage */
    181 
    182             r8 = W7 * (r4 + r5);
    183             r4 = (r8 + (W1 - W7) * r4);
    184             /* Multiplication with Wi increases the net left */
    185             /* shift from 11 to 14,we have to shift back by 3*/
    186             r5 = (r8 - (W1 + W7) * r5);
    187             r8 = W3 * (r6 + r7);
    188             r6 = (r8 - (W3 - W5) * r6);
    189             r7 = (r8 - (W3 + W5) * r7);
    190 
    191             /* second stage */
    192             r8 = r0 + r1;
    193             r0 -= r1;
    194 
    195             r1 = W6 * (r3 + r2);
    196             r2 = (r1 - (W2 + W6) * r2);
    197             r3 = (r1 + (W2 - W6) * r3);
    198 
    199             r1 = r4 + r6;
    200             r4 -= r6;
    201             r6 = r5 + r7;
    202             r5 -= r7;
    203 
    204             /* third stage */
    205             r7 = r8 + r3;
    206             r8 -= r3;
    207             r3 = r0 + r2;
    208             r0 -= r2;
    209             r2 = (181 * (r4 + r5) + 128) >> 8;  /* rounding */
    210             r4 = (181 * (r4 - r5) + 128) >> 8;
    211 
    212             /* fourth stage */
    213             /* net shift of IDCT is >>3 after the following */
    214             /* shift operation, it makes output of 2-D IDCT */
    215             /* scaled by 1/8, that is scaled twice by       */
    216             /* 1/(2*sqrt(2)) for row IDCT and column IDCT.  */
    217             /* see detail analysis in design doc.           */
    218             tmpBLK32[0 + i] = (r7 + r1) >> 8;
    219             tmpBLK32[(1<<3) + i] = (r3 + r2) >> 8;
    220             tmpBLK32[(2<<3) + i] = (r0 + r4) >> 8;
    221             tmpBLK32[(3<<3) + i] = (r8 + r6) >> 8;
    222             tmpBLK32[(4<<3) + i] = (r8 - r6) >> 8;
    223             tmpBLK32[(5<<3) + i] = (r0 - r4) >> 8;
    224             tmpBLK32[(6<<3) + i] = (r3 - r2) >> 8;
    225             tmpBLK32[(7<<3) + i] = (r7 - r1) >> 8;
    226         }
    227     }
    228     /* row (horizontal) IDCT */
    229     for (i = 0 ; i < B_SIZE; i++)
    230     {
    231         /* initialize butterfly nodes at the first stage */
    232 
    233         r1 = ((int32)tmpBLK32[4+(i<<3)]) << 8;
    234         /* r1 left shift by 11 is to maintain the same  */
    235         /* scale as that of coefficients (W1,...W7) */
    236         /* since blk[4] won't multiply with Wi.     */
    237         /* see detail diagram in design document.   */
    238 
    239         r2 = tmpBLK32[6+(i<<3)];
    240         r3 = tmpBLK32[2+(i<<3)];
    241         r4 = tmpBLK32[1+(i<<3)];
    242         r5 = tmpBLK32[7+(i<<3)];
    243         r6 = tmpBLK32[5+(i<<3)];
    244         r7 = tmpBLK32[3+(i<<3)];
    245 
    246         if (!(r1 | r2 | r3 | r4 | r5 | r6 | r7))
    247         {
    248             /* shortcut */
    249             /* execute if values of F(1,v) to F(7,v) in a row*/
    250             /* are all zeros */
    251 
    252             /* output of row IDCT scaled by 8 */
    253             a = (((int32)tmpBLK32[0+(i<<3)] + 32) >> 6);
    254             CLIP_RESULT(a)
    255             *comp++ = a;
    256             *comp++ = a;
    257             *comp++ = a;
    258             *comp++ = a;
    259             *comp++ = a;
    260             *comp++ = a;
    261             *comp++ = a;
    262             *comp++ = a;
    263 
    264             comp += offset;
    265         }
    266 
    267         else
    268         {
    269             /* for proper rounding in the fourth stage */
    270             r0 = (((int32)tmpBLK32[0+(i<<3)]) << 8) + 8192;
    271 
    272             /* first stage */
    273 
    274             r8 = W7 * (r4 + r5) + 4;
    275             r4 = (r8 + (W1 - W7) * r4) >> 3;
    276             r5 = (r8 - (W1 + W7) * r5) >> 3;
    277 
    278             r8 = W3 * (r6 + r7) + 4;
    279             r6 = (r8 - (W3 - W5) * r6) >> 3;
    280             r7 = (r8 - (W3 + W5) * r7) >> 3;
    281 
    282             /* second stage */
    283             r8 = r0 + r1;
    284             r0 -= r1;
    285 
    286             r1 = W6 * (r3 + r2) + 4;
    287             r2 = (r1 - (W2 + W6) * r2) >> 3;
    288             r3 = (r1 + (W2 - W6) * r3) >> 3;
    289 
    290             r1 = r4 + r6;
    291             r4 -= r6;
    292             r6 = r5 + r7;
    293             r5 -= r7;
    294 
    295             /* third stage */
    296             r7 = r8 + r3;
    297             r8 -= r3;
    298             r3 = r0 + r2;
    299             r0 -= r2;
    300             r2 = (181 * (r4 + r5) + 128) >> 8;    /* rounding */
    301             r4 = (181 * (r4 - r5) + 128) >> 8;
    302 
    303             /* fourth stage */
    304             /* net shift of this function is <<3 after the    */
    305             /* following shift operation, it makes output of  */
    306             /* row IDCT scaled by 8 to retain 3 bits precision*/
    307             a = ((r7 + r1) >> 14);
    308             CLIP_RESULT(a)
    309             *comp++ = a;
    310             a = ((r3 + r2) >> 14);
    311             CLIP_RESULT(a)
    312             *comp++ = a;
    313             a = ((r0 + r4) >> 14);
    314             CLIP_RESULT(a)
    315             *comp++ = a;
    316             a = ((r8 + r6) >> 14);
    317             CLIP_RESULT(a)
    318             *comp++ = a;
    319             a = ((r8 - r6) >> 14);
    320             CLIP_RESULT(a)
    321             *comp++ = a;
    322             a = ((r0 - r4) >> 14);
    323             CLIP_RESULT(a)
    324             *comp++ = a;
    325             a = ((r3 - r2) >> 14);
    326             CLIP_RESULT(a)
    327             *comp++ = a;
    328             a = ((r7 - r1) >> 14);
    329             CLIP_RESULT(a)
    330             *comp++ = a;
    331 
    332             comp += offset;
    333         }
    334     }
    335 
    336 
    337 
    338     /*----------------------------------------------------------------------------
    339     ; Return nothing or data or data pointer
    340     ----------------------------------------------------------------------------*/
    341     return;
    342 }
    343 
    344 void idct(
    345     int *blk, uint8 *pred, uint8 *dst, int width)
    346 {
    347     /*----------------------------------------------------------------------------
    348     ; Define all local variables
    349     ----------------------------------------------------------------------------*/
    350     int i;
    351     int32   tmpBLK[64];
    352     int32   *tmpBLK32 = &tmpBLK[0];
    353     int32   r0, r1, r2, r3, r4, r5, r6, r7, r8; /* butterfly nodes */
    354     int32   a;
    355     int res;
    356 
    357     /*----------------------------------------------------------------------------
    358     ; Function body here
    359     ----------------------------------------------------------------------------*/
    360     /* two dimensional inverse discrete cosine transform */
    361 
    362 
    363     /* column (vertical) IDCT */
    364     for (i = B_SIZE - 1; i >= 0; i--)
    365     {
    366         /* initialize butterfly nodes at first stage */
    367 
    368         r1 = blk[B_SIZE * 4 + i] << 11;
    369         /* since row IDCT results have net left shift by 3 */
    370         /* this left shift by 8 gives net left shift by 11 */
    371         /* in order to maintain the same scale as that of  */
    372         /* coefficients Wi */
    373 
    374         r2 = blk[B_SIZE * 6 + i];
    375         r3 = blk[B_SIZE * 2 + i];
    376         r4 = blk[B_SIZE * 1 + i];
    377         r5 = blk[B_SIZE * 7 + i];
    378         r6 = blk[B_SIZE * 5 + i];
    379         r7 = blk[B_SIZE * 3 + i];
    380 
    381         if (!(r1 | r2 | r3 | r4 | r5 | r6 | r7))
    382         {
    383             /* shortcut */
    384             /* execute if values of g(r,1) to g(r,7) in a column*/
    385             /* are all zeros */
    386 
    387             /* make output of IDCT >>3 or scaled by 1/8 and */
    388             /* with the proper rounding */
    389             a = (blk[B_SIZE * 0 + i]) << 3;
    390             tmpBLK32[B_SIZE * 0 + i] = a;
    391             tmpBLK32[B_SIZE * 1 + i] = a;
    392             tmpBLK32[B_SIZE * 2 + i] = a;
    393             tmpBLK32[B_SIZE * 3 + i] = a;
    394             tmpBLK32[B_SIZE * 4 + i] = a;
    395             tmpBLK32[B_SIZE * 5 + i] = a;
    396             tmpBLK32[B_SIZE * 6 + i] = a;
    397             tmpBLK32[B_SIZE * 7 + i] = a;
    398         }
    399         else
    400         {
    401             r0 = (blk[8 * 0 + i] << 11) + 128;
    402 
    403             /* first stage */
    404 
    405             r8 = W7 * (r4 + r5);
    406             r4 = (r8 + (W1 - W7) * r4);
    407             /* Multiplication with Wi increases the net left */
    408             /* shift from 11 to 14,we have to shift back by 3*/
    409             r5 = (r8 - (W1 + W7) * r5);
    410             r8 = W3 * (r6 + r7);
    411             r6 = (r8 - (W3 - W5) * r6);
    412             r7 = (r8 - (W3 + W5) * r7);
    413 
    414             /* second stage */
    415             r8 = r0 + r1;
    416             r0 -= r1;
    417 
    418             r1 = W6 * (r3 + r2);
    419             r2 = (r1 - (W2 + W6) * r2);
    420             r3 = (r1 + (W2 - W6) * r3);
    421 
    422             r1 = r4 + r6;
    423             r4 -= r6;
    424             r6 = r5 + r7;
    425             r5 -= r7;
    426 
    427             /* third stage */
    428             r7 = r8 + r3;
    429             r8 -= r3;
    430             r3 = r0 + r2;
    431             r0 -= r2;
    432             r2 = (181 * (r4 + r5) + 128) >> 8;  /* rounding */
    433             r4 = (181 * (r4 - r5) + 128) >> 8;
    434 
    435             /* fourth stage */
    436             /* net shift of IDCT is >>3 after the following */
    437             /* shift operation, it makes output of 2-D IDCT */
    438             /* scaled by 1/8, that is scaled twice by       */
    439             /* 1/(2*sqrt(2)) for row IDCT and column IDCT.  */
    440             /* see detail analysis in design doc.           */
    441             tmpBLK32[0 + i] = (r7 + r1) >> 8;
    442             tmpBLK32[(1<<3) + i] = (r3 + r2) >> 8;
    443             tmpBLK32[(2<<3) + i] = (r0 + r4) >> 8;
    444             tmpBLK32[(3<<3) + i] = (r8 + r6) >> 8;
    445             tmpBLK32[(4<<3) + i] = (r8 - r6) >> 8;
    446             tmpBLK32[(5<<3) + i] = (r0 - r4) >> 8;
    447             tmpBLK32[(6<<3) + i] = (r3 - r2) >> 8;
    448             tmpBLK32[(7<<3) + i] = (r7 - r1) >> 8;
    449         }
    450     }
    451     /* row (horizontal) IDCT */
    452     for (i = B_SIZE - 1; i >= 0; i--)
    453     {
    454         /* initialize butterfly nodes at the first stage */
    455 
    456         r1 = ((int32)tmpBLK32[4+(i<<3)]) << 8;
    457         /* r1 left shift by 11 is to maintain the same  */
    458         /* scale as that of coefficients (W1,...W7) */
    459         /* since blk[4] won't multiply with Wi.     */
    460         /* see detail diagram in design document.   */
    461 
    462         r2 = tmpBLK32[6+(i<<3)];
    463         r3 = tmpBLK32[2+(i<<3)];
    464         r4 = tmpBLK32[1+(i<<3)];
    465         r5 = tmpBLK32[7+(i<<3)];
    466         r6 = tmpBLK32[5+(i<<3)];
    467         r7 = tmpBLK32[3+(i<<3)];
    468 
    469         if (!(r1 | r2 | r3 | r4 | r5 | r6 | r7))
    470         {
    471             /* shortcut */
    472             /* execute if values of F(1,v) to F(7,v) in a row*/
    473             /* are all zeros */
    474 
    475             /* output of row IDCT scaled by 8 */
    476             a = (tmpBLK32[0+(i<<3)] + 32) >> 6;
    477             blk[0+(i<<3)] = a;
    478             blk[1+(i<<3)] = a;
    479             blk[2+(i<<3)] = a;
    480             blk[3+(i<<3)] = a;
    481             blk[4+(i<<3)] = a;
    482             blk[5+(i<<3)] = a;
    483             blk[6+(i<<3)] = a;
    484             blk[7+(i<<3)] = a;
    485 
    486         }
    487 
    488         else
    489         {
    490             /* for proper rounding in the fourth stage */
    491             r0 = (((int32)tmpBLK32[0+(i<<3)]) << 8) + 8192;
    492 
    493             /* first stage */
    494 
    495             r8 = W7 * (r4 + r5) + 4;
    496             r4 = (r8 + (W1 - W7) * r4) >> 3;
    497             r5 = (r8 - (W1 + W7) * r5) >> 3;
    498 
    499             r8 = W3 * (r6 + r7) + 4;
    500             r6 = (r8 - (W3 - W5) * r6) >> 3;
    501             r7 = (r8 - (W3 + W5) * r7) >> 3;
    502 
    503             /* second stage */
    504             r8 = r0 + r1;
    505             r0 -= r1;
    506 
    507             r1 = W6 * (r3 + r2) + 4;
    508             r2 = (r1 - (W2 + W6) * r2) >> 3;
    509             r3 = (r1 + (W2 - W6) * r3) >> 3;
    510 
    511             r1 = r4 + r6;
    512             r4 -= r6;
    513             r6 = r5 + r7;
    514             r5 -= r7;
    515 
    516             /* third stage */
    517             r7 = r8 + r3;
    518             r8 -= r3;
    519             r3 = r0 + r2;
    520             r0 -= r2;
    521             r2 = (181 * (r4 + r5) + 128) >> 8;    /* rounding */
    522             r4 = (181 * (r4 - r5) + 128) >> 8;
    523 
    524             /* fourth stage */
    525             /* net shift of this function is <<3 after the    */
    526             /* following shift operation, it makes output of  */
    527             /* row IDCT scaled by 8 to retain 3 bits precision*/
    528             blk[0+(i<<3)] = (r7 + r1) >> 14;
    529             blk[1+(i<<3)] = (r3 + r2) >> 14;
    530             blk[2+(i<<3)] = (r0 + r4) >> 14;
    531             blk[3+(i<<3)] = (r8 + r6) >> 14;
    532             blk[4+(i<<3)] = (r8 - r6) >> 14;
    533             blk[5+(i<<3)] = (r0 - r4) >> 14;
    534             blk[6+(i<<3)] = (r3 - r2) >> 14;
    535             blk[7+(i<<3)] = (r7 - r1) >> 14;
    536         }
    537         /*  add with prediction ,  08/03/05 */
    538         res = (*pred++ + block[0+(i<<3)]);
    539         CLIP_RESULT(res);
    540         *dst++ = res;
    541         res = (*pred++ + block[1+(i<<3)]);
    542         CLIP_RESULT(res);
    543         *dst++ = res;
    544         res = (*pred++ + block[2+(i<<3)]);
    545         CLIP_RESULT(res);
    546         *dst++ = res;
    547         res = (*pred++ + block[3+(i<<3)]);
    548         CLIP_RESULT(res);
    549         *dst++ = res;
    550         res = (*pred++ + block[4+(i<<3)]);
    551         CLIP_RESULT(res);
    552         *dst++ = res;
    553         res = (*pred++ + block[5+(i<<3)]);
    554         CLIP_RESULT(res);
    555         *dst++ = res;
    556         res = (*pred++ + block[6+(i<<3)]);
    557         CLIP_RESULT(res);
    558         *dst++ = res;
    559         res = (*pred++ + block[7+(i<<3)]);
    560         CLIP_RESULT(res);
    561         *dst++ = res;
    562 
    563         pred += 8;
    564         dst += (width - 8);
    565     }
    566 
    567 
    568 
    569     /*----------------------------------------------------------------------------
    570     ; Return nothing or data or data pointer
    571     ----------------------------------------------------------------------------*/
    572     return;
    573 }
    574 
    575 #endif
    576 /*----------------------------------------------------------------------------
    577 ; End Function: idct
    578 ----------------------------------------------------------------------------*/
    579 
    580