Home | History | Annotate | Download | only in src
      1 /* ------------------------------------------------------------------
      2  * Copyright (C) 1998-2009 PacketVideo
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
     13  * express or implied.
     14  * See the License for the specific language governing permissions
     15  * and limitations under the License.
     16  * -------------------------------------------------------------------
     17  */
     18 /*
     19 ------------------------------------------------------------------------------
     20  INPUT AND OUTPUT DEFINITIONS
     21 
     22  Inputs:
     23     [input_variable_name] = [description of the input to module, its type
     24                  definition, and length (when applicable)]
     25 
     26  Local Stores/Buffers/Pointers Needed:
     27     [local_store_name] = [description of the local store, its type
     28                   definition, and length (when applicable)]
     29     [local_buffer_name] = [description of the local buffer, its type
     30                    definition, and length (when applicable)]
     31     [local_ptr_name] = [description of the local pointer, its type
     32                 definition, and length (when applicable)]
     33 
     34  Global Stores/Buffers/Pointers Needed:
     35     [global_store_name] = [description of the global store, its type
     36                    definition, and length (when applicable)]
     37     [global_buffer_name] = [description of the global buffer, its type
     38                 definition, and length (when applicable)]
     39     [global_ptr_name] = [description of the global pointer, its type
     40                  definition, and length (when applicable)]
     41 
     42  Outputs:
     43     [return_variable_name] = [description of data/pointer returned
     44                   by module, its type definition, and length
     45                   (when applicable)]
     46 
     47  Pointers and Buffers Modified:
     48     [variable_bfr_ptr] points to the [describe where the
     49       variable_bfr_ptr points to, its type definition, and length
     50       (when applicable)]
     51     [variable_bfr] contents are [describe the new contents of
     52       variable_bfr]
     53 
     54  Local Stores Modified:
     55     [local_store_name] = [describe new contents, its type
     56                   definition, and length (when applicable)]
     57 
     58  Global Stores Modified:
     59     [global_store_name] = [describe new contents, its type
     60                    definition, and length (when applicable)]
     61 
     62 ------------------------------------------------------------------------------
     63  FUNCTION DESCRIPTION
     64 
     65 ------------------------------------------------------------------------------
     66  REQUIREMENTS
     67 
     68 ------------------------------------------------------------------------------
     69  REFERENCES
     70 
     71 ------------------------------------------------------------------------------
     72  PSEUDO-CODE
     73 
     74 ------------------------------------------------------------------------------
     75  RESOURCES USED
     76    When the code is written for a specific target processor,
     77      the resources used should be documented below.
     78 
     79  STACK USAGE: [stack count for this module] + [variable to represent
     80           stack usage for each subroutine called]
     81 
     82      where: [stack usage variable] = stack usage for [subroutine
     83          name] (see [filename].ext)
     84 
     85  DATA MEMORY USED: x words
     86 
     87  PROGRAM MEMORY USED: x words
     88 
     89  CLOCK CYCLES: [cycle count equation for this module] + [variable
     90            used to represent cycle count for each subroutine
     91            called]
     92 
     93      where: [cycle count variable] = cycle count for [subroutine
     94         name] (see [filename].ext)
     95 
     96 ------------------------------------------------------------------------------
     97 */
     98 
     99 /*----------------------------------------------------------------------------
    100 ; INCLUDES
    101 ----------------------------------------------------------------------------*/
#include <string.h>

#include "mp4dec_lib.h"
#include "idct.h"
#include "motion_comp.h"
    105 
    106 #define OSCL_DISABLE_WARNING_CONV_POSSIBLE_LOSS_OF_DATA
    107 /*----------------------------------------------------------------------------
    108 ; MACROS
    109 ; Define module specific macros here
    110 ----------------------------------------------------------------------------*/
    111 
    112 /*----------------------------------------------------------------------------
    113 ; DEFINES
    114 ; Include all pre-processor statements here. Include conditional
    115 ; compile variables also.
    116 ----------------------------------------------------------------------------*/
    117 
    118 /*----------------------------------------------------------------------------
    119 ; LOCAL FUNCTION DEFINITIONS
    120 ; Function Prototype declaration
    121 ----------------------------------------------------------------------------*/
    122 /* private prototypes */
    123 static void idctrow(int16 *blk, uint8 *pred, uint8 *dst, int width);
    124 static void idctrow_intra(int16 *blk, PIXEL *, int width);
    125 static void idctcol(int16 *blk);
    126 
    127 #ifdef FAST_IDCT
    128 // mapping from nz_coefs to functions to be used
    129 
    130 
    131 // ARM4 does not allow global data when they are not constant hence
    132 // an array of function pointers cannot be considered as array of constants
    133 // (actual addresses are only known when the dll is loaded).
    134 // So instead of arrays of function pointers, we'll store here
    135 // arrays of rows or columns and then call the idct function
    136 // corresponding to such the row/column number:
    137 
    138 
    139 static void (*const idctcolVCA[10][4])(int16*) =
    140 {
    141     {&idctcol1, &idctcol0, &idctcol0, &idctcol0},
    142     {&idctcol1, &idctcol1, &idctcol0, &idctcol0},
    143     {&idctcol2, &idctcol1, &idctcol0, &idctcol0},
    144     {&idctcol3, &idctcol1, &idctcol0, &idctcol0},
    145     {&idctcol3, &idctcol2, &idctcol0, &idctcol0},
    146     {&idctcol3, &idctcol2, &idctcol1, &idctcol0},
    147     {&idctcol3, &idctcol2, &idctcol1, &idctcol1},
    148     {&idctcol3, &idctcol2, &idctcol2, &idctcol1},
    149     {&idctcol3, &idctcol3, &idctcol2, &idctcol1},
    150     {&idctcol4, &idctcol3, &idctcol2, &idctcol1}
    151 };
    152 
    153 
    154 static void (*const idctrowVCA[10])(int16*, uint8*, uint8*, int) =
    155 {
    156     &idctrow1,
    157     &idctrow2,
    158     &idctrow2,
    159     &idctrow2,
    160     &idctrow2,
    161     &idctrow3,
    162     &idctrow4,
    163     &idctrow4,
    164     &idctrow4,
    165     &idctrow4
    166 };
    167 
    168 
    169 static void (*const idctcolVCA2[16])(int16*) =
    170 {
    171     &idctcol0, &idctcol4, &idctcol3, &idctcol4,
    172     &idctcol2, &idctcol4, &idctcol3, &idctcol4,
    173     &idctcol1, &idctcol4, &idctcol3, &idctcol4,
    174     &idctcol2, &idctcol4, &idctcol3, &idctcol4
    175 };
    176 
    177 static void (*const idctrowVCA2[8])(int16*, uint8*, uint8*, int) =
    178 {
    179     &idctrow1, &idctrow4, &idctrow3, &idctrow4,
    180     &idctrow2, &idctrow4, &idctrow3, &idctrow4
    181 };
    182 
    183 static void (*const idctrowVCA_intra[10])(int16*, PIXEL *, int) =
    184 {
    185     &idctrow1_intra,
    186     &idctrow2_intra,
    187     &idctrow2_intra,
    188     &idctrow2_intra,
    189     &idctrow2_intra,
    190     &idctrow3_intra,
    191     &idctrow4_intra,
    192     &idctrow4_intra,
    193     &idctrow4_intra,
    194     &idctrow4_intra
    195 };
    196 
    197 static void (*const idctrowVCA2_intra[8])(int16*, PIXEL *, int) =
    198 {
    199     &idctrow1_intra, &idctrow4_intra, &idctrow3_intra, &idctrow4_intra,
    200     &idctrow2_intra, &idctrow4_intra, &idctrow3_intra, &idctrow4_intra
    201 };
    202 #endif
    203 
    204 /*----------------------------------------------------------------------------
    205 ; LOCAL STORE/BUFFER/POINTER DEFINITIONS
    206 ; Variable declaration - defined here and used outside this module
    207 ----------------------------------------------------------------------------*/
    208 
    209 /*----------------------------------------------------------------------------
    210 ; EXTERNAL FUNCTION REFERENCES
    211 ; Declare functions defined elsewhere and referenced in this module
    212 ----------------------------------------------------------------------------*/
    213 
    214 /*----------------------------------------------------------------------------
    215 ; EXTERNAL GLOBAL STORE/BUFFER/POINTER REFERENCES
    216 ; Declare variables used in this module but defined elsewhere
    217 ----------------------------------------------------------------------------*/
    218 
    219 /*----------------------------------------------------------------------------
    220 ; FUNCTION CODE
    221 ----------------------------------------------------------------------------*/
    222 void MBlockIDCT(VideoDecData *video)
    223 {
    224     Vop *currVop = video->currVop;
    225     MacroBlock *mblock = video->mblock;
    226     PIXEL *c_comp;
    227     PIXEL *cu_comp;
    228     PIXEL *cv_comp;
    229     int x_pos = video->mbnum_col;
    230     int y_pos = video->mbnum_row;
    231     int width, width_uv;
    232     int32 offset;
    233     width = video->width;
    234     width_uv = width >> 1;
    235     offset = (int32)(y_pos << 4) * width + (x_pos << 4);
    236 
    237     c_comp  = currVop->yChan + offset;
    238     cu_comp = currVop->uChan + (offset >> 2) + (x_pos << 2);
    239     cv_comp = currVop->vChan + (offset >> 2) + (x_pos << 2);
    240 
    241     BlockIDCT_intra(mblock, c_comp, 0, width);
    242     BlockIDCT_intra(mblock, c_comp + 8, 1, width);
    243     BlockIDCT_intra(mblock, c_comp + (width << 3), 2, width);
    244     BlockIDCT_intra(mblock, c_comp + (width << 3) + 8, 3, width);
    245     BlockIDCT_intra(mblock, cu_comp, 4, width_uv);
    246     BlockIDCT_intra(mblock, cv_comp, 5, width_uv);
    247 }
    248 
    249 
    250 void BlockIDCT_intra(
    251     MacroBlock *mblock, PIXEL *c_comp, int comp, int width)
    252 {
    253     /*----------------------------------------------------------------------------
    254     ; Define all local variables
    255     ----------------------------------------------------------------------------*/
    256     int16 *coeff_in = mblock->block[comp];
    257 #ifdef INTEGER_IDCT
    258 #ifdef FAST_IDCT  /* VCA IDCT using nzcoefs and bitmaps*/
    259     int i, bmapr;
    260     int nz_coefs = mblock->no_coeff[comp];
    261     uint8 *bitmapcol = mblock->bitmapcol[comp];
    262     uint8 bitmaprow = mblock->bitmaprow[comp];
    263 
    264     /*----------------------------------------------------------------------------
    265     ; Function body here
    266     ----------------------------------------------------------------------------*/
    267     if (nz_coefs <= 10)
    268     {
    269         bmapr = (nz_coefs - 1);
    270 
    271         (*(idctcolVCA[bmapr]))(coeff_in);
    272         (*(idctcolVCA[bmapr][1]))(coeff_in + 1);
    273         (*(idctcolVCA[bmapr][2]))(coeff_in + 2);
    274         (*(idctcolVCA[bmapr][3]))(coeff_in + 3);
    275 
    276         (*idctrowVCA_intra[nz_coefs-1])(coeff_in, c_comp, width);
    277     }
    278     else
    279     {
    280         i = 8;
    281         while (i--)
    282         {
    283             bmapr = (int)bitmapcol[i];
    284             if (bmapr)
    285             {
    286                 if ((bmapr&0xf) == 0)         /*  07/18/01 */
    287                 {
    288                     (*(idctcolVCA2[bmapr>>4]))(coeff_in + i);
    289                 }
    290                 else
    291                 {
    292                     idctcol(coeff_in + i);
    293                 }
    294             }
    295         }
    296         if ((bitmapcol[4] | bitmapcol[5] | bitmapcol[6] | bitmapcol[7]) == 0)
    297         {
    298             bitmaprow >>= 4;
    299             (*(idctrowVCA2_intra[(int)bitmaprow]))(coeff_in, c_comp, width);
    300         }
    301         else
    302         {
    303             idctrow_intra(coeff_in, c_comp, width);
    304         }
    305     }
    306 #else
    307     void idct_intra(int *block, uint8 *comp, int width);
    308     idct_intra(coeff_in, c_comp, width);
    309 #endif
    310 #else
    311     void idctref_intra(int *block, uint8 *comp, int width);
    312     idctref_intra(coeff_in, c_comp, width);
    313 #endif
    314 
    315 
    316     /*----------------------------------------------------------------------------
    317     ; Return nothing or data or data pointer
    318     ----------------------------------------------------------------------------*/
    319     return;
    320 }
    321 
    322 /*  08/04/05, no residue, just copy from pred to output */
    323 void Copy_Blk_to_Vop(uint8 *dst, uint8 *pred, int width)
    324 {
    325     /* copy 4 bytes at a time */
    326     width -= 4;
    327     *((uint32*)dst) = *((uint32*)pred);
    328     *((uint32*)(dst += 4)) = *((uint32*)(pred += 4));
    329     *((uint32*)(dst += width)) = *((uint32*)(pred += 12));
    330     *((uint32*)(dst += 4)) = *((uint32*)(pred += 4));
    331     *((uint32*)(dst += width)) = *((uint32*)(pred += 12));
    332     *((uint32*)(dst += 4)) = *((uint32*)(pred += 4));
    333     *((uint32*)(dst += width)) = *((uint32*)(pred += 12));
    334     *((uint32*)(dst += 4)) = *((uint32*)(pred += 4));
    335     *((uint32*)(dst += width)) = *((uint32*)(pred += 12));
    336     *((uint32*)(dst += 4)) = *((uint32*)(pred += 4));
    337     *((uint32*)(dst += width)) = *((uint32*)(pred += 12));
    338     *((uint32*)(dst += 4)) = *((uint32*)(pred += 4));
    339     *((uint32*)(dst += width)) = *((uint32*)(pred += 12));
    340     *((uint32*)(dst += 4)) = *((uint32*)(pred += 4));
    341     *((uint32*)(dst += width)) = *((uint32*)(pred += 12));
    342     *((uint32*)(dst += 4)) = *((uint32*)(pred += 4));
    343 
    344     return ;
    345 }
    346 
    347 /*  08/04/05 compute IDCT and add prediction at the end  */
    348 void BlockIDCT(
    349     uint8 *dst,  /* destination */
    350     uint8 *pred, /* prediction block, pitch 16 */
    351     int16   *coeff_in,  /* DCT data, size 64 */
    352     int width, /* width of dst */
    353     int nz_coefs,
    354     uint8 *bitmapcol,
    355     uint8 bitmaprow
    356 )
    357 {
    358 #ifdef INTEGER_IDCT
    359 #ifdef FAST_IDCT  /* VCA IDCT using nzcoefs and bitmaps*/
    360     int i, bmapr;
    361     /*----------------------------------------------------------------------------
    362     ; Function body here
    363     ----------------------------------------------------------------------------*/
    364     if (nz_coefs <= 10)
    365     {
    366         bmapr = (nz_coefs - 1);
    367         (*(idctcolVCA[bmapr]))(coeff_in);
    368         (*(idctcolVCA[bmapr][1]))(coeff_in + 1);
    369         (*(idctcolVCA[bmapr][2]))(coeff_in + 2);
    370         (*(idctcolVCA[bmapr][3]))(coeff_in + 3);
    371 
    372         (*idctrowVCA[nz_coefs-1])(coeff_in, pred, dst, width);
    373         return ;
    374     }
    375     else
    376     {
    377         i = 8;
    378 
    379         while (i--)
    380         {
    381             bmapr = (int)bitmapcol[i];
    382             if (bmapr)
    383             {
    384                 if ((bmapr&0xf) == 0)         /*  07/18/01 */
    385                 {
    386                     (*(idctcolVCA2[bmapr>>4]))(coeff_in + i);
    387                 }
    388                 else
    389                 {
    390                     idctcol(coeff_in + i);
    391                 }
    392             }
    393         }
    394         if ((bitmapcol[4] | bitmapcol[5] | bitmapcol[6] | bitmapcol[7]) == 0)
    395         {
    396             (*(idctrowVCA2[bitmaprow>>4]))(coeff_in, pred, dst, width);
    397         }
    398         else
    399         {
    400             idctrow(coeff_in, pred, dst, width);
    401         }
    402         return ;
    403     }
    404 #else // FAST_IDCT
    405     void idct(int *block, uint8 *pred, uint8 *dst, int width);
    406     idct(coeff_in, pred, dst, width);
    407     return;
    408 #endif // FAST_IDCT
    409 #else // INTEGER_IDCT
    410     void idctref(int *block, uint8 *pred, uint8 *dst, int width);
    411     idctref(coeff_in, pred, dst, width);
    412     return;
    413 #endif // INTEGER_IDCT
    414 
    415 }
    416 /*----------------------------------------------------------------------------
    417 ;  End Function: block_idct
    418 ----------------------------------------------------------------------------*/
    419 
    420 
    421 /****************************************************************************/
    422 
    423 /*
    424 ------------------------------------------------------------------------------
    425  FUNCTION NAME: idctrow
    426 ------------------------------------------------------------------------------
    427  INPUT AND OUTPUT DEFINITIONS FOR idctrow
    428 
    429  Inputs:
    430     [input_variable_name] = [description of the input to module, its type
    431                  definition, and length (when applicable)]
    432 
    433  Local Stores/Buffers/Pointers Needed:
    434     [local_store_name] = [description of the local store, its type
    435                   definition, and length (when applicable)]
    436     [local_buffer_name] = [description of the local buffer, its type
    437                    definition, and length (when applicable)]
    438     [local_ptr_name] = [description of the local pointer, its type
    439                 definition, and length (when applicable)]
    440 
    441  Global Stores/Buffers/Pointers Needed:
    442     [global_store_name] = [description of the global store, its type
    443                    definition, and length (when applicable)]
    444     [global_buffer_name] = [description of the global buffer, its type
    445                 definition, and length (when applicable)]
    446     [global_ptr_name] = [description of the global pointer, its type
    447                  definition, and length (when applicable)]
    448 
    449  Outputs:
    450     [return_variable_name] = [description of data/pointer returned
    451                   by module, its type definition, and length
    452                   (when applicable)]
    453 
    454  Pointers and Buffers Modified:
    455     [variable_bfr_ptr] points to the [describe where the
    456       variable_bfr_ptr points to, its type definition, and length
    457       (when applicable)]
    458     [variable_bfr] contents are [describe the new contents of
    459       variable_bfr]
    460 
    461  Local Stores Modified:
    462     [local_store_name] = [describe new contents, its type
    463                   definition, and length (when applicable)]
    464 
    465  Global Stores Modified:
    466     [global_store_name] = [describe new contents, its type
    467                    definition, and length (when applicable)]
    468 
    469 ------------------------------------------------------------------------------
    470  FUNCTION DESCRIPTION FOR idctrow
    471 
    472 ------------------------------------------------------------------------------
    473  REQUIREMENTS FOR idctrow
    474 
    475 ------------------------------------------------------------------------------
    476  REFERENCES FOR idctrow
    477 
    478 ------------------------------------------------------------------------------
    479  PSEUDO-CODE FOR idctrow
    480 
    481 ------------------------------------------------------------------------------
    482  RESOURCES USED FOR idctrow
    483    When the code is written for a specific target processor,
    484      the resources used should be documented below.
    485 
    486  STACK USAGE: [stack count for this module] + [variable to represent
    487           stack usage for each subroutine called]
    488 
    489      where: [stack usage variable] = stack usage for [subroutine
    490          name] (see [filename].ext)
    491 
    492  DATA MEMORY USED: x words
    493 
    494  PROGRAM MEMORY USED: x words
    495 
    496  CLOCK CYCLES: [cycle count equation for this module] + [variable
    497            used to represent cycle count for each subroutine
    498            called]
    499 
    500      where: [cycle count variable] = cycle count for [subroutine
    501         name] (see [filename].ext)
    502 
    503 ------------------------------------------------------------------------------
    504 */
    505 
    506 /*----------------------------------------------------------------------------
    507 ; Function Code FOR idctrow
    508 ----------------------------------------------------------------------------*/
    509 void idctrow(
    510     int16 *blk, uint8 *pred, uint8 *dst, int width
    511 )
    512 {
    513     /*----------------------------------------------------------------------------
    514     ; Define all local variables
    515     ----------------------------------------------------------------------------*/
    516     int32 x0, x1, x2, x3, x4, x5, x6, x7, x8;
    517     int i = 8;
    518     uint32 pred_word, dst_word;
    519     int res, res2;
    520 
    521     /*----------------------------------------------------------------------------
    522     ; Function body here
    523     ----------------------------------------------------------------------------*/
    524     /* row (horizontal) IDCT
    525     *
    526     * 7                       pi         1 dst[k] = sum c[l] * src[l] * cos( -- *
    527     * ( k + - ) * l ) l=0                      8          2
    528     *
    529     * where: c[0]    = 128 c[1..7] = 128*sqrt(2) */
    530 
    531     /* preset the offset, such that we can take advantage pre-offset addressing mode   */
    532     width -= 4;
    533     dst -= width;
    534     pred -= 12;
    535     blk -= 8;
    536 
    537     while (i--)
    538     {
    539         x1 = (int32)blk[12] << 8;
    540         blk[12] = 0;
    541         x2 = blk[14];
    542         blk[14] = 0;
    543         x3 = blk[10];
    544         blk[10] = 0;
    545         x4 = blk[9];
    546         blk[9] = 0;
    547         x5 = blk[15];
    548         blk[15] = 0;
    549         x6 = blk[13];
    550         blk[13] = 0;
    551         x7 = blk[11];
    552         blk[11] = 0;
    553         x0 = ((*(blk += 8)) << 8) + 8192;
    554         blk[0] = 0;   /* for proper rounding in the fourth stage */
    555 
    556         /* first stage */
    557         x8 = W7 * (x4 + x5) + 4;
    558         x4 = (x8 + (W1 - W7) * x4) >> 3;
    559         x5 = (x8 - (W1 + W7) * x5) >> 3;
    560         x8 = W3 * (x6 + x7) + 4;
    561         x6 = (x8 - (W3 - W5) * x6) >> 3;
    562         x7 = (x8 - (W3 + W5) * x7) >> 3;
    563 
    564         /* second stage */
    565         x8 = x0 + x1;
    566         x0 -= x1;
    567         x1 = W6 * (x3 + x2) + 4;
    568         x2 = (x1 - (W2 + W6) * x2) >> 3;
    569         x3 = (x1 + (W2 - W6) * x3) >> 3;
    570         x1 = x4 + x6;
    571         x4 -= x6;
    572         x6 = x5 + x7;
    573         x5 -= x7;
    574 
    575         /* third stage */
    576         x7 = x8 + x3;
    577         x8 -= x3;
    578         x3 = x0 + x2;
    579         x0 -= x2;
    580         x2 = (181 * (x4 + x5) + 128) >> 8;
    581         x4 = (181 * (x4 - x5) + 128) >> 8;
    582 
    583         /* fourth stage */
    584         pred_word = *((uint32*)(pred += 12)); /* read 4 bytes from pred */
    585 
    586         res = (x7 + x1) >> 14;
    587         ADD_AND_CLIP1(res);
    588         res2 = (x3 + x2) >> 14;
    589         ADD_AND_CLIP2(res2);
    590         dst_word = (res2 << 8) | res;
    591         res = (x0 + x4) >> 14;
    592         ADD_AND_CLIP3(res);
    593         dst_word |= (res << 16);
    594         res = (x8 + x6) >> 14;
    595         ADD_AND_CLIP4(res);
    596         dst_word |= (res << 24);
    597         *((uint32*)(dst += width)) = dst_word; /* save 4 bytes to dst */
    598 
    599         pred_word = *((uint32*)(pred += 4)); /* read 4 bytes from pred */
    600 
    601         res = (x8 - x6) >> 14;
    602         ADD_AND_CLIP1(res);
    603         res2 = (x0 - x4) >> 14;
    604         ADD_AND_CLIP2(res2);
    605         dst_word = (res2 << 8) | res;
    606         res = (x3 - x2) >> 14;
    607         ADD_AND_CLIP3(res);
    608         dst_word |= (res << 16);
    609         res = (x7 - x1) >> 14;
    610         ADD_AND_CLIP4(res);
    611         dst_word |= (res << 24);
    612         *((uint32*)(dst += 4)) = dst_word; /* save 4 bytes to dst */
    613     }
    614     /*----------------------------------------------------------------------------
    615     ; Return nothing or data or data pointer
    616     ----------------------------------------------------------------------------*/
    617     return;
    618 }
    619 
    620 void idctrow_intra(
    621     int16 *blk, PIXEL *comp, int width
    622 )
    623 {
    624     /*----------------------------------------------------------------------------
    625     ; Define all local variables
    626     ----------------------------------------------------------------------------*/
    627     int32 x0, x1, x2, x3, x4, x5, x6, x7, x8, temp;
    628     int i = 8;
    629     int offset = width;
    630     int32 word;
    631 
    632     /*----------------------------------------------------------------------------
    633     ; Function body here
    634     ----------------------------------------------------------------------------*/
    635     /* row (horizontal) IDCT
    636     *
    637     * 7                       pi         1 dst[k] = sum c[l] * src[l] * cos( -- *
    638     * ( k + - ) * l ) l=0                      8          2
    639     *
    640     * where: c[0]    = 128 c[1..7] = 128*sqrt(2) */
    641     while (i--)
    642     {
    643         x1 = (int32)blk[4] << 8;
    644         blk[4] = 0;
    645         x2 = blk[6];
    646         blk[6] = 0;
    647         x3 = blk[2];
    648         blk[2] = 0;
    649         x4 = blk[1];
    650         blk[1] = 0;
    651         x5 = blk[7];
    652         blk[7] = 0;
    653         x6 = blk[5];
    654         blk[5] = 0;
    655         x7 = blk[3];
    656         blk[3] = 0;
    657 #ifndef FAST_IDCT
    658         /* shortcut */  /* covered by idctrow1  01/9/2001 */
    659         if (!(x1 | x2 | x3 | x4 | x5 | x6 | x7))
    660         {
    661             blk[0] = blk[1] = blk[2] = blk[3] = blk[4] = blk[5] = blk[6] = blk[7] = (blk[0] + 32) >> 6;
    662             return;
    663         }
    664 #endif
    665         x0 = ((int32)blk[0] << 8) + 8192;
    666         blk[0] = 0;  /* for proper rounding in the fourth stage */
    667 
    668         /* first stage */
    669         x8 = W7 * (x4 + x5) + 4;
    670         x4 = (x8 + (W1 - W7) * x4) >> 3;
    671         x5 = (x8 - (W1 + W7) * x5) >> 3;
    672         x8 = W3 * (x6 + x7) + 4;
    673         x6 = (x8 - (W3 - W5) * x6) >> 3;
    674         x7 = (x8 - (W3 + W5) * x7) >> 3;
    675 
    676         /* second stage */
    677         x8 = x0 + x1;
    678         x0 -= x1;
    679         x1 = W6 * (x3 + x2) + 4;
    680         x2 = (x1 - (W2 + W6) * x2) >> 3;
    681         x3 = (x1 + (W2 - W6) * x3) >> 3;
    682         x1 = x4 + x6;
    683         x4 -= x6;
    684         x6 = x5 + x7;
    685         x5 -= x7;
    686 
    687         /* third stage */
    688         x7 = x8 + x3;
    689         x8 -= x3;
    690         x3 = x0 + x2;
    691         x0 -= x2;
    692         x2 = (181 * (x4 + x5) + 128) >> 8;
    693         x4 = (181 * (x4 - x5) + 128) >> 8;
    694 
    695         /* fourth stage */
    696         word = ((x7 + x1) >> 14);
    697         CLIP_RESULT(word)
    698 
    699         temp = ((x3 + x2) >> 14);
    700         CLIP_RESULT(temp)
    701         word = word | (temp << 8);
    702 
    703         temp = ((x0 + x4) >> 14);
    704         CLIP_RESULT(temp)
    705         word = word | (temp << 16);
    706 
    707         temp = ((x8 + x6) >> 14);
    708         CLIP_RESULT(temp)
    709         word = word | (temp << 24);
    710         *((int32*)(comp)) = word;
    711 
    712         word = ((x8 - x6) >> 14);
    713         CLIP_RESULT(word)
    714 
    715         temp = ((x0 - x4) >> 14);
    716         CLIP_RESULT(temp)
    717         word = word | (temp << 8);
    718 
    719         temp = ((x3 - x2) >> 14);
    720         CLIP_RESULT(temp)
    721         word = word | (temp << 16);
    722 
    723         temp = ((x7 - x1) >> 14);
    724         CLIP_RESULT(temp)
    725         word = word | (temp << 24);
    726         *((int32*)(comp + 4)) = word;
    727         comp += offset;
    728 
    729         blk += B_SIZE;
    730     }
    731     /*----------------------------------------------------------------------------
    732     ; Return nothing or data or data pointer
    733     ----------------------------------------------------------------------------*/
    734     return;
    735 }
    736 
    737 /*----------------------------------------------------------------------------
    738 ; End Function: idctrow
    739 ----------------------------------------------------------------------------*/
    740 
    741 
    742 /****************************************************************************/
    743 
    744 /*
    745 ------------------------------------------------------------------------------
    746  FUNCTION NAME: idctcol
    747 ------------------------------------------------------------------------------
    748  INPUT AND OUTPUT DEFINITIONS FOR idctcol
    749 
    750  Inputs:
    751     [input_variable_name] = [description of the input to module, its type
    752                  definition, and length (when applicable)]
    753 
    754  Local Stores/Buffers/Pointers Needed:
    755     [local_store_name] = [description of the local store, its type
    756                   definition, and length (when applicable)]
    757     [local_buffer_name] = [description of the local buffer, its type
    758                    definition, and length (when applicable)]
    759     [local_ptr_name] = [description of the local pointer, its type
    760                 definition, and length (when applicable)]
    761 
    762  Global Stores/Buffers/Pointers Needed:
    763     [global_store_name] = [description of the global store, its type
    764                    definition, and length (when applicable)]
    765     [global_buffer_name] = [description of the global buffer, its type
    766                 definition, and length (when applicable)]
    767     [global_ptr_name] = [description of the global pointer, its type
    768                  definition, and length (when applicable)]
    769 
    770  Outputs:
    771     [return_variable_name] = [description of data/pointer returned
    772                   by module, its type definition, and length
    773                   (when applicable)]
    774 
    775  Pointers and Buffers Modified:
    776     [variable_bfr_ptr] points to the [describe where the
    777       variable_bfr_ptr points to, its type definition, and length
    778       (when applicable)]
    779     [variable_bfr] contents are [describe the new contents of
    780       variable_bfr]
    781 
    782  Local Stores Modified:
    783     [local_store_name] = [describe new contents, its type
    784                   definition, and length (when applicable)]
    785 
    786  Global Stores Modified:
    787     [global_store_name] = [describe new contents, its type
    788                    definition, and length (when applicable)]
    789 
    790 ------------------------------------------------------------------------------
    791  FUNCTION DESCRIPTION FOR idctcol
    792 
    793 ------------------------------------------------------------------------------
    794  REQUIREMENTS FOR idctcol
    795 
    796 ------------------------------------------------------------------------------
    797  REFERENCES FOR idctcol
    798 
    799 ------------------------------------------------------------------------------
    800  PSEUDO-CODE FOR idctcol
    801 
    802 ------------------------------------------------------------------------------
    803  RESOURCES USED FOR idctcol
    804    When the code is written for a specific target processor,
    805      the resources used should be documented below.
    806 
    807  STACK USAGE: [stack count for this module] + [variable to represent
    808           stack usage for each subroutine called]
    809 
    810      where: [stack usage variable] = stack usage for [subroutine
    811          name] (see [filename].ext)
    812 
    813  DATA MEMORY USED: x words
    814 
    815  PROGRAM MEMORY USED: x words
    816 
    817  CLOCK CYCLES: [cycle count equation for this module] + [variable
    818            used to represent cycle count for each subroutine
    819            called]
    820 
    821      where: [cycle count variable] = cycle count for [subroutine
    822         name] (see [filename].ext)
    823 
    824 ------------------------------------------------------------------------------
    825 */
    826 
    827 /*----------------------------------------------------------------------------
    828 ; Function Code FOR idctcol
    829 ----------------------------------------------------------------------------*/
    830 void idctcol(
    831     int16 *blk
    832 )
    833 {
    834     /*----------------------------------------------------------------------------
    835     ; Define all local variables
    836     ----------------------------------------------------------------------------*/
    837     int32 x0, x1, x2, x3, x4, x5, x6, x7, x8;
    838 
    839     /*----------------------------------------------------------------------------
    840     ; Function body here
    841     ----------------------------------------------------------------------------*/
    842     /* column (vertical) IDCT
    843     *
    844     * 7                         pi         1 dst[8*k] = sum c[l] * src[8*l] *
    845     * cos( -- * ( k + - ) * l ) l=0                        8          2
    846     *
    847     * where: c[0]    = 1/1024 c[1..7] = (1/1024)*sqrt(2) */
    848     x1 = (int32)blk[32] << 11;
    849     x2 = blk[48];
    850     x3 = blk[16];
    851     x4 = blk[8];
    852     x5 = blk[56];
    853     x6 = blk[40];
    854     x7 = blk[24];
    855 #ifndef FAST_IDCT
    856     /* shortcut */        /* covered by idctcolumn1  01/9/2001 */
    857     if (!(x1 | x2 | x3 | x4 | x5 | x6 | x7))
    858     {
    859         blk[0] = blk[8] = blk[16] = blk[24] = blk[32] = blk[40] = blk[48] = blk[56]
    860                                               = blk[0] << 3;
    861         return;
    862     }
    863 #endif
    864 
    865     x0 = ((int32)blk[0] << 11) + 128;
    866 
    867     /* first stage */
    868     x8 = W7 * (x4 + x5);
    869     x4 = x8 + (W1 - W7) * x4;
    870     x5 = x8 - (W1 + W7) * x5;
    871     x8 = W3 * (x6 + x7);
    872     x6 = x8 - (W3 - W5) * x6;
    873     x7 = x8 - (W3 + W5) * x7;
    874 
    875     /* second stage */
    876     x8 = x0 + x1;
    877     x0 -= x1;
    878     x1 = W6 * (x3 + x2);
    879     x2 = x1 - (W2 + W6) * x2;
    880     x3 = x1 + (W2 - W6) * x3;
    881     x1 = x4 + x6;
    882     x4 -= x6;
    883     x6 = x5 + x7;
    884     x5 -= x7;
    885 
    886     /* third stage */
    887     x7 = x8 + x3;
    888     x8 -= x3;
    889     x3 = x0 + x2;
    890     x0 -= x2;
    891     x2 = (181 * (x4 + x5) + 128) >> 8;
    892     x4 = (181 * (x4 - x5) + 128) >> 8;
    893 
    894     /* fourth stage */
    895     blk[0]    = (x7 + x1) >> 8;
    896     blk[8] = (x3 + x2) >> 8;
    897     blk[16] = (x0 + x4) >> 8;
    898     blk[24] = (x8 + x6) >> 8;
    899     blk[32] = (x8 - x6) >> 8;
    900     blk[40] = (x0 - x4) >> 8;
    901     blk[48] = (x3 - x2) >> 8;
    902     blk[56] = (x7 - x1) >> 8;
    903     /*----------------------------------------------------------------------------
    904     ; Return nothing or data or data pointer
    905     ----------------------------------------------------------------------------*/
    906     return;
    907 }
    908 /*----------------------------------------------------------------------------
    909 ;  End Function: idctcol
    910 ----------------------------------------------------------------------------*/
    911 
    912