Home | History | Annotate | Download | only in src
      1 /* ------------------------------------------------------------------
      2  * Copyright (C) 1998-2009 PacketVideo
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
     13  * express or implied.
     14  * See the License for the specific language governing permissions
     15  * and limitations under the License.
     16  * -------------------------------------------------------------------
     17  */
     18 /*
     19 ------------------------------------------------------------------------------
     20  INPUT AND OUTPUT DEFINITIONS
     21 
     22  Inputs:
     23     [input_variable_name] = [description of the input to module, its type
     24                  definition, and length (when applicable)]
     25 
     26  Local Stores/Buffers/Pointers Needed:
     27     [local_store_name] = [description of the local store, its type
     28                   definition, and length (when applicable)]
     29     [local_buffer_name] = [description of the local buffer, its type
     30                    definition, and length (when applicable)]
     31     [local_ptr_name] = [description of the local pointer, its type
     32                 definition, and length (when applicable)]
     33 
     34  Global Stores/Buffers/Pointers Needed:
     35     [global_store_name] = [description of the global store, its type
     36                    definition, and length (when applicable)]
     37     [global_buffer_name] = [description of the global buffer, its type
     38                 definition, and length (when applicable)]
     39     [global_ptr_name] = [description of the global pointer, its type
     40                  definition, and length (when applicable)]
     41 
     42  Outputs:
     43     [return_variable_name] = [description of data/pointer returned
     44                   by module, its type definition, and length
     45                   (when applicable)]
     46 
     47  Pointers and Buffers Modified:
     48     [variable_bfr_ptr] points to the [describe where the
     49       variable_bfr_ptr points to, its type definition, and length
     50       (when applicable)]
     51     [variable_bfr] contents are [describe the new contents of
     52       variable_bfr]
     53 
     54  Local Stores Modified:
     55     [local_store_name] = [describe new contents, its type
     56                   definition, and length (when applicable)]
     57 
     58  Global Stores Modified:
     59     [global_store_name] = [describe new contents, its type
     60                    definition, and length (when applicable)]
     61 
     62 ------------------------------------------------------------------------------
     63  FUNCTION DESCRIPTION
     64 
     65 ------------------------------------------------------------------------------
     66  REQUIREMENTS
     67 
     68 ------------------------------------------------------------------------------
     69  REFERENCES
     70 
     71 ------------------------------------------------------------------------------
     72  PSEUDO-CODE
     73 
     74 ------------------------------------------------------------------------------
     75  RESOURCES USED
     76    When the code is written for a specific target processor,
     77      the resources used should be documented below.
     78 
     79  STACK USAGE: [stack count for this module] + [variable to represent
     80           stack usage for each subroutine called]
     81 
     82      where: [stack usage variable] = stack usage for [subroutine
     83          name] (see [filename].ext)
     84 
     85  DATA MEMORY USED: x words
     86 
     87  PROGRAM MEMORY USED: x words
     88 
     89  CLOCK CYCLES: [cycle count equation for this module] + [variable
     90            used to represent cycle count for each subroutine
     91            called]
     92 
     93      where: [cycle count variable] = cycle count for [subroutine
     94         name] (see [filename].ext)
     95 
     96 ------------------------------------------------------------------------------
     97 */
     98 
     99 /*----------------------------------------------------------------------------
    100 ; INCLUDES
    101 ----------------------------------------------------------------------------*/
    102 #include "mp4dec_lib.h"
    103 #include "idct.h"
    104 #include "motion_comp.h"
    105 
    106 #define OSCL_DISABLE_WARNING_CONV_POSSIBLE_LOSS_OF_DATA
    107 /*----------------------------------------------------------------------------
    108 ; MACROS
    109 ; Define module specific macros here
    110 ----------------------------------------------------------------------------*/
    111 
    112 /*----------------------------------------------------------------------------
    113 ; DEFINES
    114 ; Include all pre-processor statements here. Include conditional
    115 ; compile variables also.
    116 ----------------------------------------------------------------------------*/
    117 
    118 /*----------------------------------------------------------------------------
    119 ; LOCAL FUNCTION DEFINITIONS
    120 ; Function Prototype declaration
    121 ----------------------------------------------------------------------------*/
    122 /* private prototypes */
    123 static void idctrow(int16 *blk, uint8 *pred, uint8 *dst, int width);
    124 static void idctrow_intra(int16 *blk, PIXEL *, int width);
    125 static void idctcol(int16 *blk);
    126 
    127 #ifdef FAST_IDCT
    128 // mapping from nz_coefs to functions to be used
    129 
    130 
    131 // ARM4 does not allow global data when they are not constant hence
    132 // an array of function pointers cannot be considered as array of constants
    133 // (actual addresses are only known when the dll is loaded).
    134 // So instead of arrays of function pointers, we'll store here
    135 // arrays of rows or columns and then call the idct function
    136 // corresponding to such the row/column number:
    137 
    138 
    139 static void (*const idctcolVCA[10][4])(int16*) =
    140 {
    141     {&idctcol1, &idctcol0, &idctcol0, &idctcol0},
    142     {&idctcol1, &idctcol1, &idctcol0, &idctcol0},
    143     {&idctcol2, &idctcol1, &idctcol0, &idctcol0},
    144     {&idctcol3, &idctcol1, &idctcol0, &idctcol0},
    145     {&idctcol3, &idctcol2, &idctcol0, &idctcol0},
    146     {&idctcol3, &idctcol2, &idctcol1, &idctcol0},
    147     {&idctcol3, &idctcol2, &idctcol1, &idctcol1},
    148     {&idctcol3, &idctcol2, &idctcol2, &idctcol1},
    149     {&idctcol3, &idctcol3, &idctcol2, &idctcol1},
    150     {&idctcol4, &idctcol3, &idctcol2, &idctcol1}
    151 };
    152 
    153 
    154 static void (*const idctrowVCA[10])(int16*, uint8*, uint8*, int) =
    155 {
    156     &idctrow1,
    157     &idctrow2,
    158     &idctrow2,
    159     &idctrow2,
    160     &idctrow2,
    161     &idctrow3,
    162     &idctrow4,
    163     &idctrow4,
    164     &idctrow4,
    165     &idctrow4
    166 };
    167 
    168 
    169 static void (*const idctcolVCA2[16])(int16*) =
    170 {
    171     &idctcol0, &idctcol4, &idctcol3, &idctcol4,
    172     &idctcol2, &idctcol4, &idctcol3, &idctcol4,
    173     &idctcol1, &idctcol4, &idctcol3, &idctcol4,
    174     &idctcol2, &idctcol4, &idctcol3, &idctcol4
    175 };
    176 
    177 static void (*const idctrowVCA2[8])(int16*, uint8*, uint8*, int) =
    178 {
    179     &idctrow1, &idctrow4, &idctrow3, &idctrow4,
    180     &idctrow2, &idctrow4, &idctrow3, &idctrow4
    181 };
    182 
    183 static void (*const idctrowVCA_intra[10])(int16*, PIXEL *, int) =
    184 {
    185     &idctrow1_intra,
    186     &idctrow2_intra,
    187     &idctrow2_intra,
    188     &idctrow2_intra,
    189     &idctrow2_intra,
    190     &idctrow3_intra,
    191     &idctrow4_intra,
    192     &idctrow4_intra,
    193     &idctrow4_intra,
    194     &idctrow4_intra
    195 };
    196 
    197 static void (*const idctrowVCA2_intra[8])(int16*, PIXEL *, int) =
    198 {
    199     &idctrow1_intra, &idctrow4_intra, &idctrow3_intra, &idctrow4_intra,
    200     &idctrow2_intra, &idctrow4_intra, &idctrow3_intra, &idctrow4_intra
    201 };
    202 #endif
    203 
    204 /*----------------------------------------------------------------------------
    205 ; LOCAL STORE/BUFFER/POINTER DEFINITIONS
    206 ; Variable declaration - defined here and used outside this module
    207 ----------------------------------------------------------------------------*/
    208 
    209 /*----------------------------------------------------------------------------
    210 ; EXTERNAL FUNCTION REFERENCES
    211 ; Declare functions defined elsewhere and referenced in this module
    212 ----------------------------------------------------------------------------*/
    213 
    214 /*----------------------------------------------------------------------------
    215 ; EXTERNAL GLOBAL STORE/BUFFER/POINTER REFERENCES
    216 ; Declare variables used in this module but defined elsewhere
    217 ----------------------------------------------------------------------------*/
    218 
    219 /*----------------------------------------------------------------------------
    220 ; FUNCTION CODE
    221 ----------------------------------------------------------------------------*/
    222 void MBlockIDCT(VideoDecData *video)
    223 {
    224     Vop *currVop = video->currVop;
    225     MacroBlock *mblock = video->mblock;
    226     PIXEL *c_comp;
    227     PIXEL *cu_comp;
    228     PIXEL *cv_comp;
    229     int x_pos = video->mbnum_col;
    230     int y_pos = video->mbnum_row;
    231     int width, width_uv;
    232     int32 offset;
    233     width = video->width;
    234     width_uv = width >> 1;
    235     offset = (int32)(y_pos << 4) * width + (x_pos << 4);
    236 
    237     c_comp  = currVop->yChan + offset;
    238     cu_comp = currVop->uChan + (offset >> 2) + (x_pos << 2);
    239     cv_comp = currVop->vChan + (offset >> 2) + (x_pos << 2);
    240 
    241     BlockIDCT_intra(mblock, c_comp, 0, width);
    242     BlockIDCT_intra(mblock, c_comp + 8, 1, width);
    243     BlockIDCT_intra(mblock, c_comp + (width << 3), 2, width);
    244     BlockIDCT_intra(mblock, c_comp + (width << 3) + 8, 3, width);
    245     BlockIDCT_intra(mblock, cu_comp, 4, width_uv);
    246     BlockIDCT_intra(mblock, cv_comp, 5, width_uv);
    247 }
    248 
    249 
    250 void BlockIDCT_intra(
    251     MacroBlock *mblock, PIXEL *c_comp, int comp, int width)
    252 {
    253     /*----------------------------------------------------------------------------
    254     ; Define all local variables
    255     ----------------------------------------------------------------------------*/
    256     int16 *coeff_in = mblock->block[comp];
    257 #ifdef INTEGER_IDCT
    258 #ifdef FAST_IDCT  /* VCA IDCT using nzcoefs and bitmaps*/
    259     int i, bmapr;
    260     int nz_coefs = mblock->no_coeff[comp];
    261     uint8 *bitmapcol = mblock->bitmapcol[comp];
    262     uint8 bitmaprow = mblock->bitmaprow[comp];
    263 
    264     /*----------------------------------------------------------------------------
    265     ; Function body here
    266     ----------------------------------------------------------------------------*/
    267     if (nz_coefs <= 10)
    268     {
    269         bmapr = (nz_coefs - 1);
    270 
    271         (*(idctcolVCA[bmapr]))(coeff_in);
    272         (*(idctcolVCA[bmapr][1]))(coeff_in + 1);
    273         (*(idctcolVCA[bmapr][2]))(coeff_in + 2);
    274         (*(idctcolVCA[bmapr][3]))(coeff_in + 3);
    275 
    276         (*idctrowVCA_intra[nz_coefs-1])(coeff_in, c_comp, width);
    277     }
    278     else
    279     {
    280         i = 8;
    281         while (i--)
    282         {
    283             bmapr = (int)bitmapcol[i];
    284             if (bmapr)
    285             {
    286                 if ((bmapr&0xf) == 0)         /*  07/18/01 */
    287                 {
    288                     (*(idctcolVCA2[bmapr>>4]))(coeff_in + i);
    289                 }
    290                 else
    291                 {
    292                     idctcol(coeff_in + i);
    293                 }
    294             }
    295         }
    296         if ((bitmapcol[4] | bitmapcol[5] | bitmapcol[6] | bitmapcol[7]) == 0)
    297         {
    298             bitmaprow >>= 4;
    299             (*(idctrowVCA2_intra[(int)bitmaprow]))(coeff_in, c_comp, width);
    300         }
    301         else
    302         {
    303             idctrow_intra(coeff_in, c_comp, width);
    304         }
    305     }
    306 #else
    307     void idct_intra(int *block, uint8 *comp, int width);
    308     idct_intra(coeff_in, c_comp, width);
    309 #endif
    310 #else
    311     void idctref_intra(int *block, uint8 *comp, int width);
    312     idctref_intra(coeff_in, c_comp, width);
    313 #endif
    314 
    315 
    316     /*----------------------------------------------------------------------------
    317     ; Return nothing or data or data pointer
    318     ----------------------------------------------------------------------------*/
    319     return;
    320 }
    321 
    322 /*  08/04/05, no residue, just copy from pred to output */
    323 void Copy_Blk_to_Vop(uint8 *dst, uint8 *pred, int width)
    324 {
    325     /* copy 4 bytes at a time */
    326     width -= 4;
    327     *((uint32*)dst) = *((uint32*)pred);
    328     *((uint32*)(dst += 4)) = *((uint32*)(pred += 4));
    329     *((uint32*)(dst += width)) = *((uint32*)(pred += 12));
    330     *((uint32*)(dst += 4)) = *((uint32*)(pred += 4));
    331     *((uint32*)(dst += width)) = *((uint32*)(pred += 12));
    332     *((uint32*)(dst += 4)) = *((uint32*)(pred += 4));
    333     *((uint32*)(dst += width)) = *((uint32*)(pred += 12));
    334     *((uint32*)(dst += 4)) = *((uint32*)(pred += 4));
    335     *((uint32*)(dst += width)) = *((uint32*)(pred += 12));
    336     *((uint32*)(dst += 4)) = *((uint32*)(pred += 4));
    337     *((uint32*)(dst += width)) = *((uint32*)(pred += 12));
    338     *((uint32*)(dst += 4)) = *((uint32*)(pred += 4));
    339     *((uint32*)(dst += width)) = *((uint32*)(pred += 12));
    340     *((uint32*)(dst += 4)) = *((uint32*)(pred += 4));
    341     *((uint32*)(dst += width)) = *((uint32*)(pred += 12));
    342     *((uint32*)(dst += 4)) = *((uint32*)(pred += 4));
    343 
    344     return ;
    345 }
    346 
    347 /*  08/04/05 compute IDCT and add prediction at the end  */
    348 void BlockIDCT(
    349     uint8 *dst,  /* destination */
    350     uint8 *pred, /* prediction block, pitch 16 */
    351     int16   *coeff_in,  /* DCT data, size 64 */
    352     int width, /* width of dst */
    353     int nz_coefs,
    354     uint8 *bitmapcol,
    355     uint8 bitmaprow
    356 )
    357 {
    358 #ifdef INTEGER_IDCT
    359 #ifdef FAST_IDCT  /* VCA IDCT using nzcoefs and bitmaps*/
    360     int i, bmapr;
    361     /*----------------------------------------------------------------------------
    362     ; Function body here
    363     ----------------------------------------------------------------------------*/
    364     if (nz_coefs <= 10)
    365     {
    366         bmapr = (nz_coefs - 1);
    367         (*(idctcolVCA[bmapr]))(coeff_in);
    368         (*(idctcolVCA[bmapr][1]))(coeff_in + 1);
    369         (*(idctcolVCA[bmapr][2]))(coeff_in + 2);
    370         (*(idctcolVCA[bmapr][3]))(coeff_in + 3);
    371 
    372         (*idctrowVCA[nz_coefs-1])(coeff_in, pred, dst, width);
    373         return ;
    374     }
    375     else
    376     {
    377         i = 8;
    378 
    379         while (i--)
    380         {
    381             bmapr = (int)bitmapcol[i];
    382             if (bmapr)
    383             {
    384                 if ((bmapr&0xf) == 0)         /*  07/18/01 */
    385                 {
    386                     (*(idctcolVCA2[bmapr>>4]))(coeff_in + i);
    387                 }
    388                 else
    389                 {
    390                     idctcol(coeff_in + i);
    391                 }
    392             }
    393         }
    394         if ((bitmapcol[4] | bitmapcol[5] | bitmapcol[6] | bitmapcol[7]) == 0)
    395         {
    396             (*(idctrowVCA2[bitmaprow>>4]))(coeff_in, pred, dst, width);
    397         }
    398         else
    399         {
    400             idctrow(coeff_in, pred, dst, width);
    401         }
    402         return ;
    403     }
    404 #else // FAST_IDCT
    405     void idct(int *block, uint8 *pred, uint8 *dst, int width);
    406     idct(coeff_in, pred, dst, width);
    407     return;
    408 #endif // FAST_IDCT
    409 #else // INTEGER_IDCT
    410     void idctref(int *block, uint8 *pred, uint8 *dst, int width);
    411     idctref(coeff_in, pred, dst, width);
    412     return;
    413 #endif // INTEGER_IDCT
    414 
    415 }
    416 /*----------------------------------------------------------------------------
    417 ;  End Function: BlockIDCT
    418 ----------------------------------------------------------------------------*/
    419 
    420 
    421 /****************************************************************************/
    422 
    423 /*
    424 ------------------------------------------------------------------------------
    425  FUNCTION NAME: idctrow
    426 ------------------------------------------------------------------------------
    427  INPUT AND OUTPUT DEFINITIONS FOR idctrow
    428 
    429  Inputs:
    430     [input_variable_name] = [description of the input to module, its type
    431                  definition, and length (when applicable)]
    432 
    433  Local Stores/Buffers/Pointers Needed:
    434     [local_store_name] = [description of the local store, its type
    435                   definition, and length (when applicable)]
    436     [local_buffer_name] = [description of the local buffer, its type
    437                    definition, and length (when applicable)]
    438     [local_ptr_name] = [description of the local pointer, its type
    439                 definition, and length (when applicable)]
    440 
    441  Global Stores/Buffers/Pointers Needed:
    442     [global_store_name] = [description of the global store, its type
    443                    definition, and length (when applicable)]
    444     [global_buffer_name] = [description of the global buffer, its type
    445                 definition, and length (when applicable)]
    446     [global_ptr_name] = [description of the global pointer, its type
    447                  definition, and length (when applicable)]
    448 
    449  Outputs:
    450     [return_variable_name] = [description of data/pointer returned
    451                   by module, its type definition, and length
    452                   (when applicable)]
    453 
    454  Pointers and Buffers Modified:
    455     [variable_bfr_ptr] points to the [describe where the
    456       variable_bfr_ptr points to, its type definition, and length
    457       (when applicable)]
    458     [variable_bfr] contents are [describe the new contents of
    459       variable_bfr]
    460 
    461  Local Stores Modified:
    462     [local_store_name] = [describe new contents, its type
    463                   definition, and length (when applicable)]
    464 
    465  Global Stores Modified:
    466     [global_store_name] = [describe new contents, its type
    467                    definition, and length (when applicable)]
    468 
    469 ------------------------------------------------------------------------------
    470  FUNCTION DESCRIPTION FOR idctrow
    471 
    472 ------------------------------------------------------------------------------
    473  REQUIREMENTS FOR idctrow
    474 
    475 ------------------------------------------------------------------------------
    476  REFERENCES FOR idctrow
    477 
    478 ------------------------------------------------------------------------------
    479  PSEUDO-CODE FOR idctrow
    480 
    481 ------------------------------------------------------------------------------
    482  RESOURCES USED FOR idctrow
    483    When the code is written for a specific target processor,
    484      the resources used should be documented below.
    485 
    486  STACK USAGE: [stack count for this module] + [variable to represent
    487           stack usage for each subroutine called]
    488 
    489      where: [stack usage variable] = stack usage for [subroutine
    490          name] (see [filename].ext)
    491 
    492  DATA MEMORY USED: x words
    493 
    494  PROGRAM MEMORY USED: x words
    495 
    496  CLOCK CYCLES: [cycle count equation for this module] + [variable
    497            used to represent cycle count for each subroutine
    498            called]
    499 
    500      where: [cycle count variable] = cycle count for [subroutine
    501         name] (see [filename].ext)
    502 
    503 ------------------------------------------------------------------------------
    504 */
    505 
    506 /*----------------------------------------------------------------------------
    507 ; Function Code FOR idctrow
    508 ----------------------------------------------------------------------------*/
    509 void idctrow(
    510     int16 *blk, uint8 *pred, uint8 *dst, int width
    511 )
    512 {
    513     /*----------------------------------------------------------------------------
    514     ; Define all local variables
    515     ----------------------------------------------------------------------------*/
    516     int32 x0, x1, x2, x3, x4, x5, x6, x7, x8;
    517     int i = 8;
    518     uint32 pred_word, dst_word;
    519     int res, res2;
    520 
    521     /*----------------------------------------------------------------------------
    522     ; Function body here
    523     ----------------------------------------------------------------------------*/
    524     /* row (horizontal) IDCT
    525     *
    526     * 7                       pi         1 dst[k] = sum c[l] * src[l] * cos( -- *
    527     * ( k + - ) * l ) l=0                      8          2
    528     *
    529     * where: c[0]    = 128 c[1..7] = 128*sqrt(2) */
    530 
    531     /* preset the offset, such that we can take advantage pre-offset addressing mode   */
    532     width -= 4;
    533     dst -= width;
    534     pred -= 12;
    535     blk -= 8;
    536 
    537     while (i--)
    538     {
    539         x1 = (int32)blk[12] << 8;
    540         blk[12] = 0;
    541         x2 = blk[14];
    542         blk[14] = 0;
    543         x3 = blk[10];
    544         blk[10] = 0;
    545         x4 = blk[9];
    546         blk[9] = 0;
    547         x5 = blk[15];
    548         blk[15] = 0;
    549         x6 = blk[13];
    550         blk[13] = 0;
    551         x7 = blk[11];
    552         blk[11] = 0;
    553         x0 = ((*(blk += 8)) << 8) + 8192;
    554         blk[0] = 0;   /* for proper rounding in the fourth stage */
    555 
    556         /* first stage */
    557         x8 = W7 * (x4 + x5) + 4;
    558         x4 = (x8 + (W1 - W7) * x4) >> 3;
    559         x5 = (x8 - (W1 + W7) * x5) >> 3;
    560         x8 = W3 * (x6 + x7) + 4;
    561         x6 = (x8 - (W3 - W5) * x6) >> 3;
    562         x7 = (x8 - (W3 + W5) * x7) >> 3;
    563 
    564         /* second stage */
    565         x8 = x0 + x1;
    566         x0 -= x1;
    567         x1 = W6 * (x3 + x2) + 4;
    568         x2 = (x1 - (W2 + W6) * x2) >> 3;
    569         x3 = (x1 + (W2 - W6) * x3) >> 3;
    570         x1 = x4 + x6;
    571         x4 -= x6;
    572         x6 = x5 + x7;
    573         x5 -= x7;
    574 
    575         /* third stage */
    576         x7 = x8 + x3;
    577         x8 -= x3;
    578         x3 = x0 + x2;
    579         x0 -= x2;
    580         x2 = (181 * (x4 + x5) + 128) >> 8;
    581         x4 = (181 * (x4 - x5) + 128) >> 8;
    582 
    583         /* fourth stage */
    584         pred_word = *((uint32*)(pred += 12)); /* read 4 bytes from pred */
    585 
    586         res = (x7 + x1) >> 14;
    587         ADD_AND_CLIP1(res);
    588         res2 = (x3 + x2) >> 14;
    589         ADD_AND_CLIP2(res2);
    590         dst_word = (res2 << 8) | res;
    591         res = (x0 + x4) >> 14;
    592         ADD_AND_CLIP3(res);
    593         dst_word |= (res << 16);
    594         res = (x8 + x6) >> 14;
    595         ADD_AND_CLIP4(res);
    596         dst_word |= (res << 24);
    597         *((uint32*)(dst += width)) = dst_word; /* save 4 bytes to dst */
    598 
    599         pred_word = *((uint32*)(pred += 4)); /* read 4 bytes from pred */
    600 
    601         res = (x8 - x6) >> 14;
    602         ADD_AND_CLIP1(res);
    603         res2 = (x0 - x4) >> 14;
    604         ADD_AND_CLIP2(res2);
    605         dst_word = (res2 << 8) | res;
    606         res = (x3 - x2) >> 14;
    607         ADD_AND_CLIP3(res);
    608         dst_word |= (res << 16);
    609         res = (x7 - x1) >> 14;
    610         ADD_AND_CLIP4(res);
    611         dst_word |= (res << 24);
    612         *((uint32*)(dst += 4)) = dst_word; /* save 4 bytes to dst */
    613     }
    614     /*----------------------------------------------------------------------------
    615     ; Return nothing or data or data pointer
    616     ----------------------------------------------------------------------------*/
    617     return;
    618 }
    619 
    620 __attribute__((no_sanitize("signed-integer-overflow")))
    621 void idctrow_intra(
    622     int16 *blk, PIXEL *comp, int width
    623 )
    624 {
    625     /*----------------------------------------------------------------------------
    626     ; Define all local variables
    627     ----------------------------------------------------------------------------*/
    628     int32 x0, x1, x2, x3, x4, x5, x6, x7, x8, temp;
    629     int i = 8;
    630     int offset = width;
    631     int32 word;
    632 
    633     /*----------------------------------------------------------------------------
    634     ; Function body here
    635     ----------------------------------------------------------------------------*/
    636     /* row (horizontal) IDCT
    637     *
    638     * 7                       pi         1 dst[k] = sum c[l] * src[l] * cos( -- *
    639     * ( k + - ) * l ) l=0                      8          2
    640     *
    641     * where: c[0]    = 128 c[1..7] = 128*sqrt(2) */
    642     while (i--)
    643     {
    644         x1 = (int32)blk[4] << 8;
    645         blk[4] = 0;
    646         x2 = blk[6];
    647         blk[6] = 0;
    648         x3 = blk[2];
    649         blk[2] = 0;
    650         x4 = blk[1];
    651         blk[1] = 0;
    652         x5 = blk[7];
    653         blk[7] = 0;
    654         x6 = blk[5];
    655         blk[5] = 0;
    656         x7 = blk[3];
    657         blk[3] = 0;
    658 #ifndef FAST_IDCT
    659         /* shortcut */  /* covered by idctrow1  01/9/2001 */
    660         if (!(x1 | x2 | x3 | x4 | x5 | x6 | x7))
    661         {
    662             blk[0] = blk[1] = blk[2] = blk[3] = blk[4] = blk[5] = blk[6] = blk[7] = (blk[0] + 32) >> 6;
    663             return;
    664         }
    665 #endif
    666         x0 = ((int32)blk[0] << 8) + 8192;
    667         blk[0] = 0;  /* for proper rounding in the fourth stage */
    668 
    669         /* first stage */
    670         x8 = W7 * (x4 + x5) + 4;
    671         x4 = (x8 + (W1 - W7) * x4) >> 3;
    672         x5 = (x8 - (W1 + W7) * x5) >> 3;
    673         x8 = W3 * (x6 + x7) + 4;
    674         x6 = (x8 - (W3 - W5) * x6) >> 3;
    675         x7 = (x8 - (W3 + W5) * x7) >> 3;
    676 
    677         /* second stage */
    678         x8 = x0 + x1;
    679         x0 -= x1;
    680         x1 = W6 * (x3 + x2) + 4;
    681         x2 = (x1 - (W2 + W6) * x2) >> 3;
    682         x3 = (x1 + (W2 - W6) * x3) >> 3;
    683         x1 = x4 + x6;
    684         x4 -= x6;
    685         x6 = x5 + x7;
    686         x5 -= x7;
    687 
    688         /* third stage */
    689         x7 = x8 + x3;
    690         x8 -= x3;
    691         x3 = x0 + x2;
    692         x0 -= x2;
    693         x2 = (181 * (x4 + x5) + 128) >> 8;
    694         x4 = (181 * (x4 - x5) + 128) >> 8;
    695 
    696         /* fourth stage */
    697         word = ((x7 + x1) >> 14);
    698         CLIP_RESULT(word)
    699 
    700         temp = ((x3 + x2) >> 14);
    701         CLIP_RESULT(temp)
    702         word = word | (temp << 8);
    703 
    704         temp = ((x0 + x4) >> 14);
    705         CLIP_RESULT(temp)
    706         word = word | (temp << 16);
    707 
    708         temp = ((x8 + x6) >> 14);
    709         CLIP_RESULT(temp)
    710         word = word | (temp << 24);
    711         *((int32*)(comp)) = word;
    712 
    713         word = ((x8 - x6) >> 14);
    714         CLIP_RESULT(word)
    715 
    716         temp = ((x0 - x4) >> 14);
    717         CLIP_RESULT(temp)
    718         word = word | (temp << 8);
    719 
    720         temp = ((x3 - x2) >> 14);
    721         CLIP_RESULT(temp)
    722         word = word | (temp << 16);
    723 
    724         temp = ((x7 - x1) >> 14);
    725         CLIP_RESULT(temp)
    726         word = word | (temp << 24);
    727         *((int32*)(comp + 4)) = word;
    728         comp += offset;
    729 
    730         blk += B_SIZE;
    731     }
    732     /*----------------------------------------------------------------------------
    733     ; Return nothing or data or data pointer
    734     ----------------------------------------------------------------------------*/
    735     return;
    736 }
    737 
    738 /*----------------------------------------------------------------------------
    739 ; End Functions: idctrow, idctrow_intra
    740 ----------------------------------------------------------------------------*/
    741 
    742 
    743 /****************************************************************************/
    744 
    745 /*
    746 ------------------------------------------------------------------------------
    747  FUNCTION NAME: idctcol
    748 ------------------------------------------------------------------------------
    749  INPUT AND OUTPUT DEFINITIONS FOR idctcol
    750 
    751  Inputs:
    752     [input_variable_name] = [description of the input to module, its type
    753                  definition, and length (when applicable)]
    754 
    755  Local Stores/Buffers/Pointers Needed:
    756     [local_store_name] = [description of the local store, its type
    757                   definition, and length (when applicable)]
    758     [local_buffer_name] = [description of the local buffer, its type
    759                    definition, and length (when applicable)]
    760     [local_ptr_name] = [description of the local pointer, its type
    761                 definition, and length (when applicable)]
    762 
    763  Global Stores/Buffers/Pointers Needed:
    764     [global_store_name] = [description of the global store, its type
    765                    definition, and length (when applicable)]
    766     [global_buffer_name] = [description of the global buffer, its type
    767                 definition, and length (when applicable)]
    768     [global_ptr_name] = [description of the global pointer, its type
    769                  definition, and length (when applicable)]
    770 
    771  Outputs:
    772     [return_variable_name] = [description of data/pointer returned
    773                   by module, its type definition, and length
    774                   (when applicable)]
    775 
    776  Pointers and Buffers Modified:
    777     [variable_bfr_ptr] points to the [describe where the
    778       variable_bfr_ptr points to, its type definition, and length
    779       (when applicable)]
    780     [variable_bfr] contents are [describe the new contents of
    781       variable_bfr]
    782 
    783  Local Stores Modified:
    784     [local_store_name] = [describe new contents, its type
    785                   definition, and length (when applicable)]
    786 
    787  Global Stores Modified:
    788     [global_store_name] = [describe new contents, its type
    789                    definition, and length (when applicable)]
    790 
    791 ------------------------------------------------------------------------------
    792  FUNCTION DESCRIPTION FOR idctcol
    793 
    794 ------------------------------------------------------------------------------
    795  REQUIREMENTS FOR idctcol
    796 
    797 ------------------------------------------------------------------------------
    798  REFERENCES FOR idctcol
    799 
    800 ------------------------------------------------------------------------------
    801  PSEUDO-CODE FOR idctcol
    802 
    803 ------------------------------------------------------------------------------
    804  RESOURCES USED FOR idctcol
    805    When the code is written for a specific target processor,
    806      the resources used should be documented below.
    807 
    808  STACK USAGE: [stack count for this module] + [variable to represent
    809           stack usage for each subroutine called]
    810 
    811      where: [stack usage variable] = stack usage for [subroutine
    812          name] (see [filename].ext)
    813 
    814  DATA MEMORY USED: x words
    815 
    816  PROGRAM MEMORY USED: x words
    817 
    818  CLOCK CYCLES: [cycle count equation for this module] + [variable
    819            used to represent cycle count for each subroutine
    820            called]
    821 
    822      where: [cycle count variable] = cycle count for [subroutine
    823         name] (see [filename].ext)
    824 
    825 ------------------------------------------------------------------------------
    826 */
    827 
    828 /*----------------------------------------------------------------------------
    829 ; Function Code FOR idctcol
    830 ----------------------------------------------------------------------------*/
    831 void idctcol(
    832     int16 *blk
    833 )
    834 {
    835     /*----------------------------------------------------------------------------
    836     ; Define all local variables
    837     ----------------------------------------------------------------------------*/
    838     int32 x0, x1, x2, x3, x4, x5, x6, x7, x8;
    839 
    840     /*----------------------------------------------------------------------------
    841     ; Function body here
    842     ----------------------------------------------------------------------------*/
    843     /* column (vertical) IDCT
    844     *
    845     * 7                         pi         1 dst[8*k] = sum c[l] * src[8*l] *
    846     * cos( -- * ( k + - ) * l ) l=0                        8          2
    847     *
    848     * where: c[0]    = 1/1024 c[1..7] = (1/1024)*sqrt(2) */
    849     x1 = (int32)blk[32] << 11;
    850     x2 = blk[48];
    851     x3 = blk[16];
    852     x4 = blk[8];
    853     x5 = blk[56];
    854     x6 = blk[40];
    855     x7 = blk[24];
    856 #ifndef FAST_IDCT
    857     /* shortcut */        /* covered by idctcolumn1  01/9/2001 */
    858     if (!(x1 | x2 | x3 | x4 | x5 | x6 | x7))
    859     {
    860         blk[0] = blk[8] = blk[16] = blk[24] = blk[32] = blk[40] = blk[48] = blk[56]
    861                                               = blk[0] << 3;
    862         return;
    863     }
    864 #endif
    865 
    866     x0 = ((int32)blk[0] << 11) + 128;
    867 
    868     /* first stage */
    869     x8 = W7 * (x4 + x5);
    870     x4 = x8 + (W1 - W7) * x4;
    871     x5 = x8 - (W1 + W7) * x5;
    872     x8 = W3 * (x6 + x7);
    873     x6 = x8 - (W3 - W5) * x6;
    874     x7 = x8 - (W3 + W5) * x7;
    875 
    876     /* second stage */
    877     x8 = x0 + x1;
    878     x0 -= x1;
    879     x1 = W6 * (x3 + x2);
    880     x2 = x1 - (W2 + W6) * x2;
    881     x3 = x1 + (W2 - W6) * x3;
    882     x1 = x4 + x6;
    883     x4 -= x6;
    884     x6 = x5 + x7;
    885     x5 -= x7;
    886 
    887     /* third stage */
    888     x7 = x8 + x3;
    889     x8 -= x3;
    890     x3 = x0 + x2;
    891     x0 -= x2;
    892     x2 = (181 * (x4 + x5) + 128) >> 8;
    893     x4 = (181 * (x4 - x5) + 128) >> 8;
    894 
    895     /* fourth stage */
    896     blk[0]    = (x7 + x1) >> 8;
    897     blk[8] = (x3 + x2) >> 8;
    898     blk[16] = (x0 + x4) >> 8;
    899     blk[24] = (x8 + x6) >> 8;
    900     blk[32] = (x8 - x6) >> 8;
    901     blk[40] = (x0 - x4) >> 8;
    902     blk[48] = (x3 - x2) >> 8;
    903     blk[56] = (x7 - x1) >> 8;
    904     /*----------------------------------------------------------------------------
    905     ; Return nothing or data or data pointer
    906     ----------------------------------------------------------------------------*/
    907     return;
    908 }
    909 /*----------------------------------------------------------------------------
    910 ;  End Function: idctcol
    911 ----------------------------------------------------------------------------*/
    912 
    913