Home | History | Annotate | Download | only in src
      1 /* ------------------------------------------------------------------
      2  * Copyright (C) 1998-2009 PacketVideo
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
     13  * express or implied.
     14  * See the License for the specific language governing permissions
     15  * and limitations under the License.
     16  * -------------------------------------------------------------------
     17  */
     18 #include "avcenc_lib.h"
     19 
     20 /* subtract with the prediction and do transformation */
     21 void trans(uint8 *cur, int pitch, uint8 *predBlock, int16 *dataBlock)
     22 {
     23     int16 *ptr = dataBlock;
     24     int r0, r1, r2, r3, j;
     25     int curpitch = (uint)pitch >> 16;
     26     int predpitch = (pitch & 0xFFFF);
     27 
     28     /* horizontal */
     29     j = 4;
     30     while (j > 0)
     31     {
     32         /* calculate the residue first */
     33         r0 = cur[0] - predBlock[0];
     34         r1 = cur[1] - predBlock[1];
     35         r2 = cur[2] - predBlock[2];
     36         r3 = cur[3] - predBlock[3];
     37 
     38         r0 += r3;           //ptr[0] + ptr[3];
     39         r3 = r0 - (r3 << 1);    //ptr[0] - ptr[3];
     40         r1 += r2;           //ptr[1] + ptr[2];
     41         r2 = r1 - (r2 << 1);    //ptr[1] - ptr[2];
     42 
     43         ptr[0] = r0 + r1;
     44         ptr[2] = r0 - r1;
     45         ptr[1] = (r3 << 1) + r2;
     46         ptr[3] = r3 - (r2 << 1);
     47 
     48         ptr += 16;
     49         predBlock += predpitch;
     50         cur += curpitch;
     51         j--;
     52     }
     53     /* vertical */
     54     ptr = dataBlock;
     55     j = 4;
     56     while (j > 0)
     57     {
     58         r0 = ptr[0] + ptr[48];
     59         r3 = ptr[0] - ptr[48];
     60         r1 = ptr[16] + ptr[32];
     61         r2 = ptr[16] - ptr[32];
     62 
     63         ptr[0] = r0 + r1;
     64         ptr[32] = r0 - r1;
     65         ptr[16] = (r3 << 1) + r2;
     66         ptr[48] = r3 - (r2 << 1);
     67 
     68         ptr++;
     69         j--;
     70     }
     71 
     72     return ;
     73 }
     74 
     75 
     76 /* do residue transform quant invquant, invtrans and write output out */
     77 int dct_luma(AVCEncObject *encvid, int blkidx, uint8 *cur, uint8 *org, int *coef_cost)
     78 {
     79     AVCCommonObj *video = encvid->common;
     80     int org_pitch = encvid->currInput->pitch;
     81     int pitch = video->currPic->pitch;
     82     int16 *coef = video->block;
     83     uint8 *pred = video->pred_block; // size 16 for a 4x4 block
     84     int pred_pitch = video->pred_pitch;
     85     int r0, r1, r2, r3, j, k, idx;
     86     int *level, *run;
     87     int Qq, Rq, q_bits, qp_const, quant;
     88     int data, lev, zero_run;
     89     int numcoeff;
     90 
     91     coef += ((blkidx & 0x3) << 2) + ((blkidx >> 2) << 6); /* point to the 4x4 block */
     92 
     93     /* first take a 4x4 transform */
     94     /* horizontal */
     95     j = 4;
     96     while (j > 0)
     97     {
     98         /* calculate the residue first */
     99         r0 = org[0] - pred[0];   /* OPTIMIZEABLE */
    100         r1 = org[1] - pred[1];
    101         r2 = org[2] - pred[2];
    102         r3 = org[3] - pred[3];
    103 
    104         r0 += r3;           //ptr[0] + ptr[3];
    105         r3 = r0 - (r3 << 1);    //ptr[0] - ptr[3];
    106         r1 += r2;           //ptr[1] + ptr[2];
    107         r2 = r1 - (r2 << 1);    //ptr[1] - ptr[2];
    108 
    109         coef[0] = r0 + r1;
    110         coef[2] = r0 - r1;
    111         coef[1] = (r3 << 1) + r2;
    112         coef[3] = r3 - (r2 << 1);
    113 
    114         coef += 16;
    115         org += org_pitch;
    116         pred += pred_pitch;
    117         j--;
    118     }
    119     /* vertical */
    120     coef -= 64;
    121     pred -= (pred_pitch << 2);
    122     j = 4;
    123     while (j > 0)   /* OPTIMIZABLE */
    124     {
    125         r0 = coef[0] + coef[48];
    126         r3 = coef[0] - coef[48];
    127         r1 = coef[16] + coef[32];
    128         r2 = coef[16] - coef[32];
    129 
    130         coef[0] = r0 + r1;
    131         coef[32] = r0 - r1;
    132         coef[16] = (r3 << 1) + r2;
    133         coef[48] = r3 - (r2 << 1);
    134 
    135         coef++;
    136         j--;
    137     }
    138 
    139     coef -= 4;
    140 
    141     /* quant */
    142     level = encvid->level[ras2dec[blkidx]];
    143     run = encvid->run[ras2dec[blkidx]];
    144 
    145     Rq = video->QPy_mod_6;
    146     Qq = video->QPy_div_6;
    147     qp_const = encvid->qp_const;
    148     q_bits = 15 + Qq;
    149 
    150     zero_run = 0;
    151     numcoeff = 0;
    152     for (k = 0; k < 16; k++)
    153     {
    154         idx = ZZ_SCAN_BLOCK[k]; /* map back to raster scan order */
    155         data = coef[idx];
    156         quant = quant_coef[Rq][k];
    157         if (data > 0)
    158         {
    159             lev = data * quant + qp_const;
    160         }
    161         else
    162         {
    163             lev = -data * quant + qp_const;
    164         }
    165         lev >>= q_bits;
    166         if (lev)
    167         {
    168             *coef_cost += ((lev > 1) ? MAX_VALUE : COEFF_COST[DISABLE_THRESHOLDING][zero_run]);
    169 
    170             /* dequant */
    171             quant = dequant_coefres[Rq][k];
    172             if (data > 0)
    173             {
    174                 level[numcoeff] = lev;
    175                 coef[idx] = (lev * quant) << Qq;
    176             }
    177             else
    178             {
    179                 level[numcoeff] = -lev;
    180                 coef[idx] = (-lev * quant) << Qq;
    181             }
    182             run[numcoeff++] = zero_run;
    183             zero_run = 0;
    184         }
    185         else
    186         {
    187             zero_run++;
    188             coef[idx] = 0;
    189         }
    190     }
    191 
    192     if (video->currMB->mb_intra) // only do inverse transform with intra block
    193     {
    194         if (numcoeff) /* then do inverse transform */
    195         {
    196             for (j = 4; j > 0; j--) /* horizontal */
    197             {
    198                 r0 = coef[0] + coef[2];
    199                 r1 = coef[0] - coef[2];
    200                 r2 = (coef[1] >> 1) - coef[3];
    201                 r3 = coef[1] + (coef[3] >> 1);
    202 
    203                 coef[0] = r0 + r3;
    204                 coef[1] = r1 + r2;
    205                 coef[2] = r1 - r2;
    206                 coef[3] = r0 - r3;
    207 
    208                 coef += 16;
    209             }
    210 
    211             coef -= 64;
    212             for (j = 4; j > 0; j--) /* vertical, has to be done after horizontal */
    213             {
    214                 r0 = coef[0] + coef[32];
    215                 r1 = coef[0] - coef[32];
    216                 r2 = (coef[16] >> 1) - coef[48];
    217                 r3 = coef[16] + (coef[48] >> 1);
    218                 r0 += r3;
    219                 r3 = (r0 - (r3 << 1)); /* r0-r3 */
    220                 r1 += r2;
    221                 r2 = (r1 - (r2 << 1)); /* r1-r2 */
    222                 r0 += 32;
    223                 r1 += 32;
    224                 r2 += 32;
    225                 r3 += 32;
    226 
    227                 r0 = pred[0] + (r0 >> 6);
    228                 if ((uint)r0 > 0xFF)   r0 = 0xFF & (~(r0 >> 31));  /* clip */
    229                 r1 = *(pred += pred_pitch) + (r1 >> 6);
    230                 if ((uint)r1 > 0xFF)   r1 = 0xFF & (~(r1 >> 31));  /* clip */
    231                 r2 = *(pred += pred_pitch) + (r2 >> 6);
    232                 if ((uint)r2 > 0xFF)   r2 = 0xFF & (~(r2 >> 31));  /* clip */
    233                 r3 = pred[pred_pitch] + (r3 >> 6);
    234                 if ((uint)r3 > 0xFF)   r3 = 0xFF & (~(r3 >> 31));  /* clip */
    235 
    236                 *cur = r0;
    237                 *(cur += pitch) = r1;
    238                 *(cur += pitch) = r2;
    239                 cur[pitch] = r3;
    240                 cur -= (pitch << 1);
    241                 cur++;
    242                 pred -= (pred_pitch << 1);
    243                 pred++;
    244                 coef++;
    245             }
    246         }
    247         else  // copy from pred to cur
    248         {
    249             *((uint32*)cur) = *((uint32*)pred);
    250             *((uint32*)(cur += pitch)) = *((uint32*)(pred += pred_pitch));
    251             *((uint32*)(cur += pitch)) = *((uint32*)(pred += pred_pitch));
    252             *((uint32*)(cur += pitch)) = *((uint32*)(pred += pred_pitch));
    253         }
    254     }
    255 
    256     return numcoeff;
    257 }
    258 
    259 
    260 void MBInterIdct(AVCCommonObj *video, uint8 *curL, AVCMacroblock *currMB, int picPitch)
    261 {
    262     int16 *coef, *coef8 = video->block;
    263     uint8 *cur;  // the same as curL
    264     int b8, b4;
    265     int r0, r1, r2, r3, j, blkidx;
    266 
    267     for (b8 = 0; b8 < 4; b8++)
    268     {
    269         cur = curL;
    270         coef = coef8;
    271 
    272         if (currMB->CBP&(1 << b8))
    273         {
    274             for (b4 = 0; b4 < 4; b4++)
    275             {
    276                 blkidx = blkIdx2blkXY[b8][b4];
    277                 /* do IDCT */
    278                 if (currMB->nz_coeff[blkidx])
    279                 {
    280                     for (j = 4; j > 0; j--) /* horizontal */
    281                     {
    282                         r0 = coef[0] + coef[2];
    283                         r1 = coef[0] - coef[2];
    284                         r2 = (coef[1] >> 1) - coef[3];
    285                         r3 = coef[1] + (coef[3] >> 1);
    286 
    287                         coef[0] = r0 + r3;
    288                         coef[1] = r1 + r2;
    289                         coef[2] = r1 - r2;
    290                         coef[3] = r0 - r3;
    291 
    292                         coef += 16;
    293                     }
    294 
    295                     coef -= 64;
    296                     for (j = 4; j > 0; j--) /* vertical, has to be done after horizontal */
    297                     {
    298                         r0 = coef[0] + coef[32];
    299                         r1 = coef[0] - coef[32];
    300                         r2 = (coef[16] >> 1) - coef[48];
    301                         r3 = coef[16] + (coef[48] >> 1);
    302                         r0 += r3;
    303                         r3 = (r0 - (r3 << 1)); /* r0-r3 */
    304                         r1 += r2;
    305                         r2 = (r1 - (r2 << 1)); /* r1-r2 */
    306                         r0 += 32;
    307                         r1 += 32;
    308                         r2 += 32;
    309                         r3 += 32;
    310 
    311                         r0 = cur[0] + (r0 >> 6);
    312                         if ((uint)r0 > 0xFF)   r0 = 0xFF & (~(r0 >> 31));  /* clip */
    313                         *cur = r0;
    314                         r1 = *(cur += picPitch) + (r1 >> 6);
    315                         if ((uint)r1 > 0xFF)   r1 = 0xFF & (~(r1 >> 31));  /* clip */
    316                         *cur = r1;
    317                         r2 = *(cur += picPitch) + (r2 >> 6);
    318                         if ((uint)r2 > 0xFF)   r2 = 0xFF & (~(r2 >> 31));  /* clip */
    319                         *cur = r2;
    320                         r3 = cur[picPitch] + (r3 >> 6);
    321                         if ((uint)r3 > 0xFF)   r3 = 0xFF & (~(r3 >> 31));  /* clip */
    322                         cur[picPitch] = r3;
    323 
    324                         cur -= (picPitch << 1);
    325                         cur++;
    326                         coef++;
    327                     }
    328                     cur -= 4;
    329                     coef -= 4;
    330                 }
    331                 if (b4&1)
    332                 {
    333                     cur += ((picPitch << 2) - 4);
    334                     coef += 60;
    335                 }
    336                 else
    337                 {
    338                     cur += 4;
    339                     coef += 4;
    340                 }
    341             }
    342         }
    343 
    344         if (b8&1)
    345         {
    346             curL += ((picPitch << 3) - 8);
    347             coef8 += 120;
    348         }
    349         else
    350         {
    351             curL += 8;
    352             coef8 += 8;
    353         }
    354     }
    355 
    356     return ;
    357 }
    358 
    359 /* performa dct, quant, iquant, idct for the entire MB */
    360 void dct_luma_16x16(AVCEncObject *encvid, uint8 *curL, uint8 *orgL)
    361 {
    362     AVCCommonObj *video = encvid->common;
    363     int pitch = video->currPic->pitch;
    364     int org_pitch = encvid->currInput->pitch;
    365     AVCMacroblock *currMB = video->currMB;
    366     int16 *coef = video->block;
    367     uint8 *pred = encvid->pred_i16[currMB->i16Mode];
    368     int blk_x, blk_y, j, k, idx, b8, b4;
    369     int r0, r1, r2, r3, m0, m1, m2 , m3;
    370     int data, lev;
    371     int *level, *run, zero_run, ncoeff;
    372     int Rq, Qq, quant, q_bits, qp_const;
    373     int offset_cur[4], offset_pred[4], offset;
    374 
    375     /* horizontal */
    376     for (j = 16; j > 0; j--)
    377     {
    378         for (blk_x = 4; blk_x > 0; blk_x--)
    379         {
    380             /* calculate the residue first */
    381             r0 = *orgL++ - *pred++;
    382             r1 = *orgL++ - *pred++;
    383             r2 = *orgL++ - *pred++;
    384             r3 = *orgL++ - *pred++;
    385 
    386             r0 += r3;           //ptr[0] + ptr[3];
    387             r3 = r0 - (r3 << 1);    //ptr[0] - ptr[3];
    388             r1 += r2;           //ptr[1] + ptr[2];
    389             r2 = r1 - (r2 << 1);    //ptr[1] - ptr[2];
    390 
    391             *coef++ = r0 + r1;
    392             *coef++ = (r3 << 1) + r2;
    393             *coef++ = r0 - r1;
    394             *coef++ = r3 - (r2 << 1);
    395         }
    396         orgL += (org_pitch - 16);
    397     }
    398     pred -= 256;
    399     coef -= 256;
    400     /* vertical */
    401     for (blk_y = 4; blk_y > 0; blk_y--)
    402     {
    403         for (j = 16; j > 0; j--)
    404         {
    405             r0 = coef[0] + coef[48];
    406             r3 = coef[0] - coef[48];
    407             r1 = coef[16] + coef[32];
    408             r2 = coef[16] - coef[32];
    409 
    410             coef[0] = r0 + r1;
    411             coef[32] = r0 - r1;
    412             coef[16] = (r3 << 1) + r2;
    413             coef[48] = r3 - (r2 << 1);
    414 
    415             coef++;
    416         }
    417         coef += 48;
    418     }
    419 
    420     /* then perform DC transform */
    421     coef -= 256;
    422     for (j = 4; j > 0; j--)
    423     {
    424         r0 = coef[0] + coef[12];
    425         r3 = coef[0] - coef[12];
    426         r1 = coef[4] + coef[8];
    427         r2 = coef[4] - coef[8];
    428 
    429         coef[0] = r0 + r1;
    430         coef[8] = r0 - r1;
    431         coef[4] = r3 + r2;
    432         coef[12] = r3 - r2;
    433         coef += 64;
    434     }
    435     coef -= 256;
    436     for (j = 4; j > 0; j--)
    437     {
    438         r0 = coef[0] + coef[192];
    439         r3 = coef[0] - coef[192];
    440         r1 = coef[64] + coef[128];
    441         r2 = coef[64] - coef[128];
    442 
    443         coef[0] = (r0 + r1) >> 1;
    444         coef[128] = (r0 - r1) >> 1;
    445         coef[64] = (r3 + r2) >> 1;
    446         coef[192] = (r3 - r2) >> 1;
    447         coef += 4;
    448     }
    449 
    450     coef -= 16;
    451     // then quantize DC
    452     level = encvid->leveldc;
    453     run = encvid->rundc;
    454 
    455     Rq = video->QPy_mod_6;
    456     Qq = video->QPy_div_6;
    457     quant = quant_coef[Rq][0];
    458     q_bits = 15 + Qq;
    459     qp_const = encvid->qp_const;
    460 
    461     zero_run = 0;
    462     ncoeff = 0;
    463     for (k = 0; k < 16; k++) /* in zigzag scan order */
    464     {
    465         idx = ZIGZAG2RASTERDC[k];
    466         data = coef[idx];
    467         if (data > 0)   // quant
    468         {
    469             lev = data * quant + (qp_const << 1);
    470         }
    471         else
    472         {
    473             lev = -data * quant + (qp_const << 1);
    474         }
    475         lev >>= (q_bits + 1);
    476         if (lev) // dequant
    477         {
    478             if (data > 0)
    479             {
    480                 level[ncoeff] = lev;
    481                 coef[idx] = lev;
    482             }
    483             else
    484             {
    485                 level[ncoeff] = -lev;
    486                 coef[idx] = -lev;
    487             }
    488             run[ncoeff++] = zero_run;
    489             zero_run = 0;
    490         }
    491         else
    492         {
    493             zero_run++;
    494             coef[idx] = 0;
    495         }
    496     }
    497 
    498     /* inverse transform DC */
    499     encvid->numcoefdc = ncoeff;
    500     if (ncoeff)
    501     {
    502         quant = dequant_coefres[Rq][0];
    503 
    504         for (j = 0; j < 4; j++)
    505         {
    506             m0 = coef[0] + coef[4];
    507             m1 = coef[0] - coef[4];
    508             m2 = coef[8] + coef[12];
    509             m3 = coef[8] - coef[12];
    510 
    511 
    512             coef[0] = m0 + m2;
    513             coef[4] = m0 - m2;
    514             coef[8] = m1 - m3;
    515             coef[12] = m1 + m3;
    516             coef += 64;
    517         }
    518 
    519         coef -= 256;
    520 
    521         if (Qq >= 2)  /* this way should be faster than JM */
    522         {           /* they use (((m4*scale)<<(QPy/6))+2)>>2 for both cases. */
    523             Qq -= 2;
    524             for (j = 0; j < 4; j++)
    525             {
    526                 m0 = coef[0] + coef[64];
    527                 m1 = coef[0] - coef[64];
    528                 m2 = coef[128] + coef[192];
    529                 m3 = coef[128] - coef[192];
    530 
    531                 coef[0] = ((m0 + m2) * quant) << Qq;
    532                 coef[64] = ((m0 - m2) * quant) << Qq;
    533                 coef[128] = ((m1 - m3) * quant) << Qq;
    534                 coef[192] = ((m1 + m3) * quant) << Qq;
    535                 coef += 4;
    536             }
    537             Qq += 2; /* restore the value */
    538         }
    539         else
    540         {
    541             Qq = 2 - Qq;
    542             offset = 1 << (Qq - 1);
    543 
    544             for (j = 0; j < 4; j++)
    545             {
    546                 m0 = coef[0] + coef[64];
    547                 m1 = coef[0] - coef[64];
    548                 m2 = coef[128] + coef[192];
    549                 m3 = coef[128] - coef[192];
    550 
    551                 coef[0] = (((m0 + m2) * quant + offset) >> Qq);
    552                 coef[64] = (((m0 - m2) * quant + offset) >> Qq);
    553                 coef[128] = (((m1 - m3) * quant + offset) >> Qq);
    554                 coef[192] = (((m1 + m3) * quant + offset) >> Qq);
    555                 coef += 4;
    556             }
    557             Qq = 2 - Qq; /* restore the value */
    558         }
    559         coef -= 16; /* back to the origin */
    560     }
    561 
    562     /* now zigzag scan ac coefs, quant, iquant and itrans */
    563     run = encvid->run[0];
    564     level = encvid->level[0];
    565 
    566     /* offset btw 4x4 block */
    567     offset_cur[0] = 0;
    568     offset_cur[1] = (pitch << 2) - 8;
    569 
    570     /* offset btw 8x8 block */
    571     offset_cur[2] = 8 - (pitch << 3);
    572     offset_cur[3] = -8;
    573 
    574     /* similarly for pred */
    575     offset_pred[0] = 0;
    576     offset_pred[1] = 56;
    577     offset_pred[2] = -120;
    578     offset_pred[3] = -8;
    579 
    580     currMB->CBP = 0;
    581 
    582     for (b8 = 0; b8 < 4; b8++)
    583     {
    584         for (b4 = 0; b4 < 4; b4++)
    585         {
    586 
    587             zero_run = 0;
    588             ncoeff = 0;
    589 
    590             for (k = 1; k < 16; k++)
    591             {
    592                 idx = ZZ_SCAN_BLOCK[k]; /* map back to raster scan order */
    593                 data = coef[idx];
    594                 quant = quant_coef[Rq][k];
    595                 if (data > 0)
    596                 {
    597                     lev = data * quant + qp_const;
    598                 }
    599                 else
    600                 {
    601                     lev = -data * quant + qp_const;
    602                 }
    603                 lev >>= q_bits;
    604                 if (lev)
    605                 {   /* dequant */
    606                     quant = dequant_coefres[Rq][k];
    607                     if (data > 0)
    608                     {
    609                         level[ncoeff] = lev;
    610                         coef[idx] = (lev * quant) << Qq;
    611                     }
    612                     else
    613                     {
    614                         level[ncoeff] = -lev;
    615                         coef[idx] = (-lev * quant) << Qq;
    616                     }
    617                     run[ncoeff++] = zero_run;
    618                     zero_run = 0;
    619                 }
    620                 else
    621                 {
    622                     zero_run++;
    623                     coef[idx] = 0;
    624                 }
    625             }
    626 
    627             currMB->nz_coeff[blkIdx2blkXY[b8][b4]] = ncoeff; /* in raster scan !!! */
    628             if (ncoeff)
    629             {
    630                 currMB->CBP |= (1 << b8);
    631 
    632                 // do inverse transform here
    633                 for (j = 4; j > 0; j--)
    634                 {
    635                     r0 = coef[0] + coef[2];
    636                     r1 = coef[0] - coef[2];
    637                     r2 = (coef[1] >> 1) - coef[3];
    638                     r3 = coef[1] + (coef[3] >> 1);
    639 
    640                     coef[0] = r0 + r3;
    641                     coef[1] = r1 + r2;
    642                     coef[2] = r1 - r2;
    643                     coef[3] = r0 - r3;
    644 
    645                     coef += 16;
    646                 }
    647                 coef -= 64;
    648                 for (j = 4; j > 0; j--)
    649                 {
    650                     r0 = coef[0] + coef[32];
    651                     r1 = coef[0] - coef[32];
    652                     r2 = (coef[16] >> 1) - coef[48];
    653                     r3 = coef[16] + (coef[48] >> 1);
    654 
    655                     r0 += r3;
    656                     r3 = (r0 - (r3 << 1)); /* r0-r3 */
    657                     r1 += r2;
    658                     r2 = (r1 - (r2 << 1)); /* r1-r2 */
    659                     r0 += 32;
    660                     r1 += 32;
    661                     r2 += 32;
    662                     r3 += 32;
    663                     r0 = pred[0] + (r0 >> 6);
    664                     if ((uint)r0 > 0xFF)   r0 = 0xFF & (~(r0 >> 31));  /* clip */
    665                     r1 = pred[16] + (r1 >> 6);
    666                     if ((uint)r1 > 0xFF)   r1 = 0xFF & (~(r1 >> 31));  /* clip */
    667                     r2 = pred[32] + (r2 >> 6);
    668                     if ((uint)r2 > 0xFF)   r2 = 0xFF & (~(r2 >> 31));  /* clip */
    669                     r3 = pred[48] + (r3 >> 6);
    670                     if ((uint)r3 > 0xFF)   r3 = 0xFF & (~(r3 >> 31));  /* clip */
    671                     *curL = r0;
    672                     *(curL += pitch) = r1;
    673                     *(curL += pitch) = r2;
    674                     curL[pitch] = r3;
    675                     curL -= (pitch << 1);
    676                     curL++;
    677                     pred++;
    678                     coef++;
    679                 }
    680             }
    681             else  // do DC-only inverse
    682             {
    683                 m0 = coef[0] + 32;
    684 
    685                 for (j = 4; j > 0; j--)
    686                 {
    687                     r0 = pred[0] + (m0 >> 6);
    688                     if ((uint)r0 > 0xFF)   r0 = 0xFF & (~(r0 >> 31));  /* clip */
    689                     r1 = pred[16] + (m0 >> 6);
    690                     if ((uint)r1 > 0xFF)   r1 = 0xFF & (~(r1 >> 31));  /* clip */
    691                     r2 = pred[32] + (m0 >> 6);
    692                     if ((uint)r2 > 0xFF)   r2 = 0xFF & (~(r2 >> 31));  /* clip */
    693                     r3 = pred[48] + (m0 >> 6);
    694                     if ((uint)r3 > 0xFF)   r3 = 0xFF & (~(r3 >> 31));  /* clip */
    695                     *curL = r0;
    696                     *(curL += pitch) = r1;
    697                     *(curL += pitch) = r2;
    698                     curL[pitch] = r3;
    699                     curL -= (pitch << 1);
    700                     curL++;
    701                     pred++;
    702                 }
    703                 coef += 4;
    704             }
    705 
    706             run += 16;  // follow coding order
    707             level += 16;
    708             curL += offset_cur[b4&1];
    709             pred += offset_pred[b4&1];
    710             coef += offset_pred[b4&1];
    711         }
    712 
    713         curL += offset_cur[2 + (b8&1)];
    714         pred += offset_pred[2 + (b8&1)];
    715         coef += offset_pred[2 + (b8&1)];
    716     }
    717 
    718     return ;
    719 }
    720 
    721 
    722 void dct_chroma(AVCEncObject *encvid, uint8 *curC, uint8 *orgC, int cr)
    723 {
    724     AVCCommonObj *video = encvid->common;
    725     AVCMacroblock *currMB = video->currMB;
    726     int org_pitch = (encvid->currInput->pitch) >> 1;
    727     int pitch = (video->currPic->pitch) >> 1;
    728     int pred_pitch = 16;
    729     int16 *coef = video->block + 256;
    730     uint8 *pred = video->pred_block;
    731     int j, blk_x, blk_y, k, idx, b4;
    732     int r0, r1, r2, r3, m0;
    733     int Qq, Rq, qp_const, q_bits, quant;
    734     int *level, *run, zero_run, ncoeff;
    735     int data, lev;
    736     int offset_cur[2], offset_pred[2], offset_coef[2];
    737     uint8 nz_temp[4];
    738     int  coeff_cost;
    739 
    740     if (cr)
    741     {
    742         coef += 8;
    743         pred += 8;
    744     }
    745 
    746     if (currMB->mb_intra == 0) // inter mode
    747     {
    748         pred = curC;
    749         pred_pitch = pitch;
    750     }
    751 
    752     /* do 4x4 transform */
    753     /* horizontal */
    754     for (j = 8; j > 0; j--)
    755     {
    756         for (blk_x = 2; blk_x > 0; blk_x--)
    757         {
    758             /* calculate the residue first */
    759             r0 = *orgC++ - *pred++;
    760             r1 = *orgC++ - *pred++;
    761             r2 = *orgC++ - *pred++;
    762             r3 = *orgC++ - *pred++;
    763 
    764             r0 += r3;           //ptr[0] + ptr[3];
    765             r3 = r0 - (r3 << 1);    //ptr[0] - ptr[3];
    766             r1 += r2;           //ptr[1] + ptr[2];
    767             r2 = r1 - (r2 << 1);    //ptr[1] - ptr[2];
    768 
    769             *coef++ = r0 + r1;
    770             *coef++ = (r3 << 1) + r2;
    771             *coef++ = r0 - r1;
    772             *coef++ = r3 - (r2 << 1);
    773 
    774         }
    775         coef += 8; // coef pitch is 16
    776         pred += (pred_pitch - 8); // pred_pitch is 16
    777         orgC += (org_pitch - 8);
    778     }
    779     pred -= (pred_pitch << 3);
    780     coef -= 128;
    781     /* vertical */
    782     for (blk_y = 2; blk_y > 0; blk_y--)
    783     {
    784         for (j = 8; j > 0; j--)
    785         {
    786             r0 = coef[0] + coef[48];
    787             r3 = coef[0] - coef[48];
    788             r1 = coef[16] + coef[32];
    789             r2 = coef[16] - coef[32];
    790 
    791             coef[0] = r0 + r1;
    792             coef[32] = r0 - r1;
    793             coef[16] = (r3 << 1) + r2;
    794             coef[48] = r3 - (r2 << 1);
    795 
    796             coef++;
    797         }
    798         coef += 56;
    799     }
    800     /* then perform DC transform */
    801     coef -= 128;
    802 
    803     /* 2x2 transform of DC components*/
    804     r0 = coef[0];
    805     r1 = coef[4];
    806     r2 = coef[64];
    807     r3 = coef[68];
    808 
    809     coef[0] = r0 + r1 + r2 + r3;
    810     coef[4] = r0 - r1 + r2 - r3;
    811     coef[64] = r0 + r1 - r2 - r3;
    812     coef[68] = r0 - r1 - r2 + r3;
    813 
    814     Qq    = video->QPc_div_6;
    815     Rq    = video->QPc_mod_6;
    816     quant = quant_coef[Rq][0];
    817     q_bits    = 15 + Qq;
    818     qp_const = encvid->qp_const_c;
    819 
    820     zero_run = 0;
    821     ncoeff = 0;
    822     run = encvid->runcdc + (cr << 2);
    823     level = encvid->levelcdc + (cr << 2);
    824 
    825     /* in zigzag scan order */
    826     for (k = 0; k < 4; k++)
    827     {
    828         idx = ((k >> 1) << 6) + ((k & 1) << 2);
    829         data = coef[idx];
    830         if (data > 0)
    831         {
    832             lev = data * quant + (qp_const << 1);
    833         }
    834         else
    835         {
    836             lev = -data * quant + (qp_const << 1);
    837         }
    838         lev >>= (q_bits + 1);
    839         if (lev)
    840         {
    841             if (data > 0)
    842             {
    843                 level[ncoeff] = lev;
    844                 coef[idx] = lev;
    845             }
    846             else
    847             {
    848                 level[ncoeff] = -lev;
    849                 coef[idx] = -lev;
    850             }
    851             run[ncoeff++] = zero_run;
    852             zero_run = 0;
    853         }
    854         else
    855         {
    856             zero_run++;
    857             coef[idx] = 0;
    858         }
    859     }
    860 
    861     encvid->numcoefcdc[cr] = ncoeff;
    862 
    863     if (ncoeff)
    864     {
    865         currMB->CBP |= (1 << 4); // DC present
    866         // do inverse transform
    867         quant = dequant_coefres[Rq][0];
    868 
    869         r0 = coef[0] + coef[4];
    870         r1 = coef[0] - coef[4];
    871         r2 = coef[64] + coef[68];
    872         r3 = coef[64] - coef[68];
    873 
    874         r0 += r2;
    875         r2 = r0 - (r2 << 1);
    876         r1 += r3;
    877         r3 = r1 - (r3 << 1);
    878 
    879         if (Qq >= 1)
    880         {
    881             Qq -= 1;
    882             coef[0] = (r0 * quant) << Qq;
    883             coef[4] = (r1 * quant) << Qq;
    884             coef[64] = (r2 * quant) << Qq;
    885             coef[68] = (r3 * quant) << Qq;
    886             Qq++;
    887         }
    888         else
    889         {
    890             coef[0] = (r0 * quant) >> 1;
    891             coef[4] = (r1 * quant) >> 1;
    892             coef[64] = (r2 * quant) >> 1;
    893             coef[68] = (r3 * quant) >> 1;
    894         }
    895     }
    896 
    897     /* now do AC zigzag scan, quant, iquant and itrans */
    898     if (cr)
    899     {
    900         run = encvid->run[20];
    901         level = encvid->level[20];
    902     }
    903     else
    904     {
    905         run = encvid->run[16];
    906         level = encvid->level[16];
    907     }
    908 
    909     /* offset btw 4x4 block */
    910     offset_cur[0] = 0;
    911     offset_cur[1] = (pitch << 2) - 8;
    912     offset_pred[0] = 0;
    913     offset_pred[1] = (pred_pitch << 2) - 8;
    914     offset_coef[0] = 0;
    915     offset_coef[1] = 56;
    916 
    917     coeff_cost = 0;
    918 
    919     for (b4 = 0; b4 < 4; b4++)
    920     {
    921         zero_run = 0;
    922         ncoeff = 0;
    923         for (k = 1; k < 16; k++) /* in zigzag scan order */
    924         {
    925             idx = ZZ_SCAN_BLOCK[k]; /* map back to raster scan order */
    926             data = coef[idx];
    927             quant = quant_coef[Rq][k];
    928             if (data > 0)
    929             {
    930                 lev = data * quant + qp_const;
    931             }
    932             else
    933             {
    934                 lev = -data * quant + qp_const;
    935             }
    936             lev >>= q_bits;
    937             if (lev)
    938             {
    939                 /* for RD performance*/
    940                 if (lev > 1)
    941                     coeff_cost += MAX_VALUE;                // set high cost, shall not be discarded
    942                 else
    943                     coeff_cost += COEFF_COST[DISABLE_THRESHOLDING][zero_run];
    944 
    945                 /* dequant */
    946                 quant = dequant_coefres[Rq][k];
    947                 if (data > 0)
    948                 {
    949                     level[ncoeff] = lev;
    950                     coef[idx] = (lev * quant) << Qq;
    951                 }
    952                 else
    953                 {
    954                     level[ncoeff] = -lev;
    955                     coef[idx] = (-lev * quant) << Qq;
    956                 }
    957                 run[ncoeff++] = zero_run;
    958                 zero_run = 0;
    959             }
    960             else
    961             {
    962                 zero_run++;
    963                 coef[idx] = 0;
    964             }
    965         }
    966 
    967         nz_temp[b4] = ncoeff; // raster scan
    968 
    969         // just advance the pointers for now, do IDCT later
    970         coef += 4;
    971         run += 16;
    972         level += 16;
    973         coef += offset_coef[b4&1];
    974     }
    975 
    976     /* rewind the pointers */
    977     coef -= 128;
    978 
    979     if (coeff_cost < _CHROMA_COEFF_COST_)
    980     {
    981         /* if it's not efficient to encode any blocks.
    982         Just do DC only */
    983         /* We can reset level and run also, but setting nz to zero should be enough. */
    984         currMB->nz_coeff[16+(cr<<1)] = 0;
    985         currMB->nz_coeff[17+(cr<<1)] = 0;
    986         currMB->nz_coeff[20+(cr<<1)] = 0;
    987         currMB->nz_coeff[21+(cr<<1)] = 0;
    988 
    989         for (b4 = 0; b4 < 4; b4++)
    990         {
    991             // do DC-only inverse
    992             m0 = coef[0] + 32;
    993 
    994             for (j = 4; j > 0; j--)
    995             {
    996                 r0 = pred[0] + (m0 >> 6);
    997                 if ((uint)r0 > 0xFF)   r0 = 0xFF & (~(r0 >> 31));  /* clip */
    998                 r1 = *(pred += pred_pitch) + (m0 >> 6);
    999                 if ((uint)r1 > 0xFF)   r1 = 0xFF & (~(r1 >> 31));  /* clip */
   1000                 r2 = pred[pred_pitch] + (m0 >> 6);
   1001                 if ((uint)r2 > 0xFF)   r2 = 0xFF & (~(r2 >> 31));  /* clip */
   1002                 r3 = pred[pred_pitch<<1] + (m0 >> 6);
   1003                 if ((uint)r3 > 0xFF)   r3 = 0xFF & (~(r3 >> 31));  /* clip */
   1004                 *curC = r0;
   1005                 *(curC += pitch) = r1;
   1006                 *(curC += pitch) = r2;
   1007                 curC[pitch] = r3;
   1008                 curC -= (pitch << 1);
   1009                 curC++;
   1010                 pred += (1 - pred_pitch);
   1011             }
   1012             coef += 4;
   1013             curC += offset_cur[b4&1];
   1014             pred += offset_pred[b4&1];
   1015             coef += offset_coef[b4&1];
   1016         }
   1017     }
   1018     else // not dropping anything, continue with the IDCT
   1019     {
   1020         for (b4 = 0; b4 < 4; b4++)
   1021         {
   1022             ncoeff = nz_temp[b4] ; // in raster scan
   1023             currMB->nz_coeff[16+(b4&1)+(cr<<1)+((b4>>1)<<2)] = ncoeff; // in raster scan
   1024 
   1025             if (ncoeff) // do a check on the nonzero-coeff
   1026             {
   1027                 currMB->CBP |= (2 << 4);
   1028 
   1029                 // do inverse transform here
   1030                 for (j = 4; j > 0; j--)
   1031                 {
   1032                     r0 = coef[0] + coef[2];
   1033                     r1 = coef[0] - coef[2];
   1034                     r2 = (coef[1] >> 1) - coef[3];
   1035                     r3 = coef[1] + (coef[3] >> 1);
   1036 
   1037                     coef[0] = r0 + r3;
   1038                     coef[1] = r1 + r2;
   1039                     coef[2] = r1 - r2;
   1040                     coef[3] = r0 - r3;
   1041 
   1042                     coef += 16;
   1043                 }
   1044                 coef -= 64;
   1045                 for (j = 4; j > 0; j--)
   1046                 {
   1047                     r0 = coef[0] + coef[32];
   1048                     r1 = coef[0] - coef[32];
   1049                     r2 = (coef[16] >> 1) - coef[48];
   1050                     r3 = coef[16] + (coef[48] >> 1);
   1051 
   1052                     r0 += r3;
   1053                     r3 = (r0 - (r3 << 1)); /* r0-r3 */
   1054                     r1 += r2;
   1055                     r2 = (r1 - (r2 << 1)); /* r1-r2 */
   1056                     r0 += 32;
   1057                     r1 += 32;
   1058                     r2 += 32;
   1059                     r3 += 32;
   1060                     r0 = pred[0] + (r0 >> 6);
   1061                     if ((uint)r0 > 0xFF)   r0 = 0xFF & (~(r0 >> 31));  /* clip */
   1062                     r1 = *(pred += pred_pitch) + (r1 >> 6);
   1063                     if ((uint)r1 > 0xFF)   r1 = 0xFF & (~(r1 >> 31));  /* clip */
   1064                     r2 = pred[pred_pitch] + (r2 >> 6);
   1065                     if ((uint)r2 > 0xFF)   r2 = 0xFF & (~(r2 >> 31));  /* clip */
   1066                     r3 = pred[pred_pitch<<1] + (r3 >> 6);
   1067                     if ((uint)r3 > 0xFF)   r3 = 0xFF & (~(r3 >> 31));  /* clip */
   1068                     *curC = r0;
   1069                     *(curC += pitch) = r1;
   1070                     *(curC += pitch) = r2;
   1071                     curC[pitch] = r3;
   1072                     curC -= (pitch << 1);
   1073                     curC++;
   1074                     pred += (1 - pred_pitch);
   1075                     coef++;
   1076                 }
   1077             }
   1078             else
   1079             {
   1080                 // do DC-only inverse
   1081                 m0 = coef[0] + 32;
   1082 
   1083                 for (j = 4; j > 0; j--)
   1084                 {
   1085                     r0 = pred[0] + (m0 >> 6);
   1086                     if ((uint)r0 > 0xFF)   r0 = 0xFF & (~(r0 >> 31));  /* clip */
   1087                     r1 = *(pred += pred_pitch) + (m0 >> 6);
   1088                     if ((uint)r1 > 0xFF)   r1 = 0xFF & (~(r1 >> 31));  /* clip */
   1089                     r2 = pred[pred_pitch] + (m0 >> 6);
   1090                     if ((uint)r2 > 0xFF)   r2 = 0xFF & (~(r2 >> 31));  /* clip */
   1091                     r3 = pred[pred_pitch<<1] + (m0 >> 6);
   1092                     if ((uint)r3 > 0xFF)   r3 = 0xFF & (~(r3 >> 31));  /* clip */
   1093                     *curC = r0;
   1094                     *(curC += pitch) = r1;
   1095                     *(curC += pitch) = r2;
   1096                     curC[pitch] = r3;
   1097                     curC -= (pitch << 1);
   1098                     curC++;
   1099                     pred += (1 - pred_pitch);
   1100                 }
   1101                 coef += 4;
   1102             }
   1103             curC += offset_cur[b4&1];
   1104             pred += offset_pred[b4&1];
   1105             coef += offset_coef[b4&1];
   1106         }
   1107     }
   1108 
   1109     return ;
   1110 }
   1111 
   1112 
   1113 /* only DC transform */
   1114 int TransQuantIntra16DC(AVCEncObject *encvid)
   1115 {
   1116     AVCCommonObj *video = encvid->common;
   1117     int16 *block = video->block;
   1118     int *level = encvid->leveldc;
   1119     int *run = encvid->rundc;
   1120     int16 *ptr = block;
   1121     int r0, r1, r2, r3, j;
   1122     int Qq = video->QPy_div_6;
   1123     int Rq = video->QPy_mod_6;
   1124     int q_bits, qp_const, quant;
   1125     int data, lev, zero_run;
   1126     int k, ncoeff, idx;
   1127 
   1128     /* DC transform */
   1129     /* horizontal */
   1130     j = 4;
   1131     while (j)
   1132     {
   1133         r0 = ptr[0] + ptr[12];
   1134         r3 = ptr[0] - ptr[12];
   1135         r1 = ptr[4] + ptr[8];
   1136         r2 = ptr[4] - ptr[8];
   1137 
   1138         ptr[0] = r0 + r1;
   1139         ptr[8] = r0 - r1;
   1140         ptr[4] = r3 + r2;
   1141         ptr[12] = r3 - r2;
   1142         ptr += 64;
   1143         j--;
   1144     }
   1145     /* vertical */
   1146     ptr = block;
   1147     j = 4;
   1148     while (j)
   1149     {
   1150         r0 = ptr[0] + ptr[192];
   1151         r3 = ptr[0] - ptr[192];
   1152         r1 = ptr[64] + ptr[128];
   1153         r2 = ptr[64] - ptr[128];
   1154 
   1155         ptr[0] = (r0 + r1) >> 1;
   1156         ptr[128] = (r0 - r1) >> 1;
   1157         ptr[64] = (r3 + r2) >> 1;
   1158         ptr[192] = (r3 - r2) >> 1;
   1159         ptr += 4;
   1160         j--;
   1161     }
   1162 
   1163     quant = quant_coef[Rq][0];
   1164     q_bits    = 15 + Qq;
   1165     qp_const = (1 << q_bits) / 3;    // intra
   1166 
   1167     zero_run = 0;
   1168     ncoeff = 0;
   1169 
   1170     for (k = 0; k < 16; k++) /* in zigzag scan order */
   1171     {
   1172         idx = ZIGZAG2RASTERDC[k];
   1173         data = block[idx];
   1174         if (data > 0)
   1175         {
   1176             lev = data * quant + (qp_const << 1);
   1177         }
   1178         else
   1179         {
   1180             lev = -data * quant + (qp_const << 1);
   1181         }
   1182         lev >>= (q_bits + 1);
   1183         if (lev)
   1184         {
   1185             if (data > 0)
   1186             {
   1187                 level[ncoeff] = lev;
   1188                 block[idx] = lev;
   1189             }
   1190             else
   1191             {
   1192                 level[ncoeff] = -lev;
   1193                 block[idx] = -lev;
   1194             }
   1195             run[ncoeff++] = zero_run;
   1196             zero_run = 0;
   1197         }
   1198         else
   1199         {
   1200             zero_run++;
   1201             block[idx] = 0;
   1202         }
   1203     }
   1204     return ncoeff;
   1205 }
   1206 
   1207 int TransQuantChromaDC(AVCEncObject *encvid, int16 *block, int slice_type, int cr)
   1208 {
   1209     AVCCommonObj *video = encvid->common;
   1210     int *level, *run;
   1211     int r0, r1, r2, r3;
   1212     int Qq, Rq, q_bits, qp_const, quant;
   1213     int data, lev, zero_run;
   1214     int k, ncoeff, idx;
   1215 
   1216     level = encvid->levelcdc + (cr << 2); /* cb or cr */
   1217     run = encvid->runcdc + (cr << 2);
   1218 
   1219     /* 2x2 transform of DC components*/
   1220     r0 = block[0];
   1221     r1 = block[4];
   1222     r2 = block[64];
   1223     r3 = block[68];
   1224 
   1225     block[0] = r0 + r1 + r2 + r3;
   1226     block[4] = r0 - r1 + r2 - r3;
   1227     block[64] = r0 + r1 - r2 - r3;
   1228     block[68] = r0 - r1 - r2 + r3;
   1229 
   1230     Qq    = video->QPc_div_6;
   1231     Rq    = video->QPc_mod_6;
   1232     quant = quant_coef[Rq][0];
   1233     q_bits    = 15 + Qq;
   1234     if (slice_type == AVC_I_SLICE)
   1235     {
   1236         qp_const = (1 << q_bits) / 3;
   1237     }
   1238     else
   1239     {
   1240         qp_const = (1 << q_bits) / 6;
   1241     }
   1242 
   1243     zero_run = 0;
   1244     ncoeff = 0;
   1245 
   1246     for (k = 0; k < 4; k++) /* in zigzag scan order */
   1247     {
   1248         idx = ((k >> 1) << 6) + ((k & 1) << 2);
   1249         data = block[idx];
   1250         if (data > 0)
   1251         {
   1252             lev = data * quant + (qp_const << 1);
   1253         }
   1254         else
   1255         {
   1256             lev = -data * quant + (qp_const << 1);
   1257         }
   1258         lev >>= (q_bits + 1);
   1259         if (lev)
   1260         {
   1261             if (data > 0)
   1262             {
   1263                 level[ncoeff] = lev;
   1264                 block[idx] = lev;
   1265             }
   1266             else
   1267             {
   1268                 level[ncoeff] = -lev;
   1269                 block[idx] = -lev;
   1270             }
   1271             run[ncoeff++] = zero_run;
   1272             zero_run = 0;
   1273         }
   1274         else
   1275         {
   1276             zero_run++;
   1277             block[idx] = 0;
   1278         }
   1279     }
   1280     return ncoeff;
   1281 }
   1282 
   1283 
   1284