Home | History | Annotate | Download | only in src
/* ------------------------------------------------------------------
 * Copyright (C) 1998-2009 PacketVideo
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 * -------------------------------------------------------------------
 */
#include "avcdec_lib.h"

#include <string.h>
     19 
     20 #define CLIP_COMP  *comp++ = (uint8)(((uint)temp>0xFF)? 0xFF&(~(temp>>31)): temp)
     21 #define CLIP_RESULT(x)      if((uint)x > 0xFF){ \
     22                  x = 0xFF & (~(x>>31));}
     23 
     24 
/* We should combine the Intra4x4 functions with residual decoding and compensation  */
/*
 * Intra-predicts the current macroblock (video->currMB) and runs the inverse
 * transform for every 4x4 block whose bit is set in video->cbp4x4.
 *
 * Luma comes first.  For AVC_I4 each 4x4 block is predicted by Intra_4x4()
 * and transformed right away; for every other mbMode (treated as AVC_I16)
 * one of the 16x16 predictors selected by currMB->i16Mode is run and the
 * sixteen 4x4 blocks are then transformed in raster order.  Chroma follows,
 * selected by currMB->intra_chroma_pred_mode.
 *
 * itrans()/ictrans() are always called with the same buffer as source and
 * destination -- presumably they add the residual onto the prediction in
 * place; confirm against their definitions.
 *
 * Returns AVC_SUCCESS on completion, AVC_FAIL when the selected mode needs a
 * neighbor whose availability flag (intraAvailA / B / D) is not set, or the
 * non-success status returned by Intra_4x4().
 */
AVCStatus IntraMBPrediction(AVCCommonObj *video)
{
    int component, SubBlock_indx, temp;
    AVCStatus status;
    AVCMacroblock *currMB = video->currMB;
    AVCPictureData *currPic = video->currPic;
    uint8 *curL, *curCb, *curCr;
    uint8 *comp;
    int block_x, block_y, offset;
    int16 *dataBlock = video->block;
    uint8 *predCb, *predCr;
#ifdef USE_PRED_BLOCK
    uint8 *pred;
#endif
    int pitch = currPic->pitch;
    uint32 cbp4x4 = video->cbp4x4;

    /* luma offset of this macroblock: 16 rows per mb_y, 16 columns per mb_x */
    offset = (video->mb_y << 4) * pitch + (video->mb_x << 4);
    curL = currPic->Sl + offset;

#ifdef USE_PRED_BLOCK
    video->pred_block = video->pred + 84;  /* point to separate prediction memory */
    pred = video->pred_block;
    video->pred_pitch = 20;
#else
    video->pred_block = curL;   /* point directly to the frame buffer */
    video->pred_pitch = pitch;
#endif

    if (currMB->mbMode == AVC_I4)
    {
        /* luminance first */
        block_x = block_y = 0;
        /* outer loop: the four 8x8 quadrants; inner loop: the four 4x4 blocks
           of one quadrant, visited right, down-left, right, down-left */
        for (component = 0; component < 4; component++)
        {
            block_x = ((component & 1) << 1);
            block_y = ((component >> 1) << 1);
            comp = curL;// + (block_x<<2) + (block_y<<2)*currPic->pitch;

            for (SubBlock_indx = 0; SubBlock_indx < 4; SubBlock_indx++)
            {
                status = Intra_4x4(video, block_x, block_y, comp);
                if (status != AVC_SUCCESS)
                {
                    return status;
                }
                /* transform following the 4x4 prediction, can't be SIMD
                with other blocks. */
                /* bit (4 * block_y + block_x) of cbp4x4 gates this block */
#ifdef USE_PRED_BLOCK
                if (cbp4x4&(1 << ((block_y << 2) + block_x)))
                {
                    itrans(dataBlock, pred, pred, 20);
                }
#else
                if (cbp4x4&(1 << ((block_y << 2) + block_x)))
                {
                    itrans(dataBlock, comp, comp, pitch);
                }
#endif
                temp = SubBlock_indx & 1;
                if (temp)
                {
                    /* odd sub-block: step to the block below-left.
                       60 = 4 * 16 - 4 and 76 = 4 * 20 - 4, i.e. four rows down
                       and four samples back (the 16-wide coefficient row is
                       inferred from these constants -- confirm) */
                    block_y++;
                    block_x--;
                    dataBlock += 60;
#ifdef USE_PRED_BLOCK
                    pred += 76;
#else
                    comp += ((pitch << 2) - 4);
#endif
                }
                else
                {
                    /* even sub-block: step to the block on the right */
                    block_x++;
                    dataBlock += 4;
#ifdef USE_PRED_BLOCK
                    pred += 4;
#else
                    comp += 4;
#endif
                }
            }
            /* the inner loop advanced dataBlock by 128 and pred by 160; the
               corrections below leave them at the start of the next quadrant */
            if (component&1)
            {
                /* right quadrant done: continue with the left quadrant of the
                   next 8-row band */
#ifdef USE_PRED_BLOCK
                pred -= 8;
#else
                curL += (pitch << 3) - 8;
#endif
                dataBlock -= 8;
            }
            else
            {
                /* left quadrant done: continue with the quadrant to its right */
#ifdef USE_PRED_BLOCK
                pred -= 152;
#else
                curL += 8;
#endif
                dataBlock -= 120;
            }
        }
        /* drop the 16 luma bits so the chroma loops below can test bit 0 */
        cbp4x4 >>= 16;
    }
    else   /* AVC_I16 */
    {
#ifdef MB_BASED_DEBLOCK
        /* neighbors come from the saved line buffers; the left neighbors are
           stored contiguously, hence a pitch of 1 for the predictors */
        video->pintra_pred_top = video->intra_pred_top + (video->mb_x << 4);
        video->pintra_pred_left = video->intra_pred_left + 1;
        video->intra_pred_topleft = video->intra_pred_left[0];
        pitch = 1;
#else
        /* neighbors are read straight from the frame buffer */
        video->pintra_pred_top = curL - pitch;
        video->pintra_pred_left = curL - 1;
        if (video->mb_y)
        {
            /* only read the top-left sample when a row above exists */
            video->intra_pred_topleft = *(curL - pitch - 1);
        }
#endif
        switch (currMB->i16Mode)
        {
            case AVC_I16_Vertical:      /* Intra_16x16_Vertical */
                /* check availability of top */
                if (video->intraAvailB)
                {
                    Intra_16x16_Vertical(video);
                }
                else
                {
                    return AVC_FAIL;
                }
                break;
            case AVC_I16_Horizontal:        /* Intra_16x16_Horizontal */
                /* check availability of left */
                if (video->intraAvailA)
                {
                    Intra_16x16_Horizontal(video, pitch);
                }
                else
                {
                    return AVC_FAIL;
                }
                break;
            case AVC_I16_DC:        /* Intra_16x16_DC */
                Intra_16x16_DC(video, pitch);
                break;
            case AVC_I16_Plane:     /* Intra_16x16_Plane */
                if (video->intraAvailA && video->intraAvailB && video->intraAvailD)
                {
                    Intra_16x16_Plane(video, pitch);
                }
                else
                {
                    return AVC_FAIL;
                }
                break;
            default:
                /* NOTE(review): an unrecognized i16Mode runs no predictor and
                   still falls through to the transform below */
                break;
        }

        /* restore the picture pitch (it was set to 1 under MB_BASED_DEBLOCK) */
        pitch = currPic->pitch;

        /* transform */
        /* can go in raster scan order now */
        /* can be done in SIMD,  */
        for (block_y = 4; block_y > 0; block_y--)
        {
            for (block_x = 4; block_x > 0; block_x--)
            {
#ifdef USE_PRED_BLOCK
                if (cbp4x4&1)
                {
                    itrans(dataBlock, pred, pred, 20);
                }
#else
                if (cbp4x4&1)
                {
                    itrans(dataBlock, curL, curL, pitch);
                }
#endif
                /* one cbp bit per block, consumed in raster order */
                cbp4x4 >>= 1;
                dataBlock += 4;
#ifdef USE_PRED_BLOCK
                pred += 4;
#else
                curL += 4;
#endif
            }
            /* a row of four blocks advanced the pointers by 16; add the rest
               needed to land four rows further down */
            dataBlock += 48;
#ifdef USE_PRED_BLOCK
            pred += 64;
#else
            curL += ((pitch << 2) - 16);
#endif
        }
    }

    /* chroma offset derived from the luma offset: a quarter of it plus
       4 * mb_x, which equals 8 * mb_y * (pitch / 2) + 8 * mb_x */
    offset = (offset >> 2) + (video->mb_x << 2); //((video->mb_y << 3)* pitch + (video->mb_x << 3));
    curCb = currPic->Scb + offset;
    curCr = currPic->Scr + offset;

#ifdef MB_BASED_DEBLOCK
    /* chroma neighbors from the saved line buffers, left column contiguous */
    video->pintra_pred_top_cb = video->intra_pred_top_cb + (video->mb_x << 3);
    video->pintra_pred_left_cb = video->intra_pred_left_cb + 1;
    video->intra_pred_topleft_cb = video->intra_pred_left_cb[0];
    video->pintra_pred_top_cr = video->intra_pred_top_cr + (video->mb_x << 3);
    video->pintra_pred_left_cr = video->intra_pred_left_cr + 1;
    video->intra_pred_topleft_cr = video->intra_pred_left_cr[0];
    pitch  = 1;
#else
    /* chroma neighbors from the frame buffer; chroma pitch is half the luma pitch */
    pitch >>= 1;
    video->pintra_pred_top_cb = curCb - pitch;
    video->pintra_pred_left_cb = curCb - 1;
    video->pintra_pred_top_cr = curCr - pitch;
    video->pintra_pred_left_cr = curCr - 1;

    if (video->mb_y)
    {
        video->intra_pred_topleft_cb = *(curCb - pitch - 1);
        video->intra_pred_topleft_cr = *(curCr - pitch - 1);
    }
#endif

#ifdef USE_PRED_BLOCK
    /* chroma predictions live in video->pred as well, Cr 144 bytes after Cb,
       both with a pitch of 12 */
    predCb = video->pred + 452;
    predCr = predCb + 144;
    video->pred_pitch = 12;
#else
    predCb = curCb;
    predCr = curCr;
    video->pred_pitch = currPic->pitch >> 1;
#endif
    /* chrominance */
    switch (currMB->intra_chroma_pred_mode)
    {
        case AVC_IC_DC:     /* Intra_Chroma_DC */
            Intra_Chroma_DC(video, pitch, predCb, predCr);
            break;
        case AVC_IC_Horizontal:     /* Intra_Chroma_Horizontal */
            if (video->intraAvailA)
            {
                /* check availability of left */
                Intra_Chroma_Horizontal(video, pitch, predCb, predCr);
            }
            else
            {
                return AVC_FAIL;
            }
            break;
        case AVC_IC_Vertical:       /* Intra_Chroma_Vertical */
            if (video->intraAvailB)
            {
                /* check availability of top */
                Intra_Chroma_Vertical(video, predCb, predCr);
            }
            else
            {
                return AVC_FAIL;
            }
            break;
        case AVC_IC_Plane:      /* Intra_Chroma_Plane */
            if (video->intraAvailA && video->intraAvailB && video->intraAvailD)
            {
                /* check availability of top and left */
                Intra_Chroma_Plane(video, pitch, predCb, predCr);
            }
            else
            {
                return AVC_FAIL;
            }
            break;
        default:
            /* NOTE(review): an unrecognized chroma mode runs no predictor */
            break;
    }

    /* transform, done in raster scan manner */
    pitch = currPic->pitch >> 1;

    /* per block row: two Cb blocks, then two Cr blocks; dataBlock and the cbp
       bits are consumed in that interleaved order */
    for (block_y = 2; block_y > 0; block_y--)
    {
        for (block_x = 2; block_x > 0; block_x--)
        {
#ifdef USE_PRED_BLOCK
            if (cbp4x4&1)
            {
                ictrans(dataBlock, predCb, predCb, 12);
            }
#else
            if (cbp4x4&1)
            {
                ictrans(dataBlock, curCb, curCb, pitch);
            }
#endif
            cbp4x4 >>= 1;
            dataBlock += 4;
#ifdef USE_PRED_BLOCK
            predCb += 4;
#else
            curCb += 4;
#endif
        }
        for (block_x = 2; block_x > 0; block_x--)
        {
#ifdef USE_PRED_BLOCK
            if (cbp4x4&1)
            {
                ictrans(dataBlock, predCr, predCr, 12);
            }
#else
            if (cbp4x4&1)
            {
                ictrans(dataBlock, curCr, curCr, pitch);
            }
#endif
            cbp4x4 >>= 1;
            dataBlock += 4;
#ifdef USE_PRED_BLOCK
            predCr += 4;
#else
            curCr += 4;
#endif
        }
        /* move every pointer four rows down, back to the first column */
        dataBlock += 48;
#ifdef USE_PRED_BLOCK
        predCb += 40;
        predCr += 40;
#else
        curCb += ((pitch << 2) - 8);
        curCr += ((pitch << 2) - 8);
#endif
    }

#ifdef MB_BASED_DEBLOCK
    /* offset is the chroma offset at this point */
    SaveNeighborForIntraPred(video, offset);
#endif
    return AVC_SUCCESS;
}
    362 
    363 #ifdef MB_BASED_DEBLOCK
    364 void SaveNeighborForIntraPred(AVCCommonObj *video, int offset)
    365 {
    366     AVCPictureData *currPic = video->currPic;
    367     int pitch;
    368     uint8 *pred, *predCb, *predCr;
    369     uint8 *tmp_ptr, tmp_byte;
    370     uint32 tmp_word;
    371     int mb_x = video->mb_x;
    372 
    373     /* save the value for intra prediction  */
    374 #ifdef USE_PRED_BLOCK
    375     pitch = 20;
    376     pred = video->pred + 384; /* bottom line for Y */
    377     predCb = pred + 152;    /* bottom line for Cb */
    378     predCr = predCb + 144;  /* bottom line for Cr */
    379 #else
    380     pitch = currPic->pitch;
    381     tmp_word = offset + (pitch << 2) - (pitch >> 1);
    382     predCb = currPic->Scb + tmp_word;/* bottom line for Cb */
    383     predCr = currPic->Scr + tmp_word;/* bottom line for Cr */
    384 
    385     offset = (offset << 2) - (mb_x << 4);
    386     pred = currPic->Sl + offset + (pitch << 4) - pitch;/* bottom line for Y */
    387 
    388 #endif
    389 
    390     video->intra_pred_topleft = video->intra_pred_top[(mb_x<<4)+15];
    391     video->intra_pred_topleft_cb = video->intra_pred_top_cb[(mb_x<<3)+7];
    392     video->intra_pred_topleft_cr = video->intra_pred_top_cr[(mb_x<<3)+7];
    393 
    394     /* then copy to video->intra_pred_top, intra_pred_top_cb, intra_pred_top_cr */
    395     /*memcpy(video->intra_pred_top + (mb_x<<4), pred, 16);
    396     memcpy(video->intra_pred_top_cb + (mb_x<<3), predCb, 8);
    397     memcpy(video->intra_pred_top_cr + (mb_x<<3), predCr, 8);*/
    398     tmp_ptr = video->intra_pred_top + (mb_x << 4);
    399     *((uint32*)tmp_ptr) = *((uint32*)pred);
    400     *((uint32*)(tmp_ptr + 4)) = *((uint32*)(pred + 4));
    401     *((uint32*)(tmp_ptr + 8)) = *((uint32*)(pred + 8));
    402     *((uint32*)(tmp_ptr + 12)) = *((uint32*)(pred + 12));
    403     tmp_ptr = video->intra_pred_top_cb + (mb_x << 3);
    404     *((uint32*)tmp_ptr) = *((uint32*)predCb);
    405     *((uint32*)(tmp_ptr + 4)) = *((uint32*)(predCb + 4));
    406     tmp_ptr = video->intra_pred_top_cr + (mb_x << 3);
    407     *((uint32*)tmp_ptr) = *((uint32*)predCr);
    408     *((uint32*)(tmp_ptr + 4)) = *((uint32*)(predCr + 4));
    409 
    410 
    411     /* now save last column */
    412 #ifdef USE_PRED_BLOCK
    413     pred = video->pred + 99;    /* last column*/
    414 #else
    415     pred -= ((pitch << 4) - pitch - 15);    /* last column */
    416 #endif
    417     tmp_ptr = video->intra_pred_left;
    418     tmp_word = video->intra_pred_topleft;
    419     tmp_byte = *(pred);
    420     tmp_word |= (tmp_byte << 8);
    421     tmp_byte = *(pred += pitch);
    422     tmp_word |= (tmp_byte << 16);
    423     tmp_byte = *(pred += pitch);
    424     tmp_word |= (tmp_byte << 24);
    425     *((uint32*)tmp_ptr) = tmp_word;
    426     tmp_word = *(pred += pitch);
    427     tmp_byte = *(pred += pitch);
    428     tmp_word |= (tmp_byte << 8);
    429     tmp_byte = *(pred += pitch);
    430     tmp_word |= (tmp_byte << 16);
    431     tmp_byte = *(pred += pitch);
    432     tmp_word |= (tmp_byte << 24);
    433     *((uint32*)(tmp_ptr += 4)) = tmp_word;
    434     tmp_word = *(pred += pitch);
    435     tmp_byte = *(pred += pitch);
    436     tmp_word |= (tmp_byte << 8);
    437     tmp_byte = *(pred += pitch);
    438     tmp_word |= (tmp_byte << 16);
    439     tmp_byte = *(pred += pitch);
    440     tmp_word |= (tmp_byte << 24);
    441     *((uint32*)(tmp_ptr += 4)) = tmp_word;
    442     tmp_word = *(pred += pitch);
    443     tmp_byte = *(pred += pitch);
    444     tmp_word |= (tmp_byte << 8);
    445     tmp_byte = *(pred += pitch);
    446     tmp_word |= (tmp_byte << 16);
    447     tmp_byte = *(pred += pitch);
    448     tmp_word |= (tmp_byte << 24);
    449     *((uint32*)(tmp_ptr += 4)) = tmp_word;
    450     *(tmp_ptr += 4) = *(pred += pitch);
    451 
    452     /* now for Cb */
    453 #ifdef USE_PRED_BLOCK
    454     predCb = video->pred + 459;
    455     pitch = 12;
    456 #else
    457     pitch >>= 1;
    458     predCb -= (7 * pitch - 7);
    459 #endif
    460     tmp_ptr = video->intra_pred_left_cb;
    461     tmp_word = video->intra_pred_topleft_cb;
    462     tmp_byte = *(predCb);
    463     tmp_word |= (tmp_byte << 8);
    464     tmp_byte = *(predCb += pitch);
    465     tmp_word |= (tmp_byte << 16);
    466     tmp_byte = *(predCb += pitch);
    467     tmp_word |= (tmp_byte << 24);
    468     *((uint32*)tmp_ptr) = tmp_word;
    469     tmp_word = *(predCb += pitch);
    470     tmp_byte = *(predCb += pitch);
    471     tmp_word |= (tmp_byte << 8);
    472     tmp_byte = *(predCb += pitch);
    473     tmp_word |= (tmp_byte << 16);
    474     tmp_byte = *(predCb += pitch);
    475     tmp_word |= (tmp_byte << 24);
    476     *((uint32*)(tmp_ptr += 4)) = tmp_word;
    477     *(tmp_ptr += 4) = *(predCb += pitch);
    478 
    479     /* now for Cr */
    480 #ifdef USE_PRED_BLOCK
    481     predCr = video->pred + 603;
    482 #else
    483     predCr -= (7 * pitch - 7);
    484 #endif
    485     tmp_ptr = video->intra_pred_left_cr;
    486     tmp_word = video->intra_pred_topleft_cr;
    487     tmp_byte = *(predCr);
    488     tmp_word |= (tmp_byte << 8);
    489     tmp_byte = *(predCr += pitch);
    490     tmp_word |= (tmp_byte << 16);
    491     tmp_byte = *(predCr += pitch);
    492     tmp_word |= (tmp_byte << 24);
    493     *((uint32*)tmp_ptr) = tmp_word;
    494     tmp_word = *(predCr += pitch);
    495     tmp_byte = *(predCr += pitch);
    496     tmp_word |= (tmp_byte << 8);
    497     tmp_byte = *(predCr += pitch);
    498     tmp_word |= (tmp_byte << 16);
    499     tmp_byte = *(predCr += pitch);
    500     tmp_word |= (tmp_byte << 24);
    501     *((uint32*)(tmp_ptr += 4)) = tmp_word;
    502     *(tmp_ptr += 4) = *(predCr += pitch);
    503 
    504     return ;
    505 }
    506 #endif /* MB_BASED_DEBLOCK */
    507 
    508 AVCStatus Intra_4x4(AVCCommonObj *video, int block_x, int block_y, uint8 *comp)
    509 {
    510     AVCMacroblock *currMB = video->currMB;
    511     int block_offset;
    512     AVCNeighborAvailability availability;
    513     int pitch = video->currPic->pitch;
    514 
    515 #ifdef USE_PRED_BLOCK
    516     block_offset = (block_y * 80) + (block_x << 2);
    517 #else
    518     block_offset = (block_y << 2) * pitch + (block_x << 2);
    519 #endif
    520 
    521 #ifdef MB_BASED_DEBLOCK
    522     /* boundary blocks use video->pred_intra_top, pred_intra_left, pred_intra_topleft */
    523     if (!block_x)
    524     {
    525         video->pintra_pred_left = video->intra_pred_left + 1 + (block_y << 2);
    526         pitch = 1;
    527     }
    528     else
    529     {
    530         video->pintra_pred_left = video->pred_block + block_offset - 1;
    531         pitch = video->pred_pitch;
    532     }
    533 
    534     if (!block_y)
    535     {
    536         video->pintra_pred_top = video->intra_pred_top + (block_x << 2) + (video->mb_x << 4);
    537     }
    538     else
    539     {
    540         video->pintra_pred_top = video->pred_block + block_offset - video->pred_pitch;
    541     }
    542 
    543     if (!block_x)
    544     {
    545         video->intra_pred_topleft = video->intra_pred_left[block_y<<2];
    546     }
    547     else if (!block_y)
    548     {
    549         video->intra_pred_topleft = video->intra_pred_top[(video->mb_x<<4)+(block_x<<2)-1];
    550     }
    551     else
    552     {
    553         video->intra_pred_topleft = video->pred_block[block_offset - video->pred_pitch - 1];
    554     }
    555 
    556 #else
    557     /* normal case */
    558     video->pintra_pred_top = comp - pitch;
    559     video->pintra_pred_left = comp - 1;
    560     if (video->mb_y || block_y)
    561     {
    562         video->intra_pred_topleft = *(comp - pitch - 1);
    563     }
    564 #endif
    565 
    566     switch (currMB->i4Mode[(block_y << 2) + block_x])
    567     {
    568         case AVC_I4_Vertical:       /* Intra_4x4_Vertical */
    569             if (block_y > 0 || video->intraAvailB)/* to prevent out-of-bound access*/
    570             {
    571                 Intra_4x4_Vertical(video,  block_offset);
    572             }
    573             else
    574             {
    575                 return AVC_FAIL;
    576             }
    577             break;
    578 
    579         case AVC_I4_Horizontal:     /* Intra_4x4_Horizontal */
    580             if (block_x || video->intraAvailA)  /* to prevent out-of-bound access */
    581             {
    582                 Intra_4x4_Horizontal(video, pitch, block_offset);
    583             }
    584             else
    585             {
    586                 return AVC_FAIL;
    587             }
    588             break;
    589 
    590         case AVC_I4_DC:     /* Intra_4x4_DC */
    591             availability.left = TRUE;
    592             availability.top = TRUE;
    593             if (!block_y)
    594             { /* check availability up */
    595                 availability.top = video->intraAvailB ;
    596             }
    597             if (!block_x)
    598             { /* check availability left */
    599                 availability.left = video->intraAvailA ;
    600             }
    601             Intra_4x4_DC(video, pitch, block_offset, &availability);
    602             break;
    603 
    604         case AVC_I4_Diagonal_Down_Left:     /* Intra_4x4_Diagonal_Down_Left */
    605             /* lookup table will be more appropriate for this case  */
    606             if (block_y == 0 && !video->intraAvailB)
    607             {
    608                 return AVC_FAIL;
    609             }
    610 
    611             availability.top_right = BlkTopRight[(block_y<<2) + block_x];
    612 
    613             if (availability.top_right == 2)
    614             {
    615                 availability.top_right = video->intraAvailB;
    616             }
    617             else if (availability.top_right == 3)
    618             {
    619                 availability.top_right = video->intraAvailC;
    620             }
    621 
    622             Intra_4x4_Down_Left(video, block_offset, &availability);
    623             break;
    624 
    625         case AVC_I4_Diagonal_Down_Right:        /* Intra_4x4_Diagonal_Down_Right */
    626             if ((block_y && block_x)  /* to prevent out-of-bound access */
    627                     || (block_y && video->intraAvailA)
    628                     || (block_x && video->intraAvailB)
    629                     || (video->intraAvailA && video->intraAvailD && video->intraAvailB))
    630             {
    631                 Intra_4x4_Diagonal_Down_Right(video, pitch, block_offset);
    632             }
    633             else
    634             {
    635                 return AVC_FAIL;
    636             }
    637             break;
    638 
    639         case AVC_I4_Vertical_Right:     /* Intra_4x4_Vertical_Right */
    640             if ((block_y && block_x)  /* to prevent out-of-bound access */
    641                     || (block_y && video->intraAvailA)
    642                     || (block_x && video->intraAvailB)
    643                     || (video->intraAvailA && video->intraAvailD && video->intraAvailB))
    644             {
    645                 Intra_4x4_Diagonal_Vertical_Right(video, pitch, block_offset);
    646             }
    647             else
    648             {
    649                 return AVC_FAIL;
    650             }
    651             break;
    652 
    653         case AVC_I4_Horizontal_Down:        /* Intra_4x4_Horizontal_Down */
    654             if ((block_y && block_x)  /* to prevent out-of-bound access */
    655                     || (block_y && video->intraAvailA)
    656                     || (block_x && video->intraAvailB)
    657                     || (video->intraAvailA && video->intraAvailD && video->intraAvailB))
    658             {
    659                 Intra_4x4_Diagonal_Horizontal_Down(video, pitch, block_offset);
    660             }
    661             else
    662             {
    663                 return AVC_FAIL;
    664             }
    665             break;
    666 
    667         case AVC_I4_Vertical_Left:      /* Intra_4x4_Vertical_Left */
    668             /* lookup table may be more appropriate for this case  */
    669             if (block_y == 0 && !video->intraAvailB)
    670             {
    671                 return AVC_FAIL;
    672             }
    673 
    674             availability.top_right = BlkTopRight[(block_y<<2) + block_x];
    675 
    676             if (availability.top_right == 2)
    677             {
    678                 availability.top_right = video->intraAvailB;
    679             }
    680             else if (availability.top_right == 3)
    681             {
    682                 availability.top_right = video->intraAvailC;
    683             }
    684 
    685             Intra_4x4_Vertical_Left(video,  block_offset, &availability);
    686             break;
    687 
    688         case AVC_I4_Horizontal_Up:      /* Intra_4x4_Horizontal_Up */
    689             if (block_x || video->intraAvailA)
    690             {
    691                 Intra_4x4_Horizontal_Up(video, pitch, block_offset);
    692             }
    693             else
    694             {
    695                 return AVC_FAIL;
    696             }
    697             break;
    698 
    699 
    700         default:
    701 
    702             break;
    703     }
    704 
    705     return AVC_SUCCESS;
    706 }
    707 
    708 
/* =============================== BEGIN 4x4 MODES ====================================== */
    711 void Intra_4x4_Vertical(AVCCommonObj *video,  int block_offset)
    712 {
    713     uint8 *comp_ref = video->pintra_pred_top;
    714     uint32 temp;
    715     uint8 *pred = video->pred_block + block_offset;
    716     int pred_pitch = video->pred_pitch;
    717 
    718     /*P = (int) *comp_ref++;
    719     Q = (int) *comp_ref++;
    720     R = (int) *comp_ref++;
    721     S = (int) *comp_ref++;
    722     temp = S|(R<<8)|(Q<<16)|(P<<24);*/
    723     temp = *((uint32*)comp_ref);
    724 
    725     *((uint32*)pred) =  temp; /* write 4 at a time */
    726     pred += pred_pitch;
    727     *((uint32*)pred) =  temp;
    728     pred += pred_pitch;
    729     *((uint32*)pred) =  temp;
    730     pred += pred_pitch;
    731     *((uint32*)pred) =  temp;
    732 
    733     return ;
    734 }
    735 
    736 void Intra_4x4_Horizontal(AVCCommonObj *video, int pitch, int block_offset)
    737 {
    738     uint8   *comp_ref = video->pintra_pred_left;
    739     uint32 temp;
    740     int P;
    741     uint8 *pred = video->pred_block + block_offset;
    742     int pred_pitch = video->pred_pitch;
    743 
    744     P = *comp_ref;
    745     temp = P | (P << 8);
    746     temp = temp | (temp << 16);
    747     *((uint32*)pred) = temp;
    748     pred += pred_pitch;
    749     comp_ref += pitch;
    750     P = *comp_ref;
    751     temp = P | (P << 8);
    752     temp = temp | (temp << 16);
    753     *((uint32*)pred) = temp;
    754     pred += pred_pitch;
    755     comp_ref += pitch;
    756     P = *comp_ref;
    757     temp = P | (P << 8);
    758     temp = temp | (temp << 16);
    759     *((uint32*)pred) = temp;
    760     pred += pred_pitch;
    761     comp_ref += pitch;
    762     P = *comp_ref;
    763     temp = P | (P << 8);
    764     temp = temp | (temp << 16);
    765     *((uint32*)pred) = temp;
    766 
    767     return ;
    768 }
    769 
    770 void Intra_4x4_DC(AVCCommonObj *video, int pitch, int block_offset,
    771                   AVCNeighborAvailability *availability)
    772 {
    773     uint8   *comp_ref = video->pintra_pred_left;
    774     uint32  temp;
    775     int DC;
    776     uint8 *pred = video->pred_block + block_offset;
    777     int pred_pitch = video->pred_pitch;
    778 
    779     if (availability->left)
    780     {
    781         DC = *comp_ref;
    782         comp_ref += pitch;
    783         DC += *comp_ref;
    784         comp_ref += pitch;
    785         DC += *comp_ref;
    786         comp_ref += pitch;
    787         DC += *comp_ref;
    788         comp_ref = video->pintra_pred_top;
    789 
    790         if (availability->top)
    791         {
    792             DC = (comp_ref[0] + comp_ref[1] + comp_ref[2] + comp_ref[3] + DC + 4) >> 3;
    793         }
    794         else
    795         {
    796             DC = (DC + 2) >> 2;
    797 
    798         }
    799     }
    800     else if (availability->top)
    801     {
    802         comp_ref = video->pintra_pred_top;
    803         DC = (comp_ref[0] + comp_ref[1] + comp_ref[2] + comp_ref[3] + 2) >> 2;
    804 
    805     }
    806     else
    807     {
    808         DC = 128;
    809     }
    810 
    811     temp = DC | (DC << 8);
    812     temp = temp | (temp << 16);
    813     *((uint32*)pred) = temp;
    814     pred += pred_pitch;
    815     *((uint32*)pred) = temp;
    816     pred += pred_pitch;
    817     *((uint32*)pred) = temp;
    818     pred += pred_pitch;
    819     *((uint32*)pred) = temp;
    820 
    821     return ;
    822 }
    823 
    824 void Intra_4x4_Down_Left(AVCCommonObj *video, int block_offset,
    825                          AVCNeighborAvailability *availability)
    826 {
    827     uint8   *comp_refx = video->pintra_pred_top;
    828     uint32 temp;
    829     int r0, r1, r2, r3, r4, r5, r6, r7;
    830     uint8 *pred = video->pred_block + block_offset;
    831     int pred_pitch = video->pred_pitch;
    832 
    833     r0 = *comp_refx++;
    834     r1 = *comp_refx++;
    835     r2 = *comp_refx++;
    836     r3 = *comp_refx++;
    837     if (availability->top_right)
    838     {
    839         r4 = *comp_refx++;
    840         r5 = *comp_refx++;
    841         r6 = *comp_refx++;
    842         r7 = *comp_refx++;
    843     }
    844     else
    845     {
    846         r4 = r3;
    847         r5 = r3;
    848         r6 = r3;
    849         r7 = r3;
    850     }
    851 
    852     r0 += (r1 << 1);
    853     r0 += r2;
    854     r0 += 2;
    855     r0 >>= 2;
    856     r1 += (r2 << 1);
    857     r1 += r3;
    858     r1 += 2;
    859     r1 >>= 2;
    860     r2 += (r3 << 1);
    861     r2 += r4;
    862     r2 += 2;
    863     r2 >>= 2;
    864     r3 += (r4 << 1);
    865     r3 += r5;
    866     r3 += 2;
    867     r3 >>= 2;
    868     r4 += (r5 << 1);
    869     r4 += r6;
    870     r4 += 2;
    871     r4 >>= 2;
    872     r5 += (r6 << 1);
    873     r5 += r7;
    874     r5 += 2;
    875     r5 >>= 2;
    876     r6 += (3 * r7);
    877     r6 += 2;
    878     r6 >>= 2;
    879 
    880     temp = r0 | (r1 << 8);
    881     temp |= (r2 << 16);
    882     temp |= (r3 << 24);
    883     *((uint32*)pred) = temp;
    884     pred += pred_pitch;
    885 
    886     temp = (temp >> 8) | (r4 << 24);
    887     *((uint32*)pred) = temp;
    888     pred += pred_pitch;
    889 
    890     temp = (temp >> 8) | (r5 << 24);
    891     *((uint32*)pred) = temp;
    892     pred += pred_pitch;
    893 
    894     temp = (temp >> 8) | (r6 << 24);
    895     *((uint32*)pred) = temp;
    896 
    897     return ;
    898 }
    899 
    900 void Intra_4x4_Diagonal_Down_Right(AVCCommonObj *video, int pitch, int
    901                                    block_offset)
    902 {
    903     uint8 *comp_refx = video->pintra_pred_top;
    904     uint8 *comp_refy = video->pintra_pred_left;
    905     uint32 temp;
    906     int P_x, Q_x, R_x, P_y, Q_y, R_y, D;
    907     int x0, x1, x2;
    908     uint8 *pred = video->pred_block + block_offset;
    909     int pred_pitch = video->pred_pitch;
    910 
    911     temp = *((uint32*)comp_refx); /* read 4 bytes */
    912     x0 = temp & 0xFF;
    913     x1 = (temp >> 8) & 0xFF;
    914     x2 = (temp >> 16) & 0xFF;
    915 
    916     Q_x = (x0 + 2 * x1 + x2 + 2) >> 2;
    917     R_x = (x1 + 2 * x2 + (temp >> 24) + 2) >> 2;
    918 
    919     x2 = video->intra_pred_topleft; /* re-use x2 instead of y0 */
    920     P_x = (x2 + 2 * x0 + x1 + 2) >> 2;
    921 
    922     x1 = *comp_refy;
    923     comp_refy += pitch; /* re-use x1 instead of y1 */
    924     D = (x0 + 2 * x2 + x1 + 2) >> 2;
    925 
    926     x0 = *comp_refy;
    927     comp_refy += pitch; /* re-use x0 instead of y2 */
    928     P_y = (x2 + 2 * x1 + x0 + 2) >> 2;
    929 
    930     x2 = *comp_refy;
    931     comp_refy += pitch; /* re-use x2 instead of y3 */
    932     Q_y = (x1 + 2 * x0 + x2 + 2) >> 2;
    933 
    934     x1 = *comp_refy;                    /* re-use x1 instead of y4 */
    935     R_y = (x0 + 2 * x2 + x1 + 2) >> 2;
    936 
    937     /* we can pack these  */
    938     temp =  D | (P_x << 8);   //[D   P_x Q_x R_x]
    939     //[P_y D   P_x Q_x]
    940     temp |= (Q_x << 16); //[Q_y P_y D   P_x]
    941     temp |= (R_x << 24);  //[R_y Q_y P_y D  ]
    942     *((uint32*)pred) = temp;
    943     pred += pred_pitch;
    944 
    945     temp =  P_y | (D << 8);
    946     temp |= (P_x << 16);
    947     temp |= (Q_x << 24);
    948     *((uint32*)pred) = temp;
    949     pred += pred_pitch;
    950 
    951     temp =  Q_y | (P_y << 8);
    952     temp |= (D << 16);
    953     temp |= (P_x << 24);
    954     *((uint32*)pred) = temp;
    955     pred += pred_pitch;
    956 
    957     temp = R_y | (Q_y << 8);
    958     temp |= (P_y << 16);
    959     temp |= (D << 24);
    960     *((uint32*)pred) = temp;
    961 
    962     return ;
    963 }
    964 
    965 void    Intra_4x4_Diagonal_Vertical_Right(AVCCommonObj *video, int pitch, int block_offset)
    966 {
    967     uint8   *comp_refx = video->pintra_pred_top;
    968     uint8   *comp_refy = video->pintra_pred_left;
    969     uint32 temp;
    970     int P0, Q0, R0, S0, P1, Q1, R1, P2, Q2, D;
    971     int x0, x1, x2;
    972     uint8 *pred = video->pred_block + block_offset;
    973     int pred_pitch = video->pred_pitch;
    974 
    975     x0 = *comp_refx++;
    976     x1 = *comp_refx++;
    977     Q0 = x0 + x1 + 1;
    978 
    979     x2 = *comp_refx++;
    980     R0 = x1 + x2 + 1;
    981 
    982     x1 = *comp_refx++; /* reuse x1 instead of x3 */
    983     S0 = x2 + x1 + 1;
    984 
    985     x1 = video->intra_pred_topleft; /* reuse x1 instead of y0 */
    986     P0 = x1 + x0 + 1;
    987 
    988     x2 = *comp_refy;
    989     comp_refy += pitch; /* reuse x2 instead of y1 */
    990     D = (x2 + 2 * x1 + x0 + 2) >> 2;
    991 
    992     P1 = (P0 + Q0) >> 2;
    993     Q1 = (Q0 + R0) >> 2;
    994     R1 = (R0 + S0) >> 2;
    995 
    996     P0 >>= 1;
    997     Q0 >>= 1;
    998     R0 >>= 1;
    999     S0 >>= 1;
   1000 
   1001     x0 = *comp_refy;
   1002     comp_refy += pitch; /* reuse x0 instead of y2 */
   1003     P2 = (x1 + 2 * x2 + x0 + 2) >> 2;
   1004     x1 = *comp_refy;
   1005     comp_refy += pitch; /* reuse x1 instead of y3 */
   1006     Q2 = (x2 + 2 * x0 + x1 + 2) >> 2;
   1007 
   1008     temp =  P0 | (Q0 << 8);  //[P0 Q0 R0 S0]
   1009     //[D  P1 Q1 R1]
   1010     temp |= (R0 << 16); //[P2 P0 Q0 R0]
   1011     temp |= (S0 << 24); //[Q2 D  P1 Q1]
   1012     *((uint32*)pred) =  temp;
   1013     pred += pred_pitch;
   1014 
   1015     temp =  D | (P1 << 8);
   1016     temp |= (Q1 << 16);
   1017     temp |= (R1 << 24);
   1018     *((uint32*)pred) =  temp;
   1019     pred += pred_pitch;
   1020 
   1021     temp = P2 | (P0 << 8);
   1022     temp |= (Q0 << 16);
   1023     temp |= (R0 << 24);
   1024     *((uint32*)pred) =  temp;
   1025     pred += pred_pitch;
   1026 
   1027     temp = Q2 | (D << 8);
   1028     temp |= (P1 << 16);
   1029     temp |= (Q1 << 24);
   1030     *((uint32*)pred) =  temp;
   1031 
   1032     return ;
   1033 }
   1034 
   1035 void Intra_4x4_Diagonal_Horizontal_Down(AVCCommonObj *video, int pitch,
   1036                                         int block_offset)
   1037 {
   1038     uint8   *comp_refx = video->pintra_pred_top;
   1039     uint8   *comp_refy = video->pintra_pred_left;
   1040     uint32 temp;
   1041     int P0, Q0, R0, S0, P1, Q1, R1, P2, Q2, D;
   1042     int x0, x1, x2;
   1043     uint8 *pred = video->pred_block + block_offset;
   1044     int pred_pitch = video->pred_pitch;
   1045 
   1046     x0 = *comp_refx++;
   1047     x1 = *comp_refx++;
   1048     x2 = *comp_refx++;
   1049     Q2 = (x0 + 2 * x1 + x2 + 2) >> 2;
   1050 
   1051     x2 = video->intra_pred_topleft; /* reuse x2 instead of y0 */
   1052     P2 = (x2 + 2 * x0 + x1 + 2) >> 2;
   1053 
   1054     x1 = *comp_refy;
   1055     comp_refy += pitch; /* reuse x1 instead of y1 */
   1056     D = (x1 + 2 * x2 + x0 + 2) >> 2;
   1057     P0 = x2 + x1 + 1;
   1058 
   1059     x0 = *comp_refy;
   1060     comp_refy += pitch; /* reuse x0 instead of y2 */
   1061     Q0 = x1 + x0 + 1;
   1062 
   1063     x1 = *comp_refy;
   1064     comp_refy += pitch; /* reuse x1 instead of y3 */
   1065     R0 = x0 + x1 + 1;
   1066 
   1067     x2 = *comp_refy;    /* reuse x2 instead of y4 */
   1068     S0 = x1 + x2 + 1;
   1069 
   1070     P1 = (P0 + Q0) >> 2;
   1071     Q1 = (Q0 + R0) >> 2;
   1072     R1 = (R0 + S0) >> 2;
   1073 
   1074     P0 >>= 1;
   1075     Q0 >>= 1;
   1076     R0 >>= 1;
   1077     S0 >>= 1;
   1078 
   1079 
   1080     /* we can pack these  */
   1081     temp = P0 | (D << 8);   //[P0 D  P2 Q2]
   1082     //[Q0 P1 P0 D ]
   1083     temp |= (P2 << 16);  //[R0 Q1 Q0 P1]
   1084     temp |= (Q2 << 24); //[S0 R1 R0 Q1]
   1085     *((uint32*)pred) = temp;
   1086     pred += pred_pitch;
   1087 
   1088     temp = Q0 | (P1 << 8);
   1089     temp |= (P0 << 16);
   1090     temp |= (D << 24);
   1091     *((uint32*)pred) = temp;
   1092     pred += pred_pitch;
   1093 
   1094     temp = R0 | (Q1 << 8);
   1095     temp |= (Q0 << 16);
   1096     temp |= (P1 << 24);
   1097     *((uint32*)pred) = temp;
   1098     pred += pred_pitch;
   1099 
   1100     temp = S0 | (R1 << 8);
   1101     temp |= (R0 << 16);
   1102     temp |= (Q1 << 24);
   1103     *((uint32*)pred) = temp;
   1104 
   1105     return ;
   1106 }
   1107 
   1108 void Intra_4x4_Vertical_Left(AVCCommonObj *video, int block_offset, AVCNeighborAvailability *availability)
   1109 {
   1110     uint8   *comp_refx = video->pintra_pred_top;
   1111     uint32 temp1, temp2;
   1112     int x0, x1, x2, x3, x4, x5, x6;
   1113     uint8 *pred = video->pred_block + block_offset;
   1114     int pred_pitch = video->pred_pitch;
   1115 
   1116     x0 = *comp_refx++;
   1117     x1 = *comp_refx++;
   1118     x2 = *comp_refx++;
   1119     x3 = *comp_refx++;
   1120     if (availability->top_right)
   1121     {
   1122         x4 = *comp_refx++;
   1123         x5 = *comp_refx++;
   1124         x6 = *comp_refx++;
   1125     }
   1126     else
   1127     {
   1128         x4 = x3;
   1129         x5 = x3;
   1130         x6 = x3;
   1131     }
   1132 
   1133     x0 += x1 + 1;
   1134     x1 += x2 + 1;
   1135     x2 += x3 + 1;
   1136     x3 += x4 + 1;
   1137     x4 += x5 + 1;
   1138     x5 += x6 + 1;
   1139 
   1140     temp1 = (x0 >> 1);
   1141     temp1 |= ((x1 >> 1) << 8);
   1142     temp1 |= ((x2 >> 1) << 16);
   1143     temp1 |= ((x3 >> 1) << 24);
   1144 
   1145     *((uint32*)pred) = temp1;
   1146     pred += pred_pitch;
   1147 
   1148     temp2 = ((x0 + x1) >> 2);
   1149     temp2 |= (((x1 + x2) >> 2) << 8);
   1150     temp2 |= (((x2 + x3) >> 2) << 16);
   1151     temp2 |= (((x3 + x4) >> 2) << 24);
   1152 
   1153     *((uint32*)pred) = temp2;
   1154     pred += pred_pitch;
   1155 
   1156     temp1 = (temp1 >> 8) | ((x4 >> 1) << 24);   /* rotate out old value */
   1157     *((uint32*)pred) = temp1;
   1158     pred += pred_pitch;
   1159 
   1160     temp2 = (temp2 >> 8) | (((x4 + x5) >> 2) << 24); /* rotate out old value */
   1161     *((uint32*)pred) = temp2;
   1162     pred += pred_pitch;
   1163 
   1164     return ;
   1165 }
   1166 
   1167 void Intra_4x4_Horizontal_Up(AVCCommonObj *video, int pitch, int block_offset)
   1168 {
   1169     uint8   *comp_refy = video->pintra_pred_left;
   1170     uint32 temp;
   1171     int Q0, R0, Q1, D0, D1, P0, P1;
   1172     int y0, y1, y2, y3;
   1173     uint8 *pred = video->pred_block + block_offset;
   1174     int pred_pitch = video->pred_pitch;
   1175 
   1176     y0 = *comp_refy;
   1177     comp_refy += pitch;
   1178     y1 = *comp_refy;
   1179     comp_refy += pitch;
   1180     y2 = *comp_refy;
   1181     comp_refy += pitch;
   1182     y3 = *comp_refy;
   1183 
   1184     Q0 = (y1 + y2 + 1) >> 1;
   1185     Q1 = (y1 + (y2 << 1) + y3 + 2) >> 2;
   1186     P0 = ((y0 + y1 + 1) >> 1);
   1187     P1 = ((y0 + (y1 << 1) + y2 + 2) >> 2);
   1188 
   1189     temp = P0 | (P1 << 8);      // [P0 P1 Q0 Q1]
   1190     temp |= (Q0 << 16);     // [Q0 Q1 R0 DO]
   1191     temp |= (Q1 << 24);     // [R0 D0 D1 D1]
   1192     *((uint32*)pred) = temp;      // [D1 D1 D1 D1]
   1193     pred += pred_pitch;
   1194 
   1195     D0 = (y2 + 3 * y3 + 2) >> 2;
   1196     R0 = (y2 + y3 + 1) >> 1;
   1197 
   1198     temp = Q0 | (Q1 << 8);
   1199     temp |= (R0 << 16);
   1200     temp |= (D0 << 24);
   1201     *((uint32*)pred) = temp;
   1202     pred += pred_pitch;
   1203 
   1204     D1 = y3;
   1205 
   1206     temp = R0 | (D0 << 8);
   1207     temp |= (D1 << 16);
   1208     temp |= (D1 << 24);
   1209     *((uint32*)pred) = temp;
   1210     pred += pred_pitch;
   1211 
   1212     temp = D1 | (D1 << 8);
   1213     temp |= (temp << 16);
   1214     *((uint32*)pred) = temp;
   1215 
   1216     return ;
   1217 }
   1218 /* =============================== END 4x4 MODES======================================*/
   1219 void  Intra_16x16_Vertical(AVCCommonObj *video)
   1220 {
   1221     int i;
   1222     uint32 temp1, temp2, temp3, temp4;
   1223     uint8   *comp_ref = video->pintra_pred_top;
   1224     uint8 *pred = video->pred_block;
   1225     int pred_pitch = video->pred_pitch;
   1226 
   1227     temp1 = *((uint32*)comp_ref);
   1228     comp_ref += 4;
   1229 
   1230     temp2 = *((uint32*)comp_ref);
   1231     comp_ref += 4;
   1232 
   1233     temp3 = *((uint32*)comp_ref);
   1234     comp_ref += 4;
   1235 
   1236     temp4 = *((uint32*)comp_ref);
   1237     comp_ref += 4;
   1238 
   1239     i = 16;
   1240     while (i > 0)
   1241     {
   1242         *((uint32*)pred) = temp1;
   1243         *((uint32*)(pred + 4)) = temp2;
   1244         *((uint32*)(pred + 8)) = temp3;
   1245         *((uint32*)(pred + 12)) = temp4;
   1246         pred += pred_pitch;
   1247         i--;
   1248     }
   1249 
   1250     return ;
   1251 }
   1252 
   1253 void Intra_16x16_Horizontal(AVCCommonObj *video, int pitch)
   1254 {
   1255     int i;
   1256     uint32 temp;
   1257     uint8 *comp_ref = video->pintra_pred_left;
   1258     uint8 *pred = video->pred_block;
   1259     int pred_pitch = video->pred_pitch;
   1260 
   1261     for (i = 0; i < 16; i++)
   1262     {
   1263         temp = *comp_ref;
   1264         temp |= (temp << 8);
   1265         temp |= (temp << 16);
   1266         *((uint32*)pred) = temp;
   1267         *((uint32*)(pred + 4)) = temp;
   1268         *((uint32*)(pred + 8)) = temp;
   1269         *((uint32*)(pred + 12)) = temp;
   1270         pred += pred_pitch;
   1271         comp_ref += pitch;
   1272     }
   1273 }
   1274 
   1275 
   1276 void  Intra_16x16_DC(AVCCommonObj *video, int pitch)
   1277 {
   1278     int i;
   1279     uint32 temp, temp2;
   1280     uint8 *comp_ref_x = video->pintra_pred_top;
   1281     uint8 *comp_ref_y = video->pintra_pred_left;
   1282     int sum = 0;
   1283     uint8 *pred = video->pred_block;
   1284     int pred_pitch = video->pred_pitch;
   1285 
   1286     if (video->intraAvailB)
   1287     {
   1288         temp = *((uint32*)comp_ref_x);
   1289         comp_ref_x += 4;
   1290         temp2 = (temp >> 8) & 0xFF00FF;
   1291         temp &= 0xFF00FF;
   1292         temp += temp2;
   1293         sum = temp + (temp >> 16);
   1294         temp = *((uint32*)comp_ref_x);
   1295         comp_ref_x += 4;
   1296         temp2 = (temp >> 8) & 0xFF00FF;
   1297         temp &= 0xFF00FF;
   1298         temp += temp2;
   1299         sum += temp + (temp >> 16);
   1300         temp = *((uint32*)comp_ref_x);
   1301         comp_ref_x += 4;
   1302         temp2 = (temp >> 8) & 0xFF00FF;
   1303         temp &= 0xFF00FF;
   1304         temp += temp2;
   1305         sum += temp + (temp >> 16);
   1306         temp = *((uint32*)comp_ref_x);
   1307         comp_ref_x += 4;
   1308         temp2 = (temp >> 8) & 0xFF00FF;
   1309         temp &= 0xFF00FF;
   1310         temp += temp2;
   1311         sum += temp + (temp >> 16);
   1312         sum &= 0xFFFF;
   1313 
   1314         if (video->intraAvailA)
   1315         {
   1316             for (i = 0; i < 16; i++)
   1317             {
   1318                 sum += (*comp_ref_y);
   1319                 comp_ref_y += pitch;
   1320             }
   1321             sum = (sum + 16) >> 5;
   1322         }
   1323         else
   1324         {
   1325             sum = (sum + 8) >> 4;
   1326         }
   1327     }
   1328     else if (video->intraAvailA)
   1329     {
   1330         for (i = 0; i < 16; i++)
   1331         {
   1332             sum += *comp_ref_y;
   1333             comp_ref_y += pitch;
   1334         }
   1335         sum = (sum + 8) >> 4;
   1336     }
   1337     else
   1338     {
   1339         sum = 128;
   1340     }
   1341 
   1342     temp = sum | (sum << 8);
   1343     temp |= (temp << 16);
   1344 
   1345     for (i = 0; i < 16; i++)
   1346     {
   1347         *((uint32*)pred) = temp;
   1348         *((uint32*)(pred + 4)) = temp;
   1349         *((uint32*)(pred + 8)) = temp;
   1350         *((uint32*)(pred + 12)) = temp;
   1351         pred += pred_pitch;
   1352     }
   1353 
   1354 }
   1355 
   1356 void Intra_16x16_Plane(AVCCommonObj *video, int pitch)
   1357 {
   1358     int i, a_16, b, c, factor_c;
   1359     uint8 *comp_ref_x = video->pintra_pred_top;
   1360     uint8 *comp_ref_y = video->pintra_pred_left;
   1361     uint8 *comp_ref_x0, *comp_ref_x1, *comp_ref_y0, *comp_ref_y1;
   1362     int H = 0, V = 0 , tmp;
   1363     uint8 *pred = video->pred_block;
   1364     uint32 temp;
   1365     uint8 byte1, byte2, byte3;
   1366     int value;
   1367     int pred_pitch = video->pred_pitch;
   1368 
   1369     comp_ref_x0 = comp_ref_x + 8;
   1370     comp_ref_x1 = comp_ref_x + 6;
   1371     comp_ref_y0 = comp_ref_y + (pitch << 3);
   1372     comp_ref_y1 = comp_ref_y + 6 * pitch;
   1373 
   1374     for (i = 1; i < 8; i++)
   1375     {
   1376         H += i * (*comp_ref_x0++ - *comp_ref_x1--);
   1377         V += i * (*comp_ref_y0 - *comp_ref_y1);
   1378         comp_ref_y0 += pitch;
   1379         comp_ref_y1 -= pitch;
   1380     }
   1381 
   1382     H += i * (*comp_ref_x0++ - video->intra_pred_topleft);
   1383     V += i * (*comp_ref_y0 - *comp_ref_y1);
   1384 
   1385 
   1386     a_16 = ((*(comp_ref_x + 15) + *(comp_ref_y + 15 * pitch)) << 4) + 16;;
   1387     b = (5 * H + 32) >> 6;
   1388     c = (5 * V + 32) >> 6;
   1389 
   1390     tmp = 0;
   1391 
   1392     for (i = 0; i < 16; i++)
   1393     {
   1394         factor_c = a_16 + c * (tmp++ - 7);
   1395 
   1396         factor_c -= 7 * b;
   1397 
   1398         value = factor_c >> 5;
   1399         factor_c += b;
   1400         CLIP_RESULT(value)
   1401         byte1 = value;
   1402         value = factor_c >> 5;
   1403         factor_c += b;
   1404         CLIP_RESULT(value)
   1405         byte2 = value;
   1406         value = factor_c >> 5;
   1407         factor_c += b;
   1408         CLIP_RESULT(value)
   1409         byte3 = value;
   1410         value = factor_c >> 5;
   1411         factor_c += b;
   1412         CLIP_RESULT(value)
   1413         temp = byte1 | (byte2 << 8);
   1414         temp |= (byte3 << 16);
   1415         temp |= (value << 24);
   1416         *((uint32*)pred) = temp;
   1417 
   1418         value = factor_c >> 5;
   1419         factor_c += b;
   1420         CLIP_RESULT(value)
   1421         byte1 = value;
   1422         value = factor_c >> 5;
   1423         factor_c += b;
   1424         CLIP_RESULT(value)
   1425         byte2 = value;
   1426         value = factor_c >> 5;
   1427         factor_c += b;
   1428         CLIP_RESULT(value)
   1429         byte3 = value;
   1430         value = factor_c >> 5;
   1431         factor_c += b;
   1432         CLIP_RESULT(value)
   1433         temp = byte1 | (byte2 << 8);
   1434         temp |= (byte3 << 16);
   1435         temp |= (value << 24);
   1436         *((uint32*)(pred + 4)) = temp;
   1437 
   1438         value = factor_c >> 5;
   1439         factor_c += b;
   1440         CLIP_RESULT(value)
   1441         byte1 = value;
   1442         value = factor_c >> 5;
   1443         factor_c += b;
   1444         CLIP_RESULT(value)
   1445         byte2 = value;
   1446         value = factor_c >> 5;
   1447         factor_c += b;
   1448         CLIP_RESULT(value)
   1449         byte3 = value;
   1450         value = factor_c >> 5;
   1451         factor_c += b;
   1452         CLIP_RESULT(value)
   1453         temp = byte1 | (byte2 << 8);
   1454         temp |= (byte3 << 16);
   1455         temp |= (value << 24);
   1456         *((uint32*)(pred + 8)) = temp;
   1457 
   1458         value = factor_c >> 5;
   1459         factor_c += b;
   1460         CLIP_RESULT(value)
   1461         byte1 = value;
   1462         value = factor_c >> 5;
   1463         factor_c += b;
   1464         CLIP_RESULT(value)
   1465         byte2 = value;
   1466         value = factor_c >> 5;
   1467         factor_c += b;
   1468         CLIP_RESULT(value)
   1469         byte3 = value;
   1470         value = factor_c >> 5;
   1471         CLIP_RESULT(value)
   1472         temp = byte1 | (byte2 << 8);
   1473         temp |= (byte3 << 16);
   1474         temp |= (value << 24);
   1475         *((uint32*)(pred + 12)) = temp;
   1476         pred += pred_pitch;
   1477     }
   1478 }
   1479 
   1480 /************** Chroma intra prediction *********************/
   1481 
   1482 void Intra_Chroma_DC(AVCCommonObj *video, int pitch, uint8 *predCb, uint8 *predCr)
   1483 {
   1484     int i;
   1485     uint32 temp, temp2, pred_a, pred_b;
   1486     uint8 *comp_ref_x, *comp_ref_y;
   1487     uint8 *comp_ref_cb_x = video->pintra_pred_top_cb;
   1488     uint8 *comp_ref_cb_y = video->pintra_pred_left_cb;
   1489     uint8 *comp_ref_cr_x = video->pintra_pred_top_cr;
   1490     uint8 *comp_ref_cr_y = video->pintra_pred_left_cr;
   1491     int  component, j;
   1492     int  sum_x0, sum_x1, sum_y0, sum_y1;
   1493     int pred_0[2], pred_1[2], pred_2[2], pred_3[2];
   1494     int pred_pitch = video->pred_pitch;
   1495     uint8 *pred;
   1496 
   1497     if (video->intraAvailB & video->intraAvailA)
   1498     {
   1499         comp_ref_x = comp_ref_cb_x;
   1500         comp_ref_y = comp_ref_cb_y;
   1501         for (i = 0; i < 2; i++)
   1502         {
   1503             temp = *((uint32*)comp_ref_x);
   1504             comp_ref_x += 4;
   1505             temp2 = (temp >> 8) & 0xFF00FF;
   1506             temp &= 0xFF00FF;
   1507             temp += temp2;
   1508             temp += (temp >> 16);
   1509             sum_x0 = temp & 0xFFFF;
   1510 
   1511             temp = *((uint32*)comp_ref_x);
   1512             temp2 = (temp >> 8) & 0xFF00FF;
   1513             temp &= 0xFF00FF;
   1514             temp += temp2;
   1515             temp += (temp >> 16);
   1516             sum_x1 = temp & 0xFFFF;
   1517 
   1518             pred_1[i] = (sum_x1 + 2) >> 2;
   1519 
   1520             sum_y0 = *comp_ref_y;
   1521             sum_y0 += *(comp_ref_y += pitch);
   1522             sum_y0 += *(comp_ref_y += pitch);
   1523             sum_y0 += *(comp_ref_y += pitch);
   1524 
   1525             sum_y1 = *(comp_ref_y += pitch);
   1526             sum_y1 += *(comp_ref_y += pitch);
   1527             sum_y1 += *(comp_ref_y += pitch);
   1528             sum_y1 += *(comp_ref_y += pitch);
   1529 
   1530             pred_2[i] = (sum_y1 + 2) >> 2;
   1531 
   1532             pred_0[i] = (sum_y0 + sum_x0 + 4) >> 3;
   1533             pred_3[i] = (sum_y1 + sum_x1 + 4) >> 3;
   1534 
   1535             comp_ref_x = comp_ref_cr_x;
   1536             comp_ref_y = comp_ref_cr_y;
   1537         }
   1538     }
   1539 
   1540     else if (video->intraAvailA)
   1541     {
   1542         comp_ref_y = comp_ref_cb_y;
   1543         for (i = 0; i < 2; i++)
   1544         {
   1545             sum_y0 = *comp_ref_y;
   1546             sum_y0 += *(comp_ref_y += pitch);
   1547             sum_y0 += *(comp_ref_y += pitch);
   1548             sum_y0 += *(comp_ref_y += pitch);
   1549 
   1550             sum_y1 = *(comp_ref_y += pitch);
   1551             sum_y1 += *(comp_ref_y += pitch);
   1552             sum_y1 += *(comp_ref_y += pitch);
   1553             sum_y1 += *(comp_ref_y += pitch);
   1554 
   1555             pred_0[i] = pred_1[i] = (sum_y0 + 2) >> 2;
   1556             pred_2[i] = pred_3[i] = (sum_y1 + 2) >> 2;
   1557             comp_ref_y = comp_ref_cr_y;
   1558         }
   1559     }
   1560     else if (video->intraAvailB)
   1561     {
   1562         comp_ref_x = comp_ref_cb_x;
   1563         for (i = 0; i < 2; i++)
   1564         {
   1565             temp = *((uint32*)comp_ref_x);
   1566             comp_ref_x += 4;
   1567             temp2 = (temp >> 8) & 0xFF00FF;
   1568             temp &= 0xFF00FF;
   1569             temp += temp2;
   1570             temp += (temp >> 16);
   1571             sum_x0 = temp & 0xFFFF;
   1572 
   1573             temp = *((uint32*)comp_ref_x);
   1574             temp2 = (temp >> 8) & 0xFF00FF;
   1575             temp &= 0xFF00FF;
   1576             temp += temp2;
   1577             temp += (temp >> 16);
   1578             sum_x1 = temp & 0xFFFF;
   1579 
   1580             pred_0[i] = pred_2[i] = (sum_x0 + 2) >> 2;
   1581             pred_1[i] = pred_3[i] = (sum_x1 + 2) >> 2;
   1582             comp_ref_x = comp_ref_cr_x;
   1583         }
   1584     }
   1585     else
   1586     {
   1587         pred_0[0] = pred_0[1] = pred_1[0] = pred_1[1] =
   1588                                                 pred_2[0] = pred_2[1] = pred_3[0] = pred_3[1] = 128;
   1589     }
   1590 
   1591     pred = predCb;
   1592     for (component = 0; component < 2; component++)
   1593     {
   1594         pred_a = pred_0[component];
   1595         pred_b = pred_1[component];
   1596         pred_a |= (pred_a << 8);
   1597         pred_a |= (pred_a << 16);
   1598         pred_b |= (pred_b << 8);
   1599         pred_b |= (pred_b << 16);
   1600 
   1601         for (i = 4; i < 6; i++)
   1602         {
   1603             for (j = 0; j < 4; j++) /* 4 lines */
   1604             {
   1605                 *((uint32*)pred) = pred_a;
   1606                 *((uint32*)(pred + 4)) = pred_b;
   1607                 pred += pred_pitch; /* move to the next line */
   1608             }
   1609             pred_a = pred_2[component];
   1610             pred_b = pred_3[component];
   1611             pred_a |= (pred_a << 8);
   1612             pred_a |= (pred_a << 16);
   1613             pred_b |= (pred_b << 8);
   1614             pred_b |= (pred_b << 16);
   1615         }
   1616         pred = predCr; /* point to cr */
   1617     }
   1618 }
   1619 
   1620 void  Intra_Chroma_Horizontal(AVCCommonObj *video, int pitch, uint8 *predCb, uint8 *predCr)
   1621 {
   1622     int i;
   1623     uint32 temp;
   1624     uint8   *comp_ref_cb_y = video->pintra_pred_left_cb;
   1625     uint8   *comp_ref_cr_y = video->pintra_pred_left_cr;
   1626     uint8  *comp;
   1627     int component, j;
   1628     int     pred_pitch = video->pred_pitch;
   1629     uint8   *pred;
   1630 
   1631     comp = comp_ref_cb_y;
   1632     pred = predCb;
   1633     for (component = 0; component < 2; component++)
   1634     {
   1635         for (i = 4; i < 6; i++)
   1636         {
   1637             for (j = 0; j < 4; j++)
   1638             {
   1639                 temp = *comp;
   1640                 comp += pitch;
   1641                 temp |= (temp << 8);
   1642                 temp |= (temp << 16);
   1643                 *((uint32*)pred) = temp;
   1644                 *((uint32*)(pred + 4)) = temp;
   1645                 pred += pred_pitch;
   1646             }
   1647         }
   1648         comp = comp_ref_cr_y;
   1649         pred = predCr; /* point to cr */
   1650     }
   1651 
   1652 }
   1653 
   1654 void  Intra_Chroma_Vertical(AVCCommonObj *video, uint8 *predCb, uint8 *predCr)
   1655 {
   1656     uint32  temp1, temp2;
   1657     uint8   *comp_ref_cb_x = video->pintra_pred_top_cb;
   1658     uint8   *comp_ref_cr_x = video->pintra_pred_top_cr;
   1659     uint8   *comp_ref;
   1660     int     component, j;
   1661     int     pred_pitch = video->pred_pitch;
   1662     uint8   *pred;
   1663 
   1664     comp_ref = comp_ref_cb_x;
   1665     pred = predCb;
   1666     for (component = 0; component < 2; component++)
   1667     {
   1668         temp1 = *((uint32*)comp_ref);
   1669         temp2 = *((uint32*)(comp_ref + 4));
   1670         for (j = 0; j < 8; j++)
   1671         {
   1672             *((uint32*)pred) = temp1;
   1673             *((uint32*)(pred + 4)) = temp2;
   1674             pred += pred_pitch;
   1675         }
   1676         comp_ref = comp_ref_cr_x;
   1677         pred = predCr; /* point to cr */
   1678     }
   1679 
   1680 }
   1681 
   1682 void  Intra_Chroma_Plane(AVCCommonObj *video, int pitch, uint8 *predCb, uint8 *predCr)
   1683 {
   1684     int i;
   1685     int a_16_C[2], b_C[2], c_C[2], a_16, b, c, factor_c;
   1686     uint8 *comp_ref_x, *comp_ref_y, *comp_ref_x0, *comp_ref_x1,  *comp_ref_y0, *comp_ref_y1;
   1687     int component, j;
   1688     int H, V, tmp;
   1689     uint32 temp;
   1690     uint8 byte1, byte2, byte3;
   1691     int value;
   1692     uint8 topleft;
   1693     int pred_pitch = video->pred_pitch;
   1694     uint8 *pred;
   1695 
   1696     comp_ref_x = video->pintra_pred_top_cb;
   1697     comp_ref_y = video->pintra_pred_left_cb;
   1698     topleft = video->intra_pred_topleft_cb;
   1699 
   1700     for (component = 0; component < 2; component++)
   1701     {
   1702         H = V = 0;
   1703         comp_ref_x0 = comp_ref_x + 4;
   1704         comp_ref_x1 = comp_ref_x + 2;
   1705         comp_ref_y0 = comp_ref_y + (pitch << 2);
   1706         comp_ref_y1 = comp_ref_y + (pitch << 1);
   1707         for (i = 1; i < 4; i++)
   1708         {
   1709             H += i * (*comp_ref_x0++ - *comp_ref_x1--);
   1710             V += i * (*comp_ref_y0 - *comp_ref_y1);
   1711             comp_ref_y0 += pitch;
   1712             comp_ref_y1 -= pitch;
   1713         }
   1714         H += i * (*comp_ref_x0++ - topleft);
   1715         V += i * (*comp_ref_y0 - *comp_ref_y1);
   1716 
   1717         a_16_C[component] = ((*(comp_ref_x + 7) + *(comp_ref_y + 7 * pitch)) << 4) + 16;
   1718         b_C[component] = (17 * H + 16) >> 5;
   1719         c_C[component] = (17 * V + 16) >> 5;
   1720 
   1721         comp_ref_x = video->pintra_pred_top_cr;
   1722         comp_ref_y = video->pintra_pred_left_cr;
   1723         topleft = video->intra_pred_topleft_cr;
   1724     }
   1725 
   1726     pred = predCb;
   1727     for (component = 0; component < 2; component++)
   1728     {
   1729         a_16 = a_16_C[component];
   1730         b = b_C[component];
   1731         c = c_C[component];
   1732         tmp = 0;
   1733         for (i = 4; i < 6; i++)
   1734         {
   1735             for (j = 0; j < 4; j++)
   1736             {
   1737                 factor_c = a_16 + c * (tmp++ - 3);
   1738 
   1739                 factor_c -= 3 * b;
   1740 
   1741                 value = factor_c >> 5;
   1742                 factor_c += b;
   1743                 CLIP_RESULT(value)
   1744                 byte1 = value;
   1745                 value = factor_c >> 5;
   1746                 factor_c += b;
   1747                 CLIP_RESULT(value)
   1748                 byte2 = value;
   1749                 value = factor_c >> 5;
   1750                 factor_c += b;
   1751                 CLIP_RESULT(value)
   1752                 byte3 = value;
   1753                 value = factor_c >> 5;
   1754                 factor_c += b;
   1755                 CLIP_RESULT(value)
   1756                 temp = byte1 | (byte2 << 8);
   1757                 temp |= (byte3 << 16);
   1758                 temp |= (value << 24);
   1759                 *((uint32*)pred) = temp;
   1760 
   1761                 value = factor_c >> 5;
   1762                 factor_c += b;
   1763                 CLIP_RESULT(value)
   1764                 byte1 = value;
   1765                 value = factor_c >> 5;
   1766                 factor_c += b;
   1767                 CLIP_RESULT(value)
   1768                 byte2 = value;
   1769                 value = factor_c >> 5;
   1770                 factor_c += b;
   1771                 CLIP_RESULT(value)
   1772                 byte3 = value;
   1773                 value = factor_c >> 5;
   1774                 factor_c += b;
   1775                 CLIP_RESULT(value)
   1776                 temp = byte1 | (byte2 << 8);
   1777                 temp |= (byte3 << 16);
   1778                 temp |= (value << 24);
   1779                 *((uint32*)(pred + 4)) = temp;
   1780                 pred += pred_pitch;
   1781             }
   1782         }
   1783         pred = predCr; /* point to cr */
   1784     }
   1785 }
   1786 
   1787