Home | History | Annotate | Download | only in src
      1 /* ------------------------------------------------------------------
      2  * Copyright (C) 1998-2009 PacketVideo
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
     13  * express or implied.
     14  * See the License for the specific language governing permissions
     15  * and limitations under the License.
     16  * -------------------------------------------------------------------
     17  */
     18 #include "avcenc_lib.h"
     19 
/* Mode-decision bias thresholds. Neither is referenced in the portion of the
 * file reviewed here. */
#define TH_I4  0  /* threshold biasing toward I16 mode instead of I4 mode */
#define TH_Intra  0 /* threshold biasing toward INTER mode instead of intra mode */

/* Fixed prediction-mode selections. They are not referenced in the portion of
 * the file reviewed here -- presumably debugging/override knobs; confirm
 * against the rest of the file before removing. */
#define FIXED_INTRAPRED_MODE  AVC_I16
#define FIXED_I16_MODE  AVC_I16_DC
#define FIXED_I4_MODE   AVC_I4_Diagonal_Down_Left
#define FIXED_INTRA_CHROMA_MODE AVC_IC_DC

/* Clamp the integer lvalue x in place to the range 0..255.
 * The unsigned comparison catches both x < 0 and x > 255 with a single test.
 * (x >> 31) is then all ones for a negative x -- this assumes a 32-bit int and
 * an arithmetic right shift; TODO confirm for every target compiler -- so the
 * mask yields 0 for negative inputs and 0xFF for inputs above 255.
 * The macro expands to a complete if-statement and call sites in this file
 * invoke it without a trailing semicolon. The argument is not parenthesized
 * and is evaluated more than once, so pass only a plain variable. */
#define CLIP_RESULT(x)      if((uint)x > 0xFF){ \
                 x = 0xFF & (~(x>>31));}
     30 
     31 
     32 bool IntraDecisionABE(AVCEncObject *encvid, int min_cost, uint8 *curL, int picPitch)
     33 {
     34     AVCCommonObj *video = encvid->common;
     35     AVCFrameIO *currInput = encvid->currInput;
     36     int orgPitch = currInput->pitch;
     37     int x_pos = (video->mb_x) << 4;
     38     int y_pos = (video->mb_y) << 4;
     39     uint8 *orgY = currInput->YCbCr[0] + y_pos * orgPitch + x_pos;
     40     int j;
     41     uint8 *topL, *leftL, *orgY_2, *orgY_3;
     42     int temp, SBE, offset;
     43     OsclFloat ABE;
     44     bool intra = true;
     45 
     46     if (((x_pos >> 4) != (int)video->PicWidthInMbs - 1) &&
     47             ((y_pos >> 4) != (int)video->PicHeightInMbs - 1) &&
     48             video->intraAvailA &&
     49             video->intraAvailB)
     50     {
     51         SBE = 0;
     52         /* top neighbor */
     53         topL = curL - picPitch;
     54         /* left neighbor */
     55         leftL = curL - 1;
     56         orgY_2 = orgY - orgPitch;
     57 
     58         for (j = 0; j < 16; j++)
     59         {
     60             temp = *topL++ - orgY[j];
     61             SBE += ((temp >= 0) ? temp : -temp);
     62             temp = *(leftL += picPitch) - *(orgY_2 += orgPitch);
     63             SBE += ((temp >= 0) ? temp : -temp);
     64         }
     65 
     66         /* calculate chroma */
     67         offset = (y_pos >> 2) * picPitch + (x_pos >> 1);
     68         topL = video->currPic->Scb + offset;
     69         orgY_2 = currInput->YCbCr[1] + offset + (y_pos >> 2) * (orgPitch - picPitch);
     70 
     71         leftL = topL - 1;
     72         topL -= (picPitch >> 1);
     73         orgY_3 = orgY_2 - (orgPitch >> 1);
     74         for (j = 0; j < 8; j++)
     75         {
     76             temp = *topL++ - orgY_2[j];
     77             SBE += ((temp >= 0) ? temp : -temp);
     78             temp = *(leftL += (picPitch >> 1)) - *(orgY_3 += (orgPitch >> 1));
     79             SBE += ((temp >= 0) ? temp : -temp);
     80         }
     81 
     82         topL = video->currPic->Scr + offset;
     83         orgY_2 = currInput->YCbCr[2] + offset + (y_pos >> 2) * (orgPitch - picPitch);
     84 
     85         leftL = topL - 1;
     86         topL -= (picPitch >> 1);
     87         orgY_3 = orgY_2 - (orgPitch >> 1);
     88         for (j = 0; j < 8; j++)
     89         {
     90             temp = *topL++ - orgY_2[j];
     91             SBE += ((temp >= 0) ? temp : -temp);
     92             temp = *(leftL += (picPitch >> 1)) - *(orgY_3 += (orgPitch >> 1));
     93             SBE += ((temp >= 0) ? temp : -temp);
     94         }
     95 
     96         /* compare mincost/384 and SBE/64 */
     97         ABE = SBE / 64.0;
     98         if (ABE*0.8 >= min_cost / 384.0)
     99         {
    100             intra = false;
    101         }
    102     }
    103 
    104     return intra;
    105 }
    106 
    107 /* perform searching for MB mode */
    108 /* assuming that this is done inside the encoding loop,
    109 no need to call InitNeighborAvailability */
    110 
    111 void MBIntraSearch(AVCEncObject *encvid, int mbnum, uint8 *curL, int picPitch)
    112 {
    113     AVCCommonObj *video = encvid->common;
    114     AVCFrameIO *currInput = encvid->currInput;
    115     AVCMacroblock *currMB = video->currMB;
    116     int min_cost;
    117     uint8 *orgY;
    118     int x_pos = (video->mb_x) << 4;
    119     int y_pos = (video->mb_y) << 4;
    120     uint32 *saved_inter;
    121     int j;
    122     int orgPitch = currInput->pitch;
    123     bool intra = true;
    124 
    125     currMB->CBP = 0;
    126 
    127     /* first do motion vector and variable block size search */
    128     min_cost = encvid->min_cost[mbnum];
    129 
    130     /* now perform intra prediction search */
    131     /* need to add the check for encvid->intraSearch[video->mbNum] to skip intra
    132        if it's not worth checking. */
    133     if (video->slice_type == AVC_P_SLICE)
    134     {
    135         /* Decide whether intra search is necessary or not */
    136         /* This one, we do it in the encoding loop so the neighboring pixel are the
    137         actual reconstructed pixels. */
    138         intra = IntraDecisionABE(encvid, min_cost, curL, picPitch);
    139     }
    140 
    141     if (intra == true || video->slice_type == AVC_I_SLICE)
    142     {
    143         orgY = currInput->YCbCr[0] + y_pos * orgPitch + x_pos;
    144 
    145         /* i16 mode search */
    146         /* generate all the predictions */
    147         intrapred_luma_16x16(encvid);
    148 
    149         /* evaluate them one by one */
    150         find_cost_16x16(encvid, orgY, &min_cost);
    151 
    152         if (video->slice_type == AVC_P_SLICE)
    153         {
    154             /* save current inter prediction */
    155             saved_inter = encvid->subpel_pred; /* reuse existing buffer */
    156             j = 16;
    157             curL -= 4;
    158             picPitch -= 16;
    159             while (j--)
    160             {
    161                 *saved_inter++ = *((uint32*)(curL += 4));
    162                 *saved_inter++ = *((uint32*)(curL += 4));
    163                 *saved_inter++ = *((uint32*)(curL += 4));
    164                 *saved_inter++ = *((uint32*)(curL += 4));
    165                 curL += picPitch;
    166             }
    167 
    168         }
    169 
    170         /* i4 mode search */
    171         mb_intra4x4_search(encvid, &min_cost);
    172 
    173         encvid->min_cost[mbnum] = min_cost; /* update min_cost */
    174     }
    175 
    176 
    177     if (currMB->mb_intra)
    178     {
    179         chroma_intra_search(encvid);
    180 
    181         /* need to set this in order for the MBInterPrediction to work!! */
    182         memset(currMB->mvL0, 0, sizeof(int32)*16);
    183         currMB->ref_idx_L0[0] = currMB->ref_idx_L0[1] =
    184                                     currMB->ref_idx_L0[2] = currMB->ref_idx_L0[3] = -1;
    185     }
    186     else if (video->slice_type == AVC_P_SLICE && intra == true)
    187     {
    188         /* restore current inter prediction */
    189         saved_inter = encvid->subpel_pred; /* reuse existing buffer */
    190         j = 16;
    191         curL -= ((picPitch + 16) << 4);
    192         while (j--)
    193         {
    194             *((uint32*)(curL += 4)) = *saved_inter++;
    195             *((uint32*)(curL += 4)) = *saved_inter++;
    196             *((uint32*)(curL += 4)) = *saved_inter++;
    197             *((uint32*)(curL += 4)) = *saved_inter++;
    198             curL += picPitch;
    199         }
    200     }
    201 
    202     return ;
    203 }
    204 
    205 /* generate all the prediction values */
    206 void intrapred_luma_16x16(AVCEncObject *encvid)
    207 {
    208     AVCCommonObj *video = encvid->common;
    209     AVCPictureData *currPic = video->currPic;
    210 
    211     int x_pos = (video->mb_x) << 4;
    212     int y_pos = (video->mb_y) << 4;
    213     int pitch = currPic->pitch;
    214 
    215     int offset = y_pos * pitch + x_pos;
    216 
    217     uint8 *pred, *top, *left;
    218     uint8 *curL = currPic->Sl + offset; /* point to reconstructed frame */
    219     uint32 word1, word2, word3, word4;
    220     uint32 sum = 0;
    221 
    222     int a_16, b, c, factor_c;
    223     uint8 *comp_ref_x0, *comp_ref_x1, *comp_ref_y0, *comp_ref_y1;
    224     int H = 0, V = 0, tmp, value;
    225     int i;
    226 
    227     if (video->intraAvailB)
    228     {
    229         //get vertical prediction mode
    230         top = curL - pitch;
    231 
    232         pred = encvid->pred_i16[AVC_I16_Vertical] - 16;
    233 
    234         word1 = *((uint32*)(top));  /* read 4 bytes from top */
    235         word2 = *((uint32*)(top + 4)); /* read 4 bytes from top */
    236         word3 = *((uint32*)(top + 8)); /* read 4 bytes from top */
    237         word4 = *((uint32*)(top + 12)); /* read 4 bytes from top */
    238 
    239         for (i = 0; i < 16; i++)
    240         {
    241             *((uint32*)(pred += 16)) = word1;
    242             *((uint32*)(pred + 4)) = word2;
    243             *((uint32*)(pred + 8)) = word3;
    244             *((uint32*)(pred + 12)) = word4;
    245 
    246         }
    247 
    248         sum = word1 & 0xFF00FF;
    249         word1 = (word1 >> 8) & 0xFF00FF;
    250         sum += word1;
    251         word1 = (word2 & 0xFF00FF);
    252         sum += word1;
    253         word2 = (word2 >> 8) & 0xFF00FF;
    254         sum += word2;
    255         word1 = (word3 & 0xFF00FF);
    256         sum += word1;
    257         word3 = (word3 >> 8) & 0xFF00FF;
    258         sum += word3;
    259         word1 = (word4 & 0xFF00FF);
    260         sum += word1;
    261         word4 = (word4 >> 8) & 0xFF00FF;
    262         sum += word4;
    263 
    264         sum += (sum >> 16);
    265         sum &= 0xFFFF;
    266 
    267         if (!video->intraAvailA)
    268         {
    269             sum = (sum + 8) >> 4;
    270         }
    271     }
    272 
    273     if (video->intraAvailA)
    274     {
    275         // get horizontal mode
    276         left = curL - 1 - pitch;
    277 
    278         pred = encvid->pred_i16[AVC_I16_Horizontal] - 16;
    279 
    280         for (i = 0; i < 16; i++)
    281         {
    282             word1 = *(left += pitch);
    283             sum += word1;
    284 
    285             word1 = (word1 << 8) | word1;
    286             word1 = (word1 << 16) | word1; /* make it 4 */
    287 
    288             *(uint32*)(pred += 16) = word1;
    289             *(uint32*)(pred + 4) = word1;
    290             *(uint32*)(pred + 8) = word1;
    291             *(uint32*)(pred + 12) = word1;
    292         }
    293 
    294         if (!video->intraAvailB)
    295         {
    296             sum = (sum + 8) >> 4;
    297         }
    298         else
    299         {
    300             sum = (sum + 16) >> 5;
    301         }
    302     }
    303 
    304     // get DC mode
    305     if (!video->intraAvailA && !video->intraAvailB)
    306     {
    307         sum = 0x80808080;
    308     }
    309     else
    310     {
    311         sum = (sum << 8) | sum;
    312         sum = (sum << 16) | sum;
    313     }
    314 
    315     pred = encvid->pred_i16[AVC_I16_DC] - 16;
    316     for (i = 0; i < 16; i++)
    317     {
    318         *((uint32*)(pred += 16)) = sum;
    319         *((uint32*)(pred + 4)) = sum;
    320         *((uint32*)(pred + 8)) = sum;
    321         *((uint32*)(pred + 12)) = sum;
    322     }
    323 
    324     // get plane mode
    325     if (video->intraAvailA && video->intraAvailB && video->intraAvailD)
    326     {
    327         pred = encvid->pred_i16[AVC_I16_Plane] - 16;
    328 
    329         comp_ref_x0 = curL - pitch + 8;
    330         comp_ref_x1 = curL - pitch + 6;
    331         comp_ref_y0 = curL - 1 + (pitch << 3);
    332         comp_ref_y1 = curL - 1 + 6 * pitch;
    333 
    334         for (i = 1; i < 8; i++)
    335         {
    336             H += i * (*comp_ref_x0++ - *comp_ref_x1--);
    337             V += i * (*comp_ref_y0 - *comp_ref_y1);
    338             comp_ref_y0 += pitch;
    339             comp_ref_y1 -= pitch;
    340         }
    341 
    342         H += i * (*comp_ref_x0++ - curL[-pitch-1]);
    343         V += i * (*comp_ref_y0 - *comp_ref_y1);
    344 
    345 
    346         a_16 = ((*(curL - pitch + 15) + *(curL - 1 + 15 * pitch)) << 4) + 16;;
    347         b = (5 * H + 32) >> 6;
    348         c = (5 * V + 32) >> 6;
    349 
    350         tmp = 0;
    351         for (i = 0; i < 16; i++)
    352         {
    353             factor_c = a_16 + c * (tmp++ - 7);
    354             factor_c -= 7 * b;
    355 
    356             value = factor_c >> 5;
    357             factor_c += b;
    358             CLIP_RESULT(value)
    359             word1 = value;
    360             value = factor_c >> 5;
    361             factor_c += b;
    362             CLIP_RESULT(value)
    363             word1 = (word1) | (value << 8);
    364             value = factor_c >> 5;
    365             factor_c += b;
    366             CLIP_RESULT(value)
    367             word1 = (word1) | (value << 16);
    368             value = factor_c >> 5;
    369             factor_c += b;
    370             CLIP_RESULT(value)
    371             word1 = (word1) | (value << 24);
    372             *((uint32*)(pred += 16)) = word1;
    373             value = factor_c >> 5;
    374             factor_c += b;
    375             CLIP_RESULT(value)
    376             word1 = value;
    377             value = factor_c >> 5;
    378             factor_c += b;
    379             CLIP_RESULT(value)
    380             word1 = (word1) | (value << 8);
    381             value = factor_c >> 5;
    382             factor_c += b;
    383             CLIP_RESULT(value)
    384             word1 = (word1) | (value << 16);
    385             value = factor_c >> 5;
    386             factor_c += b;
    387             CLIP_RESULT(value)
    388             word1 = (word1) | (value << 24);
    389             *((uint32*)(pred + 4)) = word1;
    390             value = factor_c >> 5;
    391             factor_c += b;
    392             CLIP_RESULT(value)
    393             word1 = value;
    394             value = factor_c >> 5;
    395             factor_c += b;
    396             CLIP_RESULT(value)
    397             word1 = (word1) | (value << 8);
    398             value = factor_c >> 5;
    399             factor_c += b;
    400             CLIP_RESULT(value)
    401             word1 = (word1) | (value << 16);
    402             value = factor_c >> 5;
    403             factor_c += b;
    404             CLIP_RESULT(value)
    405             word1 = (word1) | (value << 24);
    406             *((uint32*)(pred + 8)) = word1;
    407             value = factor_c >> 5;
    408             factor_c += b;
    409             CLIP_RESULT(value)
    410             word1 = value;
    411             value = factor_c >> 5;
    412             factor_c += b;
    413             CLIP_RESULT(value)
    414             word1 = (word1) | (value << 8);
    415             value = factor_c >> 5;
    416             factor_c += b;
    417             CLIP_RESULT(value)
    418             word1 = (word1) | (value << 16);
    419             value = factor_c >> 5;
    420             CLIP_RESULT(value)
    421             word1 = (word1) | (value << 24);
    422             *((uint32*)(pred + 12)) = word1;
    423         }
    424     }
    425 
    426     return ;
    427 }
    428 
    429 
    430 /* evaluate each prediction mode of I16 */
    431 void find_cost_16x16(AVCEncObject *encvid, uint8 *orgY, int *min_cost)
    432 {
    433     AVCCommonObj *video = encvid->common;
    434     AVCMacroblock *currMB = video->currMB;
    435     int cost;
    436     int org_pitch = encvid->currInput->pitch;
    437 
    438     /* evaluate vertical mode */
    439     if (video->intraAvailB)
    440     {
    441         cost = cost_i16(orgY, org_pitch, encvid->pred_i16[AVC_I16_Vertical], *min_cost);
    442         if (cost < *min_cost)
    443         {
    444             *min_cost = cost;
    445             currMB->mbMode = AVC_I16;
    446             currMB->mb_intra = 1;
    447             currMB->i16Mode = AVC_I16_Vertical;
    448         }
    449     }
    450 
    451 
    452     /* evaluate horizontal mode */
    453     if (video->intraAvailA)
    454     {
    455         cost = cost_i16(orgY, org_pitch, encvid->pred_i16[AVC_I16_Horizontal], *min_cost);
    456         if (cost < *min_cost)
    457         {
    458             *min_cost = cost;
    459             currMB->mbMode = AVC_I16;
    460             currMB->mb_intra = 1;
    461             currMB->i16Mode = AVC_I16_Horizontal;
    462         }
    463     }
    464 
    465     /* evaluate DC mode */
    466     cost = cost_i16(orgY, org_pitch, encvid->pred_i16[AVC_I16_DC], *min_cost);
    467     if (cost < *min_cost)
    468     {
    469         *min_cost = cost;
    470         currMB->mbMode = AVC_I16;
    471         currMB->mb_intra = 1;
    472         currMB->i16Mode = AVC_I16_DC;
    473     }
    474 
    475     /* evaluate plane mode */
    476     if (video->intraAvailA && video->intraAvailB && video->intraAvailD)
    477     {
    478         cost = cost_i16(orgY, org_pitch, encvid->pred_i16[AVC_I16_Plane], *min_cost);
    479         if (cost < *min_cost)
    480         {
    481             *min_cost = cost;
    482             currMB->mbMode = AVC_I16;
    483             currMB->mb_intra = 1;
    484             currMB->i16Mode = AVC_I16_Plane;
    485         }
    486     }
    487 
    488     return ;
    489 }
    490 
    491 
    492 int cost_i16(uint8 *org, int org_pitch, uint8 *pred, int min_cost)
    493 {
    494 
    495     int cost;
    496     int j, k;
    497     int16 res[256], *pres; // residue
    498     int m0, m1, m2, m3;
    499 
    500     // calculate SATD
    501     org_pitch -= 16;
    502     pres = res;
    503     // horizontal transform
    504     for (j = 0; j < 16; j++)
    505     {
    506         k = 4;
    507         while (k > 0)
    508         {
    509             m0 = org[0] - pred[0];
    510             m3 = org[3] - pred[3];
    511             m0 += m3;
    512             m3 = m0 - (m3 << 1);
    513             m1 = org[1] - pred[1];
    514             m2 = org[2] - pred[2];
    515             m1 += m2;
    516             m2 = m1 - (m2 << 1);
    517             pres[0] = m0 + m1;
    518             pres[2] = m0 - m1;
    519             pres[1] = m2 + m3;
    520             pres[3] = m3 - m2;
    521 
    522             org += 4;
    523             pres += 4;
    524             pred += 4;
    525             k--;
    526         }
    527         org += org_pitch;
    528     }
    529     /* vertical transform */
    530     cost = 0;
    531     for (j = 0; j < 4; j++)
    532     {
    533         pres = res + (j << 6);
    534         k = 16;
    535         while (k > 0)
    536         {
    537             m0 = pres[0];
    538             m3 = pres[3<<4];
    539             m0 += m3;
    540             m3 = m0 - (m3 << 1);
    541             m1 = pres[1<<4];
    542             m2 = pres[2<<4];
    543             m1 += m2;
    544             m2 = m1 - (m2 << 1);
    545             pres[0] = m0 = m0 + m1;
    546 
    547             if (k&0x3)  // only sum up non DC values.
    548             {
    549                 cost += ((m0 > 0) ? m0 : -m0);
    550             }
    551 
    552             m1 = m0 - (m1 << 1);
    553             cost += ((m1 > 0) ? m1 : -m1);
    554             m3 = m2 + m3;
    555             cost += ((m3 > 0) ? m3 : -m3);
    556             m2 = m3 - (m2 << 1);
    557             cost += ((m2 > 0) ? m2 : -m2);
    558 
    559             pres++;
    560             k--;
    561         }
    562         if ((cost >> 1) > min_cost) /* early drop out */
    563         {
    564             return (cost >> 1);
    565         }
    566     }
    567 
    568     /* Hadamard of the DC coefficient */
    569     pres = res;
    570     k = 4;
    571     while (k > 0)
    572     {
    573         m0 = pres[0];
    574         m3 = pres[3<<2];
    575         m0 >>= 2;
    576         m0 += (m3 >> 2);
    577         m3 = m0 - (m3 >> 1);
    578         m1 = pres[1<<2];
    579         m2 = pres[2<<2];
    580         m1 >>= 2;
    581         m1 += (m2 >> 2);
    582         m2 = m1 - (m2 >> 1);
    583         pres[0] = (m0 + m1);
    584         pres[2<<2] = (m0 - m1);
    585         pres[1<<2] = (m2 + m3);
    586         pres[3<<2] = (m3 - m2);
    587         pres += (4 << 4);
    588         k--;
    589     }
    590 
    591     pres = res;
    592     k = 4;
    593     while (k > 0)
    594     {
    595         m0 = pres[0];
    596         m3 = pres[3<<6];
    597         m0 += m3;
    598         m3 = m0 - (m3 << 1);
    599         m1 = pres[1<<6];
    600         m2 = pres[2<<6];
    601         m1 += m2;
    602         m2 = m1 - (m2 << 1);
    603         m0 = m0 + m1;
    604         cost += ((m0 >= 0) ? m0 : -m0);
    605         m1 = m0 - (m1 << 1);
    606         cost += ((m1 >= 0) ? m1 : -m1);
    607         m3 = m2 + m3;
    608         cost += ((m3 >= 0) ? m3 : -m3);
    609         m2 = m3 - (m2 << 1);
    610         cost += ((m2 >= 0) ? m2 : -m2);
    611         pres += 4;
    612 
    613         if ((cost >> 1) > min_cost) /* early drop out */
    614         {
    615             return (cost >> 1);
    616         }
    617 
    618         k--;
    619     }
    620 
    621     return (cost >> 1);
    622 }
    623 
    624 
    625 void mb_intra4x4_search(AVCEncObject *encvid, int *min_cost)
    626 {
    627     AVCCommonObj *video = encvid->common;
    628     AVCMacroblock *currMB = video->currMB;
    629     AVCPictureData *currPic = video->currPic;
    630     AVCFrameIO *currInput = encvid->currInput;
    631     int pitch = currPic->pitch;
    632     int org_pitch = currInput->pitch;
    633     int offset;
    634     uint8 *curL, *comp, *org4, *org8;
    635     int y = video->mb_y << 4;
    636     int x = video->mb_x << 4;
    637 
    638     int b8, b4, cost4x4, blkidx;
    639     int cost = 0;
    640     int numcoef;
    641     int dummy = 0;
    642     int mb_intra = currMB->mb_intra; // save the original value
    643 
    644     offset = y * pitch + x;
    645 
    646     curL = currPic->Sl + offset;
    647     org8 = currInput->YCbCr[0] + y * org_pitch + x;
    648     video->pred_pitch = 4;
    649 
    650     cost = (int)(6.0 * encvid->lambda_mode + 0.4999);
    651     cost <<= 2;
    652 
    653     currMB->mb_intra = 1;  // temporary set this to one to enable the IDCT
    654     // operation inside dct_luma
    655 
    656     for (b8 = 0; b8 < 4; b8++)
    657     {
    658         comp = curL;
    659         org4 = org8;
    660 
    661         for (b4 = 0; b4 < 4; b4++)
    662         {
    663             blkidx = blkIdx2blkXY[b8][b4];
    664             cost4x4 = blk_intra4x4_search(encvid, blkidx, comp, org4);
    665             cost += cost4x4;
    666             if (cost > *min_cost)
    667             {
    668                 currMB->mb_intra = mb_intra; // restore the value
    669                 return ;
    670             }
    671 
    672             /* do residue, Xfrm, Q, invQ, invXfrm, recon and save the DCT coefs.*/
    673             video->pred_block = encvid->pred_i4[currMB->i4Mode[blkidx]];
    674             numcoef = dct_luma(encvid, blkidx, comp, org4, &dummy);
    675             currMB->nz_coeff[blkidx] = numcoef;
    676             if (numcoef)
    677             {
    678                 video->cbp4x4 |= (1 << blkidx);
    679                 currMB->CBP |= (1 << b8);
    680             }
    681 
    682             if (b4&1)
    683             {
    684                 comp += ((pitch << 2) - 4);
    685                 org4 += ((org_pitch << 2) - 4);
    686             }
    687             else
    688             {
    689                 comp += 4;
    690                 org4 += 4;
    691             }
    692         }
    693 
    694         if (b8&1)
    695         {
    696             curL += ((pitch << 3) - 8);
    697             org8 += ((org_pitch << 3) - 8);
    698         }
    699         else
    700         {
    701             curL += 8;
    702             org8 += 8;
    703         }
    704     }
    705 
    706     currMB->mb_intra = mb_intra; // restore the value
    707 
    708     if (cost < *min_cost)
    709     {
    710         *min_cost = cost;
    711         currMB->mbMode = AVC_I4;
    712         currMB->mb_intra = 1;
    713     }
    714 
    715     return ;
    716 }
    717 
    718 
    719 /* search for i4 mode for a 4x4 block */
    720 int blk_intra4x4_search(AVCEncObject *encvid, int blkidx, uint8 *cur, uint8 *org)
    721 {
    722     AVCCommonObj *video = encvid->common;
    723     AVCNeighborAvailability availability;
    724     AVCMacroblock *currMB = video->currMB;
    725     bool top_left = FALSE;
    726     int pitch = video->currPic->pitch;
    727     uint8 mode_avail[AVCNumI4PredMode];
    728     uint32 temp, DC;
    729     uint8 *pred;
    730     int org_pitch = encvid->currInput->pitch;
    731     uint16 min_cost, cost;
    732 
    733     int P_x, Q_x, R_x, P_y, Q_y, R_y, D, D0, D1;
    734     int P0, Q0, R0, S0, P1, Q1, R1, P2, Q2;
    735     uint8 P_A, P_B, P_C, P_D, P_E, P_F, P_G, P_H, P_I, P_J, P_K, P_L, P_X;
    736     int r0, r1, r2, r3, r4, r5, r6, r7;
    737     int x0, x1, x2, x3, x4, x5;
    738     uint32 temp1, temp2;
    739 
    740     int ipmode, mostProbableMode;
    741     int fixedcost = 4 * encvid->lambda_mode;
    742     int min_sad = 0x7FFF;
    743 
    744     availability.left = TRUE;
    745     availability.top = TRUE;
    746     if (blkidx <= 3) /* top row block  (!block_y) */
    747     { /* check availability up */
    748         availability.top = video->intraAvailB ;
    749     }
    750     if (!(blkidx&0x3)) /* left column block (!block_x)*/
    751     { /* check availability left */
    752         availability.left = video->intraAvailA ;
    753     }
    754     availability.top_right = BlkTopRight[blkidx];
    755 
    756     if (availability.top_right == 2)
    757     {
    758         availability.top_right = video->intraAvailB;
    759     }
    760     else if (availability.top_right == 3)
    761     {
    762         availability.top_right = video->intraAvailC;
    763     }
    764 
    765     if (availability.top == TRUE)
    766     {
    767         temp = *(uint32*)(cur - pitch);
    768         P_A = temp & 0xFF;
    769         P_B = (temp >> 8) & 0xFF;
    770         P_C = (temp >> 16) & 0xFF;
    771         P_D = (temp >> 24) & 0xFF;
    772     }
    773     else
    774     {
    775         P_A = P_B = P_C = P_D = 128;
    776     }
    777 
    778     if (availability.top_right == TRUE)
    779     {
    780         temp = *(uint32*)(cur - pitch + 4);
    781         P_E = temp & 0xFF;
    782         P_F = (temp >> 8) & 0xFF;
    783         P_G = (temp >> 16) & 0xFF;
    784         P_H = (temp >> 24) & 0xFF;
    785     }
    786     else
    787     {
    788         P_E = P_F = P_G = P_H = 128;
    789     }
    790 
    791     if (availability.left == TRUE)
    792     {
    793         cur--;
    794         P_I = *cur;
    795         P_J = *(cur += pitch);
    796         P_K = *(cur += pitch);
    797         P_L = *(cur + pitch);
    798         cur -= (pitch << 1);
    799         cur++;
    800     }
    801     else
    802     {
    803         P_I = P_J = P_K = P_L = 128;
    804     }
    805 
    806     /* check if top-left pixel is available */
    807     if (((blkidx > 3) && (blkidx&0x3)) || ((blkidx > 3) && video->intraAvailA)
    808             || ((blkidx&0x3) && video->intraAvailB)
    809             || (video->intraAvailA && video->intraAvailD && video->intraAvailB))
    810     {
    811         top_left = TRUE;
    812         P_X = *(cur - pitch - 1);
    813     }
    814     else
    815     {
    816         P_X = 128;
    817     }
    818 
    819     //===== INTRA PREDICTION FOR 4x4 BLOCK =====
    820     /* vertical */
    821     mode_avail[AVC_I4_Vertical] = 0;
    822     if (availability.top)
    823     {
    824         mode_avail[AVC_I4_Vertical] = 1;
    825         pred = encvid->pred_i4[AVC_I4_Vertical];
    826 
    827         temp = (P_D << 24) | (P_C << 16) | (P_B << 8) | P_A ;
    828         *((uint32*)pred) =  temp; /* write 4 at a time */
    829         *((uint32*)(pred += 4)) =  temp;
    830         *((uint32*)(pred += 4)) =  temp;
    831         *((uint32*)(pred += 4)) =  temp;
    832     }
    833     /* horizontal */
    834     mode_avail[AVC_I4_Horizontal] = 0;
    835     mode_avail[AVC_I4_Horizontal_Up] = 0;
    836     if (availability.left)
    837     {
    838         mode_avail[AVC_I4_Horizontal] = 1;
    839         pred = encvid->pred_i4[AVC_I4_Horizontal];
    840 
    841         temp = P_I | (P_I << 8);
    842         temp = temp | (temp << 16);
    843         *((uint32*)pred) = temp;
    844         temp = P_J | (P_J << 8);
    845         temp = temp | (temp << 16);
    846         *((uint32*)(pred += 4)) = temp;
    847         temp = P_K | (P_K << 8);
    848         temp = temp | (temp << 16);
    849         *((uint32*)(pred += 4)) = temp;
    850         temp = P_L | (P_L << 8);
    851         temp = temp | (temp << 16);
    852         *((uint32*)(pred += 4)) = temp;
    853 
    854         mode_avail[AVC_I4_Horizontal_Up] = 1;
    855         pred = encvid->pred_i4[AVC_I4_Horizontal_Up];
    856 
    857         Q0 = (P_J + P_K + 1) >> 1;
    858         Q1 = (P_J + (P_K << 1) + P_L + 2) >> 2;
    859         P0 = ((P_I + P_J + 1) >> 1);
    860         P1 = ((P_I + (P_J << 1) + P_K + 2) >> 2);
    861 
    862         temp = P0 | (P1 << 8);      // [P0 P1 Q0 Q1]
    863         temp |= (Q0 << 16);     // [Q0 Q1 R0 DO]
    864         temp |= (Q1 << 24);     // [R0 D0 D1 D1]
    865         *((uint32*)pred) = temp;      // [D1 D1 D1 D1]
    866 
    867         D0 = (P_K + 3 * P_L + 2) >> 2;
    868         R0 = (P_K + P_L + 1) >> 1;
    869 
    870         temp = Q0 | (Q1 << 8);
    871         temp |= (R0 << 16);
    872         temp |= (D0 << 24);
    873         *((uint32*)(pred += 4)) = temp;
    874 
    875         D1 = P_L;
    876 
    877         temp = R0 | (D0 << 8);
    878         temp |= (D1 << 16);
    879         temp |= (D1 << 24);
    880         *((uint32*)(pred += 4)) = temp;
    881 
    882         temp = D1 | (D1 << 8);
    883         temp |= (temp << 16);
    884         *((uint32*)(pred += 4)) = temp;
    885     }
    886     /* DC */
    887     mode_avail[AVC_I4_DC] = 1;
    888     pred = encvid->pred_i4[AVC_I4_DC];
    889     if (availability.left)
    890     {
    891         DC = P_I + P_J + P_K + P_L;
    892 
    893         if (availability.top)
    894         {
    895             DC = (P_A + P_B + P_C + P_D + DC + 4) >> 3;
    896         }
    897         else
    898         {
    899             DC = (DC + 2) >> 2;
    900 
    901         }
    902     }
    903     else if (availability.top)
    904     {
    905         DC = (P_A + P_B + P_C + P_D + 2) >> 2;
    906 
    907     }
    908     else
    909     {
    910         DC = 128;
    911     }
    912 
    913     temp = DC | (DC << 8);
    914     temp = temp | (temp << 16);
    915     *((uint32*)pred) = temp;
    916     *((uint32*)(pred += 4)) = temp;
    917     *((uint32*)(pred += 4)) = temp;
    918     *((uint32*)(pred += 4)) = temp;
    919 
    920     /* Down-left */
    921     mode_avail[AVC_I4_Diagonal_Down_Left] = 0;
    922 
    923     if (availability.top)
    924     {
    925         mode_avail[AVC_I4_Diagonal_Down_Left] = 1;
    926 
    927         pred = encvid->pred_i4[AVC_I4_Diagonal_Down_Left];
    928 
    929         r0 = P_A;
    930         r1 = P_B;
    931         r2 = P_C;
    932         r3 = P_D;
    933 
    934         r0 += (r1 << 1);
    935         r0 += r2;
    936         r0 += 2;
    937         r0 >>= 2;
    938         r1 += (r2 << 1);
    939         r1 += r3;
    940         r1 += 2;
    941         r1 >>= 2;
    942 
    943         if (availability.top_right)
    944         {
    945             r4 = P_E;
    946             r5 = P_F;
    947             r6 = P_G;
    948             r7 = P_H;
    949 
    950             r2 += (r3 << 1);
    951             r2 += r4;
    952             r2 += 2;
    953             r2 >>= 2;
    954             r3 += (r4 << 1);
    955             r3 += r5;
    956             r3 += 2;
    957             r3 >>= 2;
    958             r4 += (r5 << 1);
    959             r4 += r6;
    960             r4 += 2;
    961             r4 >>= 2;
    962             r5 += (r6 << 1);
    963             r5 += r7;
    964             r5 += 2;
    965             r5 >>= 2;
    966             r6 += (3 * r7);
    967             r6 += 2;
    968             r6 >>= 2;
    969             temp = r0 | (r1 << 8);
    970             temp |= (r2 << 16);
    971             temp |= (r3 << 24);
    972             *((uint32*)pred) = temp;
    973 
    974             temp = (temp >> 8) | (r4 << 24);
    975             *((uint32*)(pred += 4)) = temp;
    976 
    977             temp = (temp >> 8) | (r5 << 24);
    978             *((uint32*)(pred += 4)) = temp;
    979 
    980             temp = (temp >> 8) | (r6 << 24);
    981             *((uint32*)(pred += 4)) = temp;
    982         }
    983         else
    984         {
    985             r2 += (r3 * 3);
    986             r2 += 2;
    987             r2 >>= 2;
    988             r3 = ((r3 << 2) + 2);
    989             r3 >>= 2;
    990 
    991             temp = r0 | (r1 << 8);
    992             temp |= (r2 << 16);
    993             temp |= (r3 << 24);
    994             *((uint32*)pred) = temp;
    995 
    996             temp = (temp >> 8) | (r3 << 24);
    997             *((uint32*)(pred += 4)) = temp;
    998 
    999             temp = (temp >> 8) | (r3 << 24);
   1000             *((uint32*)(pred += 4)) = temp;
   1001 
   1002             temp = (temp >> 8) | (r3 << 24);
   1003             *((uint32*)(pred += 4)) = temp;
   1004 
   1005         }
   1006     }
   1007 
   1008     /* Down Right */
   1009     mode_avail[AVC_I4_Diagonal_Down_Right] = 0;
   1010     /* Diagonal Vertical Right */
   1011     mode_avail[AVC_I4_Vertical_Right] = 0;
   1012     /* Horizontal Down */
   1013     mode_avail[AVC_I4_Horizontal_Down] = 0;
   1014 
   1015     if (top_left == TRUE)
   1016     {
   1017         /* Down Right */
   1018         mode_avail[AVC_I4_Diagonal_Down_Right] = 1;
   1019         pred = encvid->pred_i4[AVC_I4_Diagonal_Down_Right];
   1020 
   1021         Q_x = (P_A + 2 * P_B + P_C + 2) >> 2;
   1022         R_x = (P_B + 2 * P_C + P_D + 2) >> 2;
   1023         P_x = (P_X + 2 * P_A + P_B + 2) >> 2;
   1024         D   = (P_A + 2 * P_X + P_I + 2) >> 2;
   1025         P_y = (P_X + 2 * P_I + P_J + 2) >> 2;
   1026         Q_y = (P_I + 2 * P_J + P_K + 2) >> 2;
   1027         R_y = (P_J + 2 * P_K + P_L + 2) >> 2;
   1028 
   1029         /* we can pack these */
   1030         temp =  D | (P_x << 8);   //[D   P_x Q_x R_x]
   1031         //[P_y D   P_x Q_x]
   1032         temp |= (Q_x << 16); //[Q_y P_y D   P_x]
   1033         temp |= (R_x << 24);  //[R_y Q_y P_y D  ]
   1034         *((uint32*)pred) = temp;
   1035 
   1036         temp =  P_y | (D << 8);
   1037         temp |= (P_x << 16);
   1038         temp |= (Q_x << 24);
   1039         *((uint32*)(pred += 4)) = temp;
   1040 
   1041         temp =  Q_y | (P_y << 8);
   1042         temp |= (D << 16);
   1043         temp |= (P_x << 24);
   1044         *((uint32*)(pred += 4)) = temp;
   1045 
   1046         temp = R_y | (Q_y << 8);
   1047         temp |= (P_y << 16);
   1048         temp |= (D << 24);
   1049         *((uint32*)(pred += 4)) = temp;
   1050 
   1051 
   1052         /* Diagonal Vertical Right */
   1053         mode_avail[AVC_I4_Vertical_Right] = 1;
   1054         pred = encvid->pred_i4[AVC_I4_Vertical_Right];
   1055 
   1056         Q0 = P_A + P_B + 1;
   1057         R0 = P_B + P_C + 1;
   1058         S0 = P_C + P_D + 1;
   1059         P0 = P_X + P_A + 1;
   1060         D = (P_I + 2 * P_X + P_A + 2) >> 2;
   1061 
   1062         P1 = (P0 + Q0) >> 2;
   1063         Q1 = (Q0 + R0) >> 2;
   1064         R1 = (R0 + S0) >> 2;
   1065 
   1066         P0 >>= 1;
   1067         Q0 >>= 1;
   1068         R0 >>= 1;
   1069         S0 >>= 1;
   1070 
   1071         P2 = (P_X + 2 * P_I + P_J + 2) >> 2;
   1072         Q2 = (P_I + 2 * P_J + P_K + 2) >> 2;
   1073 
   1074         temp =  P0 | (Q0 << 8);  //[P0 Q0 R0 S0]
   1075         //[D  P1 Q1 R1]
   1076         temp |= (R0 << 16); //[P2 P0 Q0 R0]
   1077         temp |= (S0 << 24); //[Q2 D  P1 Q1]
   1078         *((uint32*)pred) =  temp;
   1079 
   1080         temp =  D | (P1 << 8);
   1081         temp |= (Q1 << 16);
   1082         temp |= (R1 << 24);
   1083         *((uint32*)(pred += 4)) =  temp;
   1084 
   1085         temp = P2 | (P0 << 8);
   1086         temp |= (Q0 << 16);
   1087         temp |= (R0 << 24);
   1088         *((uint32*)(pred += 4)) =  temp;
   1089 
   1090         temp = Q2 | (D << 8);
   1091         temp |= (P1 << 16);
   1092         temp |= (Q1 << 24);
   1093         *((uint32*)(pred += 4)) =  temp;
   1094 
   1095 
   1096         /* Horizontal Down */
   1097         mode_avail[AVC_I4_Horizontal_Down] = 1;
   1098         pred = encvid->pred_i4[AVC_I4_Horizontal_Down];
   1099 
   1100 
   1101         Q2 = (P_A + 2 * P_B + P_C + 2) >> 2;
   1102         P2 = (P_X + 2 * P_A + P_B + 2) >> 2;
   1103         D = (P_I + 2 * P_X + P_A + 2) >> 2;
   1104         P0 = P_X + P_I + 1;
   1105         Q0 = P_I + P_J + 1;
   1106         R0 = P_J + P_K + 1;
   1107         S0 = P_K + P_L + 1;
   1108 
   1109         P1 = (P0 + Q0) >> 2;
   1110         Q1 = (Q0 + R0) >> 2;
   1111         R1 = (R0 + S0) >> 2;
   1112 
   1113         P0 >>= 1;
   1114         Q0 >>= 1;
   1115         R0 >>= 1;
   1116         S0 >>= 1;
   1117 
   1118 
   1119         /* we can pack these */
   1120         temp = P0 | (D << 8);   //[P0 D  P2 Q2]
   1121         //[Q0 P1 P0 D ]
   1122         temp |= (P2 << 16);  //[R0 Q1 Q0 P1]
   1123         temp |= (Q2 << 24); //[S0 R1 R0 Q1]
   1124         *((uint32*)pred) = temp;
   1125 
   1126         temp = Q0 | (P1 << 8);
   1127         temp |= (P0 << 16);
   1128         temp |= (D << 24);
   1129         *((uint32*)(pred += 4)) = temp;
   1130 
   1131         temp = R0 | (Q1 << 8);
   1132         temp |= (Q0 << 16);
   1133         temp |= (P1 << 24);
   1134         *((uint32*)(pred += 4)) = temp;
   1135 
   1136         temp = S0 | (R1 << 8);
   1137         temp |= (R0 << 16);
   1138         temp |= (Q1 << 24);
   1139         *((uint32*)(pred += 4)) = temp;
   1140 
   1141     }
   1142 
   1143     /* vertical left */
   1144     mode_avail[AVC_I4_Vertical_Left] = 0;
   1145     if (availability.top)
   1146     {
   1147         mode_avail[AVC_I4_Vertical_Left] = 1;
   1148         pred = encvid->pred_i4[AVC_I4_Vertical_Left];
   1149 
   1150         x0 = P_A + P_B + 1;
   1151         x1 = P_B + P_C + 1;
   1152         x2 = P_C + P_D + 1;
   1153         if (availability.top_right)
   1154         {
   1155             x3 = P_D + P_E + 1;
   1156             x4 = P_E + P_F + 1;
   1157             x5 = P_F + P_G + 1;
   1158         }
   1159         else
   1160         {
   1161             x3 = x4 = x5 = (P_D << 1) + 1;
   1162         }
   1163 
   1164         temp1 = (x0 >> 1);
   1165         temp1 |= ((x1 >> 1) << 8);
   1166         temp1 |= ((x2 >> 1) << 16);
   1167         temp1 |= ((x3 >> 1) << 24);
   1168 
   1169         *((uint32*)pred) = temp1;
   1170 
   1171         temp2 = ((x0 + x1) >> 2);
   1172         temp2 |= (((x1 + x2) >> 2) << 8);
   1173         temp2 |= (((x2 + x3) >> 2) << 16);
   1174         temp2 |= (((x3 + x4) >> 2) << 24);
   1175 
   1176         *((uint32*)(pred += 4)) = temp2;
   1177 
   1178         temp1 = (temp1 >> 8) | ((x4 >> 1) << 24);   /* rotate out old value */
   1179         *((uint32*)(pred += 4)) = temp1;
   1180 
   1181         temp2 = (temp2 >> 8) | (((x4 + x5) >> 2) << 24); /* rotate out old value */
   1182         *((uint32*)(pred += 4)) = temp2;
   1183     }
   1184 
   1185     //===== LOOP OVER ALL 4x4 INTRA PREDICTION MODES =====
   1186     // can re-order the search here instead of going in order
   1187 
   1188     // find most probable mode
   1189     encvid->mostProbableI4Mode[blkidx] = mostProbableMode = FindMostProbableI4Mode(video, blkidx);
   1190 
   1191     min_cost = 0xFFFF;
   1192 
   1193     for (ipmode = 0; ipmode < AVCNumI4PredMode; ipmode++)
   1194     {
   1195         if (mode_avail[ipmode] == TRUE)
   1196         {
   1197             cost  = (ipmode == mostProbableMode) ? 0 : fixedcost;
   1198             pred = encvid->pred_i4[ipmode];
   1199 
   1200             cost_i4(org, org_pitch, pred, &cost);
   1201 
   1202             if (cost < min_cost)
   1203             {
   1204                 currMB->i4Mode[blkidx] = (AVCIntra4x4PredMode)ipmode;
   1205                 min_cost   = cost;
   1206                 min_sad = cost - ((ipmode == mostProbableMode) ? 0 : fixedcost);
   1207             }
   1208         }
   1209     }
   1210 
   1211     if (blkidx == 0)
   1212     {
   1213         encvid->i4_sad = min_sad;
   1214     }
   1215     else
   1216     {
   1217         encvid->i4_sad += min_sad;
   1218     }
   1219 
   1220     return min_cost;
   1221 }
   1222 
   1223 int FindMostProbableI4Mode(AVCCommonObj *video, int blkidx)
   1224 {
   1225     int dcOnlyPredictionFlag;
   1226     AVCMacroblock *currMB = video->currMB;
   1227     int intra4x4PredModeA, intra4x4PredModeB, predIntra4x4PredMode;
   1228 
   1229 
   1230     dcOnlyPredictionFlag = 0;
   1231     if (blkidx&0x3)
   1232     {
   1233         intra4x4PredModeA = currMB->i4Mode[blkidx-1]; // block to the left
   1234     }
   1235     else /* for blk 0, 4, 8, 12 */
   1236     {
   1237         if (video->intraAvailA)
   1238         {
   1239             if (video->mblock[video->mbAddrA].mbMode == AVC_I4)
   1240             {
   1241                 intra4x4PredModeA = video->mblock[video->mbAddrA].i4Mode[blkidx + 3];
   1242             }
   1243             else
   1244             {
   1245                 intra4x4PredModeA = AVC_I4_DC;
   1246             }
   1247         }
   1248         else
   1249         {
   1250             dcOnlyPredictionFlag = 1;
   1251             goto PRED_RESULT_READY;  // skip below
   1252         }
   1253     }
   1254 
   1255     if (blkidx >> 2)
   1256     {
   1257         intra4x4PredModeB = currMB->i4Mode[blkidx-4]; // block above
   1258     }
   1259     else /* block 0, 1, 2, 3 */
   1260     {
   1261         if (video->intraAvailB)
   1262         {
   1263             if (video->mblock[video->mbAddrB].mbMode == AVC_I4)
   1264             {
   1265                 intra4x4PredModeB = video->mblock[video->mbAddrB].i4Mode[blkidx+12];
   1266             }
   1267             else
   1268             {
   1269                 intra4x4PredModeB = AVC_I4_DC;
   1270             }
   1271         }
   1272         else
   1273         {
   1274             dcOnlyPredictionFlag = 1;
   1275         }
   1276     }
   1277 
   1278 PRED_RESULT_READY:
   1279     if (dcOnlyPredictionFlag)
   1280     {
   1281         intra4x4PredModeA = intra4x4PredModeB = AVC_I4_DC;
   1282     }
   1283 
   1284     predIntra4x4PredMode = AVC_MIN(intra4x4PredModeA, intra4x4PredModeB);
   1285 
   1286     return predIntra4x4PredMode;
   1287 }
   1288 
   /*
    * Adds the transform-domain cost of one 4x4 intra prediction to *cost.
    *
    * The residual (org - pred) of the 4x4 block goes through a 4-point
    * butterfly transform along every row and then along every column.  The
    * absolute values of the 16 results are summed, halved with rounding
    * ((sum + 1) >> 1) and added to the value already stored in *cost.
    *
    * org        top-left sample of the 4x4 source block
    * org_pitch  distance in bytes between consecutive rows of org
    * pred       prediction samples, 4 rows of 4 contiguous bytes
    * cost       in/out: cost accumulator (16-bit, so the addition wraps
    *            modulo 65536 exactly as the original code did)
    *
    * The butterflies are written as plain sums and differences.  The earlier
    * form "m0 - (m3 << 1)" left-shifted negative residuals, which is
    * undefined behaviour in C; the values produced here are the same.
    */
   void cost_i4(uint8 *org, int org_pitch, uint8 *pred, uint16 *cost)
   {
       int16 coef[16];
       int16 *out;
       int row, col, idx;
       int a0, a1, a2, a3;
       int sum03, dif03, sum12, dif12;
       int satd = 0;

       /* horizontal pass: one butterfly per residual row */
       out = coef;
       for (row = 0; row < 4; row++)
       {
           a0 = org[0] - pred[0];
           a1 = org[1] - pred[1];
           a2 = org[2] - pred[2];
           a3 = org[3] - pred[3];

           sum03 = a0 + a3;
           dif03 = a0 - a3;
           sum12 = a1 + a2;
           dif12 = a1 - a2;

           out[0] = (int16)(sum03 + sum12);
           out[1] = (int16)(dif12 + dif03);
           out[2] = (int16)(sum03 - sum12);
           out[3] = (int16)(dif03 - dif12);

           org += org_pitch;   /* source rows are org_pitch apart */
           pred += 4;          /* prediction rows are packed */
           out += 4;
       }

       /* vertical pass: same butterfly down each column, in place */
       for (col = 0; col < 4; col++)
       {
           a0 = coef[col];
           a1 = coef[col + 4];
           a2 = coef[col + 8];
           a3 = coef[col + 12];

           sum03 = a0 + a3;
           dif03 = a0 - a3;
           sum12 = a1 + a2;
           dif12 = a1 - a2;

           coef[col]      = (int16)(sum03 + sum12);
           coef[col + 4]  = (int16)(dif12 + dif03);
           coef[col + 8]  = (int16)(sum03 - sum12);
           coef[col + 12] = (int16)(dif03 - dif12);
       }

       /* sum of absolute transformed differences */
       for (idx = 0; idx < 16; idx++)
       {
           satd += (coef[idx] >= 0) ? coef[idx] : -coef[idx];
       }

       satd = (satd + 1) >> 1;
       *cost = (uint16)(*cost + satd);
   }
   1362 
   1363 void chroma_intra_search(AVCEncObject *encvid)
   1364 {
   1365     AVCCommonObj *video = encvid->common;
   1366     AVCPictureData *currPic = video->currPic;
   1367 
   1368     int x_pos = video->mb_x << 3;
   1369     int y_pos = video->mb_y << 3;
   1370     int pitch = currPic->pitch >> 1;
   1371     int offset = y_pos * pitch + x_pos;
   1372 
   1373     uint8 *comp_ref_x, *comp_ref_y, *pred;
   1374     int  sum_x0, sum_x1, sum_y0, sum_y1;
   1375     int pred_0[2], pred_1[2], pred_2[2], pred_3[2];
   1376     uint32 pred_a, pred_b, pred_c, pred_d;
   1377     int i, j, component;
   1378     int a_16, b, c, factor_c, topleft;
   1379     int H, V, value;
   1380     uint8 *comp_ref_x0, *comp_ref_x1,  *comp_ref_y0, *comp_ref_y1;
   1381 
   1382     uint8 *curCb = currPic->Scb + offset;
   1383     uint8 *curCr = currPic->Scr + offset;
   1384 
   1385     uint8 *orgCb, *orgCr;
   1386     AVCFrameIO *currInput = encvid->currInput;
   1387     AVCMacroblock *currMB = video->currMB;
   1388     int org_pitch;
   1389     int cost, mincost;
   1390 
   1391     /* evaluate DC mode */
   1392     if (video->intraAvailB & video->intraAvailA)
   1393     {
   1394         comp_ref_x = curCb - pitch;
   1395         comp_ref_y = curCb - 1;
   1396 
   1397         for (i = 0; i < 2; i++)
   1398         {
   1399             pred_a = *((uint32*)comp_ref_x);
   1400             comp_ref_x += 4;
   1401             pred_b = (pred_a >> 8) & 0xFF00FF;
   1402             pred_a &= 0xFF00FF;
   1403             pred_a += pred_b;
   1404             pred_a += (pred_a >> 16);
   1405             sum_x0 = pred_a & 0xFFFF;
   1406 
   1407             pred_a = *((uint32*)comp_ref_x);
   1408             pred_b = (pred_a >> 8) & 0xFF00FF;
   1409             pred_a &= 0xFF00FF;
   1410             pred_a += pred_b;
   1411             pred_a += (pred_a >> 16);
   1412             sum_x1 = pred_a & 0xFFFF;
   1413 
   1414             pred_1[i] = (sum_x1 + 2) >> 2;
   1415 
   1416             sum_y0 = *comp_ref_y;
   1417             sum_y0 += *(comp_ref_y += pitch);
   1418             sum_y0 += *(comp_ref_y += pitch);
   1419             sum_y0 += *(comp_ref_y += pitch);
   1420 
   1421             sum_y1 = *(comp_ref_y += pitch);
   1422             sum_y1 += *(comp_ref_y += pitch);
   1423             sum_y1 += *(comp_ref_y += pitch);
   1424             sum_y1 += *(comp_ref_y += pitch);
   1425 
   1426             pred_2[i] = (sum_y1 + 2) >> 2;
   1427 
   1428             pred_0[i] = (sum_y0 + sum_x0 + 4) >> 3;
   1429             pred_3[i] = (sum_y1 + sum_x1 + 4) >> 3;
   1430 
   1431             comp_ref_x = curCr - pitch;
   1432             comp_ref_y = curCr - 1;
   1433         }
   1434     }
   1435 
   1436     else if (video->intraAvailA)
   1437     {
   1438         comp_ref_y = curCb - 1;
   1439         for (i = 0; i < 2; i++)
   1440         {
   1441             sum_y0 = *comp_ref_y;
   1442             sum_y0 += *(comp_ref_y += pitch);
   1443             sum_y0 += *(comp_ref_y += pitch);
   1444             sum_y0 += *(comp_ref_y += pitch);
   1445 
   1446             sum_y1 = *(comp_ref_y += pitch);
   1447             sum_y1 += *(comp_ref_y += pitch);
   1448             sum_y1 += *(comp_ref_y += pitch);
   1449             sum_y1 += *(comp_ref_y += pitch);
   1450 
   1451             pred_0[i] = pred_1[i] = (sum_y0 + 2) >> 2;
   1452             pred_2[i] = pred_3[i] = (sum_y1 + 2) >> 2;
   1453 
   1454             comp_ref_y = curCr - 1;
   1455         }
   1456     }
   1457     else if (video->intraAvailB)
   1458     {
   1459         comp_ref_x = curCb - pitch;
   1460         for (i = 0; i < 2; i++)
   1461         {
   1462             pred_a = *((uint32*)comp_ref_x);
   1463             comp_ref_x += 4;
   1464             pred_b = (pred_a >> 8) & 0xFF00FF;
   1465             pred_a &= 0xFF00FF;
   1466             pred_a += pred_b;
   1467             pred_a += (pred_a >> 16);
   1468             sum_x0 = pred_a & 0xFFFF;
   1469 
   1470             pred_a = *((uint32*)comp_ref_x);
   1471             pred_b = (pred_a >> 8) & 0xFF00FF;
   1472             pred_a &= 0xFF00FF;
   1473             pred_a += pred_b;
   1474             pred_a += (pred_a >> 16);
   1475             sum_x1 = pred_a & 0xFFFF;
   1476 
   1477             pred_0[i] = pred_2[i] = (sum_x0 + 2) >> 2;
   1478             pred_1[i] = pred_3[i] = (sum_x1 + 2) >> 2;
   1479 
   1480             comp_ref_x = curCr - pitch;
   1481         }
   1482     }
   1483     else
   1484     {
   1485         pred_0[0] = pred_0[1] = pred_1[0] = pred_1[1] =
   1486                                                 pred_2[0] = pred_2[1] = pred_3[0] = pred_3[1] = 128;
   1487     }
   1488 
   1489     pred = encvid->pred_ic[AVC_IC_DC];
   1490 
   1491     pred_a = pred_0[0];
   1492     pred_b = pred_1[0];
   1493     pred_a |= (pred_a << 8);
   1494     pred_a |= (pred_a << 16);
   1495     pred_b |= (pred_b << 8);
   1496     pred_b |= (pred_b << 16);
   1497 
   1498     pred_c = pred_0[1];
   1499     pred_d = pred_1[1];
   1500     pred_c |= (pred_c << 8);
   1501     pred_c |= (pred_c << 16);
   1502     pred_d |= (pred_d << 8);
   1503     pred_d |= (pred_d << 16);
   1504 
   1505 
   1506     for (j = 0; j < 4; j++) /* 4 lines */
   1507     {
   1508         *((uint32*)pred) = pred_a;
   1509         *((uint32*)(pred + 4)) = pred_b;
   1510         *((uint32*)(pred + 8)) = pred_c;
   1511         *((uint32*)(pred + 12)) = pred_d;
   1512         pred += 16; /* move to the next line */
   1513     }
   1514 
   1515     pred_a = pred_2[0];
   1516     pred_b = pred_3[0];
   1517     pred_a |= (pred_a << 8);
   1518     pred_a |= (pred_a << 16);
   1519     pred_b |= (pred_b << 8);
   1520     pred_b |= (pred_b << 16);
   1521 
   1522     pred_c = pred_2[1];
   1523     pred_d = pred_3[1];
   1524     pred_c |= (pred_c << 8);
   1525     pred_c |= (pred_c << 16);
   1526     pred_d |= (pred_d << 8);
   1527     pred_d |= (pred_d << 16);
   1528 
   1529     for (j = 0; j < 4; j++) /* 4 lines */
   1530     {
   1531         *((uint32*)pred) = pred_a;
   1532         *((uint32*)(pred + 4)) = pred_b;
   1533         *((uint32*)(pred + 8)) = pred_c;
   1534         *((uint32*)(pred + 12)) = pred_d;
   1535         pred += 16; /* move to the next line */
   1536     }
   1537 
   1538     /* predict horizontal mode */
   1539     if (video->intraAvailA)
   1540     {
   1541         comp_ref_y = curCb - 1;
   1542         comp_ref_x = curCr - 1;
   1543         pred = encvid->pred_ic[AVC_IC_Horizontal];
   1544 
   1545         for (i = 4; i < 6; i++)
   1546         {
   1547             for (j = 0; j < 4; j++)
   1548             {
   1549                 pred_a = *comp_ref_y;
   1550                 comp_ref_y += pitch;
   1551                 pred_a |= (pred_a << 8);
   1552                 pred_a |= (pred_a << 16);
   1553                 *((uint32*)pred) = pred_a;
   1554                 *((uint32*)(pred + 4)) = pred_a;
   1555 
   1556                 pred_a = *comp_ref_x;
   1557                 comp_ref_x += pitch;
   1558                 pred_a |= (pred_a << 8);
   1559                 pred_a |= (pred_a << 16);
   1560                 *((uint32*)(pred + 8)) = pred_a;
   1561                 *((uint32*)(pred + 12)) = pred_a;
   1562 
   1563                 pred += 16;
   1564             }
   1565         }
   1566     }
   1567 
   1568     /* vertical mode */
   1569     if (video->intraAvailB)
   1570     {
   1571         comp_ref_x = curCb - pitch;
   1572         comp_ref_y = curCr - pitch;
   1573         pred = encvid->pred_ic[AVC_IC_Vertical];
   1574 
   1575         pred_a = *((uint32*)comp_ref_x);
   1576         pred_b = *((uint32*)(comp_ref_x + 4));
   1577         pred_c = *((uint32*)comp_ref_y);
   1578         pred_d = *((uint32*)(comp_ref_y + 4));
   1579 
   1580         for (j = 0; j < 8; j++)
   1581         {
   1582             *((uint32*)pred) = pred_a;
   1583             *((uint32*)(pred + 4)) = pred_b;
   1584             *((uint32*)(pred + 8)) = pred_c;
   1585             *((uint32*)(pred + 12)) = pred_d;
   1586             pred += 16;
   1587         }
   1588     }
   1589 
   1590     /* Intra_Chroma_Plane */
   1591     if (video->intraAvailA && video->intraAvailB && video->intraAvailD)
   1592     {
   1593         comp_ref_x = curCb - pitch;
   1594         comp_ref_y = curCb - 1;
   1595         topleft = curCb[-pitch-1];
   1596 
   1597         pred = encvid->pred_ic[AVC_IC_Plane];
   1598         for (component = 0; component < 2; component++)
   1599         {
   1600             H = V = 0;
   1601             comp_ref_x0 = comp_ref_x + 4;
   1602             comp_ref_x1 = comp_ref_x + 2;
   1603             comp_ref_y0 = comp_ref_y + (pitch << 2);
   1604             comp_ref_y1 = comp_ref_y + (pitch << 1);
   1605             for (i = 1; i < 4; i++)
   1606             {
   1607                 H += i * (*comp_ref_x0++ - *comp_ref_x1--);
   1608                 V += i * (*comp_ref_y0 - *comp_ref_y1);
   1609                 comp_ref_y0 += pitch;
   1610                 comp_ref_y1 -= pitch;
   1611             }
   1612             H += i * (*comp_ref_x0++ - topleft);
   1613             V += i * (*comp_ref_y0 - *comp_ref_y1);
   1614 
   1615             a_16 = ((*(comp_ref_x + 7) + *(comp_ref_y + 7 * pitch)) << 4) + 16;
   1616             b = (17 * H + 16) >> 5;
   1617             c = (17 * V + 16) >> 5;
   1618 
   1619             pred_a = 0;
   1620             for (i = 4; i < 6; i++)
   1621             {
   1622                 for (j = 0; j < 4; j++)
   1623                 {
   1624                     factor_c = a_16 + c * (pred_a++ - 3);
   1625 
   1626                     factor_c -= 3 * b;
   1627 
   1628                     value = factor_c >> 5;
   1629                     factor_c += b;
   1630                     CLIP_RESULT(value)
   1631                     pred_b = value;
   1632                     value = factor_c >> 5;
   1633                     factor_c += b;
   1634                     CLIP_RESULT(value)
   1635                     pred_b |= (value << 8);
   1636                     value = factor_c >> 5;
   1637                     factor_c += b;
   1638                     CLIP_RESULT(value)
   1639                     pred_b |= (value << 16);
   1640                     value = factor_c >> 5;
   1641                     factor_c += b;
   1642                     CLIP_RESULT(value)
   1643                     pred_b |= (value << 24);
   1644                     *((uint32*)pred) = pred_b;
   1645 
   1646                     value = factor_c >> 5;
   1647                     factor_c += b;
   1648                     CLIP_RESULT(value)
   1649                     pred_b = value;
   1650                     value = factor_c >> 5;
   1651                     factor_c += b;
   1652                     CLIP_RESULT(value)
   1653                     pred_b |= (value << 8);
   1654                     value = factor_c >> 5;
   1655                     factor_c += b;
   1656                     CLIP_RESULT(value)
   1657                     pred_b |= (value << 16);
   1658                     value = factor_c >> 5;
   1659                     factor_c += b;
   1660                     CLIP_RESULT(value)
   1661                     pred_b |= (value << 24);
   1662                     *((uint32*)(pred + 4)) = pred_b;
   1663                     pred += 16;
   1664                 }
   1665             }
   1666 
   1667             pred -= 120; /* point to cr */
   1668             comp_ref_x = curCr - pitch;
   1669             comp_ref_y = curCr - 1;
   1670             topleft = curCr[-pitch-1];
   1671         }
   1672     }
   1673 
   1674     /* now evaluate it */
   1675 
   1676     org_pitch = (currInput->pitch) >> 1;
   1677     offset = x_pos + y_pos * org_pitch;
   1678 
   1679     orgCb = currInput->YCbCr[1] + offset;
   1680     orgCr = currInput->YCbCr[2] + offset;
   1681 
   1682     mincost = 0x7fffffff;
   1683     cost = SATDChroma(orgCb, orgCr, org_pitch, encvid->pred_ic[AVC_IC_DC], mincost);
   1684     if (cost < mincost)
   1685     {
   1686         mincost = cost;
   1687         currMB->intra_chroma_pred_mode = AVC_IC_DC;
   1688     }
   1689 
   1690     if (video->intraAvailA)
   1691     {
   1692         cost = SATDChroma(orgCb, orgCr, org_pitch, encvid->pred_ic[AVC_IC_Horizontal], mincost);
   1693         if (cost < mincost)
   1694         {
   1695             mincost = cost;
   1696             currMB->intra_chroma_pred_mode = AVC_IC_Horizontal;
   1697         }
   1698     }
   1699 
   1700     if (video->intraAvailB)
   1701     {
   1702         cost = SATDChroma(orgCb, orgCr, org_pitch, encvid->pred_ic[AVC_IC_Vertical], mincost);
   1703         if (cost < mincost)
   1704         {
   1705             mincost = cost;
   1706             currMB->intra_chroma_pred_mode = AVC_IC_Vertical;
   1707         }
   1708     }
   1709 
   1710     if (video->intraAvailA && video->intraAvailB && video->intraAvailD)
   1711     {
   1712         cost = SATDChroma(orgCb, orgCr, org_pitch, encvid->pred_ic[AVC_IC_Plane], mincost);
   1713         if (cost < mincost)
   1714         {
   1715             mincost = cost;
   1716             currMB->intra_chroma_pred_mode = AVC_IC_Plane;
   1717         }
   1718     }
   1719 
   1720 
   1721     return ;
   1722 }
   1723 
   1724 
   /*
    * 4-point butterfly used by SATDChroma.  Writes
    *   out[0]        = (a0 + a3) + (a1 + a2)
    *   out[stride]   = (a1 - a2) + (a0 - a3)
    *   out[2*stride] = (a0 + a3) - (a1 + a2)
    *   out[3*stride] = (a0 - a3) - (a1 - a2)
    * The inputs are passed by value, so `out` may alias their storage.
    */
   static void satd_chroma_bfly(int a0, int a1, int a2, int a3, int16 *out, int stride)
   {
       int sum03 = a0 + a3;
       int dif03 = a0 - a3;
       int sum12 = a1 + a2;
       int dif12 = a1 - a2;

       out[0]          = (int16)(sum03 + sum12);
       out[stride]     = (int16)(dif12 + dif03);
       out[2 * stride] = (int16)(sum03 - sum12);
       out[3 * stride] = (int16)(dif03 - dif12);
   }

   /*
    * Cost of one chroma intra prediction: the sum of absolute values of the
    * 4x4 butterfly-transformed residual over the 8x8 Cb and 8x8 Cr blocks.
    *
    * orgCb, orgCr  top-left source samples of the Cb / Cr blocks
    * org_pitch     distance in bytes between consecutive source rows
    * pred          prediction, 8 rows of 16 bytes (8 Cb then 8 Cr per row)
    * min_cost      best cost so far; checked after every 8 coefficients
    *
    * Returns the full cost, or the partial sum reached at the moment it first
    * exceeded min_cost (early drop-out).
    *
    * The butterflies are plain sums and differences.  The earlier form
    * "m0 - (m3 << 1)" left-shifted negative values, which is undefined
    * behaviour in C; the results are unchanged.
    */
   int SATDChroma(uint8 *orgCb, uint8 *orgCr, int org_pitch, uint8 *pred, int min_cost)
   {
       int16 res[128];   /* transformed residual, same 8 x 16 layout as pred */
       int16 *line;
       int16 *base;
       int row, grp, band, col, idx, k;
       int cost, v;

       /* horizontal pass: four groups of four samples per row */
       line = res;
       for (row = 0; row < 8; row++)
       {
           for (grp = 0; grp < 2; grp++)
           {
               k = grp << 2;
               /* Cb occupies columns 0..7 of the row */
               satd_chroma_bfly(orgCb[k] - pred[k],
                                orgCb[k + 1] - pred[k + 1],
                                orgCb[k + 2] - pred[k + 2],
                                orgCb[k + 3] - pred[k + 3],
                                line + k, 1);
               /* Cr occupies columns 8..15 of the row */
               satd_chroma_bfly(orgCr[k] - pred[8 + k],
                                orgCr[k + 1] - pred[8 + k + 1],
                                orgCr[k + 2] - pred[8 + k + 2],
                                orgCr[k + 3] - pred[8 + k + 3],
                                line + 8 + k, 1);
           }
           orgCb += org_pitch;
           orgCr += org_pitch;
           pred += 16;
           line += 16;
       }

       /* vertical pass: rows 0..3 and rows 4..7 are transformed separately */
       for (band = 0; band < 2; band++)
       {
           base = res + (band << 6);
           for (col = 0; col < 16; col++)
           {
               satd_chroma_bfly(base[col], base[col + 16], base[col + 32], base[col + 48],
                                base + col, 16);
           }
       }

       /* sum of absolute values, with a drop-out test after each group of 8 */
       cost = 0;
       for (idx = 0; idx < 128; idx += 8)
       {
           for (k = 0; k < 8; k++)
           {
               v = res[idx + k];
               cost += (v >= 0) ? v : -v;
           }
           if (cost > min_cost) /* early drop out */
           {
               return cost;
           }
       }

       return cost;
   }
   1840 
   1841 
   1842 
   1843 ///////////////////////////////// old code, unused
   1844 /* find the best intra mode based on original (unencoded) frame */
   1845 /* output is
   1846     currMB->mb_intra, currMB->mbMode,
   1847     currMB->i16Mode  (if currMB->mbMode == AVC_I16)
   1848     currMB->i4Mode[..] (if currMB->mbMode == AVC_I4) */
   1849 
   1850 #ifdef FIXED_INTRAPRED_MODE
   1851 void MBIntraSearch(AVCEncObject *encvid, AVCMacroblock *currMB, int mbNum)
   1852 {
   1853     (void)(mbNum);
   1854 
   1855     AVCCommonObj *video = encvid->common;
   1856     int indx, block_x, block_y;
   1857 
   1858     video->intraAvailA = video->intraAvailB = video->intraAvailC = video->intraAvailD = 0;
   1859 
   1860     if (!video->currPicParams->constrained_intra_pred_flag)
   1861     {
   1862         video->intraAvailA = video->mbAvailA;
   1863         video->intraAvailB = video->mbAvailB;
   1864         video->intraAvailC = video->mbAvailC;
   1865         video->intraAvailD = video->mbAvailD;
   1866     }
   1867     else
   1868     {
   1869         if (video->mbAvailA)
   1870         {
   1871             video->intraAvailA = video->mblock[video->mbAddrA].mb_intra;
   1872         }
   1873         if (video->mbAvailB)
   1874         {
   1875             video->intraAvailB = video->mblock[video->mbAddrB].mb_intra ;
   1876         }
   1877         if (video->mbAvailC)
   1878         {
   1879             video->intraAvailC = video->mblock[video->mbAddrC].mb_intra;
   1880         }
   1881         if (video->mbAvailD)
   1882         {
   1883             video->intraAvailD = video->mblock[video->mbAddrD].mb_intra;
   1884         }
   1885     }
   1886 
   1887     currMB->mb_intra = TRUE;
   1888     currMB->mbMode = FIXED_INTRAPRED_MODE;
   1889 
   1890     if (currMB->mbMode == AVC_I16)
   1891     {
   1892         currMB->i16Mode = FIXED_I16_MODE;
   1893 
   1894         if (FIXED_I16_MODE == AVC_I16_Vertical && !video->intraAvailB)
   1895         {
   1896             currMB->i16Mode = AVC_I16_DC;
   1897         }
   1898 
   1899         if (FIXED_I16_MODE == AVC_I16_Horizontal && !video->intraAvailA)
   1900         {
   1901             currMB->i16Mode = AVC_I16_DC;
   1902         }
   1903 
   1904         if (FIXED_I16_MODE == AVC_I16_Plane && !(video->intraAvailA && video->intraAvailB && video->intraAvailD))
   1905         {
   1906             currMB->i16Mode = AVC_I16_DC;
   1907         }
   1908     }
   1909     else //if(currMB->mbMode == AVC_I4)
   1910     {
   1911         for (indx = 0; indx < 16; indx++)
   1912         {
   1913             block_x = blkIdx2blkX[indx];
   1914             block_y = blkIdx2blkY[indx];
   1915 
   1916             currMB->i4Mode[(block_y<<2)+block_x] = FIXED_I4_MODE;
   1917 
   1918             if (FIXED_I4_MODE == AVC_I4_Vertical && !(block_y > 0 || video->intraAvailB))
   1919             {
   1920                 currMB->i4Mode[(block_y<<2)+block_x] = AVC_I4_DC;
   1921             }
   1922 
   1923             if (FIXED_I4_MODE == AVC_I4_Horizontal && !(block_x || video->intraAvailA))
   1924             {
   1925                 currMB->i4Mode[(block_y<<2)+block_x] = AVC_I4_DC;
   1926             }
   1927 
   1928             if (FIXED_I4_MODE == AVC_I4_Diagonal_Down_Left &&
   1929                     (block_y == 0 && !video->intraAvailB))
   1930             {
   1931                 currMB->i4Mode[(block_y<<2)+block_x] = AVC_I4_DC;
   1932             }
   1933 
   1934             if (FIXED_I4_MODE == AVC_I4_Diagonal_Down_Right &&
   1935                     !((block_y && block_x)
   1936                       || (block_y && video->intraAvailA)
   1937                       || (block_x && video->intraAvailB)
   1938                       || (video->intraAvailA && video->intraAvailD && video->intraAvailB)))
   1939             {
   1940                 currMB->i4Mode[(block_y<<2)+block_x] = AVC_I4_DC;
   1941             }
   1942 
   1943             if (FIXED_I4_MODE == AVC_I4_Vertical_Right &&
   1944                     !((block_y && block_x)
   1945                       || (block_y && video->intraAvailA)
   1946                       || (block_x && video->intraAvailB)
   1947                       || (video->intraAvailA && video->intraAvailD && video->intraAvailB)))
   1948             {
   1949                 currMB->i4Mode[(block_y<<2)+block_x] = AVC_I4_DC;
   1950             }
   1951 
   1952             if (FIXED_I4_MODE == AVC_I4_Horizontal_Down &&
   1953                     !((block_y && block_x)
   1954                       || (block_y && video->intraAvailA)
   1955                       || (block_x && video->intraAvailB)
   1956                       || (video->intraAvailA && video->intraAvailD && video->intraAvailB)))
   1957             {
   1958                 currMB->i4Mode[(block_y<<2)+block_x] = AVC_I4_DC;
   1959             }
   1960 
   1961             if (FIXED_I4_MODE == AVC_I4_Vertical_Left &&
   1962                     (block_y == 0 && !video->intraAvailB))
   1963             {
   1964                 currMB->i4Mode[(block_y<<2)+block_x] = AVC_I4_DC;
   1965             }
   1966 
   1967             if (FIXED_I4_MODE == AVC_I4_Horizontal_Up && !(block_x || video->intraAvailA))
   1968             {
   1969                 currMB->i4Mode[(block_y<<2)+block_x] = AVC_I4_DC;
   1970             }
   1971         }
   1972     }
   1973 
   1974     currMB->intra_chroma_pred_mode = FIXED_INTRA_CHROMA_MODE;
   1975 
   1976     if (FIXED_INTRA_CHROMA_MODE == AVC_IC_Horizontal && !(video->intraAvailA))
   1977     {
   1978         currMB->intra_chroma_pred_mode = AVC_IC_DC;
   1979     }
   1980 
   1981     if (FIXED_INTRA_CHROMA_MODE == AVC_IC_Vertical && !(video->intraAvailB))
   1982     {
   1983         currMB->intra_chroma_pred_mode = AVC_IC_DC;
   1984     }
   1985 
   1986     if (FIXED_INTRA_CHROMA_MODE == AVC_IC_Plane && !(video->intraAvailA && video->intraAvailB && video->intraAvailD))
   1987     {
   1988         currMB->intra_chroma_pred_mode = AVC_IC_DC;
   1989     }
   1990 
   1991     /* also reset the motion vectors */
   1992     /* set MV and Ref_Idx codes of Intra blocks in P-slices */
   1993     memset(currMB->mvL0, 0, sizeof(int32)*16);
   1994     currMB->ref_idx_L0[0] = -1;
   1995     currMB->ref_idx_L0[1] = -1;
   1996     currMB->ref_idx_L0[2] = -1;
   1997     currMB->ref_idx_L0[3] = -1;
   1998 
   1999     // output from this function, currMB->mbMode should be set to either
   2000     // AVC_I4, AVC_I16, or else in AVCMBMode enum, mbType, mb_intra, intra_chroma_pred_mode */
   2001     return ;
   2002 }
   2003 #else // faster combined prediction+SAD calculation
   2004 void MBIntraSearch(AVCEncObject *encvid, AVCMacroblock *currMB, int mbNum)
   2005 {
   2006     AVCCommonObj *video = encvid->common;
   2007     AVCFrameIO *currInput = encvid->currInput;
   2008     uint8 *curL, *curCb, *curCr;
   2009     uint8 *comp, *pred_block;
   2010     int block_x, block_y, offset;
   2011     uint sad, sad4, sadI4, sadI16;
   2012     int component, SubBlock_indx, temp;
   2013     int pitch = video->currPic->pitch;
   2014 
   2015     /* calculate the cost of each intra prediction mode  and compare to the
   2016     inter mode */
   2017     /* full search for all intra prediction */
   2018     offset = (video->mb_y << 4) * pitch + (video->mb_x << 4);
   2019     curL = currInput->YCbCr[0] + offset;
   2020     pred_block = video->pred_block + 84;
   2021 
   2022     /* Assuming that InitNeighborAvailability has been called prior to this function */
   2023     video->intraAvailA = video->intraAvailB = video->intraAvailC = video->intraAvailD = 0;
   2024 
   2025     if (!video->currPicParams->constrained_intra_pred_flag)
   2026     {
   2027         video->intraAvailA = video->mbAvailA;
   2028         video->intraAvailB = video->mbAvailB;
   2029         video->intraAvailC = video->mbAvailC;
   2030         video->intraAvailD = video->mbAvailD;
   2031     }
   2032     else
   2033     {
   2034         if (video->mbAvailA)
   2035         {
   2036             video->intraAvailA = video->mblock[video->mbAddrA].mb_intra;
   2037         }
   2038         if (video->mbAvailB)
   2039         {
   2040             video->intraAvailB = video->mblock[video->mbAddrB].mb_intra ;
   2041         }
   2042         if (video->mbAvailC)
   2043         {
   2044             video->intraAvailC = video->mblock[video->mbAddrC].mb_intra;
   2045         }
   2046         if (video->mbAvailD)
   2047         {
   2048             video->intraAvailD = video->mblock[video->mbAddrD].mb_intra;
   2049         }
   2050     }
   2051 
   2052     /* currently we're doing exhaustive search. Smart search will be used later */
   2053 
   2054     /* I16 modes */
   2055     curL = currInput->YCbCr[0] + offset;
   2056     video->pintra_pred_top = curL - pitch;
   2057     video->pintra_pred_left = curL - 1;
   2058     if (video->mb_y)
   2059     {
   2060         video->intra_pred_topleft = *(curL - pitch - 1);
   2061     }
   2062 
   2063     /* Intra_16x16_Vertical */
   2064     sadI16 = 65536;
   2065     /* check availability of top */
   2066     if (video->intraAvailB)
   2067     {
   2068         sad = SAD_I16_Vert(video, curL, sadI16);
   2069 
   2070         if (sad < sadI16)
   2071         {
   2072             sadI16 = sad;
   2073             currMB->i16Mode = AVC_I16_Vertical;
   2074         }
   2075     }
   2076     /* Intra_16x16_Horizontal */
   2077     /* check availability of left */
   2078     if (video->intraAvailA)
   2079     {
   2080         sad = SAD_I16_HorzDC(video, curL, AVC_I16_Horizontal, sadI16);
   2081 
   2082         if (sad < sadI16)
   2083         {
   2084             sadI16 = sad;
   2085             currMB->i16Mode = AVC_I16_Horizontal;
   2086         }
   2087     }
   2088 
   2089     /* Intra_16x16_DC, default mode */
   2090     sad = SAD_I16_HorzDC(video, curL, AVC_I16_DC, sadI16);
   2091     if (sad < sadI16)
   2092     {
   2093         sadI16 = sad;
   2094         currMB->i16Mode = AVC_I16_DC;
   2095     }
   2096 
   2097     /* Intra_16x16_Plane */
   2098     if (video->intraAvailA && video->intraAvailB && video->intraAvailD)
   2099     {
   2100         sad = SAD_I16_Plane(video, curL, sadI16);
   2101 
   2102         if (sad < sadI16)
   2103         {
   2104             sadI16 = sad;
   2105             currMB->i16Mode = AVC_I16_Plane;
   2106         }
   2107     }
   2108 
   2109     sadI16 >>= 1;  /* before comparison */
   2110 
   2111     /* selection between intra4, intra16 or inter mode */
   2112     if (sadI16 < encvid->min_cost)
   2113     {
   2114         currMB->mb_intra = TRUE;
   2115         currMB->mbMode = AVC_I16;
   2116         encvid->min_cost = sadI16;
   2117     }
   2118 
   2119     if (currMB->mb_intra) /* only do the chrominance search when intra is decided */
   2120     {
   2121         /* Note that we might be able to guess the type of prediction from
   2122         the luma prediction type */
   2123 
   2124         /* now search for the best chroma intra prediction */
   2125         offset = (offset >> 2) + (video->mb_x << 2);
   2126         curCb = currInput->YCbCr[1] + offset;
   2127         curCr = currInput->YCbCr[2] + offset;
   2128 
   2129         pitch >>= 1;
   2130         video->pintra_pred_top_cb = curCb - pitch;
   2131         video->pintra_pred_left_cb = curCb - 1;
   2132         video->pintra_pred_top_cr = curCr - pitch;
   2133         video->pintra_pred_left_cr = curCr - 1;
   2134 
   2135         if (video->mb_y)
   2136         {
   2137             video->intra_pred_topleft_cb = *(curCb - pitch - 1);
   2138             video->intra_pred_topleft_cr = *(curCr - pitch - 1);
   2139         }
   2140 
   2141         /* Intra_Chroma_DC */
   2142         sad4 = SAD_Chroma_DC(video, curCb, curCr, 65536);
   2143         currMB->intra_chroma_pred_mode = AVC_IC_DC;
   2144 
   2145         /* Intra_Chroma_Horizontal */
   2146         if (video->intraAvailA)
   2147         {
   2148             /* check availability of left */
   2149             sad = SAD_Chroma_Horz(video, curCb, curCr, sad4);
   2150             if (sad < sad4)
   2151             {
   2152                 sad4 = sad;
   2153                 currMB->intra_chroma_pred_mode = AVC_IC_Horizontal;
   2154             }
   2155         }
   2156 
   2157         /* Intra_Chroma_Vertical */
   2158         if (video->intraAvailB)
   2159         {
   2160             /* check availability of top */
   2161             sad = SAD_Chroma_Vert(video, curCb, curCr, sad4);
   2162 
   2163             if (sad < sad4)
   2164             {
   2165                 sad4 = sad;
   2166                 currMB->intra_chroma_pred_mode = AVC_IC_Vertical;
   2167             }
   2168         }
   2169 
   2170         /* Intra_Chroma_Plane */
   2171         if (video->intraAvailA && video->intraAvailB && video->intraAvailD)
   2172         {
   2173             /* check availability of top and left */
   2174             Intra_Chroma_Plane(video, pitch);
   2175 
   2176             sad = SADChroma(pred_block + 452, curCb, curCr, pitch);
   2177 
   2178             if (sad < sad4)
   2179             {
   2180                 sad4 = sad;
   2181                 currMB->intra_chroma_pred_mode = AVC_IC_Plane;
   2182             }
   2183         }
   2184 
   2185         /* also reset the motion vectors */
   2186         /* set MV and Ref_Idx codes of Intra blocks in P-slices */
   2187         memset(currMB->mvL0, 0, sizeof(int32)*16);
   2188         memset(currMB->ref_idx_L0, -1, sizeof(int16)*4);
   2189 
   2190     }
   2191 
   2192     // output from this function, currMB->mbMode should be set to either
   2193     // AVC_I4, AVC_I16, or else in AVCMBMode enum, mbType, mb_intra, intra_chroma_pred_mode */
   2194 
   2195     return ;
   2196 }
   2197 #endif
   2198 
   2199 
   2200