Home | History | Annotate | Download | only in src
      1 /* ------------------------------------------------------------------
      2  * Copyright (C) 1998-2009 PacketVideo
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
     13  * express or implied.
     14  * See the License for the specific language governing permissions
     15  * and limitations under the License.
     16  * -------------------------------------------------------------------
     17  */
#include <string.h>

#include "mp4def.h"
#include "mp4enc_lib.h"
#include "mp4lib_int.h"
#include "m4venc_oscl.h"
     22 
//#define PRINT_MV
/* Minimum GOP size. MotionEstimation only attempts scene-change detection
 * when numVopsInGOP exceeds this value (or the layer-0 frame rate is below 5).
 * 1/23/01, need to be tested */
#define MIN_GOP 1

#define CANDIDATE_DISTANCE  0 /* distance candidate from one another to consider as a distinct one */
/* shouldn't be more than 3 */

/* Where the zero-motion-vector bias is applied: */
#define ZERO_MV_PREF    0 /* 0: bias (0,0)MV before full-pel search, lowest complexity*/
/* 1: bias (0,0)MV after full-pel search, before half-pel, highest comp */
/* 2: bias (0,0)MV after half-pel, high comp, better PSNR */

#define RASTER_REFRESH  /* instead of random INTRA refresh, do raster scan,  2/26/01 */

#ifdef RASTER_REFRESH
#define TARGET_REFRESH_PER_REGION 4 /* number of MBs per frame to be INTRA refreshed */
#else
#define TARGET_REFRESH_PER_REGION 1 /* number of MBs per region to be INTRA refreshed */
#endif

#define ALL_CAND_EQUAL  10  /*  any number greater than 5 will work */

/* Number of pixels used in SAD calculation; MotionEstimation divides the
 * accumulated SAD by this value to obtain video->sumMAD. */
#define NumPixelMB  256

#define DEF_8X8_WIN 3   /* search region for 8x8 MVs around the 16x16 MV */
/* MotionEstimation uses (MB_Nb >> 1) + 1 as the minimum 16x16 SAD gain from
 * half-pel refinement below which the 4MV half-pel search is skipped. */
#define MB_Nb  256

#define PREF_NULL_VEC 129   /* for zero vector bias */
#define PREF_16_VEC 129     /* 1MV bias versus 4MVs*/
#define PREF_INTRA  512     /* bias for INTRA coding */
     51 
     52 const static Int tab_exclude[9][9] =  // [last_loc][curr_loc]
     53 {
     54     {0, 0, 0, 0, 0, 0, 0, 0, 0},
     55     {0, 0, 0, 0, 1, 1, 1, 0, 0},
     56     {0, 0, 0, 0, 1, 1, 1, 1, 1},
     57     {0, 0, 0, 0, 0, 0, 1, 1, 1},
     58     {0, 1, 1, 0, 0, 0, 1, 1, 1},
     59     {0, 1, 1, 0, 0, 0, 0, 0, 1},
     60     {0, 1, 1, 1, 1, 0, 0, 0, 1},
     61     {0, 0, 1, 1, 1, 0, 0, 0, 0},
     62     {0, 0, 1, 1, 1, 1, 1, 0, 0}
     63 }; //to decide whether to continue or compute
     64 
     65 const static Int refine_next[8][2] =    /* [curr_k][increment] */
     66 {
     67     {0, 0}, {2, 0}, {1, 1}, {0, 2}, { -1, 1}, { -2, 0}, { -1, -1}, {0, -2}
     68 };
     69 
     70 #ifdef __cplusplus
     71 extern "C"
     72 {
     73 #endif
     74 
     75     void MBMotionSearch(VideoEncData *video, UChar *cur, UChar *best_cand[],
     76     Int i0, Int j0, Int type_pred, Int fullsearch, Int *hp_guess);
     77 
     78     Int  fullsearch(VideoEncData *video, Vol *currVol, UChar *ref, UChar *cur,
     79                     Int *imin, Int *jmin, Int ilow, Int ihigh, Int jlow, Int jhigh);
     80     Int fullsearchBlk(VideoEncData *video, Vol *currVol, UChar *cent, UChar *cur,
     81                       Int *imin, Int *jmin, Int ilow, Int ihigh, Int jlow, Int jhigh, Int range);
     82     void CandidateSelection(Int *mvx, Int *mvy, Int *num_can, Int imb, Int jmb,
     83                             VideoEncData *video, Int type_pred);
     84     void RasterIntraUpdate(UChar *intraArray, UChar *Mode, Int totalMB, Int numRefresh);
     85     void ResetIntraUpdate(UChar *intraArray, Int totalMB);
     86     void ResetIntraUpdateRegion(UChar *intraArray, Int start_i, Int rwidth,
     87                                 Int start_j, Int rheight, Int mbwidth, Int mbheight);
     88 
     89     void MoveNeighborSAD(Int dn[], Int new_loc);
     90     Int FindMin(Int dn[]);
     91     void PrepareCurMB(VideoEncData *video, UChar *cur);
     92 
     93 #ifdef __cplusplus
     94 }
     95 #endif
     96 
/***************************************/
/*  2/28/01, for HYPOTHESIS TESTING */
/* Declarations that are only compiled when HTFM is enabled. */
#ifdef HTFM     /* defined in mp4def.h */
#ifdef __cplusplus
extern "C"
{
#endif
    /* Converts the exp_lamda[] model parameters into the integer thresholds
     * stored in nrmlz_th[] for false-alarm probability pf. */
    void CalcThreshold(double pf, double exp_lamda[], Int nrmlz_th[]);
    /* HTFM counterpart of PrepareCurMB: packs the current macroblock into
     * video->currYMB in sub-sampled order. */
    void    HTFMPrepareCurMB(VideoEncData *video, HTFM_Stat *htfm_stat, UChar *cur);
#ifdef __cplusplus
}
#endif


/* Probability of false alarm passed to CalcThreshold by UpdateHTFM. */
#define HTFM_Pf  0.25   /* 3/2/1, probability of false alarm, can be varied from 0 to 0.5 */
/***************************************/
#endif
    114 
/* Global statistics counters, compiled only when _SAD_STAT is defined. */
#ifdef _SAD_STAT
ULong num_MB = 0;       /* incremented in MotionEstimation for every MB that goes through MBMotionSearch */
ULong num_HP_MB = 0;    /* incremented in MotionEstimation for every MB that goes through FindHalfPelMB */
/* The counters below are not updated in the part of the file shown here. */
ULong num_Blk = 0;
ULong num_HP_Blk = 0;
ULong num_cand = 0;
ULong num_better_hp = 0;
ULong i_dist_from_guess = 0;
ULong j_dist_from_guess = 0;
ULong num_hp_not_zero = 0;
#endif
    126 
    127 
    128 
/*==================================================================
    Function:   MotionEstimation
    Date:       10/3/2000
    Purpose:    Go through all macroblocks of the current VOP for motion
                search and perform scene change detection.

    Behaviour:
      - I_VOP: only the per-MB sums are computed (ComputeMBSum),
        video->sumMAD is set, the INTRA-refresh array is reset, and the
        function returns.
      - Otherwise every macroblock that is not already marked MODE_INTRA
        gets a full-pel search (MBMotionSearch), an INTRA/INTER decision
        (ChooseMode) and, if enabled, half-pel refinement.
      - When scene-change detection is active the macroblocks are visited
        in two checkerboard passes. If too many macroblocks of the first
        pass were coded INTRA, the VOP is turned into an I_VOP and the
        function returns early.
      - On normal completion video->sumMAD and currVop->fcodeForward are
        updated.

    Input/Output: video -- encoder state; read and modified in place.
====================================================================*/

void MotionEstimation(VideoEncData *video)
{
    UChar use_4mv = video->encParams->MV8x8_Enabled;
    Vol *currVol = video->vol[video->currLayer];
    Vop *currVop = video->currVop;
    VideoEncFrameIO *currFrame = video->input;
    Int i, j, comp;
    Int mbwidth = currVol->nMBPerRow;
    Int mbheight = currVol->nMBPerCol;
    Int totalMB = currVol->nTotalMB;
    Int width = currFrame->pitch;   /* row stride used to step through the input luma plane */
    UChar *mode_mb, *Mode = video->headerInfo.Mode;
    MOT *mot_mb, **mot = video->mot;
    UChar *intraArray = video->intraArray;
    Int FS_en = video->encParams->FullSearch_Enabled;
    void (*ComputeMBSum)(UChar *, Int, MOT *) = video->functionPointer->ComputeMBSum;
    void (*ChooseMode)(UChar*, UChar*, Int, Int) = video->functionPointer->ChooseMode;

    Int numIntra, start_i, numLoop, incr_i;
    Int mbnum, offset;
    UChar *cur, *best_cand[5];
    Int sad8 = 0, sad16 = 0;
    Int totalSAD = 0;   /* accumulated SAD; divided by NumPixelMB to give sumMAD for rate control */
    Int skip_halfpel_4mv;
    Int f_code_p, f_code_n, max_mag = 0, min_mag = 0;   /* largest / smallest MV component seen */
    Int type_pred;
    Int xh[5] = {0, 0, 0, 0, 0};
    Int yh[5] = {0, 0, 0, 0, 0}; /* half-pel */
    UChar hp_mem4MV[17*17*4];   /* scratch buffer handed to FindHalfPelBlk */

#ifdef HTFM
    /***** HYPOTHESIS TESTING ********/  /* 2/28/01 */
    Int collect = 0;        /* set by InitHTFM when statistics are collected for this VOP */
    HTFM_Stat htfm_stat;
    double newvar[16];
    double exp_lamda[15];   /* filled in by UpdateHTFM */
    /*********************************/
#endif
    Int hp_guess = 0;       /* written by MBMotionSearch, passed on to FindHalfPelMB */
#ifdef PRINT_MV
    FILE *fp_debug;
#endif

//  FILE *fstat;
//  static int frame_num = 0;

    offset = 0;

    if (video->currVop->predictionType == I_VOP)
    {   /* compute the SAV */
        /* No motion search for an I-VOP: only the per-MB sums are needed. */
        mbnum = 0;
        cur = currFrame->yChan;

        for (j = 0; j < mbheight; j++)
        {
            for (i = 0; i < mbwidth; i++)
            {
                video->mbnum = mbnum;
                mot_mb = mot[mbnum];

                (*ComputeMBSum)(cur + (i << 4), width, mot_mb);

                totalSAD += mot_mb[0].sad;

                mbnum++;
            }
            cur += (width << 4);    /* advance 16 rows to the next macroblock row */
        }

        video->sumMAD = (float)totalSAD / (float)NumPixelMB;

        ResetIntraUpdate(intraArray, totalMB);

        return  ;
    }

    /* 09/20/05 */
    /* Pad the previous base VOP once, unless H.263 mode is enabled. */
    if (video->prevBaseVop->padded == 0 && !video->encParams->H263_Enabled)
    {
        PaddingEdge(video->prevBaseVop);
        video->prevBaseVop->padded = 1;
    }

    /* Random INTRA update */
    /*  suggest to do it in CodeMB */
    /*  2/21/2001 */
    //if(video->encParams->RC_Type == CBR_1 || video->encParams->RC_Type == CBR_2)
    /* INTRA refresh is only applied on layer 0 and only when Refresh is non-zero. */
    if (video->currLayer == 0 && video->encParams->Refresh)
    {
        RasterIntraUpdate(intraArray, Mode, totalMB, video->encParams->Refresh);
    }

    video->sad_extra_info = NULL;

#ifdef HTFM
    /***** HYPOTHESIS TESTING ********/  /* 2/28/01 */
    InitHTFM(video, &htfm_stat, newvar, &collect);
    /*********************************/
#endif

    if ((video->encParams->SceneChange_Det == 1) /*&& video->currLayer==0 */
            && ((video->encParams->LayerFrameRate[0] < 5.0) || (video->numVopsInGOP > MIN_GOP)))
        /* do not try to detect a new scene if low frame rate and too close to previous I-frame */
    {
        /* Two passes, each visiting every other macroblock of a row. */
        incr_i = 2;
        numLoop = 2;
        start_i = 1;
        type_pred = 0; /* for initial candidate selection */
    }
    else
    {
        /* Single pass over all macroblocks. */
        incr_i = 1;
        numLoop = 1;
        start_i = 0;
        type_pred = 2;
    }

    /* First pass, loop thru half the macroblock */
    /* determine scene change */
    /* Second pass, for the rest of macroblocks */
    numIntra = 0;
    while (numLoop--)
    {
        for (j = 0; j < mbheight; j++)
        {
            /* In two-pass mode the starting column alternates on every row,
               so each pass covers one colour of a checkerboard. */
            if (incr_i > 1)
                start_i = (start_i == 0 ? 1 : 0) ; /* toggle 0 and 1 */

            offset = width * (j << 4) + (start_i << 4);

            mbnum = j * mbwidth + start_i;

            for (i = start_i; i < mbwidth; i += incr_i)
            {
                video->mbnum = mbnum;
                mot_mb = mot[mbnum];
                mode_mb = Mode + mbnum;

                cur = currFrame->yChan + offset;


                if (*mode_mb != MODE_INTRA)
                {
                    /* Copy the current MB into video->currYMB for the SAD routines. */
#if defined(HTFM)
                    HTFMPrepareCurMB(video, &htfm_stat, cur);
#else
                    PrepareCurMB(video, cur);
#endif
                    /************************************************************/
                    /******** full-pel 1MV and 4MVs search **********************/

#ifdef _SAD_STAT
                    num_MB++;
#endif
                    MBMotionSearch(video, cur, best_cand, i << 4, j << 4, type_pred,
                                   FS_en, &hp_guess);

#ifdef PRINT_MV
                    fp_debug = fopen("c:\\bitstream\\mv1_debug.txt", "a");
                    fprintf(fp_debug, "#%d (%d,%d,%d) : ", mbnum, mot_mb[0].x, mot_mb[0].y, mot_mb[0].sad);
                    fprintf(fp_debug, "(%d,%d,%d) : (%d,%d,%d) : (%d,%d,%d) : (%d,%d,%d) : ==>\n",
                            mot_mb[1].x, mot_mb[1].y, mot_mb[1].sad,
                            mot_mb[2].x, mot_mb[2].y, mot_mb[2].sad,
                            mot_mb[3].x, mot_mb[3].y, mot_mb[3].sad,
                            mot_mb[4].x, mot_mb[4].y, mot_mb[4].sad);
                    fclose(fp_debug);
#endif
                    /* mot_mb[0] holds the 16x16 result, mot_mb[1..4] the four 8x8 results. */
                    sad16 = mot_mb[0].sad;
#ifdef NO_INTER4V
                    sad8 = sad16;
#else
                    sad8 = mot_mb[1].sad + mot_mb[2].sad + mot_mb[3].sad + mot_mb[4].sad;
#endif

                    /* choose between INTRA or INTER */
                    /* The smaller of the 1MV and 4MV SADs is passed to the decision. */
                    (*ChooseMode)(mode_mb, cur, width, ((sad8 < sad16) ? sad8 : sad16));
                }
                else    /* INTRA update, use for prediction 3/23/01 */
                {
                    mot_mb[0].x = mot_mb[0].y = 0;
                }

                if (*mode_mb == MODE_INTRA)
                {
                    numIntra++ ;

                    /* compute SAV for rate control and fast DCT, 11/28/00 */
                    (*ComputeMBSum)(cur, width, mot_mb);

                    /* leave mot_mb[0] as it is for fast motion search */
                    /* set the 4 MVs to zeros */
                    for (comp = 1; comp <= 4; comp++)
                    {
                        mot_mb[comp].x = 0;
                        mot_mb[comp].y = 0;
                    }
#ifdef PRINT_MV
                    fp_debug = fopen("c:\\bitstream\\mv1_debug.txt", "a");
                    fprintf(fp_debug, "\n");
                    fclose(fp_debug);
#endif
                }
                else /* *mode_mb = MODE_INTER;*/
                {
                    if (video->encParams->HalfPel_Enabled)
                    {
#ifdef _SAD_STAT
                        num_HP_MB++;
#endif
                        /* find half-pel resolution motion vector */
                        FindHalfPelMB(video, cur, mot_mb, best_cand[0],
                                      i << 4, j << 4, xh, yh, hp_guess);
#ifdef PRINT_MV
                        fp_debug = fopen("c:\\bitstream\\mv1_debug.txt", "a");
                        fprintf(fp_debug, "(%d,%d), %d\n", mot_mb[0].x, mot_mb[0].y, mot_mb[0].sad);
                        fclose(fp_debug);
#endif
                        /* Skip the 4MV half-pel search when half-pel refinement
                           lowered the 16x16 SAD by no more than (MB_Nb >> 1) + 1. */
                        skip_halfpel_4mv = ((sad16 - mot_mb[0].sad) <= (MB_Nb >> 1) + 1);
                        sad16 = mot_mb[0].sad;

#ifndef NO_INTER4V
                        if (use_4mv && !skip_halfpel_4mv)
                        {
                            /* Also decide 1MV or 4MV !!!!!!!!*/
                            sad8 = FindHalfPelBlk(video, cur, mot_mb, sad16,
                                                  best_cand, mode_mb, i << 4, j << 4, xh, yh, hp_mem4MV);

#ifdef PRINT_MV
                            fp_debug = fopen("c:\\bitstream\\mv1_debug.txt", "a");
                            fprintf(fp_debug, " (%d,%d,%d) : (%d,%d,%d) : (%d,%d,%d) : (%d,%d,%d) \n",
                                    mot_mb[1].x, mot_mb[1].y, mot_mb[1].sad,
                                    mot_mb[2].x, mot_mb[2].y, mot_mb[2].sad,
                                    mot_mb[3].x, mot_mb[3].y, mot_mb[3].sad,
                                    mot_mb[4].x, mot_mb[4].y, mot_mb[4].sad);
                            fclose(fp_debug);
#endif
                        }
#endif /* NO_INTER4V */
                    }
                    else    /* HalfPel_Enabled ==0  */
                    {
#ifndef NO_INTER4V
                        //if(sad16 < sad8-PREF_16_VEC)
                        /* Full-pel only: pick 4MV when it beats 1MV by more than PREF_16_VEC. */
                        /* NOTE(review): unlike the half-pel path this does not test
                           use_4mv; presumably MBMotionSearch leaves the block SADs so
                           that 4MV never wins when MV8x8 is disabled -- confirm. */
                        if (sad16 - PREF_16_VEC > sad8)
                        {
                            *mode_mb = MODE_INTER4V;
                        }
#endif
                    }
#if (ZERO_MV_PREF==2)   /* use mot_mb[7].sad as d0 computed in MBMotionSearch*/
                    /******************************************************/
                    if (mot_mb[7].sad - PREF_NULL_VEC < sad16 && mot_mb[7].sad - PREF_NULL_VEC < sad8)
                    {
                        mot_mb[0].sad = mot_mb[7].sad - PREF_NULL_VEC;
                        mot_mb[0].x = mot_mb[0].y = 0;
                        *mode_mb = MODE_INTER;
                    }
                    /******************************************************/
#endif
                    if (*mode_mb == MODE_INTER)
                    {
                        if (mot_mb[0].x == 0 && mot_mb[0].y == 0)   /* use zero vector */
                            mot_mb[0].sad += PREF_NULL_VEC; /* add back the bias */

                        /* 1MV mode: replicate the 16x16 vector into the four block
                           entries and give each a quarter (rounded) of the SAD. */
                        mot_mb[1].sad = mot_mb[2].sad = mot_mb[3].sad = mot_mb[4].sad = (mot_mb[0].sad + 2) >> 2;
                        mot_mb[1].x = mot_mb[2].x = mot_mb[3].x = mot_mb[4].x = mot_mb[0].x;
                        mot_mb[1].y = mot_mb[2].y = mot_mb[3].y = mot_mb[4].y = mot_mb[0].y;

                    }
                }

                /* find maximum magnitude */
                /* compute average SAD for rate control, 11/28/00 */
                /* max_mag / min_mag track the extreme MV components over both x
                   and y; they determine the f_code at the end of the function. */
                if (*mode_mb == MODE_INTER)
                {
#ifdef PRINT_MV
                    fp_debug = fopen("c:\\bitstream\\mv1_debug.txt", "a");
                    fprintf(fp_debug, "%d MODE_INTER\n", mbnum);
                    fclose(fp_debug);
#endif
                    totalSAD += mot_mb[0].sad;
                    if (mot_mb[0].x > max_mag)
                        max_mag = mot_mb[0].x;
                    if (mot_mb[0].y > max_mag)
                        max_mag = mot_mb[0].y;
                    if (mot_mb[0].x < min_mag)
                        min_mag = mot_mb[0].x;
                    if (mot_mb[0].y < min_mag)
                        min_mag = mot_mb[0].y;
                }
                else if (*mode_mb == MODE_INTER4V)
                {
#ifdef PRINT_MV
                    fp_debug = fopen("c:\\bitstream\\mv1_debug.txt", "a");
                    fprintf(fp_debug, "%d MODE_INTER4V\n", mbnum);
                    fclose(fp_debug);
#endif
                    totalSAD += sad8;
                    for (comp = 1; comp <= 4; comp++)
                    {
                        if (mot_mb[comp].x > max_mag)
                            max_mag = mot_mb[comp].x;
                        if (mot_mb[comp].y > max_mag)
                            max_mag = mot_mb[comp].y;
                        if (mot_mb[comp].x < min_mag)
                            min_mag = mot_mb[comp].x;
                        if (mot_mb[comp].y < min_mag)
                            min_mag = mot_mb[comp].y;
                    }
                }
                else    /* MODE_INTRA */
                {
#ifdef PRINT_MV
                    fp_debug = fopen("c:\\bitstream\\mv1_debug.txt", "a");
                    fprintf(fp_debug, "%d MODE_INTRA\n", mbnum);
                    fclose(fp_debug);
#endif
                    totalSAD += mot_mb[0].sad;
                }
                mbnum += incr_i;
                offset += (incr_i << 4);

            }
        }

        if (incr_i > 1 && numLoop) /* scene change on and first loop */
        {
            //if(numIntra > ((totalMB>>3)<<1) + (totalMB>>3)) /* 75% of 50%MBs */
            /* more than 30% of the half of the MBs visited in the first pass,
               i.e. more than 15% of all MBs, were coded INTRA */
            if (numIntra > (0.30*(totalMB / 2.0)))
            {
                /******** scene change detected *******************/
                currVop->predictionType = I_VOP;
                M4VENC_MEMSET(Mode, MODE_INTRA, sizeof(UChar)*totalMB); /* set this for MB level coding*/
                currVop->quantizer = video->encParams->InitQuantIvop[video->currLayer];

                /* compute the SAV for rate control & fast DCT */
                /* Same per-MB sum loop as the I_VOP path at the top of the function. */
                totalSAD = 0;
                offset = 0;
                mbnum = 0;
                cur = currFrame->yChan;

                for (j = 0; j < mbheight; j++)
                {
                    for (i = 0; i < mbwidth; i++)
                    {
                        video->mbnum = mbnum;
                        mot_mb = mot[mbnum];


                        (*ComputeMBSum)(cur + (i << 4), width, mot_mb);
                        totalSAD += mot_mb[0].sad;

                        mbnum++;
                    }
                    cur += (width << 4);
                }

                video->sumMAD = (float)totalSAD / (float)NumPixelMB;
                ResetIntraUpdate(intraArray, totalMB);
                /* video->numVopsInGOP=0; 3/13/01 move it to vop.c*/

                return ;
            }
        }
        /******** no scene change, continue motion search **********************/
        start_i = 0;
        type_pred++; /* second pass */
    }

    video->sumMAD = (float)totalSAD / (float)NumPixelMB;    /* avg SAD */

    /* find f_code , 10/27/2000 */
    /* Smallest f_code (>= 1) for which max_mag >> (4 + f_code) is zero. */
    f_code_p = 1;
    while ((max_mag >> (4 + f_code_p)) > 0)
        f_code_p++;

    /* Same for the most negative component, after negating it. */
    f_code_n = 1;
    min_mag *= -1;
    while ((min_mag - 1) >> (4 + f_code_n) > 0)
        f_code_n++;

    /* The forward f_code must cover both directions. */
    currVop->fcodeForward = (f_code_p > f_code_n ? f_code_p : f_code_n);

#ifdef HTFM
    /***** HYPOTHESIS TESTING ********/  /* 2/28/01 */
    /* If statistics were collected for this VOP, refresh the HTFM thresholds. */
    if (collect)
    {
        collect = 0;
        UpdateHTFM(video, newvar, exp_lamda, &htfm_stat);
    }
    /*********************************/
#endif

    return ;
}
    531 
    532 
    533 #ifdef HTFM
    534 void InitHTFM(VideoEncData *video, HTFM_Stat *htfm_stat, double *newvar, Int *collect)
    535 {
    536     Int i;
    537     Int lx = video->currVop->width; //  padding
    538     Int lx2 = lx << 1;
    539     Int lx3 = lx2 + lx;
    540     Int rx = video->currVop->pitch;
    541     Int rx2 = rx << 1;
    542     Int rx3 = rx2 + rx;
    543 
    544     Int *offset, *offset2;
    545 
    546     /* 4/11/01, collect data every 30 frames, doesn't have to be base layer */
    547     if (((Int)video->numVopsInGOP) % 30 == 1)
    548     {
    549 
    550         *collect = 1;
    551 
    552         htfm_stat->countbreak = 0;
    553         htfm_stat->abs_dif_mad_avg = 0;
    554 
    555         for (i = 0; i < 16; i++)
    556         {
    557             newvar[i] = 0.0;
    558         }
    559 //      video->functionPointer->SAD_MB_PADDING = &SAD_MB_PADDING_HTFM_Collect;
    560         video->functionPointer->SAD_Macroblock = &SAD_MB_HTFM_Collect;
    561         video->functionPointer->SAD_MB_HalfPel[0] = NULL;
    562         video->functionPointer->SAD_MB_HalfPel[1] = &SAD_MB_HP_HTFM_Collectxh;
    563         video->functionPointer->SAD_MB_HalfPel[2] = &SAD_MB_HP_HTFM_Collectyh;
    564         video->functionPointer->SAD_MB_HalfPel[3] = &SAD_MB_HP_HTFM_Collectxhyh;
    565         video->sad_extra_info = (void*)(htfm_stat);
    566         offset = htfm_stat->offsetArray;
    567         offset2 = htfm_stat->offsetRef;
    568     }
    569     else
    570     {
    571 //      video->functionPointer->SAD_MB_PADDING = &SAD_MB_PADDING_HTFM;
    572         video->functionPointer->SAD_Macroblock = &SAD_MB_HTFM;
    573         video->functionPointer->SAD_MB_HalfPel[0] = NULL;
    574         video->functionPointer->SAD_MB_HalfPel[1] = &SAD_MB_HP_HTFMxh;
    575         video->functionPointer->SAD_MB_HalfPel[2] = &SAD_MB_HP_HTFMyh;
    576         video->functionPointer->SAD_MB_HalfPel[3] = &SAD_MB_HP_HTFMxhyh;
    577         video->sad_extra_info = (void*)(video->nrmlz_th);
    578         offset = video->nrmlz_th + 16;
    579         offset2 = video->nrmlz_th + 32;
    580     }
    581 
    582     offset[0] = 0;
    583     offset[1] = lx2 + 2;
    584     offset[2] = 2;
    585     offset[3] = lx2;
    586     offset[4] = lx + 1;
    587     offset[5] = lx3 + 3;
    588     offset[6] = lx + 3;
    589     offset[7] = lx3 + 1;
    590     offset[8] = lx;
    591     offset[9] = lx3 + 2;
    592     offset[10] = lx3 ;
    593     offset[11] = lx + 2 ;
    594     offset[12] = 1;
    595     offset[13] = lx2 + 3;
    596     offset[14] = lx2 + 1;
    597     offset[15] = 3;
    598 
    599     offset2[0] = 0;
    600     offset2[1] = rx2 + 2;
    601     offset2[2] = 2;
    602     offset2[3] = rx2;
    603     offset2[4] = rx + 1;
    604     offset2[5] = rx3 + 3;
    605     offset2[6] = rx + 3;
    606     offset2[7] = rx3 + 1;
    607     offset2[8] = rx;
    608     offset2[9] = rx3 + 2;
    609     offset2[10] = rx3 ;
    610     offset2[11] = rx + 2 ;
    611     offset2[12] = 1;
    612     offset2[13] = rx2 + 3;
    613     offset2[14] = rx2 + 1;
    614     offset2[15] = 3;
    615 
    616     return ;
    617 }
    618 
    619 void UpdateHTFM(VideoEncData *video, double *newvar, double *exp_lamda, HTFM_Stat *htfm_stat)
    620 {
    621     if (htfm_stat->countbreak == 0)
    622         htfm_stat->countbreak = 1;
    623 
    624     newvar[0] = (double)(htfm_stat->abs_dif_mad_avg) / (htfm_stat->countbreak * 16.);
    625 
    626     if (newvar[0] < 0.001)
    627     {
    628         newvar[0] = 0.001; /* to prevent floating overflow */
    629     }
    630     exp_lamda[0] =  1 / (newvar[0] * 1.4142136);
    631     exp_lamda[1] = exp_lamda[0] * 1.5825;
    632     exp_lamda[2] = exp_lamda[0] * 2.1750;
    633     exp_lamda[3] = exp_lamda[0] * 3.5065;
    634     exp_lamda[4] = exp_lamda[0] * 3.1436;
    635     exp_lamda[5] = exp_lamda[0] * 3.5315;
    636     exp_lamda[6] = exp_lamda[0] * 3.7449;
    637     exp_lamda[7] = exp_lamda[0] * 4.5854;
    638     exp_lamda[8] = exp_lamda[0] * 4.6191;
    639     exp_lamda[9] = exp_lamda[0] * 5.4041;
    640     exp_lamda[10] = exp_lamda[0] * 6.5974;
    641     exp_lamda[11] = exp_lamda[0] * 10.5341;
    642     exp_lamda[12] = exp_lamda[0] * 10.0719;
    643     exp_lamda[13] = exp_lamda[0] * 12.0516;
    644     exp_lamda[14] = exp_lamda[0] * 15.4552;
    645 
    646     CalcThreshold(HTFM_Pf, exp_lamda, video->nrmlz_th);
    647     return ;
    648 }
    649 
    650 
    651 void CalcThreshold(double pf, double exp_lamda[], Int nrmlz_th[])
    652 {
    653     Int i;
    654     double temp[15];
    655     //  printf("\nLamda: ");
    656 
    657     /* parametric PREMODELling */
    658     for (i = 0; i < 15; i++)
    659     {
    660         //    printf("%g ",exp_lamda[i]);
    661         if (pf < 0.5)
    662             temp[i] = 1 / exp_lamda[i] * M4VENC_LOG(2 * pf);
    663         else
    664             temp[i] = -1 / exp_lamda[i] * M4VENC_LOG(2 * (1 - pf));
    665     }
    666 
    667     nrmlz_th[15] = 0;
    668     for (i = 0; i < 15; i++)        /* scale upto no.pixels */
    669         nrmlz_th[i] = (Int)(temp[i] * ((i + 1) << 4) + 0.5);
    670 
    671     return ;
    672 }
    673 
    674 void    HTFMPrepareCurMB(VideoEncData *video, HTFM_Stat *htfm_stat, UChar *cur)
    675 {
    676     void* tmp = (void*)(video->currYMB);
    677     ULong *htfmMB = (ULong*)tmp;
    678     UChar *ptr, byte;
    679     Int *offset;
    680     Int i;
    681     ULong word;
    682     Int width = video->currVop->width;
    683 
    684     if (((Int)video->numVopsInGOP) % 30 == 1)
    685     {
    686         offset = htfm_stat->offsetArray;
    687     }
    688     else
    689     {
    690         offset = video->nrmlz_th + 16;
    691     }
    692 
    693     for (i = 0; i < 16; i++)
    694     {
    695         ptr = cur + offset[i];
    696         word = ptr[0];
    697         byte = ptr[4];
    698         word |= (byte << 8);
    699         byte = ptr[8];
    700         word |= (byte << 16);
    701         byte = ptr[12];
    702         word |= (byte << 24);
    703         *htfmMB++ = word;
    704 
    705         word = *(ptr += (width << 2));
    706         byte = ptr[4];
    707         word |= (byte << 8);
    708         byte = ptr[8];
    709         word |= (byte << 16);
    710         byte = ptr[12];
    711         word |= (byte << 24);
    712         *htfmMB++ = word;
    713 
    714         word = *(ptr += (width << 2));
    715         byte = ptr[4];
    716         word |= (byte << 8);
    717         byte = ptr[8];
    718         word |= (byte << 16);
    719         byte = ptr[12];
    720         word |= (byte << 24);
    721         *htfmMB++ = word;
    722 
    723         word = *(ptr += (width << 2));
    724         byte = ptr[4];
    725         word |= (byte << 8);
    726         byte = ptr[8];
    727         word |= (byte << 16);
    728         byte = ptr[12];
    729         word |= (byte << 24);
    730         *htfmMB++ = word;
    731     }
    732 
    733     return ;
    734 }
    735 
    736 
    737 #endif
    738 
    739 void    PrepareCurMB(VideoEncData *video, UChar *cur)
    740 {
    741     void* tmp = (void*)(video->currYMB);
    742     ULong *currYMB = (ULong*)tmp;
    743     Int i;
    744     Int width = video->currVop->width;
    745 
    746     cur -= width;
    747 
    748     for (i = 0; i < 16; i++)
    749     {
    750         *currYMB++ = *((ULong*)(cur += width));
    751         *currYMB++ = *((ULong*)(cur + 4));
    752         *currYMB++ = *((ULong*)(cur + 8));
    753         *currYMB++ = *((ULong*)(cur + 12));
    754     }
    755 
    756     return ;
    757 }
    758 
    759 
    760 /*==================================================================
    761     Function:   MBMotionSearch
    762     Date:       09/06/2000
    763     Purpose:    Perform motion estimation for a macroblock.
    764                 Find 1MV and 4MVs in half-pels resolutions.
    765                 Using ST1 algorithm provided by Chalidabhongse and Kuo
    766                 CSVT March'98.
    767 
    768 ==================================================================*/
    769 
    770 void MBMotionSearch(VideoEncData *video, UChar *cur, UChar *best_cand[],
    771                     Int i0, Int j0, Int type_pred, Int FS_en, Int *hp_guess)
    772 {
    773     Vol *currVol = video->vol[video->currLayer];
    774     UChar *ref, *cand, *ncand = NULL, *cur8;
    775     void *extra_info = video->sad_extra_info;
    776     Int mbnum = video->mbnum;
    777     Int width = video->currVop->width; /* 6/12/01, must be multiple of 16 */
    778     Int height = video->currVop->height;
    779     MOT **mot = video->mot;
    780     UChar use_4mv = video->encParams->MV8x8_Enabled;
    781     UChar h263_mode = video->encParams->H263_Enabled;
    782     Int(*SAD_Macroblock)(UChar*, UChar*, Int, void*) = video->functionPointer->SAD_Macroblock;
    783     Int(*SAD_Block)(UChar*, UChar*, Int, Int, void*) = video->functionPointer->SAD_Block;
    784     VideoEncParams *encParams = video->encParams;
    785     Int range = encParams->SearchRange;
    786 
    787     Int lx = video->currVop->pitch; /* padding */
    788     Int comp;
    789     Int i, j, imin, jmin, ilow, ihigh, jlow, jhigh, iorg, jorg;
    790     Int d, dmin, dn[9];
    791 #if (ZERO_MV_PREF==1)   /* compute (0,0) MV at the end */
    792     Int d0;
    793 #endif
    794     Int k;
    795     Int mvx[5], mvy[5], imin0, jmin0;
    796     Int num_can, center_again;
    797     Int last_loc, new_loc = 0;
    798     Int step, max_step = range >> 1;
    799     Int next;
    800 
    801     ref = video->forwardRefVop->yChan; /* origin of actual frame */
    802 
    803     cur = video->currYMB; /* use smaller memory space for current MB */
    804 
    805     /*  find limit of the search (adjusting search range)*/
    806 
    807     if (!h263_mode)
    808     {
    809         ilow = i0 - range;
    810         if (ilow < -15)
    811             ilow = -15;
    812         ihigh = i0 + range - 1;
    813         if (ihigh > width - 1)
    814             ihigh = width - 1;
    815         jlow = j0 - range;
    816         if (jlow < -15)
    817             jlow = -15;
    818         jhigh = j0 + range - 1;
    819         if (jhigh > height - 1)
    820             jhigh = height - 1;
    821     }
    822     else
    823     {
    824         ilow = i0 - range;
    825         if (ilow < 0)
    826             ilow = 0;
    827         ihigh = i0 + range - 1;
    828         if (ihigh > width - 16)
    829             ihigh = width - 16;
    830         jlow = j0 - range;
    831         if (jlow < 0)
    832             jlow = 0;
    833         jhigh = j0 + range - 1;
    834         if (jhigh > height - 16)
    835             jhigh = height - 16;
    836     }
    837 
    838     imin = i0;
    839     jmin = j0; /* needed for fullsearch */
    840     ncand = ref + imin + jmin * lx;
    841 
    842     /* for first row of MB, fullsearch can be used */
    843     if (FS_en)
    844     {
    845         *hp_guess = 0; /* no guess for fast half-pel */
    846 
    847         dmin =  fullsearch(video, currVol, ref, cur, &imin, &jmin, ilow, ihigh, jlow, jhigh);
    848 
    849         ncand = ref + imin + jmin * lx;
    850 
    851         mot[mbnum][0].sad = dmin;
    852         mot[mbnum][0].x = (imin - i0) << 1;
    853         mot[mbnum][0].y = (jmin - j0) << 1;
    854         imin0 = imin << 1;  /* 16x16 MV in half-pel resolution */
    855         jmin0 = jmin << 1;
    856         best_cand[0] = ncand;
    857     }
    858     else
    859     {   /* 4/7/01, modified this testing for fullsearch the top row to only upto (0,3) MB */
    860         /*            upto 30% complexity saving with the same complexity */
    861         if (video->forwardRefVop->predictionType == I_VOP && j0 == 0 && i0 <= 64 && type_pred != 1)
    862         {
    863             *hp_guess = 0; /* no guess for fast half-pel */
    864             dmin =  fullsearch(video, currVol, ref, cur, &imin, &jmin, ilow, ihigh, jlow, jhigh);
    865             ncand = ref + imin + jmin * lx;
    866         }
    867         else
    868         {
    869             /************** initialize candidate **************************/
    870             /* find initial motion vector */
    871             CandidateSelection(mvx, mvy, &num_can, i0 >> 4, j0 >> 4, video, type_pred);
    872 
    873             dmin = 65535;
    874 
    875             /* check if all are equal */
    876             if (num_can == ALL_CAND_EQUAL)
    877             {
    878                 i = i0 + mvx[0];
    879                 j = j0 + mvy[0];
    880 
    881                 if (i >= ilow && i <= ihigh && j >= jlow && j <= jhigh)
    882                 {
    883                     cand = ref + i + j * lx;
    884 
    885                     d = (*SAD_Macroblock)(cand, cur, (dmin << 16) | lx, extra_info);
    886 
    887                     if (d < dmin)
    888                     {
    889                         dmin = d;
    890                         imin = i;
    891                         jmin = j;
    892                         ncand = cand;
    893                     }
    894                 }
    895             }
    896             else
    897             {
    898                 /************** evaluate unique candidates **********************/
    899                 for (k = 0; k < num_can; k++)
    900                 {
    901                     i = i0 + mvx[k];
    902                     j = j0 + mvy[k];
    903 
    904                     if (i >= ilow && i <= ihigh && j >= jlow && j <= jhigh)
    905                     {
    906                         cand = ref + i + j * lx;
    907                         d = (*SAD_Macroblock)(cand, cur, (dmin << 16) | lx, extra_info);
    908 
    909                         if (d < dmin)
    910                         {
    911                             dmin = d;
    912                             imin = i;
    913                             jmin = j;
    914                             ncand = cand;
    915                         }
    916                         else if ((d == dmin) && PV_ABS(mvx[k]) + PV_ABS(mvy[k]) < PV_ABS(i0 - imin) + PV_ABS(j0 - jmin))
    917                         {
    918                             dmin = d;
    919                             imin = i;
    920                             jmin = j;
    921                             ncand = cand;
    922                         }
    923                     }
    924                 }
    925             }
    926             if (num_can == 0 || dmin == 65535) /* no candidate selected */
    927             {
    928                 ncand = ref + i0 + j0 * lx; /* use (0,0) MV as initial value */
    929                 mot[mbnum][7].sad = dmin = (*SAD_Macroblock)(ncand, cur, (65535 << 16) | lx, extra_info);
    930 #if (ZERO_MV_PREF==1)   /* compute (0,0) MV at the end */
    931                 d0 = dmin;
    932 #endif
    933                 imin = i0;
    934                 jmin = j0;
    935             }
    936 
    937 #if (ZERO_MV_PREF==0)  /*  COMPUTE ZERO VECTOR FIRST !!!!!*/
    938             dmin -= PREF_NULL_VEC;
    939 #endif
    940 
    941             /******************* local refinement ***************************/
    942             center_again = 0;
    943             last_loc = new_loc = 0;
    944             //          ncand = ref + jmin*lx + imin;  /* center of the search */
    945             step = 0;
    946             dn[0] = dmin;
    947             while (!center_again && step <= max_step)
    948             {
    949 
    950                 MoveNeighborSAD(dn, last_loc);
    951 
    952                 center_again = 1;
    953                 i = imin;
    954                 j = jmin - 1;
    955                 cand = ref + i + j * lx;
    956 
    957                 /*  starting from [0,-1] */
    958                 /* spiral check one step at a time*/
    959                 for (k = 2; k <= 8; k += 2)
    960                 {
    961                     if (!tab_exclude[last_loc][k]) /* exclude last step computation */
    962                     {       /* not already computed */
    963                         if (i >= ilow && i <= ihigh && j >= jlow && j <= jhigh)
    964                         {
    965                             d = (*SAD_Macroblock)(cand, cur, (dmin << 16) | lx, extra_info);
    966                             dn[k] = d; /* keep it for half pel use */
    967 
    968                             if (d < dmin)
    969                             {
    970                                 ncand = cand;
    971                                 dmin = d;
    972                                 imin = i;
    973                                 jmin = j;
    974                                 center_again = 0;
    975                                 new_loc = k;
    976                             }
    977                             else if ((d == dmin) && PV_ABS(i0 - i) + PV_ABS(j0 - j) < PV_ABS(i0 - imin) + PV_ABS(j0 - jmin))
    978                             {
    979                                 ncand = cand;
    980                                 imin = i;
    981                                 jmin = j;
    982                                 center_again = 0;
    983                                 new_loc = k;
    984                             }
    985                         }
    986                     }
    987                     if (k == 8)  /* end side search*/
    988                     {
    989                         if (!center_again)
    990                         {
    991                             k = -1; /* start diagonal search */
    992                             cand -= lx;
    993                             j--;
    994                         }
    995                     }
    996                     else
    997                     {
    998                         next = refine_next[k][0];
    999                         i += next;
   1000                         cand += next;
   1001                         next = refine_next[k][1];
   1002                         j += next;
   1003                         cand += lx * next;
   1004                     }
   1005                 }
   1006                 last_loc = new_loc;
   1007                 step ++;
   1008             }
   1009             if (!center_again)
   1010                 MoveNeighborSAD(dn, last_loc);
   1011 
   1012             *hp_guess = FindMin(dn);
   1013 
   1014         }
   1015 
   1016 #if (ZERO_MV_PREF==1)   /* compute (0,0) MV at the end */
   1017         if (d0 - PREF_NULL_VEC < dmin)
   1018         {
   1019             ncand = ref + i0 + j0 * lx;
   1020             dmin = d0;
   1021             imin = i0;
   1022             jmin = j0;
   1023         }
   1024 #endif
   1025         mot[mbnum][0].sad = dmin;
   1026         mot[mbnum][0].x = (imin - i0) << 1;
   1027         mot[mbnum][0].y = (jmin - j0) << 1;
   1028         imin0 = imin << 1;  /* 16x16 MV in half-pel resolution */
   1029         jmin0 = jmin << 1;
   1030         best_cand[0] = ncand;
   1031     }
   1032     /* imin and jmin is the best 1 MV */
   1033 #ifndef NO_INTER4V
   1034     /*******************  Find 4 motion vectors ****************************/
   1035     if (use_4mv && !h263_mode)
   1036     {
   1037 #ifdef _SAD_STAT
   1038         num_Blk += 4;
   1039 #endif
   1040         /* starting from the best 1MV */
   1041         //offset = imin + jmin*lx;
   1042         iorg = i0;
   1043         jorg = j0;
   1044 
   1045         for (comp = 0; comp < 4; comp++)
   1046         {
   1047             i0 = iorg + ((comp & 1) << 3);
   1048             j0 = jorg + ((comp & 2) << 2);
   1049 
   1050             imin = (imin0 >> 1) + ((comp & 1) << 3);    /* starting point from 16x16 MV */
   1051             jmin = (jmin0 >> 1) + ((comp & 2) << 2);
   1052             ncand = ref + imin + jmin * lx;
   1053 
   1054             cur8 = cur + ((comp & 1) << 3) + (((comp & 2) << 2) << 4) ; /* 11/30/05, smaller cache */
   1055 
   1056             /*  find limit of the search (adjusting search range)*/
   1057             ilow = i0 - range;
   1058             ihigh = i0 + range - 1 ;/* 4/9/01 */
   1059             if (ilow < -15)
   1060                 ilow = -15;
   1061             if (ihigh > width - 1)
   1062                 ihigh = width - 1;
   1063             jlow = j0 - range;
   1064             jhigh = j0 + range - 1 ;/* 4/9/01 */
   1065             if (jlow < -15)
   1066                 jlow = -15;
   1067             if (jhigh > height - 1)
   1068                 jhigh = height - 1;
   1069 
   1070             SAD_Block = video->functionPointer->SAD_Block;
   1071 
   1072             if (FS_en)  /* fullsearch enable, center around 16x16 MV */
   1073             {
   1074                 dmin =  fullsearchBlk(video, currVol, ncand, cur8, &imin, &jmin, ilow, ihigh, jlow, jhigh, range);
   1075                 ncand = ref + imin + jmin * lx;
   1076 
   1077                 mot[mbnum][comp+1].sad = dmin;
   1078                 mot[mbnum][comp+1].x = (imin - i0) << 1;
   1079                 mot[mbnum][comp+1].y = (jmin - j0) << 1;
   1080                 best_cand[comp+1] = ncand;
   1081             }
   1082             else    /* no fullsearch, do local search */
   1083             {
   1084                 /* starting point from 16x16 */
   1085                 dmin = (*SAD_Block)(ncand, cur8, 65536, lx, extra_info);
   1086 
   1087                 /******************* local refinement ***************************/
   1088                 center_again = 0;
   1089                 last_loc = 0;
   1090 
   1091                 while (!center_again)
   1092                 {
   1093                     center_again = 1;
   1094                     i = imin;
   1095                     j = jmin - 1;
   1096                     cand = ref + i + j * lx;
   1097 
   1098                     /*  starting from [0,-1] */
   1099                     /* spiral check one step at a time*/
   1100                     for (k = 2; k <= 8; k += 2)
   1101                     {
   1102                         if (!tab_exclude[last_loc][k]) /* exclude last step computation */
   1103                         {       /* not already computed */
   1104                             if (i >= ilow && i <= ihigh && j >= jlow && j <= jhigh)
   1105                             {
   1106                                 d = (*SAD_Block)(cand, cur8, dmin, lx, extra_info);
   1107 
   1108                                 if (d < dmin)
   1109                                 {
   1110                                     ncand = cand;
   1111                                     dmin = d;
   1112                                     imin = i;
   1113                                     jmin = j;
   1114                                     center_again = 0;
   1115                                     new_loc = k;
   1116                                 }
   1117                                 else if ((d == dmin) &&
   1118                                          PV_ABS(i0 - i) + PV_ABS(j0 - j) < PV_ABS(i0 - imin) + PV_ABS(j0 - jmin))
   1119                                 {
   1120                                     ncand = cand;
   1121                                     imin = i;
   1122                                     jmin = j;
   1123                                     center_again = 0;
   1124                                     new_loc = k;
   1125                                 }
   1126                             }
   1127                         }
   1128                         if (k == 8)  /* end side search*/
   1129                         {
   1130                             if (!center_again)
   1131                             {
   1132                                 k = -1; /* start diagonal search */
   1133                                 if (j <= height - 1 && j > 0)   cand -= lx;
   1134                                 j--;
   1135                             }
   1136                         }
   1137                         else
   1138                         {
   1139                             next = refine_next[k][0];
   1140                             cand += next;
   1141                             i += next;
   1142                             next = refine_next[k][1];
   1143                             cand += lx * next;
   1144                             j += next;
   1145                         }
   1146                     }
   1147                     last_loc = new_loc;
   1148                 }
   1149                 mot[mbnum][comp+1].sad = dmin;
   1150                 mot[mbnum][comp+1].x = (imin - i0) << 1;
   1151                 mot[mbnum][comp+1].y = (jmin - j0) << 1;
   1152                 best_cand[comp+1] = ncand;
   1153             }
   1154             /********************************************/
   1155         }
   1156     }
   1157     else
   1158 #endif  /* NO_INTER4V */
   1159     {
   1160         mot[mbnum][1].sad = mot[mbnum][2].sad = mot[mbnum][3].sad = mot[mbnum][4].sad = (dmin + 2) >> 2;
   1161         mot[mbnum][1].x = mot[mbnum][2].x = mot[mbnum][3].x = mot[mbnum][4].x = mot[mbnum][0].x;
   1162         mot[mbnum][1].y = mot[mbnum][2].y = mot[mbnum][3].y = mot[mbnum][4].y = mot[mbnum][0].y;
   1163         best_cand[1] = best_cand[2] = best_cand[3] = best_cand[4] = ncand;
   1164 
   1165     }
   1166     return ;
   1167 }
   1168 
   1169 
   1170 /*===============================================================================
   1171     Function:   fullsearch
   1172     Date:       09/16/2000
   1173     Purpose:    Perform full-search motion estimation over the range of search
   1174                 region in a spiral-outward manner.
   1175     Input/Output:   VideoEncData, current Vol, previou Vop, pointer to the left corner of
   1176                 current VOP, current coord (also output), boundaries.
   1177 ===============================================================================*/
   1178 
   1179 Int fullsearch(VideoEncData *video, Vol *currVol, UChar *prev, UChar *cur,
   1180                Int *imin, Int *jmin, Int ilow, Int ihigh, Int jlow, Int jhigh)
   1181 {
   1182     Int range = video->encParams->SearchRange;
   1183     UChar *cand;
   1184     Int i, j, k, l;
   1185     Int d, dmin;
   1186     Int i0 = *imin; /* current position */
   1187     Int j0 = *jmin;
   1188     Int(*SAD_Macroblock)(UChar*, UChar*, Int, void*) = video->functionPointer->SAD_Macroblock;
   1189     void *extra_info = video->sad_extra_info;
   1190 //  UChar h263_mode = video->encParams->H263_Enabled;
   1191     Int lx = video->currVop->pitch; /* with padding */
   1192 
   1193     Int offset = i0 + j0 * lx;
   1194 
   1195     OSCL_UNUSED_ARG(currVol);
   1196 
   1197     cand = prev + offset;
   1198 
   1199     dmin  = (*SAD_Macroblock)(cand, cur, (65535 << 16) | lx, (void*)extra_info) - PREF_NULL_VEC;
   1200 
   1201     /* perform spiral search */
   1202     for (k = 1; k <= range; k++)
   1203     {
   1204 
   1205         i = i0 - k;
   1206         j = j0 - k;
   1207 
   1208         cand = prev + i + j * lx;
   1209 
   1210         for (l = 0; l < 8*k; l++)
   1211         {
   1212             /* no need for boundary checking again */
   1213             if (i >= ilow && i <= ihigh && j >= jlow && j <= jhigh)
   1214             {
   1215                 d = (*SAD_Macroblock)(cand, cur, (dmin << 16) | lx, (void*)extra_info);
   1216 
   1217                 if (d < dmin)
   1218                 {
   1219                     dmin = d;
   1220                     *imin = i;
   1221                     *jmin = j;
   1222                 }
   1223                 else if ((d == dmin) && PV_ABS(i0 - i) + PV_ABS(j0 - j) < PV_ABS(i0 - *imin) + PV_ABS(j0 - *jmin))
   1224                 {
   1225                     dmin = d;
   1226                     *imin = i;
   1227                     *jmin = j;
   1228                 }
   1229             }
   1230 
   1231             if (l < (k << 1))
   1232             {
   1233                 i++;
   1234                 cand++;
   1235             }
   1236             else if (l < (k << 2))
   1237             {
   1238                 j++;
   1239                 cand += lx;
   1240             }
   1241             else if (l < ((k << 2) + (k << 1)))
   1242             {
   1243                 i--;
   1244                 cand--;
   1245             }
   1246             else
   1247             {
   1248                 j--;
   1249                 cand -= lx;
   1250             }
   1251         }
   1252     }
   1253 
   1254     return dmin;
   1255 }
   1256 
   1257 #ifndef NO_INTER4V
   1258 /*===============================================================================
   1259     Function:   fullsearchBlk
   1260     Date:       01/9/2001
   1261     Purpose:    Perform full-search motion estimation of an 8x8 block over the range
   1262                 of search region in a spiral-outward manner centered at the 16x16 MV.
   1263     Input/Output:   VideoEncData, MB coordinate, pointer to the initial MV on the
   1264                 reference, pointer to coor of current block, search range.
   1265 ===============================================================================*/
   1266 Int fullsearchBlk(VideoEncData *video, Vol *currVol, UChar *cent, UChar *cur,
   1267                   Int *imin, Int *jmin, Int ilow, Int ihigh, Int jlow, Int jhigh, Int range)
   1268 {
   1269     UChar *cand, *ref;
   1270     Int i, j, k, l, istart, jstart;
   1271     Int d, dmin;
   1272     Int lx = video->currVop->pitch; /* with padding */
   1273     Int(*SAD_Block)(UChar*, UChar*, Int, Int, void*) = video->functionPointer->SAD_Block;
   1274     void *extra_info = video->sad_extra_info;
   1275 
   1276     OSCL_UNUSED_ARG(currVol);
   1277 
   1278     /* starting point centered at 16x16 MV */
   1279     ref = cent;
   1280     istart = *imin;
   1281     jstart = *jmin;
   1282 
   1283     dmin = (*SAD_Block)(ref, cur, 65536, lx, (void*)extra_info);
   1284 
   1285     cand = ref;
   1286     /* perform spiral search */
   1287     for (k = 1; k <= range; k++)
   1288     {
   1289 
   1290         i = istart - k;
   1291         j = jstart - k;
   1292         cand -= (lx + 1);  /* candidate region */
   1293 
   1294         for (l = 0; l < 8*k; l++)
   1295         {
   1296             /* no need for boundary checking again */
   1297             if (i >= ilow && i <= ihigh && j >= jlow && j <= jhigh)
   1298             {
   1299                 d = (*SAD_Block)(cand, cur, dmin, lx, (void*)extra_info);
   1300 
   1301                 if (d < dmin)
   1302                 {
   1303                     dmin = d;
   1304                     *imin = i;
   1305                     *jmin = j;
   1306                 }
   1307                 else if ((d == dmin) &&
   1308                          PV_ABS(istart - i) + PV_ABS(jstart - j) < PV_ABS(istart - *imin) + PV_ABS(jstart - *jmin))
   1309                 {
   1310                     dmin = d;
   1311                     *imin = i;
   1312                     *jmin = j;
   1313                 }
   1314             }
   1315 
   1316             if (l < (k << 1))
   1317             {
   1318                 i++;
   1319                 cand++;
   1320             }
   1321             else if (l < (k << 2))
   1322             {
   1323                 j++;
   1324                 cand += lx;
   1325             }
   1326             else if (l < ((k << 2) + (k << 1)))
   1327             {
   1328                 i--;
   1329                 cand--;
   1330             }
   1331             else
   1332             {
   1333                 j--;
   1334                 cand -= lx;
   1335             }
   1336         }
   1337     }
   1338 
   1339     return dmin;
   1340 }
   1341 #endif /* NO_INTER4V */
   1342 
   1343 /*===============================================================================
   1344     Function:   CandidateSelection
   1345     Date:       09/16/2000
   1346     Purpose:    Fill up the list of candidate using spatio-temporal correlation
   1347                 among neighboring blocks.
   1348     Input/Output:   type_pred = 0: first pass, 1: second pass, or no SCD
   1349     Modified:    09/23/01, get rid of redundant candidates before passing back.
   1350 ===============================================================================*/
   1351 
   1352 void CandidateSelection(Int *mvx, Int *mvy, Int *num_can, Int imb, Int jmb,
   1353                         VideoEncData *video, Int type_pred)
   1354 {
   1355     MOT **mot = video->mot;
   1356     MOT *pmot;
   1357     Int mbnum = video->mbnum;
   1358     Vol *currVol = video->vol[video->currLayer];
   1359     Int mbwidth = currVol->nMBPerRow;
   1360     Int mbheight = currVol->nMBPerCol;
   1361     Int i, j, same, num1;
   1362 
   1363     *num_can = 0;
   1364 
   1365     if (video->forwardRefVop->predictionType == P_VOP)
   1366     {
   1367         /* Spatio-Temporal Candidate (five candidates) */
   1368         if (type_pred == 0) /* first pass */
   1369         {
   1370             pmot = &mot[mbnum][0]; /* same coordinate previous frame */
   1371             mvx[(*num_can)] = (pmot->x) >> 1;
   1372             mvy[(*num_can)++] = (pmot->y) >> 1;
   1373             if (imb >= (mbwidth >> 1) && imb > 0)  /*left neighbor previous frame */
   1374             {
   1375                 pmot = &mot[mbnum-1][0];
   1376                 mvx[(*num_can)] = (pmot->x) >> 1;
   1377                 mvy[(*num_can)++] = (pmot->y) >> 1;
   1378             }
   1379             else if (imb + 1 < mbwidth)   /*right neighbor previous frame */
   1380             {
   1381                 pmot = &mot[mbnum+1][0];
   1382                 mvx[(*num_can)] = (pmot->x) >> 1;
   1383                 mvy[(*num_can)++] = (pmot->y) >> 1;
   1384             }
   1385 
   1386             if (jmb < mbheight - 1)  /*bottom neighbor previous frame */
   1387             {
   1388                 pmot = &mot[mbnum+mbwidth][0];
   1389                 mvx[(*num_can)] = (pmot->x) >> 1;
   1390                 mvy[(*num_can)++] = (pmot->y) >> 1;
   1391             }
   1392             else if (jmb > 0)   /*upper neighbor previous frame */
   1393             {
   1394                 pmot = &mot[mbnum-mbwidth][0];
   1395                 mvx[(*num_can)] = (pmot->x) >> 1;
   1396                 mvy[(*num_can)++] = (pmot->y) >> 1;
   1397             }
   1398 
   1399             if (imb > 0 && jmb > 0)  /* upper-left neighbor current frame*/
   1400             {
   1401                 pmot = &mot[mbnum-mbwidth-1][0];
   1402                 mvx[(*num_can)] = (pmot->x) >> 1;
   1403                 mvy[(*num_can)++] = (pmot->y) >> 1;
   1404             }
   1405             if (jmb > 0 && imb < mbheight - 1)  /* upper right neighbor current frame*/
   1406             {
   1407                 pmot = &mot[mbnum-mbwidth+1][0];
   1408                 mvx[(*num_can)] = (pmot->x) >> 1;
   1409                 mvy[(*num_can)++] = (pmot->y) >> 1;
   1410             }
   1411         }
   1412         else    /* second pass */
   1413             /* original ST1 algorithm */
   1414         {
   1415             pmot = &mot[mbnum][0]; /* same coordinate previous frame */
   1416             mvx[(*num_can)] = (pmot->x) >> 1;
   1417             mvy[(*num_can)++] = (pmot->y) >> 1;
   1418 
   1419             if (imb > 0)  /*left neighbor current frame */
   1420             {
   1421                 pmot = &mot[mbnum-1][0];
   1422                 mvx[(*num_can)] = (pmot->x) >> 1;
   1423                 mvy[(*num_can)++] = (pmot->y) >> 1;
   1424             }
   1425             if (jmb > 0)  /*upper neighbor current frame */
   1426             {
   1427                 pmot = &mot[mbnum-mbwidth][0];
   1428                 mvx[(*num_can)] = (pmot->x) >> 1;
   1429                 mvy[(*num_can)++] = (pmot->y) >> 1;
   1430             }
   1431             if (imb < mbwidth - 1)  /*right neighbor previous frame */
   1432             {
   1433                 pmot = &mot[mbnum+1][0];
   1434                 mvx[(*num_can)] = (pmot->x) >> 1;
   1435                 mvy[(*num_can)++] = (pmot->y) >> 1;
   1436             }
   1437             if (jmb < mbheight - 1)  /*bottom neighbor previous frame */
   1438             {
   1439                 pmot = &mot[mbnum+mbwidth][0];
   1440                 mvx[(*num_can)] = (pmot->x) >> 1;
   1441                 mvy[(*num_can)++] = (pmot->y) >> 1;
   1442             }
   1443         }
   1444     }
   1445     else  /* only Spatial Candidate (four candidates)*/
   1446     {
   1447         if (type_pred == 0) /*first pass*/
   1448         {
   1449             if (imb > 1)  /* neighbor two blocks away to the left */
   1450             {
   1451                 pmot = &mot[mbnum-2][0];
   1452                 mvx[(*num_can)] = (pmot->x) >> 1;
   1453                 mvy[(*num_can)++] = (pmot->y) >> 1;
   1454             }
   1455             if (imb > 0 && jmb > 0)  /* upper-left neighbor */
   1456             {
   1457                 pmot = &mot[mbnum-mbwidth-1][0];
   1458                 mvx[(*num_can)] = (pmot->x) >> 1;
   1459                 mvy[(*num_can)++] = (pmot->y) >> 1;
   1460             }
   1461             if (jmb > 0 && imb < mbheight - 1)  /* upper right neighbor */
   1462             {
   1463                 pmot = &mot[mbnum-mbwidth+1][0];
   1464                 mvx[(*num_can)] = (pmot->x) >> 1;
   1465                 mvy[(*num_can)++] = (pmot->y) >> 1;
   1466             }
   1467         }
   1468 //#ifdef SCENE_CHANGE_DETECTION
   1469         /* second pass (ST2 algorithm)*/
   1470         else if (type_pred == 1) /* 4/7/01 */
   1471         {
   1472             if (imb > 0)  /*left neighbor current frame */
   1473             {
   1474                 pmot = &mot[mbnum-1][0];
   1475                 mvx[(*num_can)] = (pmot->x) >> 1;
   1476                 mvy[(*num_can)++] = (pmot->y) >> 1;
   1477             }
   1478             if (jmb > 0)  /*upper neighbor current frame */
   1479             {
   1480                 pmot = &mot[mbnum-mbwidth][0];
   1481                 mvx[(*num_can)] = (pmot->x) >> 1;
   1482                 mvy[(*num_can)++] = (pmot->y) >> 1;
   1483             }
   1484             if (imb < mbwidth - 1)  /*right neighbor current frame */
   1485             {
   1486                 pmot = &mot[mbnum+1][0];
   1487                 mvx[(*num_can)] = (pmot->x) >> 1;
   1488                 mvy[(*num_can)++] = (pmot->y) >> 1;
   1489             }
   1490             if (jmb < mbheight - 1)  /*bottom neighbor current frame */
   1491             {
   1492                 pmot = &mot[mbnum+mbwidth][0];
   1493                 mvx[(*num_can)] = (pmot->x) >> 1;
   1494                 mvy[(*num_can)++] = (pmot->y) >> 1;
   1495             }
   1496         }
   1497 //#else
   1498         else /* original ST1 algorithm */
   1499         {
   1500             if (imb > 0)  /*left neighbor current frame */
   1501             {
   1502                 pmot = &mot[mbnum-1][0];
   1503                 mvx[(*num_can)] = (pmot->x) >> 1;
   1504                 mvy[(*num_can)++] = (pmot->y) >> 1;
   1505 
   1506                 if (jmb > 0)  /*upper-left neighbor current frame */
   1507                 {
   1508                     pmot = &mot[mbnum-mbwidth-1][0];
   1509                     mvx[(*num_can)] = (pmot->x) >> 1;
   1510                     mvy[(*num_can)++] = (pmot->y) >> 1;
   1511                 }
   1512 
   1513             }
   1514             if (jmb > 0)  /*upper neighbor current frame */
   1515             {
   1516                 pmot = &mot[mbnum-mbwidth][0];
   1517                 mvx[(*num_can)] = (pmot->x) >> 1;
   1518                 mvy[(*num_can)++] = (pmot->y) >> 1;
   1519 
   1520                 if (imb < mbheight - 1)  /*upper-right neighbor current frame */
   1521                 {
   1522                     pmot = &mot[mbnum-mbwidth+1][0];
   1523                     mvx[(*num_can)] = (pmot->x) >> 1;
   1524                     mvy[(*num_can)++] = (pmot->y) >> 1;
   1525                 }
   1526             }
   1527         }
   1528 //#endif
   1529     }
   1530 
   1531     /* 3/23/01, remove redundant candidate (possible k-mean) */
   1532     num1 = *num_can;
   1533     *num_can = 1;
   1534     for (i = 1; i < num1; i++)
   1535     {
   1536         same = 0;
   1537         j = 0;
   1538         while (!same && j < *num_can)
   1539         {
   1540 #if (CANDIDATE_DISTANCE==0)
   1541             if (mvx[i] == mvx[j] && mvy[i] == mvy[j])
   1542 #else
   1543             // modified k-mean, 3/24/01, shouldn't be greater than 3
   1544             if (PV_ABS(mvx[i] - mvx[j]) + PV_ABS(mvy[i] - mvy[j]) < CANDIDATE_DISTANCE)
   1545 #endif
   1546                 same = 1;
   1547             j++;
   1548         }
   1549         if (!same)
   1550         {
   1551             mvx[*num_can] = mvx[i];
   1552             mvy[*num_can] = mvy[i];
   1553             (*num_can)++;
   1554         }
   1555     }
   1556 
   1557 #ifdef _SAD_STAT
   1558     num_cand += (*num_can);
   1559 #endif
   1560 
   1561     if (num1 == 5 && *num_can == 1)
   1562         *num_can = ALL_CAND_EQUAL; /* all are equal */
   1563 
   1564     return ;
   1565 }
   1566 
   1567 /*===========================================================================
   1568     Function:   RasterIntraUpdate
   1569     Date:       2/26/01
   1570     Purpose:    To raster-scan assign INTRA-update .
   1571                 N macroblocks are updated (also was programmable).
   1572 ===========================================================================*/
   1573 void RasterIntraUpdate(UChar *intraArray, UChar *Mode, Int totalMB, Int numRefresh)
   1574 {
   1575     Int indx, i;
   1576 
   1577     /* find the last refresh MB */
   1578     indx = 0;
   1579     while (intraArray[indx] == 1 && indx < totalMB)
   1580         indx++;
   1581 
   1582     /* add more  */
   1583     for (i = 0; i < numRefresh && indx < totalMB; i++)
   1584     {
   1585         Mode[indx] = MODE_INTRA;
   1586         intraArray[indx++] = 1;
   1587     }
   1588 
   1589     /* if read the end of frame, reset and loop around */
   1590     if (indx >= totalMB - 1)
   1591     {
   1592         ResetIntraUpdate(intraArray, totalMB);
   1593         indx = 0;
   1594         while (i < numRefresh && indx < totalMB)
   1595         {
   1596             intraArray[indx] = 1;
   1597             Mode[indx++] = MODE_INTRA;
   1598             i++;
   1599         }
   1600     }
   1601 
   1602     return ;
   1603 }
   1604 
   1605 /*===========================================================================
   1606     Function:   ResetIntraUpdate
   1607     Date:       11/28/00
   1608     Purpose:    Reset already intra updated flags to all zero
   1609 ===========================================================================*/
   1610 
   1611 void ResetIntraUpdate(UChar *intraArray, Int totalMB)
   1612 {
   1613     M4VENC_MEMSET(intraArray, 0, sizeof(UChar)*totalMB);
   1614     return ;
   1615 }
   1616 
   1617 /*===========================================================================
   1618     Function:   ResetIntraUpdateRegion
   1619     Date:       12/1/00
   1620     Purpose:    Reset already intra updated flags in one region to all zero
   1621 ===========================================================================*/
   1622 void ResetIntraUpdateRegion(UChar *intraArray, Int start_i, Int rwidth,
   1623                             Int start_j, Int rheight, Int mbwidth, Int mbheight)
   1624 {
   1625     Int indx, j;
   1626 
   1627     if (start_i + rwidth >= mbwidth)
   1628         rwidth = mbwidth - start_i;
   1629     if (start_j + rheight >= mbheight)
   1630         rheight = mbheight - start_j;
   1631 
   1632     for (j = start_j; j < start_j + rheight; j++)
   1633     {
   1634         indx = j * mbwidth;
   1635         M4VENC_MEMSET(intraArray + indx + start_i, 0, sizeof(UChar)*rwidth);
   1636     }
   1637 
   1638     return ;
   1639 }
   1640 
   1641 /*************************************************************
   1642     Function:   MoveNeighborSAD
   1643     Date:       3/27/01
   1644     Purpose:    Move neighboring SAD around when center has shifted
   1645 *************************************************************/
   1646 
   1647 void MoveNeighborSAD(Int dn[], Int new_loc)
   1648 {
   1649     Int tmp[9];
   1650     tmp[0] = dn[0];
   1651     tmp[1] = dn[1];
   1652     tmp[2] = dn[2];
   1653     tmp[3] = dn[3];
   1654     tmp[4] = dn[4];
   1655     tmp[5] = dn[5];
   1656     tmp[6] = dn[6];
   1657     tmp[7] = dn[7];
   1658     tmp[8] = dn[8];
   1659     dn[0] = dn[1] = dn[2] = dn[3] = dn[4] = dn[5] = dn[6] = dn[7] = dn[8] = 65536;
   1660 
   1661     switch (new_loc)
   1662     {
   1663         case 0:
   1664             break;
   1665         case 1:
   1666             dn[4] = tmp[2];
   1667             dn[5] = tmp[0];
   1668             dn[6] = tmp[8];
   1669             break;
   1670         case 2:
   1671             dn[4] = tmp[3];
   1672             dn[5] = tmp[4];
   1673             dn[6] = tmp[0];
   1674             dn[7] = tmp[8];
   1675             dn[8] = tmp[1];
   1676             break;
   1677         case 3:
   1678             dn[6] = tmp[4];
   1679             dn[7] = tmp[0];
   1680             dn[8] = tmp[2];
   1681             break;
   1682         case 4:
   1683             dn[1] = tmp[2];
   1684             dn[2] = tmp[3];
   1685             dn[6] = tmp[5];
   1686             dn[7] = tmp[6];
   1687             dn[8] = tmp[0];
   1688             break;
   1689         case 5:
   1690             dn[1] = tmp[0];
   1691             dn[2] = tmp[4];
   1692             dn[8] = tmp[6];
   1693             break;
   1694         case 6:
   1695             dn[1] = tmp[8];
   1696             dn[2] = tmp[0];
   1697             dn[3] = tmp[4];
   1698             dn[4] = tmp[5];
   1699             dn[8] = tmp[7];
   1700             break;
   1701         case 7:
   1702             dn[2] = tmp[8];
   1703             dn[3] = tmp[0];
   1704             dn[4] = tmp[6];
   1705             break;
   1706         case 8:
   1707             dn[2] = tmp[1];
   1708             dn[3] = tmp[2];
   1709             dn[4] = tmp[0];
   1710             dn[5] = tmp[6];
   1711             dn[6] = tmp[7];
   1712             break;
   1713     }
   1714     dn[0] = tmp[new_loc];
   1715 
   1716     return ;
   1717 }
   1718 
   1719 /* 3/28/01, find minimal of dn[9] */
   1720 
   1721 Int FindMin(Int dn[])
   1722 {
   1723     Int min, i;
   1724     Int dmin;
   1725 
   1726     dmin = dn[1];
   1727     min = 1;
   1728     for (i = 2; i < 9; i++)
   1729     {
   1730         if (dn[i] < dmin)
   1731         {
   1732             dmin = dn[i];
   1733             min = i;
   1734         }
   1735     }
   1736 
   1737     return min;
   1738 }
   1739 
   1740 
   1741 
   1742