Home | History | Annotate | Download | only in encoder
      1 /*
      2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 
     12 #include "mcomp.h"
     13 #include "vpx_mem/vpx_mem.h"
     14 
     15 #include <stdio.h>
     16 #include <limits.h>
     17 #include <math.h>
     18 
     19 #ifdef ENTROPY_STATS
     20 static int mv_ref_ct [31] [4] [2];
     21 static int mv_mode_cts [4] [2];
     22 #endif
     23 
     24 static int mv_bits_sadcost[256];
     25 
     26 void vp8cx_init_mv_bits_sadcost()
     27 {
     28     int i;
     29 
     30     for (i = 0; i < 256; i++)
     31     {
     32         mv_bits_sadcost[i] = (int)sqrt(i * 16);
     33     }
     34 }
     35 
     36 
     37 int vp8_mv_bit_cost(MV *mv, MV *ref, int *mvcost[2], int Weight)
     38 {
     39     // MV costing is based on the distribution of vectors in the previous frame and as such will tend to
     40     // over state the cost of vectors. In addition coding a new vector can have a knock on effect on the
     41     // cost of subsequent vectors and the quality of prediction from NEAR and NEAREST for subsequent blocks.
     42     // The "Weight" parameter allows, to a limited extent, for some account to be taken of these factors.
     43     return ((mvcost[0][(mv->row - ref->row) >> 1] + mvcost[1][(mv->col - ref->col) >> 1]) * Weight) >> 7;
     44 }
     45 
     46 int vp8_mv_err_cost(MV *mv, MV *ref, int *mvcost[2], int error_per_bit)
     47 {
     48     //int i;
     49     //return ((mvcost[0][(mv->row - ref->row)>>1] + mvcost[1][(mv->col - ref->col)>>1] + 128) * error_per_bit) >> 8;
     50     //return ( (vp8_mv_bit_cost(mv,  ref, mvcost, 100) + 128) * error_per_bit) >> 8;
     51 
     52     //i = (vp8_mv_bit_cost(mv,  ref, mvcost, 100) * error_per_bit + 128) >> 8;
     53     return ((mvcost[0][(mv->row - ref->row) >> 1] + mvcost[1][(mv->col - ref->col) >> 1]) * error_per_bit + 128) >> 8;
     54     //return (vp8_mv_bit_cost(mv,  ref, mvcost, 128) * error_per_bit + 128) >> 8;
     55 }
     56 
     57 
     58 static int mv_bits(MV *mv, MV *ref, int *mvcost[2])
     59 {
     60     // get the estimated number of bits for a motion vector, to be used for costing in SAD based
     61     // motion estimation
     62     return ((mvcost[0][(mv->row - ref->row) >> 1]  +  mvcost[1][(mv->col - ref->col)>> 1]) + 128) >> 8;
     63 }
     64 
     65 void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride)
     66 {
     67     int Len;
     68     int search_site_count = 0;
     69 
     70 
     71     // Generate offsets for 4 search sites per step.
     72     Len = MAX_FIRST_STEP;
     73     x->ss[search_site_count].mv.col = 0;
     74     x->ss[search_site_count].mv.row = 0;
     75     x->ss[search_site_count].offset = 0;
     76     search_site_count++;
     77 
     78     while (Len > 0)
     79     {
     80 
     81         // Compute offsets for search sites.
     82         x->ss[search_site_count].mv.col = 0;
     83         x->ss[search_site_count].mv.row = -Len;
     84         x->ss[search_site_count].offset = -Len * stride;
     85         search_site_count++;
     86 
     87         // Compute offsets for search sites.
     88         x->ss[search_site_count].mv.col = 0;
     89         x->ss[search_site_count].mv.row = Len;
     90         x->ss[search_site_count].offset = Len * stride;
     91         search_site_count++;
     92 
     93         // Compute offsets for search sites.
     94         x->ss[search_site_count].mv.col = -Len;
     95         x->ss[search_site_count].mv.row = 0;
     96         x->ss[search_site_count].offset = -Len;
     97         search_site_count++;
     98 
     99         // Compute offsets for search sites.
    100         x->ss[search_site_count].mv.col = Len;
    101         x->ss[search_site_count].mv.row = 0;
    102         x->ss[search_site_count].offset = Len;
    103         search_site_count++;
    104 
    105         // Contract.
    106         Len /= 2;
    107     }
    108 
    109     x->ss_count = search_site_count;
    110     x->searches_per_step = 4;
    111 }
    112 
    113 void vp8_init3smotion_compensation(MACROBLOCK *x, int stride)
    114 {
    115     int Len;
    116     int search_site_count = 0;
    117 
    118     // Generate offsets for 8 search sites per step.
    119     Len = MAX_FIRST_STEP;
    120     x->ss[search_site_count].mv.col = 0;
    121     x->ss[search_site_count].mv.row = 0;
    122     x->ss[search_site_count].offset = 0;
    123     search_site_count++;
    124 
    125     while (Len > 0)
    126     {
    127 
    128         // Compute offsets for search sites.
    129         x->ss[search_site_count].mv.col = 0;
    130         x->ss[search_site_count].mv.row = -Len;
    131         x->ss[search_site_count].offset = -Len * stride;
    132         search_site_count++;
    133 
    134         // Compute offsets for search sites.
    135         x->ss[search_site_count].mv.col = 0;
    136         x->ss[search_site_count].mv.row = Len;
    137         x->ss[search_site_count].offset = Len * stride;
    138         search_site_count++;
    139 
    140         // Compute offsets for search sites.
    141         x->ss[search_site_count].mv.col = -Len;
    142         x->ss[search_site_count].mv.row = 0;
    143         x->ss[search_site_count].offset = -Len;
    144         search_site_count++;
    145 
    146         // Compute offsets for search sites.
    147         x->ss[search_site_count].mv.col = Len;
    148         x->ss[search_site_count].mv.row = 0;
    149         x->ss[search_site_count].offset = Len;
    150         search_site_count++;
    151 
    152         // Compute offsets for search sites.
    153         x->ss[search_site_count].mv.col = -Len;
    154         x->ss[search_site_count].mv.row = -Len;
    155         x->ss[search_site_count].offset = -Len * stride - Len;
    156         search_site_count++;
    157 
    158         // Compute offsets for search sites.
    159         x->ss[search_site_count].mv.col = Len;
    160         x->ss[search_site_count].mv.row = -Len;
    161         x->ss[search_site_count].offset = -Len * stride + Len;
    162         search_site_count++;
    163 
    164         // Compute offsets for search sites.
    165         x->ss[search_site_count].mv.col = -Len;
    166         x->ss[search_site_count].mv.row = Len;
    167         x->ss[search_site_count].offset = Len * stride - Len;
    168         search_site_count++;
    169 
    170         // Compute offsets for search sites.
    171         x->ss[search_site_count].mv.col = Len;
    172         x->ss[search_site_count].mv.row = Len;
    173         x->ss[search_site_count].offset = Len * stride + Len;
    174         search_site_count++;
    175 
    176 
    177         // Contract.
    178         Len /= 2;
    179     }
    180 
    181     x->ss_count = search_site_count;
    182     x->searches_per_step = 8;
    183 }
    184 
    185 
    186 #define MVC(r,c) (((mvcost[0][(r)-rr] + mvcost[1][(c) - rc]) * error_per_bit + 128 )>>8 ) // estimated cost of a motion vector (r,c)
    187 #define PRE(r,c) (*(d->base_pre) + d->pre + ((r)>>2) * d->pre_stride + ((c)>>2)) // pointer to predictor base of a motionvector
    188 #define SP(x) (((x)&3)<<1) // convert motion vector component to offset for svf calc
    189 #define DIST(r,c) vfp->svf( PRE(r,c), d->pre_stride, SP(c),SP(r), z,b->src_stride,&sse) // returns subpixel variance error function.
    190 #define IFMVCV(r,c,s,e) if ( c >= minc && c <= maxc && r >= minr && r <= maxr) s else e;
    191 #define ERR(r,c) (MVC(r,c)+DIST(r,c)) // returns distortion + motion vector cost
    192 #define CHECK_BETTER(v,r,c) IFMVCV(r,c,{if((v = ERR(r,c)) < besterr) { besterr = v; br=r; bc=c; }}, v=INT_MAX;)// checks if (r,c) has better score than previous best
    193 #define MIN(x,y) (((x)<(y))?(x):(y))
    194 #define MAX(x,y) (((x)>(y))?(x):(y))
    195 
    196 //#define CHECK_BETTER(v,r,c) if((v = ERR(r,c)) < besterr) { besterr = v; br=r; bc=c; }
    197 
    198 int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2])
    199 {
    200     unsigned char *y = *(d->base_pre) + d->pre + (bestmv->row) * d->pre_stride + bestmv->col;
    201     unsigned char *z = (*(b->base_src) + b->src);
    202 
    203     int rr = ref_mv->row >> 1, rc = ref_mv->col >> 1;
    204     int br = bestmv->row << 2, bc = bestmv->col << 2;
    205     int tr = br, tc = bc;
    206     unsigned int besterr = INT_MAX;
    207     unsigned int left, right, up, down, diag;
    208     unsigned int sse;
    209     unsigned int whichdir;
    210     unsigned int halfiters = 4;
    211     unsigned int quarteriters = 4;
    212 
    213     int minc = MAX(x->mv_col_min << 2, (ref_mv->col >> 1) - ((1 << mvlong_width) - 1));
    214     int maxc = MIN(x->mv_col_max << 2, (ref_mv->col >> 1) + ((1 << mvlong_width) - 1));
    215     int minr = MAX(x->mv_row_min << 2, (ref_mv->row >> 1) - ((1 << mvlong_width) - 1));
    216     int maxr = MIN(x->mv_row_max << 2, (ref_mv->row >> 1) + ((1 << mvlong_width) - 1));
    217 
    218     // central mv
    219     bestmv->row <<= 3;
    220     bestmv->col <<= 3;
    221 
    222     // calculate central point error
    223     besterr = vfp->vf(y, d->pre_stride, z, b->src_stride, &sse);
    224     besterr += vp8_mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
    225 
    226     // TODO: Each subsequent iteration checks at least one point in common with the last iteration could be 2 ( if diag selected)
    227     while (--halfiters)
    228     {
    229         // 1/2 pel
    230         CHECK_BETTER(left, tr, tc - 2);
    231         CHECK_BETTER(right, tr, tc + 2);
    232         CHECK_BETTER(up, tr - 2, tc);
    233         CHECK_BETTER(down, tr + 2, tc);
    234 
    235         whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
    236 
    237         switch (whichdir)
    238         {
    239         case 0:
    240             CHECK_BETTER(diag, tr - 2, tc - 2);
    241             break;
    242         case 1:
    243             CHECK_BETTER(diag, tr - 2, tc + 2);
    244             break;
    245         case 2:
    246             CHECK_BETTER(diag, tr + 2, tc - 2);
    247             break;
    248         case 3:
    249             CHECK_BETTER(diag, tr + 2, tc + 2);
    250             break;
    251         }
    252 
    253         // no reason to check the same one again.
    254         if (tr == br && tc == bc)
    255             break;
    256 
    257         tr = br;
    258         tc = bc;
    259     }
    260 
    261     // TODO: Each subsequent iteration checks at least one point in common with the last iteration could be 2 ( if diag selected)
    262     // 1/4 pel
    263     while (--quarteriters)
    264     {
    265         CHECK_BETTER(left, tr, tc - 1);
    266         CHECK_BETTER(right, tr, tc + 1);
    267         CHECK_BETTER(up, tr - 1, tc);
    268         CHECK_BETTER(down, tr + 1, tc);
    269 
    270         whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
    271 
    272         switch (whichdir)
    273         {
    274         case 0:
    275             CHECK_BETTER(diag, tr - 1, tc - 1);
    276             break;
    277         case 1:
    278             CHECK_BETTER(diag, tr - 1, tc + 1);
    279             break;
    280         case 2:
    281             CHECK_BETTER(diag, tr + 1, tc - 1);
    282             break;
    283         case 3:
    284             CHECK_BETTER(diag, tr + 1, tc + 1);
    285             break;
    286         }
    287 
    288         // no reason to check the same one again.
    289         if (tr == br && tc == bc)
    290             break;
    291 
    292         tr = br;
    293         tc = bc;
    294     }
    295 
    296     bestmv->row = br << 1;
    297     bestmv->col = bc << 1;
    298 
    299     if ((abs(bestmv->col - ref_mv->col) > MAX_FULL_PEL_VAL) || (abs(bestmv->row - ref_mv->row) > MAX_FULL_PEL_VAL))
    300         return INT_MAX;
    301 
    302     return besterr;
    303 }
    304 #undef MVC
    305 #undef PRE
    306 #undef SP
    307 #undef DIST
    308 #undef ERR
    309 #undef CHECK_BETTER
    310 #undef MIN
    311 #undef MAX
    312 int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2])
    313 {
    314     int bestmse = INT_MAX;
    315     MV startmv;
    316     //MV this_mv;
    317     MV this_mv;
    318     unsigned char *y = *(d->base_pre) + d->pre + (bestmv->row) * d->pre_stride + bestmv->col;
    319     unsigned char *z = (*(b->base_src) + b->src);
    320     int left, right, up, down, diag;
    321     unsigned int sse;
    322     int whichdir ;
    323 
    324 
    325     // Trap uncodable vectors
    326     if ((abs((bestmv->col << 3) - ref_mv->col) > MAX_FULL_PEL_VAL) || (abs((bestmv->row << 3) - ref_mv->row) > MAX_FULL_PEL_VAL))
    327     {
    328         bestmv->row <<= 3;
    329         bestmv->col <<= 3;
    330         return INT_MAX;
    331     }
    332 
    333     // central mv
    334     bestmv->row <<= 3;
    335     bestmv->col <<= 3;
    336     startmv = *bestmv;
    337 
    338     // calculate central point error
    339     bestmse = vfp->vf(y, d->pre_stride, z, b->src_stride, &sse);
    340     bestmse += vp8_mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
    341 
    342     // go left then right and check error
    343     this_mv.row = startmv.row;
    344     this_mv.col = ((startmv.col - 8) | 4);
    345     left = vfp->svf_halfpix_h(y - 1, d->pre_stride, z, b->src_stride, &sse);
    346     left += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    347 
    348     if (left < bestmse)
    349     {
    350         *bestmv = this_mv;
    351         bestmse = left;
    352     }
    353 
    354     this_mv.col += 8;
    355     right = vfp->svf_halfpix_h(y, d->pre_stride, z, b->src_stride, &sse);
    356     right += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    357 
    358     if (right < bestmse)
    359     {
    360         *bestmv = this_mv;
    361         bestmse = right;
    362     }
    363 
    364     // go up then down and check error
    365     this_mv.col = startmv.col;
    366     this_mv.row = ((startmv.row - 8) | 4);
    367     up = vfp->svf_halfpix_v(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
    368     up += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    369 
    370     if (up < bestmse)
    371     {
    372         *bestmv = this_mv;
    373         bestmse = up;
    374     }
    375 
    376     this_mv.row += 8;
    377     down = vfp->svf_halfpix_v(y, d->pre_stride, z, b->src_stride, &sse);
    378     down += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    379 
    380     if (down < bestmse)
    381     {
    382         *bestmv = this_mv;
    383         bestmse = down;
    384     }
    385 
    386 
    387     // now check 1 more diagonal
    388     whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
    389     //for(whichdir =0;whichdir<4;whichdir++)
    390     //{
    391     this_mv = startmv;
    392 
    393     switch (whichdir)
    394     {
    395     case 0:
    396         this_mv.col = (this_mv.col - 8) | 4;
    397         this_mv.row = (this_mv.row - 8) | 4;
    398         diag = vfp->svf_halfpix_hv(y - 1 - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
    399         break;
    400     case 1:
    401         this_mv.col += 4;
    402         this_mv.row = (this_mv.row - 8) | 4;
    403         diag = vfp->svf_halfpix_hv(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
    404         break;
    405     case 2:
    406         this_mv.col = (this_mv.col - 8) | 4;
    407         this_mv.row += 4;
    408         diag = vfp->svf_halfpix_hv(y - 1, d->pre_stride, z, b->src_stride, &sse);
    409         break;
    410     case 3:
    411         this_mv.col += 4;
    412         this_mv.row += 4;
    413         diag = vfp->svf_halfpix_hv(y, d->pre_stride, z, b->src_stride, &sse);
    414         break;
    415     }
    416 
    417     diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    418 
    419     if (diag < bestmse)
    420     {
    421         *bestmv = this_mv;
    422         bestmse = diag;
    423     }
    424 
    425 //  }
    426 
    427 
    428     // time to check quarter pels.
    429     if (bestmv->row < startmv.row)
    430         y -= d->pre_stride;
    431 
    432     if (bestmv->col < startmv.col)
    433         y--;
    434 
    435     startmv = *bestmv;
    436 
    437 
    438 
    439     // go left then right and check error
    440     this_mv.row = startmv.row;
    441 
    442     if (startmv.col & 7)
    443     {
    444         this_mv.col = startmv.col - 2;
    445         left = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
    446     }
    447     else
    448     {
    449         this_mv.col = (startmv.col - 8) | 6;
    450         left = vfp->svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);
    451     }
    452 
    453     left += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    454 
    455     if (left < bestmse)
    456     {
    457         *bestmv = this_mv;
    458         bestmse = left;
    459     }
    460 
    461     this_mv.col += 4;
    462     right = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
    463     right += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    464 
    465     if (right < bestmse)
    466     {
    467         *bestmv = this_mv;
    468         bestmse = right;
    469     }
    470 
    471     // go up then down and check error
    472     this_mv.col = startmv.col;
    473 
    474     if (startmv.row & 7)
    475     {
    476         this_mv.row = startmv.row - 2;
    477         up = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
    478     }
    479     else
    480     {
    481         this_mv.row = (startmv.row - 8) | 6;
    482         up = vfp->svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);
    483     }
    484 
    485     up += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    486 
    487     if (up < bestmse)
    488     {
    489         *bestmv = this_mv;
    490         bestmse = up;
    491     }
    492 
    493     this_mv.row += 4;
    494     down = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
    495     down += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    496 
    497     if (down < bestmse)
    498     {
    499         *bestmv = this_mv;
    500         bestmse = down;
    501     }
    502 
    503 
    504     // now check 1 more diagonal
    505     whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
    506 
    507 //  for(whichdir=0;whichdir<4;whichdir++)
    508 //  {
    509     this_mv = startmv;
    510 
    511     switch (whichdir)
    512     {
    513     case 0:
    514 
    515         if (startmv.row & 7)
    516         {
    517             this_mv.row -= 2;
    518 
    519             if (startmv.col & 7)
    520             {
    521                 this_mv.col -= 2;
    522                 diag = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
    523             }
    524             else
    525             {
    526                 this_mv.col = (startmv.col - 8) | 6;
    527                 diag = vfp->svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);;
    528             }
    529         }
    530         else
    531         {
    532             this_mv.row = (startmv.row - 8) | 6;
    533 
    534             if (startmv.col & 7)
    535             {
    536                 this_mv.col -= 2;
    537                 diag = vfp->svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);
    538             }
    539             else
    540             {
    541                 this_mv.col = (startmv.col - 8) | 6;
    542                 diag = vfp->svf(y - d->pre_stride - 1, d->pre_stride, 6, 6, z, b->src_stride, &sse);
    543             }
    544         }
    545 
    546         break;
    547     case 1:
    548         this_mv.col += 2;
    549 
    550         if (startmv.row & 7)
    551         {
    552             this_mv.row -= 2;
    553             diag = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
    554         }
    555         else
    556         {
    557             this_mv.row = (startmv.row - 8) | 6;
    558             diag = vfp->svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);
    559         }
    560 
    561         break;
    562     case 2:
    563         this_mv.row += 2;
    564 
    565         if (startmv.col & 7)
    566         {
    567             this_mv.col -= 2;
    568             diag = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
    569         }
    570         else
    571         {
    572             this_mv.col = (startmv.col - 8) | 6;
    573             diag = vfp->svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);;
    574         }
    575 
    576         break;
    577     case 3:
    578         this_mv.col += 2;
    579         this_mv.row += 2;
    580         diag = vfp->svf(y, d->pre_stride,  this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
    581         break;
    582     }
    583 
    584     diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    585 
    586     if (diag < bestmse)
    587     {
    588         *bestmv = this_mv;
    589         bestmse = diag;
    590     }
    591 
    592 //  }
    593 
    594     return bestmse;
    595 }
    596 
    597 int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2])
    598 {
    599     int bestmse = INT_MAX;
    600     MV startmv;
    601     //MV this_mv;
    602     MV this_mv;
    603     unsigned char *y = *(d->base_pre) + d->pre + (bestmv->row) * d->pre_stride + bestmv->col;
    604     unsigned char *z = (*(b->base_src) + b->src);
    605     int left, right, up, down, diag;
    606     unsigned int sse;
    607 
    608     // Trap uncodable vectors
    609     if ((abs((bestmv->col << 3) - ref_mv->col) > MAX_FULL_PEL_VAL) || (abs((bestmv->row << 3) - ref_mv->row) > MAX_FULL_PEL_VAL))
    610     {
    611         bestmv->row <<= 3;
    612         bestmv->col <<= 3;
    613         return INT_MAX;
    614     }
    615 
    616     // central mv
    617     bestmv->row <<= 3;
    618     bestmv->col <<= 3;
    619     startmv = *bestmv;
    620 
    621     // calculate central point error
    622     bestmse = vfp->vf(y, d->pre_stride, z, b->src_stride, &sse);
    623     bestmse += vp8_mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
    624 
    625     // go left then right and check error
    626     this_mv.row = startmv.row;
    627     this_mv.col = ((startmv.col - 8) | 4);
    628     left = vfp->svf_halfpix_h(y - 1, d->pre_stride, z, b->src_stride, &sse);
    629     left += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    630 
    631     if (left < bestmse)
    632     {
    633         *bestmv = this_mv;
    634         bestmse = left;
    635     }
    636 
    637     this_mv.col += 8;
    638     right = vfp->svf_halfpix_h(y, d->pre_stride, z, b->src_stride, &sse);
    639     right += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    640 
    641     if (right < bestmse)
    642     {
    643         *bestmv = this_mv;
    644         bestmse = right;
    645     }
    646 
    647     // go up then down and check error
    648     this_mv.col = startmv.col;
    649     this_mv.row = ((startmv.row - 8) | 4);
    650     up = vfp->svf_halfpix_v(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
    651     up += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    652 
    653     if (up < bestmse)
    654     {
    655         *bestmv = this_mv;
    656         bestmse = up;
    657     }
    658 
    659     this_mv.row += 8;
    660     down = vfp->svf_halfpix_v(y, d->pre_stride, z, b->src_stride, &sse);
    661     down += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    662 
    663     if (down < bestmse)
    664     {
    665         *bestmv = this_mv;
    666         bestmse = down;
    667     }
    668 
    669     // somewhat strangely not doing all the diagonals for half pel is slower than doing them.
    670 #if 0
    671     // now check 1 more diagonal -
    672     whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
    673     this_mv = startmv;
    674 
    675     switch (whichdir)
    676     {
    677     case 0:
    678         this_mv.col = (this_mv.col - 8) | 4;
    679         this_mv.row = (this_mv.row - 8) | 4;
    680         diag = vfp->svf(y - 1 - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse);
    681         break;
    682     case 1:
    683         this_mv.col += 4;
    684         this_mv.row = (this_mv.row - 8) | 4;
    685         diag = vfp->svf(y - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse);
    686         break;
    687     case 2:
    688         this_mv.col = (this_mv.col - 8) | 4;
    689         this_mv.row += 4;
    690         diag = vfp->svf(y - 1, d->pre_stride, 4, 4, z, b->src_stride, &sse);
    691         break;
    692     case 3:
    693         this_mv.col += 4;
    694         this_mv.row += 4;
    695         diag = vfp->svf(y, d->pre_stride, 4, 4, z, b->src_stride, &sse);
    696         break;
    697     }
    698 
    699     diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    700 
    701     if (diag < bestmse)
    702     {
    703         *bestmv = this_mv;
    704         bestmse = diag;
    705     }
    706 
    707 #else
    708     this_mv.col = (this_mv.col - 8) | 4;
    709     this_mv.row = (this_mv.row - 8) | 4;
    710     diag = vfp->svf_halfpix_hv(y - 1 - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
    711     diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    712 
    713     if (diag < bestmse)
    714     {
    715         *bestmv = this_mv;
    716         bestmse = diag;
    717     }
    718 
    719     this_mv.col += 8;
    720     diag = vfp->svf_halfpix_hv(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
    721     diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    722 
    723     if (diag < bestmse)
    724     {
    725         *bestmv = this_mv;
    726         bestmse = diag;
    727     }
    728 
    729     this_mv.col = (this_mv.col - 8) | 4;
    730     this_mv.row = startmv.row + 4;
    731     diag = vfp->svf_halfpix_hv(y - 1, d->pre_stride, z, b->src_stride, &sse);
    732     diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    733 
    734     if (diag < bestmse)
    735     {
    736         *bestmv = this_mv;
    737         bestmse = diag;
    738     }
    739 
    740     this_mv.col += 8;
    741     diag = vfp->svf_halfpix_hv(y, d->pre_stride, z, b->src_stride, &sse);
    742     diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    743 
    744     if (diag < bestmse)
    745     {
    746         *bestmv = this_mv;
    747         bestmse = diag;
    748     }
    749 
    750 #endif
    751     return bestmse;
    752 }
    753 
    754 
    755 #define MVC(r,c) (((mvsadcost[0][((r)<<2)-rr] + mvsadcost[1][((c)<<2) - rc]) * error_per_bit + 128 )>>8 ) // estimated cost of a motion vector (r,c)
    756 #define PRE(r,c) (*(d->base_pre) + d->pre + (r) * d->pre_stride + (c)) // pointer to predictor base of a motionvector
    757 #define DIST(r,c,v) vfp->sdf( src,src_stride,PRE(r,c),d->pre_stride, v) // returns sad error score.
    758 #define ERR(r,c,v) (MVC(r,c)+DIST(r,c,v)) // returns distortion + motion vector cost
    759 #define CHECK_BETTER(v,r,c) if ((v = ERR(r,c,besterr)) < besterr) { besterr = v; br=r; bc=c; } // checks if (r,c) has better score than previous best
    760 static const MV next_chkpts[6][3] =
    761 {
    762     {{ -2, 0}, { -1, -2}, {1, -2}},
    763     {{ -1, -2}, {1, -2}, {2, 0}},
    764     {{1, -2}, {2, 0}, {1, 2}},
    765     {{2, 0}, {1, 2}, { -1, 2}},
    766     {{1, 2}, { -1, 2}, { -2, 0}},
    767     {{ -1, 2}, { -2, 0}, { -1, -2}}
    768 };
    769 int vp8_hex_search
    770 (
    771     MACROBLOCK *x,
    772     BLOCK *b,
    773     BLOCKD *d,
    774     MV *ref_mv,
    775     MV *best_mv,
    776     int search_param,
    777     int error_per_bit,
    778     int *num00,
    779     const vp8_variance_fn_ptr_t *vfp,
    780     int *mvsadcost[2],
    781     int *mvcost[2]
    782 )
    783 {
    784     MV hex[6] = { { -1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0} } ;
    785     MV neighbors[8] = { { -1, -1}, { -1, 0}, { -1, 1}, {0, -1}, {0, 1}, {1, -1}, {1, 0}, {1, 1} } ;
    786     int i, j;
    787     unsigned char *src = (*(b->base_src) + b->src);
    788     int src_stride = b->src_stride;
    789     int rr = ref_mv->row, rc = ref_mv->col, br = rr >> 3, bc = rc >> 3, tr, tc;
    790     unsigned int besterr, thiserr = 0x7fffffff;
    791     int k = -1, tk;
    792 
    793     if (bc < x->mv_col_min) bc = x->mv_col_min;
    794 
    795     if (bc > x->mv_col_max) bc = x->mv_col_max;
    796 
    797     if (br < x->mv_row_min) br = x->mv_row_min;
    798 
    799     if (br > x->mv_row_max) br = x->mv_row_max;
    800 
    801     rr >>= 1;
    802     rc >>= 1;
    803 
    804     besterr = ERR(br, bc, thiserr);
    805 
    806     // hex search
    807     //j=0
    808     tr = br;
    809     tc = bc;
    810 
    811     for (i = 0; i < 6; i++)
    812     {
    813         int nr = tr + hex[i].row, nc = tc + hex[i].col;
    814 
    815         if (nc < x->mv_col_min) continue;
    816 
    817         if (nc > x->mv_col_max) continue;
    818 
    819         if (nr < x->mv_row_min) continue;
    820 
    821         if (nr > x->mv_row_max) continue;
    822 
    823         //CHECK_BETTER(thiserr,nr,nc);
    824         if ((thiserr = ERR(nr, nc, besterr)) < besterr)
    825         {
    826             besterr = thiserr;
    827             br = nr;
    828             bc = nc;
    829             k = i;
    830         }
    831     }
    832 
    833     if (tr == br && tc == bc)
    834         goto cal_neighbors;
    835 
    836     for (j = 1; j < 127; j++)
    837     {
    838         tr = br;
    839         tc = bc;
    840         tk = k;
    841 
    842         for (i = 0; i < 3; i++)
    843         {
    844             int nr = tr + next_chkpts[tk][i].row, nc = tc + next_chkpts[tk][i].col;
    845 
    846             if (nc < x->mv_col_min) continue;
    847 
    848             if (nc > x->mv_col_max) continue;
    849 
    850             if (nr < x->mv_row_min) continue;
    851 
    852             if (nr > x->mv_row_max) continue;
    853 
    854             //CHECK_BETTER(thiserr,nr,nc);
    855             if ((thiserr = ERR(nr, nc, besterr)) < besterr)
    856             {
    857                 besterr = thiserr;
    858                 br = nr;
    859                 bc = nc; //k=(tk+5+i)%6;}
    860                 k = tk + 5 + i;
    861 
    862                 if (k >= 12) k -= 12;
    863                 else if (k >= 6) k -= 6;
    864             }
    865         }
    866 
    867         if (tr == br && tc == bc)
    868             break;
    869     }
    870 
    871     // check 8 1 away neighbors
    872 cal_neighbors:
    873     tr = br;
    874     tc = bc;
    875 
    876     for (i = 0; i < 8; i++)
    877     {
    878         int nr = tr + neighbors[i].row, nc = tc + neighbors[i].col;
    879 
    880         if (nc < x->mv_col_min) continue;
    881 
    882         if (nc > x->mv_col_max) continue;
    883 
    884         if (nr < x->mv_row_min) continue;
    885 
    886         if (nr > x->mv_row_max) continue;
    887 
    888         CHECK_BETTER(thiserr, nr, nc);
    889     }
    890 
    891     best_mv->row = br;
    892     best_mv->col = bc;
    893 
    894     return vfp->vf(src, src_stride, PRE(br, bc), d->pre_stride, &thiserr) + MVC(br, bc) ;
    895 }
    896 #undef MVC
    897 #undef PRE
    898 #undef SP
    899 #undef DIST
    900 #undef ERR
    901 #undef CHECK_BETTER
    902 
    903 
    904 int vp8_diamond_search_sad
    905 (
    906     MACROBLOCK *x,
    907     BLOCK *b,
    908     BLOCKD *d,
    909     MV *ref_mv,
    910     MV *best_mv,
    911     int search_param,
    912     int error_per_bit,
    913     int *num00,
    914     vp8_variance_fn_ptr_t *fn_ptr,
    915     int *mvsadcost[2],
    916     int *mvcost[2]
    917 )
    918 {
    919     int i, j, step;
    920 
    921     unsigned char *what = (*(b->base_src) + b->src);
    922     int what_stride = b->src_stride;
    923     unsigned char *in_what;
    924     int in_what_stride = d->pre_stride;
    925     unsigned char *best_address;
    926 
    927     int tot_steps;
    928     MV this_mv;
    929 
    930     int bestsad = INT_MAX;
    931     int best_site = 0;
    932     int last_site = 0;
    933 
    934     int ref_row = ref_mv->row >> 3;
    935     int ref_col = ref_mv->col >> 3;
    936     int this_row_offset;
    937     int this_col_offset;
    938     search_site *ss;
    939 
    940     unsigned char *check_here;
    941     int thissad;
    942 
    943     // Work out the start point for the search
    944     in_what = (unsigned char *)(*(d->base_pre) + d->pre + (ref_row * (d->pre_stride)) + ref_col);
    945     best_address = in_what;
    946 
    947     // We need to check that the starting point for the search (as indicated by ref_mv) is within the buffer limits
    948     if ((ref_col > x->mv_col_min) && (ref_col < x->mv_col_max) &&
    949     (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max))
    950     {
    951         // Check the starting position
    952         bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, ref_mv, mvsadcost, error_per_bit);
    953     }
    954 
    955     // search_param determines the length of the initial step and hence the number of iterations
    956     // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
    957     ss = &x->ss[search_param * x->searches_per_step];
    958     tot_steps = (x->ss_count / x->searches_per_step) - search_param;
    959 
    960     i = 1;
    961     best_mv->row = ref_row;
    962     best_mv->col = ref_col;
    963 
    964     *num00 = 0;
    965 
    966     for (step = 0; step < tot_steps ; step++)
    967     {
    968         for (j = 0 ; j < x->searches_per_step ; j++)
    969         {
    970             // Trap illegal vectors
    971             this_row_offset = best_mv->row + ss[i].mv.row;
    972             this_col_offset = best_mv->col + ss[i].mv.col;
    973 
    974             if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
    975             (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
    976 
    977             {
    978                 check_here = ss[i].offset + best_address;
    979                 thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
    980 
    981                 if (thissad < bestsad)
    982                 {
    983                     this_mv.row = this_row_offset << 3;
    984                     this_mv.col = this_col_offset << 3;
    985                     thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
    986 
    987                     if (thissad < bestsad)
    988                     {
    989                         bestsad = thissad;
    990                         best_site = i;
    991                     }
    992                 }
    993             }
    994 
    995             i++;
    996         }
    997 
    998         if (best_site != last_site)
    999         {
   1000             best_mv->row += ss[best_site].mv.row;
   1001             best_mv->col += ss[best_site].mv.col;
   1002             best_address += ss[best_site].offset;
   1003             last_site = best_site;
   1004         }
   1005         else if (best_address == in_what)
   1006             (*num00)++;
   1007     }
   1008 
   1009     this_mv.row = best_mv->row << 3;
   1010     this_mv.col = best_mv->col << 3;
   1011 
   1012     if (bestsad == INT_MAX)
   1013         return INT_MAX;
   1014 
   1015     return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad))
   1016     + vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
   1017 }
   1018 
   1019 int vp8_diamond_search_sadx4
   1020 (
   1021     MACROBLOCK *x,
   1022     BLOCK *b,
   1023     BLOCKD *d,
   1024     MV *ref_mv,
   1025     MV *best_mv,
   1026     int search_param,
   1027     int error_per_bit,
   1028     int *num00,
   1029     vp8_variance_fn_ptr_t *fn_ptr,
   1030     int *mvsadcost[2],
   1031     int *mvcost[2]
   1032 )
   1033 {
   1034     int i, j, step;
   1035 
   1036     unsigned char *what = (*(b->base_src) + b->src);
   1037     int what_stride = b->src_stride;
   1038     unsigned char *in_what;
   1039     int in_what_stride = d->pre_stride;
   1040     unsigned char *best_address;
   1041 
   1042     int tot_steps;
   1043     MV this_mv;
   1044 
   1045     int bestsad = INT_MAX;
   1046     int best_site = 0;
   1047     int last_site = 0;
   1048 
   1049     int ref_row = ref_mv->row >> 3;
   1050     int ref_col = ref_mv->col >> 3;
   1051     int this_row_offset;
   1052     int this_col_offset;
   1053     search_site *ss;
   1054 
   1055     unsigned char *check_here;
   1056     unsigned int thissad;
   1057 
   1058     // Work out the start point for the search
   1059     in_what = (unsigned char *)(*(d->base_pre) + d->pre + (ref_row * (d->pre_stride)) + ref_col);
   1060     best_address = in_what;
   1061 
   1062     // We need to check that the starting point for the search (as indicated by ref_mv) is within the buffer limits
   1063     if ((ref_col > x->mv_col_min) && (ref_col < x->mv_col_max) &&
   1064     (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max))
   1065     {
   1066         // Check the starting position
   1067         bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, ref_mv, mvsadcost, error_per_bit);
   1068     }
   1069 
   1070     // search_param determines the length of the initial step and hence the number of iterations
   1071     // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
   1072     ss = &x->ss[search_param * x->searches_per_step];
   1073     tot_steps = (x->ss_count / x->searches_per_step) - search_param;
   1074 
   1075     i = 1;
   1076     best_mv->row = ref_row;
   1077     best_mv->col = ref_col;
   1078 
   1079     *num00 = 0;
   1080 
   1081     for (step = 0; step < tot_steps ; step++)
   1082     {
   1083         int all_in = 1, t;
   1084 
   1085         // To know if all neighbor points are within the bounds, 4 bounds checking are enough instead of
   1086         // checking 4 bounds for each points.
   1087         all_in &= ((best_mv->row + ss[i].mv.row)> x->mv_row_min);
   1088         all_in &= ((best_mv->row + ss[i+1].mv.row) < x->mv_row_max);
   1089         all_in &= ((best_mv->col + ss[i+2].mv.col) > x->mv_col_min);
   1090         all_in &= ((best_mv->col + ss[i+3].mv.col) < x->mv_col_max);
   1091 
   1092         if (all_in)
   1093         {
   1094             unsigned int sad_array[4];
   1095 
   1096             for (j = 0 ; j < x->searches_per_step ; j += 4)
   1097             {
   1098                 unsigned char *block_offset[4];
   1099 
   1100                 for (t = 0; t < 4; t++)
   1101                     block_offset[t] = ss[i+t].offset + best_address;
   1102 
   1103                 fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, sad_array);
   1104 
   1105                 for (t = 0; t < 4; t++, i++)
   1106                 {
   1107                     if (sad_array[t] < bestsad)
   1108                     {
   1109                         this_mv.row = (best_mv->row + ss[i].mv.row) << 3;
   1110                         this_mv.col = (best_mv->col + ss[i].mv.col) << 3;
   1111                         sad_array[t] += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
   1112 
   1113                         if (sad_array[t] < bestsad)
   1114                         {
   1115                             bestsad = sad_array[t];
   1116                             best_site = i;
   1117                         }
   1118                     }
   1119                 }
   1120             }
   1121         }
   1122         else
   1123         {
   1124             for (j = 0 ; j < x->searches_per_step ; j++)
   1125             {
   1126                 // Trap illegal vectors
   1127                 this_row_offset = best_mv->row + ss[i].mv.row;
   1128                 this_col_offset = best_mv->col + ss[i].mv.col;
   1129 
   1130                 if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
   1131                 (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
   1132                 {
   1133                     check_here = ss[i].offset + best_address;
   1134                     thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
   1135 
   1136                     if (thissad < bestsad)
   1137                     {
   1138                         this_mv.row = this_row_offset << 3;
   1139                         this_mv.col = this_col_offset << 3;
   1140                         thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
   1141 
   1142                         if (thissad < bestsad)
   1143                         {
   1144                             bestsad = thissad;
   1145                             best_site = i;
   1146                         }
   1147                     }
   1148                 }
   1149                 i++;
   1150             }
   1151         }
   1152 
   1153         if (best_site != last_site)
   1154         {
   1155             best_mv->row += ss[best_site].mv.row;
   1156             best_mv->col += ss[best_site].mv.col;
   1157             best_address += ss[best_site].offset;
   1158             last_site = best_site;
   1159         }
   1160         else if (best_address == in_what)
   1161             (*num00)++;
   1162     }
   1163 
   1164     this_mv.row = best_mv->row << 3;
   1165     this_mv.col = best_mv->col << 3;
   1166 
   1167     if (bestsad == INT_MAX)
   1168         return INT_MAX;
   1169 
   1170     return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad))
   1171     + vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
   1172 }
   1173 
   1174 
   1175 #if !(CONFIG_REALTIME_ONLY)
   1176 int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int *mvsadcost[2])
   1177 {
   1178     unsigned char *what = (*(b->base_src) + b->src);
   1179     int what_stride = b->src_stride;
   1180     unsigned char *in_what;
   1181     int in_what_stride = d->pre_stride;
   1182     int mv_stride = d->pre_stride;
   1183     unsigned char *bestaddress;
   1184     MV *best_mv = &d->bmi.mv.as_mv;
   1185     MV this_mv;
   1186     int bestsad = INT_MAX;
   1187     int r, c;
   1188 
   1189     unsigned char *check_here;
   1190     int thissad;
   1191 
   1192     int ref_row = ref_mv->row >> 3;
   1193     int ref_col = ref_mv->col >> 3;
   1194 
   1195     int row_min = ref_row - distance;
   1196     int row_max = ref_row + distance;
   1197     int col_min = ref_col - distance;
   1198     int col_max = ref_col + distance;
   1199 
   1200     // Work out the mid point for the search
   1201     in_what = *(d->base_pre) + d->pre;
   1202     bestaddress = in_what + (ref_row * d->pre_stride) + ref_col;
   1203 
   1204     best_mv->row = ref_row;
   1205     best_mv->col = ref_col;
   1206 
   1207     // We need to check that the starting point for the search (as indicated by ref_mv) is within the buffer limits
   1208     if ((ref_col > x->mv_col_min) && (ref_col < x->mv_col_max) &&
   1209     (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max))
   1210     {
   1211         // Baseline value at the centre
   1212 
   1213         //bestsad = fn_ptr->sf( what,what_stride,bestaddress,in_what_stride) + (int)sqrt(vp8_mv_err_cost(ref_mv,ref_mv, mvcost,error_per_bit*14));
   1214         bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, ref_mv, mvsadcost, error_per_bit);
   1215     }
   1216 
   1217     // Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border
   1218     if (col_min < x->mv_col_min)
   1219         col_min = x->mv_col_min;
   1220 
   1221     if (col_max > x->mv_col_max)
   1222         col_max = x->mv_col_max;
   1223 
   1224     if (row_min < x->mv_row_min)
   1225         row_min = x->mv_row_min;
   1226 
   1227     if (row_max > x->mv_row_max)
   1228         row_max = x->mv_row_max;
   1229 
   1230     for (r = row_min; r < row_max ; r++)
   1231     {
   1232         this_mv.row = r << 3;
   1233         check_here = r * mv_stride + in_what + col_min;
   1234 
   1235         for (c = col_min; c < col_max; c++)
   1236         {
   1237             thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
   1238 
   1239             this_mv.col = c << 3;
   1240             //thissad += (int)sqrt(vp8_mv_err_cost(&this_mv,ref_mv, mvcost,error_per_bit*14));
   1241             //thissad  += error_per_bit * mv_bits_sadcost[mv_bits(&this_mv, ref_mv, mvcost)];
   1242             thissad  += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit); //mv_bits(error_per_bit, &this_mv, ref_mv, mvsadcost);
   1243 
   1244             if (thissad < bestsad)
   1245             {
   1246                 bestsad = thissad;
   1247                 best_mv->row = r;
   1248                 best_mv->col = c;
   1249                 bestaddress = check_here;
   1250             }
   1251 
   1252             check_here++;
   1253         }
   1254     }
   1255 
   1256     this_mv.row = best_mv->row << 3;
   1257     this_mv.col = best_mv->col << 3;
   1258 
   1259     if (bestsad < INT_MAX)
   1260         return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad))
   1261         + vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
   1262     else
   1263         return INT_MAX;
   1264 }
   1265 
   1266 int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int *mvsadcost[2])
   1267 {
   1268     unsigned char *what = (*(b->base_src) + b->src);
   1269     int what_stride = b->src_stride;
   1270     unsigned char *in_what;
   1271     int in_what_stride = d->pre_stride;
   1272     int mv_stride = d->pre_stride;
   1273     unsigned char *bestaddress;
   1274     MV *best_mv = &d->bmi.mv.as_mv;
   1275     MV this_mv;
   1276     int bestsad = INT_MAX;
   1277     int r, c;
   1278 
   1279     unsigned char *check_here;
   1280     unsigned int thissad;
   1281 
   1282     int ref_row = ref_mv->row >> 3;
   1283     int ref_col = ref_mv->col >> 3;
   1284 
   1285     int row_min = ref_row - distance;
   1286     int row_max = ref_row + distance;
   1287     int col_min = ref_col - distance;
   1288     int col_max = ref_col + distance;
   1289 
   1290     unsigned int sad_array[3];
   1291 
   1292     // Work out the mid point for the search
   1293     in_what = *(d->base_pre) + d->pre;
   1294     bestaddress = in_what + (ref_row * d->pre_stride) + ref_col;
   1295 
   1296     best_mv->row = ref_row;
   1297     best_mv->col = ref_col;
   1298 
   1299     // We need to check that the starting point for the search (as indicated by ref_mv) is within the buffer limits
   1300     if ((ref_col > x->mv_col_min) && (ref_col < x->mv_col_max) &&
   1301     (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max))
   1302     {
   1303         // Baseline value at the centre
   1304         bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, ref_mv, mvsadcost, error_per_bit);
   1305     }
   1306 
   1307     // Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border
   1308     if (col_min < x->mv_col_min)
   1309         col_min = x->mv_col_min;
   1310 
   1311     if (col_max > x->mv_col_max)
   1312         col_max = x->mv_col_max;
   1313 
   1314     if (row_min < x->mv_row_min)
   1315         row_min = x->mv_row_min;
   1316 
   1317     if (row_max > x->mv_row_max)
   1318         row_max = x->mv_row_max;
   1319 
   1320     for (r = row_min; r < row_max ; r++)
   1321     {
   1322         this_mv.row = r << 3;
   1323         check_here = r * mv_stride + in_what + col_min;
   1324         c = col_min;
   1325 
   1326         while ((c + 2) < col_max)
   1327         {
   1328             int i;
   1329 
   1330             fn_ptr->sdx3f(what, what_stride, check_here , in_what_stride, sad_array);
   1331 
   1332             for (i = 0; i < 3; i++)
   1333             {
   1334                 thissad = sad_array[i];
   1335 
   1336                 if (thissad < bestsad)
   1337                 {
   1338                     this_mv.col = c << 3;
   1339                     thissad  += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
   1340 
   1341                     if (thissad < bestsad)
   1342                     {
   1343                         bestsad = thissad;
   1344                         best_mv->row = r;
   1345                         best_mv->col = c;
   1346                         bestaddress = check_here;
   1347                     }
   1348                 }
   1349 
   1350                 check_here++;
   1351                 c++;
   1352             }
   1353         }
   1354 
   1355         while (c < col_max)
   1356         {
   1357             thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
   1358 
   1359             if (thissad < bestsad)
   1360             {
   1361                 this_mv.col = c << 3;
   1362                 thissad  += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
   1363 
   1364                 if (thissad < bestsad)
   1365                 {
   1366                     bestsad = thissad;
   1367                     best_mv->row = r;
   1368                     best_mv->col = c;
   1369                     bestaddress = check_here;
   1370                 }
   1371             }
   1372 
   1373             check_here ++;
   1374             c ++;
   1375         }
   1376 
   1377     }
   1378 
   1379     this_mv.row = best_mv->row << 3;
   1380     this_mv.col = best_mv->col << 3;
   1381 
   1382     if (bestsad < INT_MAX)
   1383         return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad))
   1384         + vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
   1385     else
   1386         return INT_MAX;
   1387 }
   1388 #endif
   1389 
   1390 
   1391 int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int *mvsadcost[2])
   1392 {
   1393     unsigned char *what = (*(b->base_src) + b->src);
   1394     int what_stride = b->src_stride;
   1395     unsigned char *in_what;
   1396     int in_what_stride = d->pre_stride;
   1397     int mv_stride = d->pre_stride;
   1398     unsigned char *bestaddress;
   1399     MV *best_mv = &d->bmi.mv.as_mv;
   1400     MV this_mv;
   1401     int bestsad = INT_MAX;
   1402     int r, c;
   1403 
   1404     unsigned char *check_here;
   1405     unsigned int thissad;
   1406 
   1407     int ref_row = ref_mv->row >> 3;
   1408     int ref_col = ref_mv->col >> 3;
   1409 
   1410     int row_min = ref_row - distance;
   1411     int row_max = ref_row + distance;
   1412     int col_min = ref_col - distance;
   1413     int col_max = ref_col + distance;
   1414 
   1415     unsigned short sad_array8[8];
   1416     unsigned int sad_array[3];
   1417 
   1418     // Work out the mid point for the search
   1419     in_what = *(d->base_pre) + d->pre;
   1420     bestaddress = in_what + (ref_row * d->pre_stride) + ref_col;
   1421 
   1422     best_mv->row = ref_row;
   1423     best_mv->col = ref_col;
   1424 
   1425     // We need to check that the starting point for the search (as indicated by ref_mv) is within the buffer limits
   1426     if ((ref_col > x->mv_col_min) && (ref_col < x->mv_col_max) &&
   1427     (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max))
   1428     {
   1429         // Baseline value at the centre
   1430         bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, ref_mv, mvsadcost, error_per_bit);
   1431     }
   1432 
   1433     // Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border
   1434     if (col_min < x->mv_col_min)
   1435         col_min = x->mv_col_min;
   1436 
   1437     if (col_max > x->mv_col_max)
   1438         col_max = x->mv_col_max;
   1439 
   1440     if (row_min < x->mv_row_min)
   1441         row_min = x->mv_row_min;
   1442 
   1443     if (row_max > x->mv_row_max)
   1444         row_max = x->mv_row_max;
   1445 
   1446     for (r = row_min; r < row_max ; r++)
   1447     {
   1448         this_mv.row = r << 3;
   1449         check_here = r * mv_stride + in_what + col_min;
   1450         c = col_min;
   1451 
   1452         while ((c + 7) < col_max)
   1453         {
   1454             int i;
   1455 
   1456             fn_ptr->sdx8f(what, what_stride, check_here , in_what_stride, sad_array8);
   1457 
   1458             for (i = 0; i < 8; i++)
   1459             {
   1460                 thissad = (unsigned int)sad_array8[i];
   1461 
   1462                 if (thissad < bestsad)
   1463                 {
   1464                     this_mv.col = c << 3;
   1465                     thissad  += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
   1466 
   1467                     if (thissad < bestsad)
   1468                     {
   1469                         bestsad = thissad;
   1470                         best_mv->row = r;
   1471                         best_mv->col = c;
   1472                         bestaddress = check_here;
   1473                     }
   1474                 }
   1475 
   1476                 check_here++;
   1477                 c++;
   1478             }
   1479         }
   1480 
   1481         while ((c + 2) < col_max)
   1482         {
   1483             int i;
   1484 
   1485             fn_ptr->sdx3f(what, what_stride, check_here , in_what_stride, sad_array);
   1486 
   1487             for (i = 0; i < 3; i++)
   1488             {
   1489                 thissad = sad_array[i];
   1490 
   1491                 if (thissad < bestsad)
   1492                 {
   1493                     this_mv.col = c << 3;
   1494                     thissad  += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
   1495 
   1496                     if (thissad < bestsad)
   1497                     {
   1498                         bestsad = thissad;
   1499                         best_mv->row = r;
   1500                         best_mv->col = c;
   1501                         bestaddress = check_here;
   1502                     }
   1503                 }
   1504 
   1505                 check_here++;
   1506                 c++;
   1507             }
   1508         }
   1509 
   1510         while (c < col_max)
   1511         {
   1512             thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
   1513 
   1514             if (thissad < bestsad)
   1515             {
   1516                 this_mv.col = c << 3;
   1517                 thissad  += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
   1518 
   1519                 if (thissad < bestsad)
   1520                 {
   1521                     bestsad = thissad;
   1522                     best_mv->row = r;
   1523                     best_mv->col = c;
   1524                     bestaddress = check_here;
   1525                 }
   1526             }
   1527 
   1528             check_here ++;
   1529             c ++;
   1530         }
   1531     }
   1532 
   1533     this_mv.row = best_mv->row << 3;
   1534     this_mv.col = best_mv->col << 3;
   1535 
   1536     if (bestsad < INT_MAX)
   1537         return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad))
   1538         + vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
   1539     else
   1540         return INT_MAX;
   1541 }
   1542 
   1543 #ifdef ENTROPY_STATS
   1544 void print_mode_context(void)
   1545 {
   1546     FILE *f = fopen("modecont.c", "w");
   1547     int i, j;
   1548 
   1549     fprintf(f, "#include \"entropy.h\"\n");
   1550     fprintf(f, "const int vp8_mode_contexts[6][4] =\n");
   1551     fprintf(f, "{\n");
   1552 
   1553     for (j = 0; j < 6; j++)
   1554     {
   1555         fprintf(f, "  { // %d \n", j);
   1556         fprintf(f, "    ");
   1557 
   1558         for (i = 0; i < 4; i++)
   1559         {
   1560             int overal_prob;
   1561             int this_prob;
   1562             int count; // = mv_ref_ct[j][i][0]+mv_ref_ct[j][i][1];
   1563 
   1564             // Overall probs
   1565             count = mv_mode_cts[i][0] + mv_mode_cts[i][1];
   1566 
   1567             if (count)
   1568                 overal_prob = 256 * mv_mode_cts[i][0] / count;
   1569             else
   1570                 overal_prob = 128;
   1571 
   1572             if (overal_prob == 0)
   1573                 overal_prob = 1;
   1574 
   1575             // context probs
   1576             count = mv_ref_ct[j][i][0] + mv_ref_ct[j][i][1];
   1577 
   1578             if (count)
   1579                 this_prob = 256 * mv_ref_ct[j][i][0] / count;
   1580             else
   1581                 this_prob = 128;
   1582 
   1583             if (this_prob == 0)
   1584                 this_prob = 1;
   1585 
   1586             fprintf(f, "%5d, ", this_prob);
   1587             //fprintf(f,"%5d, %5d, %8d,", this_prob, overal_prob, (this_prob << 10)/overal_prob);
   1588             //fprintf(f,"%8d, ", (this_prob << 10)/overal_prob);
   1589         }
   1590 
   1591         fprintf(f, "  },\n");
   1592     }
   1593 
   1594     fprintf(f, "};\n");
   1595     fclose(f);
   1596 }
   1597 
   1598 /* MV ref count ENTROPY_STATS stats code */
   1599 #ifdef ENTROPY_STATS
   1600 void init_mv_ref_counts()
   1601 {
   1602     vpx_memset(mv_ref_ct, 0, sizeof(mv_ref_ct));
   1603     vpx_memset(mv_mode_cts, 0, sizeof(mv_mode_cts));
   1604 }
   1605 
   1606 void accum_mv_refs(MB_PREDICTION_MODE m, const int ct[4])
   1607 {
   1608     if (m == ZEROMV)
   1609     {
   1610         ++mv_ref_ct [ct[0]] [0] [0];
   1611         ++mv_mode_cts[0][0];
   1612     }
   1613     else
   1614     {
   1615         ++mv_ref_ct [ct[0]] [0] [1];
   1616         ++mv_mode_cts[0][1];
   1617 
   1618         if (m == NEARESTMV)
   1619         {
   1620             ++mv_ref_ct [ct[1]] [1] [0];
   1621             ++mv_mode_cts[1][0];
   1622         }
   1623         else
   1624         {
   1625             ++mv_ref_ct [ct[1]] [1] [1];
   1626             ++mv_mode_cts[1][1];
   1627 
   1628             if (m == NEARMV)
   1629             {
   1630                 ++mv_ref_ct [ct[2]] [2] [0];
   1631                 ++mv_mode_cts[2][0];
   1632             }
   1633             else
   1634             {
   1635                 ++mv_ref_ct [ct[2]] [2] [1];
   1636                 ++mv_mode_cts[2][1];
   1637 
   1638                 if (m == NEWMV)
   1639                 {
   1640                     ++mv_ref_ct [ct[3]] [3] [0];
   1641                     ++mv_mode_cts[3][0];
   1642                 }
   1643                 else
   1644                 {
   1645                     ++mv_ref_ct [ct[3]] [3] [1];
   1646                     ++mv_mode_cts[3][1];
   1647                 }
   1648             }
   1649         }
   1650     }
   1651 }
   1652 
   1653 #endif/* END MV ref count ENTROPY_STATS stats code */
   1654 
   1655 #endif
   1656