Home | History | Annotate | Download | only in encoder
      1 /*
      2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 
     12 #include "mcomp.h"
     13 #include "vpx_mem/vpx_mem.h"
     14 
     15 #include <stdio.h>
     16 #include <limits.h>
     17 #include <math.h>
     18 
     #ifdef ENTROPY_STATS
     // Counters that only exist in ENTROPY_STATS builds. Nothing in the visible
     // part of this file reads or writes them.
     static int mv_ref_ct [31] [4] [2];
     static int mv_mode_cts [4] [2];
     #endif

     // Lookup table filled by vp8cx_init_mv_bits_sadcost():
     // entry i holds the integer part of sqrt(16 * i).
     static int mv_bits_sadcost[256];

     // Populate mv_bits_sadcost[] for every index 0..255.
     //
     // The values are computed with exact integer arithmetic. Because 16 * i
     // only grows with i, the running root never has to be reset.
     void vp8cx_init_mv_bits_sadcost(void)
     {
         int i;
         int root = 0;

         for (i = 0; i < 256; i++)
         {
             const int target = i * 16;

             // advance to the largest root with root * root <= target
             while ((root + 1) * (root + 1) <= target)
                 root++;

             mv_bits_sadcost[i] = root;
         }
     }
     35 
     36 
     37 int vp8_mv_bit_cost(MV *mv, MV *ref, int *mvcost[2], int Weight)
     38 {
     39     // MV costing is based on the distribution of vectors in the previous frame and as such will tend to
     40     // over state the cost of vectors. In addition coding a new vector can have a knock on effect on the
     41     // cost of subsequent vectors and the quality of prediction from NEAR and NEAREST for subsequent blocks.
     42     // The "Weight" parameter allows, to a limited extent, for some account to be taken of these factors.
     43     return ((mvcost[0][(mv->row - ref->row) >> 1] + mvcost[1][(mv->col - ref->col) >> 1]) * Weight) >> 7;
     44 }
     45 
     46 static int mv_err_cost(MV *mv, MV *ref, int *mvcost[2], int error_per_bit)
     47 {
     48     //int i;
     49     //return ((mvcost[0][(mv->row - ref->row)>>1] + mvcost[1][(mv->col - ref->col)>>1] + 128) * error_per_bit) >> 8;
     50     //return ( (vp8_mv_bit_cost(mv,  ref, mvcost, 100) + 128) * error_per_bit) >> 8;
     51 
     52     //i = (vp8_mv_bit_cost(mv,  ref, mvcost, 100) * error_per_bit + 128) >> 8;
     53     return ((mvcost[0][(mv->row - ref->row) >> 1] + mvcost[1][(mv->col - ref->col) >> 1]) * error_per_bit + 128) >> 8;
     54     //return (vp8_mv_bit_cost(mv,  ref, mvcost, 128) * error_per_bit + 128) >> 8;
     55 }
     56 
     57 
     58 static int mv_bits(MV *mv, MV *ref, int *mvcost[2])
     59 {
     60     // get the estimated number of bits for a motion vector, to be used for costing in SAD based
     61     // motion estimation
     62     return ((mvcost[0][(mv->row - ref->row) >> 1]  +  mvcost[1][(mv->col - ref->col)>> 1]) + 128) >> 8;
     63 }
     64 
     65 void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride)
     66 {
     67     int Len;
     68     int search_site_count = 0;
     69 
     70 
     71     // Generate offsets for 4 search sites per step.
     72     Len = MAX_FIRST_STEP;
     73     x->ss[search_site_count].mv.col = 0;
     74     x->ss[search_site_count].mv.row = 0;
     75     x->ss[search_site_count].offset = 0;
     76     search_site_count++;
     77 
     78     while (Len > 0)
     79     {
     80 
     81         // Compute offsets for search sites.
     82         x->ss[search_site_count].mv.col = 0;
     83         x->ss[search_site_count].mv.row = -Len;
     84         x->ss[search_site_count].offset = -Len * stride;
     85         search_site_count++;
     86 
     87         // Compute offsets for search sites.
     88         x->ss[search_site_count].mv.col = 0;
     89         x->ss[search_site_count].mv.row = Len;
     90         x->ss[search_site_count].offset = Len * stride;
     91         search_site_count++;
     92 
     93         // Compute offsets for search sites.
     94         x->ss[search_site_count].mv.col = -Len;
     95         x->ss[search_site_count].mv.row = 0;
     96         x->ss[search_site_count].offset = -Len;
     97         search_site_count++;
     98 
     99         // Compute offsets for search sites.
    100         x->ss[search_site_count].mv.col = Len;
    101         x->ss[search_site_count].mv.row = 0;
    102         x->ss[search_site_count].offset = Len;
    103         search_site_count++;
    104 
    105         // Contract.
    106         Len /= 2;
    107     }
    108 
    109     x->ss_count = search_site_count;
    110     x->searches_per_step = 4;
    111 }
    112 
    113 void vp8_init3smotion_compensation(MACROBLOCK *x, int stride)
    114 {
    115     int Len;
    116     int search_site_count = 0;
    117 
    118     // Generate offsets for 8 search sites per step.
    119     Len = MAX_FIRST_STEP;
    120     x->ss[search_site_count].mv.col = 0;
    121     x->ss[search_site_count].mv.row = 0;
    122     x->ss[search_site_count].offset = 0;
    123     search_site_count++;
    124 
    125     while (Len > 0)
    126     {
    127 
    128         // Compute offsets for search sites.
    129         x->ss[search_site_count].mv.col = 0;
    130         x->ss[search_site_count].mv.row = -Len;
    131         x->ss[search_site_count].offset = -Len * stride;
    132         search_site_count++;
    133 
    134         // Compute offsets for search sites.
    135         x->ss[search_site_count].mv.col = 0;
    136         x->ss[search_site_count].mv.row = Len;
    137         x->ss[search_site_count].offset = Len * stride;
    138         search_site_count++;
    139 
    140         // Compute offsets for search sites.
    141         x->ss[search_site_count].mv.col = -Len;
    142         x->ss[search_site_count].mv.row = 0;
    143         x->ss[search_site_count].offset = -Len;
    144         search_site_count++;
    145 
    146         // Compute offsets for search sites.
    147         x->ss[search_site_count].mv.col = Len;
    148         x->ss[search_site_count].mv.row = 0;
    149         x->ss[search_site_count].offset = Len;
    150         search_site_count++;
    151 
    152         // Compute offsets for search sites.
    153         x->ss[search_site_count].mv.col = -Len;
    154         x->ss[search_site_count].mv.row = -Len;
    155         x->ss[search_site_count].offset = -Len * stride - Len;
    156         search_site_count++;
    157 
    158         // Compute offsets for search sites.
    159         x->ss[search_site_count].mv.col = Len;
    160         x->ss[search_site_count].mv.row = -Len;
    161         x->ss[search_site_count].offset = -Len * stride + Len;
    162         search_site_count++;
    163 
    164         // Compute offsets for search sites.
    165         x->ss[search_site_count].mv.col = -Len;
    166         x->ss[search_site_count].mv.row = Len;
    167         x->ss[search_site_count].offset = Len * stride - Len;
    168         search_site_count++;
    169 
    170         // Compute offsets for search sites.
    171         x->ss[search_site_count].mv.col = Len;
    172         x->ss[search_site_count].mv.row = Len;
    173         x->ss[search_site_count].offset = Len * stride + Len;
    174         search_site_count++;
    175 
    176 
    177         // Contract.
    178         Len /= 2;
    179     }
    180 
    181     x->ss_count = search_site_count;
    182     x->searches_per_step = 8;
    183 }
    184 
    185 
    186 #define MVC(r,c) (((mvcost[0][(r)-rr] + mvcost[1][(c) - rc]) * error_per_bit + 128 )>>8 ) // estimated cost of a motion vector (r,c)
    187 #define PRE(r,c) (*(d->base_pre) + d->pre + ((r)>>2) * d->pre_stride + ((c)>>2)) // pointer to predictor base of a motionvector
    188 #define SP(x) (((x)&3)<<1) // convert motion vector component to offset for svf calc
    189 #define DIST(r,c) vfp->svf( PRE(r,c), d->pre_stride, SP(c),SP(r), z,b->src_stride,&sse) // returns subpixel variance error function.
    190 #define IFMVCV(r,c,s,e) if ( c >= minc && c <= maxc && r >= minr && r <= maxr) s else e;
    191 #define ERR(r,c) (MVC(r,c)+DIST(r,c)) // returns distortion + motion vector cost
    192 #define CHECK_BETTER(v,r,c) IFMVCV(r,c,{if((v = ERR(r,c)) < besterr) { besterr = v; br=r; bc=c; }}, v=INT_MAX;)// checks if (r,c) has better score than previous best
    193 #define MIN(x,y) (((x)<(y))?(x):(y))
    194 #define MAX(x,y) (((x)>(y))?(x):(y))
    195 
    196 //#define CHECK_BETTER(v,r,c) if((v = ERR(r,c)) < besterr) { besterr = v; br=r; bc=c; }
    197 
    198 int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2])
    199 {
    200     unsigned char *y = *(d->base_pre) + d->pre + (bestmv->row) * d->pre_stride + bestmv->col;
    201     unsigned char *z = (*(b->base_src) + b->src);
    202 
    203     int rr = ref_mv->row >> 1, rc = ref_mv->col >> 1;
    204     int br = bestmv->row << 2, bc = bestmv->col << 2;
    205     int tr = br, tc = bc;
    206     unsigned int besterr = INT_MAX;
    207     unsigned int left, right, up, down, diag;
    208     unsigned int sse;
    209     unsigned int whichdir;
    210     unsigned int halfiters = 4;
    211     unsigned int quarteriters = 4;
    212 
    213     int minc = MAX(x->mv_col_min << 2, (ref_mv->col >> 1) - ((1 << mvlong_width) - 1));
    214     int maxc = MIN(x->mv_col_max << 2, (ref_mv->col >> 1) + ((1 << mvlong_width) - 1));
    215     int minr = MAX(x->mv_row_min << 2, (ref_mv->row >> 1) - ((1 << mvlong_width) - 1));
    216     int maxr = MIN(x->mv_row_max << 2, (ref_mv->row >> 1) + ((1 << mvlong_width) - 1));
    217 
    218     // central mv
    219     bestmv->row <<= 3;
    220     bestmv->col <<= 3;
    221 
    222     // calculate central point error
    223     besterr = vfp->vf(y, d->pre_stride, z, b->src_stride, &sse);
    224     besterr += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
    225 
    226     // TODO: Each subsequent iteration checks at least one point in common with the last iteration could be 2 ( if diag selected)
    227     while (--halfiters)
    228     {
    229         // 1/2 pel
    230         CHECK_BETTER(left, tr, tc - 2);
    231         CHECK_BETTER(right, tr, tc + 2);
    232         CHECK_BETTER(up, tr - 2, tc);
    233         CHECK_BETTER(down, tr + 2, tc);
    234 
    235         whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
    236 
    237         switch (whichdir)
    238         {
    239         case 0:
    240             CHECK_BETTER(diag, tr - 2, tc - 2);
    241             break;
    242         case 1:
    243             CHECK_BETTER(diag, tr - 2, tc + 2);
    244             break;
    245         case 2:
    246             CHECK_BETTER(diag, tr + 2, tc - 2);
    247             break;
    248         case 3:
    249             CHECK_BETTER(diag, tr + 2, tc + 2);
    250             break;
    251         }
    252 
    253         // no reason to check the same one again.
    254         if (tr == br && tc == bc)
    255             break;
    256 
    257         tr = br;
    258         tc = bc;
    259     }
    260 
    261     // TODO: Each subsequent iteration checks at least one point in common with the last iteration could be 2 ( if diag selected)
    262     // 1/4 pel
    263     while (--quarteriters)
    264     {
    265         CHECK_BETTER(left, tr, tc - 1);
    266         CHECK_BETTER(right, tr, tc + 1);
    267         CHECK_BETTER(up, tr - 1, tc);
    268         CHECK_BETTER(down, tr + 1, tc);
    269 
    270         whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
    271 
    272         switch (whichdir)
    273         {
    274         case 0:
    275             CHECK_BETTER(diag, tr - 1, tc - 1);
    276             break;
    277         case 1:
    278             CHECK_BETTER(diag, tr - 1, tc + 1);
    279             break;
    280         case 2:
    281             CHECK_BETTER(diag, tr + 1, tc - 1);
    282             break;
    283         case 3:
    284             CHECK_BETTER(diag, tr + 1, tc + 1);
    285             break;
    286         }
    287 
    288         // no reason to check the same one again.
    289         if (tr == br && tc == bc)
    290             break;
    291 
    292         tr = br;
    293         tc = bc;
    294     }
    295 
    296     bestmv->row = br << 1;
    297     bestmv->col = bc << 1;
    298 
    299     if ((abs(bestmv->col - ref_mv->col) > MAX_FULL_PEL_VAL) || (abs(bestmv->row - ref_mv->row) > MAX_FULL_PEL_VAL))
    300         return INT_MAX;
    301 
    302     return besterr;
    303 }
    304 #undef MVC
    305 #undef PRE
    306 #undef SP
    307 #undef DIST
    308 #undef ERR
    309 #undef CHECK_BETTER
    310 #undef MIN
    311 #undef MAX
    312 int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2])
    313 {
    314     int bestmse = INT_MAX;
    315     MV startmv;
    316     //MV this_mv;
    317     MV this_mv;
    318     unsigned char *y = *(d->base_pre) + d->pre + (bestmv->row) * d->pre_stride + bestmv->col;
    319     unsigned char *z = (*(b->base_src) + b->src);
    320     int left, right, up, down, diag;
    321     unsigned int sse;
    322     int whichdir ;
    323 
    324 
    325     // Trap uncodable vectors
    326     if ((abs((bestmv->col << 3) - ref_mv->col) > MAX_FULL_PEL_VAL) || (abs((bestmv->row << 3) - ref_mv->row) > MAX_FULL_PEL_VAL))
    327     {
    328         bestmv->row <<= 3;
    329         bestmv->col <<= 3;
    330         return INT_MAX;
    331     }
    332 
    333     // central mv
    334     bestmv->row <<= 3;
    335     bestmv->col <<= 3;
    336     startmv = *bestmv;
    337 
    338     // calculate central point error
    339     bestmse = vfp->vf(y, d->pre_stride, z, b->src_stride, &sse);
    340     bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
    341 
    342     // go left then right and check error
    343     this_mv.row = startmv.row;
    344     this_mv.col = ((startmv.col - 8) | 4);
    345     left = vfp->svf_halfpix_h(y - 1, d->pre_stride, z, b->src_stride, &sse);
    346     left += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    347 
    348     if (left < bestmse)
    349     {
    350         *bestmv = this_mv;
    351         bestmse = left;
    352     }
    353 
    354     this_mv.col += 8;
    355     right = vfp->svf_halfpix_h(y, d->pre_stride, z, b->src_stride, &sse);
    356     right += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    357 
    358     if (right < bestmse)
    359     {
    360         *bestmv = this_mv;
    361         bestmse = right;
    362     }
    363 
    364     // go up then down and check error
    365     this_mv.col = startmv.col;
    366     this_mv.row = ((startmv.row - 8) | 4);
    367     up = vfp->svf_halfpix_v(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
    368     up += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    369 
    370     if (up < bestmse)
    371     {
    372         *bestmv = this_mv;
    373         bestmse = up;
    374     }
    375 
    376     this_mv.row += 8;
    377     down = vfp->svf_halfpix_v(y, d->pre_stride, z, b->src_stride, &sse);
    378     down += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    379 
    380     if (down < bestmse)
    381     {
    382         *bestmv = this_mv;
    383         bestmse = down;
    384     }
    385 
    386 
    387     // now check 1 more diagonal
    388     whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
    389     //for(whichdir =0;whichdir<4;whichdir++)
    390     //{
    391     this_mv = startmv;
    392 
    393     switch (whichdir)
    394     {
    395     case 0:
    396         this_mv.col = (this_mv.col - 8) | 4;
    397         this_mv.row = (this_mv.row - 8) | 4;
    398         diag = vfp->svf_halfpix_hv(y - 1 - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
    399         break;
    400     case 1:
    401         this_mv.col += 4;
    402         this_mv.row = (this_mv.row - 8) | 4;
    403         diag = vfp->svf_halfpix_hv(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
    404         break;
    405     case 2:
    406         this_mv.col = (this_mv.col - 8) | 4;
    407         this_mv.row += 4;
    408         diag = vfp->svf_halfpix_hv(y - 1, d->pre_stride, z, b->src_stride, &sse);
    409         break;
    410     case 3:
    411     default:
    412         this_mv.col += 4;
    413         this_mv.row += 4;
    414         diag = vfp->svf_halfpix_hv(y, d->pre_stride, z, b->src_stride, &sse);
    415         break;
    416     }
    417 
    418     diag += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    419 
    420     if (diag < bestmse)
    421     {
    422         *bestmv = this_mv;
    423         bestmse = diag;
    424     }
    425 
    426 //  }
    427 
    428 
    429     // time to check quarter pels.
    430     if (bestmv->row < startmv.row)
    431         y -= d->pre_stride;
    432 
    433     if (bestmv->col < startmv.col)
    434         y--;
    435 
    436     startmv = *bestmv;
    437 
    438 
    439 
    440     // go left then right and check error
    441     this_mv.row = startmv.row;
    442 
    443     if (startmv.col & 7)
    444     {
    445         this_mv.col = startmv.col - 2;
    446         left = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
    447     }
    448     else
    449     {
    450         this_mv.col = (startmv.col - 8) | 6;
    451         left = vfp->svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);
    452     }
    453 
    454     left += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    455 
    456     if (left < bestmse)
    457     {
    458         *bestmv = this_mv;
    459         bestmse = left;
    460     }
    461 
    462     this_mv.col += 4;
    463     right = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
    464     right += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    465 
    466     if (right < bestmse)
    467     {
    468         *bestmv = this_mv;
    469         bestmse = right;
    470     }
    471 
    472     // go up then down and check error
    473     this_mv.col = startmv.col;
    474 
    475     if (startmv.row & 7)
    476     {
    477         this_mv.row = startmv.row - 2;
    478         up = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
    479     }
    480     else
    481     {
    482         this_mv.row = (startmv.row - 8) | 6;
    483         up = vfp->svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);
    484     }
    485 
    486     up += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    487 
    488     if (up < bestmse)
    489     {
    490         *bestmv = this_mv;
    491         bestmse = up;
    492     }
    493 
    494     this_mv.row += 4;
    495     down = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
    496     down += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    497 
    498     if (down < bestmse)
    499     {
    500         *bestmv = this_mv;
    501         bestmse = down;
    502     }
    503 
    504 
    505     // now check 1 more diagonal
    506     whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
    507 
    508 //  for(whichdir=0;whichdir<4;whichdir++)
    509 //  {
    510     this_mv = startmv;
    511 
    512     switch (whichdir)
    513     {
    514     case 0:
    515 
    516         if (startmv.row & 7)
    517         {
    518             this_mv.row -= 2;
    519 
    520             if (startmv.col & 7)
    521             {
    522                 this_mv.col -= 2;
    523                 diag = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
    524             }
    525             else
    526             {
    527                 this_mv.col = (startmv.col - 8) | 6;
    528                 diag = vfp->svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);;
    529             }
    530         }
    531         else
    532         {
    533             this_mv.row = (startmv.row - 8) | 6;
    534 
    535             if (startmv.col & 7)
    536             {
    537                 this_mv.col -= 2;
    538                 diag = vfp->svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);
    539             }
    540             else
    541             {
    542                 this_mv.col = (startmv.col - 8) | 6;
    543                 diag = vfp->svf(y - d->pre_stride - 1, d->pre_stride, 6, 6, z, b->src_stride, &sse);
    544             }
    545         }
    546 
    547         break;
    548     case 1:
    549         this_mv.col += 2;
    550 
    551         if (startmv.row & 7)
    552         {
    553             this_mv.row -= 2;
    554             diag = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
    555         }
    556         else
    557         {
    558             this_mv.row = (startmv.row - 8) | 6;
    559             diag = vfp->svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);
    560         }
    561 
    562         break;
    563     case 2:
    564         this_mv.row += 2;
    565 
    566         if (startmv.col & 7)
    567         {
    568             this_mv.col -= 2;
    569             diag = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
    570         }
    571         else
    572         {
    573             this_mv.col = (startmv.col - 8) | 6;
    574             diag = vfp->svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);;
    575         }
    576 
    577         break;
    578     case 3:
    579         this_mv.col += 2;
    580         this_mv.row += 2;
    581         diag = vfp->svf(y, d->pre_stride,  this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
    582         break;
    583     }
    584 
    585     diag += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    586 
    587     if (diag < bestmse)
    588     {
    589         *bestmv = this_mv;
    590         bestmse = diag;
    591     }
    592 
    593 //  }
    594 
    595     return bestmse;
    596 }
    597 
    598 int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2])
    599 {
    600     int bestmse = INT_MAX;
    601     MV startmv;
    602     //MV this_mv;
    603     MV this_mv;
    604     unsigned char *y = *(d->base_pre) + d->pre + (bestmv->row) * d->pre_stride + bestmv->col;
    605     unsigned char *z = (*(b->base_src) + b->src);
    606     int left, right, up, down, diag;
    607     unsigned int sse;
    608 
    609     // Trap uncodable vectors
    610     if ((abs((bestmv->col << 3) - ref_mv->col) > MAX_FULL_PEL_VAL) || (abs((bestmv->row << 3) - ref_mv->row) > MAX_FULL_PEL_VAL))
    611     {
    612         bestmv->row <<= 3;
    613         bestmv->col <<= 3;
    614         return INT_MAX;
    615     }
    616 
    617     // central mv
    618     bestmv->row <<= 3;
    619     bestmv->col <<= 3;
    620     startmv = *bestmv;
    621 
    622     // calculate central point error
    623     bestmse = vfp->vf(y, d->pre_stride, z, b->src_stride, &sse);
    624     bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
    625 
    626     // go left then right and check error
    627     this_mv.row = startmv.row;
    628     this_mv.col = ((startmv.col - 8) | 4);
    629     left = vfp->svf_halfpix_h(y - 1, d->pre_stride, z, b->src_stride, &sse);
    630     left += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    631 
    632     if (left < bestmse)
    633     {
    634         *bestmv = this_mv;
    635         bestmse = left;
    636     }
    637 
    638     this_mv.col += 8;
    639     right = vfp->svf_halfpix_h(y, d->pre_stride, z, b->src_stride, &sse);
    640     right += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    641 
    642     if (right < bestmse)
    643     {
    644         *bestmv = this_mv;
    645         bestmse = right;
    646     }
    647 
    648     // go up then down and check error
    649     this_mv.col = startmv.col;
    650     this_mv.row = ((startmv.row - 8) | 4);
    651     up = vfp->svf_halfpix_v(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
    652     up += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    653 
    654     if (up < bestmse)
    655     {
    656         *bestmv = this_mv;
    657         bestmse = up;
    658     }
    659 
    660     this_mv.row += 8;
    661     down = vfp->svf_halfpix_v(y, d->pre_stride, z, b->src_stride, &sse);
    662     down += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    663 
    664     if (down < bestmse)
    665     {
    666         *bestmv = this_mv;
    667         bestmse = down;
    668     }
    669 
    670     // somewhat strangely not doing all the diagonals for half pel is slower than doing them.
    671 #if 0
    672     // now check 1 more diagonal -
    673     whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
    674     this_mv = startmv;
    675 
    676     switch (whichdir)
    677     {
    678     case 0:
    679         this_mv.col = (this_mv.col - 8) | 4;
    680         this_mv.row = (this_mv.row - 8) | 4;
    681         diag = vfp->svf(y - 1 - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse);
    682         break;
    683     case 1:
    684         this_mv.col += 4;
    685         this_mv.row = (this_mv.row - 8) | 4;
    686         diag = vfp->svf(y - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse);
    687         break;
    688     case 2:
    689         this_mv.col = (this_mv.col - 8) | 4;
    690         this_mv.row += 4;
    691         diag = vfp->svf(y - 1, d->pre_stride, 4, 4, z, b->src_stride, &sse);
    692         break;
    693     case 3:
    694         this_mv.col += 4;
    695         this_mv.row += 4;
    696         diag = vfp->svf(y, d->pre_stride, 4, 4, z, b->src_stride, &sse);
    697         break;
    698     }
    699 
    700     diag += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    701 
    702     if (diag < bestmse)
    703     {
    704         *bestmv = this_mv;
    705         bestmse = diag;
    706     }
    707 
    708 #else
    709     this_mv.col = (this_mv.col - 8) | 4;
    710     this_mv.row = (this_mv.row - 8) | 4;
    711     diag = vfp->svf_halfpix_hv(y - 1 - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
    712     diag += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    713 
    714     if (diag < bestmse)
    715     {
    716         *bestmv = this_mv;
    717         bestmse = diag;
    718     }
    719 
    720     this_mv.col += 8;
    721     diag = vfp->svf_halfpix_hv(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
    722     diag += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    723 
    724     if (diag < bestmse)
    725     {
    726         *bestmv = this_mv;
    727         bestmse = diag;
    728     }
    729 
    730     this_mv.col = (this_mv.col - 8) | 4;
    731     this_mv.row = startmv.row + 4;
    732     diag = vfp->svf_halfpix_hv(y - 1, d->pre_stride, z, b->src_stride, &sse);
    733     diag += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    734 
    735     if (diag < bestmse)
    736     {
    737         *bestmv = this_mv;
    738         bestmse = diag;
    739     }
    740 
    741     this_mv.col += 8;
    742     diag = vfp->svf_halfpix_hv(y, d->pre_stride, z, b->src_stride, &sse);
    743     diag += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    744 
    745     if (diag < bestmse)
    746     {
    747         *bestmv = this_mv;
    748         bestmse = diag;
    749     }
    750 
    751 #endif
    752     return bestmse;
    753 }
    754 
    755 
    756 #define MVC(r,c) (((mvsadcost[0][((r)<<2)-rr] + mvsadcost[1][((c)<<2) - rc]) * error_per_bit + 128 )>>8 ) // estimated cost of a motion vector (r,c)
    757 #define PRE(r,c) (*(d->base_pre) + d->pre + (r) * d->pre_stride + (c)) // pointer to predictor base of a motionvector
    758 #define DIST(r,c,v) vfp->sdf( src,src_stride,PRE(r,c),d->pre_stride, v) // returns sad error score.
    759 #define ERR(r,c,v) (MVC(r,c)+DIST(r,c,v)) // returns distortion + motion vector cost
    760 #define CHECK_BETTER(v,r,c) if ((v = ERR(r,c,besterr)) < besterr) { besterr = v; br=r; bc=c; } // checks if (r,c) has better score than previous best
    761 static const MV next_chkpts[6][3] =
    762 {
    763     {{ -2, 0}, { -1, -2}, {1, -2}},
    764     {{ -1, -2}, {1, -2}, {2, 0}},
    765     {{1, -2}, {2, 0}, {1, 2}},
    766     {{2, 0}, {1, 2}, { -1, 2}},
    767     {{1, 2}, { -1, 2}, { -2, 0}},
    768     {{ -1, 2}, { -2, 0}, { -1, -2}}
    769 };
    770 int vp8_hex_search
    771 (
    772     MACROBLOCK *x,
    773     BLOCK *b,
    774     BLOCKD *d,
    775     MV *ref_mv,
    776     MV *best_mv,
    777     int search_param,
    778     int error_per_bit,
    779     int *num00,
    780     const vp8_variance_fn_ptr_t *vfp,
    781     int *mvsadcost[2],
    782     int *mvcost[2],
    783     MV *center_mv
    784 )
    785 {
    786     MV hex[6] = { { -1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0} } ;
    787     MV neighbors[8] = { { -1, -1}, {0, -1}, {1, -1}, { -1, 0}, {1, 0}, { -1, 1}, {0, 1}, {1, 1} } ;
    788     int i, j;
    789     unsigned char *src = (*(b->base_src) + b->src);
    790     int src_stride = b->src_stride;
    791     int rr = center_mv->row, rc = center_mv->col;
    792     int br = ref_mv->row >> 3, bc = ref_mv->col >> 3, tr, tc;
    793     unsigned int besterr, thiserr = 0x7fffffff;
    794     int k = -1, tk;
    795 
    796     if (bc < x->mv_col_min) bc = x->mv_col_min;
    797 
    798     if (bc > x->mv_col_max) bc = x->mv_col_max;
    799 
    800     if (br < x->mv_row_min) br = x->mv_row_min;
    801 
    802     if (br > x->mv_row_max) br = x->mv_row_max;
    803 
    804     rr >>= 1;
    805     rc >>= 1;
    806 
    807     besterr = ERR(br, bc, thiserr);
    808 
    809     // hex search
    810     //j=0
    811     tr = br;
    812     tc = bc;
    813 
    814     for (i = 0; i < 6; i++)
    815     {
    816         int nr = tr + hex[i].row, nc = tc + hex[i].col;
    817 
    818         if (nc < x->mv_col_min) continue;
    819 
    820         if (nc > x->mv_col_max) continue;
    821 
    822         if (nr < x->mv_row_min) continue;
    823 
    824         if (nr > x->mv_row_max) continue;
    825 
    826         //CHECK_BETTER(thiserr,nr,nc);
    827         if ((thiserr = ERR(nr, nc, besterr)) < besterr)
    828         {
    829             besterr = thiserr;
    830             br = nr;
    831             bc = nc;
    832             k = i;
    833         }
    834     }
    835 
    836     if (tr == br && tc == bc)
    837         goto cal_neighbors;
    838 
    839     for (j = 1; j < 127; j++)
    840     {
    841         tr = br;
    842         tc = bc;
    843         tk = k;
    844 
    845         for (i = 0; i < 3; i++)
    846         {
    847             int nr = tr + next_chkpts[tk][i].row, nc = tc + next_chkpts[tk][i].col;
    848 
    849             if (nc < x->mv_col_min) continue;
    850 
    851             if (nc > x->mv_col_max) continue;
    852 
    853             if (nr < x->mv_row_min) continue;
    854 
    855             if (nr > x->mv_row_max) continue;
    856 
    857             //CHECK_BETTER(thiserr,nr,nc);
    858             if ((thiserr = ERR(nr, nc, besterr)) < besterr)
    859             {
    860                 besterr = thiserr;
    861                 br = nr;
    862                 bc = nc; //k=(tk+5+i)%6;}
    863                 k = tk + 5 + i;
    864 
    865                 if (k >= 12) k -= 12;
    866                 else if (k >= 6) k -= 6;
    867             }
    868         }
    869 
    870         if (tr == br && tc == bc)
    871             break;
    872     }
    873 
    874     // check 8 1 away neighbors
    875 cal_neighbors:
    876     tr = br;
    877     tc = bc;
    878 
    879     for (i = 0; i < 8; i++)
    880     {
    881         int nr = tr + neighbors[i].row, nc = tc + neighbors[i].col;
    882 
    883         if (nc < x->mv_col_min) continue;
    884 
    885         if (nc > x->mv_col_max) continue;
    886 
    887         if (nr < x->mv_row_min) continue;
    888 
    889         if (nr > x->mv_row_max) continue;
    890 
    891         CHECK_BETTER(thiserr, nr, nc);
    892     }
    893 
    894     best_mv->row = br;
    895     best_mv->col = bc;
    896 
    897     return vfp->vf(src, src_stride, PRE(br, bc), d->pre_stride, &thiserr) + mv_err_cost(best_mv, center_mv, mvcost, error_per_bit) ;
    898 }
    899 #undef MVC
    900 #undef PRE
    901 #undef SP
    902 #undef DIST
    903 #undef ERR
    904 #undef CHECK_BETTER
    905 
    906 
    907 int vp8_diamond_search_sad
    908 (
    909     MACROBLOCK *x,
    910     BLOCK *b,
    911     BLOCKD *d,
    912     MV *ref_mv,
    913     MV *best_mv,
    914     int search_param,
    915     int error_per_bit,
    916     int *num00,
    917     vp8_variance_fn_ptr_t *fn_ptr,
    918     int *mvsadcost[2],
    919     int *mvcost[2],
    920     MV *center_mv
    921 )
    922 {
    923     int i, j, step;
    924 
    925     unsigned char *what = (*(b->base_src) + b->src);
    926     int what_stride = b->src_stride;
    927     unsigned char *in_what;
    928     int in_what_stride = d->pre_stride;
    929     unsigned char *best_address;
    930 
    931     int tot_steps;
    932     MV this_mv;
    933 
    934     int bestsad = INT_MAX;
    935     int best_site = 0;
    936     int last_site = 0;
    937 
    938     int ref_row = ref_mv->row >> 3;
    939     int ref_col = ref_mv->col >> 3;
    940     int this_row_offset;
    941     int this_col_offset;
    942     search_site *ss;
    943 
    944     unsigned char *check_here;
    945     int thissad;
    946 
    947     *num00 = 0;
    948 
    949     // Work out the start point for the search
    950     in_what = (unsigned char *)(*(d->base_pre) + d->pre + (ref_row * (d->pre_stride)) + ref_col);
    951     best_address = in_what;
    952 
    953     // We need to check that the starting point for the search (as indicated by ref_mv) is within the buffer limits
    954     if ((ref_col > x->mv_col_min) && (ref_col < x->mv_col_max) &&
    955     (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max))
    956     {
    957         // Check the starting position
    958         bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) + mv_err_cost(ref_mv, center_mv, mvsadcost, error_per_bit);
    959     }
    960 
    961     // search_param determines the length of the initial step and hence the number of iterations
    962     // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
    963     ss = &x->ss[search_param * x->searches_per_step];
    964     tot_steps = (x->ss_count / x->searches_per_step) - search_param;
    965 
    966     i = 1;
    967     best_mv->row = ref_row;
    968     best_mv->col = ref_col;
    969 
    970     for (step = 0; step < tot_steps ; step++)
    971     {
    972         for (j = 0 ; j < x->searches_per_step ; j++)
    973         {
    974             // Trap illegal vectors
    975             this_row_offset = best_mv->row + ss[i].mv.row;
    976             this_col_offset = best_mv->col + ss[i].mv.col;
    977 
    978             if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
    979             (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
    980 
    981             {
    982                 check_here = ss[i].offset + best_address;
    983                 thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
    984 
    985                 if (thissad < bestsad)
    986                 {
    987                     this_mv.row = this_row_offset << 3;
    988                     this_mv.col = this_col_offset << 3;
    989                     thissad += mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
    990 
    991                     if (thissad < bestsad)
    992                     {
    993                         bestsad = thissad;
    994                         best_site = i;
    995                     }
    996                 }
    997             }
    998 
    999             i++;
   1000         }
   1001 
   1002         if (best_site != last_site)
   1003         {
   1004             best_mv->row += ss[best_site].mv.row;
   1005             best_mv->col += ss[best_site].mv.col;
   1006             best_address += ss[best_site].offset;
   1007             last_site = best_site;
   1008         }
   1009         else if (best_address == in_what)
   1010             (*num00)++;
   1011     }
   1012 
   1013     this_mv.row = best_mv->row << 3;
   1014     this_mv.col = best_mv->col << 3;
   1015 
   1016     if (bestsad == INT_MAX)
   1017         return INT_MAX;
   1018 
   1019     return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad))
   1020     + mv_err_cost(&this_mv, center_mv, mvcost, error_per_bit);
   1021 }
   1022 
   1023 int vp8_diamond_search_sadx4
   1024 (
   1025     MACROBLOCK *x,
   1026     BLOCK *b,
   1027     BLOCKD *d,
   1028     MV *ref_mv,
   1029     MV *best_mv,
   1030     int search_param,
   1031     int error_per_bit,
   1032     int *num00,
   1033     vp8_variance_fn_ptr_t *fn_ptr,
   1034     int *mvsadcost[2],
   1035     int *mvcost[2],
   1036     MV *center_mv
   1037 )
   1038 {
   1039     int i, j, step;
   1040 
   1041     unsigned char *what = (*(b->base_src) + b->src);
   1042     int what_stride = b->src_stride;
   1043     unsigned char *in_what;
   1044     int in_what_stride = d->pre_stride;
   1045     unsigned char *best_address;
   1046 
   1047     int tot_steps;
   1048     MV this_mv;
   1049 
   1050     int bestsad = INT_MAX;
   1051     int best_site = 0;
   1052     int last_site = 0;
   1053 
   1054     int ref_row = ref_mv->row >> 3;
   1055     int ref_col = ref_mv->col >> 3;
   1056     int this_row_offset;
   1057     int this_col_offset;
   1058     search_site *ss;
   1059 
   1060     unsigned char *check_here;
   1061     unsigned int thissad;
   1062 
   1063     *num00 = 0;
   1064 
   1065     // Work out the start point for the search
   1066     in_what = (unsigned char *)(*(d->base_pre) + d->pre + (ref_row * (d->pre_stride)) + ref_col);
   1067     best_address = in_what;
   1068 
   1069     // We need to check that the starting point for the search (as indicated by ref_mv) is within the buffer limits
   1070     if ((ref_col > x->mv_col_min) && (ref_col < x->mv_col_max) &&
   1071     (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max))
   1072     {
   1073         // Check the starting position
   1074         bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) + mv_err_cost(ref_mv, center_mv, mvsadcost, error_per_bit);
   1075     }
   1076 
   1077     // search_param determines the length of the initial step and hence the number of iterations
   1078     // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
   1079     ss = &x->ss[search_param * x->searches_per_step];
   1080     tot_steps = (x->ss_count / x->searches_per_step) - search_param;
   1081 
   1082     i = 1;
   1083     best_mv->row = ref_row;
   1084     best_mv->col = ref_col;
   1085 
   1086     for (step = 0; step < tot_steps ; step++)
   1087     {
   1088         int all_in = 1, t;
   1089 
   1090         // To know if all neighbor points are within the bounds, 4 bounds checking are enough instead of
   1091         // checking 4 bounds for each points.
   1092         all_in &= ((best_mv->row + ss[i].mv.row)> x->mv_row_min);
   1093         all_in &= ((best_mv->row + ss[i+1].mv.row) < x->mv_row_max);
   1094         all_in &= ((best_mv->col + ss[i+2].mv.col) > x->mv_col_min);
   1095         all_in &= ((best_mv->col + ss[i+3].mv.col) < x->mv_col_max);
   1096 
   1097         if (all_in)
   1098         {
   1099             unsigned int sad_array[4];
   1100 
   1101             for (j = 0 ; j < x->searches_per_step ; j += 4)
   1102             {
   1103                 unsigned char *block_offset[4];
   1104 
   1105                 for (t = 0; t < 4; t++)
   1106                     block_offset[t] = ss[i+t].offset + best_address;
   1107 
   1108                 fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, sad_array);
   1109 
   1110                 for (t = 0; t < 4; t++, i++)
   1111                 {
   1112                     if (sad_array[t] < bestsad)
   1113                     {
   1114                         this_mv.row = (best_mv->row + ss[i].mv.row) << 3;
   1115                         this_mv.col = (best_mv->col + ss[i].mv.col) << 3;
   1116                         sad_array[t] += mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
   1117 
   1118                         if (sad_array[t] < bestsad)
   1119                         {
   1120                             bestsad = sad_array[t];
   1121                             best_site = i;
   1122                         }
   1123                     }
   1124                 }
   1125             }
   1126         }
   1127         else
   1128         {
   1129             for (j = 0 ; j < x->searches_per_step ; j++)
   1130             {
   1131                 // Trap illegal vectors
   1132                 this_row_offset = best_mv->row + ss[i].mv.row;
   1133                 this_col_offset = best_mv->col + ss[i].mv.col;
   1134 
   1135                 if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
   1136                 (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
   1137                 {
   1138                     check_here = ss[i].offset + best_address;
   1139                     thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
   1140 
   1141                     if (thissad < bestsad)
   1142                     {
   1143                         this_mv.row = this_row_offset << 3;
   1144                         this_mv.col = this_col_offset << 3;
   1145                         thissad += mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
   1146 
   1147                         if (thissad < bestsad)
   1148                         {
   1149                             bestsad = thissad;
   1150                             best_site = i;
   1151                         }
   1152                     }
   1153                 }
   1154                 i++;
   1155             }
   1156         }
   1157 
   1158         if (best_site != last_site)
   1159         {
   1160             best_mv->row += ss[best_site].mv.row;
   1161             best_mv->col += ss[best_site].mv.col;
   1162             best_address += ss[best_site].offset;
   1163             last_site = best_site;
   1164         }
   1165         else if (best_address == in_what)
   1166             (*num00)++;
   1167     }
   1168 
   1169     this_mv.row = best_mv->row << 3;
   1170     this_mv.col = best_mv->col << 3;
   1171 
   1172     if (bestsad == INT_MAX)
   1173         return INT_MAX;
   1174 
   1175     return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad))
   1176     + mv_err_cost(&this_mv, center_mv, mvcost, error_per_bit);
   1177 }
   1178 
   1179 
   1180 #if !(CONFIG_REALTIME_ONLY)
   1181 int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int *mvsadcost[2], MV *center_mv)
   1182 {
   1183     unsigned char *what = (*(b->base_src) + b->src);
   1184     int what_stride = b->src_stride;
   1185     unsigned char *in_what;
   1186     int in_what_stride = d->pre_stride;
   1187     int mv_stride = d->pre_stride;
   1188     unsigned char *bestaddress;
   1189     MV *best_mv = &d->bmi.mv.as_mv;
   1190     MV this_mv;
   1191     int bestsad = INT_MAX;
   1192     int r, c;
   1193 
   1194     unsigned char *check_here;
   1195     int thissad;
   1196 
   1197     int ref_row = ref_mv->row >> 3;
   1198     int ref_col = ref_mv->col >> 3;
   1199 
   1200     int row_min = ref_row - distance;
   1201     int row_max = ref_row + distance;
   1202     int col_min = ref_col - distance;
   1203     int col_max = ref_col + distance;
   1204 
   1205     // Work out the mid point for the search
   1206     in_what = *(d->base_pre) + d->pre;
   1207     bestaddress = in_what + (ref_row * d->pre_stride) + ref_col;
   1208 
   1209     best_mv->row = ref_row;
   1210     best_mv->col = ref_col;
   1211 
   1212     // We need to check that the starting point for the search (as indicated by ref_mv) is within the buffer limits
   1213     if ((ref_col > x->mv_col_min) && (ref_col < x->mv_col_max) &&
   1214     (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max))
   1215     {
   1216         // Baseline value at the centre
   1217 
   1218         //bestsad = fn_ptr->sf( what,what_stride,bestaddress,in_what_stride) + (int)sqrt(mv_err_cost(ref_mv,ref_mv, mvcost,error_per_bit*14));
   1219         bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + mv_err_cost(ref_mv, center_mv, mvsadcost, error_per_bit);
   1220     }
   1221 
   1222     // Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border
   1223     if (col_min < x->mv_col_min)
   1224         col_min = x->mv_col_min;
   1225 
   1226     if (col_max > x->mv_col_max)
   1227         col_max = x->mv_col_max;
   1228 
   1229     if (row_min < x->mv_row_min)
   1230         row_min = x->mv_row_min;
   1231 
   1232     if (row_max > x->mv_row_max)
   1233         row_max = x->mv_row_max;
   1234 
   1235     for (r = row_min; r < row_max ; r++)
   1236     {
   1237         this_mv.row = r << 3;
   1238         check_here = r * mv_stride + in_what + col_min;
   1239 
   1240         for (c = col_min; c < col_max; c++)
   1241         {
   1242             thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
   1243 
   1244             this_mv.col = c << 3;
   1245             //thissad += (int)sqrt(mv_err_cost(&this_mv,ref_mv, mvcost,error_per_bit*14));
   1246             //thissad  += error_per_bit * mv_bits_sadcost[mv_bits(&this_mv, ref_mv, mvcost)];
   1247             thissad  += mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit); //mv_bits(error_per_bit, &this_mv, ref_mv, mvsadcost);
   1248 
   1249             if (thissad < bestsad)
   1250             {
   1251                 bestsad = thissad;
   1252                 best_mv->row = r;
   1253                 best_mv->col = c;
   1254                 bestaddress = check_here;
   1255             }
   1256 
   1257             check_here++;
   1258         }
   1259     }
   1260 
   1261     this_mv.row = best_mv->row << 3;
   1262     this_mv.col = best_mv->col << 3;
   1263 
   1264     if (bestsad < INT_MAX)
   1265         return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad))
   1266         + mv_err_cost(&this_mv, center_mv, mvcost, error_per_bit);
   1267     else
   1268         return INT_MAX;
   1269 }
   1270 
   1271 int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int *mvsadcost[2], MV *center_mv)
   1272 {
   1273     unsigned char *what = (*(b->base_src) + b->src);
   1274     int what_stride = b->src_stride;
   1275     unsigned char *in_what;
   1276     int in_what_stride = d->pre_stride;
   1277     int mv_stride = d->pre_stride;
   1278     unsigned char *bestaddress;
   1279     MV *best_mv = &d->bmi.mv.as_mv;
   1280     MV this_mv;
   1281     int bestsad = INT_MAX;
   1282     int r, c;
   1283 
   1284     unsigned char *check_here;
   1285     unsigned int thissad;
   1286 
   1287     int ref_row = ref_mv->row >> 3;
   1288     int ref_col = ref_mv->col >> 3;
   1289 
   1290     int row_min = ref_row - distance;
   1291     int row_max = ref_row + distance;
   1292     int col_min = ref_col - distance;
   1293     int col_max = ref_col + distance;
   1294 
   1295     unsigned int sad_array[3];
   1296 
   1297     // Work out the mid point for the search
   1298     in_what = *(d->base_pre) + d->pre;
   1299     bestaddress = in_what + (ref_row * d->pre_stride) + ref_col;
   1300 
   1301     best_mv->row = ref_row;
   1302     best_mv->col = ref_col;
   1303 
   1304     // We need to check that the starting point for the search (as indicated by ref_mv) is within the buffer limits
   1305     if ((ref_col > x->mv_col_min) && (ref_col < x->mv_col_max) &&
   1306     (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max))
   1307     {
   1308         // Baseline value at the centre
   1309         bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + mv_err_cost(ref_mv, center_mv, mvsadcost, error_per_bit);
   1310     }
   1311 
   1312     // Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border
   1313     if (col_min < x->mv_col_min)
   1314         col_min = x->mv_col_min;
   1315 
   1316     if (col_max > x->mv_col_max)
   1317         col_max = x->mv_col_max;
   1318 
   1319     if (row_min < x->mv_row_min)
   1320         row_min = x->mv_row_min;
   1321 
   1322     if (row_max > x->mv_row_max)
   1323         row_max = x->mv_row_max;
   1324 
   1325     for (r = row_min; r < row_max ; r++)
   1326     {
   1327         this_mv.row = r << 3;
   1328         check_here = r * mv_stride + in_what + col_min;
   1329         c = col_min;
   1330 
   1331         while ((c + 2) < col_max)
   1332         {
   1333             int i;
   1334 
   1335             fn_ptr->sdx3f(what, what_stride, check_here , in_what_stride, sad_array);
   1336 
   1337             for (i = 0; i < 3; i++)
   1338             {
   1339                 thissad = sad_array[i];
   1340 
   1341                 if (thissad < bestsad)
   1342                 {
   1343                     this_mv.col = c << 3;
   1344                     thissad  += mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
   1345 
   1346                     if (thissad < bestsad)
   1347                     {
   1348                         bestsad = thissad;
   1349                         best_mv->row = r;
   1350                         best_mv->col = c;
   1351                         bestaddress = check_here;
   1352                     }
   1353                 }
   1354 
   1355                 check_here++;
   1356                 c++;
   1357             }
   1358         }
   1359 
   1360         while (c < col_max)
   1361         {
   1362             thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
   1363 
   1364             if (thissad < bestsad)
   1365             {
   1366                 this_mv.col = c << 3;
   1367                 thissad  += mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
   1368 
   1369                 if (thissad < bestsad)
   1370                 {
   1371                     bestsad = thissad;
   1372                     best_mv->row = r;
   1373                     best_mv->col = c;
   1374                     bestaddress = check_here;
   1375                 }
   1376             }
   1377 
   1378             check_here ++;
   1379             c ++;
   1380         }
   1381 
   1382     }
   1383 
   1384     this_mv.row = best_mv->row << 3;
   1385     this_mv.col = best_mv->col << 3;
   1386 
   1387     if (bestsad < INT_MAX)
   1388         return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad))
   1389         + mv_err_cost(&this_mv, center_mv, mvcost, error_per_bit);
   1390     else
   1391         return INT_MAX;
   1392 }
   1393 
   1394 int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int *mvsadcost[2], MV *center_mv)
   1395 {
   1396     unsigned char *what = (*(b->base_src) + b->src);
   1397     int what_stride = b->src_stride;
   1398     unsigned char *in_what;
   1399     int in_what_stride = d->pre_stride;
   1400     int mv_stride = d->pre_stride;
   1401     unsigned char *bestaddress;
   1402     MV *best_mv = &d->bmi.mv.as_mv;
   1403     MV this_mv;
   1404     int bestsad = INT_MAX;
   1405     int r, c;
   1406 
   1407     unsigned char *check_here;
   1408     unsigned int thissad;
   1409 
   1410     int ref_row = ref_mv->row >> 3;
   1411     int ref_col = ref_mv->col >> 3;
   1412 
   1413     int row_min = ref_row - distance;
   1414     int row_max = ref_row + distance;
   1415     int col_min = ref_col - distance;
   1416     int col_max = ref_col + distance;
   1417 
   1418     DECLARE_ALIGNED_ARRAY(16, unsigned short, sad_array8, 8);
   1419     unsigned int sad_array[3];
   1420 
   1421     // Work out the mid point for the search
   1422     in_what = *(d->base_pre) + d->pre;
   1423     bestaddress = in_what + (ref_row * d->pre_stride) + ref_col;
   1424 
   1425     best_mv->row = ref_row;
   1426     best_mv->col = ref_col;
   1427 
   1428     // We need to check that the starting point for the search (as indicated by ref_mv) is within the buffer limits
   1429     if ((ref_col > x->mv_col_min) && (ref_col < x->mv_col_max) &&
   1430     (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max))
   1431     {
   1432         // Baseline value at the centre
   1433         bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + mv_err_cost(ref_mv, center_mv, mvsadcost, error_per_bit);
   1434     }
   1435 
   1436     // Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border
   1437     if (col_min < x->mv_col_min)
   1438         col_min = x->mv_col_min;
   1439 
   1440     if (col_max > x->mv_col_max)
   1441         col_max = x->mv_col_max;
   1442 
   1443     if (row_min < x->mv_row_min)
   1444         row_min = x->mv_row_min;
   1445 
   1446     if (row_max > x->mv_row_max)
   1447         row_max = x->mv_row_max;
   1448 
   1449     for (r = row_min; r < row_max ; r++)
   1450     {
   1451         this_mv.row = r << 3;
   1452         check_here = r * mv_stride + in_what + col_min;
   1453         c = col_min;
   1454 
   1455         while ((c + 7) < col_max)
   1456         {
   1457             int i;
   1458 
   1459             fn_ptr->sdx8f(what, what_stride, check_here , in_what_stride, sad_array8);
   1460 
   1461             for (i = 0; i < 8; i++)
   1462             {
   1463                 thissad = (unsigned int)sad_array8[i];
   1464 
   1465                 if (thissad < bestsad)
   1466                 {
   1467                     this_mv.col = c << 3;
   1468                     thissad  += mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
   1469 
   1470                     if (thissad < bestsad)
   1471                     {
   1472                         bestsad = thissad;
   1473                         best_mv->row = r;
   1474                         best_mv->col = c;
   1475                         bestaddress = check_here;
   1476                     }
   1477                 }
   1478 
   1479                 check_here++;
   1480                 c++;
   1481             }
   1482         }
   1483 
   1484         while ((c + 2) < col_max)
   1485         {
   1486             int i;
   1487 
   1488             fn_ptr->sdx3f(what, what_stride, check_here , in_what_stride, sad_array);
   1489 
   1490             for (i = 0; i < 3; i++)
   1491             {
   1492                 thissad = sad_array[i];
   1493 
   1494                 if (thissad < bestsad)
   1495                 {
   1496                     this_mv.col = c << 3;
   1497                     thissad  += mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
   1498 
   1499                     if (thissad < bestsad)
   1500                     {
   1501                         bestsad = thissad;
   1502                         best_mv->row = r;
   1503                         best_mv->col = c;
   1504                         bestaddress = check_here;
   1505                     }
   1506                 }
   1507 
   1508                 check_here++;
   1509                 c++;
   1510             }
   1511         }
   1512 
   1513         while (c < col_max)
   1514         {
   1515             thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
   1516 
   1517             if (thissad < bestsad)
   1518             {
   1519                 this_mv.col = c << 3;
   1520                 thissad  += mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
   1521 
   1522                 if (thissad < bestsad)
   1523                 {
   1524                     bestsad = thissad;
   1525                     best_mv->row = r;
   1526                     best_mv->col = c;
   1527                     bestaddress = check_here;
   1528                 }
   1529             }
   1530 
   1531             check_here ++;
   1532             c ++;
   1533         }
   1534     }
   1535 
   1536     this_mv.row = best_mv->row << 3;
   1537     this_mv.col = best_mv->col << 3;
   1538 
   1539     if (bestsad < INT_MAX)
   1540         return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad))
   1541         + mv_err_cost(&this_mv, center_mv, mvcost, error_per_bit);
   1542     else
   1543         return INT_MAX;
   1544 }
   1545 #endif /* !(CONFIG_REALTIME_ONLY) */
   1546 
   1547 #ifdef ENTROPY_STATS
   1548 void print_mode_context(void)
   1549 {
   1550     FILE *f = fopen("modecont.c", "w");
   1551     int i, j;
   1552 
   1553     fprintf(f, "#include \"entropy.h\"\n");
   1554     fprintf(f, "const int vp8_mode_contexts[6][4] =\n");
   1555     fprintf(f, "{\n");
   1556 
   1557     for (j = 0; j < 6; j++)
   1558     {
   1559         fprintf(f, "  { // %d \n", j);
   1560         fprintf(f, "    ");
   1561 
   1562         for (i = 0; i < 4; i++)
   1563         {
   1564             int overal_prob;
   1565             int this_prob;
   1566             int count; // = mv_ref_ct[j][i][0]+mv_ref_ct[j][i][1];
   1567 
   1568             // Overall probs
   1569             count = mv_mode_cts[i][0] + mv_mode_cts[i][1];
   1570 
   1571             if (count)
   1572                 overal_prob = 256 * mv_mode_cts[i][0] / count;
   1573             else
   1574                 overal_prob = 128;
   1575 
   1576             if (overal_prob == 0)
   1577                 overal_prob = 1;
   1578 
   1579             // context probs
   1580             count = mv_ref_ct[j][i][0] + mv_ref_ct[j][i][1];
   1581 
   1582             if (count)
   1583                 this_prob = 256 * mv_ref_ct[j][i][0] / count;
   1584             else
   1585                 this_prob = 128;
   1586 
   1587             if (this_prob == 0)
   1588                 this_prob = 1;
   1589 
   1590             fprintf(f, "%5d, ", this_prob);
   1591             //fprintf(f,"%5d, %5d, %8d,", this_prob, overal_prob, (this_prob << 10)/overal_prob);
   1592             //fprintf(f,"%8d, ", (this_prob << 10)/overal_prob);
   1593         }
   1594 
   1595         fprintf(f, "  },\n");
   1596     }
   1597 
   1598     fprintf(f, "};\n");
   1599     fclose(f);
   1600 }
   1601 
   1602 /* MV ref count ENTROPY_STATS stats code */
   1603 #ifdef ENTROPY_STATS
   1604 void init_mv_ref_counts()
   1605 {
   1606     vpx_memset(mv_ref_ct, 0, sizeof(mv_ref_ct));
   1607     vpx_memset(mv_mode_cts, 0, sizeof(mv_mode_cts));
   1608 }
   1609 
   1610 void accum_mv_refs(MB_PREDICTION_MODE m, const int ct[4])
   1611 {
   1612     if (m == ZEROMV)
   1613     {
   1614         ++mv_ref_ct [ct[0]] [0] [0];
   1615         ++mv_mode_cts[0][0];
   1616     }
   1617     else
   1618     {
   1619         ++mv_ref_ct [ct[0]] [0] [1];
   1620         ++mv_mode_cts[0][1];
   1621 
   1622         if (m == NEARESTMV)
   1623         {
   1624             ++mv_ref_ct [ct[1]] [1] [0];
   1625             ++mv_mode_cts[1][0];
   1626         }
   1627         else
   1628         {
   1629             ++mv_ref_ct [ct[1]] [1] [1];
   1630             ++mv_mode_cts[1][1];
   1631 
   1632             if (m == NEARMV)
   1633             {
   1634                 ++mv_ref_ct [ct[2]] [2] [0];
   1635                 ++mv_mode_cts[2][0];
   1636             }
   1637             else
   1638             {
   1639                 ++mv_ref_ct [ct[2]] [2] [1];
   1640                 ++mv_mode_cts[2][1];
   1641 
   1642                 if (m == NEWMV)
   1643                 {
   1644                     ++mv_ref_ct [ct[3]] [3] [0];
   1645                     ++mv_mode_cts[3][0];
   1646                 }
   1647                 else
   1648                 {
   1649                     ++mv_ref_ct [ct[3]] [3] [1];
   1650                     ++mv_mode_cts[3][1];
   1651                 }
   1652             }
   1653         }
   1654     }
   1655 }
   1656 
   1657 #endif/* END MV ref count ENTROPY_STATS stats code */
   1658 
   1659 #endif
   1660