Home | History | Annotate | Download | only in encoder
      1 /*
      2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 
     12 #include "./vp8_rtcd.h"
     13 #include "./vpx_dsp_rtcd.h"
     14 #include "onyx_int.h"
     15 #include "mcomp.h"
     16 #include "vpx_mem/vpx_mem.h"
     17 #include "vpx_config.h"
     18 #include <stdio.h>
     19 #include <limits.h>
     20 #include <math.h>
     21 #include "vp8/common/findnearmv.h"
     22 #include "vp8/common/common.h"
     23 #include "vpx_dsp/vpx_dsp_common.h"
     24 
     25 #ifdef VP8_ENTROPY_STATS
     26 static int mv_ref_ct [31] [4] [2];
     27 static int mv_mode_cts [4] [2];
     28 #endif
     29 
     30 int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int Weight)
     31 {
     32     /* MV costing is based on the distribution of vectors in the previous
     33      * frame and as such will tend to over state the cost of vectors. In
     34      * addition coding a new vector can have a knock on effect on the cost
     35      * of subsequent vectors and the quality of prediction from NEAR and
     36      * NEAREST for subsequent blocks. The "Weight" parameter allows, to a
     37      * limited extent, for some account to be taken of these factors.
     38      */
     39     return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1] + mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1]) * Weight) >> 7;
     40 }
     41 
     42 static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int error_per_bit)
     43 {
     44     /* Ignore mv costing if mvcost is NULL */
     45     if (mvcost)
     46         return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1] +
     47                  mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1])
     48                  * error_per_bit + 128) >> 8;
     49     return 0;
     50 }
     51 
     52 static int mvsad_err_cost(int_mv *mv, int_mv *ref, int *mvsadcost[2], int error_per_bit)
     53 {
     54     /* Calculate sad error cost on full pixel basis. */
     55     /* Ignore mv costing if mvsadcost is NULL */
     56     if (mvsadcost)
     57         return ((mvsadcost[0][(mv->as_mv.row - ref->as_mv.row)] +
     58                  mvsadcost[1][(mv->as_mv.col - ref->as_mv.col)])
     59                 * error_per_bit + 128) >> 8;
     60     return 0;
     61 }
     62 
     63 void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride)
     64 {
     65     int Len;
     66     int search_site_count = 0;
     67 
     68 
     69     /* Generate offsets for 4 search sites per step. */
     70     Len = MAX_FIRST_STEP;
     71     x->ss[search_site_count].mv.col = 0;
     72     x->ss[search_site_count].mv.row = 0;
     73     x->ss[search_site_count].offset = 0;
     74     search_site_count++;
     75 
     76     while (Len > 0)
     77     {
     78 
     79         /* Compute offsets for search sites. */
     80         x->ss[search_site_count].mv.col = 0;
     81         x->ss[search_site_count].mv.row = -Len;
     82         x->ss[search_site_count].offset = -Len * stride;
     83         search_site_count++;
     84 
     85         /* Compute offsets for search sites. */
     86         x->ss[search_site_count].mv.col = 0;
     87         x->ss[search_site_count].mv.row = Len;
     88         x->ss[search_site_count].offset = Len * stride;
     89         search_site_count++;
     90 
     91         /* Compute offsets for search sites. */
     92         x->ss[search_site_count].mv.col = -Len;
     93         x->ss[search_site_count].mv.row = 0;
     94         x->ss[search_site_count].offset = -Len;
     95         search_site_count++;
     96 
     97         /* Compute offsets for search sites. */
     98         x->ss[search_site_count].mv.col = Len;
     99         x->ss[search_site_count].mv.row = 0;
    100         x->ss[search_site_count].offset = Len;
    101         search_site_count++;
    102 
    103         /* Contract. */
    104         Len /= 2;
    105     }
    106 
    107     x->ss_count = search_site_count;
    108     x->searches_per_step = 4;
    109 }
    110 
    111 void vp8_init3smotion_compensation(MACROBLOCK *x, int stride)
    112 {
    113     int Len;
    114     int search_site_count = 0;
    115 
    116     /* Generate offsets for 8 search sites per step. */
    117     Len = MAX_FIRST_STEP;
    118     x->ss[search_site_count].mv.col = 0;
    119     x->ss[search_site_count].mv.row = 0;
    120     x->ss[search_site_count].offset = 0;
    121     search_site_count++;
    122 
    123     while (Len > 0)
    124     {
    125 
    126         /* Compute offsets for search sites. */
    127         x->ss[search_site_count].mv.col = 0;
    128         x->ss[search_site_count].mv.row = -Len;
    129         x->ss[search_site_count].offset = -Len * stride;
    130         search_site_count++;
    131 
    132         /* Compute offsets for search sites. */
    133         x->ss[search_site_count].mv.col = 0;
    134         x->ss[search_site_count].mv.row = Len;
    135         x->ss[search_site_count].offset = Len * stride;
    136         search_site_count++;
    137 
    138         /* Compute offsets for search sites. */
    139         x->ss[search_site_count].mv.col = -Len;
    140         x->ss[search_site_count].mv.row = 0;
    141         x->ss[search_site_count].offset = -Len;
    142         search_site_count++;
    143 
    144         /* Compute offsets for search sites. */
    145         x->ss[search_site_count].mv.col = Len;
    146         x->ss[search_site_count].mv.row = 0;
    147         x->ss[search_site_count].offset = Len;
    148         search_site_count++;
    149 
    150         /* Compute offsets for search sites. */
    151         x->ss[search_site_count].mv.col = -Len;
    152         x->ss[search_site_count].mv.row = -Len;
    153         x->ss[search_site_count].offset = -Len * stride - Len;
    154         search_site_count++;
    155 
    156         /* Compute offsets for search sites. */
    157         x->ss[search_site_count].mv.col = Len;
    158         x->ss[search_site_count].mv.row = -Len;
    159         x->ss[search_site_count].offset = -Len * stride + Len;
    160         search_site_count++;
    161 
    162         /* Compute offsets for search sites. */
    163         x->ss[search_site_count].mv.col = -Len;
    164         x->ss[search_site_count].mv.row = Len;
    165         x->ss[search_site_count].offset = Len * stride - Len;
    166         search_site_count++;
    167 
    168         /* Compute offsets for search sites. */
    169         x->ss[search_site_count].mv.col = Len;
    170         x->ss[search_site_count].mv.row = Len;
    171         x->ss[search_site_count].offset = Len * stride + Len;
    172         search_site_count++;
    173 
    174 
    175         /* Contract. */
    176         Len /= 2;
    177     }
    178 
    179     x->ss_count = search_site_count;
    180     x->searches_per_step = 8;
    181 }
    182 
/*
 * To avoid the penalty of reads that cross a cache line, the reference area
 * is preloaded into a small aligned buffer, so that reads from this buffer
 * never cross a cache line. This reduces the CPU cycles spent reading
 * reference data in the sub-pixel filter functions.
 * TODO: Currently, since the sub-pixel search range here is -3 ~ 3, a
 * 22 rows x 32 cols area is copied, which is enough for a 16x16 macroblock.
 * Later, for SPLITMV, the area could be reduced.
 */
    192 
    193 /* estimated cost of a motion vector (r,c) */
    194 #define MVC(r,c) (mvcost ? ((mvcost[0][(r)-rr] + mvcost[1][(c) - rc]) * error_per_bit + 128 )>>8 : 0)
    195 /* pointer to predictor base of a motionvector */
    196 #define PRE(r,c) (y + (((r)>>2) * y_stride + ((c)>>2) -(offset)))
    197 /* convert motion vector component to offset for svf calc */
    198 #define SP(x) (((x)&3)<<1)
    199 /* returns subpixel variance error function. */
    200 #define DIST(r,c) vfp->svf( PRE(r,c), y_stride, SP(c),SP(r), z,b->src_stride,&sse)
    201 #define IFMVCV(r,c,s,e) if ( c >= minc && c <= maxc && r >= minr && r <= maxr) s else e;
    202 /* returns distortion + motion vector cost */
    203 #define ERR(r,c) (MVC(r,c)+DIST(r,c))
    204 /* checks if (r,c) has better score than previous best */
    205 #define CHECK_BETTER(v,r,c) IFMVCV(r,c,{thismse = DIST(r,c); if((v = (MVC(r,c)+thismse)) < besterr) { besterr = v; br=r; bc=c; *distortion = thismse; *sse1 = sse; }}, v=UINT_MAX;)
    206 
    207 int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
    208                                              int_mv *bestmv, int_mv *ref_mv,
    209                                              int error_per_bit,
    210                                              const vp8_variance_fn_ptr_t *vfp,
    211                                              int *mvcost[2], int *distortion,
    212                                              unsigned int *sse1)
    213 {
    214     unsigned char *z = (*(b->base_src) + b->src);
    215 
    216     int rr = ref_mv->as_mv.row >> 1, rc = ref_mv->as_mv.col >> 1;
    217     int br = bestmv->as_mv.row * 4, bc = bestmv->as_mv.col * 4;
    218     int tr = br, tc = bc;
    219     unsigned int besterr;
    220     unsigned int left, right, up, down, diag;
    221     unsigned int sse;
    222     unsigned int whichdir;
    223     unsigned int halfiters = 4;
    224     unsigned int quarteriters = 4;
    225     int thismse;
    226 
    227     int minc = VPXMAX(x->mv_col_min * 4,
    228                       (ref_mv->as_mv.col >> 1) - ((1 << mvlong_width) - 1));
    229     int maxc = VPXMIN(x->mv_col_max * 4,
    230                       (ref_mv->as_mv.col >> 1) + ((1 << mvlong_width) - 1));
    231     int minr = VPXMAX(x->mv_row_min * 4,
    232                       (ref_mv->as_mv.row >> 1) - ((1 << mvlong_width) - 1));
    233     int maxr = VPXMIN(x->mv_row_max * 4,
    234                       (ref_mv->as_mv.row >> 1) + ((1 << mvlong_width) - 1));
    235 
    236     int y_stride;
    237     int offset;
    238     int pre_stride = x->e_mbd.pre.y_stride;
    239     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
    240 
    241 
    242 #if ARCH_X86 || ARCH_X86_64
    243     MACROBLOCKD *xd = &x->e_mbd;
    244     unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
    245     unsigned char *y;
    246     int buf_r1, buf_r2, buf_c1;
    247 
    248     /* Clamping to avoid out-of-range data access */
    249     buf_r1 = ((bestmv->as_mv.row - 3) < x->mv_row_min)?(bestmv->as_mv.row - x->mv_row_min):3;
    250     buf_r2 = ((bestmv->as_mv.row + 3) > x->mv_row_max)?(x->mv_row_max - bestmv->as_mv.row):3;
    251     buf_c1 = ((bestmv->as_mv.col - 3) < x->mv_col_min)?(bestmv->as_mv.col - x->mv_col_min):3;
    252     y_stride = 32;
    253 
    254     /* Copy to intermediate buffer before searching. */
    255     vfp->copymem(y_0 - buf_c1 - pre_stride*buf_r1, pre_stride, xd->y_buf, y_stride, 16+buf_r1+buf_r2);
    256     y = xd->y_buf + y_stride*buf_r1 +buf_c1;
    257 #else
    258     unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
    259     y_stride = pre_stride;
    260 #endif
    261 
    262     offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;
    263 
    264     /* central mv */
    265     bestmv->as_mv.row *= 8;
    266     bestmv->as_mv.col *= 8;
    267 
    268     /* calculate central point error */
    269     besterr = vfp->vf(y, y_stride, z, b->src_stride, sse1);
    270     *distortion = besterr;
    271     besterr += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
    272 
    273     /* TODO: Each subsequent iteration checks at least one point in common
    274      * with the last iteration could be 2 ( if diag selected)
    275      */
    276     while (--halfiters)
    277     {
    278         /* 1/2 pel */
    279         CHECK_BETTER(left, tr, tc - 2);
    280         CHECK_BETTER(right, tr, tc + 2);
    281         CHECK_BETTER(up, tr - 2, tc);
    282         CHECK_BETTER(down, tr + 2, tc);
    283 
    284         whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
    285 
    286         switch (whichdir)
    287         {
    288         case 0:
    289             CHECK_BETTER(diag, tr - 2, tc - 2);
    290             break;
    291         case 1:
    292             CHECK_BETTER(diag, tr - 2, tc + 2);
    293             break;
    294         case 2:
    295             CHECK_BETTER(diag, tr + 2, tc - 2);
    296             break;
    297         case 3:
    298             CHECK_BETTER(diag, tr + 2, tc + 2);
    299             break;
    300         }
    301 
    302         /* no reason to check the same one again. */
    303         if (tr == br && tc == bc)
    304             break;
    305 
    306         tr = br;
    307         tc = bc;
    308     }
    309 
    310     /* TODO: Each subsequent iteration checks at least one point in common
    311      * with the last iteration could be 2 ( if diag selected)
    312      */
    313 
    314     /* 1/4 pel */
    315     while (--quarteriters)
    316     {
    317         CHECK_BETTER(left, tr, tc - 1);
    318         CHECK_BETTER(right, tr, tc + 1);
    319         CHECK_BETTER(up, tr - 1, tc);
    320         CHECK_BETTER(down, tr + 1, tc);
    321 
    322         whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
    323 
    324         switch (whichdir)
    325         {
    326         case 0:
    327             CHECK_BETTER(diag, tr - 1, tc - 1);
    328             break;
    329         case 1:
    330             CHECK_BETTER(diag, tr - 1, tc + 1);
    331             break;
    332         case 2:
    333             CHECK_BETTER(diag, tr + 1, tc - 1);
    334             break;
    335         case 3:
    336             CHECK_BETTER(diag, tr + 1, tc + 1);
    337             break;
    338         }
    339 
    340         /* no reason to check the same one again. */
    341         if (tr == br && tc == bc)
    342             break;
    343 
    344         tr = br;
    345         tc = bc;
    346     }
    347 
    348     bestmv->as_mv.row = br * 2;
    349     bestmv->as_mv.col = bc * 2;
    350 
    351     if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL<<3)) ||
    352         (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL<<3)))
    353         return INT_MAX;
    354 
    355     return besterr;
    356 }
    357 #undef MVC
    358 #undef PRE
    359 #undef SP
    360 #undef DIST
    361 #undef IFMVCV
    362 #undef ERR
    363 #undef CHECK_BETTER
    364 
    365 int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
    366                                  int_mv *bestmv, int_mv *ref_mv,
    367                                  int error_per_bit,
    368                                  const vp8_variance_fn_ptr_t *vfp,
    369                                  int *mvcost[2], int *distortion,
    370                                  unsigned int *sse1)
    371 {
    372     int bestmse = INT_MAX;
    373     int_mv startmv;
    374     int_mv this_mv;
    375     unsigned char *z = (*(b->base_src) + b->src);
    376     int left, right, up, down, diag;
    377     unsigned int sse;
    378     int whichdir ;
    379     int thismse;
    380     int y_stride;
    381     int pre_stride = x->e_mbd.pre.y_stride;
    382     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
    383 
    384 #if ARCH_X86 || ARCH_X86_64
    385     MACROBLOCKD *xd = &x->e_mbd;
    386     unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
    387     unsigned char *y;
    388 
    389     y_stride = 32;
    390     /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
    391      vfp->copymem(y_0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18);
    392      y = xd->y_buf + y_stride + 1;
    393 #else
    394      unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
    395      y_stride = pre_stride;
    396 #endif
    397 
    398     /* central mv */
    399     bestmv->as_mv.row *= 8;
    400     bestmv->as_mv.col *= 8;
    401     startmv = *bestmv;
    402 
    403     /* calculate central point error */
    404     bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
    405     *distortion = bestmse;
    406     bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
    407 
    408     /* go left then right and check error */
    409     this_mv.as_mv.row = startmv.as_mv.row;
    410     this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
    411     thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, b->src_stride, &sse);
    412     left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    413 
    414     if (left < bestmse)
    415     {
    416         *bestmv = this_mv;
    417         bestmse = left;
    418         *distortion = thismse;
    419         *sse1 = sse;
    420     }
    421 
    422     this_mv.as_mv.col += 8;
    423     thismse = vfp->svf_halfpix_h(y, y_stride, z, b->src_stride, &sse);
    424     right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    425 
    426     if (right < bestmse)
    427     {
    428         *bestmv = this_mv;
    429         bestmse = right;
    430         *distortion = thismse;
    431         *sse1 = sse;
    432     }
    433 
    434     /* go up then down and check error */
    435     this_mv.as_mv.col = startmv.as_mv.col;
    436     this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
    437     thismse =  vfp->svf_halfpix_v(y - y_stride, y_stride, z, b->src_stride, &sse);
    438     up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    439 
    440     if (up < bestmse)
    441     {
    442         *bestmv = this_mv;
    443         bestmse = up;
    444         *distortion = thismse;
    445         *sse1 = sse;
    446     }
    447 
    448     this_mv.as_mv.row += 8;
    449     thismse = vfp->svf_halfpix_v(y, y_stride, z, b->src_stride, &sse);
    450     down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    451 
    452     if (down < bestmse)
    453     {
    454         *bestmv = this_mv;
    455         bestmse = down;
    456         *distortion = thismse;
    457         *sse1 = sse;
    458     }
    459 
    460 
    461     /* now check 1 more diagonal */
    462     whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
    463     this_mv = startmv;
    464 
    465     switch (whichdir)
    466     {
    467     case 0:
    468         this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
    469         this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
    470         thismse = vfp->svf_halfpix_hv(y - 1 - y_stride, y_stride, z, b->src_stride, &sse);
    471         break;
    472     case 1:
    473         this_mv.as_mv.col += 4;
    474         this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
    475         thismse = vfp->svf_halfpix_hv(y - y_stride, y_stride, z, b->src_stride, &sse);
    476         break;
    477     case 2:
    478         this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
    479         this_mv.as_mv.row += 4;
    480         thismse = vfp->svf_halfpix_hv(y - 1, y_stride, z, b->src_stride, &sse);
    481         break;
    482     case 3:
    483     default:
    484         this_mv.as_mv.col += 4;
    485         this_mv.as_mv.row += 4;
    486         thismse = vfp->svf_halfpix_hv(y, y_stride, z, b->src_stride, &sse);
    487         break;
    488     }
    489 
    490     diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    491 
    492     if (diag < bestmse)
    493     {
    494         *bestmv = this_mv;
    495         bestmse = diag;
    496         *distortion = thismse;
    497         *sse1 = sse;
    498     }
    499 
    500 
    501     /* time to check quarter pels. */
    502     if (bestmv->as_mv.row < startmv.as_mv.row)
    503         y -= y_stride;
    504 
    505     if (bestmv->as_mv.col < startmv.as_mv.col)
    506         y--;
    507 
    508     startmv = *bestmv;
    509 
    510 
    511 
    512     /* go left then right and check error */
    513     this_mv.as_mv.row = startmv.as_mv.row;
    514 
    515     if (startmv.as_mv.col & 7)
    516     {
    517         this_mv.as_mv.col = startmv.as_mv.col - 2;
    518         thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    519     }
    520     else
    521     {
    522         this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
    523         thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    524     }
    525 
    526     left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    527 
    528     if (left < bestmse)
    529     {
    530         *bestmv = this_mv;
    531         bestmse = left;
    532         *distortion = thismse;
    533         *sse1 = sse;
    534     }
    535 
    536     this_mv.as_mv.col += 4;
    537     thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    538     right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    539 
    540     if (right < bestmse)
    541     {
    542         *bestmv = this_mv;
    543         bestmse = right;
    544         *distortion = thismse;
    545         *sse1 = sse;
    546     }
    547 
    548     /* go up then down and check error */
    549     this_mv.as_mv.col = startmv.as_mv.col;
    550 
    551     if (startmv.as_mv.row & 7)
    552     {
    553         this_mv.as_mv.row = startmv.as_mv.row - 2;
    554         thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    555     }
    556     else
    557     {
    558         this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
    559         thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z, b->src_stride, &sse);
    560     }
    561 
    562     up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    563 
    564     if (up < bestmse)
    565     {
    566         *bestmv = this_mv;
    567         bestmse = up;
    568         *distortion = thismse;
    569         *sse1 = sse;
    570     }
    571 
    572     this_mv.as_mv.row += 4;
    573     thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    574     down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    575 
    576     if (down < bestmse)
    577     {
    578         *bestmv = this_mv;
    579         bestmse = down;
    580         *distortion = thismse;
    581         *sse1 = sse;
    582     }
    583 
    584 
    585     /* now check 1 more diagonal */
    586     whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
    587 
    588     this_mv = startmv;
    589 
    590     switch (whichdir)
    591     {
    592     case 0:
    593 
    594         if (startmv.as_mv.row & 7)
    595         {
    596             this_mv.as_mv.row -= 2;
    597 
    598             if (startmv.as_mv.col & 7)
    599             {
    600                 this_mv.as_mv.col -= 2;
    601                 thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    602             }
    603             else
    604             {
    605                 this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
    606                 thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z, b->src_stride, &sse);;
    607             }
    608         }
    609         else
    610         {
    611             this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
    612 
    613             if (startmv.as_mv.col & 7)
    614             {
    615                 this_mv.as_mv.col -= 2;
    616                 thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z, b->src_stride, &sse);
    617             }
    618             else
    619             {
    620                 this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
    621                 thismse = vfp->svf(y - y_stride - 1, y_stride, 6, 6, z, b->src_stride, &sse);
    622             }
    623         }
    624 
    625         break;
    626     case 1:
    627         this_mv.as_mv.col += 2;
    628 
    629         if (startmv.as_mv.row & 7)
    630         {
    631             this_mv.as_mv.row -= 2;
    632             thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    633         }
    634         else
    635         {
    636             this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
    637             thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z, b->src_stride, &sse);
    638         }
    639 
    640         break;
    641     case 2:
    642         this_mv.as_mv.row += 2;
    643 
    644         if (startmv.as_mv.col & 7)
    645         {
    646             this_mv.as_mv.col -= 2;
    647             thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    648         }
    649         else
    650         {
    651             this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
    652             thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    653         }
    654 
    655         break;
    656     case 3:
    657         this_mv.as_mv.col += 2;
    658         this_mv.as_mv.row += 2;
    659         thismse = vfp->svf(y, y_stride,  this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    660         break;
    661     }
    662 
    663     diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    664 
    665     if (diag < bestmse)
    666     {
    667         *bestmv = this_mv;
    668         bestmse = diag;
    669         *distortion = thismse;
    670         *sse1 = sse;
    671     }
    672 
    673     return bestmse;
    674 }
    675 
    676 int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
    677                                   int_mv *bestmv, int_mv *ref_mv,
    678                                   int error_per_bit,
    679                                   const vp8_variance_fn_ptr_t *vfp,
    680                                   int *mvcost[2], int *distortion,
    681                                   unsigned int *sse1)
    682 {
    683     int bestmse = INT_MAX;
    684     int_mv startmv;
    685     int_mv this_mv;
    686     unsigned char *z = (*(b->base_src) + b->src);
    687     int left, right, up, down, diag;
    688     unsigned int sse;
    689     int whichdir ;
    690     int thismse;
    691     int y_stride;
    692     int pre_stride = x->e_mbd.pre.y_stride;
    693     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
    694 
    695 #if ARCH_X86 || ARCH_X86_64
    696     MACROBLOCKD *xd = &x->e_mbd;
    697     unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
    698     unsigned char *y;
    699 
    700     y_stride = 32;
    701     /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
    702     vfp->copymem(y_0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18);
    703     y = xd->y_buf + y_stride + 1;
    704 #else
    705     unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
    706     y_stride = pre_stride;
    707 #endif
    708 
    709     /* central mv */
    710     bestmv->as_mv.row *= 8;
    711     bestmv->as_mv.col *= 8;
    712     startmv = *bestmv;
    713 
    714     /* calculate central point error */
    715     bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
    716     *distortion = bestmse;
    717     bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
    718 
    719     /* go left then right and check error */
    720     this_mv.as_mv.row = startmv.as_mv.row;
    721     this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
    722     thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, b->src_stride, &sse);
    723     left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    724 
    725     if (left < bestmse)
    726     {
    727         *bestmv = this_mv;
    728         bestmse = left;
    729         *distortion = thismse;
    730         *sse1 = sse;
    731     }
    732 
    733     this_mv.as_mv.col += 8;
    734     thismse = vfp->svf_halfpix_h(y, y_stride, z, b->src_stride, &sse);
    735     right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    736 
    737     if (right < bestmse)
    738     {
    739         *bestmv = this_mv;
    740         bestmse = right;
    741         *distortion = thismse;
    742         *sse1 = sse;
    743     }
    744 
    745     /* go up then down and check error */
    746     this_mv.as_mv.col = startmv.as_mv.col;
    747     this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
    748     thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, b->src_stride, &sse);
    749     up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    750 
    751     if (up < bestmse)
    752     {
    753         *bestmv = this_mv;
    754         bestmse = up;
    755         *distortion = thismse;
    756         *sse1 = sse;
    757     }
    758 
    759     this_mv.as_mv.row += 8;
    760     thismse = vfp->svf_halfpix_v(y, y_stride, z, b->src_stride, &sse);
    761     down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    762 
    763     if (down < bestmse)
    764     {
    765         *bestmv = this_mv;
    766         bestmse = down;
    767         *distortion = thismse;
    768         *sse1 = sse;
    769     }
    770 
    771     /* now check 1 more diagonal - */
    772     whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
    773     this_mv = startmv;
    774 
    775     switch (whichdir)
    776     {
    777     case 0:
    778         this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
    779         this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
    780         thismse = vfp->svf_halfpix_hv(y - 1 - y_stride, y_stride, z, b->src_stride, &sse);
    781         break;
    782     case 1:
    783         this_mv.as_mv.col += 4;
    784         this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
    785         thismse = vfp->svf_halfpix_hv(y - y_stride, y_stride, z, b->src_stride, &sse);
    786         break;
    787     case 2:
    788         this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
    789         this_mv.as_mv.row += 4;
    790         thismse = vfp->svf_halfpix_hv(y - 1, y_stride, z, b->src_stride, &sse);
    791         break;
    792     case 3:
    793     default:
    794         this_mv.as_mv.col += 4;
    795         this_mv.as_mv.row += 4;
    796         thismse = vfp->svf_halfpix_hv(y, y_stride, z, b->src_stride, &sse);
    797         break;
    798     }
    799 
    800     diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    801 
    802     if (diag < bestmse)
    803     {
    804         *bestmv = this_mv;
    805         bestmse = diag;
    806         *distortion = thismse;
    807         *sse1 = sse;
    808     }
    809 
    810     return bestmse;
    811 }
    812 
    813 #define CHECK_BOUNDS(range) \
    814 {\
    815     all_in = 1;\
    816     all_in &= ((br-range) >= x->mv_row_min);\
    817     all_in &= ((br+range) <= x->mv_row_max);\
    818     all_in &= ((bc-range) >= x->mv_col_min);\
    819     all_in &= ((bc+range) <= x->mv_col_max);\
    820 }
    821 
    822 #define CHECK_POINT \
    823 {\
    824     if (this_mv.as_mv.col < x->mv_col_min) continue;\
    825     if (this_mv.as_mv.col > x->mv_col_max) continue;\
    826     if (this_mv.as_mv.row < x->mv_row_min) continue;\
    827     if (this_mv.as_mv.row > x->mv_row_max) continue;\
    828 }
    829 
    830 #define CHECK_BETTER \
    831 {\
    832     if (thissad < bestsad)\
    833     {\
    834         thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);\
    835         if (thissad < bestsad)\
    836         {\
    837             bestsad = thissad;\
    838             best_site = i;\
    839         }\
    840     }\
    841 }
    842 
    843 static const MV next_chkpts[6][3] =
    844 {
    845     {{ -2, 0}, { -1, -2}, {1, -2}},
    846     {{ -1, -2}, {1, -2}, {2, 0}},
    847     {{1, -2}, {2, 0}, {1, 2}},
    848     {{2, 0}, {1, 2}, { -1, 2}},
    849     {{1, 2}, { -1, 2}, { -2, 0}},
    850     {{ -1, 2}, { -2, 0}, { -1, -2}}
    851 };
    852 
    853 int vp8_hex_search
    854 (
    855     MACROBLOCK *x,
    856     BLOCK *b,
    857     BLOCKD *d,
    858     int_mv *ref_mv,
    859     int_mv *best_mv,
    860     int search_param,
    861     int sad_per_bit,
    862     const vp8_variance_fn_ptr_t *vfp,
    863     int *mvsadcost[2],
    864     int *mvcost[2],
    865     int_mv *center_mv
    866 )
    867 {
    868     MV hex[6] = { { -1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0} } ;
    869     MV neighbors[4] = {{0, -1}, { -1, 0}, {1, 0}, {0, 1}} ;
    870     int i, j;
    871 
    872     unsigned char *what = (*(b->base_src) + b->src);
    873     int what_stride = b->src_stride;
    874     int pre_stride = x->e_mbd.pre.y_stride;
    875     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
    876 
    877     int in_what_stride = pre_stride;
    878     int br, bc;
    879     int_mv this_mv;
    880     unsigned int bestsad;
    881     unsigned int thissad;
    882     unsigned char *base_offset;
    883     unsigned char *this_offset;
    884     int k = -1;
    885     int all_in;
    886     int best_site = -1;
    887     int hex_range = 127;
    888     int dia_range = 8;
    889 
    890     int_mv fcenter_mv;
    891     fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
    892     fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
    893 
    894     (void)mvcost;
    895 
    896     /* adjust ref_mv to make sure it is within MV range */
    897     vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
    898     br = ref_mv->as_mv.row;
    899     bc = ref_mv->as_mv.col;
    900 
    901     /* Work out the start point for the search */
    902     base_offset = (unsigned char *)(base_pre + d->offset);
    903     this_offset = base_offset + (br * (pre_stride)) + bc;
    904     this_mv.as_mv.row = br;
    905     this_mv.as_mv.col = bc;
    906     bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride)
    907             + mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
    908 
    909 #if CONFIG_MULTI_RES_ENCODING
    910     /* Lower search range based on prediction info */
    911     if (search_param >= 6) goto cal_neighbors;
    912     else if (search_param >= 5) hex_range = 4;
    913     else if (search_param >= 4) hex_range = 6;
    914     else if (search_param >= 3) hex_range = 15;
    915     else if (search_param >= 2) hex_range = 31;
    916     else if (search_param >= 1) hex_range = 63;
    917 
    918     dia_range = 8;
    919 #else
    920     (void)search_param;
    921 #endif
    922 
    923     /* hex search */
    924     CHECK_BOUNDS(2)
    925 
    926     if(all_in)
    927     {
    928         for (i = 0; i < 6; i++)
    929         {
    930             this_mv.as_mv.row = br + hex[i].row;
    931             this_mv.as_mv.col = bc + hex[i].col;
    932             this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col;
    933             thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
    934             CHECK_BETTER
    935         }
    936     }else
    937     {
    938         for (i = 0; i < 6; i++)
    939         {
    940             this_mv.as_mv.row = br + hex[i].row;
    941             this_mv.as_mv.col = bc + hex[i].col;
    942             CHECK_POINT
    943             this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col;
    944             thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
    945             CHECK_BETTER
    946         }
    947     }
    948 
    949     if (best_site == -1)
    950         goto cal_neighbors;
    951     else
    952     {
    953         br += hex[best_site].row;
    954         bc += hex[best_site].col;
    955         k = best_site;
    956     }
    957 
    958     for (j = 1; j < hex_range; j++)
    959     {
    960         best_site = -1;
    961         CHECK_BOUNDS(2)
    962 
    963         if(all_in)
    964         {
    965             for (i = 0; i < 3; i++)
    966             {
    967                 this_mv.as_mv.row = br + next_chkpts[k][i].row;
    968                 this_mv.as_mv.col = bc + next_chkpts[k][i].col;
    969                 this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
    970                 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
    971                 CHECK_BETTER
    972             }
    973         }else
    974         {
    975             for (i = 0; i < 3; i++)
    976             {
    977                 this_mv.as_mv.row = br + next_chkpts[k][i].row;
    978                 this_mv.as_mv.col = bc + next_chkpts[k][i].col;
    979                 CHECK_POINT
    980                 this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
    981                 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
    982                 CHECK_BETTER
    983             }
    984         }
    985 
    986         if (best_site == -1)
    987             break;
    988         else
    989         {
    990             br += next_chkpts[k][best_site].row;
    991             bc += next_chkpts[k][best_site].col;
    992             k += 5 + best_site;
    993             if (k >= 12) k -= 12;
    994             else if (k >= 6) k -= 6;
    995         }
    996     }
    997 
    998     /* check 4 1-away neighbors */
    999 cal_neighbors:
   1000     for (j = 0; j < dia_range; j++)
   1001     {
   1002         best_site = -1;
   1003         CHECK_BOUNDS(1)
   1004 
   1005         if(all_in)
   1006         {
   1007             for (i = 0; i < 4; i++)
   1008             {
   1009                 this_mv.as_mv.row = br + neighbors[i].row;
   1010                 this_mv.as_mv.col = bc + neighbors[i].col;
   1011                 this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
   1012                 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
   1013                 CHECK_BETTER
   1014             }
   1015         }else
   1016         {
   1017             for (i = 0; i < 4; i++)
   1018             {
   1019                 this_mv.as_mv.row = br + neighbors[i].row;
   1020                 this_mv.as_mv.col = bc + neighbors[i].col;
   1021                 CHECK_POINT
   1022                 this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
   1023                 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
   1024                 CHECK_BETTER
   1025             }
   1026         }
   1027 
   1028         if (best_site == -1)
   1029             break;
   1030         else
   1031         {
   1032             br += neighbors[best_site].row;
   1033             bc += neighbors[best_site].col;
   1034         }
   1035     }
   1036 
   1037     best_mv->as_mv.row = br;
   1038     best_mv->as_mv.col = bc;
   1039 
   1040     return bestsad;
   1041 }
   1042 #undef CHECK_BOUNDS
   1043 #undef CHECK_POINT
   1044 #undef CHECK_BETTER
   1045 
   1046 int vp8_diamond_search_sad_c
   1047 (
   1048     MACROBLOCK *x,
   1049     BLOCK *b,
   1050     BLOCKD *d,
   1051     int_mv *ref_mv,
   1052     int_mv *best_mv,
   1053     int search_param,
   1054     int sad_per_bit,
   1055     int *num00,
   1056     vp8_variance_fn_ptr_t *fn_ptr,
   1057     int *mvcost[2],
   1058     int_mv *center_mv
   1059 )
   1060 {
   1061     int i, j, step;
   1062 
   1063     unsigned char *what = (*(b->base_src) + b->src);
   1064     int what_stride = b->src_stride;
   1065     unsigned char *in_what;
   1066     int pre_stride = x->e_mbd.pre.y_stride;
   1067     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
   1068     int in_what_stride = pre_stride;
   1069     unsigned char *best_address;
   1070 
   1071     int tot_steps;
   1072     int_mv this_mv;
   1073 
   1074     unsigned int bestsad;
   1075     unsigned int thissad;
   1076     int best_site = 0;
   1077     int last_site = 0;
   1078 
   1079     int ref_row;
   1080     int ref_col;
   1081     int this_row_offset;
   1082     int this_col_offset;
   1083     search_site *ss;
   1084 
   1085     unsigned char *check_here;
   1086 
   1087     int *mvsadcost[2];
   1088     int_mv fcenter_mv;
   1089 
   1090     mvsadcost[0] = x->mvsadcost[0];
   1091     mvsadcost[1] = x->mvsadcost[1];
   1092     fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
   1093     fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
   1094 
   1095     vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
   1096     ref_row = ref_mv->as_mv.row;
   1097     ref_col = ref_mv->as_mv.col;
   1098     *num00 = 0;
   1099     best_mv->as_mv.row = ref_row;
   1100     best_mv->as_mv.col = ref_col;
   1101 
   1102     /* Work out the start point for the search */
   1103     in_what = (unsigned char *)(base_pre + d->offset + (ref_row * pre_stride) + ref_col);
   1104     best_address = in_what;
   1105 
   1106     /* Check the starting position */
   1107     bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride)
   1108             + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
   1109 
   1110     /* search_param determines the length of the initial step and hence
   1111      * the number of iterations 0 = initial step (MAX_FIRST_STEP) pel :
   1112      * 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
   1113      */
   1114     ss = &x->ss[search_param * x->searches_per_step];
   1115     tot_steps = (x->ss_count / x->searches_per_step) - search_param;
   1116 
   1117     i = 1;
   1118 
   1119     for (step = 0; step < tot_steps ; step++)
   1120     {
   1121         for (j = 0 ; j < x->searches_per_step ; j++)
   1122         {
   1123             /* Trap illegal vectors */
   1124             this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
   1125             this_col_offset = best_mv->as_mv.col + ss[i].mv.col;
   1126 
   1127             if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
   1128             (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
   1129 
   1130             {
   1131                 check_here = ss[i].offset + best_address;
   1132                 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
   1133 
   1134                 if (thissad < bestsad)
   1135                 {
   1136                     this_mv.as_mv.row = this_row_offset;
   1137                     this_mv.as_mv.col = this_col_offset;
   1138                     thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
   1139                                               mvsadcost, sad_per_bit);
   1140 
   1141                     if (thissad < bestsad)
   1142                     {
   1143                         bestsad = thissad;
   1144                         best_site = i;
   1145                     }
   1146                 }
   1147             }
   1148 
   1149             i++;
   1150         }
   1151 
   1152         if (best_site != last_site)
   1153         {
   1154             best_mv->as_mv.row += ss[best_site].mv.row;
   1155             best_mv->as_mv.col += ss[best_site].mv.col;
   1156             best_address += ss[best_site].offset;
   1157             last_site = best_site;
   1158         }
   1159         else if (best_address == in_what)
   1160             (*num00)++;
   1161     }
   1162 
   1163     this_mv.as_mv.row = best_mv->as_mv.row << 3;
   1164     this_mv.as_mv.col = best_mv->as_mv.col << 3;
   1165 
   1166     return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad)
   1167            + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
   1168 }
   1169 
   1170 int vp8_diamond_search_sadx4
   1171 (
   1172     MACROBLOCK *x,
   1173     BLOCK *b,
   1174     BLOCKD *d,
   1175     int_mv *ref_mv,
   1176     int_mv *best_mv,
   1177     int search_param,
   1178     int sad_per_bit,
   1179     int *num00,
   1180     vp8_variance_fn_ptr_t *fn_ptr,
   1181     int *mvcost[2],
   1182     int_mv *center_mv
   1183 )
   1184 {
   1185     int i, j, step;
   1186 
   1187     unsigned char *what = (*(b->base_src) + b->src);
   1188     int what_stride = b->src_stride;
   1189     unsigned char *in_what;
   1190     int pre_stride = x->e_mbd.pre.y_stride;
   1191     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
   1192     int in_what_stride = pre_stride;
   1193     unsigned char *best_address;
   1194 
   1195     int tot_steps;
   1196     int_mv this_mv;
   1197 
   1198     unsigned int bestsad;
   1199     unsigned int thissad;
   1200     int best_site = 0;
   1201     int last_site = 0;
   1202 
   1203     int ref_row;
   1204     int ref_col;
   1205     int this_row_offset;
   1206     int this_col_offset;
   1207     search_site *ss;
   1208 
   1209     unsigned char *check_here;
   1210 
   1211     int *mvsadcost[2];
   1212     int_mv fcenter_mv;
   1213 
   1214     mvsadcost[0] = x->mvsadcost[0];
   1215     mvsadcost[1] = x->mvsadcost[1];
   1216     fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
   1217     fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
   1218 
   1219     vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
   1220     ref_row = ref_mv->as_mv.row;
   1221     ref_col = ref_mv->as_mv.col;
   1222     *num00 = 0;
   1223     best_mv->as_mv.row = ref_row;
   1224     best_mv->as_mv.col = ref_col;
   1225 
   1226     /* Work out the start point for the search */
   1227     in_what = (unsigned char *)(base_pre + d->offset + (ref_row * pre_stride) + ref_col);
   1228     best_address = in_what;
   1229 
   1230     /* Check the starting position */
   1231     bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride)
   1232             + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
   1233 
   1234     /* search_param determines the length of the initial step and hence the
   1235      * number of iterations 0 = initial step (MAX_FIRST_STEP) pel : 1 =
   1236      * (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
   1237      */
   1238     ss = &x->ss[search_param * x->searches_per_step];
   1239     tot_steps = (x->ss_count / x->searches_per_step) - search_param;
   1240 
   1241     i = 1;
   1242 
   1243     for (step = 0; step < tot_steps ; step++)
   1244     {
   1245         int all_in = 1, t;
   1246 
   1247         /* To know if all neighbor points are within the bounds, 4 bounds
   1248          * checking are enough instead of checking 4 bounds for each
   1249          * points.
   1250          */
   1251         all_in &= ((best_mv->as_mv.row + ss[i].mv.row)> x->mv_row_min);
   1252         all_in &= ((best_mv->as_mv.row + ss[i+1].mv.row) < x->mv_row_max);
   1253         all_in &= ((best_mv->as_mv.col + ss[i+2].mv.col) > x->mv_col_min);
   1254         all_in &= ((best_mv->as_mv.col + ss[i+3].mv.col) < x->mv_col_max);
   1255 
   1256         if (all_in)
   1257         {
   1258             unsigned int sad_array[4];
   1259 
   1260             for (j = 0 ; j < x->searches_per_step ; j += 4)
   1261             {
   1262                 const unsigned char *block_offset[4];
   1263 
   1264                 for (t = 0; t < 4; t++)
   1265                     block_offset[t] = ss[i+t].offset + best_address;
   1266 
   1267                 fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, sad_array);
   1268 
   1269                 for (t = 0; t < 4; t++, i++)
   1270                 {
   1271                     if (sad_array[t] < bestsad)
   1272                     {
   1273                         this_mv.as_mv.row = best_mv->as_mv.row + ss[i].mv.row;
   1274                         this_mv.as_mv.col = best_mv->as_mv.col + ss[i].mv.col;
   1275                         sad_array[t] += mvsad_err_cost(&this_mv, &fcenter_mv,
   1276                                                        mvsadcost, sad_per_bit);
   1277 
   1278                         if (sad_array[t] < bestsad)
   1279                         {
   1280                             bestsad = sad_array[t];
   1281                             best_site = i;
   1282                         }
   1283                     }
   1284                 }
   1285             }
   1286         }
   1287         else
   1288         {
   1289             for (j = 0 ; j < x->searches_per_step ; j++)
   1290             {
   1291                 /* Trap illegal vectors */
   1292                 this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
   1293                 this_col_offset = best_mv->as_mv.col + ss[i].mv.col;
   1294 
   1295                 if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
   1296                 (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
   1297                 {
   1298                     check_here = ss[i].offset + best_address;
   1299                     thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
   1300 
   1301                     if (thissad < bestsad)
   1302                     {
   1303                         this_mv.as_mv.row = this_row_offset;
   1304                         this_mv.as_mv.col = this_col_offset;
   1305                         thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
   1306                                                   mvsadcost, sad_per_bit);
   1307 
   1308                         if (thissad < bestsad)
   1309                         {
   1310                             bestsad = thissad;
   1311                             best_site = i;
   1312                         }
   1313                     }
   1314                 }
   1315                 i++;
   1316             }
   1317         }
   1318 
   1319         if (best_site != last_site)
   1320         {
   1321             best_mv->as_mv.row += ss[best_site].mv.row;
   1322             best_mv->as_mv.col += ss[best_site].mv.col;
   1323             best_address += ss[best_site].offset;
   1324             last_site = best_site;
   1325         }
   1326         else if (best_address == in_what)
   1327             (*num00)++;
   1328     }
   1329 
   1330     this_mv.as_mv.row = best_mv->as_mv.row * 8;
   1331     this_mv.as_mv.col = best_mv->as_mv.col * 8;
   1332 
   1333     return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad)
   1334            + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
   1335 }
   1336 
   1337 int vp8_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
   1338                         int sad_per_bit, int distance,
   1339                         vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
   1340                         int_mv *center_mv)
   1341 {
   1342     unsigned char *what = (*(b->base_src) + b->src);
   1343     int what_stride = b->src_stride;
   1344     unsigned char *in_what;
   1345     int pre_stride = x->e_mbd.pre.y_stride;
   1346     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
   1347     int in_what_stride = pre_stride;
   1348     int mv_stride = pre_stride;
   1349     unsigned char *bestaddress;
   1350     int_mv *best_mv = &d->bmi.mv;
   1351     int_mv this_mv;
   1352     unsigned int bestsad;
   1353     unsigned int thissad;
   1354     int r, c;
   1355 
   1356     unsigned char *check_here;
   1357 
   1358     int ref_row = ref_mv->as_mv.row;
   1359     int ref_col = ref_mv->as_mv.col;
   1360 
   1361     int row_min = ref_row - distance;
   1362     int row_max = ref_row + distance;
   1363     int col_min = ref_col - distance;
   1364     int col_max = ref_col + distance;
   1365 
   1366     int *mvsadcost[2];
   1367     int_mv fcenter_mv;
   1368 
   1369     mvsadcost[0] = x->mvsadcost[0];
   1370     mvsadcost[1] = x->mvsadcost[1];
   1371     fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
   1372     fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
   1373 
   1374     /* Work out the mid point for the search */
   1375     in_what = base_pre + d->offset;
   1376     bestaddress = in_what + (ref_row * pre_stride) + ref_col;
   1377 
   1378     best_mv->as_mv.row = ref_row;
   1379     best_mv->as_mv.col = ref_col;
   1380 
   1381     /* Baseline value at the centre */
   1382     bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride)
   1383             + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
   1384 
   1385     /* Apply further limits to prevent us looking using vectors that
   1386      * stretch beyiond the UMV border
   1387      */
   1388     if (col_min < x->mv_col_min)
   1389         col_min = x->mv_col_min;
   1390 
   1391     if (col_max > x->mv_col_max)
   1392         col_max = x->mv_col_max;
   1393 
   1394     if (row_min < x->mv_row_min)
   1395         row_min = x->mv_row_min;
   1396 
   1397     if (row_max > x->mv_row_max)
   1398         row_max = x->mv_row_max;
   1399 
   1400     for (r = row_min; r < row_max ; r++)
   1401     {
   1402         this_mv.as_mv.row = r;
   1403         check_here = r * mv_stride + in_what + col_min;
   1404 
   1405         for (c = col_min; c < col_max; c++)
   1406         {
   1407             thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
   1408 
   1409             this_mv.as_mv.col = c;
   1410             thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
   1411                                       mvsadcost, sad_per_bit);
   1412 
   1413             if (thissad < bestsad)
   1414             {
   1415                 bestsad = thissad;
   1416                 best_mv->as_mv.row = r;
   1417                 best_mv->as_mv.col = c;
   1418                 bestaddress = check_here;
   1419             }
   1420 
   1421             check_here++;
   1422         }
   1423     }
   1424 
   1425     this_mv.as_mv.row = best_mv->as_mv.row << 3;
   1426     this_mv.as_mv.col = best_mv->as_mv.col << 3;
   1427 
   1428     return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad)
   1429            + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
   1430 }
   1431 
   1432 int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
   1433                           int sad_per_bit, int distance,
   1434                           vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
   1435                           int_mv *center_mv)
   1436 {
   1437     unsigned char *what = (*(b->base_src) + b->src);
   1438     int what_stride = b->src_stride;
   1439     unsigned char *in_what;
   1440     int pre_stride = x->e_mbd.pre.y_stride;
   1441     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
   1442     int in_what_stride = pre_stride;
   1443     int mv_stride = pre_stride;
   1444     unsigned char *bestaddress;
   1445     int_mv *best_mv = &d->bmi.mv;
   1446     int_mv this_mv;
   1447     unsigned int bestsad;
   1448     unsigned int thissad;
   1449     int r, c;
   1450 
   1451     unsigned char *check_here;
   1452 
   1453     int ref_row = ref_mv->as_mv.row;
   1454     int ref_col = ref_mv->as_mv.col;
   1455 
   1456     int row_min = ref_row - distance;
   1457     int row_max = ref_row + distance;
   1458     int col_min = ref_col - distance;
   1459     int col_max = ref_col + distance;
   1460 
   1461     unsigned int sad_array[3];
   1462 
   1463     int *mvsadcost[2];
   1464     int_mv fcenter_mv;
   1465 
   1466     mvsadcost[0] = x->mvsadcost[0];
   1467     mvsadcost[1] = x->mvsadcost[1];
   1468     fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
   1469     fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
   1470 
   1471     /* Work out the mid point for the search */
   1472     in_what = base_pre + d->offset;
   1473     bestaddress = in_what + (ref_row * pre_stride) + ref_col;
   1474 
   1475     best_mv->as_mv.row = ref_row;
   1476     best_mv->as_mv.col = ref_col;
   1477 
   1478     /* Baseline value at the centre */
   1479     bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride)
   1480             + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
   1481 
   1482     /* Apply further limits to prevent us looking using vectors that stretch
   1483      * beyond the UMV border
   1484      */
   1485     if (col_min < x->mv_col_min)
   1486         col_min = x->mv_col_min;
   1487 
   1488     if (col_max > x->mv_col_max)
   1489         col_max = x->mv_col_max;
   1490 
   1491     if (row_min < x->mv_row_min)
   1492         row_min = x->mv_row_min;
   1493 
   1494     if (row_max > x->mv_row_max)
   1495         row_max = x->mv_row_max;
   1496 
   1497     for (r = row_min; r < row_max ; r++)
   1498     {
   1499         this_mv.as_mv.row = r;
   1500         check_here = r * mv_stride + in_what + col_min;
   1501         c = col_min;
   1502 
   1503         while ((c + 2) < col_max)
   1504         {
   1505             int i;
   1506 
   1507             fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array);
   1508 
   1509             for (i = 0; i < 3; i++)
   1510             {
   1511                 thissad = sad_array[i];
   1512 
   1513                 if (thissad < bestsad)
   1514                 {
   1515                     this_mv.as_mv.col = c;
   1516                     thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
   1517                                               mvsadcost, sad_per_bit);
   1518 
   1519                     if (thissad < bestsad)
   1520                     {
   1521                         bestsad = thissad;
   1522                         best_mv->as_mv.row = r;
   1523                         best_mv->as_mv.col = c;
   1524                         bestaddress = check_here;
   1525                     }
   1526                 }
   1527 
   1528                 check_here++;
   1529                 c++;
   1530             }
   1531         }
   1532 
   1533         while (c < col_max)
   1534         {
   1535             thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
   1536 
   1537             if (thissad < bestsad)
   1538             {
   1539                 this_mv.as_mv.col = c;
   1540                 thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
   1541                                           mvsadcost, sad_per_bit);
   1542 
   1543                 if (thissad < bestsad)
   1544                 {
   1545                     bestsad = thissad;
   1546                     best_mv->as_mv.row = r;
   1547                     best_mv->as_mv.col = c;
   1548                     bestaddress = check_here;
   1549                 }
   1550             }
   1551 
   1552             check_here ++;
   1553             c ++;
   1554         }
   1555 
   1556     }
   1557 
   1558     this_mv.as_mv.row = best_mv->as_mv.row << 3;
   1559     this_mv.as_mv.col = best_mv->as_mv.col << 3;
   1560 
   1561     return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad)
   1562            + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
   1563 }
   1564 
   1565 int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
   1566                           int sad_per_bit, int distance,
   1567                           vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
   1568                           int_mv *center_mv)
   1569 {
   1570     unsigned char *what = (*(b->base_src) + b->src);
   1571     int what_stride = b->src_stride;
   1572     int pre_stride = x->e_mbd.pre.y_stride;
   1573     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
   1574     unsigned char *in_what;
   1575     int in_what_stride = pre_stride;
   1576     int mv_stride = pre_stride;
   1577     unsigned char *bestaddress;
   1578     int_mv *best_mv = &d->bmi.mv;
   1579     int_mv this_mv;
   1580     unsigned int bestsad;
   1581     unsigned int thissad;
   1582     int r, c;
   1583 
   1584     unsigned char *check_here;
   1585 
   1586     int ref_row = ref_mv->as_mv.row;
   1587     int ref_col = ref_mv->as_mv.col;
   1588 
   1589     int row_min = ref_row - distance;
   1590     int row_max = ref_row + distance;
   1591     int col_min = ref_col - distance;
   1592     int col_max = ref_col + distance;
   1593 
   1594     // TODO(johannkoenig): check if this alignment is necessary.
   1595     DECLARE_ALIGNED(16, unsigned int, sad_array8[8]);
   1596     unsigned int sad_array[3];
   1597 
   1598     int *mvsadcost[2];
   1599     int_mv fcenter_mv;
   1600 
   1601     mvsadcost[0] = x->mvsadcost[0];
   1602     mvsadcost[1] = x->mvsadcost[1];
   1603     fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
   1604     fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
   1605 
   1606     /* Work out the mid point for the search */
   1607     in_what = base_pre + d->offset;
   1608     bestaddress = in_what + (ref_row * pre_stride) + ref_col;
   1609 
   1610     best_mv->as_mv.row = ref_row;
   1611     best_mv->as_mv.col = ref_col;
   1612 
   1613     /* Baseline value at the centre */
   1614     bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride)
   1615             + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
   1616 
   1617     /* Apply further limits to prevent us looking using vectors that stretch
   1618      * beyond the UMV border
   1619      */
   1620     if (col_min < x->mv_col_min)
   1621         col_min = x->mv_col_min;
   1622 
   1623     if (col_max > x->mv_col_max)
   1624         col_max = x->mv_col_max;
   1625 
   1626     if (row_min < x->mv_row_min)
   1627         row_min = x->mv_row_min;
   1628 
   1629     if (row_max > x->mv_row_max)
   1630         row_max = x->mv_row_max;
   1631 
   1632     for (r = row_min; r < row_max ; r++)
   1633     {
   1634         this_mv.as_mv.row = r;
   1635         check_here = r * mv_stride + in_what + col_min;
   1636         c = col_min;
   1637 
   1638         while ((c + 7) < col_max)
   1639         {
   1640             int i;
   1641 
   1642             fn_ptr->sdx8f(what, what_stride, check_here, in_what_stride, sad_array8);
   1643 
   1644             for (i = 0; i < 8; i++)
   1645             {
   1646                 thissad = sad_array8[i];
   1647 
   1648                 if (thissad < bestsad)
   1649                 {
   1650                     this_mv.as_mv.col = c;
   1651                     thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
   1652                                               mvsadcost, sad_per_bit);
   1653 
   1654                     if (thissad < bestsad)
   1655                     {
   1656                         bestsad = thissad;
   1657                         best_mv->as_mv.row = r;
   1658                         best_mv->as_mv.col = c;
   1659                         bestaddress = check_here;
   1660                     }
   1661                 }
   1662 
   1663                 check_here++;
   1664                 c++;
   1665             }
   1666         }
   1667 
   1668         while ((c + 2) < col_max)
   1669         {
   1670             int i;
   1671 
   1672             fn_ptr->sdx3f(what, what_stride, check_here , in_what_stride, sad_array);
   1673 
   1674             for (i = 0; i < 3; i++)
   1675             {
   1676                 thissad = sad_array[i];
   1677 
   1678                 if (thissad < bestsad)
   1679                 {
   1680                     this_mv.as_mv.col = c;
   1681                     thissad  += mvsad_err_cost(&this_mv, &fcenter_mv,
   1682                         mvsadcost, sad_per_bit);
   1683 
   1684                     if (thissad < bestsad)
   1685                     {
   1686                         bestsad = thissad;
   1687                         best_mv->as_mv.row = r;
   1688                         best_mv->as_mv.col = c;
   1689                         bestaddress = check_here;
   1690                     }
   1691                 }
   1692 
   1693                 check_here++;
   1694                 c++;
   1695             }
   1696         }
   1697 
   1698         while (c < col_max)
   1699         {
   1700             thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride);
   1701 
   1702             if (thissad < bestsad)
   1703             {
   1704                 this_mv.as_mv.col = c;
   1705                 thissad  += mvsad_err_cost(&this_mv, &fcenter_mv,
   1706                     mvsadcost, sad_per_bit);
   1707 
   1708                 if (thissad < bestsad)
   1709                 {
   1710                     bestsad = thissad;
   1711                     best_mv->as_mv.row = r;
   1712                     best_mv->as_mv.col = c;
   1713                     bestaddress = check_here;
   1714                 }
   1715             }
   1716 
   1717             check_here ++;
   1718             c ++;
   1719         }
   1720     }
   1721 
   1722     this_mv.as_mv.row = best_mv->as_mv.row * 8;
   1723     this_mv.as_mv.col = best_mv->as_mv.col * 8;
   1724 
   1725     return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad)
   1726            + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
   1727 }
   1728 
   1729 int vp8_refining_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
   1730                             int error_per_bit, int search_range,
   1731                             vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
   1732                             int_mv *center_mv)
   1733 {
   1734     MV neighbors[4] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0}};
   1735     int i, j;
   1736     short this_row_offset, this_col_offset;
   1737 
   1738     int what_stride = b->src_stride;
   1739     int pre_stride = x->e_mbd.pre.y_stride;
   1740     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
   1741     int in_what_stride = pre_stride;
   1742     unsigned char *what = (*(b->base_src) + b->src);
   1743     unsigned char *best_address = (unsigned char *)(base_pre + d->offset +
   1744         (ref_mv->as_mv.row * pre_stride) + ref_mv->as_mv.col);
   1745     unsigned char *check_here;
   1746     int_mv this_mv;
   1747     unsigned int bestsad;
   1748     unsigned int thissad;
   1749 
   1750     int *mvsadcost[2];
   1751     int_mv fcenter_mv;
   1752 
   1753     mvsadcost[0] = x->mvsadcost[0];
   1754     mvsadcost[1] = x->mvsadcost[1];
   1755     fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
   1756     fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
   1757 
   1758     bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride)
   1759             + mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit);
   1760 
   1761     for (i=0; i<search_range; i++)
   1762     {
   1763         int best_site = -1;
   1764 
   1765         for (j = 0 ; j < 4 ; j++)
   1766         {
   1767             this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
   1768             this_col_offset = ref_mv->as_mv.col + neighbors[j].col;
   1769 
   1770             if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
   1771             (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
   1772             {
   1773                 check_here = (neighbors[j].row)*in_what_stride + neighbors[j].col + best_address;
   1774                 thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride);
   1775 
   1776                 if (thissad < bestsad)
   1777                 {
   1778                     this_mv.as_mv.row = this_row_offset;
   1779                     this_mv.as_mv.col = this_col_offset;
   1780                     thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);
   1781 
   1782                     if (thissad < bestsad)
   1783                     {
   1784                         bestsad = thissad;
   1785                         best_site = j;
   1786                     }
   1787                 }
   1788             }
   1789         }
   1790 
   1791         if (best_site == -1)
   1792             break;
   1793         else
   1794         {
   1795             ref_mv->as_mv.row += neighbors[best_site].row;
   1796             ref_mv->as_mv.col += neighbors[best_site].col;
   1797             best_address += (neighbors[best_site].row)*in_what_stride + neighbors[best_site].col;
   1798         }
   1799     }
   1800 
   1801     this_mv.as_mv.row = ref_mv->as_mv.row << 3;
   1802     this_mv.as_mv.col = ref_mv->as_mv.col << 3;
   1803 
   1804     return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad)
   1805            + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
   1806 }
   1807 
   1808 int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
   1809                               int_mv *ref_mv, int error_per_bit,
   1810                               int search_range, vp8_variance_fn_ptr_t *fn_ptr,
   1811                               int *mvcost[2], int_mv *center_mv)
   1812 {
   1813     MV neighbors[4] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0}};
   1814     int i, j;
   1815     short this_row_offset, this_col_offset;
   1816 
   1817     int what_stride = b->src_stride;
   1818     int pre_stride = x->e_mbd.pre.y_stride;
   1819     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
   1820     int in_what_stride = pre_stride;
   1821     unsigned char *what = (*(b->base_src) + b->src);
   1822     unsigned char *best_address = (unsigned char *)(base_pre + d->offset +
   1823         (ref_mv->as_mv.row * pre_stride) + ref_mv->as_mv.col);
   1824     unsigned char *check_here;
   1825     int_mv this_mv;
   1826     unsigned int bestsad;
   1827     unsigned int thissad;
   1828 
   1829     int *mvsadcost[2];
   1830     int_mv fcenter_mv;
   1831 
   1832     mvsadcost[0] = x->mvsadcost[0];
   1833     mvsadcost[1] = x->mvsadcost[1];
   1834     fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
   1835     fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
   1836 
   1837     bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride)
   1838             + mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit);
   1839 
   1840     for (i=0; i<search_range; i++)
   1841     {
   1842         int best_site = -1;
   1843         int all_in = 1;
   1844 
   1845         all_in &= ((ref_mv->as_mv.row - 1) > x->mv_row_min);
   1846         all_in &= ((ref_mv->as_mv.row + 1) < x->mv_row_max);
   1847         all_in &= ((ref_mv->as_mv.col - 1) > x->mv_col_min);
   1848         all_in &= ((ref_mv->as_mv.col + 1) < x->mv_col_max);
   1849 
   1850         if(all_in)
   1851         {
   1852             unsigned int sad_array[4];
   1853             const unsigned char *block_offset[4];
   1854             block_offset[0] = best_address - in_what_stride;
   1855             block_offset[1] = best_address - 1;
   1856             block_offset[2] = best_address + 1;
   1857             block_offset[3] = best_address + in_what_stride;
   1858 
   1859             fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, sad_array);
   1860 
   1861             for (j = 0; j < 4; j++)
   1862             {
   1863                 if (sad_array[j] < bestsad)
   1864                 {
   1865                     this_mv.as_mv.row = ref_mv->as_mv.row + neighbors[j].row;
   1866                     this_mv.as_mv.col = ref_mv->as_mv.col + neighbors[j].col;
   1867                     sad_array[j] += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);
   1868 
   1869                     if (sad_array[j] < bestsad)
   1870                     {
   1871                         bestsad = sad_array[j];
   1872                         best_site = j;
   1873                     }
   1874                 }
   1875             }
   1876         }
   1877         else
   1878         {
   1879             for (j = 0 ; j < 4 ; j++)
   1880             {
   1881                 this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
   1882                 this_col_offset = ref_mv->as_mv.col + neighbors[j].col;
   1883 
   1884                 if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
   1885                 (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
   1886                 {
   1887                     check_here = (neighbors[j].row)*in_what_stride + neighbors[j].col + best_address;
   1888                     thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride);
   1889 
   1890                     if (thissad < bestsad)
   1891                     {
   1892                         this_mv.as_mv.row = this_row_offset;
   1893                         this_mv.as_mv.col = this_col_offset;
   1894                         thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);
   1895 
   1896                         if (thissad < bestsad)
   1897                         {
   1898                             bestsad = thissad;
   1899                             best_site = j;
   1900                         }
   1901                     }
   1902                 }
   1903             }
   1904         }
   1905 
   1906         if (best_site == -1)
   1907             break;
   1908         else
   1909         {
   1910             ref_mv->as_mv.row += neighbors[best_site].row;
   1911             ref_mv->as_mv.col += neighbors[best_site].col;
   1912             best_address += (neighbors[best_site].row)*in_what_stride + neighbors[best_site].col;
   1913         }
   1914     }
   1915 
   1916     this_mv.as_mv.row = ref_mv->as_mv.row * 8;
   1917     this_mv.as_mv.col = ref_mv->as_mv.col * 8;
   1918 
   1919     return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad)
   1920            + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
   1921 }
   1922 
   1923 #ifdef VP8_ENTROPY_STATS
   1924 void print_mode_context(void)
   1925 {
   1926     FILE *f = fopen("modecont.c", "w");
   1927     int i, j;
   1928 
   1929     fprintf(f, "#include \"entropy.h\"\n");
   1930     fprintf(f, "const int vp8_mode_contexts[6][4] =\n");
   1931     fprintf(f, "{\n");
   1932 
   1933     for (j = 0; j < 6; j++)
   1934     {
   1935         fprintf(f, "  { /* %d */\n", j);
   1936         fprintf(f, "    ");
   1937 
   1938         for (i = 0; i < 4; i++)
   1939         {
   1940             int overal_prob;
   1941             int this_prob;
   1942             int count;
   1943 
   1944             /* Overall probs */
   1945             count = mv_mode_cts[i][0] + mv_mode_cts[i][1];
   1946 
   1947             if (count)
   1948                 overal_prob = 256 * mv_mode_cts[i][0] / count;
   1949             else
   1950                 overal_prob = 128;
   1951 
   1952             if (overal_prob == 0)
   1953                 overal_prob = 1;
   1954 
   1955             /* context probs */
   1956             count = mv_ref_ct[j][i][0] + mv_ref_ct[j][i][1];
   1957 
   1958             if (count)
   1959                 this_prob = 256 * mv_ref_ct[j][i][0] / count;
   1960             else
   1961                 this_prob = 128;
   1962 
   1963             if (this_prob == 0)
   1964                 this_prob = 1;
   1965 
   1966             fprintf(f, "%5d, ", this_prob);
   1967         }
   1968 
   1969         fprintf(f, "  },\n");
   1970     }
   1971 
   1972     fprintf(f, "};\n");
   1973     fclose(f);
   1974 }
   1975 
   1976 /* MV ref count VP8_ENTROPY_STATS stats code */
   1977 #ifdef VP8_ENTROPY_STATS
   1978 void init_mv_ref_counts()
   1979 {
   1980     memset(mv_ref_ct, 0, sizeof(mv_ref_ct));
   1981     memset(mv_mode_cts, 0, sizeof(mv_mode_cts));
   1982 }
   1983 
   1984 void accum_mv_refs(MB_PREDICTION_MODE m, const int ct[4])
   1985 {
   1986     if (m == ZEROMV)
   1987     {
   1988         ++mv_ref_ct [ct[0]] [0] [0];
   1989         ++mv_mode_cts[0][0];
   1990     }
   1991     else
   1992     {
   1993         ++mv_ref_ct [ct[0]] [0] [1];
   1994         ++mv_mode_cts[0][1];
   1995 
   1996         if (m == NEARESTMV)
   1997         {
   1998             ++mv_ref_ct [ct[1]] [1] [0];
   1999             ++mv_mode_cts[1][0];
   2000         }
   2001         else
   2002         {
   2003             ++mv_ref_ct [ct[1]] [1] [1];
   2004             ++mv_mode_cts[1][1];
   2005 
   2006             if (m == NEARMV)
   2007             {
   2008                 ++mv_ref_ct [ct[2]] [2] [0];
   2009                 ++mv_mode_cts[2][0];
   2010             }
   2011             else
   2012             {
   2013                 ++mv_ref_ct [ct[2]] [2] [1];
   2014                 ++mv_mode_cts[2][1];
   2015 
   2016                 if (m == NEWMV)
   2017                 {
   2018                     ++mv_ref_ct [ct[3]] [3] [0];
   2019                     ++mv_mode_cts[3][0];
   2020                 }
   2021                 else
   2022                 {
   2023                     ++mv_ref_ct [ct[3]] [3] [1];
   2024                     ++mv_mode_cts[3][1];
   2025                 }
   2026             }
   2027         }
   2028     }
   2029 }
   2030 
   2031 #endif/* END MV ref count VP8_ENTROPY_STATS stats code */
   2032 
   2033 #endif
   2034