Home | History | Annotate | Download | only in encoder
      1 /*
      2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 
     12 #include "onyx_int.h"
     13 #include "mcomp.h"
     14 #include "vpx_mem/vpx_mem.h"
     15 #include "vpx_config.h"
     16 #include <stdio.h>
     17 #include <limits.h>
     18 #include <math.h>
     19 #include "vp8/common/findnearmv.h"
     20 
     21 #ifdef VP8_ENTROPY_STATS
     22 static int mv_ref_ct [31] [4] [2];
     23 static int mv_mode_cts [4] [2];
     24 #endif
     25 
     26 int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int Weight)
     27 {
     28     /* MV costing is based on the distribution of vectors in the previous
     29      * frame and as such will tend to over state the cost of vectors. In
     30      * addition coding a new vector can have a knock on effect on the cost
     31      * of subsequent vectors and the quality of prediction from NEAR and
     32      * NEAREST for subsequent blocks. The "Weight" parameter allows, to a
     33      * limited extent, for some account to be taken of these factors.
     34      */
     35     return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1] + mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1]) * Weight) >> 7;
     36 }
     37 
     38 static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int error_per_bit)
     39 {
     40     /* Ignore mv costing if mvcost is NULL */
     41     if (mvcost)
     42         return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1] +
     43                  mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1])
     44                  * error_per_bit + 128) >> 8;
     45     return 0;
     46 }
     47 
     48 static int mvsad_err_cost(int_mv *mv, int_mv *ref, int *mvsadcost[2], int error_per_bit)
     49 {
     50     /* Calculate sad error cost on full pixel basis. */
     51     /* Ignore mv costing if mvsadcost is NULL */
     52     if (mvsadcost)
     53         return ((mvsadcost[0][(mv->as_mv.row - ref->as_mv.row)] +
     54                  mvsadcost[1][(mv->as_mv.col - ref->as_mv.col)])
     55                 * error_per_bit + 128) >> 8;
     56     return 0;
     57 }
     58 
     59 void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride)
     60 {
     61     int Len;
     62     int search_site_count = 0;
     63 
     64 
     65     /* Generate offsets for 4 search sites per step. */
     66     Len = MAX_FIRST_STEP;
     67     x->ss[search_site_count].mv.col = 0;
     68     x->ss[search_site_count].mv.row = 0;
     69     x->ss[search_site_count].offset = 0;
     70     search_site_count++;
     71 
     72     while (Len > 0)
     73     {
     74 
     75         /* Compute offsets for search sites. */
     76         x->ss[search_site_count].mv.col = 0;
     77         x->ss[search_site_count].mv.row = -Len;
     78         x->ss[search_site_count].offset = -Len * stride;
     79         search_site_count++;
     80 
     81         /* Compute offsets for search sites. */
     82         x->ss[search_site_count].mv.col = 0;
     83         x->ss[search_site_count].mv.row = Len;
     84         x->ss[search_site_count].offset = Len * stride;
     85         search_site_count++;
     86 
     87         /* Compute offsets for search sites. */
     88         x->ss[search_site_count].mv.col = -Len;
     89         x->ss[search_site_count].mv.row = 0;
     90         x->ss[search_site_count].offset = -Len;
     91         search_site_count++;
     92 
     93         /* Compute offsets for search sites. */
     94         x->ss[search_site_count].mv.col = Len;
     95         x->ss[search_site_count].mv.row = 0;
     96         x->ss[search_site_count].offset = Len;
     97         search_site_count++;
     98 
     99         /* Contract. */
    100         Len /= 2;
    101     }
    102 
    103     x->ss_count = search_site_count;
    104     x->searches_per_step = 4;
    105 }
    106 
    107 void vp8_init3smotion_compensation(MACROBLOCK *x, int stride)
    108 {
    109     int Len;
    110     int search_site_count = 0;
    111 
    112     /* Generate offsets for 8 search sites per step. */
    113     Len = MAX_FIRST_STEP;
    114     x->ss[search_site_count].mv.col = 0;
    115     x->ss[search_site_count].mv.row = 0;
    116     x->ss[search_site_count].offset = 0;
    117     search_site_count++;
    118 
    119     while (Len > 0)
    120     {
    121 
    122         /* Compute offsets for search sites. */
    123         x->ss[search_site_count].mv.col = 0;
    124         x->ss[search_site_count].mv.row = -Len;
    125         x->ss[search_site_count].offset = -Len * stride;
    126         search_site_count++;
    127 
    128         /* Compute offsets for search sites. */
    129         x->ss[search_site_count].mv.col = 0;
    130         x->ss[search_site_count].mv.row = Len;
    131         x->ss[search_site_count].offset = Len * stride;
    132         search_site_count++;
    133 
    134         /* Compute offsets for search sites. */
    135         x->ss[search_site_count].mv.col = -Len;
    136         x->ss[search_site_count].mv.row = 0;
    137         x->ss[search_site_count].offset = -Len;
    138         search_site_count++;
    139 
    140         /* Compute offsets for search sites. */
    141         x->ss[search_site_count].mv.col = Len;
    142         x->ss[search_site_count].mv.row = 0;
    143         x->ss[search_site_count].offset = Len;
    144         search_site_count++;
    145 
    146         /* Compute offsets for search sites. */
    147         x->ss[search_site_count].mv.col = -Len;
    148         x->ss[search_site_count].mv.row = -Len;
    149         x->ss[search_site_count].offset = -Len * stride - Len;
    150         search_site_count++;
    151 
    152         /* Compute offsets for search sites. */
    153         x->ss[search_site_count].mv.col = Len;
    154         x->ss[search_site_count].mv.row = -Len;
    155         x->ss[search_site_count].offset = -Len * stride + Len;
    156         search_site_count++;
    157 
    158         /* Compute offsets for search sites. */
    159         x->ss[search_site_count].mv.col = -Len;
    160         x->ss[search_site_count].mv.row = Len;
    161         x->ss[search_site_count].offset = Len * stride - Len;
    162         search_site_count++;
    163 
    164         /* Compute offsets for search sites. */
    165         x->ss[search_site_count].mv.col = Len;
    166         x->ss[search_site_count].mv.row = Len;
    167         x->ss[search_site_count].offset = Len * stride + Len;
    168         search_site_count++;
    169 
    170 
    171         /* Contract. */
    172         Len /= 2;
    173     }
    174 
    175     x->ss_count = search_site_count;
    176     x->searches_per_step = 8;
    177 }
    178 
/*
 * To avoid the penalty of reads that cross a cache line, the reference area
 * is preloaded into a small aligned buffer, so that no read from this buffer
 * crosses a cache line. This reduces the CPU cycles spent reading reference
 * data in the sub-pixel filter functions.
 * TODO: Currently, since the sub-pixel search range here is -3 ~ 3, a 22 rows x
 * 32 cols area is copied, which is enough for a 16x16 macroblock. Later, for
 * SPLITMV, the area could be reduced.
 */
    188 
    189 /* estimated cost of a motion vector (r,c) */
    190 #define MVC(r,c) (mvcost ? ((mvcost[0][(r)-rr] + mvcost[1][(c) - rc]) * error_per_bit + 128 )>>8 : 0)
    191 /* pointer to predictor base of a motionvector */
    192 #define PRE(r,c) (y + (((r)>>2) * y_stride + ((c)>>2) -(offset)))
    193 /* convert motion vector component to offset for svf calc */
    194 #define SP(x) (((x)&3)<<1)
    195 /* returns subpixel variance error function. */
    196 #define DIST(r,c) vfp->svf( PRE(r,c), y_stride, SP(c),SP(r), z,b->src_stride,&sse)
    197 #define IFMVCV(r,c,s,e) if ( c >= minc && c <= maxc && r >= minr && r <= maxr) s else e;
    198 /* returns distortion + motion vector cost */
    199 #define ERR(r,c) (MVC(r,c)+DIST(r,c))
    200 /* checks if (r,c) has better score than previous best */
    201 #define CHECK_BETTER(v,r,c) IFMVCV(r,c,{thismse = DIST(r,c); if((v = (MVC(r,c)+thismse)) < besterr) { besterr = v; br=r; bc=c; *distortion = thismse; *sse1 = sse; }}, v=UINT_MAX;)
    202 
    203 int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
    204                                              int_mv *bestmv, int_mv *ref_mv,
    205                                              int error_per_bit,
    206                                              const vp8_variance_fn_ptr_t *vfp,
    207                                              int *mvcost[2], int *distortion,
    208                                              unsigned int *sse1)
    209 {
    210     unsigned char *z = (*(b->base_src) + b->src);
    211 
    212     int rr = ref_mv->as_mv.row >> 1, rc = ref_mv->as_mv.col >> 1;
    213     int br = bestmv->as_mv.row * 4, bc = bestmv->as_mv.col * 4;
    214     int tr = br, tc = bc;
    215     unsigned int besterr;
    216     unsigned int left, right, up, down, diag;
    217     unsigned int sse;
    218     unsigned int whichdir;
    219     unsigned int halfiters = 4;
    220     unsigned int quarteriters = 4;
    221     int thismse;
    222 
    223     int minc = MAX(x->mv_col_min * 4,
    224                    (ref_mv->as_mv.col >> 1) - ((1 << mvlong_width) - 1));
    225     int maxc = MIN(x->mv_col_max * 4,
    226                    (ref_mv->as_mv.col >> 1) + ((1 << mvlong_width) - 1));
    227     int minr = MAX(x->mv_row_min * 4,
    228                    (ref_mv->as_mv.row >> 1) - ((1 << mvlong_width) - 1));
    229     int maxr = MIN(x->mv_row_max * 4,
    230                    (ref_mv->as_mv.row >> 1) + ((1 << mvlong_width) - 1));
    231 
    232     int y_stride;
    233     int offset;
    234     int pre_stride = x->e_mbd.pre.y_stride;
    235     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
    236 
    237 
    238 #if ARCH_X86_32 || ARCH_X86_64
    239     MACROBLOCKD *xd = &x->e_mbd;
    240     unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
    241     unsigned char *y;
    242     int buf_r1, buf_r2, buf_c1;
    243 
    244     /* Clamping to avoid out-of-range data access */
    245     buf_r1 = ((bestmv->as_mv.row - 3) < x->mv_row_min)?(bestmv->as_mv.row - x->mv_row_min):3;
    246     buf_r2 = ((bestmv->as_mv.row + 3) > x->mv_row_max)?(x->mv_row_max - bestmv->as_mv.row):3;
    247     buf_c1 = ((bestmv->as_mv.col - 3) < x->mv_col_min)?(bestmv->as_mv.col - x->mv_col_min):3;
    248     y_stride = 32;
    249 
    250     /* Copy to intermediate buffer before searching. */
    251     vfp->copymem(y_0 - buf_c1 - pre_stride*buf_r1, pre_stride, xd->y_buf, y_stride, 16+buf_r1+buf_r2);
    252     y = xd->y_buf + y_stride*buf_r1 +buf_c1;
    253 #else
    254     unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
    255     y_stride = pre_stride;
    256 #endif
    257 
    258     offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;
    259 
    260     /* central mv */
    261     bestmv->as_mv.row *= 8;
    262     bestmv->as_mv.col *= 8;
    263 
    264     /* calculate central point error */
    265     besterr = vfp->vf(y, y_stride, z, b->src_stride, sse1);
    266     *distortion = besterr;
    267     besterr += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
    268 
    269     /* TODO: Each subsequent iteration checks at least one point in common
    270      * with the last iteration could be 2 ( if diag selected)
    271      */
    272     while (--halfiters)
    273     {
    274         /* 1/2 pel */
    275         CHECK_BETTER(left, tr, tc - 2);
    276         CHECK_BETTER(right, tr, tc + 2);
    277         CHECK_BETTER(up, tr - 2, tc);
    278         CHECK_BETTER(down, tr + 2, tc);
    279 
    280         whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
    281 
    282         switch (whichdir)
    283         {
    284         case 0:
    285             CHECK_BETTER(diag, tr - 2, tc - 2);
    286             break;
    287         case 1:
    288             CHECK_BETTER(diag, tr - 2, tc + 2);
    289             break;
    290         case 2:
    291             CHECK_BETTER(diag, tr + 2, tc - 2);
    292             break;
    293         case 3:
    294             CHECK_BETTER(diag, tr + 2, tc + 2);
    295             break;
    296         }
    297 
    298         /* no reason to check the same one again. */
    299         if (tr == br && tc == bc)
    300             break;
    301 
    302         tr = br;
    303         tc = bc;
    304     }
    305 
    306     /* TODO: Each subsequent iteration checks at least one point in common
    307      * with the last iteration could be 2 ( if diag selected)
    308      */
    309 
    310     /* 1/4 pel */
    311     while (--quarteriters)
    312     {
    313         CHECK_BETTER(left, tr, tc - 1);
    314         CHECK_BETTER(right, tr, tc + 1);
    315         CHECK_BETTER(up, tr - 1, tc);
    316         CHECK_BETTER(down, tr + 1, tc);
    317 
    318         whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
    319 
    320         switch (whichdir)
    321         {
    322         case 0:
    323             CHECK_BETTER(diag, tr - 1, tc - 1);
    324             break;
    325         case 1:
    326             CHECK_BETTER(diag, tr - 1, tc + 1);
    327             break;
    328         case 2:
    329             CHECK_BETTER(diag, tr + 1, tc - 1);
    330             break;
    331         case 3:
    332             CHECK_BETTER(diag, tr + 1, tc + 1);
    333             break;
    334         }
    335 
    336         /* no reason to check the same one again. */
    337         if (tr == br && tc == bc)
    338             break;
    339 
    340         tr = br;
    341         tc = bc;
    342     }
    343 
    344     bestmv->as_mv.row = br * 2;
    345     bestmv->as_mv.col = bc * 2;
    346 
    347     if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL<<3)) ||
    348         (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL<<3)))
    349         return INT_MAX;
    350 
    351     return besterr;
    352 }
    353 #undef MVC
    354 #undef PRE
    355 #undef SP
    356 #undef DIST
    357 #undef IFMVCV
    358 #undef ERR
    359 #undef CHECK_BETTER
    360 
    361 int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
    362                                  int_mv *bestmv, int_mv *ref_mv,
    363                                  int error_per_bit,
    364                                  const vp8_variance_fn_ptr_t *vfp,
    365                                  int *mvcost[2], int *distortion,
    366                                  unsigned int *sse1)
    367 {
    368     int bestmse = INT_MAX;
    369     int_mv startmv;
    370     int_mv this_mv;
    371     unsigned char *z = (*(b->base_src) + b->src);
    372     int left, right, up, down, diag;
    373     unsigned int sse;
    374     int whichdir ;
    375     int thismse;
    376     int y_stride;
    377     int pre_stride = x->e_mbd.pre.y_stride;
    378     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
    379 
    380 #if ARCH_X86_32 || ARCH_X86_64
    381     MACROBLOCKD *xd = &x->e_mbd;
    382     unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
    383     unsigned char *y;
    384 
    385     y_stride = 32;
    386     /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
    387      vfp->copymem(y_0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18);
    388      y = xd->y_buf + y_stride + 1;
    389 #else
    390      unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
    391      y_stride = pre_stride;
    392 #endif
    393 
    394     /* central mv */
    395     bestmv->as_mv.row <<= 3;
    396     bestmv->as_mv.col <<= 3;
    397     startmv = *bestmv;
    398 
    399     /* calculate central point error */
    400     bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
    401     *distortion = bestmse;
    402     bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
    403 
    404     /* go left then right and check error */
    405     this_mv.as_mv.row = startmv.as_mv.row;
    406     this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
    407     thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, b->src_stride, &sse);
    408     left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    409 
    410     if (left < bestmse)
    411     {
    412         *bestmv = this_mv;
    413         bestmse = left;
    414         *distortion = thismse;
    415         *sse1 = sse;
    416     }
    417 
    418     this_mv.as_mv.col += 8;
    419     thismse = vfp->svf_halfpix_h(y, y_stride, z, b->src_stride, &sse);
    420     right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    421 
    422     if (right < bestmse)
    423     {
    424         *bestmv = this_mv;
    425         bestmse = right;
    426         *distortion = thismse;
    427         *sse1 = sse;
    428     }
    429 
    430     /* go up then down and check error */
    431     this_mv.as_mv.col = startmv.as_mv.col;
    432     this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
    433     thismse =  vfp->svf_halfpix_v(y - y_stride, y_stride, z, b->src_stride, &sse);
    434     up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    435 
    436     if (up < bestmse)
    437     {
    438         *bestmv = this_mv;
    439         bestmse = up;
    440         *distortion = thismse;
    441         *sse1 = sse;
    442     }
    443 
    444     this_mv.as_mv.row += 8;
    445     thismse = vfp->svf_halfpix_v(y, y_stride, z, b->src_stride, &sse);
    446     down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    447 
    448     if (down < bestmse)
    449     {
    450         *bestmv = this_mv;
    451         bestmse = down;
    452         *distortion = thismse;
    453         *sse1 = sse;
    454     }
    455 
    456 
    457     /* now check 1 more diagonal */
    458     whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
    459     this_mv = startmv;
    460 
    461     switch (whichdir)
    462     {
    463     case 0:
    464         this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
    465         this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
    466         thismse = vfp->svf_halfpix_hv(y - 1 - y_stride, y_stride, z, b->src_stride, &sse);
    467         break;
    468     case 1:
    469         this_mv.as_mv.col += 4;
    470         this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
    471         thismse = vfp->svf_halfpix_hv(y - y_stride, y_stride, z, b->src_stride, &sse);
    472         break;
    473     case 2:
    474         this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
    475         this_mv.as_mv.row += 4;
    476         thismse = vfp->svf_halfpix_hv(y - 1, y_stride, z, b->src_stride, &sse);
    477         break;
    478     case 3:
    479     default:
    480         this_mv.as_mv.col += 4;
    481         this_mv.as_mv.row += 4;
    482         thismse = vfp->svf_halfpix_hv(y, y_stride, z, b->src_stride, &sse);
    483         break;
    484     }
    485 
    486     diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    487 
    488     if (diag < bestmse)
    489     {
    490         *bestmv = this_mv;
    491         bestmse = diag;
    492         *distortion = thismse;
    493         *sse1 = sse;
    494     }
    495 
    496 
    497     /* time to check quarter pels. */
    498     if (bestmv->as_mv.row < startmv.as_mv.row)
    499         y -= y_stride;
    500 
    501     if (bestmv->as_mv.col < startmv.as_mv.col)
    502         y--;
    503 
    504     startmv = *bestmv;
    505 
    506 
    507 
    508     /* go left then right and check error */
    509     this_mv.as_mv.row = startmv.as_mv.row;
    510 
    511     if (startmv.as_mv.col & 7)
    512     {
    513         this_mv.as_mv.col = startmv.as_mv.col - 2;
    514         thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    515     }
    516     else
    517     {
    518         this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
    519         thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    520     }
    521 
    522     left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    523 
    524     if (left < bestmse)
    525     {
    526         *bestmv = this_mv;
    527         bestmse = left;
    528         *distortion = thismse;
    529         *sse1 = sse;
    530     }
    531 
    532     this_mv.as_mv.col += 4;
    533     thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    534     right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    535 
    536     if (right < bestmse)
    537     {
    538         *bestmv = this_mv;
    539         bestmse = right;
    540         *distortion = thismse;
    541         *sse1 = sse;
    542     }
    543 
    544     /* go up then down and check error */
    545     this_mv.as_mv.col = startmv.as_mv.col;
    546 
    547     if (startmv.as_mv.row & 7)
    548     {
    549         this_mv.as_mv.row = startmv.as_mv.row - 2;
    550         thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    551     }
    552     else
    553     {
    554         this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
    555         thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z, b->src_stride, &sse);
    556     }
    557 
    558     up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    559 
    560     if (up < bestmse)
    561     {
    562         *bestmv = this_mv;
    563         bestmse = up;
    564         *distortion = thismse;
    565         *sse1 = sse;
    566     }
    567 
    568     this_mv.as_mv.row += 4;
    569     thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    570     down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    571 
    572     if (down < bestmse)
    573     {
    574         *bestmv = this_mv;
    575         bestmse = down;
    576         *distortion = thismse;
    577         *sse1 = sse;
    578     }
    579 
    580 
    581     /* now check 1 more diagonal */
    582     whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
    583 
    584     this_mv = startmv;
    585 
    586     switch (whichdir)
    587     {
    588     case 0:
    589 
    590         if (startmv.as_mv.row & 7)
    591         {
    592             this_mv.as_mv.row -= 2;
    593 
    594             if (startmv.as_mv.col & 7)
    595             {
    596                 this_mv.as_mv.col -= 2;
    597                 thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    598             }
    599             else
    600             {
    601                 this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
    602                 thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z, b->src_stride, &sse);;
    603             }
    604         }
    605         else
    606         {
    607             this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
    608 
    609             if (startmv.as_mv.col & 7)
    610             {
    611                 this_mv.as_mv.col -= 2;
    612                 thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z, b->src_stride, &sse);
    613             }
    614             else
    615             {
    616                 this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
    617                 thismse = vfp->svf(y - y_stride - 1, y_stride, 6, 6, z, b->src_stride, &sse);
    618             }
    619         }
    620 
    621         break;
    622     case 1:
    623         this_mv.as_mv.col += 2;
    624 
    625         if (startmv.as_mv.row & 7)
    626         {
    627             this_mv.as_mv.row -= 2;
    628             thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    629         }
    630         else
    631         {
    632             this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
    633             thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z, b->src_stride, &sse);
    634         }
    635 
    636         break;
    637     case 2:
    638         this_mv.as_mv.row += 2;
    639 
    640         if (startmv.as_mv.col & 7)
    641         {
    642             this_mv.as_mv.col -= 2;
    643             thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    644         }
    645         else
    646         {
    647             this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
    648             thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    649         }
    650 
    651         break;
    652     case 3:
    653         this_mv.as_mv.col += 2;
    654         this_mv.as_mv.row += 2;
    655         thismse = vfp->svf(y, y_stride,  this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
    656         break;
    657     }
    658 
    659     diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    660 
    661     if (diag < bestmse)
    662     {
    663         *bestmv = this_mv;
    664         bestmse = diag;
    665         *distortion = thismse;
    666         *sse1 = sse;
    667     }
    668 
    669     return bestmse;
    670 }
    671 
    672 int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
    673                                   int_mv *bestmv, int_mv *ref_mv,
    674                                   int error_per_bit,
    675                                   const vp8_variance_fn_ptr_t *vfp,
    676                                   int *mvcost[2], int *distortion,
    677                                   unsigned int *sse1)
    678 {
    679     int bestmse = INT_MAX;
    680     int_mv startmv;
    681     int_mv this_mv;
    682     unsigned char *z = (*(b->base_src) + b->src);
    683     int left, right, up, down, diag;
    684     unsigned int sse;
    685     int whichdir ;
    686     int thismse;
    687     int y_stride;
    688     int pre_stride = x->e_mbd.pre.y_stride;
    689     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
    690 
    691 #if ARCH_X86_32 || ARCH_X86_64
    692     MACROBLOCKD *xd = &x->e_mbd;
    693     unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
    694     unsigned char *y;
    695 
    696     y_stride = 32;
    697     /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
    698     vfp->copymem(y_0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18);
    699     y = xd->y_buf + y_stride + 1;
    700 #else
    701     unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
    702     y_stride = pre_stride;
    703 #endif
    704 
    705     /* central mv */
    706     bestmv->as_mv.row *= 8;
    707     bestmv->as_mv.col *= 8;
    708     startmv = *bestmv;
    709 
    710     /* calculate central point error */
    711     bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
    712     *distortion = bestmse;
    713     bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
    714 
    715     /* go left then right and check error */
    716     this_mv.as_mv.row = startmv.as_mv.row;
    717     this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
    718     thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, b->src_stride, &sse);
    719     left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    720 
    721     if (left < bestmse)
    722     {
    723         *bestmv = this_mv;
    724         bestmse = left;
    725         *distortion = thismse;
    726         *sse1 = sse;
    727     }
    728 
    729     this_mv.as_mv.col += 8;
    730     thismse = vfp->svf_halfpix_h(y, y_stride, z, b->src_stride, &sse);
    731     right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    732 
    733     if (right < bestmse)
    734     {
    735         *bestmv = this_mv;
    736         bestmse = right;
    737         *distortion = thismse;
    738         *sse1 = sse;
    739     }
    740 
    741     /* go up then down and check error */
    742     this_mv.as_mv.col = startmv.as_mv.col;
    743     this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
    744     thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, b->src_stride, &sse);
    745     up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    746 
    747     if (up < bestmse)
    748     {
    749         *bestmv = this_mv;
    750         bestmse = up;
    751         *distortion = thismse;
    752         *sse1 = sse;
    753     }
    754 
    755     this_mv.as_mv.row += 8;
    756     thismse = vfp->svf_halfpix_v(y, y_stride, z, b->src_stride, &sse);
    757     down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    758 
    759     if (down < bestmse)
    760     {
    761         *bestmv = this_mv;
    762         bestmse = down;
    763         *distortion = thismse;
    764         *sse1 = sse;
    765     }
    766 
    767     /* now check 1 more diagonal - */
    768     whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
    769     this_mv = startmv;
    770 
    771     switch (whichdir)
    772     {
    773     case 0:
    774         this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
    775         this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
    776         thismse = vfp->svf_halfpix_hv(y - 1 - y_stride, y_stride, z, b->src_stride, &sse);
    777         break;
    778     case 1:
    779         this_mv.as_mv.col += 4;
    780         this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
    781         thismse = vfp->svf_halfpix_hv(y - y_stride, y_stride, z, b->src_stride, &sse);
    782         break;
    783     case 2:
    784         this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
    785         this_mv.as_mv.row += 4;
    786         thismse = vfp->svf_halfpix_hv(y - 1, y_stride, z, b->src_stride, &sse);
    787         break;
    788     case 3:
    789     default:
    790         this_mv.as_mv.col += 4;
    791         this_mv.as_mv.row += 4;
    792         thismse = vfp->svf_halfpix_hv(y, y_stride, z, b->src_stride, &sse);
    793         break;
    794     }
    795 
    796     diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    797 
    798     if (diag < bestmse)
    799     {
    800         *bestmv = this_mv;
    801         bestmse = diag;
    802         *distortion = thismse;
    803         *sse1 = sse;
    804     }
    805 
    806     return bestmse;
    807 }
    808 
    809 #define CHECK_BOUNDS(range) \
    810 {\
    811     all_in = 1;\
    812     all_in &= ((br-range) >= x->mv_row_min);\
    813     all_in &= ((br+range) <= x->mv_row_max);\
    814     all_in &= ((bc-range) >= x->mv_col_min);\
    815     all_in &= ((bc+range) <= x->mv_col_max);\
    816 }
    817 
    818 #define CHECK_POINT \
    819 {\
    820     if (this_mv.as_mv.col < x->mv_col_min) continue;\
    821     if (this_mv.as_mv.col > x->mv_col_max) continue;\
    822     if (this_mv.as_mv.row < x->mv_row_min) continue;\
    823     if (this_mv.as_mv.row > x->mv_row_max) continue;\
    824 }
    825 
    826 #define CHECK_BETTER \
    827 {\
    828     if (thissad < bestsad)\
    829     {\
    830         thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);\
    831         if (thissad < bestsad)\
    832         {\
    833             bestsad = thissad;\
    834             best_site = i;\
    835         }\
    836     }\
    837 }
    838 
    839 static const MV next_chkpts[6][3] =
    840 {
    841     {{ -2, 0}, { -1, -2}, {1, -2}},
    842     {{ -1, -2}, {1, -2}, {2, 0}},
    843     {{1, -2}, {2, 0}, {1, 2}},
    844     {{2, 0}, {1, 2}, { -1, 2}},
    845     {{1, 2}, { -1, 2}, { -2, 0}},
    846     {{ -1, 2}, { -2, 0}, { -1, -2}}
    847 };
    848 
    849 int vp8_hex_search
    850 (
    851     MACROBLOCK *x,
    852     BLOCK *b,
    853     BLOCKD *d,
    854     int_mv *ref_mv,
    855     int_mv *best_mv,
    856     int search_param,
    857     int sad_per_bit,
    858     const vp8_variance_fn_ptr_t *vfp,
    859     int *mvsadcost[2],
    860     int *mvcost[2],
    861     int_mv *center_mv
    862 )
    863 {
    864     MV hex[6] = { { -1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0} } ;
    865     MV neighbors[4] = {{0, -1}, { -1, 0}, {1, 0}, {0, 1}} ;
    866     int i, j;
    867 
    868     unsigned char *what = (*(b->base_src) + b->src);
    869     int what_stride = b->src_stride;
    870     int pre_stride = x->e_mbd.pre.y_stride;
    871     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
    872 
    873     int in_what_stride = pre_stride;
    874     int br, bc;
    875     int_mv this_mv;
    876     unsigned int bestsad;
    877     unsigned int thissad;
    878     unsigned char *base_offset;
    879     unsigned char *this_offset;
    880     int k = -1;
    881     int all_in;
    882     int best_site = -1;
    883     int hex_range = 127;
    884     int dia_range = 8;
    885 
    886     int_mv fcenter_mv;
    887     fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
    888     fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
    889 
    890     (void)mvcost;
    891     (void)search_param;
    892     /* adjust ref_mv to make sure it is within MV range */
    893     vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
    894     br = ref_mv->as_mv.row;
    895     bc = ref_mv->as_mv.col;
    896 
    897     /* Work out the start point for the search */
    898     base_offset = (unsigned char *)(base_pre + d->offset);
    899     this_offset = base_offset + (br * (pre_stride)) + bc;
    900     this_mv.as_mv.row = br;
    901     this_mv.as_mv.col = bc;
    902     bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride, UINT_MAX)
    903             + mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
    904 
    905 #if CONFIG_MULTI_RES_ENCODING
    906     /* Lower search range based on prediction info */
    907     if (search_param >= 6) goto cal_neighbors;
    908     else if (search_param >= 5) hex_range = 4;
    909     else if (search_param >= 4) hex_range = 6;
    910     else if (search_param >= 3) hex_range = 15;
    911     else if (search_param >= 2) hex_range = 31;
    912     else if (search_param >= 1) hex_range = 63;
    913 
    914     dia_range = 8;
    915 #endif
    916 
    917     /* hex search */
    918     CHECK_BOUNDS(2)
    919 
    920     if(all_in)
    921     {
    922         for (i = 0; i < 6; i++)
    923         {
    924             this_mv.as_mv.row = br + hex[i].row;
    925             this_mv.as_mv.col = bc + hex[i].col;
    926             this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col;
    927             thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
    928             CHECK_BETTER
    929         }
    930     }else
    931     {
    932         for (i = 0; i < 6; i++)
    933         {
    934             this_mv.as_mv.row = br + hex[i].row;
    935             this_mv.as_mv.col = bc + hex[i].col;
    936             CHECK_POINT
    937             this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col;
    938             thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
    939             CHECK_BETTER
    940         }
    941     }
    942 
    943     if (best_site == -1)
    944         goto cal_neighbors;
    945     else
    946     {
    947         br += hex[best_site].row;
    948         bc += hex[best_site].col;
    949         k = best_site;
    950     }
    951 
    952     for (j = 1; j < hex_range; j++)
    953     {
    954         best_site = -1;
    955         CHECK_BOUNDS(2)
    956 
    957         if(all_in)
    958         {
    959             for (i = 0; i < 3; i++)
    960             {
    961                 this_mv.as_mv.row = br + next_chkpts[k][i].row;
    962                 this_mv.as_mv.col = bc + next_chkpts[k][i].col;
    963                 this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
    964                 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
    965                 CHECK_BETTER
    966             }
    967         }else
    968         {
    969             for (i = 0; i < 3; i++)
    970             {
    971                 this_mv.as_mv.row = br + next_chkpts[k][i].row;
    972                 this_mv.as_mv.col = bc + next_chkpts[k][i].col;
    973                 CHECK_POINT
    974                 this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
    975                 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
    976                 CHECK_BETTER
    977             }
    978         }
    979 
    980         if (best_site == -1)
    981             break;
    982         else
    983         {
    984             br += next_chkpts[k][best_site].row;
    985             bc += next_chkpts[k][best_site].col;
    986             k += 5 + best_site;
    987             if (k >= 12) k -= 12;
    988             else if (k >= 6) k -= 6;
    989         }
    990     }
    991 
    992     /* check 4 1-away neighbors */
    993 cal_neighbors:
    994     for (j = 0; j < dia_range; j++)
    995     {
    996         best_site = -1;
    997         CHECK_BOUNDS(1)
    998 
    999         if(all_in)
   1000         {
   1001             for (i = 0; i < 4; i++)
   1002             {
   1003                 this_mv.as_mv.row = br + neighbors[i].row;
   1004                 this_mv.as_mv.col = bc + neighbors[i].col;
   1005                 this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
   1006                 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
   1007                 CHECK_BETTER
   1008             }
   1009         }else
   1010         {
   1011             for (i = 0; i < 4; i++)
   1012             {
   1013                 this_mv.as_mv.row = br + neighbors[i].row;
   1014                 this_mv.as_mv.col = bc + neighbors[i].col;
   1015                 CHECK_POINT
   1016                 this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
   1017                 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
   1018                 CHECK_BETTER
   1019             }
   1020         }
   1021 
   1022         if (best_site == -1)
   1023             break;
   1024         else
   1025         {
   1026             br += neighbors[best_site].row;
   1027             bc += neighbors[best_site].col;
   1028         }
   1029     }
   1030 
   1031     best_mv->as_mv.row = br;
   1032     best_mv->as_mv.col = bc;
   1033 
   1034     return bestsad;
   1035 }
   1036 #undef CHECK_BOUNDS
   1037 #undef CHECK_POINT
   1038 #undef CHECK_BETTER
   1039 
   1040 int vp8_diamond_search_sad_c
   1041 (
   1042     MACROBLOCK *x,
   1043     BLOCK *b,
   1044     BLOCKD *d,
   1045     int_mv *ref_mv,
   1046     int_mv *best_mv,
   1047     int search_param,
   1048     int sad_per_bit,
   1049     int *num00,
   1050     vp8_variance_fn_ptr_t *fn_ptr,
   1051     int *mvcost[2],
   1052     int_mv *center_mv
   1053 )
   1054 {
   1055     int i, j, step;
   1056 
   1057     unsigned char *what = (*(b->base_src) + b->src);
   1058     int what_stride = b->src_stride;
   1059     unsigned char *in_what;
   1060     int pre_stride = x->e_mbd.pre.y_stride;
   1061     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
   1062     int in_what_stride = pre_stride;
   1063     unsigned char *best_address;
   1064 
   1065     int tot_steps;
   1066     int_mv this_mv;
   1067 
   1068     unsigned int bestsad;
   1069     unsigned int thissad;
   1070     int best_site = 0;
   1071     int last_site = 0;
   1072 
   1073     int ref_row;
   1074     int ref_col;
   1075     int this_row_offset;
   1076     int this_col_offset;
   1077     search_site *ss;
   1078 
   1079     unsigned char *check_here;
   1080 
   1081     int *mvsadcost[2];
   1082     int_mv fcenter_mv;
   1083 
   1084     mvsadcost[0] = x->mvsadcost[0];
   1085     mvsadcost[1] = x->mvsadcost[1];
   1086     fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
   1087     fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
   1088 
   1089     vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
   1090     ref_row = ref_mv->as_mv.row;
   1091     ref_col = ref_mv->as_mv.col;
   1092     *num00 = 0;
   1093     best_mv->as_mv.row = ref_row;
   1094     best_mv->as_mv.col = ref_col;
   1095 
   1096     /* Work out the start point for the search */
   1097     in_what = (unsigned char *)(base_pre + d->offset + (ref_row * pre_stride) + ref_col);
   1098     best_address = in_what;
   1099 
   1100     /* Check the starting position */
   1101     bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, UINT_MAX)
   1102             + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
   1103 
   1104     /* search_param determines the length of the initial step and hence
   1105      * the number of iterations 0 = initial step (MAX_FIRST_STEP) pel :
   1106      * 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
   1107      */
   1108     ss = &x->ss[search_param * x->searches_per_step];
   1109     tot_steps = (x->ss_count / x->searches_per_step) - search_param;
   1110 
   1111     i = 1;
   1112 
   1113     for (step = 0; step < tot_steps ; step++)
   1114     {
   1115         for (j = 0 ; j < x->searches_per_step ; j++)
   1116         {
   1117             /* Trap illegal vectors */
   1118             this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
   1119             this_col_offset = best_mv->as_mv.col + ss[i].mv.col;
   1120 
   1121             if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
   1122             (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
   1123 
   1124             {
   1125                 check_here = ss[i].offset + best_address;
   1126                 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
   1127 
   1128                 if (thissad < bestsad)
   1129                 {
   1130                     this_mv.as_mv.row = this_row_offset;
   1131                     this_mv.as_mv.col = this_col_offset;
   1132                     thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
   1133                                               mvsadcost, sad_per_bit);
   1134 
   1135                     if (thissad < bestsad)
   1136                     {
   1137                         bestsad = thissad;
   1138                         best_site = i;
   1139                     }
   1140                 }
   1141             }
   1142 
   1143             i++;
   1144         }
   1145 
   1146         if (best_site != last_site)
   1147         {
   1148             best_mv->as_mv.row += ss[best_site].mv.row;
   1149             best_mv->as_mv.col += ss[best_site].mv.col;
   1150             best_address += ss[best_site].offset;
   1151             last_site = best_site;
   1152         }
   1153         else if (best_address == in_what)
   1154             (*num00)++;
   1155     }
   1156 
   1157     this_mv.as_mv.row = best_mv->as_mv.row << 3;
   1158     this_mv.as_mv.col = best_mv->as_mv.col << 3;
   1159 
   1160     return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad)
   1161            + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
   1162 }
   1163 
   1164 int vp8_diamond_search_sadx4
   1165 (
   1166     MACROBLOCK *x,
   1167     BLOCK *b,
   1168     BLOCKD *d,
   1169     int_mv *ref_mv,
   1170     int_mv *best_mv,
   1171     int search_param,
   1172     int sad_per_bit,
   1173     int *num00,
   1174     vp8_variance_fn_ptr_t *fn_ptr,
   1175     int *mvcost[2],
   1176     int_mv *center_mv
   1177 )
   1178 {
   1179     int i, j, step;
   1180 
   1181     unsigned char *what = (*(b->base_src) + b->src);
   1182     int what_stride = b->src_stride;
   1183     unsigned char *in_what;
   1184     int pre_stride = x->e_mbd.pre.y_stride;
   1185     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
   1186     int in_what_stride = pre_stride;
   1187     unsigned char *best_address;
   1188 
   1189     int tot_steps;
   1190     int_mv this_mv;
   1191 
   1192     unsigned int bestsad;
   1193     unsigned int thissad;
   1194     int best_site = 0;
   1195     int last_site = 0;
   1196 
   1197     int ref_row;
   1198     int ref_col;
   1199     int this_row_offset;
   1200     int this_col_offset;
   1201     search_site *ss;
   1202 
   1203     unsigned char *check_here;
   1204 
   1205     int *mvsadcost[2];
   1206     int_mv fcenter_mv;
   1207 
   1208     mvsadcost[0] = x->mvsadcost[0];
   1209     mvsadcost[1] = x->mvsadcost[1];
   1210     fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
   1211     fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
   1212 
   1213     vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
   1214     ref_row = ref_mv->as_mv.row;
   1215     ref_col = ref_mv->as_mv.col;
   1216     *num00 = 0;
   1217     best_mv->as_mv.row = ref_row;
   1218     best_mv->as_mv.col = ref_col;
   1219 
   1220     /* Work out the start point for the search */
   1221     in_what = (unsigned char *)(base_pre + d->offset + (ref_row * pre_stride) + ref_col);
   1222     best_address = in_what;
   1223 
   1224     /* Check the starting position */
   1225     bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, UINT_MAX)
   1226             + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
   1227 
   1228     /* search_param determines the length of the initial step and hence the
   1229      * number of iterations 0 = initial step (MAX_FIRST_STEP) pel : 1 =
   1230      * (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
   1231      */
   1232     ss = &x->ss[search_param * x->searches_per_step];
   1233     tot_steps = (x->ss_count / x->searches_per_step) - search_param;
   1234 
   1235     i = 1;
   1236 
   1237     for (step = 0; step < tot_steps ; step++)
   1238     {
   1239         int all_in = 1, t;
   1240 
   1241         /* To know if all neighbor points are within the bounds, 4 bounds
   1242          * checking are enough instead of checking 4 bounds for each
   1243          * points.
   1244          */
   1245         all_in &= ((best_mv->as_mv.row + ss[i].mv.row)> x->mv_row_min);
   1246         all_in &= ((best_mv->as_mv.row + ss[i+1].mv.row) < x->mv_row_max);
   1247         all_in &= ((best_mv->as_mv.col + ss[i+2].mv.col) > x->mv_col_min);
   1248         all_in &= ((best_mv->as_mv.col + ss[i+3].mv.col) < x->mv_col_max);
   1249 
   1250         if (all_in)
   1251         {
   1252             unsigned int sad_array[4];
   1253 
   1254             for (j = 0 ; j < x->searches_per_step ; j += 4)
   1255             {
   1256                 const unsigned char *block_offset[4];
   1257 
   1258                 for (t = 0; t < 4; t++)
   1259                     block_offset[t] = ss[i+t].offset + best_address;
   1260 
   1261                 fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, sad_array);
   1262 
   1263                 for (t = 0; t < 4; t++, i++)
   1264                 {
   1265                     if (sad_array[t] < bestsad)
   1266                     {
   1267                         this_mv.as_mv.row = best_mv->as_mv.row + ss[i].mv.row;
   1268                         this_mv.as_mv.col = best_mv->as_mv.col + ss[i].mv.col;
   1269                         sad_array[t] += mvsad_err_cost(&this_mv, &fcenter_mv,
   1270                                                        mvsadcost, sad_per_bit);
   1271 
   1272                         if (sad_array[t] < bestsad)
   1273                         {
   1274                             bestsad = sad_array[t];
   1275                             best_site = i;
   1276                         }
   1277                     }
   1278                 }
   1279             }
   1280         }
   1281         else
   1282         {
   1283             for (j = 0 ; j < x->searches_per_step ; j++)
   1284             {
   1285                 /* Trap illegal vectors */
   1286                 this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
   1287                 this_col_offset = best_mv->as_mv.col + ss[i].mv.col;
   1288 
   1289                 if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
   1290                 (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
   1291                 {
   1292                     check_here = ss[i].offset + best_address;
   1293                     thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
   1294 
   1295                     if (thissad < bestsad)
   1296                     {
   1297                         this_mv.as_mv.row = this_row_offset;
   1298                         this_mv.as_mv.col = this_col_offset;
   1299                         thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
   1300                                                   mvsadcost, sad_per_bit);
   1301 
   1302                         if (thissad < bestsad)
   1303                         {
   1304                             bestsad = thissad;
   1305                             best_site = i;
   1306                         }
   1307                     }
   1308                 }
   1309                 i++;
   1310             }
   1311         }
   1312 
   1313         if (best_site != last_site)
   1314         {
   1315             best_mv->as_mv.row += ss[best_site].mv.row;
   1316             best_mv->as_mv.col += ss[best_site].mv.col;
   1317             best_address += ss[best_site].offset;
   1318             last_site = best_site;
   1319         }
   1320         else if (best_address == in_what)
   1321             (*num00)++;
   1322     }
   1323 
   1324     this_mv.as_mv.row = best_mv->as_mv.row * 8;
   1325     this_mv.as_mv.col = best_mv->as_mv.col * 8;
   1326 
   1327     return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad)
   1328            + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
   1329 }
   1330 
   1331 int vp8_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
   1332                         int sad_per_bit, int distance,
   1333                         vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
   1334                         int_mv *center_mv)
   1335 {
   1336     unsigned char *what = (*(b->base_src) + b->src);
   1337     int what_stride = b->src_stride;
   1338     unsigned char *in_what;
   1339     int pre_stride = x->e_mbd.pre.y_stride;
   1340     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
   1341     int in_what_stride = pre_stride;
   1342     int mv_stride = pre_stride;
   1343     unsigned char *bestaddress;
   1344     int_mv *best_mv = &d->bmi.mv;
   1345     int_mv this_mv;
   1346     unsigned int bestsad;
   1347     unsigned int thissad;
   1348     int r, c;
   1349 
   1350     unsigned char *check_here;
   1351 
   1352     int ref_row = ref_mv->as_mv.row;
   1353     int ref_col = ref_mv->as_mv.col;
   1354 
   1355     int row_min = ref_row - distance;
   1356     int row_max = ref_row + distance;
   1357     int col_min = ref_col - distance;
   1358     int col_max = ref_col + distance;
   1359 
   1360     int *mvsadcost[2];
   1361     int_mv fcenter_mv;
   1362 
   1363     mvsadcost[0] = x->mvsadcost[0];
   1364     mvsadcost[1] = x->mvsadcost[1];
   1365     fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
   1366     fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
   1367 
   1368     /* Work out the mid point for the search */
   1369     in_what = base_pre + d->offset;
   1370     bestaddress = in_what + (ref_row * pre_stride) + ref_col;
   1371 
   1372     best_mv->as_mv.row = ref_row;
   1373     best_mv->as_mv.col = ref_col;
   1374 
   1375     /* Baseline value at the centre */
   1376     bestsad = fn_ptr->sdf(what, what_stride, bestaddress,
   1377                           in_what_stride, UINT_MAX)
   1378             + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
   1379 
   1380     /* Apply further limits to prevent us looking using vectors that
   1381      * stretch beyiond the UMV border
   1382      */
   1383     if (col_min < x->mv_col_min)
   1384         col_min = x->mv_col_min;
   1385 
   1386     if (col_max > x->mv_col_max)
   1387         col_max = x->mv_col_max;
   1388 
   1389     if (row_min < x->mv_row_min)
   1390         row_min = x->mv_row_min;
   1391 
   1392     if (row_max > x->mv_row_max)
   1393         row_max = x->mv_row_max;
   1394 
   1395     for (r = row_min; r < row_max ; r++)
   1396     {
   1397         this_mv.as_mv.row = r;
   1398         check_here = r * mv_stride + in_what + col_min;
   1399 
   1400         for (c = col_min; c < col_max; c++)
   1401         {
   1402             thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
   1403 
   1404             this_mv.as_mv.col = c;
   1405             thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
   1406                                       mvsadcost, sad_per_bit);
   1407 
   1408             if (thissad < bestsad)
   1409             {
   1410                 bestsad = thissad;
   1411                 best_mv->as_mv.row = r;
   1412                 best_mv->as_mv.col = c;
   1413                 bestaddress = check_here;
   1414             }
   1415 
   1416             check_here++;
   1417         }
   1418     }
   1419 
   1420     this_mv.as_mv.row = best_mv->as_mv.row << 3;
   1421     this_mv.as_mv.col = best_mv->as_mv.col << 3;
   1422 
   1423     return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad)
   1424            + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
   1425 }
   1426 
   1427 int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
   1428                           int sad_per_bit, int distance,
   1429                           vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
   1430                           int_mv *center_mv)
   1431 {
   1432     unsigned char *what = (*(b->base_src) + b->src);
   1433     int what_stride = b->src_stride;
   1434     unsigned char *in_what;
   1435     int pre_stride = x->e_mbd.pre.y_stride;
   1436     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
   1437     int in_what_stride = pre_stride;
   1438     int mv_stride = pre_stride;
   1439     unsigned char *bestaddress;
   1440     int_mv *best_mv = &d->bmi.mv;
   1441     int_mv this_mv;
   1442     unsigned int bestsad;
   1443     unsigned int thissad;
   1444     int r, c;
   1445 
   1446     unsigned char *check_here;
   1447 
   1448     int ref_row = ref_mv->as_mv.row;
   1449     int ref_col = ref_mv->as_mv.col;
   1450 
   1451     int row_min = ref_row - distance;
   1452     int row_max = ref_row + distance;
   1453     int col_min = ref_col - distance;
   1454     int col_max = ref_col + distance;
   1455 
   1456     unsigned int sad_array[3];
   1457 
   1458     int *mvsadcost[2];
   1459     int_mv fcenter_mv;
   1460 
   1461     mvsadcost[0] = x->mvsadcost[0];
   1462     mvsadcost[1] = x->mvsadcost[1];
   1463     fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
   1464     fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
   1465 
   1466     /* Work out the mid point for the search */
   1467     in_what = base_pre + d->offset;
   1468     bestaddress = in_what + (ref_row * pre_stride) + ref_col;
   1469 
   1470     best_mv->as_mv.row = ref_row;
   1471     best_mv->as_mv.col = ref_col;
   1472 
   1473     /* Baseline value at the centre */
   1474     bestsad = fn_ptr->sdf(what, what_stride, bestaddress,
   1475                           in_what_stride, UINT_MAX)
   1476             + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
   1477 
   1478     /* Apply further limits to prevent us looking using vectors that stretch
   1479      * beyond the UMV border
   1480      */
   1481     if (col_min < x->mv_col_min)
   1482         col_min = x->mv_col_min;
   1483 
   1484     if (col_max > x->mv_col_max)
   1485         col_max = x->mv_col_max;
   1486 
   1487     if (row_min < x->mv_row_min)
   1488         row_min = x->mv_row_min;
   1489 
   1490     if (row_max > x->mv_row_max)
   1491         row_max = x->mv_row_max;
   1492 
   1493     for (r = row_min; r < row_max ; r++)
   1494     {
   1495         this_mv.as_mv.row = r;
   1496         check_here = r * mv_stride + in_what + col_min;
   1497         c = col_min;
   1498 
   1499         while ((c + 2) < col_max)
   1500         {
   1501             int i;
   1502 
   1503             fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array);
   1504 
   1505             for (i = 0; i < 3; i++)
   1506             {
   1507                 thissad = sad_array[i];
   1508 
   1509                 if (thissad < bestsad)
   1510                 {
   1511                     this_mv.as_mv.col = c;
   1512                     thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
   1513                                               mvsadcost, sad_per_bit);
   1514 
   1515                     if (thissad < bestsad)
   1516                     {
   1517                         bestsad = thissad;
   1518                         best_mv->as_mv.row = r;
   1519                         best_mv->as_mv.col = c;
   1520                         bestaddress = check_here;
   1521                     }
   1522                 }
   1523 
   1524                 check_here++;
   1525                 c++;
   1526             }
   1527         }
   1528 
   1529         while (c < col_max)
   1530         {
   1531             thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
   1532 
   1533             if (thissad < bestsad)
   1534             {
   1535                 this_mv.as_mv.col = c;
   1536                 thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
   1537                                           mvsadcost, sad_per_bit);
   1538 
   1539                 if (thissad < bestsad)
   1540                 {
   1541                     bestsad = thissad;
   1542                     best_mv->as_mv.row = r;
   1543                     best_mv->as_mv.col = c;
   1544                     bestaddress = check_here;
   1545                 }
   1546             }
   1547 
   1548             check_here ++;
   1549             c ++;
   1550         }
   1551 
   1552     }
   1553 
   1554     this_mv.as_mv.row = best_mv->as_mv.row << 3;
   1555     this_mv.as_mv.col = best_mv->as_mv.col << 3;
   1556 
   1557     return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad)
   1558            + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
   1559 }
   1560 
   1561 int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
   1562                           int sad_per_bit, int distance,
   1563                           vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
   1564                           int_mv *center_mv)
   1565 {
   1566     unsigned char *what = (*(b->base_src) + b->src);
   1567     int what_stride = b->src_stride;
   1568     int pre_stride = x->e_mbd.pre.y_stride;
   1569     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
   1570     unsigned char *in_what;
   1571     int in_what_stride = pre_stride;
   1572     int mv_stride = pre_stride;
   1573     unsigned char *bestaddress;
   1574     int_mv *best_mv = &d->bmi.mv;
   1575     int_mv this_mv;
   1576     unsigned int bestsad;
   1577     unsigned int thissad;
   1578     int r, c;
   1579 
   1580     unsigned char *check_here;
   1581 
   1582     int ref_row = ref_mv->as_mv.row;
   1583     int ref_col = ref_mv->as_mv.col;
   1584 
   1585     int row_min = ref_row - distance;
   1586     int row_max = ref_row + distance;
   1587     int col_min = ref_col - distance;
   1588     int col_max = ref_col + distance;
   1589 
   1590     DECLARE_ALIGNED_ARRAY(16, unsigned short, sad_array8, 8);
   1591     unsigned int sad_array[3];
   1592 
   1593     int *mvsadcost[2];
   1594     int_mv fcenter_mv;
   1595 
   1596     mvsadcost[0] = x->mvsadcost[0];
   1597     mvsadcost[1] = x->mvsadcost[1];
   1598     fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
   1599     fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
   1600 
   1601     /* Work out the mid point for the search */
   1602     in_what = base_pre + d->offset;
   1603     bestaddress = in_what + (ref_row * pre_stride) + ref_col;
   1604 
   1605     best_mv->as_mv.row = ref_row;
   1606     best_mv->as_mv.col = ref_col;
   1607 
   1608     /* Baseline value at the centre */
   1609     bestsad = fn_ptr->sdf(what, what_stride,
   1610                           bestaddress, in_what_stride, UINT_MAX)
   1611             + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
   1612 
   1613     /* Apply further limits to prevent us looking using vectors that stretch
   1614      * beyond the UMV border
   1615      */
   1616     if (col_min < x->mv_col_min)
   1617         col_min = x->mv_col_min;
   1618 
   1619     if (col_max > x->mv_col_max)
   1620         col_max = x->mv_col_max;
   1621 
   1622     if (row_min < x->mv_row_min)
   1623         row_min = x->mv_row_min;
   1624 
   1625     if (row_max > x->mv_row_max)
   1626         row_max = x->mv_row_max;
   1627 
   1628     for (r = row_min; r < row_max ; r++)
   1629     {
   1630         this_mv.as_mv.row = r;
   1631         check_here = r * mv_stride + in_what + col_min;
   1632         c = col_min;
   1633 
   1634         while ((c + 7) < col_max)
   1635         {
   1636             int i;
   1637 
   1638             fn_ptr->sdx8f(what, what_stride, check_here, in_what_stride, sad_array8);
   1639 
   1640             for (i = 0; i < 8; i++)
   1641             {
   1642                 thissad = sad_array8[i];
   1643 
   1644                 if (thissad < bestsad)
   1645                 {
   1646                     this_mv.as_mv.col = c;
   1647                     thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
   1648                                               mvsadcost, sad_per_bit);
   1649 
   1650                     if (thissad < bestsad)
   1651                     {
   1652                         bestsad = thissad;
   1653                         best_mv->as_mv.row = r;
   1654                         best_mv->as_mv.col = c;
   1655                         bestaddress = check_here;
   1656                     }
   1657                 }
   1658 
   1659                 check_here++;
   1660                 c++;
   1661             }
   1662         }
   1663 
   1664         while ((c + 2) < col_max)
   1665         {
   1666             int i;
   1667 
   1668             fn_ptr->sdx3f(what, what_stride, check_here , in_what_stride, sad_array);
   1669 
   1670             for (i = 0; i < 3; i++)
   1671             {
   1672                 thissad = sad_array[i];
   1673 
   1674                 if (thissad < bestsad)
   1675                 {
   1676                     this_mv.as_mv.col = c;
   1677                     thissad  += mvsad_err_cost(&this_mv, &fcenter_mv,
   1678                         mvsadcost, sad_per_bit);
   1679 
   1680                     if (thissad < bestsad)
   1681                     {
   1682                         bestsad = thissad;
   1683                         best_mv->as_mv.row = r;
   1684                         best_mv->as_mv.col = c;
   1685                         bestaddress = check_here;
   1686                     }
   1687                 }
   1688 
   1689                 check_here++;
   1690                 c++;
   1691             }
   1692         }
   1693 
   1694         while (c < col_max)
   1695         {
   1696             thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
   1697 
   1698             if (thissad < bestsad)
   1699             {
   1700                 this_mv.as_mv.col = c;
   1701                 thissad  += mvsad_err_cost(&this_mv, &fcenter_mv,
   1702                     mvsadcost, sad_per_bit);
   1703 
   1704                 if (thissad < bestsad)
   1705                 {
   1706                     bestsad = thissad;
   1707                     best_mv->as_mv.row = r;
   1708                     best_mv->as_mv.col = c;
   1709                     bestaddress = check_here;
   1710                 }
   1711             }
   1712 
   1713             check_here ++;
   1714             c ++;
   1715         }
   1716     }
   1717 
   1718     this_mv.as_mv.row = best_mv->as_mv.row * 8;
   1719     this_mv.as_mv.col = best_mv->as_mv.col * 8;
   1720 
   1721     return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad)
   1722            + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
   1723 }
   1724 
   1725 int vp8_refining_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
   1726                             int error_per_bit, int search_range,
   1727                             vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
   1728                             int_mv *center_mv)
   1729 {
   1730     MV neighbors[4] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0}};
   1731     int i, j;
   1732     short this_row_offset, this_col_offset;
   1733 
   1734     int what_stride = b->src_stride;
   1735     int pre_stride = x->e_mbd.pre.y_stride;
   1736     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
   1737     int in_what_stride = pre_stride;
   1738     unsigned char *what = (*(b->base_src) + b->src);
   1739     unsigned char *best_address = (unsigned char *)(base_pre + d->offset +
   1740         (ref_mv->as_mv.row * pre_stride) + ref_mv->as_mv.col);
   1741     unsigned char *check_here;
   1742     int_mv this_mv;
   1743     unsigned int bestsad;
   1744     unsigned int thissad;
   1745 
   1746     int *mvsadcost[2];
   1747     int_mv fcenter_mv;
   1748 
   1749     mvsadcost[0] = x->mvsadcost[0];
   1750     mvsadcost[1] = x->mvsadcost[1];
   1751     fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
   1752     fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
   1753 
   1754     bestsad = fn_ptr->sdf(what, what_stride, best_address,
   1755                           in_what_stride, UINT_MAX)
   1756             + mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit);
   1757 
   1758     for (i=0; i<search_range; i++)
   1759     {
   1760         int best_site = -1;
   1761 
   1762         for (j = 0 ; j < 4 ; j++)
   1763         {
   1764             this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
   1765             this_col_offset = ref_mv->as_mv.col + neighbors[j].col;
   1766 
   1767             if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
   1768             (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
   1769             {
   1770                 check_here = (neighbors[j].row)*in_what_stride + neighbors[j].col + best_address;
   1771                 thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
   1772 
   1773                 if (thissad < bestsad)
   1774                 {
   1775                     this_mv.as_mv.row = this_row_offset;
   1776                     this_mv.as_mv.col = this_col_offset;
   1777                     thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);
   1778 
   1779                     if (thissad < bestsad)
   1780                     {
   1781                         bestsad = thissad;
   1782                         best_site = j;
   1783                     }
   1784                 }
   1785             }
   1786         }
   1787 
   1788         if (best_site == -1)
   1789             break;
   1790         else
   1791         {
   1792             ref_mv->as_mv.row += neighbors[best_site].row;
   1793             ref_mv->as_mv.col += neighbors[best_site].col;
   1794             best_address += (neighbors[best_site].row)*in_what_stride + neighbors[best_site].col;
   1795         }
   1796     }
   1797 
   1798     this_mv.as_mv.row = ref_mv->as_mv.row << 3;
   1799     this_mv.as_mv.col = ref_mv->as_mv.col << 3;
   1800 
   1801     return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad)
   1802            + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
   1803 }
   1804 
   1805 int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
   1806                               int_mv *ref_mv, int error_per_bit,
   1807                               int search_range, vp8_variance_fn_ptr_t *fn_ptr,
   1808                               int *mvcost[2], int_mv *center_mv)
   1809 {
   1810     MV neighbors[4] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0}};
   1811     int i, j;
   1812     short this_row_offset, this_col_offset;
   1813 
   1814     int what_stride = b->src_stride;
   1815     int pre_stride = x->e_mbd.pre.y_stride;
   1816     unsigned char *base_pre = x->e_mbd.pre.y_buffer;
   1817     int in_what_stride = pre_stride;
   1818     unsigned char *what = (*(b->base_src) + b->src);
   1819     unsigned char *best_address = (unsigned char *)(base_pre + d->offset +
   1820         (ref_mv->as_mv.row * pre_stride) + ref_mv->as_mv.col);
   1821     unsigned char *check_here;
   1822     int_mv this_mv;
   1823     unsigned int bestsad;
   1824     unsigned int thissad;
   1825 
   1826     int *mvsadcost[2];
   1827     int_mv fcenter_mv;
   1828 
   1829     mvsadcost[0] = x->mvsadcost[0];
   1830     mvsadcost[1] = x->mvsadcost[1];
   1831     fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
   1832     fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
   1833 
   1834     bestsad = fn_ptr->sdf(what, what_stride, best_address,
   1835                           in_what_stride, UINT_MAX)
   1836             + mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit);
   1837 
   1838     for (i=0; i<search_range; i++)
   1839     {
   1840         int best_site = -1;
   1841         int all_in = 1;
   1842 
   1843         all_in &= ((ref_mv->as_mv.row - 1) > x->mv_row_min);
   1844         all_in &= ((ref_mv->as_mv.row + 1) < x->mv_row_max);
   1845         all_in &= ((ref_mv->as_mv.col - 1) > x->mv_col_min);
   1846         all_in &= ((ref_mv->as_mv.col + 1) < x->mv_col_max);
   1847 
   1848         if(all_in)
   1849         {
   1850             unsigned int sad_array[4];
   1851             const unsigned char *block_offset[4];
   1852             block_offset[0] = best_address - in_what_stride;
   1853             block_offset[1] = best_address - 1;
   1854             block_offset[2] = best_address + 1;
   1855             block_offset[3] = best_address + in_what_stride;
   1856 
   1857             fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, sad_array);
   1858 
   1859             for (j = 0; j < 4; j++)
   1860             {
   1861                 if (sad_array[j] < bestsad)
   1862                 {
   1863                     this_mv.as_mv.row = ref_mv->as_mv.row + neighbors[j].row;
   1864                     this_mv.as_mv.col = ref_mv->as_mv.col + neighbors[j].col;
   1865                     sad_array[j] += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);
   1866 
   1867                     if (sad_array[j] < bestsad)
   1868                     {
   1869                         bestsad = sad_array[j];
   1870                         best_site = j;
   1871                     }
   1872                 }
   1873             }
   1874         }
   1875         else
   1876         {
   1877             for (j = 0 ; j < 4 ; j++)
   1878             {
   1879                 this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
   1880                 this_col_offset = ref_mv->as_mv.col + neighbors[j].col;
   1881 
   1882                 if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
   1883                 (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
   1884                 {
   1885                     check_here = (neighbors[j].row)*in_what_stride + neighbors[j].col + best_address;
   1886                     thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
   1887 
   1888                     if (thissad < bestsad)
   1889                     {
   1890                         this_mv.as_mv.row = this_row_offset;
   1891                         this_mv.as_mv.col = this_col_offset;
   1892                         thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);
   1893 
   1894                         if (thissad < bestsad)
   1895                         {
   1896                             bestsad = thissad;
   1897                             best_site = j;
   1898                         }
   1899                     }
   1900                 }
   1901             }
   1902         }
   1903 
   1904         if (best_site == -1)
   1905             break;
   1906         else
   1907         {
   1908             ref_mv->as_mv.row += neighbors[best_site].row;
   1909             ref_mv->as_mv.col += neighbors[best_site].col;
   1910             best_address += (neighbors[best_site].row)*in_what_stride + neighbors[best_site].col;
   1911         }
   1912     }
   1913 
   1914     this_mv.as_mv.row = ref_mv->as_mv.row * 8;
   1915     this_mv.as_mv.col = ref_mv->as_mv.col * 8;
   1916 
   1917     return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad)
   1918            + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
   1919 }
   1920 
   1921 #ifdef VP8_ENTROPY_STATS
   1922 void print_mode_context(void)
   1923 {
   1924     FILE *f = fopen("modecont.c", "w");
   1925     int i, j;
   1926 
   1927     fprintf(f, "#include \"entropy.h\"\n");
   1928     fprintf(f, "const int vp8_mode_contexts[6][4] =\n");
   1929     fprintf(f, "{\n");
   1930 
   1931     for (j = 0; j < 6; j++)
   1932     {
   1933         fprintf(f, "  { /* %d */\n", j);
   1934         fprintf(f, "    ");
   1935 
   1936         for (i = 0; i < 4; i++)
   1937         {
   1938             int overal_prob;
   1939             int this_prob;
   1940             int count;
   1941 
   1942             /* Overall probs */
   1943             count = mv_mode_cts[i][0] + mv_mode_cts[i][1];
   1944 
   1945             if (count)
   1946                 overal_prob = 256 * mv_mode_cts[i][0] / count;
   1947             else
   1948                 overal_prob = 128;
   1949 
   1950             if (overal_prob == 0)
   1951                 overal_prob = 1;
   1952 
   1953             /* context probs */
   1954             count = mv_ref_ct[j][i][0] + mv_ref_ct[j][i][1];
   1955 
   1956             if (count)
   1957                 this_prob = 256 * mv_ref_ct[j][i][0] / count;
   1958             else
   1959                 this_prob = 128;
   1960 
   1961             if (this_prob == 0)
   1962                 this_prob = 1;
   1963 
   1964             fprintf(f, "%5d, ", this_prob);
   1965         }
   1966 
   1967         fprintf(f, "  },\n");
   1968     }
   1969 
   1970     fprintf(f, "};\n");
   1971     fclose(f);
   1972 }
   1973 
   1974 /* MV ref count VP8_ENTROPY_STATS stats code */
   1975 #ifdef VP8_ENTROPY_STATS
   1976 void init_mv_ref_counts()
   1977 {
   1978     vpx_memset(mv_ref_ct, 0, sizeof(mv_ref_ct));
   1979     vpx_memset(mv_mode_cts, 0, sizeof(mv_mode_cts));
   1980 }
   1981 
   1982 void accum_mv_refs(MB_PREDICTION_MODE m, const int ct[4])
   1983 {
   1984     if (m == ZEROMV)
   1985     {
   1986         ++mv_ref_ct [ct[0]] [0] [0];
   1987         ++mv_mode_cts[0][0];
   1988     }
   1989     else
   1990     {
   1991         ++mv_ref_ct [ct[0]] [0] [1];
   1992         ++mv_mode_cts[0][1];
   1993 
   1994         if (m == NEARESTMV)
   1995         {
   1996             ++mv_ref_ct [ct[1]] [1] [0];
   1997             ++mv_mode_cts[1][0];
   1998         }
   1999         else
   2000         {
   2001             ++mv_ref_ct [ct[1]] [1] [1];
   2002             ++mv_mode_cts[1][1];
   2003 
   2004             if (m == NEARMV)
   2005             {
   2006                 ++mv_ref_ct [ct[2]] [2] [0];
   2007                 ++mv_mode_cts[2][0];
   2008             }
   2009             else
   2010             {
   2011                 ++mv_ref_ct [ct[2]] [2] [1];
   2012                 ++mv_mode_cts[2][1];
   2013 
   2014                 if (m == NEWMV)
   2015                 {
   2016                     ++mv_ref_ct [ct[3]] [3] [0];
   2017                     ++mv_mode_cts[3][0];
   2018                 }
   2019                 else
   2020                 {
   2021                     ++mv_ref_ct [ct[3]] [3] [1];
   2022                     ++mv_mode_cts[3][1];
   2023                 }
   2024             }
   2025         }
   2026     }
   2027 }
   2028 
   2029 #endif/* END MV ref count VP8_ENTROPY_STATS stats code */
   2030 
   2031 #endif
   2032