Home | History | Annotate | Download | only in arm
      1 /*
      2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 
     12 #include "mcomp.h"
     13 #include "vpx_mem/vpx_mem.h"
     14 
     15 #include <stdio.h>
     16 #include <limits.h>
     17 #include <math.h>
     18 
// Counters compiled in only when ENTROPY_STATS is defined. Nothing in the
// visible part of this file reads or writes them.
#ifdef ENTROPY_STATS
static int mv_ref_ct [31] [4] [2];
static int mv_mode_cts [4] [2];
#endif

// 256-entry lookup table populated by vp8cx_init_mv_bits_sadcost():
// entry i holds (int)sqrt(i * 16).
static int mv_bits_sadcost[256];
     25 
     26 extern unsigned int vp8_sub_pixel_variance16x16s_neon
     27 (
     28     unsigned char  *src_ptr,
     29     int  src_pixels_per_line,
     30     int  xoffset,
     31     int  yoffset,
     32     unsigned char *dst_ptr,
     33     int dst_pixels_per_line,
     34     unsigned int *sse
     35 );
     36 extern unsigned int vp8_sub_pixel_variance16x16s_4_0_neon
     37 (
     38     unsigned char  *src_ptr,
     39     int  src_pixels_per_line,
     40     unsigned char *dst_ptr,
     41     int dst_pixels_per_line,
     42     unsigned int *sse
     43 );
     44 extern unsigned int vp8_sub_pixel_variance16x16s_0_4_neon
     45 (
     46     unsigned char  *src_ptr,
     47     int  src_pixels_per_line,
     48     unsigned char *dst_ptr,
     49     int dst_pixels_per_line,
     50     unsigned int *sse
     51 );
     52 extern unsigned int vp8_sub_pixel_variance16x16s_4_4_neon
     53 (
     54     unsigned char  *src_ptr,
     55     int  src_pixels_per_line,
     56     unsigned char *dst_ptr,
     57     int dst_pixels_per_line,
     58     unsigned int *sse
     59 );
     60 
     61 void vp8cx_init_mv_bits_sadcost()
     62 {
     63     int i;
     64 
     65     for (i = 0; i < 256; i++)
     66     {
     67         mv_bits_sadcost[i] = (int)sqrt(i * 16);
     68     }
     69 }
     70 
     71 
     72 int vp8_mv_bit_cost(MV *mv, MV *ref, int *mvcost[2], int Weight)
     73 {
     74     // MV costing is based on the distribution of vectors in the previous frame and as such will tend to
     75     // over state the cost of vectors. In addition coding a new vector can have a knock on effect on the
     76     // cost of subsequent vectors and the quality of prediction from NEAR and NEAREST for subsequent blocks.
     77     // The "Weight" parameter allows, to a limited extent, for some account to be taken of these factors.
     78     return ((mvcost[0][(mv->row - ref->row) >> 1] + mvcost[1][(mv->col - ref->col) >> 1]) * Weight) >> 7;
     79 }
     80 
     81 int vp8_mv_err_cost(MV *mv, MV *ref, int *mvcost[2], int error_per_bit)
     82 {
     83     //int i;
     84     //return ((mvcost[0][(mv->row - ref->row)>>1] + mvcost[1][(mv->col - ref->col)>>1] + 128) * error_per_bit) >> 8;
     85     //return ( (vp8_mv_bit_cost(mv,  ref, mvcost, 100) + 128) * error_per_bit) >> 8;
     86 
     87     //i = (vp8_mv_bit_cost(mv,  ref, mvcost, 100) * error_per_bit + 128) >> 8;
     88     return ((mvcost[0][(mv->row - ref->row) >> 1] + mvcost[1][(mv->col - ref->col) >> 1]) * error_per_bit + 128) >> 8;
     89     //return (vp8_mv_bit_cost(mv,  ref, mvcost, 128) * error_per_bit + 128) >> 8;
     90 }
     91 
     92 
     93 static int mv_bits(MV *mv, MV *ref, int *mvcost[2])
     94 {
     95     // get the estimated number of bits for a motion vector, to be used for costing in SAD based
     96     // motion estimation
     97     return ((mvcost[0][(mv->row - ref->row) >> 1]  +  mvcost[1][(mv->col - ref->col)>> 1]) + 128) >> 8;
     98 }
     99 
    100 void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride)
    101 {
    102     int Len;
    103     int search_site_count = 0;
    104 
    105 
    106     // Generate offsets for 4 search sites per step.
    107     Len = MAX_FIRST_STEP;
    108     x->ss[search_site_count].mv.col = 0;
    109     x->ss[search_site_count].mv.row = 0;
    110     x->ss[search_site_count].offset = 0;
    111     search_site_count++;
    112 
    113     while (Len > 0)
    114     {
    115 
    116         // Compute offsets for search sites.
    117         x->ss[search_site_count].mv.col = 0;
    118         x->ss[search_site_count].mv.row = -Len;
    119         x->ss[search_site_count].offset = -Len * stride;
    120         search_site_count++;
    121 
    122         // Compute offsets for search sites.
    123         x->ss[search_site_count].mv.col = 0;
    124         x->ss[search_site_count].mv.row = Len;
    125         x->ss[search_site_count].offset = Len * stride;
    126         search_site_count++;
    127 
    128         // Compute offsets for search sites.
    129         x->ss[search_site_count].mv.col = -Len;
    130         x->ss[search_site_count].mv.row = 0;
    131         x->ss[search_site_count].offset = -Len;
    132         search_site_count++;
    133 
    134         // Compute offsets for search sites.
    135         x->ss[search_site_count].mv.col = Len;
    136         x->ss[search_site_count].mv.row = 0;
    137         x->ss[search_site_count].offset = Len;
    138         search_site_count++;
    139 
    140         // Contract.
    141         Len /= 2;
    142     }
    143 
    144     x->ss_count = search_site_count;
    145     x->searches_per_step = 4;
    146 }
    147 
    148 void vp8_init3smotion_compensation(MACROBLOCK *x, int stride)
    149 {
    150     int Len;
    151     int search_site_count = 0;
    152 
    153     // Generate offsets for 8 search sites per step.
    154     Len = MAX_FIRST_STEP;
    155     x->ss[search_site_count].mv.col = 0;
    156     x->ss[search_site_count].mv.row = 0;
    157     x->ss[search_site_count].offset = 0;
    158     search_site_count++;
    159 
    160     while (Len > 0)
    161     {
    162 
    163         // Compute offsets for search sites.
    164         x->ss[search_site_count].mv.col = 0;
    165         x->ss[search_site_count].mv.row = -Len;
    166         x->ss[search_site_count].offset = -Len * stride;
    167         search_site_count++;
    168 
    169         // Compute offsets for search sites.
    170         x->ss[search_site_count].mv.col = 0;
    171         x->ss[search_site_count].mv.row = Len;
    172         x->ss[search_site_count].offset = Len * stride;
    173         search_site_count++;
    174 
    175         // Compute offsets for search sites.
    176         x->ss[search_site_count].mv.col = -Len;
    177         x->ss[search_site_count].mv.row = 0;
    178         x->ss[search_site_count].offset = -Len;
    179         search_site_count++;
    180 
    181         // Compute offsets for search sites.
    182         x->ss[search_site_count].mv.col = Len;
    183         x->ss[search_site_count].mv.row = 0;
    184         x->ss[search_site_count].offset = Len;
    185         search_site_count++;
    186 
    187         // Compute offsets for search sites.
    188         x->ss[search_site_count].mv.col = -Len;
    189         x->ss[search_site_count].mv.row = -Len;
    190         x->ss[search_site_count].offset = -Len * stride - Len;
    191         search_site_count++;
    192 
    193         // Compute offsets for search sites.
    194         x->ss[search_site_count].mv.col = Len;
    195         x->ss[search_site_count].mv.row = -Len;
    196         x->ss[search_site_count].offset = -Len * stride + Len;
    197         search_site_count++;
    198 
    199         // Compute offsets for search sites.
    200         x->ss[search_site_count].mv.col = -Len;
    201         x->ss[search_site_count].mv.row = Len;
    202         x->ss[search_site_count].offset = Len * stride - Len;
    203         search_site_count++;
    204 
    205         // Compute offsets for search sites.
    206         x->ss[search_site_count].mv.col = Len;
    207         x->ss[search_site_count].mv.row = Len;
    208         x->ss[search_site_count].offset = Len * stride + Len;
    209         search_site_count++;
    210 
    211 
    212         // Contract.
    213         Len /= 2;
    214     }
    215 
    216     x->ss_count = search_site_count;
    217     x->searches_per_step = 8;
    218 }
    219 
    220 
    221 #define MVC(r,c) (((mvcost[0][(r)-rr] + mvcost[1][(c) - rc]) * error_per_bit + 128 )>>8 ) // estimated cost of a motion vector (r,c)
    222 #define PRE(r,c) (*(d->base_pre) + d->pre + ((r)>>2) * d->pre_stride + ((c)>>2)) // pointer to predictor base of a motionvector
    223 #define SP(x) (((x)&3)<<1) // convert motion vector component to offset for svf calc
    224 #define DIST(r,c) svf( PRE(r,c), d->pre_stride, SP(c),SP(r), z,b->src_stride,&sse) // returns subpixel variance error function.
    225 #define IFMVCV(r,c,s,e) if ( c >= minc && c <= maxc && r >= minr && r <= maxr) s else e;
    226 #define ERR(r,c) (MVC(r,c)+DIST(r,c)) // returns distortion + motion vector cost
    227 #define CHECK_BETTER(v,r,c) IFMVCV(r,c,{if((v = ERR(r,c)) < besterr) { besterr = v; br=r; bc=c; }}, v=INT_MAX;)// checks if (r,c) has better score than previous best
    228 #define MIN(x,y) (((x)<(y))?(x):(y))
    229 #define MAX(x,y) (((x)>(y))?(x):(y))
    230 
    231 //#define CHECK_BETTER(v,r,c) if((v = ERR(r,c)) < besterr) { besterr = v; br=r; bc=c; }
    232 
    233 int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, vp8_subpixvariance_fn_t svf, vp8_variance_fn_t vf, int *mvcost[2])
    234 {
    235     unsigned char *y = *(d->base_pre) + d->pre + (bestmv->row) * d->pre_stride + bestmv->col;
    236     unsigned char *z = (*(b->base_src) + b->src);
    237 
    238     int rr = ref_mv->row >> 1, rc = ref_mv->col >> 1;
    239     int br = bestmv->row << 2, bc = bestmv->col << 2;
    240     int tr = br, tc = bc;
    241     unsigned int besterr = INT_MAX;
    242     unsigned int left, right, up, down, diag;
    243     unsigned int sse;
    244     unsigned int whichdir;
    245     unsigned int halfiters = 4;
    246     unsigned int quarteriters = 4;
    247 
    248     int minc = MAX(x->mv_col_min << 2, (ref_mv->col >> 1) - ((1 << mvlong_width) - 1));
    249     int maxc = MIN(x->mv_col_max << 2, (ref_mv->col >> 1) + ((1 << mvlong_width) - 1));
    250     int minr = MAX(x->mv_row_min << 2, (ref_mv->row >> 1) - ((1 << mvlong_width) - 1));
    251     int maxr = MIN(x->mv_row_max << 2, (ref_mv->row >> 1) + ((1 << mvlong_width) - 1));
    252 
    253     // central mv
    254     bestmv->row <<= 3;
    255     bestmv->col <<= 3;
    256 
    257     // calculate central point error
    258     besterr = vf(y, d->pre_stride, z, b->src_stride, &sse);
    259     besterr += vp8_mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
    260 
    261     // TODO: Each subsequent iteration checks at least one point in common with the last iteration could be 2 ( if diag selected)
    262     while (--halfiters)
    263     {
    264         // 1/2 pel
    265         CHECK_BETTER(left, tr, tc - 2);
    266         CHECK_BETTER(right, tr, tc + 2);
    267         CHECK_BETTER(up, tr - 2, tc);
    268         CHECK_BETTER(down, tr + 2, tc);
    269 
    270         whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
    271 
    272         switch (whichdir)
    273         {
    274         case 0:
    275             CHECK_BETTER(diag, tr - 2, tc - 2);
    276             break;
    277         case 1:
    278             CHECK_BETTER(diag, tr - 2, tc + 2);
    279             break;
    280         case 2:
    281             CHECK_BETTER(diag, tr + 2, tc - 2);
    282             break;
    283         case 3:
    284             CHECK_BETTER(diag, tr + 2, tc + 2);
    285             break;
    286         }
    287 
    288         // no reason to check the same one again.
    289         if (tr == br && tc == bc)
    290             break;
    291 
    292         tr = br;
    293         tc = bc;
    294     }
    295 
    296     // TODO: Each subsequent iteration checks at least one point in common with the last iteration could be 2 ( if diag selected)
    297     // 1/4 pel
    298     while (--quarteriters)
    299     {
    300         CHECK_BETTER(left, tr, tc - 1);
    301         CHECK_BETTER(right, tr, tc + 1);
    302         CHECK_BETTER(up, tr - 1, tc);
    303         CHECK_BETTER(down, tr + 1, tc);
    304 
    305         whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
    306 
    307         switch (whichdir)
    308         {
    309         case 0:
    310             CHECK_BETTER(diag, tr - 1, tc - 1);
    311             break;
    312         case 1:
    313             CHECK_BETTER(diag, tr - 1, tc + 1);
    314             break;
    315         case 2:
    316             CHECK_BETTER(diag, tr + 1, tc - 1);
    317             break;
    318         case 3:
    319             CHECK_BETTER(diag, tr + 1, tc + 1);
    320             break;
    321         }
    322 
    323         // no reason to check the same one again.
    324         if (tr == br && tc == bc)
    325             break;
    326 
    327         tr = br;
    328         tc = bc;
    329     }
    330 
    331     bestmv->row = br << 1;
    332     bestmv->col = bc << 1;
    333 
    334     if ((abs(bestmv->col - ref_mv->col) > MAX_FULL_PEL_VAL) || (abs(bestmv->row - ref_mv->row) > MAX_FULL_PEL_VAL))
    335         return INT_MAX;
    336 
    337     return besterr;
    338 }
    339 #undef MVC
    340 #undef PRE
    341 #undef SP
    342 #undef DIST
    343 #undef ERR
    344 #undef CHECK_BETTER
    345 #undef MIN
    346 #undef MAX
    347 int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, vp8_subpixvariance_fn_t svf, vp8_variance_fn_t vf, int *mvcost[2])
    348 {
    349     int bestmse = INT_MAX;
    350     MV startmv;
    351     //MV this_mv;
    352     MV this_mv;
    353     unsigned char *y = *(d->base_pre) + d->pre + (bestmv->row) * d->pre_stride + bestmv->col;
    354     unsigned char *z = (*(b->base_src) + b->src);
    355     int left, right, up, down, diag;
    356     unsigned int sse;
    357     int whichdir ;
    358 
    359 
    360     // Trap uncodable vectors
    361     if ((abs((bestmv->col << 3) - ref_mv->col) > MAX_FULL_PEL_VAL) || (abs((bestmv->row << 3) - ref_mv->row) > MAX_FULL_PEL_VAL))
    362     {
    363         bestmv->row <<= 3;
    364         bestmv->col <<= 3;
    365         return INT_MAX;
    366     }
    367 
    368     // central mv
    369     bestmv->row <<= 3;
    370     bestmv->col <<= 3;
    371     startmv = *bestmv;
    372 
    373     // calculate central point error
    374     bestmse = vf(y, d->pre_stride, z, b->src_stride, &sse);
    375     bestmse += vp8_mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
    376 
    377     // go left then right and check error
    378     this_mv.row = startmv.row;
    379     this_mv.col = ((startmv.col - 8) | 4);
    380     left = vp8_sub_pixel_variance16x16s_4_0_neon(y - 1, d->pre_stride, z, b->src_stride, &sse);
    381     left += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    382 
    383     if (left < bestmse)
    384     {
    385         *bestmv = this_mv;
    386         bestmse = left;
    387     }
    388 
    389     this_mv.col += 8;
    390     right = vp8_sub_pixel_variance16x16s_4_0_neon(y, d->pre_stride, z, b->src_stride, &sse);
    391     right += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    392 
    393     if (right < bestmse)
    394     {
    395         *bestmv = this_mv;
    396         bestmse = right;
    397     }
    398 
    399     // go up then down and check error
    400     this_mv.col = startmv.col;
    401     this_mv.row = ((startmv.row - 8) | 4);
    402     up = vp8_sub_pixel_variance16x16s_0_4_neon(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
    403     up += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    404 
    405     if (up < bestmse)
    406     {
    407         *bestmv = this_mv;
    408         bestmse = up;
    409     }
    410 
    411     this_mv.row += 8;
    412     down = vp8_sub_pixel_variance16x16s_0_4_neon(y, d->pre_stride, z, b->src_stride, &sse);
    413     down += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    414 
    415     if (down < bestmse)
    416     {
    417         *bestmv = this_mv;
    418         bestmse = down;
    419     }
    420 
    421 
    422     // now check 1 more diagonal
    423     whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
    424     //for(whichdir =0;whichdir<4;whichdir++)
    425     //{
    426     this_mv = startmv;
    427 
    428     switch (whichdir)
    429     {
    430     case 0:
    431         this_mv.col = (this_mv.col - 8) | 4;
    432         this_mv.row = (this_mv.row - 8) | 4;
    433         diag = vp8_sub_pixel_variance16x16s_4_4_neon(y - 1 - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
    434         break;
    435     case 1:
    436         this_mv.col += 4;
    437         this_mv.row = (this_mv.row - 8) | 4;
    438         diag = vp8_sub_pixel_variance16x16s_4_4_neon(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
    439         break;
    440     case 2:
    441         this_mv.col = (this_mv.col - 8) | 4;
    442         this_mv.row += 4;
    443         diag = vp8_sub_pixel_variance16x16s_4_4_neon(y - 1, d->pre_stride, z, b->src_stride, &sse);
    444         break;
    445     case 3:
    446         this_mv.col += 4;
    447         this_mv.row += 4;
    448         diag = vp8_sub_pixel_variance16x16s_4_4_neon(y, d->pre_stride, z, b->src_stride, &sse);
    449         break;
    450     }
    451 
    452     diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    453 
    454     if (diag < bestmse)
    455     {
    456         *bestmv = this_mv;
    457         bestmse = diag;
    458     }
    459 
    460 //  }
    461 
    462 
    463     // time to check quarter pels.
    464     if (bestmv->row < startmv.row)
    465         y -= d->pre_stride;
    466 
    467     if (bestmv->col < startmv.col)
    468         y--;
    469 
    470     startmv = *bestmv;
    471 
    472 
    473 
    474     // go left then right and check error
    475     this_mv.row = startmv.row;
    476 
    477     if (startmv.col & 7)
    478     {
    479         this_mv.col = startmv.col - 2;
    480         left = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
    481     }
    482     else
    483     {
    484         this_mv.col = (startmv.col - 8) | 6;
    485         left = svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);
    486     }
    487 
    488     left += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    489 
    490     if (left < bestmse)
    491     {
    492         *bestmv = this_mv;
    493         bestmse = left;
    494     }
    495 
    496     this_mv.col += 4;
    497     right = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
    498     right += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    499 
    500     if (right < bestmse)
    501     {
    502         *bestmv = this_mv;
    503         bestmse = right;
    504     }
    505 
    506     // go up then down and check error
    507     this_mv.col = startmv.col;
    508 
    509     if (startmv.row & 7)
    510     {
    511         this_mv.row = startmv.row - 2;
    512         up = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
    513     }
    514     else
    515     {
    516         this_mv.row = (startmv.row - 8) | 6;
    517         up = svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);
    518     }
    519 
    520     up += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    521 
    522     if (up < bestmse)
    523     {
    524         *bestmv = this_mv;
    525         bestmse = up;
    526     }
    527 
    528     this_mv.row += 4;
    529     down = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
    530     down += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    531 
    532     if (down < bestmse)
    533     {
    534         *bestmv = this_mv;
    535         bestmse = down;
    536     }
    537 
    538 
    539     // now check 1 more diagonal
    540     whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
    541 
    542 //  for(whichdir=0;whichdir<4;whichdir++)
    543 //  {
    544     this_mv = startmv;
    545 
    546     switch (whichdir)
    547     {
    548     case 0:
    549 
    550         if (startmv.row & 7)
    551         {
    552             this_mv.row -= 2;
    553 
    554             if (startmv.col & 7)
    555             {
    556                 this_mv.col -= 2;
    557                 diag = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
    558             }
    559             else
    560             {
    561                 this_mv.col = (startmv.col - 8) | 6;
    562                 diag = svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);;
    563             }
    564         }
    565         else
    566         {
    567             this_mv.row = (startmv.row - 8) | 6;
    568 
    569             if (startmv.col & 7)
    570             {
    571                 this_mv.col -= 2;
    572                 diag = svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);
    573             }
    574             else
    575             {
    576                 this_mv.col = (startmv.col - 8) | 6;
    577                 diag = svf(y - d->pre_stride - 1, d->pre_stride, 6, 6, z, b->src_stride, &sse);
    578             }
    579         }
    580 
    581         break;
    582     case 1:
    583         this_mv.col += 2;
    584 
    585         if (startmv.row & 7)
    586         {
    587             this_mv.row -= 2;
    588             diag = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
    589         }
    590         else
    591         {
    592             this_mv.row = (startmv.row - 8) | 6;
    593             diag = svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);
    594         }
    595 
    596         break;
    597     case 2:
    598         this_mv.row += 2;
    599 
    600         if (startmv.col & 7)
    601         {
    602             this_mv.col -= 2;
    603             diag = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
    604         }
    605         else
    606         {
    607             this_mv.col = (startmv.col - 8) | 6;
    608             diag = svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);;
    609         }
    610 
    611         break;
    612     case 3:
    613         this_mv.col += 2;
    614         this_mv.row += 2;
    615         diag = svf(y, d->pre_stride,  this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
    616         break;
    617     }
    618 
    619     diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    620 
    621     if (diag < bestmse)
    622     {
    623         *bestmv = this_mv;
    624         bestmse = diag;
    625     }
    626 
    627 //  }
    628 
    629     return bestmse;
    630 }
    631 
    632 int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, vp8_subpixvariance_fn_t svf, vp8_variance_fn_t vf, int *mvcost[2])
    633 {
    634     int bestmse = INT_MAX;
    635     MV startmv;
    636     //MV this_mv;
    637     MV this_mv;
    638     unsigned char *y = *(d->base_pre) + d->pre + (bestmv->row) * d->pre_stride + bestmv->col;
    639     unsigned char *z = (*(b->base_src) + b->src);
    640     int left, right, up, down, diag;
    641     unsigned int sse;
    642 
    643     // Trap uncodable vectors
    644     if ((abs((bestmv->col << 3) - ref_mv->col) > MAX_FULL_PEL_VAL) || (abs((bestmv->row << 3) - ref_mv->row) > MAX_FULL_PEL_VAL))
    645     {
    646         bestmv->row <<= 3;
    647         bestmv->col <<= 3;
    648         return INT_MAX;
    649     }
    650 
    651     // central mv
    652     bestmv->row <<= 3;
    653     bestmv->col <<= 3;
    654     startmv = *bestmv;
    655 
    656     // calculate central point error
    657     bestmse = vf(y, d->pre_stride, z, b->src_stride, &sse);
    658     bestmse += vp8_mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
    659 
    660     // go left then right and check error
    661     this_mv.row = startmv.row;
    662     this_mv.col = ((startmv.col - 8) | 4);
    663     left = vp8_sub_pixel_variance16x16s_4_0_neon(y - 1, d->pre_stride, z, b->src_stride, &sse);
    664     left += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    665 
    666     if (left < bestmse)
    667     {
    668         *bestmv = this_mv;
    669         bestmse = left;
    670     }
    671 
    672     this_mv.col += 8;
    673     right = vp8_sub_pixel_variance16x16s_4_0_neon(y, d->pre_stride, z, b->src_stride, &sse);
    674     right += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    675 
    676     if (right < bestmse)
    677     {
    678         *bestmv = this_mv;
    679         bestmse = right;
    680     }
    681 
    682     // go up then down and check error
    683     this_mv.col = startmv.col;
    684     this_mv.row = ((startmv.row - 8) | 4);
    685     up = vp8_sub_pixel_variance16x16s_0_4_neon(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
    686     up += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    687 
    688     if (up < bestmse)
    689     {
    690         *bestmv = this_mv;
    691         bestmse = up;
    692     }
    693 
    694     this_mv.row += 8;
    695     down = vp8_sub_pixel_variance16x16s_0_4_neon(y, d->pre_stride, z, b->src_stride, &sse);
    696     down += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    697 
    698     if (down < bestmse)
    699     {
    700         *bestmv = this_mv;
    701         bestmse = down;
    702     }
    703 
    704     // somewhat strangely not doing all the diagonals for half pel is slower than doing them.
    705 #if 0
    706     // now check 1 more diagonal -
    707     whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
    708     this_mv = startmv;
    709 
    710     switch (whichdir)
    711     {
    712     case 0:
    713         this_mv.col = (this_mv.col - 8) | 4;
    714         this_mv.row = (this_mv.row - 8) | 4;
    715         diag = svf(y - 1 - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse);
    716         break;
    717     case 1:
    718         this_mv.col += 4;
    719         this_mv.row = (this_mv.row - 8) | 4;
    720         diag = svf(y - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse);
    721         break;
    722     case 2:
    723         this_mv.col = (this_mv.col - 8) | 4;
    724         this_mv.row += 4;
    725         diag = svf(y - 1, d->pre_stride, 4, 4, z, b->src_stride, &sse);
    726         break;
    727     case 3:
    728         this_mv.col += 4;
    729         this_mv.row += 4;
    730         diag = svf(y, d->pre_stride, 4, 4, z, b->src_stride, &sse);
    731         break;
    732     }
    733 
    734     diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    735 
    736     if (diag < bestmse)
    737     {
    738         *bestmv = this_mv;
    739         bestmse = diag;
    740     }
    741 
    742 #else
    743     this_mv.col = (this_mv.col - 8) | 4;
    744     this_mv.row = (this_mv.row - 8) | 4;
    745     diag = vp8_sub_pixel_variance16x16s_4_4_neon(y - 1 - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
    746     diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    747 
    748     if (diag < bestmse)
    749     {
    750         *bestmv = this_mv;
    751         bestmse = diag;
    752     }
    753 
    754     this_mv.col += 8;
    755     diag = vp8_sub_pixel_variance16x16s_4_4_neon(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
    756     diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    757 
    758     if (diag < bestmse)
    759     {
    760         *bestmv = this_mv;
    761         bestmse = diag;
    762     }
    763 
    764     this_mv.col = (this_mv.col - 8) | 4;
    765     this_mv.row = startmv.row + 4;
    766     diag = vp8_sub_pixel_variance16x16s_4_4_neon(y - 1, d->pre_stride, z, b->src_stride, &sse);
    767     diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    768 
    769     if (diag < bestmse)
    770     {
    771         *bestmv = this_mv;
    772         bestmse = diag;
    773     }
    774 
    775     this_mv.col += 8;
    776     diag = vp8_sub_pixel_variance16x16s_4_4_neon(y, d->pre_stride, z, b->src_stride, &sse);
    777     diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    778 
    779     if (diag < bestmse)
    780     {
    781         *bestmv = this_mv;
    782         bestmse = diag;
    783     }
    784 
    785 #endif
    786     return bestmse;
    787 }
    788 
    789 #if 1
    790 
// Helper macros for the SAD-based search that follows (vp8_hex_search). They
// expand in that function's scope and rely on its locals: mvsadcost, rr, rc,
// error_per_bit, d, src, src_stride, sf, besterr, br and bc. Here r and c are
// used directly as row/column pixel offsets into the predictor (see PRE).
#define MVC(r,c) (((mvsadcost[0][((r)<<2)-rr] + mvsadcost[1][((c)<<2) - rc]) * error_per_bit + 128 )>>8 ) // estimated cost of a motion vector (r,c)
#define PRE(r,c) (*(d->base_pre) + d->pre + (r) * d->pre_stride + (c)) // pointer to predictor base of a motionvector
#define DIST(r,c,v) sf( src,src_stride,PRE(r,c),d->pre_stride, v) // returns sad error score.
#define ERR(r,c,v) (MVC(r,c)+DIST(r,c,v)) // returns distortion + motion vector cost
#define CHECK_BETTER(v,r,c) if ((v = ERR(r,c,besterr)) < besterr) { besterr = v; br=r; bc=c; } // checks if (r,c) has better score than previous best
// Follow-up probe points for the hexagon search. Row k holds three points of
// the hex[] pattern declared in vp8_hex_search: hex[(k + 5) % 6], hex[k] and
// hex[(k + 1) % 6]. The search indexes this table with the index of the
// hexagon point that won the previous round.
const MV next_chkpts[6][3] =
{
    {{ -2, 0}, { -1, -2}, {1, -2}},
    {{ -1, -2}, {1, -2}, {2, 0}},
    {{1, -2}, {2, 0}, {1, 2}},
    {{2, 0}, {1, 2}, { -1, 2}},
    {{1, 2}, { -1, 2}, { -2, 0}},
    {{ -1, 2}, { -2, 0}, { -1, -2}}
};
    805 int vp8_hex_search
    806 (
    807     MACROBLOCK *x,
    808     BLOCK *b,
    809     BLOCKD *d,
    810     MV *ref_mv,
    811     MV *best_mv,
    812     int search_param,
    813     int error_per_bit,
    814     int *num00,
    815     vp8_variance_fn_t vf,
    816     vp8_sad_fn_t      sf,
    817     int *mvsadcost[2],
    818     int *mvcost[2]
    819 )
    820 {
    821     MV hex[6] = { { -1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0} } ;
    822     MV neighbors[8] = { { -1, -1}, { -1, 0}, { -1, 1}, {0, -1}, {0, 1}, {1, -1}, {1, 0}, {1, 1} } ;
    823     int i, j;
    824     unsigned char *src = (*(b->base_src) + b->src);
    825     int src_stride = b->src_stride;
    826     int rr = ref_mv->row, rc = ref_mv->col, br = rr >> 3, bc = rc >> 3, tr, tc;
    827     unsigned int besterr, thiserr = 0x7fffffff;
    828     int k = -1, tk;
    829 
    830     if (bc < x->mv_col_min) bc = x->mv_col_min;
    831 
    832     if (bc > x->mv_col_max) bc = x->mv_col_max;
    833 
    834     if (br < x->mv_row_min) br = x->mv_row_min;
    835 
    836     if (br > x->mv_row_max) br = x->mv_row_max;
    837 
    838     rr >>= 1;
    839     rc >>= 1;
    840 
    841     besterr = ERR(br, bc, thiserr);
    842 
    843     // hex search
    844     //j=0
    845     tr = br;
    846     tc = bc;
    847 
    848     for (i = 0; i < 6; i++)
    849     {
    850         int nr = tr + hex[i].row, nc = tc + hex[i].col;
    851 
    852         if (nc < x->mv_col_min) continue;
    853 
    854         if (nc > x->mv_col_max) continue;
    855 
    856         if (nr < x->mv_row_min) continue;
    857 
    858         if (nr > x->mv_row_max) continue;
    859 
    860         //CHECK_BETTER(thiserr,nr,nc);
    861         if ((thiserr = ERR(nr, nc, besterr)) < besterr)
    862         {
    863             besterr = thiserr;
    864             br = nr;
    865             bc = nc;
    866             k = i;
    867         }
    868     }
    869 
    870     if (tr == br && tc == bc)
    871         goto cal_neighbors;
    872 
    873     for (j = 1; j < 127; j++)
    874     {
    875         tr = br;
    876         tc = bc;
    877         tk = k;
    878 
    879         for (i = 0; i < 3; i++)
    880         {
    881             int nr = tr + next_chkpts[tk][i].row, nc = tc + next_chkpts[tk][i].col;
    882 
    883             if (nc < x->mv_col_min) continue;
    884 
    885             if (nc > x->mv_col_max) continue;
    886 
    887             if (nr < x->mv_row_min) continue;
    888 
    889             if (nr > x->mv_row_max) continue;
    890 
    891             //CHECK_BETTER(thiserr,nr,nc);
    892             if ((thiserr = ERR(nr, nc, besterr)) < besterr)
    893             {
    894                 besterr = thiserr;
    895                 br = nr;
    896                 bc = nc; //k=(tk+5+i)%6;}
    897                 k = tk + 5 + i;
    898 
    899                 if (k >= 12) k -= 12;
    900                 else if (k >= 6) k -= 6;
    901             }
    902         }
    903 
    904         if (tr == br && tc == bc)
    905             break;
    906     }
    907 
    908     // check 8 1 away neighbors
    909 cal_neighbors:
    910     tr = br;
    911     tc = bc;
    912 
    913     for (i = 0; i < 8; i++)
    914     {
    915         int nr = tr + neighbors[i].row, nc = tc + neighbors[i].col;
    916 
    917         if (nc < x->mv_col_min) continue;
    918 
    919         if (nc > x->mv_col_max) continue;
    920 
    921         if (nr < x->mv_row_min) continue;
    922 
    923         if (nr > x->mv_row_max) continue;
    924 
    925         CHECK_BETTER(thiserr, nr, nc);
    926     }
    927 
    928     best_mv->row = br;
    929     best_mv->col = bc;
    930 
    931     return vf(src, src_stride, PRE(br, bc), d->pre_stride, &thiserr) + MVC(br, bc) ;
    932 }
    933 #undef MVC
    934 #undef PRE
    935 #undef SP
    936 #undef DIST
    937 #undef ERR
    938 #undef CHECK_BETTER
    939 
    940 #else
    941 
    /*
     * Helper macros for vp8_hex_search().  They are NOT self-contained: every one
     * of them expands to code that uses locals/parameters of the enclosing
     * function by name (mvsadcost, rr, rc, error_per_bit, d, src, src_stride, sf,
     * and for CHECK_BETTER also besterr, br and bc).  The r and c arguments are
     * expanded more than once, so they must be free of side effects.
     */

    /* Estimated cost of the motion vector (r,c): table lookups indexed by
       (r<<2)-rr and (c<<2)-rc, weighted by error_per_bit, rounded, then >>8. */
    #define MVC(r,c) (((mvsadcost[0][((r)<<2)-rr] + mvsadcost[1][((c)<<2) - rc]) * error_per_bit + 128 )>>8 )

    /* Pointer into the predictor buffer at row r, column c. */
    #define PRE(r,c) (*(d->base_pre) + d->pre + (r) * d->pre_stride + (c))

    /* SAD error score of the source block against the predictor at (r,c);
       v is forwarded to sf() as its last argument. */
    #define DIST(r,c,v) sf( src,src_stride,PRE(r,c),d->pre_stride, (v))

    /* Distortion plus motion vector cost. */
    #define ERR(r,c,v) (MVC(r,c)+DIST(r,c,v))

    /* Stores the score of (r,c) in v and, if it beats besterr, records it as the
       new best (besterr, br, bc).  Wrapped in do/while(0) so that the macro is a
       single statement and can be used safely in an unbraced if/else. */
    #define CHECK_BETTER(v,r,c) \
        do { \
            if (((v) = ERR(r,c,besterr)) < besterr) \
            { \
                besterr = (v); \
                br = (r); \
                bc = (c); \
            } \
        } while (0)
    947 
    948 int vp8_hex_search
    949 (
    950     MACROBLOCK *x,
    951     BLOCK *b,
    952     BLOCKD *d,
    953     MV *ref_mv,
    954     MV *best_mv,
    955     int search_param,
    956     int error_per_bit,
    957     int *num00,
    958     vp8_variance_fn_t vf,
    959     vp8_sad_fn_t      sf,
    960     int *mvsadcost[2],
    961     int *mvcost[2]
    962 )
    963 {
    964     MV hex[6] = { { -2, 0}, { -1, -2}, { -1, 2}, {2, 0}, {1, 2}, {1, -2} } ;
    965     MV neighbors[8] = { { -1, -1}, { -1, 0}, { -1, 1}, {0, -1}, {0, 1}, {1, -1}, {1, 0}, {1, 1} } ;
    966     int i, j;
    967     unsigned char *src = (*(b->base_src) + b->src);
    968     int src_stride = b->src_stride;
    969     //int rr= ref_mv->row,rc= ref_mv->col,br=rr,bc=rc,tr,tc;
    970     int rr = ref_mv->row, rc = ref_mv->col, br = rr >> 3, bc = rc >> 3, tr, tc;
    971     unsigned int besterr, thiserr = 0x7fffffff;
    972 
    973     /*
    974         if ( rc < x->mv_col_min) bc = x->mv_col_min;
    975         if ( rc > x->mv_col_max) bc = x->mv_col_max;
    976         if ( rr < x->mv_row_min) br = x->mv_row_min;
    977         if ( rr > x->mv_row_max) br = x->mv_row_max;
    978         rr>>=1;
    979         rc>>=1;
    980         br>>=3;
    981         bc>>=3;
    982     */
    983     if (bc < x->mv_col_min) bc = x->mv_col_min;
    984 
    985     if (bc > x->mv_col_max) bc = x->mv_col_max;
    986 
    987     if (br < x->mv_row_min) br = x->mv_row_min;
    988 
    989     if (br > x->mv_row_max) br = x->mv_row_max;
    990 
    991     rr >>= 1;
    992     rc >>= 1;
    993 
    994     besterr = ERR(br, bc, thiserr);
    995 
    996     // hex search  jbb changed to 127 to avoid max 256 problem steping by 2.
    997     for (j = 0; j < 127; j++)
    998     {
    999         tr = br;
   1000         tc = bc;
   1001 
   1002         for (i = 0; i < 6; i++)
   1003         {
   1004             int nr = tr + hex[i].row, nc = tc + hex[i].col;
   1005 
   1006             if (nc < x->mv_col_min) continue;
   1007 
   1008             if (nc > x->mv_col_max) continue;
   1009 
   1010             if (nr < x->mv_row_min) continue;
   1011 
   1012             if (nr > x->mv_row_max) continue;
   1013 
   1014             CHECK_BETTER(thiserr, nr, nc);
   1015         }
   1016 
   1017         if (tr == br && tc == bc)
   1018             break;
   1019     }
   1020 
   1021     // check 8 1 away neighbors
   1022     tr = br;
   1023     tc = bc;
   1024 
   1025     for (i = 0; i < 8; i++)
   1026     {
   1027         int nr = tr + neighbors[i].row, nc = tc + neighbors[i].col;
   1028 
   1029         if (nc < x->mv_col_min) continue;
   1030 
   1031         if (nc > x->mv_col_max) continue;
   1032 
   1033         if (nr < x->mv_row_min) continue;
   1034 
   1035         if (nr > x->mv_row_max) continue;
   1036 
   1037         CHECK_BETTER(thiserr, nr, nc);
   1038     }
   1039 
   1040     best_mv->row = br;
   1041     best_mv->col = bc;
   1042 
   1043     return vf(src, src_stride, PRE(br, bc), d->pre_stride, &thiserr) + MVC(br, bc) ;
   1044 }
   1045 #undef MVC
   1046 #undef PRE
   1047 #undef SP
   1048 #undef DIST
   1049 #undef ERR
   1050 #undef CHECK_BETTER
   1051 
   1052 #endif
   1053 
   1054 int vp8_diamond_search_sad
   1055 (
   1056     MACROBLOCK *x,
   1057     BLOCK *b,
   1058     BLOCKD *d,
   1059     MV *ref_mv,
   1060     MV *best_mv,
   1061     int search_param,
   1062     int error_per_bit,
   1063     int *num00,
   1064     vp8_variance_fn_ptr_t *fn_ptr,
   1065     int *mvsadcost[2],
   1066     int *mvcost[2]
   1067 )
   1068 {
   1069     int i, j, step;
   1070 
   1071     unsigned char *what = (*(b->base_src) + b->src);
   1072     int what_stride = b->src_stride;
   1073     unsigned char *in_what;
   1074     int in_what_stride = d->pre_stride;
   1075     unsigned char *best_address;
   1076 
   1077     int tot_steps;
   1078     MV this_mv;
   1079 
   1080     int bestsad = INT_MAX;
   1081     int best_site = 0;
   1082     int last_site = 0;
   1083 
   1084     int ref_row = ref_mv->row >> 3;
   1085     int ref_col = ref_mv->col >> 3;
   1086     int this_row_offset;
   1087     int this_col_offset;
   1088     search_site *ss;
   1089 
   1090     unsigned char *check_here;
   1091     int thissad;
   1092 
   1093     // Work out the start point for the search
   1094     in_what = (unsigned char *)(*(d->base_pre) + d->pre + (ref_row * (d->pre_stride)) + ref_col);
   1095     best_address = in_what;
   1096 
   1097     // We need to check that the starting point for the search (as indicated by ref_mv) is within the buffer limits
   1098     if ((ref_col > x->mv_col_min) && (ref_col < x->mv_col_max) &&
   1099     (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max))
   1100     {
   1101         // Check the starting position
   1102         bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, ref_mv, mvsadcost, error_per_bit);
   1103     }
   1104 
   1105     // search_param determines the length of the initial step and hence the number of iterations
   1106     // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
   1107     ss = &x->ss[search_param * x->searches_per_step];
   1108     tot_steps = (x->ss_count / x->searches_per_step) - search_param;
   1109 
   1110     i = 1;
   1111     best_mv->row = ref_row;
   1112     best_mv->col = ref_col;
   1113 
   1114     *num00 = 0;
   1115 
   1116     for (step = 0; step < tot_steps ; step++)
   1117     {
   1118         for (j = 0 ; j < x->searches_per_step ; j++)
   1119         {
   1120             // Trap illegal vectors
   1121             this_row_offset = best_mv->row + ss[i].mv.row;
   1122             this_col_offset = best_mv->col + ss[i].mv.col;
   1123 
   1124             if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
   1125             (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
   1126 
   1127             {
   1128                 check_here = ss[i].offset + best_address;
   1129                 thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
   1130 
   1131                 if (thissad < bestsad)
   1132                 {
   1133                     this_mv.row = this_row_offset << 3;
   1134                     this_mv.col = this_col_offset << 3;
   1135                     thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
   1136 
   1137                     if (thissad < bestsad)
   1138                     {
   1139                         bestsad = thissad;
   1140                         best_site = i;
   1141                     }
   1142                 }
   1143             }
   1144 
   1145             i++;
   1146         }
   1147 
   1148         if (best_site != last_site)
   1149         {
   1150             best_mv->row += ss[best_site].mv.row;
   1151             best_mv->col += ss[best_site].mv.col;
   1152             best_address += ss[best_site].offset;
   1153             last_site = best_site;
   1154         }
   1155         else if (best_address == in_what)
   1156             (*num00)++;
   1157     }
   1158 
   1159     this_mv.row = best_mv->row << 3;
   1160     this_mv.col = best_mv->col << 3;
   1161 
   1162     if (bestsad == INT_MAX)
   1163         return INT_MAX;
   1164 
   1165     return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad))
   1166     + vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
   1167 }
   1168 
   1169 int vp8_diamond_search_sadx4
   1170 (
   1171     MACROBLOCK *x,
   1172     BLOCK *b,
   1173     BLOCKD *d,
   1174     MV *ref_mv,
   1175     MV *best_mv,
   1176     int search_param,
   1177     int error_per_bit,
   1178     int *num00,
   1179     vp8_variance_fn_ptr_t *fn_ptr,
   1180     int *mvsadcost[2],
   1181     int *mvcost[2]
   1182 )
   1183 {
   1184     int i, j, step;
   1185 
   1186     unsigned char *what = (*(b->base_src) + b->src);
   1187     int what_stride = b->src_stride;
   1188     unsigned char *in_what;
   1189     int in_what_stride = d->pre_stride;
   1190     unsigned char *best_address;
   1191 
   1192     int tot_steps;
   1193     MV this_mv;
   1194 
   1195     int bestsad = INT_MAX;
   1196     int best_site = 0;
   1197     int last_site = 0;
   1198 
   1199     int ref_row = ref_mv->row >> 3;
   1200     int ref_col = ref_mv->col >> 3;
   1201     int this_row_offset;
   1202     int this_col_offset;
   1203     search_site *ss;
   1204 
   1205     unsigned char *check_here;
   1206     int thissad;
   1207 
   1208     // Work out the start point for the search
   1209     in_what = (unsigned char *)(*(d->base_pre) + d->pre + (ref_row * (d->pre_stride)) + ref_col);
   1210     best_address = in_what;
   1211 
   1212     // We need to check that the starting point for the search (as indicated by ref_mv) is within the buffer limits
   1213     if ((ref_col > x->mv_col_min) && (ref_col < x->mv_col_max) &&
   1214     (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max))
   1215     {
   1216         // Check the starting position
   1217         bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, ref_mv, mvsadcost, error_per_bit);
   1218     }
   1219 
   1220     // search_param determines the length of the initial step and hence the number of iterations
   1221     // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
   1222     ss = &x->ss[search_param * x->searches_per_step];
   1223     tot_steps = (x->ss_count / x->searches_per_step) - search_param;
   1224 
   1225     i = 1;
   1226     best_mv->row = ref_row;
   1227     best_mv->col = ref_col;
   1228 
   1229     *num00 = 0;
   1230 
   1231     for (step = 0; step < tot_steps ; step++)
   1232     {
   1233         int check_row_min, check_col_min, check_row_max, check_col_max;
   1234 
   1235         check_row_min = x->mv_row_min - best_mv->row;
   1236         check_row_max = x->mv_row_max - best_mv->row;
   1237         check_col_min = x->mv_col_min - best_mv->col;
   1238         check_col_max = x->mv_col_max - best_mv->col;
   1239 
   1240         for (j = 0 ; j < x->searches_per_step ; j += 4)
   1241         {
   1242             char *block_offset[4];
   1243             unsigned int valid_block[4];
   1244             int all_in = 1, t;
   1245 
   1246             for (t = 0; t < 4; t++)
   1247             {
   1248                 valid_block [t]  = (ss[t+i].mv.col > check_col_min);
   1249                 valid_block [t] &= (ss[t+i].mv.col < check_col_max);
   1250                 valid_block [t] &= (ss[t+i].mv.row > check_row_min);
   1251                 valid_block [t] &= (ss[t+i].mv.row < check_row_max);
   1252 
   1253                 all_in &= valid_block[t];
   1254                 block_offset[t] = ss[i+t].offset + best_address;
   1255             }
   1256 
   1257             if (all_in)
   1258             {
   1259                 int sad_array[4];
   1260 
   1261                 fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, sad_array);
   1262 
   1263                 for (t = 0; t < 4; t++, i++)
   1264                 {
   1265                     thissad = sad_array[t];
   1266 
   1267                     if (thissad < bestsad)
   1268                     {
   1269                         this_mv.row = (best_mv->row + ss[i].mv.row) << 3;
   1270                         this_mv.col = (best_mv->col + ss[i].mv.col) << 3;
   1271                         thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
   1272 
   1273                         if (thissad < bestsad)
   1274                         {
   1275                             bestsad = thissad;
   1276                             best_site = i;
   1277                         }
   1278                     }
   1279                 }
   1280             }
   1281             else
   1282             {
   1283                 int t;
   1284 
   1285                 for (t = 0; t < 4; i++, t++)
   1286                 {
   1287                     // Trap illegal vectors
   1288                     if (valid_block[t])
   1289 
   1290                     {
   1291                         check_here = block_offset[t];
   1292                         thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
   1293 
   1294                         if (thissad < bestsad)
   1295                         {
   1296                             this_row_offset = best_mv->row + ss[i].mv.row;
   1297                             this_col_offset = best_mv->col + ss[i].mv.col;
   1298 
   1299                             this_mv.row = this_row_offset << 3;
   1300                             this_mv.col = this_col_offset << 3;
   1301                             thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
   1302 
   1303                             if (thissad < bestsad)
   1304                             {
   1305                                 bestsad = thissad;
   1306                                 best_site = i;
   1307                             }
   1308                         }
   1309                     }
   1310                 }
   1311             }
   1312         }
   1313 
   1314         if (best_site != last_site)
   1315         {
   1316             best_mv->row += ss[best_site].mv.row;
   1317             best_mv->col += ss[best_site].mv.col;
   1318             best_address += ss[best_site].offset;
   1319             last_site = best_site;
   1320         }
   1321         else if (best_address == in_what)
   1322             (*num00)++;
   1323     }
   1324 
   1325     this_mv.row = best_mv->row << 3;
   1326     this_mv.col = best_mv->col << 3;
   1327 
   1328     if (bestsad == INT_MAX)
   1329         return INT_MAX;
   1330 
   1331     return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad))
   1332     + vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
   1333 }
   1334 
   1335 
   1336 #if !(CONFIG_REALTIME_ONLY)
   1337 int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int *mvsadcost[2])
   1338 {
   1339     unsigned char *what = (*(b->base_src) + b->src);
   1340     int what_stride = b->src_stride;
   1341     unsigned char *in_what;
   1342     int in_what_stride = d->pre_stride;
   1343     int mv_stride = d->pre_stride;
   1344     unsigned char *bestaddress;
   1345     MV *best_mv = &d->bmi.mv.as_mv;
   1346     MV this_mv;
   1347     int bestsad = INT_MAX;
   1348     int r, c;
   1349 
   1350     unsigned char *check_here;
   1351     int thissad;
   1352 
   1353     int ref_row = ref_mv->row >> 3;
   1354     int ref_col = ref_mv->col >> 3;
   1355 
   1356     int row_min = ref_row - distance;
   1357     int row_max = ref_row + distance;
   1358     int col_min = ref_col - distance;
   1359     int col_max = ref_col + distance;
   1360 
   1361     // Work out the mid point for the search
   1362     in_what = *(d->base_pre) + d->pre;
   1363     bestaddress = in_what + (ref_row * d->pre_stride) + ref_col;
   1364 
   1365     best_mv->row = ref_row;
   1366     best_mv->col = ref_col;
   1367 
   1368     // We need to check that the starting point for the search (as indicated by ref_mv) is within the buffer limits
   1369     if ((ref_col > x->mv_col_min) && (ref_col < x->mv_col_max) &&
   1370     (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max))
   1371     {
   1372         // Baseline value at the centre
   1373 
   1374         //bestsad = fn_ptr->sf( what,what_stride,bestaddress,in_what_stride) + (int)sqrt(vp8_mv_err_cost(ref_mv,ref_mv, mvcost,error_per_bit*14));
   1375         bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, ref_mv, mvsadcost, error_per_bit);
   1376     }
   1377 
   1378     // Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border
   1379     if (col_min < x->mv_col_min)
   1380         col_min = x->mv_col_min;
   1381 
   1382     if (col_max > x->mv_col_max)
   1383         col_max = x->mv_col_max;
   1384 
   1385     if (row_min < x->mv_row_min)
   1386         row_min = x->mv_row_min;
   1387 
   1388     if (row_max > x->mv_row_max)
   1389         row_max = x->mv_row_max;
   1390 
   1391     for (r = row_min; r < row_max ; r++)
   1392     {
   1393         this_mv.row = r << 3;
   1394         check_here = r * mv_stride + in_what + col_min;
   1395 
   1396         for (c = col_min; c < col_max; c++)
   1397         {
   1398             thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
   1399 
   1400             this_mv.col = c << 3;
   1401             //thissad += (int)sqrt(vp8_mv_err_cost(&this_mv,ref_mv, mvcost,error_per_bit*14));
   1402             //thissad  += error_per_bit * mv_bits_sadcost[mv_bits(&this_mv, ref_mv, mvcost)];
   1403             thissad  += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit); //mv_bits(error_per_bit, &this_mv, ref_mv, mvsadcost);
   1404 
   1405             if (thissad < bestsad)
   1406             {
   1407                 bestsad = thissad;
   1408                 best_mv->row = r;
   1409                 best_mv->col = c;
   1410                 bestaddress = check_here;
   1411             }
   1412 
   1413             check_here++;
   1414         }
   1415     }
   1416 
   1417     this_mv.row = best_mv->row << 3;
   1418     this_mv.col = best_mv->col << 3;
   1419 
   1420     if (bestsad < INT_MAX)
   1421         return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad))
   1422         + vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
   1423     else
   1424         return INT_MAX;
   1425 }
   1426 
   1427 int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int *mvsadcost[2])
   1428 {
   1429     unsigned char *what = (*(b->base_src) + b->src);
   1430     int what_stride = b->src_stride;
   1431     unsigned char *in_what;
   1432     int in_what_stride = d->pre_stride;
   1433     int mv_stride = d->pre_stride;
   1434     unsigned char *bestaddress;
   1435     MV *best_mv = &d->bmi.mv.as_mv;
   1436     MV this_mv;
   1437     int bestsad = INT_MAX;
   1438     int r, c;
   1439 
   1440     unsigned char *check_here;
   1441     int thissad;
   1442 
   1443     int ref_row = ref_mv->row >> 3;
   1444     int ref_col = ref_mv->col >> 3;
   1445 
   1446     int row_min = ref_row - distance;
   1447     int row_max = ref_row + distance;
   1448     int col_min = ref_col - distance;
   1449     int col_max = ref_col + distance;
   1450 
   1451     int sad_array[3];
   1452 
   1453     // Work out the mid point for the search
   1454     in_what = *(d->base_pre) + d->pre;
   1455     bestaddress = in_what + (ref_row * d->pre_stride) + ref_col;
   1456 
   1457     best_mv->row = ref_row;
   1458     best_mv->col = ref_col;
   1459 
   1460     // We need to check that the starting point for the search (as indicated by ref_mv) is within the buffer limits
   1461     if ((ref_col > x->mv_col_min) && (ref_col < x->mv_col_max) &&
   1462     (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max))
   1463     {
   1464         // Baseline value at the centre
   1465         bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, ref_mv, mvsadcost, error_per_bit);
   1466     }
   1467 
   1468     // Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border
   1469     if (col_min < x->mv_col_min)
   1470         col_min = x->mv_col_min;
   1471 
   1472     if (col_max > x->mv_col_max)
   1473         col_max = x->mv_col_max;
   1474 
   1475     if (row_min < x->mv_row_min)
   1476         row_min = x->mv_row_min;
   1477 
   1478     if (row_max > x->mv_row_max)
   1479         row_max = x->mv_row_max;
   1480 
   1481     for (r = row_min; r < row_max ; r++)
   1482     {
   1483         this_mv.row = r << 3;
   1484         check_here = r * mv_stride + in_what + col_min;
   1485         c = col_min;
   1486 
   1487         while ((c + 3) < col_max)
   1488         {
   1489             int i;
   1490 
   1491             fn_ptr->sdx3f(what, what_stride, check_here , in_what_stride, sad_array);
   1492 
   1493             for (i = 0; i < 3; i++)
   1494             {
   1495                 thissad = sad_array[i];
   1496 
   1497                 if (thissad < bestsad)
   1498                 {
   1499                     this_mv.col = c << 3;
   1500                     thissad  += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
   1501 
   1502                     if (thissad < bestsad)
   1503                     {
   1504                         bestsad = thissad;
   1505                         best_mv->row = r;
   1506                         best_mv->col = c;
   1507                         bestaddress = check_here;
   1508                     }
   1509                 }
   1510 
   1511                 check_here++;
   1512                 c++;
   1513             }
   1514         }
   1515 
   1516         while (c < col_max)
   1517         {
   1518             thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
   1519 
   1520             if (thissad < bestsad)
   1521             {
   1522                 this_mv.col = c << 3;
   1523                 thissad  += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
   1524 
   1525                 if (thissad < bestsad)
   1526                 {
   1527                     bestsad = thissad;
   1528                     best_mv->row = r;
   1529                     best_mv->col = c;
   1530                     bestaddress = check_here;
   1531                 }
   1532             }
   1533 
   1534             check_here ++;
   1535             c ++;
   1536         }
   1537 
   1538     }
   1539 
   1540     this_mv.row = best_mv->row << 3;
   1541     this_mv.col = best_mv->col << 3;
   1542 
   1543     if (bestsad < INT_MAX)
   1544         return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad))
   1545         + vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
   1546     else
   1547         return INT_MAX;
   1548 }
   1549 #endif
   1550 
   1551 #ifdef ENTROPY_STATS
   1552 void print_mode_context(void)
   1553 {
   1554     FILE *f = fopen("modecont.c", "w");
   1555     int i, j;
   1556 
   1557     fprintf(f, "#include \"entropy.h\"\n");
   1558     fprintf(f, "const int vp8_mode_contexts[6][4] =\n");
   1559     fprintf(f, "{\n");
   1560 
   1561     for (j = 0; j < 6; j++)
   1562     {
   1563         fprintf(f, "  { // %d \n", j);
   1564         fprintf(f, "    ");
   1565 
   1566         for (i = 0; i < 4; i++)
   1567         {
   1568             int overal_prob;
   1569             int this_prob;
   1570             int count; // = mv_ref_ct[j][i][0]+mv_ref_ct[j][i][1];
   1571 
   1572             // Overall probs
   1573             count = mv_mode_cts[i][0] + mv_mode_cts[i][1];
   1574 
   1575             if (count)
   1576                 overal_prob = 256 * mv_mode_cts[i][0] / count;
   1577             else
   1578                 overal_prob = 128;
   1579 
   1580             if (overal_prob == 0)
   1581                 overal_prob = 1;
   1582 
   1583             // context probs
   1584             count = mv_ref_ct[j][i][0] + mv_ref_ct[j][i][1];
   1585 
   1586             if (count)
   1587                 this_prob = 256 * mv_ref_ct[j][i][0] / count;
   1588             else
   1589                 this_prob = 128;
   1590 
   1591             if (this_prob == 0)
   1592                 this_prob = 1;
   1593 
   1594             fprintf(f, "%5d, ", this_prob);
   1595             //fprintf(f,"%5d, %5d, %8d,", this_prob, overal_prob, (this_prob << 10)/overal_prob);
   1596             //fprintf(f,"%8d, ", (this_prob << 10)/overal_prob);
   1597         }
   1598 
   1599         fprintf(f, "  },\n");
   1600     }
   1601 
   1602     fprintf(f, "};\n");
   1603     fclose(f);
   1604 }
   1605 
   1606 /* MV ref count ENTROPY_STATS stats code */
   1607 #ifdef ENTROPY_STATS
   1608 void init_mv_ref_counts()
   1609 {
   1610     vpx_memset(mv_ref_ct, 0, sizeof(mv_ref_ct));
   1611     vpx_memset(mv_mode_cts, 0, sizeof(mv_mode_cts));
   1612 }
   1613 
   1614 void accum_mv_refs(MB_PREDICTION_MODE m, const int ct[4])
   1615 {
   1616     if (m == ZEROMV)
   1617     {
   1618         ++mv_ref_ct [ct[0]] [0] [0];
   1619         ++mv_mode_cts[0][0];
   1620     }
   1621     else
   1622     {
   1623         ++mv_ref_ct [ct[0]] [0] [1];
   1624         ++mv_mode_cts[0][1];
   1625 
   1626         if (m == NEARESTMV)
   1627         {
   1628             ++mv_ref_ct [ct[1]] [1] [0];
   1629             ++mv_mode_cts[1][0];
   1630         }
   1631         else
   1632         {
   1633             ++mv_ref_ct [ct[1]] [1] [1];
   1634             ++mv_mode_cts[1][1];
   1635 
   1636             if (m == NEARMV)
   1637             {
   1638                 ++mv_ref_ct [ct[2]] [2] [0];
   1639                 ++mv_mode_cts[2][0];
   1640             }
   1641             else
   1642             {
   1643                 ++mv_ref_ct [ct[2]] [2] [1];
   1644                 ++mv_mode_cts[2][1];
   1645 
   1646                 if (m == NEWMV)
   1647                 {
   1648                     ++mv_ref_ct [ct[3]] [3] [0];
   1649                     ++mv_mode_cts[3][0];
   1650                 }
   1651                 else
   1652                 {
   1653                     ++mv_ref_ct [ct[3]] [3] [1];
   1654                     ++mv_mode_cts[3][1];
   1655                 }
   1656             }
   1657         }
   1658     }
   1659 }
   1660 
   1661 #endif/* END MV ref count ENTROPY_STATS stats code */
   1662 
   1663 #endif
   1664