1 /* 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include <limits.h> 12 #include <math.h> 13 #include <stdio.h> 14 15 #include "./vpx_config.h" 16 17 #include "vpx_mem/vpx_mem.h" 18 19 #include "vp9/common/vp9_common.h" 20 21 #include "vp9/encoder/vp9_onyx_int.h" 22 #include "vp9/encoder/vp9_mcomp.h" 23 24 // #define NEW_DIAMOND_SEARCH 25 26 static INLINE const uint8_t *get_buf_from_mv(const struct buf_2d *buf, 27 const MV *mv) { 28 return &buf->buf[mv->row * buf->stride + mv->col]; 29 } 30 31 void vp9_set_mv_search_range(MACROBLOCK *x, const MV *mv) { 32 int col_min = (mv->col >> 3) - MAX_FULL_PEL_VAL + (mv->col & 7 ? 1 : 0); 33 int row_min = (mv->row >> 3) - MAX_FULL_PEL_VAL + (mv->row & 7 ? 1 : 0); 34 int col_max = (mv->col >> 3) + MAX_FULL_PEL_VAL; 35 int row_max = (mv->row >> 3) + MAX_FULL_PEL_VAL; 36 37 col_min = MAX(col_min, (MV_LOW >> 3) + 1); 38 row_min = MAX(row_min, (MV_LOW >> 3) + 1); 39 col_max = MIN(col_max, (MV_UPP >> 3) - 1); 40 row_max = MIN(row_max, (MV_UPP >> 3) - 1); 41 42 // Get intersection of UMV window and valid MV window to reduce # of checks 43 // in diamond search. 44 if (x->mv_col_min < col_min) 45 x->mv_col_min = col_min; 46 if (x->mv_col_max > col_max) 47 x->mv_col_max = col_max; 48 if (x->mv_row_min < row_min) 49 x->mv_row_min = row_min; 50 if (x->mv_row_max > row_max) 51 x->mv_row_max = row_max; 52 } 53 54 int vp9_init_search_range(VP9_COMP *cpi, int size) { 55 int sr = 0; 56 57 // Minimum search size no matter what the passed in value. 58 size = MAX(16, size); 59 60 while ((size << sr) < MAX_FULL_PEL_VAL) 61 sr++; 62 63 sr += cpi->sf.reduce_first_step_size; 64 sr = MIN(sr, (cpi->sf.max_step_search_steps - 2)); 65 return sr; 66 } 67 68 static INLINE int mv_cost(const MV *mv, 69 const int *joint_cost, int *comp_cost[2]) { 70 return joint_cost[vp9_get_mv_joint(mv)] + 71 comp_cost[0][mv->row] + comp_cost[1][mv->col]; 72 } 73 74 int vp9_mv_bit_cost(const MV *mv, const MV *ref, 75 const int *mvjcost, int *mvcost[2], int weight) { 76 const MV diff = { mv->row - ref->row, 77 mv->col - ref->col }; 78 return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) * weight, 7); 79 } 80 81 static int mv_err_cost(const MV *mv, const MV *ref, 82 const int *mvjcost, int *mvcost[2], 83 int error_per_bit) { 84 if (mvcost) { 85 const MV diff = { mv->row - ref->row, 86 mv->col - ref->col }; 87 return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) * 88 error_per_bit, 13); 89 } 90 return 0; 91 } 92 93 static int mvsad_err_cost(const MV *mv, const MV *ref, 94 const int *mvjsadcost, int *mvsadcost[2], 95 int error_per_bit) { 96 if (mvsadcost) { 97 const MV diff = { mv->row - ref->row, 98 mv->col - ref->col }; 99 return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjsadcost, mvsadcost) * 100 error_per_bit, 8); 101 } 102 return 0; 103 } 104 105 void vp9_init_dsmotion_compensation(MACROBLOCK *x, int stride) { 106 int len, ss_count = 1; 107 108 x->ss[0].mv.col = x->ss[0].mv.row = 0; 109 x->ss[0].offset = 0; 110 111 for (len = MAX_FIRST_STEP; len > 0; len /= 2) { 112 // Generate offsets for 4 search sites per step. 113 const MV ss_mvs[] = {{-len, 0}, {len, 0}, {0, -len}, {0, len}}; 114 int i; 115 for (i = 0; i < 4; ++i) { 116 search_site *const ss = &x->ss[ss_count++]; 117 ss->mv = ss_mvs[i]; 118 ss->offset = ss->mv.row * stride + ss->mv.col; 119 } 120 } 121 122 x->ss_count = ss_count; 123 x->searches_per_step = 4; 124 } 125 126 void vp9_init3smotion_compensation(MACROBLOCK *x, int stride) { 127 int len, ss_count = 1; 128 129 x->ss[0].mv.col = x->ss[0].mv.row = 0; 130 x->ss[0].offset = 0; 131 132 for (len = MAX_FIRST_STEP; len > 0; len /= 2) { 133 // Generate offsets for 8 search sites per step. 134 const MV ss_mvs[8] = { 135 {-len, 0 }, {len, 0 }, { 0, -len}, {0, len}, 136 {-len, -len}, {-len, len}, {len, -len}, {len, len} 137 }; 138 int i; 139 for (i = 0; i < 8; ++i) { 140 search_site *const ss = &x->ss[ss_count++]; 141 ss->mv = ss_mvs[i]; 142 ss->offset = ss->mv.row * stride + ss->mv.col; 143 } 144 } 145 146 x->ss_count = ss_count; 147 x->searches_per_step = 8; 148 } 149 150 /* 151 * To avoid the penalty for crossing cache-line read, preload the reference 152 * area in a small buffer, which is aligned to make sure there won't be crossing 153 * cache-line read while reading from this buffer. This reduced the cpu 154 * cycles spent on reading ref data in sub-pixel filter functions. 155 * TODO: Currently, since sub-pixel search range here is -3 ~ 3, copy 22 rows x 156 * 32 cols area that is enough for 16x16 macroblock. Later, for SPLITMV, we 157 * could reduce the area. 158 */ 159 160 /* estimated cost of a motion vector (r,c) */ 161 #define MVC(r, c) \ 162 (mvcost ? \ 163 ((mvjcost[((r) != rr) * 2 + ((c) != rc)] + \ 164 mvcost[0][((r) - rr)] + mvcost[1][((c) - rc)]) * \ 165 error_per_bit + 4096) >> 13 : 0) 166 167 168 // convert motion vector component to offset for svf calc 169 static INLINE int sp(int x) { 170 return (x & 7) << 1; 171 } 172 173 static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c, 174 int offset) { 175 return &buf[(r >> 3) * stride + (c >> 3) - offset]; 176 } 177 178 /* returns subpixel variance error function */ 179 #define DIST(r, c) \ 180 vfp->svf(pre(y, y_stride, r, c, offset), y_stride, sp(c), sp(r), z, \ 181 src_stride, &sse) 182 183 /* checks if (r, c) has better score than previous best */ 184 #define CHECK_BETTER(v, r, c) \ 185 if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \ 186 thismse = (DIST(r, c)); \ 187 if ((v = MVC(r, c) + thismse) < besterr) { \ 188 besterr = v; \ 189 br = r; \ 190 bc = c; \ 191 *distortion = thismse; \ 192 *sse1 = sse; \ 193 } \ 194 } else { \ 195 v = INT_MAX; \ 196 } 197 198 #define FIRST_LEVEL_CHECKS \ 199 { \ 200 unsigned int left, right, up, down, diag; \ 201 CHECK_BETTER(left, tr, tc - hstep); \ 202 CHECK_BETTER(right, tr, tc + hstep); \ 203 CHECK_BETTER(up, tr - hstep, tc); \ 204 CHECK_BETTER(down, tr + hstep, tc); \ 205 whichdir = (left < right ? 0 : 1) + \ 206 (up < down ? 0 : 2); \ 207 switch (whichdir) { \ 208 case 0: \ 209 CHECK_BETTER(diag, tr - hstep, tc - hstep); \ 210 break; \ 211 case 1: \ 212 CHECK_BETTER(diag, tr - hstep, tc + hstep); \ 213 break; \ 214 case 2: \ 215 CHECK_BETTER(diag, tr + hstep, tc - hstep); \ 216 break; \ 217 case 3: \ 218 CHECK_BETTER(diag, tr + hstep, tc + hstep); \ 219 break; \ 220 } \ 221 } 222 223 #define SECOND_LEVEL_CHECKS \ 224 { \ 225 int kr, kc; \ 226 unsigned int second; \ 227 if (tr != br && tc != bc) { \ 228 kr = br - tr; \ 229 kc = bc - tc; \ 230 CHECK_BETTER(second, tr + kr, tc + 2 * kc); \ 231 CHECK_BETTER(second, tr + 2 * kr, tc + kc); \ 232 } else if (tr == br && tc != bc) { \ 233 kc = bc - tc; \ 234 CHECK_BETTER(second, tr + hstep, tc + 2 * kc); \ 235 CHECK_BETTER(second, tr - hstep, tc + 2 * kc); \ 236 switch (whichdir) { \ 237 case 0: \ 238 case 1: \ 239 CHECK_BETTER(second, tr + hstep, tc + kc); \ 240 break; \ 241 case 2: \ 242 case 3: \ 243 CHECK_BETTER(second, tr - hstep, tc + kc); \ 244 break; \ 245 } \ 246 } else if (tr != br && tc == bc) { \ 247 kr = br - tr; \ 248 CHECK_BETTER(second, tr + 2 * kr, tc + hstep); \ 249 CHECK_BETTER(second, tr + 2 * kr, tc - hstep); \ 250 switch (whichdir) { \ 251 case 0: \ 252 case 2: \ 253 CHECK_BETTER(second, tr + kr, tc + hstep); \ 254 break; \ 255 case 1: \ 256 case 3: \ 257 CHECK_BETTER(second, tr + kr, tc - hstep); \ 258 break; \ 259 } \ 260 } \ 261 } 262 263 int vp9_find_best_sub_pixel_tree(const MACROBLOCK *x, 264 MV *bestmv, const MV *ref_mv, 265 int allow_hp, 266 int error_per_bit, 267 const vp9_variance_fn_ptr_t *vfp, 268 int forced_stop, 269 int iters_per_step, 270 int *mvjcost, int *mvcost[2], 271 int *distortion, 272 unsigned int *sse1) { 273 const uint8_t *z = x->plane[0].src.buf; 274 const int src_stride = x->plane[0].src.stride; 275 const MACROBLOCKD *xd = &x->e_mbd; 276 unsigned int besterr = INT_MAX; 277 unsigned int sse; 278 unsigned int whichdir; 279 int thismse; 280 unsigned int halfiters = iters_per_step; 281 unsigned int quarteriters = iters_per_step; 282 unsigned int eighthiters = iters_per_step; 283 284 const int y_stride = xd->plane[0].pre[0].stride; 285 const int offset = bestmv->row * y_stride + bestmv->col; 286 const uint8_t *y = xd->plane[0].pre[0].buf + offset; 287 288 int rr = ref_mv->row; 289 int rc = ref_mv->col; 290 int br = bestmv->row * 8; 291 int bc = bestmv->col * 8; 292 int hstep = 4; 293 const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX); 294 const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX); 295 const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX); 296 const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX); 297 298 int tr = br; 299 int tc = bc; 300 301 // central mv 302 bestmv->row *= 8; 303 bestmv->col *= 8; 304 305 // calculate central point error 306 besterr = vfp->vf(y, y_stride, z, src_stride, sse1); 307 *distortion = besterr; 308 besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); 309 310 // 1/2 pel 311 FIRST_LEVEL_CHECKS; 312 if (halfiters > 1) { 313 SECOND_LEVEL_CHECKS; 314 } 315 tr = br; 316 tc = bc; 317 318 // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only 319 if (forced_stop != 2) { 320 hstep >>= 1; 321 FIRST_LEVEL_CHECKS; 322 if (quarteriters > 1) { 323 SECOND_LEVEL_CHECKS; 324 } 325 tr = br; 326 tc = bc; 327 } 328 329 if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) { 330 hstep >>= 1; 331 FIRST_LEVEL_CHECKS; 332 if (eighthiters > 1) { 333 SECOND_LEVEL_CHECKS; 334 } 335 tr = br; 336 tc = bc; 337 } 338 // These lines insure static analysis doesn't warn that 339 // tr and tc aren't used after the above point. 340 (void) tr; 341 (void) tc; 342 343 bestmv->row = br; 344 bestmv->col = bc; 345 346 if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) || 347 (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3))) 348 return INT_MAX; 349 350 return besterr; 351 } 352 353 #undef DIST 354 /* returns subpixel variance error function */ 355 #define DIST(r, c) \ 356 vfp->svaf(pre(y, y_stride, r, c, offset), y_stride, sp(c), sp(r), \ 357 z, src_stride, &sse, second_pred) 358 359 int vp9_find_best_sub_pixel_comp_tree(const MACROBLOCK *x, 360 MV *bestmv, const MV *ref_mv, 361 int allow_hp, 362 int error_per_bit, 363 const vp9_variance_fn_ptr_t *vfp, 364 int forced_stop, 365 int iters_per_step, 366 int *mvjcost, int *mvcost[2], 367 int *distortion, 368 unsigned int *sse1, 369 const uint8_t *second_pred, 370 int w, int h) { 371 const uint8_t *z = x->plane[0].src.buf; 372 const int src_stride = x->plane[0].src.stride; 373 const MACROBLOCKD *xd = &x->e_mbd; 374 unsigned int besterr = INT_MAX; 375 unsigned int sse; 376 unsigned int whichdir; 377 int thismse; 378 const unsigned int halfiters = iters_per_step; 379 const unsigned int quarteriters = iters_per_step; 380 const unsigned int eighthiters = iters_per_step; 381 382 DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64); 383 const int y_stride = xd->plane[0].pre[0].stride; 384 const int offset = bestmv->row * y_stride + bestmv->col; 385 const uint8_t *y = xd->plane[0].pre[0].buf + offset; 386 387 int rr = ref_mv->row; 388 int rc = ref_mv->col; 389 int br = bestmv->row * 8; 390 int bc = bestmv->col * 8; 391 int hstep = 4; 392 const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX); 393 const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX); 394 const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX); 395 const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX); 396 397 int tr = br; 398 int tc = bc; 399 400 // central mv 401 bestmv->row *= 8; 402 bestmv->col *= 8; 403 404 // calculate central point error 405 // TODO(yunqingwang): central pointer error was already calculated in full- 406 // pixel search, and can be passed in this function. 407 vp9_comp_avg_pred(comp_pred, second_pred, w, h, y, y_stride); 408 besterr = vfp->vf(comp_pred, w, z, src_stride, sse1); 409 *distortion = besterr; 410 besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); 411 412 // Each subsequent iteration checks at least one point in 413 // common with the last iteration could be 2 ( if diag selected) 414 // 1/2 pel 415 FIRST_LEVEL_CHECKS; 416 if (halfiters > 1) { 417 SECOND_LEVEL_CHECKS; 418 } 419 tr = br; 420 tc = bc; 421 422 // Each subsequent iteration checks at least one point in common with 423 // the last iteration could be 2 ( if diag selected) 1/4 pel 424 425 // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only 426 if (forced_stop != 2) { 427 hstep >>= 1; 428 FIRST_LEVEL_CHECKS; 429 if (quarteriters > 1) { 430 SECOND_LEVEL_CHECKS; 431 } 432 tr = br; 433 tc = bc; 434 } 435 436 if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) { 437 hstep >>= 1; 438 FIRST_LEVEL_CHECKS; 439 if (eighthiters > 1) { 440 SECOND_LEVEL_CHECKS; 441 } 442 tr = br; 443 tc = bc; 444 } 445 // These lines insure static analysis doesn't warn that 446 // tr and tc aren't used after the above point. 447 (void) tr; 448 (void) tc; 449 450 bestmv->row = br; 451 bestmv->col = bc; 452 453 if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) || 454 (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3))) 455 return INT_MAX; 456 457 return besterr; 458 } 459 460 #undef MVC 461 #undef PRE 462 #undef DIST 463 #undef CHECK_BETTER 464 465 static INLINE int check_bounds(const MACROBLOCK *x, int row, int col, 466 int range) { 467 return ((row - range) >= x->mv_row_min) & 468 ((row + range) <= x->mv_row_max) & 469 ((col - range) >= x->mv_col_min) & 470 ((col + range) <= x->mv_col_max); 471 } 472 473 static INLINE int is_mv_in(const MACROBLOCK *x, const MV *mv) { 474 return (mv->col >= x->mv_col_min) && (mv->col <= x->mv_col_max) && 475 (mv->row >= x->mv_row_min) && (mv->row <= x->mv_row_max); 476 } 477 478 #define CHECK_BETTER \ 479 {\ 480 if (thissad < bestsad) {\ 481 if (use_mvcost) \ 482 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, \ 483 mvjsadcost, mvsadcost, sad_per_bit);\ 484 if (thissad < bestsad) {\ 485 bestsad = thissad;\ 486 best_site = i;\ 487 }\ 488 }\ 489 } 490 491 #define MAX_PATTERN_SCALES 11 492 #define MAX_PATTERN_CANDIDATES 8 // max number of canddiates per scale 493 #define PATTERN_CANDIDATES_REF 3 // number of refinement candidates 494 495 // Generic pattern search function that searches over multiple scales. 496 // Each scale can have a different number of candidates and shape of 497 // candidates as indicated in the num_candidates and candidates arrays 498 // passed into this function 499 static int vp9_pattern_search(const MACROBLOCK *x, 500 MV *ref_mv, 501 int search_param, 502 int sad_per_bit, 503 int do_init_search, int do_refine, 504 const vp9_variance_fn_ptr_t *vfp, 505 int use_mvcost, 506 const MV *center_mv, MV *best_mv, 507 const int num_candidates[MAX_PATTERN_SCALES], 508 const MV candidates[MAX_PATTERN_SCALES] 509 [MAX_PATTERN_CANDIDATES]) { 510 const MACROBLOCKD *const xd = &x->e_mbd; 511 static const int search_param_to_steps[MAX_MVSEARCH_STEPS] = { 512 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 513 }; 514 int i, j, s, t; 515 const struct buf_2d *const what = &x->plane[0].src; 516 const struct buf_2d *const in_what = &xd->plane[0].pre[0]; 517 int br, bc; 518 int bestsad = INT_MAX; 519 int thissad; 520 int k = -1; 521 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; 522 int best_init_s = search_param_to_steps[search_param]; 523 const int *const mvjsadcost = x->nmvjointsadcost; 524 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; 525 526 // adjust ref_mv to make sure it is within MV range 527 clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); 528 br = ref_mv->row; 529 bc = ref_mv->col; 530 531 // Work out the start point for the search 532 bestsad = vfp->sdf(what->buf, what->stride, 533 get_buf_from_mv(in_what, ref_mv), in_what->stride, 534 0x7fffffff) + mvsad_err_cost(ref_mv, &fcenter_mv, 535 mvjsadcost, mvsadcost, sad_per_bit); 536 537 // Search all possible scales upto the search param around the center point 538 // pick the scale of the point that is best as the starting scale of 539 // further steps around it. 540 if (do_init_search) { 541 s = best_init_s; 542 best_init_s = -1; 543 for (t = 0; t <= s; ++t) { 544 int best_site = -1; 545 if (check_bounds(x, br, bc, 1 << t)) { 546 for (i = 0; i < num_candidates[t]; i++) { 547 const MV this_mv = {br + candidates[t][i].row, 548 bc + candidates[t][i].col}; 549 thissad = vfp->sdf(what->buf, what->stride, 550 get_buf_from_mv(in_what, &this_mv), 551 in_what->stride, bestsad); 552 CHECK_BETTER 553 } 554 } else { 555 for (i = 0; i < num_candidates[t]; i++) { 556 const MV this_mv = {br + candidates[t][i].row, 557 bc + candidates[t][i].col}; 558 if (!is_mv_in(x, &this_mv)) 559 continue; 560 thissad = vfp->sdf(what->buf, what->stride, 561 get_buf_from_mv(in_what, &this_mv), 562 in_what->stride, bestsad); 563 CHECK_BETTER 564 } 565 } 566 if (best_site == -1) { 567 continue; 568 } else { 569 best_init_s = t; 570 k = best_site; 571 } 572 } 573 if (best_init_s != -1) { 574 br += candidates[best_init_s][k].row; 575 bc += candidates[best_init_s][k].col; 576 } 577 } 578 579 // If the center point is still the best, just skip this and move to 580 // the refinement step. 581 if (best_init_s != -1) { 582 int best_site = -1; 583 s = best_init_s; 584 585 do { 586 // No need to search all 6 points the 1st time if initial search was used 587 if (!do_init_search || s != best_init_s) { 588 if (check_bounds(x, br, bc, 1 << s)) { 589 for (i = 0; i < num_candidates[s]; i++) { 590 const MV this_mv = {br + candidates[s][i].row, 591 bc + candidates[s][i].col}; 592 thissad = vfp->sdf(what->buf, what->stride, 593 get_buf_from_mv(in_what, &this_mv), 594 in_what->stride, bestsad); 595 CHECK_BETTER 596 } 597 } else { 598 for (i = 0; i < num_candidates[s]; i++) { 599 const MV this_mv = {br + candidates[s][i].row, 600 bc + candidates[s][i].col}; 601 if (!is_mv_in(x, &this_mv)) 602 continue; 603 thissad = vfp->sdf(what->buf, what->stride, 604 get_buf_from_mv(in_what, &this_mv), 605 in_what->stride, bestsad); 606 CHECK_BETTER 607 } 608 } 609 610 if (best_site == -1) { 611 continue; 612 } else { 613 br += candidates[s][best_site].row; 614 bc += candidates[s][best_site].col; 615 k = best_site; 616 } 617 } 618 619 do { 620 int next_chkpts_indices[PATTERN_CANDIDATES_REF]; 621 best_site = -1; 622 next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1; 623 next_chkpts_indices[1] = k; 624 next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1; 625 626 if (check_bounds(x, br, bc, 1 << s)) { 627 for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { 628 const MV this_mv = {br + candidates[s][next_chkpts_indices[i]].row, 629 bc + candidates[s][next_chkpts_indices[i]].col}; 630 thissad = vfp->sdf(what->buf, what->stride, 631 get_buf_from_mv(in_what, &this_mv), 632 in_what->stride, bestsad); 633 CHECK_BETTER 634 } 635 } else { 636 for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { 637 const MV this_mv = {br + candidates[s][next_chkpts_indices[i]].row, 638 bc + candidates[s][next_chkpts_indices[i]].col}; 639 if (!is_mv_in(x, &this_mv)) 640 continue; 641 thissad = vfp->sdf(what->buf, what->stride, 642 get_buf_from_mv(in_what, &this_mv), 643 in_what->stride, bestsad); 644 CHECK_BETTER 645 } 646 } 647 648 if (best_site != -1) { 649 k = next_chkpts_indices[best_site]; 650 br += candidates[s][k].row; 651 bc += candidates[s][k].col; 652 } 653 } while (best_site != -1); 654 } while (s--); 655 } 656 657 // Check 4 1-away neighbors if do_refine is true. 658 // For most well-designed schemes do_refine will not be necessary. 659 if (do_refine) { 660 static const MV neighbors[4] = {{0, -1}, { -1, 0}, {1, 0}, {0, 1}}; 661 662 for (j = 0; j < 16; j++) { 663 int best_site = -1; 664 if (check_bounds(x, br, bc, 1)) { 665 for (i = 0; i < 4; i++) { 666 const MV this_mv = {br + neighbors[i].row, 667 bc + neighbors[i].col}; 668 thissad = vfp->sdf(what->buf, what->stride, 669 get_buf_from_mv(in_what, &this_mv), 670 in_what->stride, bestsad); 671 CHECK_BETTER 672 } 673 } else { 674 for (i = 0; i < 4; i++) { 675 const MV this_mv = {br + neighbors[i].row, 676 bc + neighbors[i].col}; 677 if (!is_mv_in(x, &this_mv)) 678 continue; 679 thissad = vfp->sdf(what->buf, what->stride, 680 get_buf_from_mv(in_what, &this_mv), 681 in_what->stride, bestsad); 682 CHECK_BETTER 683 } 684 } 685 686 if (best_site == -1) { 687 break; 688 } else { 689 br += neighbors[best_site].row; 690 bc += neighbors[best_site].col; 691 } 692 } 693 } 694 695 best_mv->row = br; 696 best_mv->col = bc; 697 698 return bestsad; 699 } 700 701 int vp9_get_mvpred_var(const MACROBLOCK *x, 702 const MV *best_mv, const MV *center_mv, 703 const vp9_variance_fn_ptr_t *vfp, 704 int use_mvcost) { 705 const MACROBLOCKD *const xd = &x->e_mbd; 706 const struct buf_2d *const what = &x->plane[0].src; 707 const struct buf_2d *const in_what = &xd->plane[0].pre[0]; 708 const MV mv = {best_mv->row * 8, best_mv->col * 8}; 709 unsigned int unused; 710 711 return vfp->vf(what->buf, what->stride, 712 get_buf_from_mv(in_what, best_mv), in_what->stride, &unused) + 713 (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, 714 x->mvcost, x->errorperbit) : 0); 715 } 716 717 int vp9_get_mvpred_av_var(const MACROBLOCK *x, 718 const MV *best_mv, const MV *center_mv, 719 const uint8_t *second_pred, 720 const vp9_variance_fn_ptr_t *vfp, 721 int use_mvcost) { 722 const MACROBLOCKD *const xd = &x->e_mbd; 723 const struct buf_2d *const what = &x->plane[0].src; 724 const struct buf_2d *const in_what = &xd->plane[0].pre[0]; 725 const MV mv = {best_mv->row * 8, best_mv->col * 8}; 726 unsigned int unused; 727 728 return vfp->svaf(get_buf_from_mv(in_what, best_mv), in_what->stride, 0, 0, 729 what->buf, what->stride, &unused, second_pred) + 730 (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, 731 x->mvcost, x->errorperbit) : 0); 732 } 733 734 int vp9_hex_search(const MACROBLOCK *x, 735 MV *ref_mv, 736 int search_param, 737 int sad_per_bit, 738 int do_init_search, 739 const vp9_variance_fn_ptr_t *vfp, 740 int use_mvcost, 741 const MV *center_mv, MV *best_mv) { 742 // First scale has 8-closest points, the rest have 6 points in hex shape 743 // at increasing scales 744 static const int hex_num_candidates[MAX_PATTERN_SCALES] = { 745 8, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6 746 }; 747 // Note that the largest candidate step at each scale is 2^scale 748 static const MV hex_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = { 749 {{-1, -1}, {0, -1}, {1, -1}, {1, 0}, {1, 1}, { 0, 1}, { -1, 1}, {-1, 0}}, 750 {{-1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0}}, 751 {{-2, -4}, {2, -4}, {4, 0}, {2, 4}, { -2, 4}, { -4, 0}}, 752 {{-4, -8}, {4, -8}, {8, 0}, {4, 8}, { -4, 8}, { -8, 0}}, 753 {{-8, -16}, {8, -16}, {16, 0}, {8, 16}, { -8, 16}, { -16, 0}}, 754 {{-16, -32}, {16, -32}, {32, 0}, {16, 32}, { -16, 32}, { -32, 0}}, 755 {{-32, -64}, {32, -64}, {64, 0}, {32, 64}, { -32, 64}, { -64, 0}}, 756 {{-64, -128}, {64, -128}, {128, 0}, {64, 128}, { -64, 128}, { -128, 0}}, 757 {{-128, -256}, {128, -256}, {256, 0}, {128, 256}, { -128, 256}, { -256, 0}}, 758 {{-256, -512}, {256, -512}, {512, 0}, {256, 512}, { -256, 512}, { -512, 0}}, 759 {{-512, -1024}, {512, -1024}, {1024, 0}, {512, 1024}, { -512, 1024}, 760 { -1024, 0}}, 761 }; 762 return vp9_pattern_search(x, ref_mv, search_param, sad_per_bit, 763 do_init_search, 0, vfp, use_mvcost, 764 center_mv, best_mv, 765 hex_num_candidates, hex_candidates); 766 } 767 768 int vp9_bigdia_search(const MACROBLOCK *x, 769 MV *ref_mv, 770 int search_param, 771 int sad_per_bit, 772 int do_init_search, 773 const vp9_variance_fn_ptr_t *vfp, 774 int use_mvcost, 775 const MV *center_mv, 776 MV *best_mv) { 777 // First scale has 4-closest points, the rest have 8 points in diamond 778 // shape at increasing scales 779 static const int bigdia_num_candidates[MAX_PATTERN_SCALES] = { 780 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 781 }; 782 // Note that the largest candidate step at each scale is 2^scale 783 static const MV bigdia_candidates[MAX_PATTERN_SCALES] 784 [MAX_PATTERN_CANDIDATES] = { 785 {{0, -1}, {1, 0}, { 0, 1}, {-1, 0}}, 786 {{-1, -1}, {0, -2}, {1, -1}, {2, 0}, {1, 1}, {0, 2}, {-1, 1}, {-2, 0}}, 787 {{-2, -2}, {0, -4}, {2, -2}, {4, 0}, {2, 2}, {0, 4}, {-2, 2}, {-4, 0}}, 788 {{-4, -4}, {0, -8}, {4, -4}, {8, 0}, {4, 4}, {0, 8}, {-4, 4}, {-8, 0}}, 789 {{-8, -8}, {0, -16}, {8, -8}, {16, 0}, {8, 8}, {0, 16}, {-8, 8}, {-16, 0}}, 790 {{-16, -16}, {0, -32}, {16, -16}, {32, 0}, {16, 16}, {0, 32}, 791 {-16, 16}, {-32, 0}}, 792 {{-32, -32}, {0, -64}, {32, -32}, {64, 0}, {32, 32}, {0, 64}, 793 {-32, 32}, {-64, 0}}, 794 {{-64, -64}, {0, -128}, {64, -64}, {128, 0}, {64, 64}, {0, 128}, 795 {-64, 64}, {-128, 0}}, 796 {{-128, -128}, {0, -256}, {128, -128}, {256, 0}, {128, 128}, {0, 256}, 797 {-128, 128}, {-256, 0}}, 798 {{-256, -256}, {0, -512}, {256, -256}, {512, 0}, {256, 256}, {0, 512}, 799 {-256, 256}, {-512, 0}}, 800 {{-512, -512}, {0, -1024}, {512, -512}, {1024, 0}, {512, 512}, {0, 1024}, 801 {-512, 512}, {-1024, 0}}, 802 }; 803 return vp9_pattern_search(x, ref_mv, search_param, sad_per_bit, 804 do_init_search, 0, vfp, use_mvcost, 805 center_mv, best_mv, 806 bigdia_num_candidates, bigdia_candidates); 807 } 808 809 int vp9_square_search(const MACROBLOCK *x, 810 MV *ref_mv, 811 int search_param, 812 int sad_per_bit, 813 int do_init_search, 814 const vp9_variance_fn_ptr_t *vfp, 815 int use_mvcost, 816 const MV *center_mv, 817 MV *best_mv) { 818 // All scales have 8 closest points in square shape 819 static const int square_num_candidates[MAX_PATTERN_SCALES] = { 820 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 821 }; 822 // Note that the largest candidate step at each scale is 2^scale 823 static const MV square_candidates[MAX_PATTERN_SCALES] 824 [MAX_PATTERN_CANDIDATES] = { 825 {{-1, -1}, {0, -1}, {1, -1}, {1, 0}, {1, 1}, {0, 1}, {-1, 1}, {-1, 0}}, 826 {{-2, -2}, {0, -2}, {2, -2}, {2, 0}, {2, 2}, {0, 2}, {-2, 2}, {-2, 0}}, 827 {{-4, -4}, {0, -4}, {4, -4}, {4, 0}, {4, 4}, {0, 4}, {-4, 4}, {-4, 0}}, 828 {{-8, -8}, {0, -8}, {8, -8}, {8, 0}, {8, 8}, {0, 8}, {-8, 8}, {-8, 0}}, 829 {{-16, -16}, {0, -16}, {16, -16}, {16, 0}, {16, 16}, {0, 16}, 830 {-16, 16}, {-16, 0}}, 831 {{-32, -32}, {0, -32}, {32, -32}, {32, 0}, {32, 32}, {0, 32}, 832 {-32, 32}, {-32, 0}}, 833 {{-64, -64}, {0, -64}, {64, -64}, {64, 0}, {64, 64}, {0, 64}, 834 {-64, 64}, {-64, 0}}, 835 {{-128, -128}, {0, -128}, {128, -128}, {128, 0}, {128, 128}, {0, 128}, 836 {-128, 128}, {-128, 0}}, 837 {{-256, -256}, {0, -256}, {256, -256}, {256, 0}, {256, 256}, {0, 256}, 838 {-256, 256}, {-256, 0}}, 839 {{-512, -512}, {0, -512}, {512, -512}, {512, 0}, {512, 512}, {0, 512}, 840 {-512, 512}, {-512, 0}}, 841 {{-1024, -1024}, {0, -1024}, {1024, -1024}, {1024, 0}, {1024, 1024}, 842 {0, 1024}, {-1024, 1024}, {-1024, 0}}, 843 }; 844 return vp9_pattern_search(x, ref_mv, search_param, sad_per_bit, 845 do_init_search, 0, vfp, use_mvcost, 846 center_mv, best_mv, 847 square_num_candidates, square_candidates); 848 } 849 850 int vp9_fast_hex_search(const MACROBLOCK *x, 851 MV *ref_mv, 852 int search_param, 853 int sad_per_bit, 854 int do_init_search, // must be zero for fast_hex 855 const vp9_variance_fn_ptr_t *vfp, 856 int use_mvcost, 857 const MV *center_mv, 858 MV *best_mv) { 859 return vp9_hex_search(x, ref_mv, MAX(MAX_MVSEARCH_STEPS - 2, search_param), 860 sad_per_bit, do_init_search, vfp, use_mvcost, 861 center_mv, best_mv); 862 } 863 864 int vp9_fast_dia_search(const MACROBLOCK *x, 865 MV *ref_mv, 866 int search_param, 867 int sad_per_bit, 868 int do_init_search, 869 const vp9_variance_fn_ptr_t *vfp, 870 int use_mvcost, 871 const MV *center_mv, 872 MV *best_mv) { 873 return vp9_bigdia_search(x, ref_mv, MAX(MAX_MVSEARCH_STEPS - 2, search_param), 874 sad_per_bit, do_init_search, vfp, use_mvcost, 875 center_mv, best_mv); 876 } 877 878 #undef CHECK_BETTER 879 880 int vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv, 881 int search_param, int sad_per_bit, int *num00, 882 const vp9_variance_fn_ptr_t *fn_ptr, 883 int *mvjcost, int *mvcost[2], 884 const MV *center_mv) { 885 const MACROBLOCKD *const xd = &x->e_mbd; 886 const uint8_t *what = x->plane[0].src.buf; 887 const int what_stride = x->plane[0].src.stride; 888 const uint8_t *in_what; 889 const int in_what_stride = xd->plane[0].pre[0].stride; 890 891 unsigned int bestsad = INT_MAX; 892 int ref_row, ref_col; 893 894 unsigned int thissad; 895 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; 896 897 const int *mvjsadcost = x->nmvjointsadcost; 898 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; 899 900 int tr, tc; 901 int best_tr = 0; 902 int best_tc = 0; 903 int range = 64; 904 905 int start_col, end_col; 906 int start_row, end_row; 907 int i; 908 909 clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); 910 ref_row = ref_mv->row; 911 ref_col = ref_mv->col; 912 *num00 = 11; 913 best_mv->row = ref_row; 914 best_mv->col = ref_col; 915 916 // Work out the start point for the search 917 in_what = xd->plane[0].pre[0].buf + ref_row * in_what_stride + ref_col; 918 919 // Check the starting position 920 bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) 921 + mvsad_err_cost(best_mv, &fcenter_mv, 922 mvjsadcost, mvsadcost, sad_per_bit); 923 924 start_row = MAX(-range, x->mv_row_min - ref_row); 925 start_col = MAX(-range, x->mv_col_min - ref_col); 926 end_row = MIN(range, x->mv_row_max - ref_row); 927 end_col = MIN(range, x->mv_col_max - ref_col); 928 929 for (tr = start_row; tr <= end_row; ++tr) { 930 for (tc = start_col; tc <= end_col; tc += 4) { 931 if ((tc + 3) <= end_col) { 932 unsigned int sad_array[4]; 933 unsigned char const *addr_ref[4]; 934 for (i = 0; i < 4; ++i) 935 addr_ref[i] = in_what + tr * in_what_stride + tc + i; 936 937 fn_ptr->sdx4df(what, what_stride, addr_ref, in_what_stride, sad_array); 938 939 for (i = 0; i < 4; ++i) { 940 if (sad_array[i] < bestsad) { 941 const MV this_mv = {ref_row + tr, ref_col + tc + i}; 942 thissad = sad_array[i] + 943 mvsad_err_cost(&this_mv, &fcenter_mv, 944 mvjsadcost, mvsadcost, sad_per_bit); 945 if (thissad < bestsad) { 946 bestsad = thissad; 947 best_tr = tr; 948 best_tc = tc + i; 949 } 950 } 951 } 952 } else { 953 for (i = 0; i < end_col - tc; ++i) { 954 const uint8_t *check_here = in_what + tr * in_what_stride + tc + i; 955 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, 956 bestsad); 957 958 if (thissad < bestsad) { 959 const MV this_mv = {ref_row + tr, ref_col + tc + i}; 960 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, 961 mvjsadcost, mvsadcost, sad_per_bit); 962 963 if (thissad < bestsad) { 964 bestsad = thissad; 965 best_tr = tr; 966 best_tc = tc + i; 967 } 968 } 969 } 970 } 971 } 972 } 973 best_mv->row += best_tr; 974 best_mv->col += best_tc; 975 return bestsad; 976 } 977 978 int vp9_diamond_search_sad_c(const MACROBLOCK *x, 979 MV *ref_mv, MV *best_mv, 980 int search_param, int sad_per_bit, int *num00, 981 const vp9_variance_fn_ptr_t *fn_ptr, 982 int *mvjcost, int *mvcost[2], 983 const MV *center_mv) { 984 int i, j, step; 985 986 const MACROBLOCKD *const xd = &x->e_mbd; 987 const uint8_t *what = x->plane[0].src.buf; 988 const int what_stride = x->plane[0].src.stride; 989 const uint8_t *in_what; 990 const int in_what_stride = xd->plane[0].pre[0].stride; 991 const uint8_t *best_address; 992 993 int bestsad = INT_MAX; 994 int best_site = 0; 995 int last_site = 0; 996 997 int ref_row, ref_col; 998 999 // search_param determines the length of the initial step and hence the number 1000 // of iterations 1001 // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = 1002 // (MAX_FIRST_STEP/4) pel... etc. 1003 const search_site *const ss = &x->ss[search_param * x->searches_per_step]; 1004 const int tot_steps = (x->ss_count / x->searches_per_step) - search_param; 1005 1006 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; 1007 1008 const int *mvjsadcost = x->nmvjointsadcost; 1009 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; 1010 1011 clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); 1012 ref_row = ref_mv->row; 1013 ref_col = ref_mv->col; 1014 *num00 = 0; 1015 best_mv->row = ref_row; 1016 best_mv->col = ref_col; 1017 1018 // Work out the start point for the search 1019 in_what = xd->plane[0].pre[0].buf + ref_row * in_what_stride + ref_col; 1020 best_address = in_what; 1021 1022 // Check the starting position 1023 bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) 1024 + mvsad_err_cost(best_mv, &fcenter_mv, 1025 mvjsadcost, mvsadcost, sad_per_bit); 1026 1027 i = 1; 1028 1029 for (step = 0; step < tot_steps; step++) { 1030 for (j = 0; j < x->searches_per_step; j++) { 1031 const MV this_mv = {best_mv->row + ss[i].mv.row, 1032 best_mv->col + ss[i].mv.col}; 1033 if (is_mv_in(x, &this_mv)) { 1034 const uint8_t *const check_here = ss[i].offset + best_address; 1035 int thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, 1036 bestsad); 1037 1038 if (thissad < bestsad) { 1039 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, 1040 mvjsadcost, mvsadcost, sad_per_bit); 1041 1042 if (thissad < bestsad) { 1043 bestsad = thissad; 1044 best_site = i; 1045 } 1046 } 1047 } 1048 1049 i++; 1050 } 1051 1052 if (best_site != last_site) { 1053 best_mv->row += ss[best_site].mv.row; 1054 best_mv->col += ss[best_site].mv.col; 1055 best_address += ss[best_site].offset; 1056 last_site = best_site; 1057 #if defined(NEW_DIAMOND_SEARCH) 1058 while (1) { 1059 const MV this_mv = {best_mv->row + ss[best_site].mv.row, 1060 best_mv->col + ss[best_site].mv.col}; 1061 if (is_mv_in(x, &this_mv)) { 1062 const uint8_t *const check_here = ss[best_site].offset + best_address; 1063 int thissad = fn_ptr->sdf(what, what_stride, check_here, 1064 in_what_stride, bestsad); 1065 if (thissad < bestsad) { 1066 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, 1067 mvjsadcost, mvsadcost, sad_per_bit); 1068 if (thissad < bestsad) { 1069 bestsad = thissad; 1070 best_mv->row += ss[best_site].mv.row; 1071 best_mv->col += ss[best_site].mv.col; 1072 best_address += ss[best_site].offset; 1073 continue; 1074 } 1075 } 1076 } 1077 break; 1078 }; 1079 #endif 1080 } else if (best_address == in_what) { 1081 (*num00)++; 1082 } 1083 } 1084 return bestsad; 1085 } 1086 1087 int vp9_diamond_search_sadx4(const MACROBLOCK *x, 1088 MV *ref_mv, MV *best_mv, int search_param, 1089 int sad_per_bit, int *num00, 1090 const vp9_variance_fn_ptr_t *fn_ptr, 1091 int *mvjcost, int *mvcost[2], 1092 const MV *center_mv) { 1093 int i, j, step; 1094 1095 const MACROBLOCKD *const xd = &x->e_mbd; 1096 uint8_t *what = x->plane[0].src.buf; 1097 const int what_stride = x->plane[0].src.stride; 1098 const uint8_t *in_what; 1099 const int in_what_stride = xd->plane[0].pre[0].stride; 1100 const uint8_t *best_address; 1101 1102 unsigned int bestsad = INT_MAX; 1103 int best_site = 0; 1104 int last_site = 0; 1105 1106 int ref_row; 1107 int ref_col; 1108 1109 // search_param determines the length of the initial step and hence the number 1110 // of iterations. 1111 // 0 = initial step (MAX_FIRST_STEP) pel 1112 // 1 = (MAX_FIRST_STEP/2) pel, 1113 // 2 = (MAX_FIRST_STEP/4) pel... 1114 const search_site *ss = &x->ss[search_param * x->searches_per_step]; 1115 const int tot_steps = (x->ss_count / x->searches_per_step) - search_param; 1116 1117 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; 1118 1119 const int *mvjsadcost = x->nmvjointsadcost; 1120 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; 1121 1122 clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); 1123 ref_row = ref_mv->row; 1124 ref_col = ref_mv->col; 1125 *num00 = 0; 1126 best_mv->row = ref_row; 1127 best_mv->col = ref_col; 1128 1129 // Work out the start point for the search 1130 in_what = xd->plane[0].pre[0].buf + ref_row * in_what_stride + ref_col; 1131 best_address = in_what; 1132 1133 // Check the starting position 1134 bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) 1135 + mvsad_err_cost(best_mv, &fcenter_mv, 1136 mvjsadcost, mvsadcost, sad_per_bit); 1137 1138 i = 1; 1139 1140 for (step = 0; step < tot_steps; step++) { 1141 int all_in = 1, t; 1142 1143 // All_in is true if every one of the points we are checking are within 1144 // the bounds of the image. 1145 all_in &= ((best_mv->row + ss[i].mv.row) > x->mv_row_min); 1146 all_in &= ((best_mv->row + ss[i + 1].mv.row) < x->mv_row_max); 1147 all_in &= ((best_mv->col + ss[i + 2].mv.col) > x->mv_col_min); 1148 all_in &= ((best_mv->col + ss[i + 3].mv.col) < x->mv_col_max); 1149 1150 // If all the pixels are within the bounds we don't check whether the 1151 // search point is valid in this loop, otherwise we check each point 1152 // for validity.. 1153 if (all_in) { 1154 unsigned int sad_array[4]; 1155 1156 for (j = 0; j < x->searches_per_step; j += 4) { 1157 unsigned char const *block_offset[4]; 1158 1159 for (t = 0; t < 4; t++) 1160 block_offset[t] = ss[i + t].offset + best_address; 1161 1162 fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, 1163 sad_array); 1164 1165 for (t = 0; t < 4; t++, i++) { 1166 if (sad_array[t] < bestsad) { 1167 const MV this_mv = {best_mv->row + ss[i].mv.row, 1168 best_mv->col + ss[i].mv.col}; 1169 sad_array[t] += mvsad_err_cost(&this_mv, &fcenter_mv, 1170 mvjsadcost, mvsadcost, sad_per_bit); 1171 1172 if (sad_array[t] < bestsad) { 1173 bestsad = sad_array[t]; 1174 best_site = i; 1175 } 1176 } 1177 } 1178 } 1179 } else { 1180 for (j = 0; j < x->searches_per_step; j++) { 1181 // Trap illegal vectors 1182 const MV this_mv = {best_mv->row + ss[i].mv.row, 1183 best_mv->col + ss[i].mv.col}; 1184 1185 if (is_mv_in(x, &this_mv)) { 1186 const uint8_t *const check_here = ss[i].offset + best_address; 1187 unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here, 1188 in_what_stride, bestsad); 1189 1190 if (thissad < bestsad) { 1191 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, 1192 mvjsadcost, mvsadcost, sad_per_bit); 1193 1194 if (thissad < bestsad) { 1195 bestsad = thissad; 1196 best_site = i; 1197 } 1198 } 1199 } 1200 i++; 1201 } 1202 } 1203 if (best_site != last_site) { 1204 best_mv->row += ss[best_site].mv.row; 1205 best_mv->col += ss[best_site].mv.col; 1206 best_address += ss[best_site].offset; 1207 last_site = best_site; 1208 #if defined(NEW_DIAMOND_SEARCH) 1209 while (1) { 1210 const MV this_mv = {best_mv->row + ss[best_site].mv.row, 1211 best_mv->col + ss[best_site].mv.col}; 1212 if (is_mv_in(x, &this_mv)) { 1213 const uint8_t *const check_here = ss[best_site].offset + best_address; 1214 unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here, 1215 in_what_stride, bestsad); 1216 if (thissad < bestsad) { 1217 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, 1218 mvjsadcost, mvsadcost, sad_per_bit); 1219 if (thissad < bestsad) { 1220 bestsad = thissad; 1221 best_mv->row += ss[best_site].mv.row; 1222 best_mv->col += ss[best_site].mv.col; 1223 best_address += ss[best_site].offset; 1224 continue; 1225 } 1226 } 1227 } 1228 break; 1229 }; 1230 #endif 1231 } else if (best_address == in_what) { 1232 (*num00)++; 1233 } 1234 } 1235 return bestsad; 1236 } 1237 1238 /* do_refine: If last step (1-away) of n-step search doesn't pick the center 1239 point as the best match, we will do a final 1-away diamond 1240 refining search */ 1241 1242 int vp9_full_pixel_diamond(const VP9_COMP *cpi, MACROBLOCK *x, 1243 MV *mvp_full, int step_param, 1244 int sadpb, int further_steps, int do_refine, 1245 const vp9_variance_fn_ptr_t *fn_ptr, 1246 const MV *ref_mv, MV *dst_mv) { 1247 MV temp_mv; 1248 int thissme, n, num00 = 0; 1249 int bestsme = cpi->diamond_search_sad(x, mvp_full, &temp_mv, 1250 step_param, sadpb, &n, 1251 fn_ptr, x->nmvjointcost, 1252 x->mvcost, ref_mv); 1253 if (bestsme < INT_MAX) 1254 bestsme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1); 1255 *dst_mv = temp_mv; 1256 1257 // If there won't be more n-step search, check to see if refining search is 1258 // needed. 1259 if (n > further_steps) 1260 do_refine = 0; 1261 1262 while (n < further_steps) { 1263 ++n; 1264 1265 if (num00) { 1266 num00--; 1267 } else { 1268 thissme = cpi->diamond_search_sad(x, mvp_full, &temp_mv, 1269 step_param + n, sadpb, &num00, 1270 fn_ptr, x->nmvjointcost, x->mvcost, 1271 ref_mv); 1272 if (thissme < INT_MAX) 1273 thissme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1); 1274 1275 // check to see if refining search is needed. 1276 if (num00 > further_steps - n) 1277 do_refine = 0; 1278 1279 if (thissme < bestsme) { 1280 bestsme = thissme; 1281 *dst_mv = temp_mv; 1282 } 1283 } 1284 } 1285 1286 // final 1-away diamond refining search 1287 if (do_refine) { 1288 const int search_range = 8; 1289 MV best_mv = *dst_mv; 1290 thissme = cpi->refining_search_sad(x, &best_mv, sadpb, search_range, 1291 fn_ptr, x->nmvjointcost, x->mvcost, 1292 ref_mv); 1293 if (thissme < INT_MAX) 1294 thissme = vp9_get_mvpred_var(x, &best_mv, ref_mv, fn_ptr, 1); 1295 if (thissme < bestsme) { 1296 bestsme = thissme; 1297 *dst_mv = best_mv; 1298 } 1299 } 1300 return bestsme; 1301 } 1302 1303 int vp9_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv, 1304 int sad_per_bit, int distance, 1305 const vp9_variance_fn_ptr_t *fn_ptr, 1306 int *mvjcost, int *mvcost[2], 1307 const MV *center_mv, MV *best_mv) { 1308 int r, c; 1309 const MACROBLOCKD *const xd = &x->e_mbd; 1310 const struct buf_2d *const what = &x->plane[0].src; 1311 const struct buf_2d *const in_what = &xd->plane[0].pre[0]; 1312 const int row_min = MAX(ref_mv->row - distance, x->mv_row_min); 1313 const int row_max = MIN(ref_mv->row + distance, x->mv_row_max); 1314 const int col_min = MAX(ref_mv->col - distance, x->mv_col_min); 1315 const int col_max = MIN(ref_mv->col + distance, x->mv_col_max); 1316 const int *mvjsadcost = x->nmvjointsadcost; 1317 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; 1318 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; 1319 int best_sad = fn_ptr->sdf(what->buf, what->stride, 1320 get_buf_from_mv(in_what, ref_mv), in_what->stride, 0x7fffffff) + 1321 mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit); 1322 *best_mv = *ref_mv; 1323 1324 for (r = row_min; r < row_max; ++r) { 1325 for (c = col_min; c < col_max; ++c) { 1326 const MV mv = {r, c}; 1327 const int sad = fn_ptr->sdf(what->buf, what->stride, 1328 get_buf_from_mv(in_what, &mv), in_what->stride, best_sad) + 1329 mvsad_err_cost(&mv, &fcenter_mv, mvjsadcost, mvsadcost, 1330 sad_per_bit); 1331 1332 if (sad < best_sad) { 1333 best_sad = sad; 1334 *best_mv = mv; 1335 } 1336 } 1337 } 1338 return best_sad; 1339 } 1340 1341 int vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv, 1342 int sad_per_bit, int distance, 1343 const vp9_variance_fn_ptr_t *fn_ptr, 1344 int *mvjcost, int *mvcost[2], 1345 const MV *center_mv, MV *best_mv) { 1346 const MACROBLOCKD *const xd = &x->e_mbd; 1347 const uint8_t *const what = x->plane[0].src.buf; 1348 const int what_stride = x->plane[0].src.stride; 1349 const uint8_t *const in_what = xd->plane[0].pre[0].buf; 1350 const int in_what_stride = xd->plane[0].pre[0].stride; 1351 MV this_mv; 1352 unsigned int bestsad = INT_MAX; 1353 int r, c; 1354 unsigned int thissad; 1355 int ref_row = ref_mv->row; 1356 int ref_col = ref_mv->col; 1357 1358 // Apply further limits to prevent us looking using vectors that stretch 1359 // beyond the UMV border 1360 const int row_min = MAX(ref_row - distance, x->mv_row_min); 1361 const int row_max = MIN(ref_row + distance, x->mv_row_max); 1362 const int col_min = MAX(ref_col - distance, x->mv_col_min); 1363 const int col_max = MIN(ref_col + distance, x->mv_col_max); 1364 unsigned int sad_array[3]; 1365 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; 1366 const int *mvjsadcost = x->nmvjointsadcost; 1367 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; 1368 1369 // Work out the mid point for the search 1370 const uint8_t *bestaddress = &in_what[ref_row * in_what_stride + ref_col]; 1371 1372 best_mv->row = ref_row; 1373 best_mv->col = ref_col; 1374 1375 // Baseline value at the centre 1376 bestsad = fn_ptr->sdf(what, what_stride, 1377 bestaddress, in_what_stride, 0x7fffffff) 1378 + mvsad_err_cost(best_mv, &fcenter_mv, 1379 mvjsadcost, mvsadcost, sad_per_bit); 1380 1381 for (r = row_min; r < row_max; r++) { 1382 const uint8_t *check_here = &in_what[r * in_what_stride + col_min]; 1383 this_mv.row = r; 1384 c = col_min; 1385 1386 while ((c + 2) < col_max && fn_ptr->sdx3f != NULL) { 1387 int i; 1388 1389 fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array); 1390 1391 for (i = 0; i < 3; i++) { 1392 thissad = sad_array[i]; 1393 1394 if (thissad < bestsad) { 1395 this_mv.col = c; 1396 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, 1397 mvjsadcost, mvsadcost, sad_per_bit); 1398 1399 if (thissad < bestsad) { 1400 bestsad = thissad; 1401 best_mv->row = r; 1402 best_mv->col = c; 1403 } 1404 } 1405 check_here++; 1406 c++; 1407 } 1408 } 1409 1410 while (c < col_max) { 1411 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, 1412 bestsad); 1413 1414 if (thissad < bestsad) { 1415 this_mv.col = c; 1416 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, 1417 mvjsadcost, mvsadcost, sad_per_bit); 1418 1419 if (thissad < bestsad) { 1420 bestsad = thissad; 1421 best_mv->row = r; 1422 best_mv->col = c; 1423 } 1424 } 1425 1426 check_here++; 1427 c++; 1428 } 1429 } 1430 return bestsad; 1431 } 1432 1433 int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv, 1434 int sad_per_bit, int distance, 1435 const vp9_variance_fn_ptr_t *fn_ptr, 1436 int *mvjcost, int *mvcost[2], 1437 const MV *center_mv, MV *best_mv) { 1438 const MACROBLOCKD *const xd = &x->e_mbd; 1439 const uint8_t *const what = x->plane[0].src.buf; 1440 const int what_stride = x->plane[0].src.stride; 1441 const uint8_t *const in_what = xd->plane[0].pre[0].buf; 1442 const int in_what_stride = xd->plane[0].pre[0].stride; 1443 MV this_mv; 1444 unsigned int bestsad = INT_MAX; 1445 int r, c; 1446 int ref_row = ref_mv->row; 1447 int ref_col = ref_mv->col; 1448 1449 // Apply further limits to prevent us looking using vectors that stretch 1450 // beyond the UMV border 1451 const int row_min = MAX(ref_row - distance, x->mv_row_min); 1452 const int row_max = MIN(ref_row + distance, x->mv_row_max); 1453 const int col_min = MAX(ref_col - distance, x->mv_col_min); 1454 const int col_max = MIN(ref_col + distance, x->mv_col_max); 1455 DECLARE_ALIGNED_ARRAY(16, uint32_t, sad_array8, 8); 1456 unsigned int sad_array[3]; 1457 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; 1458 1459 const int *mvjsadcost = x->nmvjointsadcost; 1460 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; 1461 1462 // Work out the mid point for the search 1463 const uint8_t *bestaddress = &in_what[ref_row * in_what_stride + ref_col]; 1464 1465 best_mv->row = ref_row; 1466 best_mv->col = ref_col; 1467 1468 // Baseline value at the center 1469 bestsad = fn_ptr->sdf(what, what_stride, 1470 bestaddress, in_what_stride, 0x7fffffff) 1471 + mvsad_err_cost(best_mv, &fcenter_mv, 1472 mvjsadcost, mvsadcost, sad_per_bit); 1473 1474 for (r = row_min; r < row_max; r++) { 1475 const uint8_t *check_here = &in_what[r * in_what_stride + col_min]; 1476 this_mv.row = r; 1477 c = col_min; 1478 1479 while ((c + 7) < col_max) { 1480 int i; 1481 1482 fn_ptr->sdx8f(what, what_stride, check_here, in_what_stride, sad_array8); 1483 1484 for (i = 0; i < 8; i++) { 1485 unsigned int thissad = (unsigned int)sad_array8[i]; 1486 1487 if (thissad < bestsad) { 1488 this_mv.col = c; 1489 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, 1490 mvjsadcost, mvsadcost, sad_per_bit); 1491 1492 if (thissad < bestsad) { 1493 bestsad = thissad; 1494 best_mv->row = r; 1495 best_mv->col = c; 1496 } 1497 } 1498 1499 check_here++; 1500 c++; 1501 } 1502 } 1503 1504 while ((c + 2) < col_max && fn_ptr->sdx3f != NULL) { 1505 int i; 1506 1507 fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array); 1508 1509 for (i = 0; i < 3; i++) { 1510 unsigned int thissad = sad_array[i]; 1511 1512 if (thissad < bestsad) { 1513 this_mv.col = c; 1514 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, 1515 mvjsadcost, mvsadcost, sad_per_bit); 1516 1517 if (thissad < bestsad) { 1518 bestsad = thissad; 1519 best_mv->row = r; 1520 best_mv->col = c; 1521 } 1522 } 1523 1524 check_here++; 1525 c++; 1526 } 1527 } 1528 1529 while (c < col_max) { 1530 unsigned int thissad = fn_ptr->sdf(what, what_stride, 1531 check_here, in_what_stride, bestsad); 1532 1533 if (thissad < bestsad) { 1534 this_mv.col = c; 1535 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, 1536 mvjsadcost, mvsadcost, sad_per_bit); 1537 1538 if (thissad < bestsad) { 1539 bestsad = thissad; 1540 best_mv->row = r; 1541 best_mv->col = c; 1542 } 1543 } 1544 1545 check_here++; 1546 c++; 1547 } 1548 } 1549 return bestsad; 1550 } 1551 1552 int vp9_refining_search_sad_c(const MACROBLOCK *x, 1553 MV *ref_mv, int error_per_bit, 1554 int search_range, 1555 const vp9_variance_fn_ptr_t *fn_ptr, 1556 int *mvjcost, int *mvcost[2], 1557 const MV *center_mv) { 1558 const MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}}; 1559 const MACROBLOCKD *const xd = &x->e_mbd; 1560 const struct buf_2d *const what = &x->plane[0].src; 1561 const struct buf_2d *const in_what = &xd->plane[0].pre[0]; 1562 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; 1563 const int *mvjsadcost = x->nmvjointsadcost; 1564 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; 1565 1566 unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride, 1567 get_buf_from_mv(in_what, ref_mv), 1568 in_what->stride, 0x7fffffff) + 1569 mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit); 1570 int i, j; 1571 1572 for (i = 0; i < search_range; i++) { 1573 int best_site = -1; 1574 1575 for (j = 0; j < 4; j++) { 1576 const MV mv = {ref_mv->row + neighbors[j].row, 1577 ref_mv->col + neighbors[j].col}; 1578 if (is_mv_in(x, &mv)) { 1579 unsigned int sad = fn_ptr->sdf(what->buf, what->stride, 1580 get_buf_from_mv(in_what, &mv), in_what->stride, best_sad); 1581 if (sad < best_sad) { 1582 sad += mvsad_err_cost(&mv, &fcenter_mv, mvjsadcost, mvsadcost, 1583 error_per_bit); 1584 if (sad < best_sad) { 1585 best_sad = sad; 1586 best_site = j; 1587 } 1588 } 1589 } 1590 } 1591 1592 if (best_site == -1) { 1593 break; 1594 } else { 1595 ref_mv->row += neighbors[best_site].row; 1596 ref_mv->col += neighbors[best_site].col; 1597 } 1598 } 1599 return best_sad; 1600 } 1601 1602 int vp9_refining_search_sadx4(const MACROBLOCK *x, 1603 MV *ref_mv, int error_per_bit, 1604 int search_range, 1605 const vp9_variance_fn_ptr_t *fn_ptr, 1606 int *mvjcost, int *mvcost[2], 1607 const MV *center_mv) { 1608 const MACROBLOCKD *const xd = &x->e_mbd; 1609 MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}}; 1610 int i, j; 1611 1612 const int what_stride = x->plane[0].src.stride; 1613 const int in_what_stride = xd->plane[0].pre[0].stride; 1614 const uint8_t *what = x->plane[0].src.buf; 1615 const uint8_t *best_address = xd->plane[0].pre[0].buf + 1616 (ref_mv->row * xd->plane[0].pre[0].stride) + 1617 ref_mv->col; 1618 1619 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; 1620 1621 const int *mvjsadcost = x->nmvjointsadcost; 1622 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; 1623 1624 unsigned int bestsad = fn_ptr->sdf(what, what_stride, best_address, 1625 in_what_stride, 0x7fffffff) + 1626 mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit); 1627 1628 for (i = 0; i < search_range; i++) { 1629 int best_site = -1; 1630 int all_in = ((ref_mv->row - 1) > x->mv_row_min) & 1631 ((ref_mv->row + 1) < x->mv_row_max) & 1632 ((ref_mv->col - 1) > x->mv_col_min) & 1633 ((ref_mv->col + 1) < x->mv_col_max); 1634 1635 if (all_in) { 1636 unsigned int sad_array[4]; 1637 uint8_t const *block_offset[4] = { 1638 best_address - in_what_stride, 1639 best_address - 1, 1640 best_address + 1, 1641 best_address + in_what_stride 1642 }; 1643 1644 fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, 1645 sad_array); 1646 1647 for (j = 0; j < 4; j++) { 1648 if (sad_array[j] < bestsad) { 1649 const MV this_mv = {ref_mv->row + neighbors[j].row, 1650 ref_mv->col + neighbors[j].col}; 1651 sad_array[j] += mvsad_err_cost(&this_mv, &fcenter_mv, 1652 mvjsadcost, mvsadcost, error_per_bit); 1653 1654 if (sad_array[j] < bestsad) { 1655 bestsad = sad_array[j]; 1656 best_site = j; 1657 } 1658 } 1659 } 1660 } else { 1661 for (j = 0; j < 4; j++) { 1662 const MV this_mv = {ref_mv->row + neighbors[j].row, 1663 ref_mv->col + neighbors[j].col}; 1664 1665 if (is_mv_in(x, &this_mv)) { 1666 const uint8_t *check_here = neighbors[j].row * in_what_stride + 1667 neighbors[j].col + best_address; 1668 unsigned int thissad = fn_ptr->sdf(what, what_stride, 1669 check_here, in_what_stride, 1670 bestsad); 1671 1672 if (thissad < bestsad) { 1673 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, 1674 mvjsadcost, mvsadcost, error_per_bit); 1675 1676 if (thissad < bestsad) { 1677 bestsad = thissad; 1678 best_site = j; 1679 } 1680 } 1681 } 1682 } 1683 } 1684 1685 if (best_site == -1) { 1686 break; 1687 } else { 1688 ref_mv->row += neighbors[best_site].row; 1689 ref_mv->col += neighbors[best_site].col; 1690 best_address += (neighbors[best_site].row) * in_what_stride + 1691 neighbors[best_site].col; 1692 } 1693 } 1694 1695 return bestsad; 1696 } 1697 1698 // This function is called when we do joint motion search in comp_inter_inter 1699 // mode. 1700 int vp9_refining_search_8p_c(const MACROBLOCK *x, 1701 MV *ref_mv, int error_per_bit, 1702 int search_range, 1703 const vp9_variance_fn_ptr_t *fn_ptr, 1704 int *mvjcost, int *mvcost[2], 1705 const MV *center_mv, 1706 const uint8_t *second_pred, int w, int h) { 1707 const MV neighbors[8] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0}, 1708 {-1, -1}, {1, -1}, {-1, 1}, {1, 1}}; 1709 const MACROBLOCKD *const xd = &x->e_mbd; 1710 const struct buf_2d *const what = &x->plane[0].src; 1711 const struct buf_2d *const in_what = &xd->plane[0].pre[0]; 1712 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; 1713 const int *mvjsadcost = x->nmvjointsadcost; 1714 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; 1715 unsigned int best_sad = fn_ptr->sdaf(what->buf, what->stride, 1716 get_buf_from_mv(in_what, ref_mv), in_what->stride, 1717 second_pred, 0x7fffffff) + 1718 mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit); 1719 int i, j; 1720 1721 for (i = 0; i < search_range; ++i) { 1722 int best_site = -1; 1723 1724 for (j = 0; j < 8; ++j) { 1725 const MV mv = {ref_mv->row + neighbors[j].row, 1726 ref_mv->col + neighbors[j].col}; 1727 1728 if (is_mv_in(x, &mv)) { 1729 unsigned int sad = fn_ptr->sdaf(what->buf, what->stride, 1730 get_buf_from_mv(in_what, &mv), in_what->stride, 1731 second_pred, best_sad); 1732 if (sad < best_sad) { 1733 sad += mvsad_err_cost(&mv, &fcenter_mv, 1734 mvjsadcost, mvsadcost, error_per_bit); 1735 if (sad < best_sad) { 1736 best_sad = sad; 1737 best_site = j; 1738 } 1739 } 1740 } 1741 } 1742 1743 if (best_site == -1) { 1744 break; 1745 } else { 1746 ref_mv->row += neighbors[best_site].row; 1747 ref_mv->col += neighbors[best_site].col; 1748 } 1749 } 1750 return best_sad; 1751 } 1752