1 /* 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include <limits.h> 12 #include <math.h> 13 #include <stdio.h> 14 15 #include "./vpx_config.h" 16 17 #include "vpx_mem/vpx_mem.h" 18 19 #include "vp9/common/vp9_findnearmv.h" 20 #include "vp9/common/vp9_common.h" 21 22 #include "vp9/encoder/vp9_onyx_int.h" 23 #include "vp9/encoder/vp9_mcomp.h" 24 25 // #define NEW_DIAMOND_SEARCH 26 27 void vp9_clamp_mv_min_max(MACROBLOCK *x, MV *mv) { 28 const int col_min = (mv->col >> 3) - MAX_FULL_PEL_VAL + (mv->col & 7 ? 1 : 0); 29 const int row_min = (mv->row >> 3) - MAX_FULL_PEL_VAL + (mv->row & 7 ? 1 : 0); 30 const int col_max = (mv->col >> 3) + MAX_FULL_PEL_VAL; 31 const int row_max = (mv->row >> 3) + MAX_FULL_PEL_VAL; 32 33 // Get intersection of UMV window and valid MV window to reduce # of checks 34 // in diamond search. 35 if (x->mv_col_min < col_min) 36 x->mv_col_min = col_min; 37 if (x->mv_col_max > col_max) 38 x->mv_col_max = col_max; 39 if (x->mv_row_min < row_min) 40 x->mv_row_min = row_min; 41 if (x->mv_row_max > row_max) 42 x->mv_row_max = row_max; 43 } 44 45 int vp9_init_search_range(VP9_COMP *cpi, int size) { 46 int sr = 0; 47 48 // Minimum search size no matter what the passed in value. 49 size = MAX(16, size); 50 51 while ((size << sr) < MAX_FULL_PEL_VAL) 52 sr++; 53 54 if (sr) 55 sr--; 56 57 sr += cpi->sf.reduce_first_step_size; 58 sr = MIN(sr, (cpi->sf.max_step_search_steps - 2)); 59 return sr; 60 } 61 62 static INLINE int mv_cost(const MV *mv, 63 const int *joint_cost, int *comp_cost[2]) { 64 return joint_cost[vp9_get_mv_joint(mv)] + 65 comp_cost[0][mv->row] + comp_cost[1][mv->col]; 66 } 67 68 int vp9_mv_bit_cost(const MV *mv, const MV *ref, 69 const int *mvjcost, int *mvcost[2], int weight) { 70 const MV diff = { mv->row - ref->row, 71 mv->col - ref->col }; 72 return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) * weight, 7); 73 } 74 75 static int mv_err_cost(const MV *mv, const MV *ref, 76 const int *mvjcost, int *mvcost[2], 77 int error_per_bit) { 78 if (mvcost) { 79 const MV diff = { mv->row - ref->row, 80 mv->col - ref->col }; 81 return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) * 82 error_per_bit, 13); 83 } 84 return 0; 85 } 86 87 static int mvsad_err_cost(const MV *mv, const MV *ref, 88 const int *mvjsadcost, int *mvsadcost[2], 89 int error_per_bit) { 90 if (mvsadcost) { 91 const MV diff = { mv->row - ref->row, 92 mv->col - ref->col }; 93 return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjsadcost, mvsadcost) * 94 error_per_bit, 8); 95 } 96 return 0; 97 } 98 99 void vp9_init_dsmotion_compensation(MACROBLOCK *x, int stride) { 100 int len; 101 int search_site_count = 0; 102 103 // Generate offsets for 4 search sites per step. 104 x->ss[search_site_count].mv.col = 0; 105 x->ss[search_site_count].mv.row = 0; 106 x->ss[search_site_count].offset = 0; 107 search_site_count++; 108 109 for (len = MAX_FIRST_STEP; len > 0; len /= 2) { 110 // Compute offsets for search sites. 111 x->ss[search_site_count].mv.col = 0; 112 x->ss[search_site_count].mv.row = -len; 113 x->ss[search_site_count].offset = -len * stride; 114 search_site_count++; 115 116 // Compute offsets for search sites. 117 x->ss[search_site_count].mv.col = 0; 118 x->ss[search_site_count].mv.row = len; 119 x->ss[search_site_count].offset = len * stride; 120 search_site_count++; 121 122 // Compute offsets for search sites. 123 x->ss[search_site_count].mv.col = -len; 124 x->ss[search_site_count].mv.row = 0; 125 x->ss[search_site_count].offset = -len; 126 search_site_count++; 127 128 // Compute offsets for search sites. 129 x->ss[search_site_count].mv.col = len; 130 x->ss[search_site_count].mv.row = 0; 131 x->ss[search_site_count].offset = len; 132 search_site_count++; 133 } 134 135 x->ss_count = search_site_count; 136 x->searches_per_step = 4; 137 } 138 139 void vp9_init3smotion_compensation(MACROBLOCK *x, int stride) { 140 int len, ss_count = 1; 141 142 x->ss[0].mv.col = x->ss[0].mv.row = 0; 143 x->ss[0].offset = 0; 144 145 for (len = MAX_FIRST_STEP; len > 0; len /= 2) { 146 // Generate offsets for 8 search sites per step. 147 const MV ss_mvs[8] = { 148 {-len, 0 }, {len, 0 }, { 0, -len}, {0, len}, 149 {-len, -len}, {-len, len}, {len, -len}, {len, len} 150 }; 151 int i; 152 for (i = 0; i < 8; ++i) { 153 search_site *const ss = &x->ss[ss_count++]; 154 ss->mv = ss_mvs[i]; 155 ss->offset = ss->mv.row * stride + ss->mv.col; 156 } 157 } 158 159 x->ss_count = ss_count; 160 x->searches_per_step = 8; 161 } 162 163 /* 164 * To avoid the penalty for crossing cache-line read, preload the reference 165 * area in a small buffer, which is aligned to make sure there won't be crossing 166 * cache-line read while reading from this buffer. This reduced the cpu 167 * cycles spent on reading ref data in sub-pixel filter functions. 168 * TODO: Currently, since sub-pixel search range here is -3 ~ 3, copy 22 rows x 169 * 32 cols area that is enough for 16x16 macroblock. Later, for SPLITMV, we 170 * could reduce the area. 171 */ 172 173 /* estimated cost of a motion vector (r,c) */ 174 #define MVC(r, c) \ 175 (mvcost ? \ 176 ((mvjcost[((r) != rr) * 2 + ((c) != rc)] + \ 177 mvcost[0][((r) - rr)] + mvcost[1][((c) - rc)]) * \ 178 error_per_bit + 4096) >> 13 : 0) 179 180 181 #define SP(x) (((x) & 7) << 1) // convert motion vector component to offset 182 // for svf calc 183 184 #define IFMVCV(r, c, s, e) \ 185 if (c >= minc && c <= maxc && r >= minr && r <= maxr) \ 186 s \ 187 else \ 188 e; 189 190 /* pointer to predictor base of a motionvector */ 191 #define PRE(r, c) (y + (((r) >> 3) * y_stride + ((c) >> 3) -(offset))) 192 193 /* returns subpixel variance error function */ 194 #define DIST(r, c) \ 195 vfp->svf(PRE(r, c), y_stride, SP(c), SP(r), z, src_stride, &sse) 196 197 /* checks if (r, c) has better score than previous best */ 198 #define CHECK_BETTER(v, r, c) \ 199 IFMVCV(r, c, { \ 200 thismse = (DIST(r, c)); \ 201 if ((v = MVC(r, c) + thismse) < besterr) { \ 202 besterr = v; \ 203 br = r; \ 204 bc = c; \ 205 *distortion = thismse; \ 206 *sse1 = sse; \ 207 } \ 208 }, \ 209 v = INT_MAX;) 210 211 #define FIRST_LEVEL_CHECKS \ 212 { \ 213 unsigned int left, right, up, down, diag; \ 214 CHECK_BETTER(left, tr, tc - hstep); \ 215 CHECK_BETTER(right, tr, tc + hstep); \ 216 CHECK_BETTER(up, tr - hstep, tc); \ 217 CHECK_BETTER(down, tr + hstep, tc); \ 218 whichdir = (left < right ? 0 : 1) + \ 219 (up < down ? 0 : 2); \ 220 switch (whichdir) { \ 221 case 0: \ 222 CHECK_BETTER(diag, tr - hstep, tc - hstep); \ 223 break; \ 224 case 1: \ 225 CHECK_BETTER(diag, tr - hstep, tc + hstep); \ 226 break; \ 227 case 2: \ 228 CHECK_BETTER(diag, tr + hstep, tc - hstep); \ 229 break; \ 230 case 3: \ 231 CHECK_BETTER(diag, tr + hstep, tc + hstep); \ 232 break; \ 233 } \ 234 } 235 236 #define SECOND_LEVEL_CHECKS \ 237 { \ 238 int kr, kc; \ 239 unsigned int second; \ 240 if (tr != br && tc != bc) { \ 241 kr = br - tr; \ 242 kc = bc - tc; \ 243 CHECK_BETTER(second, tr + kr, tc + 2 * kc); \ 244 CHECK_BETTER(second, tr + 2 * kr, tc + kc); \ 245 } else if (tr == br && tc != bc) { \ 246 kc = bc - tc; \ 247 CHECK_BETTER(second, tr + hstep, tc + 2 * kc); \ 248 CHECK_BETTER(second, tr - hstep, tc + 2 * kc); \ 249 switch (whichdir) { \ 250 case 0: \ 251 case 1: \ 252 CHECK_BETTER(second, tr + hstep, tc + kc); \ 253 break; \ 254 case 2: \ 255 case 3: \ 256 CHECK_BETTER(second, tr - hstep, tc + kc); \ 257 break; \ 258 } \ 259 } else if (tr != br && tc == bc) { \ 260 kr = br - tr; \ 261 CHECK_BETTER(second, tr + 2 * kr, tc + hstep); \ 262 CHECK_BETTER(second, tr + 2 * kr, tc - hstep); \ 263 switch (whichdir) { \ 264 case 0: \ 265 case 2: \ 266 CHECK_BETTER(second, tr + kr, tc + hstep); \ 267 break; \ 268 case 1: \ 269 case 3: \ 270 CHECK_BETTER(second, tr + kr, tc - hstep); \ 271 break; \ 272 } \ 273 } \ 274 } 275 276 int vp9_find_best_sub_pixel_iterative(MACROBLOCK *x, 277 MV *bestmv, const MV *ref_mv, 278 int allow_hp, 279 int error_per_bit, 280 const vp9_variance_fn_ptr_t *vfp, 281 int forced_stop, 282 int iters_per_step, 283 int *mvjcost, int *mvcost[2], 284 int *distortion, 285 unsigned int *sse1) { 286 uint8_t *z = x->plane[0].src.buf; 287 int src_stride = x->plane[0].src.stride; 288 MACROBLOCKD *xd = &x->e_mbd; 289 290 unsigned int besterr = INT_MAX; 291 unsigned int sse; 292 unsigned int whichdir; 293 unsigned int halfiters = iters_per_step; 294 unsigned int quarteriters = iters_per_step; 295 unsigned int eighthiters = iters_per_step; 296 int thismse; 297 298 const int y_stride = xd->plane[0].pre[0].stride; 299 const int offset = bestmv->row * y_stride + bestmv->col; 300 uint8_t *y = xd->plane[0].pre[0].buf + offset; 301 302 int rr = ref_mv->row; 303 int rc = ref_mv->col; 304 int br = bestmv->row * 8; 305 int bc = bestmv->col * 8; 306 int hstep = 4; 307 const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX); 308 const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX); 309 const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX); 310 const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX); 311 312 int tr = br; 313 int tc = bc; 314 315 // central mv 316 bestmv->row <<= 3; 317 bestmv->col <<= 3; 318 319 // calculate central point error 320 besterr = vfp->vf(y, y_stride, z, src_stride, sse1); 321 *distortion = besterr; 322 besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); 323 324 // TODO(jbb): Each subsequent iteration checks at least one point in 325 // common with the last iteration could be 2 if diagonal is selected. 326 while (halfiters--) { 327 // 1/2 pel 328 FIRST_LEVEL_CHECKS; 329 // no reason to check the same one again. 330 if (tr == br && tc == bc) 331 break; 332 tr = br; 333 tc = bc; 334 } 335 336 // TODO(yaowu): Each subsequent iteration checks at least one point in common 337 // with the last iteration could be 2 if diagonal is selected. 338 339 // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only 340 if (forced_stop != 2) { 341 hstep >>= 1; 342 while (quarteriters--) { 343 FIRST_LEVEL_CHECKS; 344 // no reason to check the same one again. 345 if (tr == br && tc == bc) 346 break; 347 tr = br; 348 tc = bc; 349 } 350 } 351 352 if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) { 353 hstep >>= 1; 354 while (eighthiters--) { 355 FIRST_LEVEL_CHECKS; 356 // no reason to check the same one again. 357 if (tr == br && tc == bc) 358 break; 359 tr = br; 360 tc = bc; 361 } 362 } 363 364 bestmv->row = br; 365 bestmv->col = bc; 366 367 if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) || 368 (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3))) 369 return INT_MAX; 370 371 return besterr; 372 } 373 374 int vp9_find_best_sub_pixel_tree(MACROBLOCK *x, 375 MV *bestmv, const MV *ref_mv, 376 int allow_hp, 377 int error_per_bit, 378 const vp9_variance_fn_ptr_t *vfp, 379 int forced_stop, 380 int iters_per_step, 381 int *mvjcost, int *mvcost[2], 382 int *distortion, 383 unsigned int *sse1) { 384 uint8_t *z = x->plane[0].src.buf; 385 const int src_stride = x->plane[0].src.stride; 386 MACROBLOCKD *xd = &x->e_mbd; 387 unsigned int besterr = INT_MAX; 388 unsigned int sse; 389 unsigned int whichdir; 390 int thismse; 391 unsigned int halfiters = iters_per_step; 392 unsigned int quarteriters = iters_per_step; 393 unsigned int eighthiters = iters_per_step; 394 395 const int y_stride = xd->plane[0].pre[0].stride; 396 const int offset = bestmv->row * y_stride + bestmv->col; 397 uint8_t *y = xd->plane[0].pre[0].buf + offset; 398 399 int rr = ref_mv->row; 400 int rc = ref_mv->col; 401 int br = bestmv->row * 8; 402 int bc = bestmv->col * 8; 403 int hstep = 4; 404 const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX); 405 const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX); 406 const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX); 407 const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX); 408 409 int tr = br; 410 int tc = bc; 411 412 // central mv 413 bestmv->row *= 8; 414 bestmv->col *= 8; 415 416 // calculate central point error 417 besterr = vfp->vf(y, y_stride, z, src_stride, sse1); 418 *distortion = besterr; 419 besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); 420 421 // 1/2 pel 422 FIRST_LEVEL_CHECKS; 423 if (halfiters > 1) { 424 SECOND_LEVEL_CHECKS; 425 } 426 tr = br; 427 tc = bc; 428 429 // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only 430 if (forced_stop != 2) { 431 hstep >>= 1; 432 FIRST_LEVEL_CHECKS; 433 if (quarteriters > 1) { 434 SECOND_LEVEL_CHECKS; 435 } 436 tr = br; 437 tc = bc; 438 } 439 440 if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) { 441 hstep >>= 1; 442 FIRST_LEVEL_CHECKS; 443 if (eighthiters > 1) { 444 SECOND_LEVEL_CHECKS; 445 } 446 tr = br; 447 tc = bc; 448 } 449 450 bestmv->row = br; 451 bestmv->col = bc; 452 453 if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) || 454 (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3))) 455 return INT_MAX; 456 457 return besterr; 458 } 459 460 #undef DIST 461 /* returns subpixel variance error function */ 462 #define DIST(r, c) \ 463 vfp->svaf(PRE(r, c), y_stride, SP(c), SP(r), \ 464 z, src_stride, &sse, second_pred) 465 466 int vp9_find_best_sub_pixel_comp_iterative(MACROBLOCK *x, 467 MV *bestmv, const MV *ref_mv, 468 int allow_hp, 469 int error_per_bit, 470 const vp9_variance_fn_ptr_t *vfp, 471 int forced_stop, 472 int iters_per_step, 473 int *mvjcost, int *mvcost[2], 474 int *distortion, 475 unsigned int *sse1, 476 const uint8_t *second_pred, 477 int w, int h) { 478 uint8_t *const z = x->plane[0].src.buf; 479 const int src_stride = x->plane[0].src.stride; 480 MACROBLOCKD *const xd = &x->e_mbd; 481 482 unsigned int besterr = INT_MAX; 483 unsigned int sse; 484 unsigned int whichdir; 485 unsigned int halfiters = iters_per_step; 486 unsigned int quarteriters = iters_per_step; 487 unsigned int eighthiters = iters_per_step; 488 int thismse; 489 490 DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64); 491 const int y_stride = xd->plane[0].pre[0].stride; 492 const int offset = bestmv->row * y_stride + bestmv->col; 493 uint8_t *const y = xd->plane[0].pre[0].buf + offset; 494 495 int rr = ref_mv->row; 496 int rc = ref_mv->col; 497 int br = bestmv->row * 8; 498 int bc = bestmv->col * 8; 499 int hstep = 4; 500 const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX); 501 const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX); 502 const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX); 503 const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX); 504 505 int tr = br; 506 int tc = bc; 507 508 // central mv 509 bestmv->row *= 8; 510 bestmv->col *= 8; 511 512 // calculate central point error 513 // TODO(yunqingwang): central pointer error was already calculated in full- 514 // pixel search, and can be passed in this function. 515 comp_avg_pred(comp_pred, second_pred, w, h, y, y_stride); 516 besterr = vfp->vf(comp_pred, w, z, src_stride, sse1); 517 *distortion = besterr; 518 besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); 519 520 // Each subsequent iteration checks at least one point in 521 // common with the last iteration could be 2 ( if diag selected) 522 while (halfiters--) { 523 // 1/2 pel 524 FIRST_LEVEL_CHECKS; 525 // no reason to check the same one again. 526 if (tr == br && tc == bc) 527 break; 528 tr = br; 529 tc = bc; 530 } 531 532 // Each subsequent iteration checks at least one point in common with 533 // the last iteration could be 2 ( if diag selected) 1/4 pel 534 535 // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only 536 if (forced_stop != 2) { 537 hstep >>= 1; 538 while (quarteriters--) { 539 FIRST_LEVEL_CHECKS; 540 // no reason to check the same one again. 541 if (tr == br && tc == bc) 542 break; 543 tr = br; 544 tc = bc; 545 } 546 } 547 548 if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) { 549 hstep >>= 1; 550 while (eighthiters--) { 551 FIRST_LEVEL_CHECKS; 552 // no reason to check the same one again. 553 if (tr == br && tc == bc) 554 break; 555 tr = br; 556 tc = bc; 557 } 558 } 559 bestmv->row = br; 560 bestmv->col = bc; 561 562 if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) || 563 (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3))) 564 return INT_MAX; 565 566 return besterr; 567 } 568 569 int vp9_find_best_sub_pixel_comp_tree(MACROBLOCK *x, 570 MV *bestmv, const MV *ref_mv, 571 int allow_hp, 572 int error_per_bit, 573 const vp9_variance_fn_ptr_t *vfp, 574 int forced_stop, 575 int iters_per_step, 576 int *mvjcost, int *mvcost[2], 577 int *distortion, 578 unsigned int *sse1, 579 const uint8_t *second_pred, 580 int w, int h) { 581 uint8_t *z = x->plane[0].src.buf; 582 const int src_stride = x->plane[0].src.stride; 583 MACROBLOCKD *xd = &x->e_mbd; 584 unsigned int besterr = INT_MAX; 585 unsigned int sse; 586 unsigned int whichdir; 587 int thismse; 588 unsigned int halfiters = iters_per_step; 589 unsigned int quarteriters = iters_per_step; 590 unsigned int eighthiters = iters_per_step; 591 592 DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64); 593 const int y_stride = xd->plane[0].pre[0].stride; 594 const int offset = bestmv->row * y_stride + bestmv->col; 595 uint8_t *y = xd->plane[0].pre[0].buf + offset; 596 597 int rr = ref_mv->row; 598 int rc = ref_mv->col; 599 int br = bestmv->row * 8; 600 int bc = bestmv->col * 8; 601 int hstep = 4; 602 const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX); 603 const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX); 604 const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX); 605 const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX); 606 607 int tr = br; 608 int tc = bc; 609 610 // central mv 611 bestmv->row *= 8; 612 bestmv->col *= 8; 613 614 // calculate central point error 615 // TODO(yunqingwang): central pointer error was already calculated in full- 616 // pixel search, and can be passed in this function. 617 comp_avg_pred(comp_pred, second_pred, w, h, y, y_stride); 618 besterr = vfp->vf(comp_pred, w, z, src_stride, sse1); 619 *distortion = besterr; 620 besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); 621 622 // Each subsequent iteration checks at least one point in 623 // common with the last iteration could be 2 ( if diag selected) 624 // 1/2 pel 625 FIRST_LEVEL_CHECKS; 626 if (halfiters > 1) { 627 SECOND_LEVEL_CHECKS; 628 } 629 tr = br; 630 tc = bc; 631 632 // Each subsequent iteration checks at least one point in common with 633 // the last iteration could be 2 ( if diag selected) 1/4 pel 634 635 // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only 636 if (forced_stop != 2) { 637 hstep >>= 1; 638 FIRST_LEVEL_CHECKS; 639 if (quarteriters > 1) { 640 SECOND_LEVEL_CHECKS; 641 } 642 tr = br; 643 tc = bc; 644 } 645 646 if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) { 647 hstep >>= 1; 648 FIRST_LEVEL_CHECKS; 649 if (eighthiters > 1) { 650 SECOND_LEVEL_CHECKS; 651 } 652 tr = br; 653 tc = bc; 654 } 655 bestmv->row = br; 656 bestmv->col = bc; 657 658 if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) || 659 (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3))) 660 return INT_MAX; 661 662 return besterr; 663 } 664 665 #undef MVC 666 #undef PRE 667 #undef DIST 668 #undef IFMVCV 669 #undef CHECK_BETTER 670 #undef SP 671 672 #define CHECK_BOUNDS(range) \ 673 {\ 674 all_in = 1;\ 675 all_in &= ((br-range) >= x->mv_row_min);\ 676 all_in &= ((br+range) <= x->mv_row_max);\ 677 all_in &= ((bc-range) >= x->mv_col_min);\ 678 all_in &= ((bc+range) <= x->mv_col_max);\ 679 } 680 681 #define CHECK_POINT \ 682 {\ 683 if (this_mv.col < x->mv_col_min) continue;\ 684 if (this_mv.col > x->mv_col_max) continue;\ 685 if (this_mv.row < x->mv_row_min) continue;\ 686 if (this_mv.row > x->mv_row_max) continue;\ 687 } 688 689 #define CHECK_BETTER \ 690 {\ 691 if (thissad < bestsad)\ 692 {\ 693 if (use_mvcost) \ 694 thissad += mvsad_err_cost(&this_mv, &fcenter_mv.as_mv, \ 695 mvjsadcost, mvsadcost, \ 696 sad_per_bit);\ 697 if (thissad < bestsad)\ 698 {\ 699 bestsad = thissad;\ 700 best_site = i;\ 701 }\ 702 }\ 703 } 704 705 #define get_next_chkpts(list, i, n) \ 706 list[0] = ((i) == 0 ? (n) - 1 : (i) - 1); \ 707 list[1] = (i); \ 708 list[2] = ((i) == (n) - 1 ? 0 : (i) + 1); 709 710 #define MAX_PATTERN_SCALES 11 711 #define MAX_PATTERN_CANDIDATES 8 // max number of canddiates per scale 712 #define PATTERN_CANDIDATES_REF 3 // number of refinement candidates 713 714 // Generic pattern search function that searches over multiple scales. 715 // Each scale can have a different number of candidates and shape of 716 // candidates as indicated in the num_candidates and candidates arrays 717 // passed into this function 718 static int vp9_pattern_search(MACROBLOCK *x, 719 MV *ref_mv, 720 int search_param, 721 int sad_per_bit, 722 int do_init_search, 723 int do_refine, 724 const vp9_variance_fn_ptr_t *vfp, 725 int use_mvcost, 726 const MV *center_mv, MV *best_mv, 727 const int num_candidates[MAX_PATTERN_SCALES], 728 const MV candidates[MAX_PATTERN_SCALES] 729 [MAX_PATTERN_CANDIDATES]) { 730 const MACROBLOCKD* const xd = &x->e_mbd; 731 static const int search_param_to_steps[MAX_MVSEARCH_STEPS] = { 732 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 733 }; 734 int i, j, s, t; 735 uint8_t *what = x->plane[0].src.buf; 736 int what_stride = x->plane[0].src.stride; 737 int in_what_stride = xd->plane[0].pre[0].stride; 738 int br, bc; 739 MV this_mv; 740 int bestsad = INT_MAX; 741 int thissad; 742 uint8_t *base_offset; 743 uint8_t *this_offset; 744 int k = -1; 745 int all_in; 746 int best_site = -1; 747 int_mv fcenter_mv; 748 int best_init_s = search_param_to_steps[search_param]; 749 int *mvjsadcost = x->nmvjointsadcost; 750 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; 751 752 fcenter_mv.as_mv.row = center_mv->row >> 3; 753 fcenter_mv.as_mv.col = center_mv->col >> 3; 754 755 // adjust ref_mv to make sure it is within MV range 756 clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); 757 br = ref_mv->row; 758 bc = ref_mv->col; 759 760 // Work out the start point for the search 761 base_offset = (uint8_t *)(xd->plane[0].pre[0].buf); 762 this_offset = base_offset + (br * in_what_stride) + bc; 763 this_mv.row = br; 764 this_mv.col = bc; 765 bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride, 0x7fffffff) 766 + mvsad_err_cost(&this_mv, &fcenter_mv.as_mv, 767 mvjsadcost, mvsadcost, sad_per_bit); 768 769 // Search all possible scales upto the search param around the center point 770 // pick the scale of the point that is best as the starting scale of 771 // further steps around it. 772 if (do_init_search) { 773 s = best_init_s; 774 best_init_s = -1; 775 for (t = 0; t <= s; ++t) { 776 best_site = -1; 777 CHECK_BOUNDS((1 << t)) 778 if (all_in) { 779 for (i = 0; i < num_candidates[t]; i++) { 780 this_mv.row = br + candidates[t][i].row; 781 this_mv.col = bc + candidates[t][i].col; 782 this_offset = base_offset + (this_mv.row * in_what_stride) + 783 this_mv.col; 784 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, 785 bestsad); 786 CHECK_BETTER 787 } 788 } else { 789 for (i = 0; i < num_candidates[t]; i++) { 790 this_mv.row = br + candidates[t][i].row; 791 this_mv.col = bc + candidates[t][i].col; 792 CHECK_POINT 793 this_offset = base_offset + (this_mv.row * in_what_stride) + 794 this_mv.col; 795 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, 796 bestsad); 797 CHECK_BETTER 798 } 799 } 800 if (best_site == -1) { 801 continue; 802 } else { 803 best_init_s = t; 804 k = best_site; 805 } 806 } 807 if (best_init_s != -1) { 808 br += candidates[best_init_s][k].row; 809 bc += candidates[best_init_s][k].col; 810 } 811 } 812 813 // If the center point is still the best, just skip this and move to 814 // the refinement step. 815 if (best_init_s != -1) { 816 s = best_init_s; 817 best_site = -1; 818 do { 819 // No need to search all 6 points the 1st time if initial search was used 820 if (!do_init_search || s != best_init_s) { 821 CHECK_BOUNDS((1 << s)) 822 if (all_in) { 823 for (i = 0; i < num_candidates[s]; i++) { 824 this_mv.row = br + candidates[s][i].row; 825 this_mv.col = bc + candidates[s][i].col; 826 this_offset = base_offset + (this_mv.row * in_what_stride) + 827 this_mv.col; 828 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, 829 bestsad); 830 CHECK_BETTER 831 } 832 } else { 833 for (i = 0; i < num_candidates[s]; i++) { 834 this_mv.row = br + candidates[s][i].row; 835 this_mv.col = bc + candidates[s][i].col; 836 CHECK_POINT 837 this_offset = base_offset + (this_mv.row * in_what_stride) + 838 this_mv.col; 839 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, 840 bestsad); 841 CHECK_BETTER 842 } 843 } 844 845 if (best_site == -1) { 846 continue; 847 } else { 848 br += candidates[s][best_site].row; 849 bc += candidates[s][best_site].col; 850 k = best_site; 851 } 852 } 853 854 do { 855 int next_chkpts_indices[PATTERN_CANDIDATES_REF]; 856 best_site = -1; 857 CHECK_BOUNDS((1 << s)) 858 859 get_next_chkpts(next_chkpts_indices, k, num_candidates[s]); 860 if (all_in) { 861 for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { 862 this_mv.row = br + candidates[s][next_chkpts_indices[i]].row; 863 this_mv.col = bc + candidates[s][next_chkpts_indices[i]].col; 864 this_offset = base_offset + (this_mv.row * (in_what_stride)) + 865 this_mv.col; 866 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, 867 bestsad); 868 CHECK_BETTER 869 } 870 } else { 871 for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { 872 this_mv.row = br + candidates[s][next_chkpts_indices[i]].row; 873 this_mv.col = bc + candidates[s][next_chkpts_indices[i]].col; 874 CHECK_POINT 875 this_offset = base_offset + (this_mv.row * (in_what_stride)) + 876 this_mv.col; 877 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, 878 bestsad); 879 CHECK_BETTER 880 } 881 } 882 883 if (best_site != -1) { 884 k = next_chkpts_indices[best_site]; 885 br += candidates[s][k].row; 886 bc += candidates[s][k].col; 887 } 888 } while (best_site != -1); 889 } while (s--); 890 } 891 892 // Check 4 1-away neighbors if do_refine is true. 893 // For most well-designed schemes do_refine will not be necessary. 894 if (do_refine) { 895 static const MV neighbors[4] = { 896 {0, -1}, { -1, 0}, {1, 0}, {0, 1}, 897 }; 898 for (j = 0; j < 16; j++) { 899 best_site = -1; 900 CHECK_BOUNDS(1) 901 if (all_in) { 902 for (i = 0; i < 4; i++) { 903 this_mv.row = br + neighbors[i].row; 904 this_mv.col = bc + neighbors[i].col; 905 this_offset = base_offset + (this_mv.row * (in_what_stride)) + 906 this_mv.col; 907 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, 908 bestsad); 909 CHECK_BETTER 910 } 911 } else { 912 for (i = 0; i < 4; i++) { 913 this_mv.row = br + neighbors[i].row; 914 this_mv.col = bc + neighbors[i].col; 915 CHECK_POINT 916 this_offset = base_offset + (this_mv.row * (in_what_stride)) + 917 this_mv.col; 918 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, 919 bestsad); 920 CHECK_BETTER 921 } 922 } 923 924 if (best_site == -1) { 925 break; 926 } else { 927 br += neighbors[best_site].row; 928 bc += neighbors[best_site].col; 929 } 930 } 931 } 932 933 best_mv->row = br; 934 best_mv->col = bc; 935 936 this_offset = base_offset + (best_mv->row * in_what_stride) + 937 best_mv->col; 938 this_mv.row = best_mv->row * 8; 939 this_mv.col = best_mv->col * 8; 940 if (bestsad == INT_MAX) 941 return INT_MAX; 942 943 return vfp->vf(what, what_stride, this_offset, in_what_stride, 944 (unsigned int *)&bestsad) + 945 use_mvcost ? mv_err_cost(&this_mv, center_mv, 946 x->nmvjointcost, x->mvcost, x->errorperbit) 947 : 0; 948 } 949 950 951 int vp9_hex_search(MACROBLOCK *x, 952 MV *ref_mv, 953 int search_param, 954 int sad_per_bit, 955 int do_init_search, 956 const vp9_variance_fn_ptr_t *vfp, 957 int use_mvcost, 958 const MV *center_mv, MV *best_mv) { 959 // First scale has 8-closest points, the rest have 6 points in hex shape 960 // at increasing scales 961 static const int hex_num_candidates[MAX_PATTERN_SCALES] = { 962 8, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6 963 }; 964 // Note that the largest candidate step at each scale is 2^scale 965 static const MV hex_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = { 966 {{-1, -1}, {0, -1}, {1, -1}, {1, 0}, {1, 1}, { 0, 1}, { -1, 1}, {-1, 0}}, 967 {{-1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0}}, 968 {{-2, -4}, {2, -4}, {4, 0}, {2, 4}, { -2, 4}, { -4, 0}}, 969 {{-4, -8}, {4, -8}, {8, 0}, {4, 8}, { -4, 8}, { -8, 0}}, 970 {{-8, -16}, {8, -16}, {16, 0}, {8, 16}, { -8, 16}, { -16, 0}}, 971 {{-16, -32}, {16, -32}, {32, 0}, {16, 32}, { -16, 32}, { -32, 0}}, 972 {{-32, -64}, {32, -64}, {64, 0}, {32, 64}, { -32, 64}, { -64, 0}}, 973 {{-64, -128}, {64, -128}, {128, 0}, {64, 128}, { -64, 128}, { -128, 0}}, 974 {{-128, -256}, {128, -256}, {256, 0}, {128, 256}, { -128, 256}, { -256, 0}}, 975 {{-256, -512}, {256, -512}, {512, 0}, {256, 512}, { -256, 512}, { -512, 0}}, 976 {{-512, -1024}, {512, -1024}, {1024, 0}, {512, 1024}, { -512, 1024}, 977 { -1024, 0}}, 978 }; 979 return 980 vp9_pattern_search(x, ref_mv, search_param, sad_per_bit, 981 do_init_search, 0, vfp, use_mvcost, 982 center_mv, best_mv, 983 hex_num_candidates, hex_candidates); 984 } 985 986 int vp9_bigdia_search(MACROBLOCK *x, 987 MV *ref_mv, 988 int search_param, 989 int sad_per_bit, 990 int do_init_search, 991 const vp9_variance_fn_ptr_t *vfp, 992 int use_mvcost, 993 const MV *center_mv, 994 MV *best_mv) { 995 // First scale has 4-closest points, the rest have 8 points in diamond 996 // shape at increasing scales 997 static const int bigdia_num_candidates[MAX_PATTERN_SCALES] = { 998 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 999 }; 1000 // Note that the largest candidate step at each scale is 2^scale 1001 static const MV bigdia_candidates[MAX_PATTERN_SCALES] 1002 [MAX_PATTERN_CANDIDATES] = { 1003 {{0, -1}, {1, 0}, { 0, 1}, {-1, 0}}, 1004 {{-1, -1}, {0, -2}, {1, -1}, {2, 0}, {1, 1}, {0, 2}, {-1, 1}, {-2, 0}}, 1005 {{-2, -2}, {0, -4}, {2, -2}, {4, 0}, {2, 2}, {0, 4}, {-2, 2}, {-4, 0}}, 1006 {{-4, -4}, {0, -8}, {4, -4}, {8, 0}, {4, 4}, {0, 8}, {-4, 4}, {-8, 0}}, 1007 {{-8, -8}, {0, -16}, {8, -8}, {16, 0}, {8, 8}, {0, 16}, {-8, 8}, {-16, 0}}, 1008 {{-16, -16}, {0, -32}, {16, -16}, {32, 0}, {16, 16}, {0, 32}, 1009 {-16, 16}, {-32, 0}}, 1010 {{-32, -32}, {0, -64}, {32, -32}, {64, 0}, {32, 32}, {0, 64}, 1011 {-32, 32}, {-64, 0}}, 1012 {{-64, -64}, {0, -128}, {64, -64}, {128, 0}, {64, 64}, {0, 128}, 1013 {-64, 64}, {-128, 0}}, 1014 {{-128, -128}, {0, -256}, {128, -128}, {256, 0}, {128, 128}, {0, 256}, 1015 {-128, 128}, {-256, 0}}, 1016 {{-256, -256}, {0, -512}, {256, -256}, {512, 0}, {256, 256}, {0, 512}, 1017 {-256, 256}, {-512, 0}}, 1018 {{-512, -512}, {0, -1024}, {512, -512}, {1024, 0}, {512, 512}, {0, 1024}, 1019 {-512, 512}, {-1024, 0}}, 1020 }; 1021 return vp9_pattern_search(x, ref_mv, search_param, sad_per_bit, 1022 do_init_search, 0, vfp, use_mvcost, 1023 center_mv, best_mv, 1024 bigdia_num_candidates, bigdia_candidates); 1025 } 1026 1027 int vp9_square_search(MACROBLOCK *x, 1028 MV *ref_mv, 1029 int search_param, 1030 int sad_per_bit, 1031 int do_init_search, 1032 const vp9_variance_fn_ptr_t *vfp, 1033 int use_mvcost, 1034 const MV *center_mv, 1035 MV *best_mv) { 1036 // All scales have 8 closest points in square shape 1037 static const int square_num_candidates[MAX_PATTERN_SCALES] = { 1038 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 1039 }; 1040 // Note that the largest candidate step at each scale is 2^scale 1041 static const MV square_candidates[MAX_PATTERN_SCALES] 1042 [MAX_PATTERN_CANDIDATES] = { 1043 {{-1, -1}, {0, -1}, {1, -1}, {1, 0}, {1, 1}, {0, 1}, {-1, 1}, {-1, 0}}, 1044 {{-2, -2}, {0, -2}, {2, -2}, {2, 0}, {2, 2}, {0, 2}, {-2, 2}, {-2, 0}}, 1045 {{-4, -4}, {0, -4}, {4, -4}, {4, 0}, {4, 4}, {0, 4}, {-4, 4}, {-4, 0}}, 1046 {{-8, -8}, {0, -8}, {8, -8}, {8, 0}, {8, 8}, {0, 8}, {-8, 8}, {-8, 0}}, 1047 {{-16, -16}, {0, -16}, {16, -16}, {16, 0}, {16, 16}, {0, 16}, 1048 {-16, 16}, {-16, 0}}, 1049 {{-32, -32}, {0, -32}, {32, -32}, {32, 0}, {32, 32}, {0, 32}, 1050 {-32, 32}, {-32, 0}}, 1051 {{-64, -64}, {0, -64}, {64, -64}, {64, 0}, {64, 64}, {0, 64}, 1052 {-64, 64}, {-64, 0}}, 1053 {{-128, -128}, {0, -128}, {128, -128}, {128, 0}, {128, 128}, {0, 128}, 1054 {-128, 128}, {-128, 0}}, 1055 {{-256, -256}, {0, -256}, {256, -256}, {256, 0}, {256, 256}, {0, 256}, 1056 {-256, 256}, {-256, 0}}, 1057 {{-512, -512}, {0, -512}, {512, -512}, {512, 0}, {512, 512}, {0, 512}, 1058 {-512, 512}, {-512, 0}}, 1059 {{-1024, -1024}, {0, -1024}, {1024, -1024}, {1024, 0}, {1024, 1024}, 1060 {0, 1024}, {-1024, 1024}, {-1024, 0}}, 1061 }; 1062 return vp9_pattern_search(x, ref_mv, search_param, sad_per_bit, 1063 do_init_search, 0, vfp, use_mvcost, 1064 center_mv, best_mv, 1065 square_num_candidates, square_candidates); 1066 }; 1067 1068 #undef CHECK_BOUNDS 1069 #undef CHECK_POINT 1070 #undef CHECK_BETTER 1071 1072 int vp9_diamond_search_sad_c(MACROBLOCK *x, 1073 int_mv *ref_mv, int_mv *best_mv, 1074 int search_param, int sad_per_bit, int *num00, 1075 vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost, 1076 int *mvcost[2], int_mv *center_mv) { 1077 int i, j, step; 1078 1079 const MACROBLOCKD* const xd = &x->e_mbd; 1080 uint8_t *what = x->plane[0].src.buf; 1081 int what_stride = x->plane[0].src.stride; 1082 uint8_t *in_what; 1083 int in_what_stride = xd->plane[0].pre[0].stride; 1084 uint8_t *best_address; 1085 1086 int tot_steps; 1087 int_mv this_mv; 1088 1089 int bestsad = INT_MAX; 1090 int best_site = 0; 1091 int last_site = 0; 1092 1093 int ref_row, ref_col; 1094 int this_row_offset, this_col_offset; 1095 search_site *ss; 1096 1097 uint8_t *check_here; 1098 int thissad; 1099 int_mv fcenter_mv; 1100 1101 int *mvjsadcost = x->nmvjointsadcost; 1102 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; 1103 1104 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; 1105 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; 1106 1107 clamp_mv(&ref_mv->as_mv, 1108 x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); 1109 ref_row = ref_mv->as_mv.row; 1110 ref_col = ref_mv->as_mv.col; 1111 *num00 = 0; 1112 best_mv->as_mv.row = ref_row; 1113 best_mv->as_mv.col = ref_col; 1114 1115 // Work out the start point for the search 1116 in_what = (uint8_t *)(xd->plane[0].pre[0].buf + 1117 (ref_row * (xd->plane[0].pre[0].stride)) + ref_col); 1118 best_address = in_what; 1119 1120 // Check the starting position 1121 bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) 1122 + mvsad_err_cost(&best_mv->as_mv, &fcenter_mv.as_mv, 1123 mvjsadcost, mvsadcost, sad_per_bit); 1124 1125 // search_param determines the length of the initial step and hence the number 1126 // of iterations 1127 // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = 1128 // (MAX_FIRST_STEP/4) pel... etc. 1129 ss = &x->ss[search_param * x->searches_per_step]; 1130 tot_steps = (x->ss_count / x->searches_per_step) - search_param; 1131 1132 i = 1; 1133 1134 for (step = 0; step < tot_steps; step++) { 1135 for (j = 0; j < x->searches_per_step; j++) { 1136 // Trap illegal vectors 1137 this_row_offset = best_mv->as_mv.row + ss[i].mv.row; 1138 this_col_offset = best_mv->as_mv.col + ss[i].mv.col; 1139 1140 if ((this_col_offset > x->mv_col_min) && 1141 (this_col_offset < x->mv_col_max) && 1142 (this_row_offset > x->mv_row_min) && 1143 (this_row_offset < x->mv_row_max)) { 1144 check_here = ss[i].offset + best_address; 1145 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, 1146 bestsad); 1147 1148 if (thissad < bestsad) { 1149 this_mv.as_mv.row = this_row_offset; 1150 this_mv.as_mv.col = this_col_offset; 1151 thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, 1152 mvjsadcost, mvsadcost, sad_per_bit); 1153 1154 if (thissad < bestsad) { 1155 bestsad = thissad; 1156 best_site = i; 1157 } 1158 } 1159 } 1160 1161 i++; 1162 } 1163 1164 if (best_site != last_site) { 1165 best_mv->as_mv.row += ss[best_site].mv.row; 1166 best_mv->as_mv.col += ss[best_site].mv.col; 1167 best_address += ss[best_site].offset; 1168 last_site = best_site; 1169 #if defined(NEW_DIAMOND_SEARCH) 1170 while (1) { 1171 this_row_offset = best_mv->as_mv.row + ss[best_site].mv.row; 1172 this_col_offset = best_mv->as_mv.col + ss[best_site].mv.col; 1173 if ((this_col_offset > x->mv_col_min) && 1174 (this_col_offset < x->mv_col_max) && 1175 (this_row_offset > x->mv_row_min) && 1176 (this_row_offset < x->mv_row_max)) { 1177 check_here = ss[best_site].offset + best_address; 1178 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, 1179 bestsad); 1180 if (thissad < bestsad) { 1181 this_mv.as_mv.row = this_row_offset; 1182 this_mv.as_mv.col = this_col_offset; 1183 thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, 1184 mvjsadcost, mvsadcost, sad_per_bit); 1185 if (thissad < bestsad) { 1186 bestsad = thissad; 1187 best_mv->as_mv.row += ss[best_site].mv.row; 1188 best_mv->as_mv.col += ss[best_site].mv.col; 1189 best_address += ss[best_site].offset; 1190 continue; 1191 } 1192 } 1193 } 1194 break; 1195 }; 1196 #endif 1197 } else if (best_address == in_what) { 1198 (*num00)++; 1199 } 1200 } 1201 1202 this_mv.as_mv.row = best_mv->as_mv.row * 8; 1203 this_mv.as_mv.col = best_mv->as_mv.col * 8; 1204 1205 if (bestsad == INT_MAX) 1206 return INT_MAX; 1207 1208 return fn_ptr->vf(what, what_stride, best_address, in_what_stride, 1209 (unsigned int *)(&thissad)) + 1210 mv_err_cost(&this_mv.as_mv, ¢er_mv->as_mv, 1211 mvjcost, mvcost, x->errorperbit); 1212 } 1213 1214 int vp9_diamond_search_sadx4(MACROBLOCK *x, 1215 int_mv *ref_mv, int_mv *best_mv, int search_param, 1216 int sad_per_bit, int *num00, 1217 vp9_variance_fn_ptr_t *fn_ptr, 1218 int *mvjcost, int *mvcost[2], int_mv *center_mv) { 1219 int i, j, step; 1220 1221 const MACROBLOCKD* const xd = &x->e_mbd; 1222 uint8_t *what = x->plane[0].src.buf; 1223 int what_stride = x->plane[0].src.stride; 1224 uint8_t *in_what; 1225 int in_what_stride = xd->plane[0].pre[0].stride; 1226 uint8_t *best_address; 1227 1228 int tot_steps; 1229 int_mv this_mv; 1230 1231 unsigned int bestsad = INT_MAX; 1232 int best_site = 0; 1233 int last_site = 0; 1234 1235 int ref_row; 1236 int ref_col; 1237 int this_row_offset; 1238 int this_col_offset; 1239 search_site *ss; 1240 1241 uint8_t *check_here; 1242 unsigned int thissad; 1243 int_mv fcenter_mv; 1244 1245 int *mvjsadcost = x->nmvjointsadcost; 1246 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; 1247 1248 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; 1249 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; 1250 1251 clamp_mv(&ref_mv->as_mv, 1252 x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); 1253 ref_row = ref_mv->as_mv.row; 1254 ref_col = ref_mv->as_mv.col; 1255 *num00 = 0; 1256 best_mv->as_mv.row = ref_row; 1257 best_mv->as_mv.col = ref_col; 1258 1259 // Work out the start point for the search 1260 in_what = (uint8_t *)(xd->plane[0].pre[0].buf + 1261 (ref_row * (xd->plane[0].pre[0].stride)) + ref_col); 1262 best_address = in_what; 1263 1264 // Check the starting position 1265 bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) 1266 + mvsad_err_cost(&best_mv->as_mv, &fcenter_mv.as_mv, 1267 mvjsadcost, mvsadcost, sad_per_bit); 1268 1269 // search_param determines the length of the initial step and hence the number 1270 // of iterations. 1271 // 0 = initial step (MAX_FIRST_STEP) pel 1272 // 1 = (MAX_FIRST_STEP/2) pel, 1273 // 2 = (MAX_FIRST_STEP/4) pel... 1274 ss = &x->ss[search_param * x->searches_per_step]; 1275 tot_steps = (x->ss_count / x->searches_per_step) - search_param; 1276 1277 i = 1; 1278 1279 for (step = 0; step < tot_steps; step++) { 1280 int all_in = 1, t; 1281 1282 // All_in is true if every one of the points we are checking are within 1283 // the bounds of the image. 1284 all_in &= ((best_mv->as_mv.row + ss[i].mv.row) > x->mv_row_min); 1285 all_in &= ((best_mv->as_mv.row + ss[i + 1].mv.row) < x->mv_row_max); 1286 all_in &= ((best_mv->as_mv.col + ss[i + 2].mv.col) > x->mv_col_min); 1287 all_in &= ((best_mv->as_mv.col + ss[i + 3].mv.col) < x->mv_col_max); 1288 1289 // If all the pixels are within the bounds we don't check whether the 1290 // search point is valid in this loop, otherwise we check each point 1291 // for validity.. 1292 if (all_in) { 1293 unsigned int sad_array[4]; 1294 1295 for (j = 0; j < x->searches_per_step; j += 4) { 1296 unsigned char const *block_offset[4]; 1297 1298 for (t = 0; t < 4; t++) 1299 block_offset[t] = ss[i + t].offset + best_address; 1300 1301 fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, 1302 sad_array); 1303 1304 for (t = 0; t < 4; t++, i++) { 1305 if (sad_array[t] < bestsad) { 1306 this_mv.as_mv.row = best_mv->as_mv.row + ss[i].mv.row; 1307 this_mv.as_mv.col = best_mv->as_mv.col + ss[i].mv.col; 1308 sad_array[t] += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, 1309 mvjsadcost, mvsadcost, sad_per_bit); 1310 1311 if (sad_array[t] < bestsad) { 1312 bestsad = sad_array[t]; 1313 best_site = i; 1314 } 1315 } 1316 } 1317 } 1318 } else { 1319 for (j = 0; j < x->searches_per_step; j++) { 1320 // Trap illegal vectors 1321 this_row_offset = best_mv->as_mv.row + ss[i].mv.row; 1322 this_col_offset = best_mv->as_mv.col + ss[i].mv.col; 1323 1324 if ((this_col_offset > x->mv_col_min) && 1325 (this_col_offset < x->mv_col_max) && 1326 (this_row_offset > x->mv_row_min) && 1327 (this_row_offset < x->mv_row_max)) { 1328 check_here = ss[i].offset + best_address; 1329 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, 1330 bestsad); 1331 1332 if (thissad < bestsad) { 1333 this_mv.as_mv.row = this_row_offset; 1334 this_mv.as_mv.col = this_col_offset; 1335 thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, 1336 mvjsadcost, mvsadcost, sad_per_bit); 1337 1338 if (thissad < bestsad) { 1339 bestsad = thissad; 1340 best_site = i; 1341 } 1342 } 1343 } 1344 i++; 1345 } 1346 } 1347 if (best_site != last_site) { 1348 best_mv->as_mv.row += ss[best_site].mv.row; 1349 best_mv->as_mv.col += ss[best_site].mv.col; 1350 best_address += ss[best_site].offset; 1351 last_site = best_site; 1352 #if defined(NEW_DIAMOND_SEARCH) 1353 while (1) { 1354 this_row_offset = best_mv->as_mv.row + ss[best_site].mv.row; 1355 this_col_offset = best_mv->as_mv.col + ss[best_site].mv.col; 1356 if ((this_col_offset > x->mv_col_min) && 1357 (this_col_offset < x->mv_col_max) && 1358 (this_row_offset > x->mv_row_min) && 1359 (this_row_offset < x->mv_row_max)) { 1360 check_here = ss[best_site].offset + best_address; 1361 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, 1362 bestsad); 1363 if (thissad < bestsad) { 1364 this_mv.as_mv.row = this_row_offset; 1365 this_mv.as_mv.col = this_col_offset; 1366 thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, 1367 mvjsadcost, mvsadcost, sad_per_bit); 1368 if (thissad < bestsad) { 1369 bestsad = thissad; 1370 best_mv->as_mv.row += ss[best_site].mv.row; 1371 best_mv->as_mv.col += ss[best_site].mv.col; 1372 best_address += ss[best_site].offset; 1373 continue; 1374 } 1375 } 1376 } 1377 break; 1378 }; 1379 #endif 1380 } else if (best_address == in_what) { 1381 (*num00)++; 1382 } 1383 } 1384 1385 this_mv.as_mv.row = best_mv->as_mv.row * 8; 1386 this_mv.as_mv.col = best_mv->as_mv.col * 8; 1387 1388 if (bestsad == INT_MAX) 1389 return INT_MAX; 1390 1391 return fn_ptr->vf(what, what_stride, best_address, in_what_stride, 1392 (unsigned int *)(&thissad)) + 1393 mv_err_cost(&this_mv.as_mv, ¢er_mv->as_mv, 1394 mvjcost, mvcost, x->errorperbit); 1395 } 1396 1397 /* do_refine: If last step (1-away) of n-step search doesn't pick the center 1398 point as the best match, we will do a final 1-away diamond 1399 refining search */ 1400 1401 int vp9_full_pixel_diamond(VP9_COMP *cpi, MACROBLOCK *x, 1402 int_mv *mvp_full, int step_param, 1403 int sadpb, int further_steps, 1404 int do_refine, vp9_variance_fn_ptr_t *fn_ptr, 1405 int_mv *ref_mv, int_mv *dst_mv) { 1406 int_mv temp_mv; 1407 int thissme, n, num00; 1408 int bestsme = cpi->diamond_search_sad(x, mvp_full, &temp_mv, 1409 step_param, sadpb, &num00, 1410 fn_ptr, x->nmvjointcost, 1411 x->mvcost, ref_mv); 1412 dst_mv->as_int = temp_mv.as_int; 1413 1414 n = num00; 1415 num00 = 0; 1416 1417 /* If there won't be more n-step search, check to see if refining search is 1418 * needed. */ 1419 if (n > further_steps) 1420 do_refine = 0; 1421 1422 while (n < further_steps) { 1423 n++; 1424 1425 if (num00) { 1426 num00--; 1427 } else { 1428 thissme = cpi->diamond_search_sad(x, mvp_full, &temp_mv, 1429 step_param + n, sadpb, &num00, 1430 fn_ptr, x->nmvjointcost, x->mvcost, 1431 ref_mv); 1432 1433 /* check to see if refining search is needed. */ 1434 if (num00 > (further_steps - n)) 1435 do_refine = 0; 1436 1437 if (thissme < bestsme) { 1438 bestsme = thissme; 1439 dst_mv->as_int = temp_mv.as_int; 1440 } 1441 } 1442 } 1443 1444 /* final 1-away diamond refining search */ 1445 if (do_refine == 1) { 1446 int search_range = 8; 1447 int_mv best_mv; 1448 best_mv.as_int = dst_mv->as_int; 1449 thissme = cpi->refining_search_sad(x, &best_mv, sadpb, search_range, 1450 fn_ptr, x->nmvjointcost, x->mvcost, 1451 ref_mv); 1452 1453 if (thissme < bestsme) { 1454 bestsme = thissme; 1455 dst_mv->as_int = best_mv.as_int; 1456 } 1457 } 1458 return bestsme; 1459 } 1460 1461 int vp9_full_search_sad_c(MACROBLOCK *x, int_mv *ref_mv, 1462 int sad_per_bit, int distance, 1463 vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost, 1464 int *mvcost[2], 1465 int_mv *center_mv, int n) { 1466 const MACROBLOCKD* const xd = &x->e_mbd; 1467 uint8_t *what = x->plane[0].src.buf; 1468 int what_stride = x->plane[0].src.stride; 1469 uint8_t *in_what; 1470 int in_what_stride = xd->plane[0].pre[0].stride; 1471 int mv_stride = xd->plane[0].pre[0].stride; 1472 uint8_t *bestaddress; 1473 int_mv *best_mv = &x->e_mbd.mi_8x8[0]->bmi[n].as_mv[0]; 1474 int_mv this_mv; 1475 int bestsad = INT_MAX; 1476 int r, c; 1477 1478 uint8_t *check_here; 1479 int thissad; 1480 1481 int ref_row = ref_mv->as_mv.row; 1482 int ref_col = ref_mv->as_mv.col; 1483 1484 int row_min = ref_row - distance; 1485 int row_max = ref_row + distance; 1486 int col_min = ref_col - distance; 1487 int col_max = ref_col + distance; 1488 int_mv fcenter_mv; 1489 1490 int *mvjsadcost = x->nmvjointsadcost; 1491 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; 1492 1493 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; 1494 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; 1495 1496 // Work out the mid point for the search 1497 in_what = xd->plane[0].pre[0].buf; 1498 bestaddress = in_what + (ref_row * xd->plane[0].pre[0].stride) + ref_col; 1499 1500 best_mv->as_mv.row = ref_row; 1501 best_mv->as_mv.col = ref_col; 1502 1503 // Baseline value at the centre 1504 bestsad = fn_ptr->sdf(what, what_stride, bestaddress, 1505 in_what_stride, 0x7fffffff) 1506 + mvsad_err_cost(&best_mv->as_mv, &fcenter_mv.as_mv, 1507 mvjsadcost, mvsadcost, sad_per_bit); 1508 1509 // Apply further limits to prevent us looking using vectors that stretch 1510 // beyond the UMV border 1511 col_min = MAX(col_min, x->mv_col_min); 1512 col_max = MIN(col_max, x->mv_col_max); 1513 row_min = MAX(row_min, x->mv_row_min); 1514 row_max = MIN(row_max, x->mv_row_max); 1515 1516 for (r = row_min; r < row_max; r++) { 1517 this_mv.as_mv.row = r; 1518 check_here = r * mv_stride + in_what + col_min; 1519 1520 for (c = col_min; c < col_max; c++) { 1521 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, 1522 bestsad); 1523 1524 this_mv.as_mv.col = c; 1525 thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, 1526 mvjsadcost, mvsadcost, sad_per_bit); 1527 1528 if (thissad < bestsad) { 1529 bestsad = thissad; 1530 best_mv->as_mv.row = r; 1531 best_mv->as_mv.col = c; 1532 bestaddress = check_here; 1533 } 1534 1535 check_here++; 1536 } 1537 } 1538 1539 this_mv.as_mv.row = best_mv->as_mv.row * 8; 1540 this_mv.as_mv.col = best_mv->as_mv.col * 8; 1541 1542 if (bestsad < INT_MAX) 1543 return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, 1544 (unsigned int *)(&thissad)) + 1545 mv_err_cost(&this_mv.as_mv, ¢er_mv->as_mv, 1546 mvjcost, mvcost, x->errorperbit); 1547 else 1548 return INT_MAX; 1549 } 1550 1551 int vp9_full_search_sadx3(MACROBLOCK *x, int_mv *ref_mv, 1552 int sad_per_bit, int distance, 1553 vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost, 1554 int *mvcost[2], int_mv *center_mv, int n) { 1555 const MACROBLOCKD* const xd = &x->e_mbd; 1556 uint8_t *what = x->plane[0].src.buf; 1557 int what_stride = x->plane[0].src.stride; 1558 uint8_t *in_what; 1559 int in_what_stride = xd->plane[0].pre[0].stride; 1560 int mv_stride = xd->plane[0].pre[0].stride; 1561 uint8_t *bestaddress; 1562 int_mv *best_mv = &x->e_mbd.mi_8x8[0]->bmi[n].as_mv[0]; 1563 int_mv this_mv; 1564 unsigned int bestsad = INT_MAX; 1565 int r, c; 1566 1567 uint8_t *check_here; 1568 unsigned int thissad; 1569 1570 int ref_row = ref_mv->as_mv.row; 1571 int ref_col = ref_mv->as_mv.col; 1572 1573 int row_min = ref_row - distance; 1574 int row_max = ref_row + distance; 1575 int col_min = ref_col - distance; 1576 int col_max = ref_col + distance; 1577 1578 unsigned int sad_array[3]; 1579 int_mv fcenter_mv; 1580 1581 int *mvjsadcost = x->nmvjointsadcost; 1582 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; 1583 1584 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; 1585 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; 1586 1587 // Work out the mid point for the search 1588 in_what = xd->plane[0].pre[0].buf; 1589 bestaddress = in_what + (ref_row * xd->plane[0].pre[0].stride) + ref_col; 1590 1591 best_mv->as_mv.row = ref_row; 1592 best_mv->as_mv.col = ref_col; 1593 1594 // Baseline value at the centre 1595 bestsad = fn_ptr->sdf(what, what_stride, 1596 bestaddress, in_what_stride, 0x7fffffff) 1597 + mvsad_err_cost(&best_mv->as_mv, &fcenter_mv.as_mv, 1598 mvjsadcost, mvsadcost, sad_per_bit); 1599 1600 // Apply further limits to prevent us looking using vectors that stretch 1601 // beyond the UMV border 1602 col_min = MAX(col_min, x->mv_col_min); 1603 col_max = MIN(col_max, x->mv_col_max); 1604 row_min = MAX(row_min, x->mv_row_min); 1605 row_max = MIN(row_max, x->mv_row_max); 1606 1607 for (r = row_min; r < row_max; r++) { 1608 this_mv.as_mv.row = r; 1609 check_here = r * mv_stride + in_what + col_min; 1610 c = col_min; 1611 1612 while ((c + 2) < col_max) { 1613 int i; 1614 1615 fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array); 1616 1617 for (i = 0; i < 3; i++) { 1618 thissad = sad_array[i]; 1619 1620 if (thissad < bestsad) { 1621 this_mv.as_mv.col = c; 1622 thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, 1623 mvjsadcost, mvsadcost, sad_per_bit); 1624 1625 if (thissad < bestsad) { 1626 bestsad = thissad; 1627 best_mv->as_mv.row = r; 1628 best_mv->as_mv.col = c; 1629 bestaddress = check_here; 1630 } 1631 } 1632 1633 check_here++; 1634 c++; 1635 } 1636 } 1637 1638 while (c < col_max) { 1639 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, 1640 bestsad); 1641 1642 if (thissad < bestsad) { 1643 this_mv.as_mv.col = c; 1644 thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, 1645 mvjsadcost, mvsadcost, sad_per_bit); 1646 1647 if (thissad < bestsad) { 1648 bestsad = thissad; 1649 best_mv->as_mv.row = r; 1650 best_mv->as_mv.col = c; 1651 bestaddress = check_here; 1652 } 1653 } 1654 1655 check_here++; 1656 c++; 1657 } 1658 } 1659 1660 this_mv.as_mv.row = best_mv->as_mv.row * 8; 1661 this_mv.as_mv.col = best_mv->as_mv.col * 8; 1662 1663 if (bestsad < INT_MAX) 1664 return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, 1665 (unsigned int *)(&thissad)) + 1666 mv_err_cost(&this_mv.as_mv, ¢er_mv->as_mv, 1667 mvjcost, mvcost, x->errorperbit); 1668 else 1669 return INT_MAX; 1670 } 1671 1672 int vp9_full_search_sadx8(MACROBLOCK *x, int_mv *ref_mv, 1673 int sad_per_bit, int distance, 1674 vp9_variance_fn_ptr_t *fn_ptr, 1675 int *mvjcost, int *mvcost[2], 1676 int_mv *center_mv, int n) { 1677 const MACROBLOCKD* const xd = &x->e_mbd; 1678 uint8_t *what = x->plane[0].src.buf; 1679 int what_stride = x->plane[0].src.stride; 1680 uint8_t *in_what; 1681 int in_what_stride = xd->plane[0].pre[0].stride; 1682 int mv_stride = xd->plane[0].pre[0].stride; 1683 uint8_t *bestaddress; 1684 int_mv *best_mv = &x->e_mbd.mi_8x8[0]->bmi[n].as_mv[0]; 1685 int_mv this_mv; 1686 unsigned int bestsad = INT_MAX; 1687 int r, c; 1688 1689 uint8_t *check_here; 1690 unsigned int thissad; 1691 1692 int ref_row = ref_mv->as_mv.row; 1693 int ref_col = ref_mv->as_mv.col; 1694 1695 int row_min = ref_row - distance; 1696 int row_max = ref_row + distance; 1697 int col_min = ref_col - distance; 1698 int col_max = ref_col + distance; 1699 1700 DECLARE_ALIGNED_ARRAY(16, uint32_t, sad_array8, 8); 1701 unsigned int sad_array[3]; 1702 int_mv fcenter_mv; 1703 1704 int *mvjsadcost = x->nmvjointsadcost; 1705 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; 1706 1707 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; 1708 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; 1709 1710 // Work out the mid point for the search 1711 in_what = xd->plane[0].pre[0].buf; 1712 bestaddress = in_what + (ref_row * xd->plane[0].pre[0].stride) + ref_col; 1713 1714 best_mv->as_mv.row = ref_row; 1715 best_mv->as_mv.col = ref_col; 1716 1717 // Baseline value at the centre 1718 bestsad = fn_ptr->sdf(what, what_stride, 1719 bestaddress, in_what_stride, 0x7fffffff) 1720 + mvsad_err_cost(&best_mv->as_mv, &fcenter_mv.as_mv, 1721 mvjsadcost, mvsadcost, sad_per_bit); 1722 1723 // Apply further limits to prevent us looking using vectors that stretch 1724 // beyond the UMV border 1725 col_min = MAX(col_min, x->mv_col_min); 1726 col_max = MIN(col_max, x->mv_col_max); 1727 row_min = MAX(row_min, x->mv_row_min); 1728 row_max = MIN(row_max, x->mv_row_max); 1729 1730 for (r = row_min; r < row_max; r++) { 1731 this_mv.as_mv.row = r; 1732 check_here = r * mv_stride + in_what + col_min; 1733 c = col_min; 1734 1735 while ((c + 7) < col_max) { 1736 int i; 1737 1738 fn_ptr->sdx8f(what, what_stride, check_here, in_what_stride, sad_array8); 1739 1740 for (i = 0; i < 8; i++) { 1741 thissad = (unsigned int)sad_array8[i]; 1742 1743 if (thissad < bestsad) { 1744 this_mv.as_mv.col = c; 1745 thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, 1746 mvjsadcost, mvsadcost, sad_per_bit); 1747 1748 if (thissad < bestsad) { 1749 bestsad = thissad; 1750 best_mv->as_mv.row = r; 1751 best_mv->as_mv.col = c; 1752 bestaddress = check_here; 1753 } 1754 } 1755 1756 check_here++; 1757 c++; 1758 } 1759 } 1760 1761 while ((c + 2) < col_max && fn_ptr->sdx3f != NULL) { 1762 int i; 1763 1764 fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array); 1765 1766 for (i = 0; i < 3; i++) { 1767 thissad = sad_array[i]; 1768 1769 if (thissad < bestsad) { 1770 this_mv.as_mv.col = c; 1771 thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, 1772 mvjsadcost, mvsadcost, sad_per_bit); 1773 1774 if (thissad < bestsad) { 1775 bestsad = thissad; 1776 best_mv->as_mv.row = r; 1777 best_mv->as_mv.col = c; 1778 bestaddress = check_here; 1779 } 1780 } 1781 1782 check_here++; 1783 c++; 1784 } 1785 } 1786 1787 while (c < col_max) { 1788 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, 1789 bestsad); 1790 1791 if (thissad < bestsad) { 1792 this_mv.as_mv.col = c; 1793 thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, 1794 mvjsadcost, mvsadcost, sad_per_bit); 1795 1796 if (thissad < bestsad) { 1797 bestsad = thissad; 1798 best_mv->as_mv.row = r; 1799 best_mv->as_mv.col = c; 1800 bestaddress = check_here; 1801 } 1802 } 1803 1804 check_here++; 1805 c++; 1806 } 1807 } 1808 1809 this_mv.as_mv.row = best_mv->as_mv.row * 8; 1810 this_mv.as_mv.col = best_mv->as_mv.col * 8; 1811 1812 if (bestsad < INT_MAX) 1813 return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, 1814 (unsigned int *)(&thissad)) + 1815 mv_err_cost(&this_mv.as_mv, ¢er_mv->as_mv, 1816 mvjcost, mvcost, x->errorperbit); 1817 else 1818 return INT_MAX; 1819 } 1820 int vp9_refining_search_sad_c(MACROBLOCK *x, 1821 int_mv *ref_mv, int error_per_bit, 1822 int search_range, vp9_variance_fn_ptr_t *fn_ptr, 1823 int *mvjcost, int *mvcost[2], int_mv *center_mv) { 1824 const MACROBLOCKD* const xd = &x->e_mbd; 1825 MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}}; 1826 int i, j; 1827 int this_row_offset, this_col_offset; 1828 1829 int what_stride = x->plane[0].src.stride; 1830 int in_what_stride = xd->plane[0].pre[0].stride; 1831 uint8_t *what = x->plane[0].src.buf; 1832 uint8_t *best_address = xd->plane[0].pre[0].buf + 1833 (ref_mv->as_mv.row * xd->plane[0].pre[0].stride) + 1834 ref_mv->as_mv.col; 1835 uint8_t *check_here; 1836 unsigned int thissad; 1837 int_mv this_mv; 1838 unsigned int bestsad = INT_MAX; 1839 int_mv fcenter_mv; 1840 1841 int *mvjsadcost = x->nmvjointsadcost; 1842 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; 1843 1844 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; 1845 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; 1846 1847 bestsad = fn_ptr->sdf(what, what_stride, best_address, 1848 in_what_stride, 0x7fffffff) + 1849 mvsad_err_cost(&ref_mv->as_mv, &fcenter_mv.as_mv, 1850 mvjsadcost, mvsadcost, error_per_bit); 1851 1852 for (i = 0; i < search_range; i++) { 1853 int best_site = -1; 1854 1855 for (j = 0; j < 4; j++) { 1856 this_row_offset = ref_mv->as_mv.row + neighbors[j].row; 1857 this_col_offset = ref_mv->as_mv.col + neighbors[j].col; 1858 1859 if ((this_col_offset > x->mv_col_min) && 1860 (this_col_offset < x->mv_col_max) && 1861 (this_row_offset > x->mv_row_min) && 1862 (this_row_offset < x->mv_row_max)) { 1863 check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col + 1864 best_address; 1865 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, 1866 bestsad); 1867 1868 if (thissad < bestsad) { 1869 this_mv.as_mv.row = this_row_offset; 1870 this_mv.as_mv.col = this_col_offset; 1871 thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, 1872 mvjsadcost, mvsadcost, error_per_bit); 1873 1874 if (thissad < bestsad) { 1875 bestsad = thissad; 1876 best_site = j; 1877 } 1878 } 1879 } 1880 } 1881 1882 if (best_site == -1) { 1883 break; 1884 } else { 1885 ref_mv->as_mv.row += neighbors[best_site].row; 1886 ref_mv->as_mv.col += neighbors[best_site].col; 1887 best_address += (neighbors[best_site].row) * in_what_stride + 1888 neighbors[best_site].col; 1889 } 1890 } 1891 1892 this_mv.as_mv.row = ref_mv->as_mv.row * 8; 1893 this_mv.as_mv.col = ref_mv->as_mv.col * 8; 1894 1895 if (bestsad < INT_MAX) 1896 return fn_ptr->vf(what, what_stride, best_address, in_what_stride, 1897 (unsigned int *)(&thissad)) + 1898 mv_err_cost(&this_mv.as_mv, ¢er_mv->as_mv, 1899 mvjcost, mvcost, x->errorperbit); 1900 else 1901 return INT_MAX; 1902 } 1903 1904 int vp9_refining_search_sadx4(MACROBLOCK *x, 1905 int_mv *ref_mv, int error_per_bit, 1906 int search_range, vp9_variance_fn_ptr_t *fn_ptr, 1907 int *mvjcost, int *mvcost[2], int_mv *center_mv) { 1908 const MACROBLOCKD* const xd = &x->e_mbd; 1909 MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}}; 1910 int i, j; 1911 int this_row_offset, this_col_offset; 1912 1913 int what_stride = x->plane[0].src.stride; 1914 int in_what_stride = xd->plane[0].pre[0].stride; 1915 uint8_t *what = x->plane[0].src.buf; 1916 uint8_t *best_address = xd->plane[0].pre[0].buf + 1917 (ref_mv->as_mv.row * xd->plane[0].pre[0].stride) + 1918 ref_mv->as_mv.col; 1919 uint8_t *check_here; 1920 unsigned int thissad; 1921 int_mv this_mv; 1922 unsigned int bestsad = INT_MAX; 1923 int_mv fcenter_mv; 1924 1925 int *mvjsadcost = x->nmvjointsadcost; 1926 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; 1927 1928 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; 1929 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; 1930 1931 bestsad = fn_ptr->sdf(what, what_stride, best_address, 1932 in_what_stride, 0x7fffffff) + 1933 mvsad_err_cost(&ref_mv->as_mv, &fcenter_mv.as_mv, 1934 mvjsadcost, mvsadcost, error_per_bit); 1935 1936 for (i = 0; i < search_range; i++) { 1937 int best_site = -1; 1938 int all_in = ((ref_mv->as_mv.row - 1) > x->mv_row_min) & 1939 ((ref_mv->as_mv.row + 1) < x->mv_row_max) & 1940 ((ref_mv->as_mv.col - 1) > x->mv_col_min) & 1941 ((ref_mv->as_mv.col + 1) < x->mv_col_max); 1942 1943 if (all_in) { 1944 unsigned int sad_array[4]; 1945 unsigned char const *block_offset[4]; 1946 block_offset[0] = best_address - in_what_stride; 1947 block_offset[1] = best_address - 1; 1948 block_offset[2] = best_address + 1; 1949 block_offset[3] = best_address + in_what_stride; 1950 1951 fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, 1952 sad_array); 1953 1954 for (j = 0; j < 4; j++) { 1955 if (sad_array[j] < bestsad) { 1956 this_mv.as_mv.row = ref_mv->as_mv.row + neighbors[j].row; 1957 this_mv.as_mv.col = ref_mv->as_mv.col + neighbors[j].col; 1958 sad_array[j] += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, 1959 mvjsadcost, mvsadcost, error_per_bit); 1960 1961 if (sad_array[j] < bestsad) { 1962 bestsad = sad_array[j]; 1963 best_site = j; 1964 } 1965 } 1966 } 1967 } else { 1968 for (j = 0; j < 4; j++) { 1969 this_row_offset = ref_mv->as_mv.row + neighbors[j].row; 1970 this_col_offset = ref_mv->as_mv.col + neighbors[j].col; 1971 1972 if ((this_col_offset > x->mv_col_min) && 1973 (this_col_offset < x->mv_col_max) && 1974 (this_row_offset > x->mv_row_min) && 1975 (this_row_offset < x->mv_row_max)) { 1976 check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col + 1977 best_address; 1978 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, 1979 bestsad); 1980 1981 if (thissad < bestsad) { 1982 this_mv.as_mv.row = this_row_offset; 1983 this_mv.as_mv.col = this_col_offset; 1984 thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, 1985 mvjsadcost, mvsadcost, error_per_bit); 1986 1987 if (thissad < bestsad) { 1988 bestsad = thissad; 1989 best_site = j; 1990 } 1991 } 1992 } 1993 } 1994 } 1995 1996 if (best_site == -1) { 1997 break; 1998 } else { 1999 ref_mv->as_mv.row += neighbors[best_site].row; 2000 ref_mv->as_mv.col += neighbors[best_site].col; 2001 best_address += (neighbors[best_site].row) * in_what_stride + 2002 neighbors[best_site].col; 2003 } 2004 } 2005 2006 this_mv.as_mv.row = ref_mv->as_mv.row * 8; 2007 this_mv.as_mv.col = ref_mv->as_mv.col * 8; 2008 2009 if (bestsad < INT_MAX) 2010 return fn_ptr->vf(what, what_stride, best_address, in_what_stride, 2011 (unsigned int *)(&thissad)) + 2012 mv_err_cost(&this_mv.as_mv, ¢er_mv->as_mv, 2013 mvjcost, mvcost, x->errorperbit); 2014 else 2015 return INT_MAX; 2016 } 2017 2018 /* This function is called when we do joint motion search in comp_inter_inter 2019 * mode. 2020 */ 2021 int vp9_refining_search_8p_c(MACROBLOCK *x, 2022 int_mv *ref_mv, int error_per_bit, 2023 int search_range, vp9_variance_fn_ptr_t *fn_ptr, 2024 int *mvjcost, int *mvcost[2], int_mv *center_mv, 2025 const uint8_t *second_pred, int w, int h) { 2026 const MACROBLOCKD* const xd = &x->e_mbd; 2027 MV neighbors[8] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0}, 2028 {-1, -1}, {1, -1}, {-1, 1}, {1, 1}}; 2029 int i, j; 2030 int this_row_offset, this_col_offset; 2031 2032 int what_stride = x->plane[0].src.stride; 2033 int in_what_stride = xd->plane[0].pre[0].stride; 2034 uint8_t *what = x->plane[0].src.buf; 2035 uint8_t *best_address = xd->plane[0].pre[0].buf + 2036 (ref_mv->as_mv.row * xd->plane[0].pre[0].stride) + 2037 ref_mv->as_mv.col; 2038 uint8_t *check_here; 2039 unsigned int thissad; 2040 int_mv this_mv; 2041 unsigned int bestsad = INT_MAX; 2042 int_mv fcenter_mv; 2043 2044 int *mvjsadcost = x->nmvjointsadcost; 2045 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; 2046 2047 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; 2048 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; 2049 2050 /* Get compound pred by averaging two pred blocks. */ 2051 bestsad = fn_ptr->sdaf(what, what_stride, best_address, in_what_stride, 2052 second_pred, 0x7fffffff) + 2053 mvsad_err_cost(&ref_mv->as_mv, &fcenter_mv.as_mv, 2054 mvjsadcost, mvsadcost, error_per_bit); 2055 2056 for (i = 0; i < search_range; i++) { 2057 int best_site = -1; 2058 2059 for (j = 0; j < 8; j++) { 2060 this_row_offset = ref_mv->as_mv.row + neighbors[j].row; 2061 this_col_offset = ref_mv->as_mv.col + neighbors[j].col; 2062 2063 if ((this_col_offset > x->mv_col_min) && 2064 (this_col_offset < x->mv_col_max) && 2065 (this_row_offset > x->mv_row_min) && 2066 (this_row_offset < x->mv_row_max)) { 2067 check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col + 2068 best_address; 2069 2070 /* Get compound block and use it to calculate SAD. */ 2071 thissad = fn_ptr->sdaf(what, what_stride, check_here, in_what_stride, 2072 second_pred, bestsad); 2073 2074 if (thissad < bestsad) { 2075 this_mv.as_mv.row = this_row_offset; 2076 this_mv.as_mv.col = this_col_offset; 2077 thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, 2078 mvjsadcost, mvsadcost, error_per_bit); 2079 if (thissad < bestsad) { 2080 bestsad = thissad; 2081 best_site = j; 2082 } 2083 } 2084 } 2085 } 2086 2087 if (best_site == -1) { 2088 break; 2089 } else { 2090 ref_mv->as_mv.row += neighbors[best_site].row; 2091 ref_mv->as_mv.col += neighbors[best_site].col; 2092 best_address += (neighbors[best_site].row) * in_what_stride + 2093 neighbors[best_site].col; 2094 } 2095 } 2096 2097 this_mv.as_mv.row = ref_mv->as_mv.row * 8; 2098 this_mv.as_mv.col = ref_mv->as_mv.col * 8; 2099 2100 if (bestsad < INT_MAX) { 2101 // FIXME(rbultje, yunqing): add full-pixel averaging variance functions 2102 // so we don't have to use the subpixel with xoff=0,yoff=0 here. 2103 return fn_ptr->svaf(best_address, in_what_stride, 0, 0, what, what_stride, 2104 (unsigned int *)(&thissad), second_pred) + 2105 mv_err_cost(&this_mv.as_mv, ¢er_mv->as_mv, 2106 mvjcost, mvcost, x->errorperbit); 2107 } else { 2108 return INT_MAX; 2109 } 2110 } 2111