1 /* 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include <limits.h> 12 #include <math.h> 13 #include <stdio.h> 14 15 #include "./vpx_config.h" 16 #include "./vpx_dsp_rtcd.h" 17 18 #include "vpx_dsp/vpx_dsp_common.h" 19 #include "vpx_mem/vpx_mem.h" 20 #include "vpx_ports/mem.h" 21 22 #include "vp9/common/vp9_common.h" 23 #include "vp9/common/vp9_reconinter.h" 24 25 #include "vp9/encoder/vp9_encoder.h" 26 #include "vp9/encoder/vp9_mcomp.h" 27 28 // #define NEW_DIAMOND_SEARCH 29 30 static INLINE const uint8_t *get_buf_from_mv(const struct buf_2d *buf, 31 const MV *mv) { 32 return &buf->buf[mv->row * buf->stride + mv->col]; 33 } 34 35 void vp9_set_mv_search_range(MACROBLOCK *x, const MV *mv) { 36 int col_min = (mv->col >> 3) - MAX_FULL_PEL_VAL + (mv->col & 7 ? 1 : 0); 37 int row_min = (mv->row >> 3) - MAX_FULL_PEL_VAL + (mv->row & 7 ? 1 : 0); 38 int col_max = (mv->col >> 3) + MAX_FULL_PEL_VAL; 39 int row_max = (mv->row >> 3) + MAX_FULL_PEL_VAL; 40 41 col_min = VPXMAX(col_min, (MV_LOW >> 3) + 1); 42 row_min = VPXMAX(row_min, (MV_LOW >> 3) + 1); 43 col_max = VPXMIN(col_max, (MV_UPP >> 3) - 1); 44 row_max = VPXMIN(row_max, (MV_UPP >> 3) - 1); 45 46 // Get intersection of UMV window and valid MV window to reduce # of checks 47 // in diamond search. 48 if (x->mv_col_min < col_min) 49 x->mv_col_min = col_min; 50 if (x->mv_col_max > col_max) 51 x->mv_col_max = col_max; 52 if (x->mv_row_min < row_min) 53 x->mv_row_min = row_min; 54 if (x->mv_row_max > row_max) 55 x->mv_row_max = row_max; 56 } 57 58 int vp9_init_search_range(int size) { 59 int sr = 0; 60 // Minimum search size no matter what the passed in value. 
61 size = VPXMAX(16, size); 62 63 while ((size << sr) < MAX_FULL_PEL_VAL) 64 sr++; 65 66 sr = VPXMIN(sr, MAX_MVSEARCH_STEPS - 2); 67 return sr; 68 } 69 70 static INLINE int mv_cost(const MV *mv, 71 const int *joint_cost, int *const comp_cost[2]) { 72 return joint_cost[vp9_get_mv_joint(mv)] + 73 comp_cost[0][mv->row] + comp_cost[1][mv->col]; 74 } 75 76 int vp9_mv_bit_cost(const MV *mv, const MV *ref, 77 const int *mvjcost, int *mvcost[2], int weight) { 78 const MV diff = { mv->row - ref->row, 79 mv->col - ref->col }; 80 return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) * weight, 7); 81 } 82 83 static int mv_err_cost(const MV *mv, const MV *ref, 84 const int *mvjcost, int *mvcost[2], 85 int error_per_bit) { 86 if (mvcost) { 87 const MV diff = { mv->row - ref->row, 88 mv->col - ref->col }; 89 return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) * 90 error_per_bit, 13); 91 } 92 return 0; 93 } 94 95 static int mvsad_err_cost(const MACROBLOCK *x, const MV *mv, const MV *ref, 96 int error_per_bit) { 97 const MV diff = { mv->row - ref->row, 98 mv->col - ref->col }; 99 return ROUND_POWER_OF_TWO(mv_cost(&diff, x->nmvjointsadcost, 100 x->nmvsadcost) * error_per_bit, 8); 101 } 102 103 void vp9_init_dsmotion_compensation(search_site_config *cfg, int stride) { 104 int len, ss_count = 1; 105 106 cfg->ss[0].mv.col = cfg->ss[0].mv.row = 0; 107 cfg->ss[0].offset = 0; 108 109 for (len = MAX_FIRST_STEP; len > 0; len /= 2) { 110 // Generate offsets for 4 search sites per step. 
111 const MV ss_mvs[] = {{-len, 0}, {len, 0}, {0, -len}, {0, len}}; 112 int i; 113 for (i = 0; i < 4; ++i) { 114 search_site *const ss = &cfg->ss[ss_count++]; 115 ss->mv = ss_mvs[i]; 116 ss->offset = ss->mv.row * stride + ss->mv.col; 117 } 118 } 119 120 cfg->ss_count = ss_count; 121 cfg->searches_per_step = 4; 122 } 123 124 void vp9_init3smotion_compensation(search_site_config *cfg, int stride) { 125 int len, ss_count = 1; 126 127 cfg->ss[0].mv.col = cfg->ss[0].mv.row = 0; 128 cfg->ss[0].offset = 0; 129 130 for (len = MAX_FIRST_STEP; len > 0; len /= 2) { 131 // Generate offsets for 8 search sites per step. 132 const MV ss_mvs[8] = { 133 {-len, 0 }, {len, 0 }, { 0, -len}, {0, len}, 134 {-len, -len}, {-len, len}, {len, -len}, {len, len} 135 }; 136 int i; 137 for (i = 0; i < 8; ++i) { 138 search_site *const ss = &cfg->ss[ss_count++]; 139 ss->mv = ss_mvs[i]; 140 ss->offset = ss->mv.row * stride + ss->mv.col; 141 } 142 } 143 144 cfg->ss_count = ss_count; 145 cfg->searches_per_step = 8; 146 } 147 148 /* 149 * To avoid the penalty for crossing cache-line read, preload the reference 150 * area in a small buffer, which is aligned to make sure there won't be crossing 151 * cache-line read while reading from this buffer. This reduced the cpu 152 * cycles spent on reading ref data in sub-pixel filter functions. 153 * TODO: Currently, since sub-pixel search range here is -3 ~ 3, copy 22 rows x 154 * 32 cols area that is enough for 16x16 macroblock. Later, for SPLITMV, we 155 * could reduce the area. 156 */ 157 158 /* estimated cost of a motion vector (r,c) */ 159 #define MVC(r, c) \ 160 (mvcost ? 
\ 161 ((mvjcost[((r) != rr) * 2 + ((c) != rc)] + \ 162 mvcost[0][((r) - rr)] + mvcost[1][((c) - rc)]) * \ 163 error_per_bit + 4096) >> 13 : 0) 164 165 166 // convert motion vector component to offset for sv[a]f calc 167 static INLINE int sp(int x) { 168 return x & 7; 169 } 170 171 static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) { 172 return &buf[(r >> 3) * stride + (c >> 3)]; 173 } 174 175 /* checks if (r, c) has better score than previous best */ 176 #define CHECK_BETTER(v, r, c) \ 177 if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \ 178 if (second_pred == NULL) \ 179 thismse = vfp->svf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \ 180 src_stride, &sse); \ 181 else \ 182 thismse = vfp->svaf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), \ 183 z, src_stride, &sse, second_pred); \ 184 if ((v = MVC(r, c) + thismse) < besterr) { \ 185 besterr = v; \ 186 br = r; \ 187 bc = c; \ 188 *distortion = thismse; \ 189 *sse1 = sse; \ 190 } \ 191 } else { \ 192 v = INT_MAX; \ 193 } 194 195 #define FIRST_LEVEL_CHECKS \ 196 { \ 197 unsigned int left, right, up, down, diag; \ 198 CHECK_BETTER(left, tr, tc - hstep); \ 199 CHECK_BETTER(right, tr, tc + hstep); \ 200 CHECK_BETTER(up, tr - hstep, tc); \ 201 CHECK_BETTER(down, tr + hstep, tc); \ 202 whichdir = (left < right ? 0 : 1) + \ 203 (up < down ? 
0 : 2); \ 204 switch (whichdir) { \ 205 case 0: \ 206 CHECK_BETTER(diag, tr - hstep, tc - hstep); \ 207 break; \ 208 case 1: \ 209 CHECK_BETTER(diag, tr - hstep, tc + hstep); \ 210 break; \ 211 case 2: \ 212 CHECK_BETTER(diag, tr + hstep, tc - hstep); \ 213 break; \ 214 case 3: \ 215 CHECK_BETTER(diag, tr + hstep, tc + hstep); \ 216 break; \ 217 } \ 218 } 219 220 #define SECOND_LEVEL_CHECKS \ 221 { \ 222 int kr, kc; \ 223 unsigned int second; \ 224 if (tr != br && tc != bc) { \ 225 kr = br - tr; \ 226 kc = bc - tc; \ 227 CHECK_BETTER(second, tr + kr, tc + 2 * kc); \ 228 CHECK_BETTER(second, tr + 2 * kr, tc + kc); \ 229 } else if (tr == br && tc != bc) { \ 230 kc = bc - tc; \ 231 CHECK_BETTER(second, tr + hstep, tc + 2 * kc); \ 232 CHECK_BETTER(second, tr - hstep, tc + 2 * kc); \ 233 switch (whichdir) { \ 234 case 0: \ 235 case 1: \ 236 CHECK_BETTER(second, tr + hstep, tc + kc); \ 237 break; \ 238 case 2: \ 239 case 3: \ 240 CHECK_BETTER(second, tr - hstep, tc + kc); \ 241 break; \ 242 } \ 243 } else if (tr != br && tc == bc) { \ 244 kr = br - tr; \ 245 CHECK_BETTER(second, tr + 2 * kr, tc + hstep); \ 246 CHECK_BETTER(second, tr + 2 * kr, tc - hstep); \ 247 switch (whichdir) { \ 248 case 0: \ 249 case 2: \ 250 CHECK_BETTER(second, tr + kr, tc + hstep); \ 251 break; \ 252 case 1: \ 253 case 3: \ 254 CHECK_BETTER(second, tr + kr, tc - hstep); \ 255 break; \ 256 } \ 257 } \ 258 } 259 260 // TODO(yunqingwang): SECOND_LEVEL_CHECKS_BEST was a rewrote of 261 // SECOND_LEVEL_CHECKS, and SECOND_LEVEL_CHECKS should be rewritten 262 // later in the same way. 
263 #define SECOND_LEVEL_CHECKS_BEST \ 264 { \ 265 unsigned int second; \ 266 int br0 = br; \ 267 int bc0 = bc; \ 268 assert(tr == br || tc == bc); \ 269 if (tr == br && tc != bc) { \ 270 kc = bc - tc; \ 271 } else if (tr != br && tc == bc) { \ 272 kr = br - tr; \ 273 } \ 274 CHECK_BETTER(second, br0 + kr, bc0); \ 275 CHECK_BETTER(second, br0, bc0 + kc); \ 276 if (br0 != br || bc0 != bc) { \ 277 CHECK_BETTER(second, br0 + kr, bc0 + kc); \ 278 } \ 279 } 280 281 #define SETUP_SUBPEL_SEARCH \ 282 const uint8_t *const z = x->plane[0].src.buf; \ 283 const int src_stride = x->plane[0].src.stride; \ 284 const MACROBLOCKD *xd = &x->e_mbd; \ 285 unsigned int besterr = INT_MAX; \ 286 unsigned int sse; \ 287 unsigned int whichdir; \ 288 int thismse; \ 289 const unsigned int halfiters = iters_per_step; \ 290 const unsigned int quarteriters = iters_per_step; \ 291 const unsigned int eighthiters = iters_per_step; \ 292 const int y_stride = xd->plane[0].pre[0].stride; \ 293 const int offset = bestmv->row * y_stride + bestmv->col; \ 294 const uint8_t *const y = xd->plane[0].pre[0].buf; \ 295 \ 296 int rr = ref_mv->row; \ 297 int rc = ref_mv->col; \ 298 int br = bestmv->row * 8; \ 299 int bc = bestmv->col * 8; \ 300 int hstep = 4; \ 301 const int minc = VPXMAX(x->mv_col_min * 8, ref_mv->col - MV_MAX); \ 302 const int maxc = VPXMIN(x->mv_col_max * 8, ref_mv->col + MV_MAX); \ 303 const int minr = VPXMAX(x->mv_row_min * 8, ref_mv->row - MV_MAX); \ 304 const int maxr = VPXMIN(x->mv_row_max * 8, ref_mv->row + MV_MAX); \ 305 int tr = br; \ 306 int tc = bc; \ 307 \ 308 bestmv->row *= 8; \ 309 bestmv->col *= 8; 310 311 static unsigned int setup_center_error(const MACROBLOCKD *xd, 312 const MV *bestmv, 313 const MV *ref_mv, 314 int error_per_bit, 315 const vp9_variance_fn_ptr_t *vfp, 316 const uint8_t *const src, 317 const int src_stride, 318 const uint8_t *const y, 319 int y_stride, 320 const uint8_t *second_pred, 321 int w, int h, int offset, 322 int *mvjcost, int *mvcost[2], 323 unsigned 
int *sse1, 324 int *distortion) { 325 unsigned int besterr; 326 #if CONFIG_VP9_HIGHBITDEPTH 327 if (second_pred != NULL) { 328 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { 329 DECLARE_ALIGNED(16, uint16_t, comp_pred16[64 * 64]); 330 vpx_highbd_comp_avg_pred(comp_pred16, second_pred, w, h, y + offset, 331 y_stride); 332 besterr = vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, src, src_stride, 333 sse1); 334 } else { 335 DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]); 336 vpx_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride); 337 besterr = vfp->vf(comp_pred, w, src, src_stride, sse1); 338 } 339 } else { 340 besterr = vfp->vf(y + offset, y_stride, src, src_stride, sse1); 341 } 342 *distortion = besterr; 343 besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); 344 #else 345 (void) xd; 346 if (second_pred != NULL) { 347 DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]); 348 vpx_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride); 349 besterr = vfp->vf(comp_pred, w, src, src_stride, sse1); 350 } else { 351 besterr = vfp->vf(y + offset, y_stride, src, src_stride, sse1); 352 } 353 *distortion = besterr; 354 besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); 355 #endif // CONFIG_VP9_HIGHBITDEPTH 356 return besterr; 357 } 358 359 static INLINE int divide_and_round(const int n, const int d) { 360 return ((n < 0) ^ (d < 0)) ? ((n - d / 2) / d) : ((n + d / 2) / d); 361 } 362 363 static INLINE int is_cost_list_wellbehaved(int *cost_list) { 364 return cost_list[0] < cost_list[1] && 365 cost_list[0] < cost_list[2] && 366 cost_list[0] < cost_list[3] && 367 cost_list[0] < cost_list[4]; 368 } 369 370 // Returns surface minima estimate at given precision in 1/2^n bits. 
371 // Assume a model for the cost surface: S = A(x - x0)^2 + B(y - y0)^2 + C 372 // For a given set of costs S0, S1, S2, S3, S4 at points 373 // (y, x) = (0, 0), (0, -1), (1, 0), (0, 1) and (-1, 0) respectively, 374 // the solution for the location of the minima (x0, y0) is given by: 375 // x0 = 1/2 (S1 - S3)/(S1 + S3 - 2*S0), 376 // y0 = 1/2 (S4 - S2)/(S4 + S2 - 2*S0). 377 // The code below is an integerized version of that. 378 static void get_cost_surf_min(int *cost_list, int *ir, int *ic, 379 int bits) { 380 *ic = divide_and_round((cost_list[1] - cost_list[3]) * (1 << (bits - 1)), 381 (cost_list[1] - 2 * cost_list[0] + cost_list[3])); 382 *ir = divide_and_round((cost_list[4] - cost_list[2]) * (1 << (bits - 1)), 383 (cost_list[4] - 2 * cost_list[0] + cost_list[2])); 384 } 385 386 int vp9_find_best_sub_pixel_tree_pruned_evenmore( 387 const MACROBLOCK *x, 388 MV *bestmv, const MV *ref_mv, 389 int allow_hp, 390 int error_per_bit, 391 const vp9_variance_fn_ptr_t *vfp, 392 int forced_stop, 393 int iters_per_step, 394 int *cost_list, 395 int *mvjcost, int *mvcost[2], 396 int *distortion, 397 unsigned int *sse1, 398 const uint8_t *second_pred, 399 int w, int h) { 400 SETUP_SUBPEL_SEARCH; 401 besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, 402 z, src_stride, y, y_stride, second_pred, 403 w, h, offset, mvjcost, mvcost, 404 sse1, distortion); 405 (void) halfiters; 406 (void) quarteriters; 407 (void) eighthiters; 408 (void) whichdir; 409 (void) allow_hp; 410 (void) forced_stop; 411 (void) hstep; 412 413 if (cost_list && 414 cost_list[0] != INT_MAX && cost_list[1] != INT_MAX && 415 cost_list[2] != INT_MAX && cost_list[3] != INT_MAX && 416 cost_list[4] != INT_MAX && 417 is_cost_list_wellbehaved(cost_list)) { 418 int ir, ic; 419 unsigned int minpt; 420 get_cost_surf_min(cost_list, &ir, &ic, 2); 421 if (ir != 0 || ic != 0) { 422 CHECK_BETTER(minpt, tr + 2 * ir, tc + 2 * ic); 423 } 424 } else { 425 FIRST_LEVEL_CHECKS; 426 if (halfiters > 1) { 427 
SECOND_LEVEL_CHECKS; 428 } 429 430 tr = br; 431 tc = bc; 432 433 // Each subsequent iteration checks at least one point in common with 434 // the last iteration could be 2 ( if diag selected) 1/4 pel 435 // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only 436 if (forced_stop != 2) { 437 hstep >>= 1; 438 FIRST_LEVEL_CHECKS; 439 if (quarteriters > 1) { 440 SECOND_LEVEL_CHECKS; 441 } 442 } 443 } 444 445 tr = br; 446 tc = bc; 447 448 if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) { 449 hstep >>= 1; 450 FIRST_LEVEL_CHECKS; 451 if (eighthiters > 1) { 452 SECOND_LEVEL_CHECKS; 453 } 454 } 455 456 bestmv->row = br; 457 bestmv->col = bc; 458 459 if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) || 460 (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3))) 461 return INT_MAX; 462 463 return besterr; 464 } 465 466 int vp9_find_best_sub_pixel_tree_pruned_more(const MACROBLOCK *x, 467 MV *bestmv, const MV *ref_mv, 468 int allow_hp, 469 int error_per_bit, 470 const vp9_variance_fn_ptr_t *vfp, 471 int forced_stop, 472 int iters_per_step, 473 int *cost_list, 474 int *mvjcost, int *mvcost[2], 475 int *distortion, 476 unsigned int *sse1, 477 const uint8_t *second_pred, 478 int w, int h) { 479 SETUP_SUBPEL_SEARCH; 480 besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, 481 z, src_stride, y, y_stride, second_pred, 482 w, h, offset, mvjcost, mvcost, 483 sse1, distortion); 484 if (cost_list && 485 cost_list[0] != INT_MAX && cost_list[1] != INT_MAX && 486 cost_list[2] != INT_MAX && cost_list[3] != INT_MAX && 487 cost_list[4] != INT_MAX && 488 is_cost_list_wellbehaved(cost_list)) { 489 unsigned int minpt; 490 int ir, ic; 491 get_cost_surf_min(cost_list, &ir, &ic, 1); 492 if (ir != 0 || ic != 0) { 493 CHECK_BETTER(minpt, tr + ir * hstep, tc + ic * hstep); 494 } 495 } else { 496 FIRST_LEVEL_CHECKS; 497 if (halfiters > 1) { 498 SECOND_LEVEL_CHECKS; 499 } 500 } 501 502 // Each subsequent iteration checks at least one point in common with 
503 // the last iteration could be 2 ( if diag selected) 1/4 pel 504 505 // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only 506 if (forced_stop != 2) { 507 tr = br; 508 tc = bc; 509 hstep >>= 1; 510 FIRST_LEVEL_CHECKS; 511 if (quarteriters > 1) { 512 SECOND_LEVEL_CHECKS; 513 } 514 } 515 516 if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) { 517 tr = br; 518 tc = bc; 519 hstep >>= 1; 520 FIRST_LEVEL_CHECKS; 521 if (eighthiters > 1) { 522 SECOND_LEVEL_CHECKS; 523 } 524 } 525 // These lines insure static analysis doesn't warn that 526 // tr and tc aren't used after the above point. 527 (void) tr; 528 (void) tc; 529 530 bestmv->row = br; 531 bestmv->col = bc; 532 533 if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) || 534 (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3))) 535 return INT_MAX; 536 537 return besterr; 538 } 539 540 int vp9_find_best_sub_pixel_tree_pruned(const MACROBLOCK *x, 541 MV *bestmv, const MV *ref_mv, 542 int allow_hp, 543 int error_per_bit, 544 const vp9_variance_fn_ptr_t *vfp, 545 int forced_stop, 546 int iters_per_step, 547 int *cost_list, 548 int *mvjcost, int *mvcost[2], 549 int *distortion, 550 unsigned int *sse1, 551 const uint8_t *second_pred, 552 int w, int h) { 553 SETUP_SUBPEL_SEARCH; 554 besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, 555 z, src_stride, y, y_stride, second_pred, 556 w, h, offset, mvjcost, mvcost, 557 sse1, distortion); 558 if (cost_list && 559 cost_list[0] != INT_MAX && cost_list[1] != INT_MAX && 560 cost_list[2] != INT_MAX && cost_list[3] != INT_MAX && 561 cost_list[4] != INT_MAX) { 562 unsigned int left, right, up, down, diag; 563 whichdir = (cost_list[1] < cost_list[3] ? 0 : 1) + 564 (cost_list[2] < cost_list[4] ? 
0 : 2); 565 switch (whichdir) { 566 case 0: 567 CHECK_BETTER(left, tr, tc - hstep); 568 CHECK_BETTER(down, tr + hstep, tc); 569 CHECK_BETTER(diag, tr + hstep, tc - hstep); 570 break; 571 case 1: 572 CHECK_BETTER(right, tr, tc + hstep); 573 CHECK_BETTER(down, tr + hstep, tc); 574 CHECK_BETTER(diag, tr + hstep, tc + hstep); 575 break; 576 case 2: 577 CHECK_BETTER(left, tr, tc - hstep); 578 CHECK_BETTER(up, tr - hstep, tc); 579 CHECK_BETTER(diag, tr - hstep, tc - hstep); 580 break; 581 case 3: 582 CHECK_BETTER(right, tr, tc + hstep); 583 CHECK_BETTER(up, tr - hstep, tc); 584 CHECK_BETTER(diag, tr - hstep, tc + hstep); 585 break; 586 } 587 } else { 588 FIRST_LEVEL_CHECKS; 589 if (halfiters > 1) { 590 SECOND_LEVEL_CHECKS; 591 } 592 } 593 594 tr = br; 595 tc = bc; 596 597 // Each subsequent iteration checks at least one point in common with 598 // the last iteration could be 2 ( if diag selected) 1/4 pel 599 600 // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only 601 if (forced_stop != 2) { 602 hstep >>= 1; 603 FIRST_LEVEL_CHECKS; 604 if (quarteriters > 1) { 605 SECOND_LEVEL_CHECKS; 606 } 607 tr = br; 608 tc = bc; 609 } 610 611 if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) { 612 hstep >>= 1; 613 FIRST_LEVEL_CHECKS; 614 if (eighthiters > 1) { 615 SECOND_LEVEL_CHECKS; 616 } 617 tr = br; 618 tc = bc; 619 } 620 // These lines insure static analysis doesn't warn that 621 // tr and tc aren't used after the above point. 
622 (void) tr; 623 (void) tc; 624 625 bestmv->row = br; 626 bestmv->col = bc; 627 628 if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) || 629 (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3))) 630 return INT_MAX; 631 632 return besterr; 633 } 634 635 static const MV search_step_table[12] = { 636 // left, right, up, down 637 {0, -4}, {0, 4}, {-4, 0}, {4, 0}, 638 {0, -2}, {0, 2}, {-2, 0}, {2, 0}, 639 {0, -1}, {0, 1}, {-1, 0}, {1, 0} 640 }; 641 642 int vp9_find_best_sub_pixel_tree(const MACROBLOCK *x, 643 MV *bestmv, const MV *ref_mv, 644 int allow_hp, 645 int error_per_bit, 646 const vp9_variance_fn_ptr_t *vfp, 647 int forced_stop, 648 int iters_per_step, 649 int *cost_list, 650 int *mvjcost, int *mvcost[2], 651 int *distortion, 652 unsigned int *sse1, 653 const uint8_t *second_pred, 654 int w, int h) { 655 const uint8_t *const z = x->plane[0].src.buf; 656 const uint8_t *const src_address = z; 657 const int src_stride = x->plane[0].src.stride; 658 const MACROBLOCKD *xd = &x->e_mbd; 659 unsigned int besterr = INT_MAX; 660 unsigned int sse; 661 int thismse; 662 const int y_stride = xd->plane[0].pre[0].stride; 663 const int offset = bestmv->row * y_stride + bestmv->col; 664 const uint8_t *const y = xd->plane[0].pre[0].buf; 665 666 int rr = ref_mv->row; 667 int rc = ref_mv->col; 668 int br = bestmv->row * 8; 669 int bc = bestmv->col * 8; 670 int hstep = 4; 671 int iter, round = 3 - forced_stop; 672 const int minc = VPXMAX(x->mv_col_min * 8, ref_mv->col - MV_MAX); 673 const int maxc = VPXMIN(x->mv_col_max * 8, ref_mv->col + MV_MAX); 674 const int minr = VPXMAX(x->mv_row_min * 8, ref_mv->row - MV_MAX); 675 const int maxr = VPXMIN(x->mv_row_max * 8, ref_mv->row + MV_MAX); 676 int tr = br; 677 int tc = bc; 678 const MV *search_step = search_step_table; 679 int idx, best_idx = -1; 680 unsigned int cost_array[5]; 681 int kr, kc; 682 683 if (!(allow_hp && vp9_use_mv_hp(ref_mv))) 684 if (round == 3) 685 round = 2; 686 687 bestmv->row *= 8; 688 bestmv->col 
*= 8; 689 690 besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, 691 z, src_stride, y, y_stride, second_pred, 692 w, h, offset, mvjcost, mvcost, 693 sse1, distortion); 694 695 (void) cost_list; // to silence compiler warning 696 697 for (iter = 0; iter < round; ++iter) { 698 // Check vertical and horizontal sub-pixel positions. 699 for (idx = 0; idx < 4; ++idx) { 700 tr = br + search_step[idx].row; 701 tc = bc + search_step[idx].col; 702 if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) { 703 const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3); 704 MV this_mv; 705 this_mv.row = tr; 706 this_mv.col = tc; 707 if (second_pred == NULL) 708 thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr), 709 src_address, src_stride, &sse); 710 else 711 thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr), 712 src_address, src_stride, &sse, second_pred); 713 cost_array[idx] = thismse + 714 mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); 715 716 if (cost_array[idx] < besterr) { 717 best_idx = idx; 718 besterr = cost_array[idx]; 719 *distortion = thismse; 720 *sse1 = sse; 721 } 722 } else { 723 cost_array[idx] = INT_MAX; 724 } 725 } 726 727 // Check diagonal sub-pixel position 728 kc = (cost_array[0] <= cost_array[1] ? -hstep : hstep); 729 kr = (cost_array[2] <= cost_array[3] ? 
-hstep : hstep); 730 731 tc = bc + kc; 732 tr = br + kr; 733 if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) { 734 const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3); 735 MV this_mv = {tr, tc}; 736 if (second_pred == NULL) 737 thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr), 738 src_address, src_stride, &sse); 739 else 740 thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr), 741 src_address, src_stride, &sse, second_pred); 742 cost_array[4] = thismse + 743 mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); 744 745 if (cost_array[4] < besterr) { 746 best_idx = 4; 747 besterr = cost_array[4]; 748 *distortion = thismse; 749 *sse1 = sse; 750 } 751 } else { 752 cost_array[idx] = INT_MAX; 753 } 754 755 if (best_idx < 4 && best_idx >= 0) { 756 br += search_step[best_idx].row; 757 bc += search_step[best_idx].col; 758 } else if (best_idx == 4) { 759 br = tr; 760 bc = tc; 761 } 762 763 if (iters_per_step > 1 && best_idx != -1) 764 SECOND_LEVEL_CHECKS_BEST; 765 766 tr = br; 767 tc = bc; 768 769 search_step += 4; 770 hstep >>= 1; 771 best_idx = -1; 772 } 773 774 // Each subsequent iteration checks at least one point in common with 775 // the last iteration could be 2 ( if diag selected) 1/4 pel 776 777 // These lines insure static analysis doesn't warn that 778 // tr and tc aren't used after the above point. 
779 (void) tr; 780 (void) tc; 781 782 bestmv->row = br; 783 bestmv->col = bc; 784 785 if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) || 786 (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3))) 787 return INT_MAX; 788 789 return besterr; 790 } 791 792 #undef MVC 793 #undef PRE 794 #undef CHECK_BETTER 795 796 static INLINE int check_bounds(const MACROBLOCK *x, int row, int col, 797 int range) { 798 return ((row - range) >= x->mv_row_min) & 799 ((row + range) <= x->mv_row_max) & 800 ((col - range) >= x->mv_col_min) & 801 ((col + range) <= x->mv_col_max); 802 } 803 804 static INLINE int is_mv_in(const MACROBLOCK *x, const MV *mv) { 805 return (mv->col >= x->mv_col_min) && (mv->col <= x->mv_col_max) && 806 (mv->row >= x->mv_row_min) && (mv->row <= x->mv_row_max); 807 } 808 809 #define CHECK_BETTER \ 810 {\ 811 if (thissad < bestsad) {\ 812 if (use_mvcost) \ 813 thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);\ 814 if (thissad < bestsad) {\ 815 bestsad = thissad;\ 816 best_site = i;\ 817 }\ 818 }\ 819 } 820 821 #define MAX_PATTERN_SCALES 11 822 #define MAX_PATTERN_CANDIDATES 8 // max number of canddiates per scale 823 #define PATTERN_CANDIDATES_REF 3 // number of refinement candidates 824 825 // Calculate and return a sad+mvcost list around an integer best pel. 
826 static INLINE void calc_int_cost_list(const MACROBLOCK *x, 827 const MV *ref_mv, 828 int sadpb, 829 const vp9_variance_fn_ptr_t *fn_ptr, 830 const MV *best_mv, 831 int *cost_list) { 832 static const MV neighbors[4] = {{0, -1}, {1, 0}, {0, 1}, {-1, 0}}; 833 const struct buf_2d *const what = &x->plane[0].src; 834 const struct buf_2d *const in_what = &x->e_mbd.plane[0].pre[0]; 835 const MV fcenter_mv = {ref_mv->row >> 3, ref_mv->col >> 3}; 836 int br = best_mv->row; 837 int bc = best_mv->col; 838 MV this_mv; 839 int i; 840 unsigned int sse; 841 842 this_mv.row = br; 843 this_mv.col = bc; 844 cost_list[0] = fn_ptr->vf(what->buf, what->stride, 845 get_buf_from_mv(in_what, &this_mv), 846 in_what->stride, &sse) + 847 mvsad_err_cost(x, &this_mv, &fcenter_mv, sadpb); 848 if (check_bounds(x, br, bc, 1)) { 849 for (i = 0; i < 4; i++) { 850 const MV this_mv = {br + neighbors[i].row, 851 bc + neighbors[i].col}; 852 cost_list[i + 1] = fn_ptr->vf(what->buf, what->stride, 853 get_buf_from_mv(in_what, &this_mv), 854 in_what->stride, &sse) + 855 // mvsad_err_cost(x, &this_mv, &fcenter_mv, sadpb); 856 mv_err_cost(&this_mv, &fcenter_mv, x->nmvjointcost, x->mvcost, 857 x->errorperbit); 858 } 859 } else { 860 for (i = 0; i < 4; i++) { 861 const MV this_mv = {br + neighbors[i].row, 862 bc + neighbors[i].col}; 863 if (!is_mv_in(x, &this_mv)) 864 cost_list[i + 1] = INT_MAX; 865 else 866 cost_list[i + 1] = fn_ptr->vf(what->buf, what->stride, 867 get_buf_from_mv(in_what, &this_mv), 868 in_what->stride, &sse) + 869 // mvsad_err_cost(x, &this_mv, &fcenter_mv, sadpb); 870 mv_err_cost(&this_mv, &fcenter_mv, x->nmvjointcost, x->mvcost, 871 x->errorperbit); 872 } 873 } 874 } 875 876 // Generic pattern search function that searches over multiple scales. 
877 // Each scale can have a different number of candidates and shape of 878 // candidates as indicated in the num_candidates and candidates arrays 879 // passed into this function 880 // 881 static int vp9_pattern_search(const MACROBLOCK *x, 882 MV *ref_mv, 883 int search_param, 884 int sad_per_bit, 885 int do_init_search, 886 int *cost_list, 887 const vp9_variance_fn_ptr_t *vfp, 888 int use_mvcost, 889 const MV *center_mv, 890 MV *best_mv, 891 const int num_candidates[MAX_PATTERN_SCALES], 892 const MV candidates[MAX_PATTERN_SCALES] 893 [MAX_PATTERN_CANDIDATES]) { 894 const MACROBLOCKD *const xd = &x->e_mbd; 895 static const int search_param_to_steps[MAX_MVSEARCH_STEPS] = { 896 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 897 }; 898 int i, s, t; 899 const struct buf_2d *const what = &x->plane[0].src; 900 const struct buf_2d *const in_what = &xd->plane[0].pre[0]; 901 int br, bc; 902 int bestsad = INT_MAX; 903 int thissad; 904 int k = -1; 905 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; 906 int best_init_s = search_param_to_steps[search_param]; 907 // adjust ref_mv to make sure it is within MV range 908 clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); 909 br = ref_mv->row; 910 bc = ref_mv->col; 911 912 // Work out the start point for the search 913 bestsad = vfp->sdf(what->buf, what->stride, 914 get_buf_from_mv(in_what, ref_mv), in_what->stride) + 915 mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit); 916 917 // Search all possible scales upto the search param around the center point 918 // pick the scale of the point that is best as the starting scale of 919 // further steps around it. 
920 if (do_init_search) { 921 s = best_init_s; 922 best_init_s = -1; 923 for (t = 0; t <= s; ++t) { 924 int best_site = -1; 925 if (check_bounds(x, br, bc, 1 << t)) { 926 for (i = 0; i < num_candidates[t]; i++) { 927 const MV this_mv = {br + candidates[t][i].row, 928 bc + candidates[t][i].col}; 929 thissad = vfp->sdf(what->buf, what->stride, 930 get_buf_from_mv(in_what, &this_mv), 931 in_what->stride); 932 CHECK_BETTER 933 } 934 } else { 935 for (i = 0; i < num_candidates[t]; i++) { 936 const MV this_mv = {br + candidates[t][i].row, 937 bc + candidates[t][i].col}; 938 if (!is_mv_in(x, &this_mv)) 939 continue; 940 thissad = vfp->sdf(what->buf, what->stride, 941 get_buf_from_mv(in_what, &this_mv), 942 in_what->stride); 943 CHECK_BETTER 944 } 945 } 946 if (best_site == -1) { 947 continue; 948 } else { 949 best_init_s = t; 950 k = best_site; 951 } 952 } 953 if (best_init_s != -1) { 954 br += candidates[best_init_s][k].row; 955 bc += candidates[best_init_s][k].col; 956 } 957 } 958 959 // If the center point is still the best, just skip this and move to 960 // the refinement step. 
961 if (best_init_s != -1) { 962 int best_site = -1; 963 s = best_init_s; 964 965 do { 966 // No need to search all 6 points the 1st time if initial search was used 967 if (!do_init_search || s != best_init_s) { 968 if (check_bounds(x, br, bc, 1 << s)) { 969 for (i = 0; i < num_candidates[s]; i++) { 970 const MV this_mv = {br + candidates[s][i].row, 971 bc + candidates[s][i].col}; 972 thissad = vfp->sdf(what->buf, what->stride, 973 get_buf_from_mv(in_what, &this_mv), 974 in_what->stride); 975 CHECK_BETTER 976 } 977 } else { 978 for (i = 0; i < num_candidates[s]; i++) { 979 const MV this_mv = {br + candidates[s][i].row, 980 bc + candidates[s][i].col}; 981 if (!is_mv_in(x, &this_mv)) 982 continue; 983 thissad = vfp->sdf(what->buf, what->stride, 984 get_buf_from_mv(in_what, &this_mv), 985 in_what->stride); 986 CHECK_BETTER 987 } 988 } 989 990 if (best_site == -1) { 991 continue; 992 } else { 993 br += candidates[s][best_site].row; 994 bc += candidates[s][best_site].col; 995 k = best_site; 996 } 997 } 998 999 do { 1000 int next_chkpts_indices[PATTERN_CANDIDATES_REF]; 1001 best_site = -1; 1002 next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1; 1003 next_chkpts_indices[1] = k; 1004 next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 
0 : k + 1; 1005 1006 if (check_bounds(x, br, bc, 1 << s)) { 1007 for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { 1008 const MV this_mv = {br + candidates[s][next_chkpts_indices[i]].row, 1009 bc + candidates[s][next_chkpts_indices[i]].col}; 1010 thissad = vfp->sdf(what->buf, what->stride, 1011 get_buf_from_mv(in_what, &this_mv), 1012 in_what->stride); 1013 CHECK_BETTER 1014 } 1015 } else { 1016 for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { 1017 const MV this_mv = {br + candidates[s][next_chkpts_indices[i]].row, 1018 bc + candidates[s][next_chkpts_indices[i]].col}; 1019 if (!is_mv_in(x, &this_mv)) 1020 continue; 1021 thissad = vfp->sdf(what->buf, what->stride, 1022 get_buf_from_mv(in_what, &this_mv), 1023 in_what->stride); 1024 CHECK_BETTER 1025 } 1026 } 1027 1028 if (best_site != -1) { 1029 k = next_chkpts_indices[best_site]; 1030 br += candidates[s][k].row; 1031 bc += candidates[s][k].col; 1032 } 1033 } while (best_site != -1); 1034 } while (s--); 1035 } 1036 1037 // Returns the one-away integer pel sad values around the best as follows: 1038 // cost_list[0]: cost at the best integer pel 1039 // cost_list[1]: cost at delta {0, -1} (left) from the best integer pel 1040 // cost_list[2]: cost at delta { 1, 0} (bottom) from the best integer pel 1041 // cost_list[3]: cost at delta { 0, 1} (right) from the best integer pel 1042 // cost_list[4]: cost at delta {-1, 0} (top) from the best integer pel 1043 if (cost_list) { 1044 const MV best_mv = { br, bc }; 1045 calc_int_cost_list(x, &fcenter_mv, sad_per_bit, vfp, &best_mv, cost_list); 1046 } 1047 best_mv->row = br; 1048 best_mv->col = bc; 1049 return bestsad; 1050 } 1051 1052 // A specialized function where the smallest scale search candidates 1053 // are 4 1-away neighbors, and cost_list is non-null 1054 // TODO(debargha): Merge this function with the one above. Also remove 1055 // use_mvcost option since it is always 1, to save unnecessary branches. 
// Pattern search over the caller-supplied candidate sets which, when
// cost_list is non-NULL, also reports the costs of the four 1-away neighbors
// of the final position.
//   x              : block state; supplies the source buffer (plane[0].src),
//                    the prediction buffer (e_mbd.plane[0].pre[0]) and the
//                    MV limits mv_{row,col}_{min,max}.
//   ref_mv         : starting MV; clamped IN PLACE to the MV limits.
//   search_param   : index into search_param_to_steps; selects the largest
//                    scale that is searched.
//   sad_per_bit    : weight forwarded to mvsad_err_cost().
//   do_init_search : when non-zero, all scales 0..selected are first probed
//                    around the start point to choose the starting scale.
//   cost_list      : optional 5-entry output, see the comment near the end.
//   vfp            : supplies the sdf() SAD function.
//   use_mvcost     : when non-zero, MV cost is added to neighbor SADs that
//                    were collected during the search.
//                    NOTE(review): presumably also read by CHECK_BETTER,
//                    whose definition is outside this block - confirm.
//   center_mv      : reference MV for the MV cost; used after >> 3.
//   best_mv        : receives the final position {br, bc}.
//   num_candidates : number of valid entries per scale in candidates.
//   candidates     : per-scale offsets relative to the current position.
// Returns bestsad: the start-point SAD plus MV cost, lowered whenever
// CHECK_BETTER accepts a candidate.
static int vp9_pattern_search_sad(const MACROBLOCK *x,
                                  MV *ref_mv,
                                  int search_param,
                                  int sad_per_bit,
                                  int do_init_search,
                                  int *cost_list,
                                  const vp9_variance_fn_ptr_t *vfp,
                                  int use_mvcost,
                                  const MV *center_mv,
                                  MV *best_mv,
                                  const int num_candidates[MAX_PATTERN_SCALES],
                                  const MV candidates[MAX_PATTERN_SCALES]
                                                     [MAX_PATTERN_CANDIDATES]) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  // Maps search_param to the largest scale index to be searched.
  static const int search_param_to_steps[MAX_MVSEARCH_STEPS] = {
    10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
  };
  int i, s, t;
  const struct buf_2d *const what = &x->plane[0].src;
  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
  int br, bc;  // current best row / column
  int bestsad = INT_MAX;
  int thissad;
  int k = -1;  // index of the candidate that produced the last move
  const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
  int best_init_s = search_param_to_steps[search_param];
  // adjust ref_mv to make sure it is within MV range
  clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
  br = ref_mv->row;
  bc = ref_mv->col;
  if (cost_list != NULL) {
    // INT_MAX in cost_list[0] marks "not filled in by the search"; it is
    // tested again after the search.
    cost_list[0] = cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] =
        INT_MAX;
  }

  // Work out the start point for the search
  bestsad = vfp->sdf(what->buf, what->stride,
                     get_buf_from_mv(in_what, ref_mv), in_what->stride) +
      mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);

  // Search all possible scales upto the search param around the center point
  // pick the scale of the point that is best as the starting scale of
  // further steps around it.
  if (do_init_search) {
    s = best_init_s;
    best_init_s = -1;
    for (t = 0; t <= s; ++t) {
      int best_site = -1;
      // check_bounds() decides whether the per-candidate is_mv_in() test can
      // be skipped for this scale.
      if (check_bounds(x, br, bc, 1 << t)) {
        for (i = 0; i < num_candidates[t]; i++) {
          const MV this_mv = {br + candidates[t][i].row,
                              bc + candidates[t][i].col};
          thissad = vfp->sdf(what->buf, what->stride,
                             get_buf_from_mv(in_what, &this_mv),
                             in_what->stride);
          CHECK_BETTER
        }
      } else {
        for (i = 0; i < num_candidates[t]; i++) {
          const MV this_mv = {br + candidates[t][i].row,
                              bc + candidates[t][i].col};
          if (!is_mv_in(x, &this_mv))
            continue;
          thissad = vfp->sdf(what->buf, what->stride,
                             get_buf_from_mv(in_what, &this_mv),
                             in_what->stride);
          CHECK_BETTER
        }
      }
      if (best_site == -1) {
        continue;
      } else {
        // Remember the most recent scale at which a candidate was accepted.
        best_init_s = t;
        k = best_site;
      }
    }
    if (best_init_s != -1) {
      br += candidates[best_init_s][k].row;
      bc += candidates[best_init_s][k].col;
    }
  }

  // If the center point is still the best, just skip this and move to
  // the refinement step.
  if (best_init_s != -1) {
    // When scale 0 has exactly four candidates and a cost list is requested,
    // the loop below stops before scale 0, which is then handled by the
    // dedicated pass that also records the neighbor SADs.
    int do_sad = (num_candidates[0] == 4 && cost_list != NULL);
    int best_site = -1;
    s = best_init_s;

    for (; s >= do_sad; s--) {
      // The full candidate set is skipped at the starting scale if the
      // initial search already evaluated it.
      if (!do_init_search || s != best_init_s) {
        if (check_bounds(x, br, bc, 1 << s)) {
          for (i = 0; i < num_candidates[s]; i++) {
            const MV this_mv = {br + candidates[s][i].row,
                                bc + candidates[s][i].col};
            thissad = vfp->sdf(what->buf, what->stride,
                               get_buf_from_mv(in_what, &this_mv),
                               in_what->stride);
            CHECK_BETTER
          }
        } else {
          for (i = 0; i < num_candidates[s]; i++) {
            const MV this_mv = {br + candidates[s][i].row,
                                bc + candidates[s][i].col};
            if (!is_mv_in(x, &this_mv))
              continue;
            thissad = vfp->sdf(what->buf, what->stride,
                               get_buf_from_mv(in_what, &this_mv),
                               in_what->stride);
            CHECK_BETTER
          }
        }

        if (best_site == -1) {
          continue;
        } else {
          br += candidates[s][best_site].row;
          bc += candidates[s][best_site].col;
          k = best_site;
        }
      }

      // Keep moving at this scale; after a move in direction k only k and
      // its two neighbors in the candidate ring (with wrap-around) are
      // re-checked.
      do {
        int next_chkpts_indices[PATTERN_CANDIDATES_REF];
        best_site = -1;
        next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1;
        next_chkpts_indices[1] = k;
        next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1;

        if (check_bounds(x, br, bc, 1 << s)) {
          for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
            const MV this_mv = {br + candidates[s][next_chkpts_indices[i]].row,
                                bc + candidates[s][next_chkpts_indices[i]].col};
            thissad = vfp->sdf(what->buf, what->stride,
                               get_buf_from_mv(in_what, &this_mv),
                               in_what->stride);
            CHECK_BETTER
          }
        } else {
          for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
            const MV this_mv = {br + candidates[s][next_chkpts_indices[i]].row,
                                bc + candidates[s][next_chkpts_indices[i]].col};
            if (!is_mv_in(x, &this_mv))
              continue;
            thissad = vfp->sdf(what->buf, what->stride,
                               get_buf_from_mv(in_what, &this_mv),
                               in_what->stride);
            CHECK_BETTER
          }
        }

        if (best_site != -1) {
          k = next_chkpts_indices[best_site];
          br += candidates[s][k].row;
          bc += candidates[s][k].col;
        }
      } while (best_site != -1);
    }

    // Note: If we enter the if below, then cost_list must be non-NULL.
    // (s can only end at 0 here when do_sad is 1, which requires cost_list.)
    if (s == 0) {
      cost_list[0] = bestsad;
      if (!do_init_search || s != best_init_s) {
        if (check_bounds(x, br, bc, 1 << s)) {
          for (i = 0; i < num_candidates[s]; i++) {
            const MV this_mv = {br + candidates[s][i].row,
                                bc + candidates[s][i].col};
            // Record the raw SAD of each neighbor while evaluating it.
            cost_list[i + 1] =
            thissad = vfp->sdf(what->buf, what->stride,
                               get_buf_from_mv(in_what, &this_mv),
                               in_what->stride);
            CHECK_BETTER
          }
        } else {
          for (i = 0; i < num_candidates[s]; i++) {
            const MV this_mv = {br + candidates[s][i].row,
                                bc + candidates[s][i].col};
            if (!is_mv_in(x, &this_mv))
              continue;
            cost_list[i + 1] =
            thissad = vfp->sdf(what->buf, what->stride,
                               get_buf_from_mv(in_what, &this_mv),
                               in_what->stride);
            CHECK_BETTER
          }
        }

        if (best_site != -1) {
          br += candidates[s][best_site].row;
          bc += candidates[s][best_site].col;
          k = best_site;
        }
      }
      while (best_site != -1) {
        int next_chkpts_indices[PATTERN_CANDIDATES_REF];
        best_site = -1;
        next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1;
        next_chkpts_indices[1] = k;
        next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1;
        // The position moved by candidate k: reset the neighbor costs and
        // store the previous center's cost in slot ((k + 2) % 4) + 1.
        // NOTE(review): with four scale-0 candidates ordered like
        // `neighbors` below, that slot is the direction opposite to k.
        cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] = INT_MAX;
        cost_list[((k + 2) % 4) + 1] = cost_list[0];
        cost_list[0] = bestsad;

        if (check_bounds(x, br, bc, 1 << s)) {
          for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
            const MV this_mv = {br + candidates[s][next_chkpts_indices[i]].row,
                                bc + candidates[s][next_chkpts_indices[i]].col};
            cost_list[next_chkpts_indices[i] + 1] =
            thissad = vfp->sdf(what->buf, what->stride,
                               get_buf_from_mv(in_what, &this_mv),
                               in_what->stride);
            CHECK_BETTER
          }
        } else {
          for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
            const MV this_mv = {br + candidates[s][next_chkpts_indices[i]].row,
                                bc + candidates[s][next_chkpts_indices[i]].col};
            if (!is_mv_in(x, &this_mv)) {
              cost_list[next_chkpts_indices[i] + 1] = INT_MAX;
              continue;
            }
            cost_list[next_chkpts_indices[i] + 1] =
            thissad = vfp->sdf(what->buf, what->stride,
                               get_buf_from_mv(in_what, &this_mv),
                               in_what->stride);
            CHECK_BETTER
          }
        }

        if (best_site != -1) {
          k = next_chkpts_indices[best_site];
          br += candidates[s][k].row;
          bc += candidates[s][k].col;
        }
      }
    }
  }

  // Returns the one-away integer pel sad values around the best as follows:
  // cost_list[0]: sad at the best integer pel
  // cost_list[1]: sad at delta {0, -1} (left)   from the best integer pel
  // cost_list[2]: sad at delta { 1, 0} (bottom) from the best integer pel
  // cost_list[3]: sad at delta { 0, 1} (right)  from the best integer pel
  // cost_list[4]: sad at delta {-1, 0} (top)    from the best integer pel
  if (cost_list) {
    static const MV neighbors[4] = {{0, -1}, {1, 0}, {0, 1}, {-1, 0}};
    if (cost_list[0] == INT_MAX) {
      // The search did not fill the list: compute the four neighbor SADs
      // now (no MV cost is added on this path).
      cost_list[0] = bestsad;
      if (check_bounds(x, br, bc, 1)) {
        for (i = 0; i < 4; i++) {
          const MV this_mv = { br + neighbors[i].row,
                               bc + neighbors[i].col };
          cost_list[i + 1] = vfp->sdf(what->buf, what->stride,
                                      get_buf_from_mv(in_what, &this_mv),
                                      in_what->stride);
        }
      } else {
        for (i = 0; i < 4; i++) {
          const MV this_mv = {br + neighbors[i].row,
                              bc + neighbors[i].col};
          if (!is_mv_in(x, &this_mv))
            cost_list[i + 1] = INT_MAX;
          else
            cost_list[i + 1] = vfp->sdf(what->buf, what->stride,
                                        get_buf_from_mv(in_what, &this_mv),
                                        in_what->stride);
        }
      }
    } else {
      // The list holds raw SADs from the search: add the MV cost to every
      // valid entry when requested.
      if (use_mvcost) {
        for (i = 0; i < 4; i++) {
          const MV this_mv = {br + neighbors[i].row,
                              bc + neighbors[i].col};
          if (cost_list[i + 1] != INT_MAX) {
            cost_list[i + 1] +=
                mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
          }
        }
      }
    }
  }
  best_mv->row = br;
  best_mv->col = bc;
  return bestsad;
}

// Returns vfp->vf() of the source block against the prediction at the
// position best_mv and, when use_mvcost is non-zero, adds mv_err_cost() of
// that MV (scaled by 8) relative to center_mv.
int vp9_get_mvpred_var(const MACROBLOCK *x,
                       const MV *best_mv, const MV *center_mv,
                       const vp9_variance_fn_ptr_t *vfp,
                       int use_mvcost) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  const struct buf_2d *const what = &x->plane[0].src;
  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
  const MV mv = {best_mv->row * 8, best_mv->col * 8};
  unsigned int unused;  // receives vf()'s last output argument; not used

  return vfp->vf(what->buf, what->stride,
                 get_buf_from_mv(in_what, best_mv), in_what->stride, &unused) +
      (use_mvcost ?
mv_err_cost(&mv, center_mv, x->nmvjointcost,
                                 x->mvcost, x->errorperbit) : 0);
}

// Returns vfp->svaf() of the prediction at the position best_mv against the
// source block, passing second_pred through, and, when use_mvcost is
// non-zero, adds mv_err_cost() of that MV (scaled by 8) relative to
// center_mv.
// NOTE(review): the two literal 0 arguments are presumably the sub-pixel
// x/y offsets expected by svaf - confirm against its prototype.
int vp9_get_mvpred_av_var(const MACROBLOCK *x,
                          const MV *best_mv, const MV *center_mv,
                          const uint8_t *second_pred,
                          const vp9_variance_fn_ptr_t *vfp,
                          int use_mvcost) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  const struct buf_2d *const what = &x->plane[0].src;
  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
  const MV mv = {best_mv->row * 8, best_mv->col * 8};
  unsigned int unused;  // receives svaf()'s output argument; not used

  return vfp->svaf(get_buf_from_mv(in_what, best_mv), in_what->stride, 0, 0,
                   what->buf, what->stride, &unused, second_pred) +
      (use_mvcost ?  mv_err_cost(&mv, center_mv, x->nmvjointcost,
                                 x->mvcost, x->errorperbit) : 0);
}

// Hexagon pattern search: forwards all arguments to vp9_pattern_search()
// together with the hexagon candidate tables defined below.
static int hex_search(const MACROBLOCK *x,
                      MV *ref_mv,
                      int search_param,
                      int sad_per_bit,
                      int do_init_search,
                      int *cost_list,
                      const vp9_variance_fn_ptr_t *vfp,
                      int use_mvcost,
                      const MV *center_mv, MV *best_mv) {
  // First scale has 8-closest points, the rest have 6 points in hex shape
  // at increasing scales
  static const int hex_num_candidates[MAX_PATTERN_SCALES] = {
    8, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6
  };
  // Note that the largest candidate step at each scale is 2^scale
  static const MV hex_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = {
    {{-1, -1}, {0, -1}, {1, -1}, {1, 0}, {1, 1}, { 0, 1}, { -1, 1}, {-1, 0}},
    {{-1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0}},
    {{-2, -4}, {2, -4}, {4, 0}, {2, 4}, { -2, 4}, { -4, 0}},
    {{-4, -8}, {4, -8}, {8, 0}, {4, 8}, { -4, 8}, { -8, 0}},
    {{-8, -16}, {8, -16}, {16, 0}, {8, 16}, { -8, 16}, { -16, 0}},
    {{-16, -32}, {16, -32}, {32, 0}, {16, 32}, { -16, 32}, { -32, 0}},
    {{-32, -64}, {32, -64}, {64, 0}, {32, 64}, { -32, 64}, { -64, 0}},
    {{-64, -128}, {64, -128}, {128, 0}, {64, 128}, { -64, 128}, { -128, 0}},
    {{-128, -256}, {128, -256}, {256, 0}, {128, 256}, { -128, 256}, { -256, 0}},
    {{-256, -512}, {256, -512}, {512, 0}, {256, 512}, { -256, 512}, { -512, 0}},
    {{-512, -1024}, {512, -1024}, {1024, 0}, {512, 1024}, { -512, 1024},
      { -1024, 0}},
  };
  return vp9_pattern_search(x, ref_mv, search_param, sad_per_bit,
                            do_init_search, cost_list, vfp, use_mvcost,
                            center_mv, best_mv,
                            hex_num_candidates, hex_candidates);
}

// Big-diamond pattern search: forwards all arguments to
// vp9_pattern_search_sad() together with the diamond candidate tables
// defined below.
static int bigdia_search(const MACROBLOCK *x,
                         MV *ref_mv,
                         int search_param,
                         int sad_per_bit,
                         int do_init_search,
                         int *cost_list,
                         const vp9_variance_fn_ptr_t *vfp,
                         int use_mvcost,
                         const MV *center_mv,
                         MV *best_mv) {
  // First scale has 4-closest points, the rest have 8 points in diamond
  // shape at increasing scales
  static const int bigdia_num_candidates[MAX_PATTERN_SCALES] = {
    4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
  };
  // Note that the largest candidate step at each scale is 2^scale
  static const MV bigdia_candidates[MAX_PATTERN_SCALES]
                                   [MAX_PATTERN_CANDIDATES] = {
    {{0, -1}, {1, 0}, { 0, 1}, {-1, 0}},
    {{-1, -1}, {0, -2}, {1, -1}, {2, 0}, {1, 1}, {0, 2}, {-1, 1}, {-2, 0}},
    {{-2, -2}, {0, -4}, {2, -2}, {4, 0}, {2, 2}, {0, 4}, {-2, 2}, {-4, 0}},
    {{-4, -4}, {0, -8}, {4, -4}, {8, 0}, {4, 4}, {0, 8}, {-4, 4}, {-8, 0}},
    {{-8, -8}, {0, -16}, {8, -8}, {16, 0}, {8, 8}, {0, 16}, {-8, 8}, {-16, 0}},
    {{-16, -16}, {0, -32}, {16, -16}, {32, 0}, {16, 16}, {0, 32},
      {-16, 16}, {-32, 0}},
    {{-32, -32}, {0, -64}, {32, -32}, {64, 0}, {32, 32}, {0, 64},
      {-32, 32}, {-64, 0}},
    {{-64, -64}, {0, -128}, {64, -64}, {128, 0}, {64, 64}, {0, 128},
      {-64, 64}, {-128, 0}},
    {{-128, -128}, {0, -256}, {128, -128}, {256, 0}, {128, 128}, {0, 256},
      {-128, 128}, {-256, 0}},
    {{-256, -256}, {0, -512}, {256, -256}, {512, 0}, {256, 256}, {0, 512},
      {-256, 256}, {-512, 0}},
    {{-512, -512}, {0, -1024}, {512, -512}, {1024, 0}, {512, 512}, {0, 1024},
      {-512, 512}, {-1024, 0}},
  };
  return vp9_pattern_search_sad(x, ref_mv, search_param, sad_per_bit,
                                do_init_search, cost_list, vfp, use_mvcost,
                                center_mv, best_mv,
                                bigdia_num_candidates, bigdia_candidates);
}

// Square pattern search: forwards all arguments to vp9_pattern_search()
// together with the square candidate tables defined below.
static int square_search(const MACROBLOCK *x,
                         MV *ref_mv,
                         int search_param,
                         int sad_per_bit,
                         int do_init_search,
                         int *cost_list,
                         const vp9_variance_fn_ptr_t *vfp,
                         int use_mvcost,
                         const MV *center_mv,
                         MV *best_mv) {
  // All scales have 8 closest points in square shape
  static const int square_num_candidates[MAX_PATTERN_SCALES] = {
    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
  };
  // Note that the largest candidate step at each scale is 2^scale
  static const MV square_candidates[MAX_PATTERN_SCALES]
                                   [MAX_PATTERN_CANDIDATES] = {
    {{-1, -1}, {0, -1}, {1, -1}, {1, 0}, {1, 1}, {0, 1}, {-1, 1}, {-1, 0}},
    {{-2, -2}, {0, -2}, {2, -2}, {2, 0}, {2, 2}, {0, 2}, {-2, 2}, {-2, 0}},
    {{-4, -4}, {0, -4}, {4, -4}, {4, 0}, {4, 4}, {0, 4}, {-4, 4}, {-4, 0}},
    {{-8, -8}, {0, -8}, {8, -8}, {8, 0}, {8, 8}, {0, 8}, {-8, 8}, {-8, 0}},
    {{-16, -16}, {0, -16}, {16, -16}, {16, 0}, {16, 16}, {0, 16},
      {-16, 16}, {-16, 0}},
    {{-32, -32}, {0, -32}, {32, -32}, {32, 0}, {32, 32}, {0, 32},
      {-32, 32}, {-32, 0}},
    {{-64, -64}, {0, -64}, {64, -64}, {64, 0}, {64, 64}, {0, 64},
      {-64, 64}, {-64, 0}},
    {{-128, -128}, {0, -128}, {128, -128}, {128, 0}, {128, 128}, {0, 128},
      {-128, 128}, {-128, 0}},
    {{-256, -256}, {0, -256}, {256, -256}, {256, 0}, {256, 256}, {0, 256},
      {-256, 256}, {-256, 0}},
    {{-512, -512}, {0, -512}, {512, -512}, {512, 0}, {512, 512}, {0, 512},
      {-512, 512}, {-512, 0}},
    {{-1024, -1024}, {0, -1024}, {1024, -1024}, {1024, 0}, {1024, 1024},
      {0, 1024}, {-1024, 1024}, {-1024, 0}},
  };
  return vp9_pattern_search(x, ref_mv, search_param, sad_per_bit,
                            do_init_search, cost_list, vfp, use_mvcost,
                            center_mv, best_mv,
                            square_num_candidates, square_candidates);
}

// hex_search() with search_param raised to at least MAX_MVSEARCH_STEPS - 2;
// every other argument is forwarded unchanged.
static int fast_hex_search(const MACROBLOCK *x,
                           MV *ref_mv,
                           int search_param,
                           int sad_per_bit,
                           int do_init_search,  // must be zero for fast_hex
                           int *cost_list,
                           const vp9_variance_fn_ptr_t *vfp,
                           int use_mvcost,
                           const MV *center_mv,
                           MV *best_mv) {
  return hex_search(x, ref_mv, VPXMAX(MAX_MVSEARCH_STEPS - 2, search_param),
                    sad_per_bit, do_init_search, cost_list, vfp, use_mvcost,
                    center_mv, best_mv);
}

// bigdia_search() with search_param raised to at least
// MAX_MVSEARCH_STEPS - 2; every other argument is forwarded unchanged.
static int fast_dia_search(const MACROBLOCK *x,
                           MV *ref_mv,
                           int search_param,
                           int sad_per_bit,
                           int do_init_search,
                           int *cost_list,
                           const vp9_variance_fn_ptr_t *vfp,
                           int use_mvcost,
                           const MV *center_mv,
                           MV *best_mv) {
  return bigdia_search(
      x, ref_mv, VPXMAX(MAX_MVSEARCH_STEPS - 2, search_param), sad_per_bit,
      do_init_search, cost_list, vfp, use_mvcost, center_mv, best_mv);
}

// The pattern-search functions are the last users of CHECK_BETTER.
#undef CHECK_BETTER

// Exhaustive search of a window of +/-range (64) rows and columns around
// ref_mv, clipped to the MV limits stored in x.
//   ref_mv      : start MV; clamped IN PLACE to the MV limits.
//   best_mv     : receives the position with the lowest cost.
//   sad_per_bit : weight forwarded to mvsad_err_cost().
//   num00       : always set to 11.
//                 NOTE(review): presumably tells the caller to skip any
//                 further search steps - confirm at the call sites.
//   fn_ptr      : supplies sdf() and sdx4df().
//   center_mv   : reference MV for the MV cost; used after >> 3.
// Returns the lowest cost found (SAD plus MV cost).
int vp9_full_range_search_c(const MACROBLOCK *x,
                            const search_site_config *cfg,
                            MV *ref_mv, MV *best_mv,
                            int search_param, int sad_per_bit, int *num00,
                            const vp9_variance_fn_ptr_t *fn_ptr,
                            const MV *center_mv) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  const struct buf_2d *const what = &x->plane[0].src;
  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
  const int range = 64;
  const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
  unsigned int best_sad = INT_MAX;
  int r, c, i;
  int start_col, end_col, start_row, end_row;

  // The cfg and search_param parameters are not used in this search variant
  (void)cfg;
  (void)search_param;

  clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
  *best_mv = *ref_mv;
  *num00 = 11;
  best_sad = fn_ptr->sdf(what->buf, what->stride,
get_buf_from_mv(in_what, ref_mv), in_what->stride) + 1550 mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit); 1551 start_row = VPXMAX(-range, x->mv_row_min - ref_mv->row); 1552 start_col = VPXMAX(-range, x->mv_col_min - ref_mv->col); 1553 end_row = VPXMIN(range, x->mv_row_max - ref_mv->row); 1554 end_col = VPXMIN(range, x->mv_col_max - ref_mv->col); 1555 1556 for (r = start_row; r <= end_row; ++r) { 1557 for (c = start_col; c <= end_col; c += 4) { 1558 if (c + 3 <= end_col) { 1559 unsigned int sads[4]; 1560 const uint8_t *addrs[4]; 1561 for (i = 0; i < 4; ++i) { 1562 const MV mv = {ref_mv->row + r, ref_mv->col + c + i}; 1563 addrs[i] = get_buf_from_mv(in_what, &mv); 1564 } 1565 1566 fn_ptr->sdx4df(what->buf, what->stride, addrs, in_what->stride, sads); 1567 1568 for (i = 0; i < 4; ++i) { 1569 if (sads[i] < best_sad) { 1570 const MV mv = {ref_mv->row + r, ref_mv->col + c + i}; 1571 const unsigned int sad = sads[i] + 1572 mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit); 1573 if (sad < best_sad) { 1574 best_sad = sad; 1575 *best_mv = mv; 1576 } 1577 } 1578 } 1579 } else { 1580 for (i = 0; i < end_col - c; ++i) { 1581 const MV mv = {ref_mv->row + r, ref_mv->col + c + i}; 1582 unsigned int sad = fn_ptr->sdf(what->buf, what->stride, 1583 get_buf_from_mv(in_what, &mv), in_what->stride); 1584 if (sad < best_sad) { 1585 sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit); 1586 if (sad < best_sad) { 1587 best_sad = sad; 1588 *best_mv = mv; 1589 } 1590 } 1591 } 1592 } 1593 } 1594 } 1595 1596 return best_sad; 1597 } 1598 1599 int vp9_diamond_search_sad_c(const MACROBLOCK *x, 1600 const search_site_config *cfg, 1601 MV *ref_mv, MV *best_mv, int search_param, 1602 int sad_per_bit, int *num00, 1603 const vp9_variance_fn_ptr_t *fn_ptr, 1604 const MV *center_mv) { 1605 int i, j, step; 1606 1607 const MACROBLOCKD *const xd = &x->e_mbd; 1608 uint8_t *what = x->plane[0].src.buf; 1609 const int what_stride = x->plane[0].src.stride; 1610 const uint8_t *in_what; 1611 const int 
in_what_stride = xd->plane[0].pre[0].stride; 1612 const uint8_t *best_address; 1613 1614 unsigned int bestsad = INT_MAX; 1615 int best_site = 0; 1616 int last_site = 0; 1617 1618 int ref_row; 1619 int ref_col; 1620 1621 // search_param determines the length of the initial step and hence the number 1622 // of iterations. 1623 // 0 = initial step (MAX_FIRST_STEP) pel 1624 // 1 = (MAX_FIRST_STEP/2) pel, 1625 // 2 = (MAX_FIRST_STEP/4) pel... 1626 const search_site *ss = &cfg->ss[search_param * cfg->searches_per_step]; 1627 const int tot_steps = (cfg->ss_count / cfg->searches_per_step) - search_param; 1628 1629 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; 1630 clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); 1631 ref_row = ref_mv->row; 1632 ref_col = ref_mv->col; 1633 *num00 = 0; 1634 best_mv->row = ref_row; 1635 best_mv->col = ref_col; 1636 1637 // Work out the start point for the search 1638 in_what = xd->plane[0].pre[0].buf + ref_row * in_what_stride + ref_col; 1639 best_address = in_what; 1640 1641 // Check the starting position 1642 bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride) 1643 + mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit); 1644 1645 i = 1; 1646 1647 for (step = 0; step < tot_steps; step++) { 1648 int all_in = 1, t; 1649 1650 // All_in is true if every one of the points we are checking are within 1651 // the bounds of the image. 1652 all_in &= ((best_mv->row + ss[i].mv.row) > x->mv_row_min); 1653 all_in &= ((best_mv->row + ss[i + 1].mv.row) < x->mv_row_max); 1654 all_in &= ((best_mv->col + ss[i + 2].mv.col) > x->mv_col_min); 1655 all_in &= ((best_mv->col + ss[i + 3].mv.col) < x->mv_col_max); 1656 1657 // If all the pixels are within the bounds we don't check whether the 1658 // search point is valid in this loop, otherwise we check each point 1659 // for validity.. 
if (all_in) {
      unsigned int sad_array[4];

      // Evaluate the sites of this step four at a time with sdx4df().
      for (j = 0; j < cfg->searches_per_step; j += 4) {
        unsigned char const *block_offset[4];

        for (t = 0; t < 4; t++)
          block_offset[t] = ss[i + t].offset + best_address;

        fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
                       sad_array);

        for (t = 0; t < 4; t++, i++) {
          // The MV cost is only added when the raw SAD already beats the
          // current best.
          if (sad_array[t] < bestsad) {
            const MV this_mv = {best_mv->row + ss[i].mv.row,
                                best_mv->col + ss[i].mv.col};
            sad_array[t] += mvsad_err_cost(x, &this_mv, &fcenter_mv,
                                           sad_per_bit);
            if (sad_array[t] < bestsad) {
              bestsad = sad_array[t];
              best_site = i;
            }
          }
        }
      }
    } else {
      for (j = 0; j < cfg->searches_per_step; j++) {
        // Trap illegal vectors
        const MV this_mv = {best_mv->row + ss[i].mv.row,
                            best_mv->col + ss[i].mv.col};

        if (is_mv_in(x, &this_mv)) {
          const uint8_t *const check_here = ss[i].offset + best_address;
          unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here,
                                             in_what_stride);

          if (thissad < bestsad) {
            thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
            if (thissad < bestsad) {
              bestsad = thissad;
              best_site = i;
            }
          }
        }
        i++;
      }
    }
    if (best_site != last_site) {
      // A new best site was found in this step: re-center on it.
      best_mv->row += ss[best_site].mv.row;
      best_mv->col += ss[best_site].mv.col;
      best_address += ss[best_site].offset;
      last_site = best_site;
#if defined(NEW_DIAMOND_SEARCH)
      // Keep stepping in the same direction for as long as it improves.
      while (1) {
        const MV this_mv = {best_mv->row + ss[best_site].mv.row,
                            best_mv->col + ss[best_site].mv.col};
        if (is_mv_in(x, &this_mv)) {
          const uint8_t *const check_here = ss[best_site].offset + best_address;
          unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here,
                                             in_what_stride);
          if (thissad < bestsad) {
            thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
            if (thissad < bestsad) {
              bestsad = thissad;
              best_mv->row += ss[best_site].mv.row;
              best_mv->col += ss[best_site].mv.col;
              best_address += ss[best_site].offset;
              continue;
            }
          }
        }
        break;
      }
#endif
    } else if (best_address == in_what) {
      // No new site this step and the search is still at the start point.
      (*num00)++;
    }
  }
  return bestsad;
}

// Coarse-to-fine 1-D match of src against ref.
// Offsets 0, 16, ... up to bw (= 4 << bwl) are tried first; the best one is
// then refined with steps of +/-8, +/-4, +/-2 and +/-1, skipping positions
// outside [0, bw]. Each position is scored with vp9_vector_var().
// Returns the best offset minus bw / 2.
static int vector_match(int16_t *ref, int16_t *src, int bwl) {
  int best_sad = INT_MAX;
  int this_sad;
  int d;
  int center, offset = 0;
  int bw = 4 << bwl;  // redundant variable, to be changed in the experiments.
  for (d = 0; d <= bw; d += 16) {
    this_sad = vp9_vector_var(&ref[d], src, bwl);
    if (this_sad < best_sad) {
      best_sad = this_sad;
      offset = d;
    }
  }
  center = offset;

  for (d = -8; d <= 8; d += 16) {
    int this_pos = offset + d;
    // check limit
    if (this_pos < 0 || this_pos > bw)
      continue;
    this_sad = vp9_vector_var(&ref[this_pos], src, bwl);
    if (this_sad < best_sad) {
      best_sad = this_sad;
      center = this_pos;
    }
  }
  offset = center;

  for (d = -4; d <= 4; d += 8) {
    int this_pos = offset + d;
    // check limit
    if (this_pos < 0 || this_pos > bw)
      continue;
    this_sad = vp9_vector_var(&ref[this_pos], src, bwl);
    if (this_sad < best_sad) {
      best_sad = this_sad;
      center = this_pos;
    }
  }
  offset = center;

  for (d = -2; d <= 2; d += 4) {
    int this_pos = offset + d;
    // check limit
    if (this_pos < 0 || this_pos > bw)
      continue;
    this_sad = vp9_vector_var(&ref[this_pos], src, bwl);
    if (this_sad < best_sad) {
      best_sad = this_sad;
      center = this_pos;
    }
  }
  offset = center;

  for (d = -1; d <= 1; d += 2) {
    int this_pos = offset + d;
    // check limit
    if (this_pos < 0 || this_pos > bw)
      continue;
    this_sad = vp9_vector_var(&ref[this_pos], src, bwl);
    if (this_sad < best_sad) {
      best_sad = this_sad;
      center = this_pos;
    }
  }

  return (center - (bw >> 1));
}

// {row, col} offsets of the four positions probed around the projection
// result, in the same order as the pos[] array built in
// vp9_int_pro_motion_estimation(): above, left, right, below.
static const MV search_pos[4] = {
  {-1, 0}, {0, -1}, {0, 1}, {1, 0},
};

// Motion estimate based on 1-D projections of the source and reference
// blocks.
// The column and row components are found independently with
// vector_match(); the result is refined by probing the four 1-away
// positions and one diagonal position. The MV is written, multiplied by 8,
// to xd->mi[0]->mbmi.mv[0]; the return value is the SAD of the chosen
// position.
// When a scaled reference frame exists, the plane[].pre[0] buffers are
// temporarily replaced by it and restored before returning.
// With CONFIG_VP9_HIGHBITDEPTH enabled the function only returns the SAD at
// the zero MV.
unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x,
                                           BLOCK_SIZE bsize,
                                           int mi_row, int mi_col) {
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0, 0}};
  DECLARE_ALIGNED(16, int16_t, hbuf[128]);
  DECLARE_ALIGNED(16, int16_t, vbuf[128]);
  DECLARE_ALIGNED(16, int16_t, src_hbuf[64]);
  DECLARE_ALIGNED(16, int16_t, src_vbuf[64]);
  int idx;
  const int bw = 4 << b_width_log2_lookup[bsize];
  const int bh = 4 << b_height_log2_lookup[bsize];
  // The reference projections span twice the block size in each direction.
  const int search_width = bw << 1;
  const int search_height = bh << 1;
  const int src_stride = x->plane[0].src.stride;
  const int ref_stride = xd->plane[0].pre[0].stride;
  uint8_t const *ref_buf, *src_buf;
  MV *tmp_mv = &xd->mi[0]->mbmi.mv[0].as_mv;
  unsigned int best_sad, tmp_sad, this_sad[4];
  MV this_mv;
  // Right shift applied to the vp9_int_pro_col() results below.
  const int norm_factor = 3 + (bw >> 5);
  const YV12_BUFFER_CONFIG *scaled_ref_frame =
      vp9_get_scaled_ref_frame(cpi, mbmi->ref_frame[0]);

  if (scaled_ref_frame) {
    int i;
    // Swap out the reference frame for a version that's been scaled to
    // match the resolution of the current frame, allowing the existing
    // motion search code to be used without additional modifications.
    for (i = 0; i < MAX_MB_PLANE; i++)
      backup_yv12[i] = xd->plane[i].pre[0];
    vp9_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL);
  }

#if CONFIG_VP9_HIGHBITDEPTH
  {
    // High bit-depth builds return here: zero MV and its SAD. The
    // projection search below is not reached.
    unsigned int this_sad;
    tmp_mv->row = 0;
    tmp_mv->col = 0;
    this_sad = cpi->fn_ptr[bsize].sdf(x->plane[0].src.buf, src_stride,
                                      xd->plane[0].pre[0].buf, ref_stride);

    if (scaled_ref_frame) {
      int i;
      for (i = 0; i < MAX_MB_PLANE; i++)
        xd->plane[i].pre[0] = backup_yv12[i];
    }
    return this_sad;
  }
#endif

  // Set up prediction 1-D reference set
  // (the window starts bw / 2 pixels to the left of the block position)
  ref_buf = xd->plane[0].pre[0].buf - (bw >> 1);
  for (idx = 0; idx < search_width; idx += 16) {
    vp9_int_pro_row(&hbuf[idx], ref_buf, ref_stride, bh);
    ref_buf += 16;
  }

  // (and bh / 2 rows above the block position)
  ref_buf = xd->plane[0].pre[0].buf - (bh >> 1) * ref_stride;
  for (idx = 0; idx < search_height; ++idx) {
    vbuf[idx] = vp9_int_pro_col(ref_buf, bw) >> norm_factor;
    ref_buf += ref_stride;
  }

  // Set up src 1-D reference set
  for (idx = 0; idx < bw; idx += 16) {
    src_buf = x->plane[0].src.buf + idx;
    vp9_int_pro_row(&src_hbuf[idx], src_buf, src_stride, bh);
  }

  src_buf = x->plane[0].src.buf;
  for (idx = 0; idx < bh; ++idx) {
    src_vbuf[idx] = vp9_int_pro_col(src_buf, bw) >> norm_factor;
    src_buf += src_stride;
  }

  // Find the best match per 1-D search
  tmp_mv->col = vector_match(hbuf, src_hbuf, b_width_log2_lookup[bsize]);
  tmp_mv->row = vector_match(vbuf, src_vbuf, b_height_log2_lookup[bsize]);

  this_mv = *tmp_mv;
  src_buf = x->plane[0].src.buf;
  ref_buf = xd->plane[0].pre[0].buf + this_mv.row * ref_stride + this_mv.col;
  best_sad = cpi->fn_ptr[bsize].sdf(src_buf, src_stride, ref_buf, ref_stride);

  {
    // SADs of the four 1-away positions: above, left, right, below.
    const uint8_t * const pos[4] = {
      ref_buf - ref_stride,
      ref_buf - 1,
      ref_buf + 1,
      ref_buf + ref_stride,
    };

    cpi->fn_ptr[bsize].sdx4df(src_buf, src_stride, pos, ref_stride, this_sad);
  }

  for (idx = 0; idx < 4; ++idx) {
    if (this_sad[idx] < best_sad) {
      best_sad = this_sad[idx];
      tmp_mv->row = search_pos[idx].row + this_mv.row;
      tmp_mv->col = search_pos[idx].col + this_mv.col;
    }
  }

  // Form one diagonal candidate by stepping towards the better vertical
  // neighbor and the better horizontal neighbor.
  if (this_sad[0] < this_sad[3])
    this_mv.row -= 1;
  else
    this_mv.row += 1;

  if (this_sad[1] < this_sad[2])
    this_mv.col -= 1;
  else
    this_mv.col += 1;

  ref_buf = xd->plane[0].pre[0].buf + this_mv.row * ref_stride + this_mv.col;

  tmp_sad = cpi->fn_ptr[bsize].sdf(src_buf, src_stride,
                                   ref_buf, ref_stride);
  if (best_sad > tmp_sad) {
    *tmp_mv = this_mv;
    best_sad = tmp_sad;
  }

  // Scale the stored MV by 8.
  tmp_mv->row *= 8;
  tmp_mv->col *= 8;

  if (scaled_ref_frame) {
    int i;
    for (i = 0; i < MAX_MB_PLANE; i++)
      xd->plane[i].pre[0] = backup_yv12[i];
  }

  return best_sad;
}

// Runs sequence of diamond searches in smaller steps for RD.
/* do_refine: If last step (1-away) of n-step search doesn't pick the center
              point as the best match, we will do a final 1-away diamond
              refining search  */
// Other parameters, as used below:
//   mvp_full      : start MV handed to every diamond search.
//   step_param    : search_param of the first diamond search; later searches
//                   use step_param + n.
//   sadpb         : SAD-per-bit weight forwarded to the searches.
//   further_steps : upper bound for the step counter n.
//   cost_list     : optional; filled by calc_int_cost_list() for dst_mv.
//   ref_mv        : center MV used for the MV cost.
//   dst_mv        : receives the best MV found.
// Returns the best vp9_get_mvpred_var() score (or the raw search result when
// that is INT_MAX).
static int full_pixel_diamond(const VP9_COMP *cpi, MACROBLOCK *x,
                              MV *mvp_full, int step_param,
                              int sadpb, int further_steps, int do_refine,
                              int *cost_list,
                              const vp9_variance_fn_ptr_t *fn_ptr,
                              const MV *ref_mv, MV *dst_mv) {
  MV temp_mv;
  int thissme, n, num00 = 0;
  // The first search stores its num00 count directly in n, so that many of
  // the following steps are skipped.
  int bestsme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &temp_mv,
                                        step_param, sadpb, &n,
                                        fn_ptr, ref_mv);
  if (bestsme < INT_MAX)
    bestsme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
  *dst_mv = temp_mv;

  // If there won't be more n-step search, check to see if refining search is
  // needed.
if (n > further_steps)
    do_refine = 0;

  while (n < further_steps) {
    ++n;

    if (num00) {
      // The previous search reported that this step would not move; skip it.
      num00--;
    } else {
      thissme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &temp_mv,
                                        step_param + n, sadpb, &num00,
                                        fn_ptr, ref_mv);
      if (thissme < INT_MAX)
        thissme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);

      // check to see if refining search is needed.
      if (num00 > further_steps - n)
        do_refine = 0;

      if (thissme < bestsme) {
        bestsme = thissme;
        *dst_mv = temp_mv;
      }
    }
  }

  // final 1-away diamond refining search
  if (do_refine) {
    const int search_range = 8;
    MV best_mv = *dst_mv;
    thissme = vp9_refining_search_sad(x, &best_mv, sadpb, search_range,
                                       fn_ptr, ref_mv);
    if (thissme < INT_MAX)
      thissme = vp9_get_mvpred_var(x, &best_mv, ref_mv, fn_ptr, 1);
    if (thissme < bestsme) {
      bestsme = thissme;
      *dst_mv = best_mv;
    }
  }

  // Return cost list.
  if (cost_list) {
    calc_int_cost_list(x, ref_mv, sadpb, fn_ptr, dst_mv, cost_list);
  }
  return bestsme;
}

// Exhaustive search of the window [ref_mv - distance, ref_mv + distance),
// clipped to the MV limits in x, evaluating one position per sdf() call.
// The upper row / column bounds are exclusive (the loops use `<`).
//   ref_mv      : window center; its cost is the initial best.
//   sad_per_bit : weight forwarded to mvsad_err_cost().
//   fn_ptr      : supplies sdf().
//   center_mv   : reference MV for the MV cost; used after >> 3.
//   best_mv     : receives the position with the lowest cost.
// Returns the lowest cost found (SAD plus MV cost).
int vp9_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv,
                          int sad_per_bit, int distance,
                          const vp9_variance_fn_ptr_t *fn_ptr,
                          const MV *center_mv, MV *best_mv) {
  int r, c;
  const MACROBLOCKD *const xd = &x->e_mbd;
  const struct buf_2d *const what = &x->plane[0].src;
  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
  const int row_min = VPXMAX(ref_mv->row - distance, x->mv_row_min);
  const int row_max = VPXMIN(ref_mv->row + distance, x->mv_row_max);
  const int col_min = VPXMAX(ref_mv->col - distance, x->mv_col_min);
  const int col_max = VPXMIN(ref_mv->col + distance, x->mv_col_max);
  const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
  int best_sad = fn_ptr->sdf(what->buf, what->stride,
      get_buf_from_mv(in_what, ref_mv), in_what->stride) +
      mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
  *best_mv = *ref_mv;

  for (r = row_min; r < row_max; ++r) {
    for (c = col_min; c < col_max; ++c) {
      const MV mv = {r, c};
      const int sad = fn_ptr->sdf(what->buf, what->stride,
          get_buf_from_mv(in_what, &mv), in_what->stride) +
              mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
      if (sad < best_sad) {
        best_sad = sad;
        *best_mv = mv;
      }
    }
  }
  return best_sad;
}

// Same window as vp9_full_search_sad_c(), but when fn_ptr->sdx3f is
// available each row is processed three columns per sdx3f() call; leftover
// columns (or all columns when sdx3f is NULL) are handled one at a time
// with sdf().
// Returns the lowest cost found (SAD plus MV cost) and stores its position
// in best_mv.
int vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv,
                          int sad_per_bit, int distance,
                          const vp9_variance_fn_ptr_t *fn_ptr,
                          const MV *center_mv, MV *best_mv) {
  int r;
  const MACROBLOCKD *const xd = &x->e_mbd;
  const struct buf_2d *const what = &x->plane[0].src;
  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
  const int row_min = VPXMAX(ref_mv->row - distance, x->mv_row_min);
  const int row_max = VPXMIN(ref_mv->row + distance, x->mv_row_max);
  const int col_min = VPXMAX(ref_mv->col - distance, x->mv_col_min);
  const int col_max = VPXMIN(ref_mv->col + distance, x->mv_col_max);
  const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
  unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride,
      get_buf_from_mv(in_what, ref_mv), in_what->stride) +
      mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
  *best_mv = *ref_mv;

  for (r = row_min; r < row_max; ++r) {
    int c = col_min;
    // check_here tracks the reference address of column c in row r.
    const uint8_t *check_here = &in_what->buf[r * in_what->stride + c];

    if (fn_ptr->sdx3f != NULL) {
      while ((c + 2) < col_max) {
        int i;
        DECLARE_ALIGNED(16, uint32_t, sads[3]);

        fn_ptr->sdx3f(what->buf, what->stride, check_here, in_what->stride,
                      sads);

        for (i = 0; i < 3; ++i) {
          unsigned int sad = sads[i];
          // The MV cost is only added when the raw SAD already beats the
          // current best.
          if (sad < best_sad) {
            const MV mv = {r, c};
            sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
            if (sad < best_sad) {
              best_sad = sad;
              *best_mv = mv;
            }
          }
          ++check_here;
          ++c;
        }
      }
    }

    // Remaining columns of this row.
    while (c < col_max) {
      unsigned int sad = fn_ptr->sdf(what->buf, what->stride,
                                     check_here, in_what->stride);
      if (sad < best_sad) {
        const MV mv = {r, c};
        sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
        if (sad < best_sad) {
          best_sad = sad;
          *best_mv = mv;
        }
      }
      ++check_here;
      ++c;
    }
  }

  return best_sad;
}

int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv,
                          int sad_per_bit, int distance,
                          const vp9_variance_fn_ptr_t *fn_ptr,
                          const MV *center_mv, MV *best_mv) {
  int r;
  const MACROBLOCKD *const xd = &x->e_mbd;
  const struct buf_2d *const what = &x->plane[0].src;
  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
  const int row_min = VPXMAX(ref_mv->row - distance, x->mv_row_min);
  const int row_max = VPXMIN(ref_mv->row + distance,
x->mv_row_max); 2126 const int col_min = VPXMAX(ref_mv->col - distance, x->mv_col_min); 2127 const int col_max = VPXMIN(ref_mv->col + distance, x->mv_col_max); 2128 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; 2129 unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride, 2130 get_buf_from_mv(in_what, ref_mv), in_what->stride) + 2131 mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit); 2132 *best_mv = *ref_mv; 2133 2134 for (r = row_min; r < row_max; ++r) { 2135 int c = col_min; 2136 const uint8_t *check_here = &in_what->buf[r * in_what->stride + c]; 2137 2138 if (fn_ptr->sdx8f != NULL) { 2139 while ((c + 7) < col_max) { 2140 int i; 2141 DECLARE_ALIGNED(16, uint32_t, sads[8]); 2142 2143 fn_ptr->sdx8f(what->buf, what->stride, check_here, in_what->stride, 2144 sads); 2145 2146 for (i = 0; i < 8; ++i) { 2147 unsigned int sad = sads[i]; 2148 if (sad < best_sad) { 2149 const MV mv = {r, c}; 2150 sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit); 2151 if (sad < best_sad) { 2152 best_sad = sad; 2153 *best_mv = mv; 2154 } 2155 } 2156 ++check_here; 2157 ++c; 2158 } 2159 } 2160 } 2161 2162 if (fn_ptr->sdx3f != NULL) { 2163 while ((c + 2) < col_max) { 2164 int i; 2165 DECLARE_ALIGNED(16, uint32_t, sads[3]); 2166 2167 fn_ptr->sdx3f(what->buf, what->stride, check_here, in_what->stride, 2168 sads); 2169 2170 for (i = 0; i < 3; ++i) { 2171 unsigned int sad = sads[i]; 2172 if (sad < best_sad) { 2173 const MV mv = {r, c}; 2174 sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit); 2175 if (sad < best_sad) { 2176 best_sad = sad; 2177 *best_mv = mv; 2178 } 2179 } 2180 ++check_here; 2181 ++c; 2182 } 2183 } 2184 } 2185 2186 while (c < col_max) { 2187 unsigned int sad = fn_ptr->sdf(what->buf, what->stride, 2188 check_here, in_what->stride); 2189 if (sad < best_sad) { 2190 const MV mv = {r, c}; 2191 sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit); 2192 if (sad < best_sad) { 2193 best_sad = sad; 2194 *best_mv = mv; 2195 } 2196 } 2197 ++check_here; 
2198 ++c; 2199 } 2200 } 2201 2202 return best_sad; 2203 } 2204 2205 int vp9_refining_search_sad(const MACROBLOCK *x, 2206 MV *ref_mv, int error_per_bit, 2207 int search_range, 2208 const vp9_variance_fn_ptr_t *fn_ptr, 2209 const MV *center_mv) { 2210 const MACROBLOCKD *const xd = &x->e_mbd; 2211 const MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}}; 2212 const struct buf_2d *const what = &x->plane[0].src; 2213 const struct buf_2d *const in_what = &xd->plane[0].pre[0]; 2214 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; 2215 const uint8_t *best_address = get_buf_from_mv(in_what, ref_mv); 2216 unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride, best_address, 2217 in_what->stride) + 2218 mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit); 2219 int i, j; 2220 2221 for (i = 0; i < search_range; i++) { 2222 int best_site = -1; 2223 const int all_in = ((ref_mv->row - 1) > x->mv_row_min) & 2224 ((ref_mv->row + 1) < x->mv_row_max) & 2225 ((ref_mv->col - 1) > x->mv_col_min) & 2226 ((ref_mv->col + 1) < x->mv_col_max); 2227 2228 if (all_in) { 2229 unsigned int sads[4]; 2230 const uint8_t *const positions[4] = { 2231 best_address - in_what->stride, 2232 best_address - 1, 2233 best_address + 1, 2234 best_address + in_what->stride 2235 }; 2236 2237 fn_ptr->sdx4df(what->buf, what->stride, positions, in_what->stride, sads); 2238 2239 for (j = 0; j < 4; ++j) { 2240 if (sads[j] < best_sad) { 2241 const MV mv = {ref_mv->row + neighbors[j].row, 2242 ref_mv->col + neighbors[j].col}; 2243 sads[j] += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit); 2244 if (sads[j] < best_sad) { 2245 best_sad = sads[j]; 2246 best_site = j; 2247 } 2248 } 2249 } 2250 } else { 2251 for (j = 0; j < 4; ++j) { 2252 const MV mv = {ref_mv->row + neighbors[j].row, 2253 ref_mv->col + neighbors[j].col}; 2254 2255 if (is_mv_in(x, &mv)) { 2256 unsigned int sad = fn_ptr->sdf(what->buf, what->stride, 2257 get_buf_from_mv(in_what, &mv), 2258 in_what->stride); 2259 if (sad < 
best_sad) { 2260 sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit); 2261 if (sad < best_sad) { 2262 best_sad = sad; 2263 best_site = j; 2264 } 2265 } 2266 } 2267 } 2268 } 2269 2270 if (best_site == -1) { 2271 break; 2272 } else { 2273 ref_mv->row += neighbors[best_site].row; 2274 ref_mv->col += neighbors[best_site].col; 2275 best_address = get_buf_from_mv(in_what, ref_mv); 2276 } 2277 } 2278 2279 return best_sad; 2280 } 2281 2282 // This function is called when we do joint motion search in comp_inter_inter 2283 // mode. 2284 int vp9_refining_search_8p_c(const MACROBLOCK *x, 2285 MV *ref_mv, int error_per_bit, 2286 int search_range, 2287 const vp9_variance_fn_ptr_t *fn_ptr, 2288 const MV *center_mv, 2289 const uint8_t *second_pred) { 2290 const MV neighbors[8] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0}, 2291 {-1, -1}, {1, -1}, {-1, 1}, {1, 1}}; 2292 const MACROBLOCKD *const xd = &x->e_mbd; 2293 const struct buf_2d *const what = &x->plane[0].src; 2294 const struct buf_2d *const in_what = &xd->plane[0].pre[0]; 2295 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; 2296 unsigned int best_sad = fn_ptr->sdaf(what->buf, what->stride, 2297 get_buf_from_mv(in_what, ref_mv), in_what->stride, second_pred) + 2298 mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit); 2299 int i, j; 2300 2301 for (i = 0; i < search_range; ++i) { 2302 int best_site = -1; 2303 2304 for (j = 0; j < 8; ++j) { 2305 const MV mv = {ref_mv->row + neighbors[j].row, 2306 ref_mv->col + neighbors[j].col}; 2307 2308 if (is_mv_in(x, &mv)) { 2309 unsigned int sad = fn_ptr->sdaf(what->buf, what->stride, 2310 get_buf_from_mv(in_what, &mv), in_what->stride, second_pred); 2311 if (sad < best_sad) { 2312 sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit); 2313 if (sad < best_sad) { 2314 best_sad = sad; 2315 best_site = j; 2316 } 2317 } 2318 } 2319 } 2320 2321 if (best_site == -1) { 2322 break; 2323 } else { 2324 ref_mv->row += neighbors[best_site].row; 2325 ref_mv->col += 
neighbors[best_site].col; 2326 } 2327 } 2328 return best_sad; 2329 } 2330 2331 int vp9_full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x, 2332 BLOCK_SIZE bsize, MV *mvp_full, 2333 int step_param, int error_per_bit, 2334 int *cost_list, 2335 const MV *ref_mv, MV *tmp_mv, 2336 int var_max, int rd) { 2337 const SPEED_FEATURES *const sf = &cpi->sf; 2338 const SEARCH_METHODS method = sf->mv.search_method; 2339 vp9_variance_fn_ptr_t *fn_ptr = &cpi->fn_ptr[bsize]; 2340 int var = 0; 2341 if (cost_list) { 2342 cost_list[0] = INT_MAX; 2343 cost_list[1] = INT_MAX; 2344 cost_list[2] = INT_MAX; 2345 cost_list[3] = INT_MAX; 2346 cost_list[4] = INT_MAX; 2347 } 2348 2349 switch (method) { 2350 case FAST_DIAMOND: 2351 var = fast_dia_search(x, mvp_full, step_param, error_per_bit, 0, 2352 cost_list, fn_ptr, 1, ref_mv, tmp_mv); 2353 break; 2354 case FAST_HEX: 2355 var = fast_hex_search(x, mvp_full, step_param, error_per_bit, 0, 2356 cost_list, fn_ptr, 1, ref_mv, tmp_mv); 2357 break; 2358 case HEX: 2359 var = hex_search(x, mvp_full, step_param, error_per_bit, 1, 2360 cost_list, fn_ptr, 1, ref_mv, tmp_mv); 2361 break; 2362 case SQUARE: 2363 var = square_search(x, mvp_full, step_param, error_per_bit, 1, 2364 cost_list, fn_ptr, 1, ref_mv, tmp_mv); 2365 break; 2366 case BIGDIA: 2367 var = bigdia_search(x, mvp_full, step_param, error_per_bit, 1, 2368 cost_list, fn_ptr, 1, ref_mv, tmp_mv); 2369 break; 2370 case NSTEP: 2371 var = full_pixel_diamond(cpi, x, mvp_full, step_param, error_per_bit, 2372 MAX_MVSEARCH_STEPS - 1 - step_param, 2373 1, cost_list, fn_ptr, ref_mv, tmp_mv); 2374 break; 2375 default: 2376 assert(0 && "Invalid search method."); 2377 } 2378 2379 if (method != NSTEP && rd && var < var_max) 2380 var = vp9_get_mvpred_var(x, tmp_mv, ref_mv, fn_ptr, 1); 2381 2382 return var; 2383 } 2384