1 /* 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include <assert.h> 12 #include <limits.h> 13 #include <math.h> 14 #include <stdio.h> 15 16 #include "./vpx_config.h" 17 #include "./vpx_dsp_rtcd.h" 18 19 #include "vpx_dsp/vpx_dsp_common.h" 20 #include "vpx_mem/vpx_mem.h" 21 #include "vpx_ports/mem.h" 22 23 #include "vp9/common/vp9_common.h" 24 #include "vp9/common/vp9_mvref_common.h" 25 #include "vp9/common/vp9_reconinter.h" 26 27 #include "vp9/encoder/vp9_encoder.h" 28 #include "vp9/encoder/vp9_mcomp.h" 29 30 // #define NEW_DIAMOND_SEARCH 31 32 static INLINE const uint8_t *get_buf_from_mv(const struct buf_2d *buf, 33 const MV *mv) { 34 return &buf->buf[mv->row * buf->stride + mv->col]; 35 } 36 37 void vp9_set_mv_search_range(MvLimits *mv_limits, const MV *mv) { 38 int col_min = (mv->col >> 3) - MAX_FULL_PEL_VAL + (mv->col & 7 ? 1 : 0); 39 int row_min = (mv->row >> 3) - MAX_FULL_PEL_VAL + (mv->row & 7 ? 1 : 0); 40 int col_max = (mv->col >> 3) + MAX_FULL_PEL_VAL; 41 int row_max = (mv->row >> 3) + MAX_FULL_PEL_VAL; 42 43 col_min = VPXMAX(col_min, (MV_LOW >> 3) + 1); 44 row_min = VPXMAX(row_min, (MV_LOW >> 3) + 1); 45 col_max = VPXMIN(col_max, (MV_UPP >> 3) - 1); 46 row_max = VPXMIN(row_max, (MV_UPP >> 3) - 1); 47 48 // Get intersection of UMV window and valid MV window to reduce # of checks 49 // in diamond search. 50 if (mv_limits->col_min < col_min) mv_limits->col_min = col_min; 51 if (mv_limits->col_max > col_max) mv_limits->col_max = col_max; 52 if (mv_limits->row_min < row_min) mv_limits->row_min = row_min; 53 if (mv_limits->row_max > row_max) mv_limits->row_max = row_max; 54 } 55 56 void vp9_set_subpel_mv_search_range(MvLimits *subpel_mv_limits, 57 const MvLimits *umv_window_limits, 58 const MV *ref_mv) { 59 subpel_mv_limits->col_min = VPXMAX(umv_window_limits->col_min * 8, 60 ref_mv->col - MAX_FULL_PEL_VAL * 8); 61 subpel_mv_limits->col_max = VPXMIN(umv_window_limits->col_max * 8, 62 ref_mv->col + MAX_FULL_PEL_VAL * 8); 63 subpel_mv_limits->row_min = VPXMAX(umv_window_limits->row_min * 8, 64 ref_mv->row - MAX_FULL_PEL_VAL * 8); 65 subpel_mv_limits->row_max = VPXMIN(umv_window_limits->row_max * 8, 66 ref_mv->row + MAX_FULL_PEL_VAL * 8); 67 68 subpel_mv_limits->col_min = VPXMAX(MV_LOW + 1, subpel_mv_limits->col_min); 69 subpel_mv_limits->col_max = VPXMIN(MV_UPP - 1, subpel_mv_limits->col_max); 70 subpel_mv_limits->row_min = VPXMAX(MV_LOW + 1, subpel_mv_limits->row_min); 71 subpel_mv_limits->row_max = VPXMIN(MV_UPP - 1, subpel_mv_limits->row_max); 72 } 73 74 int vp9_init_search_range(int size) { 75 int sr = 0; 76 // Minimum search size no matter what the passed in value. 77 size = VPXMAX(16, size); 78 79 while ((size << sr) < MAX_FULL_PEL_VAL) sr++; 80 81 sr = VPXMIN(sr, MAX_MVSEARCH_STEPS - 2); 82 return sr; 83 } 84 85 static INLINE int mv_cost(const MV *mv, const int *joint_cost, 86 int *const comp_cost[2]) { 87 assert(mv->row >= -MV_MAX && mv->row < MV_MAX); 88 assert(mv->col >= -MV_MAX && mv->col < MV_MAX); 89 return joint_cost[vp9_get_mv_joint(mv)] + comp_cost[0][mv->row] + 90 comp_cost[1][mv->col]; 91 } 92 93 int vp9_mv_bit_cost(const MV *mv, const MV *ref, const int *mvjcost, 94 int *mvcost[2], int weight) { 95 const MV diff = { mv->row - ref->row, mv->col - ref->col }; 96 return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) * weight, 7); 97 } 98 99 #define PIXEL_TRANSFORM_ERROR_SCALE 4 100 static int mv_err_cost(const MV *mv, const MV *ref, const int *mvjcost, 101 int *mvcost[2], int error_per_bit) { 102 if (mvcost) { 103 const MV diff = { mv->row - ref->row, mv->col - ref->col }; 104 return (int)ROUND64_POWER_OF_TWO( 105 (int64_t)mv_cost(&diff, mvjcost, mvcost) * error_per_bit, 106 RDDIV_BITS + VP9_PROB_COST_SHIFT - RD_EPB_SHIFT + 107 PIXEL_TRANSFORM_ERROR_SCALE); 108 } 109 return 0; 110 } 111 112 static int mvsad_err_cost(const MACROBLOCK *x, const MV *mv, const MV *ref, 113 int sad_per_bit) { 114 const MV diff = { mv->row - ref->row, mv->col - ref->col }; 115 return ROUND_POWER_OF_TWO( 116 (unsigned)mv_cost(&diff, x->nmvjointsadcost, x->nmvsadcost) * sad_per_bit, 117 VP9_PROB_COST_SHIFT); 118 } 119 120 void vp9_init_dsmotion_compensation(search_site_config *cfg, int stride) { 121 int len; 122 int ss_count = 0; 123 124 for (len = MAX_FIRST_STEP; len > 0; len /= 2) { 125 // Generate offsets for 4 search sites per step. 126 const MV ss_mvs[] = { { -len, 0 }, { len, 0 }, { 0, -len }, { 0, len } }; 127 int i; 128 for (i = 0; i < 4; ++i, ++ss_count) { 129 cfg->ss_mv[ss_count] = ss_mvs[i]; 130 cfg->ss_os[ss_count] = ss_mvs[i].row * stride + ss_mvs[i].col; 131 } 132 } 133 134 cfg->searches_per_step = 4; 135 cfg->total_steps = ss_count / cfg->searches_per_step; 136 } 137 138 void vp9_init3smotion_compensation(search_site_config *cfg, int stride) { 139 int len; 140 int ss_count = 0; 141 142 for (len = MAX_FIRST_STEP; len > 0; len /= 2) { 143 // Generate offsets for 8 search sites per step. 144 const MV ss_mvs[8] = { { -len, 0 }, { len, 0 }, { 0, -len }, 145 { 0, len }, { -len, -len }, { -len, len }, 146 { len, -len }, { len, len } }; 147 int i; 148 for (i = 0; i < 8; ++i, ++ss_count) { 149 cfg->ss_mv[ss_count] = ss_mvs[i]; 150 cfg->ss_os[ss_count] = ss_mvs[i].row * stride + ss_mvs[i].col; 151 } 152 } 153 154 cfg->searches_per_step = 8; 155 cfg->total_steps = ss_count / cfg->searches_per_step; 156 } 157 158 // convert motion vector component to offset for sv[a]f calc 159 static INLINE int sp(int x) { return x & 7; } 160 161 static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) { 162 return &buf[(r >> 3) * stride + (c >> 3)]; 163 } 164 165 #if CONFIG_VP9_HIGHBITDEPTH 166 /* checks if (r, c) has better score than previous best */ 167 #define CHECK_BETTER(v, r, c) \ 168 if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \ 169 int64_t tmpmse; \ 170 const MV mv = { r, c }; \ 171 const MV ref_mv = { rr, rc }; \ 172 if (second_pred == NULL) { \ 173 thismse = vfp->svf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \ 174 src_stride, &sse); \ 175 } else { \ 176 thismse = vfp->svaf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \ 177 src_stride, &sse, second_pred); \ 178 } \ 179 tmpmse = thismse; \ 180 tmpmse += mv_err_cost(&mv, &ref_mv, mvjcost, mvcost, error_per_bit); \ 181 if (tmpmse >= INT_MAX) { \ 182 v = INT_MAX; \ 183 } else if ((v = (uint32_t)tmpmse) < besterr) { \ 184 besterr = v; \ 185 br = r; \ 186 bc = c; \ 187 *distortion = thismse; \ 188 *sse1 = sse; \ 189 } \ 190 } else { \ 191 v = INT_MAX; \ 192 } 193 #else 194 /* checks if (r, c) has better score than previous best */ 195 #define CHECK_BETTER(v, r, c) \ 196 if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \ 197 const MV mv = { r, c }; \ 198 const MV ref_mv = { rr, rc }; \ 199 if (second_pred == NULL) \ 200 thismse = vfp->svf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \ 201 src_stride, &sse); \ 202 else \ 203 thismse = vfp->svaf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \ 204 src_stride, &sse, second_pred); \ 205 if ((v = mv_err_cost(&mv, &ref_mv, mvjcost, mvcost, error_per_bit) + \ 206 thismse) < besterr) { \ 207 besterr = v; \ 208 br = r; \ 209 bc = c; \ 210 *distortion = thismse; \ 211 *sse1 = sse; \ 212 } \ 213 } else { \ 214 v = INT_MAX; \ 215 } 216 217 #endif 218 #define FIRST_LEVEL_CHECKS \ 219 { \ 220 unsigned int left, right, up, down, diag; \ 221 CHECK_BETTER(left, tr, tc - hstep); \ 222 CHECK_BETTER(right, tr, tc + hstep); \ 223 CHECK_BETTER(up, tr - hstep, tc); \ 224 CHECK_BETTER(down, tr + hstep, tc); \ 225 whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); \ 226 switch (whichdir) { \ 227 case 0: CHECK_BETTER(diag, tr - hstep, tc - hstep); break; \ 228 case 1: CHECK_BETTER(diag, tr - hstep, tc + hstep); break; \ 229 case 2: CHECK_BETTER(diag, tr + hstep, tc - hstep); break; \ 230 case 3: CHECK_BETTER(diag, tr + hstep, tc + hstep); break; \ 231 } \ 232 } 233 234 #define SECOND_LEVEL_CHECKS \ 235 { \ 236 int kr, kc; \ 237 unsigned int second; \ 238 if (tr != br && tc != bc) { \ 239 kr = br - tr; \ 240 kc = bc - tc; \ 241 CHECK_BETTER(second, tr + kr, tc + 2 * kc); \ 242 CHECK_BETTER(second, tr + 2 * kr, tc + kc); \ 243 } else if (tr == br && tc != bc) { \ 244 kc = bc - tc; \ 245 CHECK_BETTER(second, tr + hstep, tc + 2 * kc); \ 246 CHECK_BETTER(second, tr - hstep, tc + 2 * kc); \ 247 switch (whichdir) { \ 248 case 0: \ 249 case 1: CHECK_BETTER(second, tr + hstep, tc + kc); break; \ 250 case 2: \ 251 case 3: CHECK_BETTER(second, tr - hstep, tc + kc); break; \ 252 } \ 253 } else if (tr != br && tc == bc) { \ 254 kr = br - tr; \ 255 CHECK_BETTER(second, tr + 2 * kr, tc + hstep); \ 256 CHECK_BETTER(second, tr + 2 * kr, tc - hstep); \ 257 switch (whichdir) { \ 258 case 0: \ 259 case 2: CHECK_BETTER(second, tr + kr, tc + hstep); break; \ 260 case 1: \ 261 case 3: CHECK_BETTER(second, tr + kr, tc - hstep); break; \ 262 } \ 263 } \ 264 } 265 266 // TODO(yunqingwang): SECOND_LEVEL_CHECKS_BEST was a rewrote of 267 // SECOND_LEVEL_CHECKS, and SECOND_LEVEL_CHECKS should be rewritten 268 // later in the same way. 269 #define SECOND_LEVEL_CHECKS_BEST \ 270 { \ 271 unsigned int second; \ 272 int br0 = br; \ 273 int bc0 = bc; \ 274 assert(tr == br || tc == bc); \ 275 if (tr == br && tc != bc) { \ 276 kc = bc - tc; \ 277 } else if (tr != br && tc == bc) { \ 278 kr = br - tr; \ 279 } \ 280 CHECK_BETTER(second, br0 + kr, bc0); \ 281 CHECK_BETTER(second, br0, bc0 + kc); \ 282 if (br0 != br || bc0 != bc) { \ 283 CHECK_BETTER(second, br0 + kr, bc0 + kc); \ 284 } \ 285 } 286 287 #define SETUP_SUBPEL_SEARCH \ 288 const uint8_t *const z = x->plane[0].src.buf; \ 289 const int src_stride = x->plane[0].src.stride; \ 290 const MACROBLOCKD *xd = &x->e_mbd; \ 291 unsigned int besterr = UINT_MAX; \ 292 unsigned int sse; \ 293 unsigned int whichdir; \ 294 int thismse; \ 295 const unsigned int halfiters = iters_per_step; \ 296 const unsigned int quarteriters = iters_per_step; \ 297 const unsigned int eighthiters = iters_per_step; \ 298 const int y_stride = xd->plane[0].pre[0].stride; \ 299 const int offset = bestmv->row * y_stride + bestmv->col; \ 300 const uint8_t *const y = xd->plane[0].pre[0].buf; \ 301 \ 302 int rr = ref_mv->row; \ 303 int rc = ref_mv->col; \ 304 int br = bestmv->row * 8; \ 305 int bc = bestmv->col * 8; \ 306 int hstep = 4; \ 307 int minc, maxc, minr, maxr; \ 308 int tr = br; \ 309 int tc = bc; \ 310 MvLimits subpel_mv_limits; \ 311 \ 312 vp9_set_subpel_mv_search_range(&subpel_mv_limits, &x->mv_limits, ref_mv); \ 313 minc = subpel_mv_limits.col_min; \ 314 maxc = subpel_mv_limits.col_max; \ 315 minr = subpel_mv_limits.row_min; \ 316 maxr = subpel_mv_limits.row_max; \ 317 \ 318 bestmv->row *= 8; \ 319 bestmv->col *= 8; 320 321 static unsigned int setup_center_error( 322 const MACROBLOCKD *xd, const MV *bestmv, const MV *ref_mv, 323 int error_per_bit, const vp9_variance_fn_ptr_t *vfp, 324 const uint8_t *const src, const int src_stride, const uint8_t *const y, 325 int y_stride, const uint8_t *second_pred, int w, int h, int offset, 326 int *mvjcost, int *mvcost[2], uint32_t *sse1, uint32_t *distortion) { 327 #if CONFIG_VP9_HIGHBITDEPTH 328 uint64_t besterr; 329 if (second_pred != NULL) { 330 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { 331 DECLARE_ALIGNED(16, uint16_t, comp_pred16[64 * 64]); 332 vpx_highbd_comp_avg_pred(comp_pred16, second_pred, w, h, y + offset, 333 y_stride); 334 besterr = 335 vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, src, src_stride, sse1); 336 } else { 337 DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]); 338 vpx_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride); 339 besterr = vfp->vf(comp_pred, w, src, src_stride, sse1); 340 } 341 } else { 342 besterr = vfp->vf(y + offset, y_stride, src, src_stride, sse1); 343 } 344 *distortion = (uint32_t)besterr; 345 besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); 346 if (besterr >= UINT_MAX) return UINT_MAX; 347 return (uint32_t)besterr; 348 #else 349 uint32_t besterr; 350 (void)xd; 351 if (second_pred != NULL) { 352 DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]); 353 vpx_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride); 354 besterr = vfp->vf(comp_pred, w, src, src_stride, sse1); 355 } else { 356 besterr = vfp->vf(y + offset, y_stride, src, src_stride, sse1); 357 } 358 *distortion = besterr; 359 besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); 360 return besterr; 361 #endif // CONFIG_VP9_HIGHBITDEPTH 362 } 363 364 static INLINE int divide_and_round(const int n, const int d) { 365 return ((n < 0) ^ (d < 0)) ? ((n - d / 2) / d) : ((n + d / 2) / d); 366 } 367 368 static INLINE int is_cost_list_wellbehaved(int *cost_list) { 369 return cost_list[0] < cost_list[1] && cost_list[0] < cost_list[2] && 370 cost_list[0] < cost_list[3] && cost_list[0] < cost_list[4]; 371 } 372 373 // Returns surface minima estimate at given precision in 1/2^n bits. 374 // Assume a model for the cost surface: S = A(x - x0)^2 + B(y - y0)^2 + C 375 // For a given set of costs S0, S1, S2, S3, S4 at points 376 // (y, x) = (0, 0), (0, -1), (1, 0), (0, 1) and (-1, 0) respectively, 377 // the solution for the location of the minima (x0, y0) is given by: 378 // x0 = 1/2 (S1 - S3)/(S1 + S3 - 2*S0), 379 // y0 = 1/2 (S4 - S2)/(S4 + S2 - 2*S0). 380 // The code below is an integerized version of that. 381 static void get_cost_surf_min(int *cost_list, int *ir, int *ic, int bits) { 382 *ic = divide_and_round((cost_list[1] - cost_list[3]) * (1 << (bits - 1)), 383 (cost_list[1] - 2 * cost_list[0] + cost_list[3])); 384 *ir = divide_and_round((cost_list[4] - cost_list[2]) * (1 << (bits - 1)), 385 (cost_list[4] - 2 * cost_list[0] + cost_list[2])); 386 } 387 388 uint32_t vp9_skip_sub_pixel_tree(const MACROBLOCK *x, MV *bestmv, 389 const MV *ref_mv, int allow_hp, 390 int error_per_bit, 391 const vp9_variance_fn_ptr_t *vfp, 392 int forced_stop, int iters_per_step, 393 int *cost_list, int *mvjcost, int *mvcost[2], 394 uint32_t *distortion, uint32_t *sse1, 395 const uint8_t *second_pred, int w, int h) { 396 SETUP_SUBPEL_SEARCH; 397 besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z, 398 src_stride, y, y_stride, second_pred, w, h, 399 offset, mvjcost, mvcost, sse1, distortion); 400 (void)halfiters; 401 (void)quarteriters; 402 (void)eighthiters; 403 (void)whichdir; 404 (void)allow_hp; 405 (void)forced_stop; 406 (void)hstep; 407 (void)rr; 408 (void)rc; 409 (void)minr; 410 (void)minc; 411 (void)maxr; 412 (void)maxc; 413 (void)tr; 414 (void)tc; 415 (void)sse; 416 (void)thismse; 417 (void)cost_list; 418 419 return besterr; 420 } 421 422 uint32_t vp9_find_best_sub_pixel_tree_pruned_evenmore( 423 const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp, 424 int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop, 425 int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2], 426 uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w, 427 int h) { 428 SETUP_SUBPEL_SEARCH; 429 besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z, 430 src_stride, y, y_stride, second_pred, w, h, 431 offset, mvjcost, mvcost, sse1, distortion); 432 (void)halfiters; 433 (void)quarteriters; 434 (void)eighthiters; 435 (void)whichdir; 436 (void)allow_hp; 437 (void)forced_stop; 438 (void)hstep; 439 440 if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX && 441 cost_list[2] != INT_MAX && cost_list[3] != INT_MAX && 442 cost_list[4] != INT_MAX && is_cost_list_wellbehaved(cost_list)) { 443 int ir, ic; 444 unsigned int minpt; 445 get_cost_surf_min(cost_list, &ir, &ic, 2); 446 if (ir != 0 || ic != 0) { 447 CHECK_BETTER(minpt, tr + 2 * ir, tc + 2 * ic); 448 } 449 } else { 450 FIRST_LEVEL_CHECKS; 451 if (halfiters > 1) { 452 SECOND_LEVEL_CHECKS; 453 } 454 455 tr = br; 456 tc = bc; 457 458 // Each subsequent iteration checks at least one point in common with 459 // the last iteration could be 2 ( if diag selected) 1/4 pel 460 // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only 461 if (forced_stop != 2) { 462 hstep >>= 1; 463 FIRST_LEVEL_CHECKS; 464 if (quarteriters > 1) { 465 SECOND_LEVEL_CHECKS; 466 } 467 } 468 } 469 470 tr = br; 471 tc = bc; 472 473 if (allow_hp && use_mv_hp(ref_mv) && forced_stop == 0) { 474 hstep >>= 1; 475 FIRST_LEVEL_CHECKS; 476 if (eighthiters > 1) { 477 SECOND_LEVEL_CHECKS; 478 } 479 } 480 481 bestmv->row = br; 482 bestmv->col = bc; 483 484 return besterr; 485 } 486 487 uint32_t vp9_find_best_sub_pixel_tree_pruned_more( 488 const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp, 489 int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop, 490 int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2], 491 uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w, 492 int h) { 493 SETUP_SUBPEL_SEARCH; 494 besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z, 495 src_stride, y, y_stride, second_pred, w, h, 496 offset, mvjcost, mvcost, sse1, distortion); 497 if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX && 498 cost_list[2] != INT_MAX && cost_list[3] != INT_MAX && 499 cost_list[4] != INT_MAX && is_cost_list_wellbehaved(cost_list)) { 500 unsigned int minpt; 501 int ir, ic; 502 get_cost_surf_min(cost_list, &ir, &ic, 1); 503 if (ir != 0 || ic != 0) { 504 CHECK_BETTER(minpt, tr + ir * hstep, tc + ic * hstep); 505 } 506 } else { 507 FIRST_LEVEL_CHECKS; 508 if (halfiters > 1) { 509 SECOND_LEVEL_CHECKS; 510 } 511 } 512 513 // Each subsequent iteration checks at least one point in common with 514 // the last iteration could be 2 ( if diag selected) 1/4 pel 515 516 // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only 517 if (forced_stop != 2) { 518 tr = br; 519 tc = bc; 520 hstep >>= 1; 521 FIRST_LEVEL_CHECKS; 522 if (quarteriters > 1) { 523 SECOND_LEVEL_CHECKS; 524 } 525 } 526 527 if (allow_hp && use_mv_hp(ref_mv) && forced_stop == 0) { 528 tr = br; 529 tc = bc; 530 hstep >>= 1; 531 FIRST_LEVEL_CHECKS; 532 if (eighthiters > 1) { 533 SECOND_LEVEL_CHECKS; 534 } 535 } 536 // These lines insure static analysis doesn't warn that 537 // tr and tc aren't used after the above point. 538 (void)tr; 539 (void)tc; 540 541 bestmv->row = br; 542 bestmv->col = bc; 543 544 return besterr; 545 } 546 547 uint32_t vp9_find_best_sub_pixel_tree_pruned( 548 const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp, 549 int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop, 550 int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2], 551 uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w, 552 int h) { 553 SETUP_SUBPEL_SEARCH; 554 besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z, 555 src_stride, y, y_stride, second_pred, w, h, 556 offset, mvjcost, mvcost, sse1, distortion); 557 if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX && 558 cost_list[2] != INT_MAX && cost_list[3] != INT_MAX && 559 cost_list[4] != INT_MAX) { 560 unsigned int left, right, up, down, diag; 561 whichdir = (cost_list[1] < cost_list[3] ? 0 : 1) + 562 (cost_list[2] < cost_list[4] ? 0 : 2); 563 switch (whichdir) { 564 case 0: 565 CHECK_BETTER(left, tr, tc - hstep); 566 CHECK_BETTER(down, tr + hstep, tc); 567 CHECK_BETTER(diag, tr + hstep, tc - hstep); 568 break; 569 case 1: 570 CHECK_BETTER(right, tr, tc + hstep); 571 CHECK_BETTER(down, tr + hstep, tc); 572 CHECK_BETTER(diag, tr + hstep, tc + hstep); 573 break; 574 case 2: 575 CHECK_BETTER(left, tr, tc - hstep); 576 CHECK_BETTER(up, tr - hstep, tc); 577 CHECK_BETTER(diag, tr - hstep, tc - hstep); 578 break; 579 case 3: 580 CHECK_BETTER(right, tr, tc + hstep); 581 CHECK_BETTER(up, tr - hstep, tc); 582 CHECK_BETTER(diag, tr - hstep, tc + hstep); 583 break; 584 } 585 } else { 586 FIRST_LEVEL_CHECKS; 587 if (halfiters > 1) { 588 SECOND_LEVEL_CHECKS; 589 } 590 } 591 592 tr = br; 593 tc = bc; 594 595 // Each subsequent iteration checks at least one point in common with 596 // the last iteration could be 2 ( if diag selected) 1/4 pel 597 598 // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only 599 if (forced_stop != 2) { 600 hstep >>= 1; 601 FIRST_LEVEL_CHECKS; 602 if (quarteriters > 1) { 603 SECOND_LEVEL_CHECKS; 604 } 605 tr = br; 606 tc = bc; 607 } 608 609 if (allow_hp && use_mv_hp(ref_mv) && forced_stop == 0) { 610 hstep >>= 1; 611 FIRST_LEVEL_CHECKS; 612 if (eighthiters > 1) { 613 SECOND_LEVEL_CHECKS; 614 } 615 tr = br; 616 tc = bc; 617 } 618 // These lines insure static analysis doesn't warn that 619 // tr and tc aren't used after the above point. 620 (void)tr; 621 (void)tc; 622 623 bestmv->row = br; 624 bestmv->col = bc; 625 626 return besterr; 627 } 628 629 /* clang-format off */ 630 static const MV search_step_table[12] = { 631 // left, right, up, down 632 { 0, -4 }, { 0, 4 }, { -4, 0 }, { 4, 0 }, 633 { 0, -2 }, { 0, 2 }, { -2, 0 }, { 2, 0 }, 634 { 0, -1 }, { 0, 1 }, { -1, 0 }, { 1, 0 } 635 }; 636 /* clang-format on */ 637 638 uint32_t vp9_find_best_sub_pixel_tree( 639 const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp, 640 int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop, 641 int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2], 642 uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w, 643 int h) { 644 const uint8_t *const z = x->plane[0].src.buf; 645 const uint8_t *const src_address = z; 646 const int src_stride = x->plane[0].src.stride; 647 const MACROBLOCKD *xd = &x->e_mbd; 648 unsigned int besterr = UINT_MAX; 649 unsigned int sse; 650 int thismse; 651 const int y_stride = xd->plane[0].pre[0].stride; 652 const int offset = bestmv->row * y_stride + bestmv->col; 653 const uint8_t *const y = xd->plane[0].pre[0].buf; 654 655 int rr = ref_mv->row; 656 int rc = ref_mv->col; 657 int br = bestmv->row * 8; 658 int bc = bestmv->col * 8; 659 int hstep = 4; 660 int iter, round = 3 - forced_stop; 661 662 int minc, maxc, minr, maxr; 663 int tr = br; 664 int tc = bc; 665 const MV *search_step = search_step_table; 666 int idx, best_idx = -1; 667 unsigned int cost_array[5]; 668 int kr, kc; 669 MvLimits subpel_mv_limits; 670 671 vp9_set_subpel_mv_search_range(&subpel_mv_limits, &x->mv_limits, ref_mv); 672 minc = subpel_mv_limits.col_min; 673 maxc = subpel_mv_limits.col_max; 674 minr = subpel_mv_limits.row_min; 675 maxr = subpel_mv_limits.row_max; 676 677 if (!(allow_hp && use_mv_hp(ref_mv))) 678 if (round == 3) round = 2; 679 680 bestmv->row *= 8; 681 bestmv->col *= 8; 682 683 besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z, 684 src_stride, y, y_stride, second_pred, w, h, 685 offset, mvjcost, mvcost, sse1, distortion); 686 687 (void)cost_list; // to silence compiler warning 688 689 for (iter = 0; iter < round; ++iter) { 690 // Check vertical and horizontal sub-pixel positions. 691 for (idx = 0; idx < 4; ++idx) { 692 tr = br + search_step[idx].row; 693 tc = bc + search_step[idx].col; 694 if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) { 695 const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3); 696 MV this_mv; 697 this_mv.row = tr; 698 this_mv.col = tc; 699 if (second_pred == NULL) 700 thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr), src_address, 701 src_stride, &sse); 702 else 703 thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr), 704 src_address, src_stride, &sse, second_pred); 705 cost_array[idx] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, 706 mvcost, error_per_bit); 707 708 if (cost_array[idx] < besterr) { 709 best_idx = idx; 710 besterr = cost_array[idx]; 711 *distortion = thismse; 712 *sse1 = sse; 713 } 714 } else { 715 cost_array[idx] = UINT_MAX; 716 } 717 } 718 719 // Check diagonal sub-pixel position 720 kc = (cost_array[0] <= cost_array[1] ? -hstep : hstep); 721 kr = (cost_array[2] <= cost_array[3] ? -hstep : hstep); 722 723 tc = bc + kc; 724 tr = br + kr; 725 if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) { 726 const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3); 727 MV this_mv = { tr, tc }; 728 if (second_pred == NULL) 729 thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr), src_address, 730 src_stride, &sse); 731 else 732 thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr), src_address, 733 src_stride, &sse, second_pred); 734 cost_array[4] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, 735 error_per_bit); 736 737 if (cost_array[4] < besterr) { 738 best_idx = 4; 739 besterr = cost_array[4]; 740 *distortion = thismse; 741 *sse1 = sse; 742 } 743 } else { 744 cost_array[idx] = UINT_MAX; 745 } 746 747 if (best_idx < 4 && best_idx >= 0) { 748 br += search_step[best_idx].row; 749 bc += search_step[best_idx].col; 750 } else if (best_idx == 4) { 751 br = tr; 752 bc = tc; 753 } 754 755 if (iters_per_step > 1 && best_idx != -1) SECOND_LEVEL_CHECKS_BEST; 756 757 tr = br; 758 tc = bc; 759 760 search_step += 4; 761 hstep >>= 1; 762 best_idx = -1; 763 } 764 765 // Each subsequent iteration checks at least one point in common with 766 // the last iteration could be 2 ( if diag selected) 1/4 pel 767 768 // These lines insure static analysis doesn't warn that 769 // tr and tc aren't used after the above point. 770 (void)tr; 771 (void)tc; 772 773 bestmv->row = br; 774 bestmv->col = bc; 775 776 return besterr; 777 } 778 779 #undef CHECK_BETTER 780 781 static INLINE int check_bounds(const MvLimits *mv_limits, int row, int col, 782 int range) { 783 return ((row - range) >= mv_limits->row_min) & 784 ((row + range) <= mv_limits->row_max) & 785 ((col - range) >= mv_limits->col_min) & 786 ((col + range) <= mv_limits->col_max); 787 } 788 789 static INLINE int is_mv_in(const MvLimits *mv_limits, const MV *mv) { 790 return (mv->col >= mv_limits->col_min) && (mv->col <= mv_limits->col_max) && 791 (mv->row >= mv_limits->row_min) && (mv->row <= mv_limits->row_max); 792 } 793 794 #define CHECK_BETTER \ 795 { \ 796 if (thissad < bestsad) { \ 797 if (use_mvcost) \ 798 thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); \ 799 if (thissad < bestsad) { \ 800 bestsad = thissad; \ 801 best_site = i; \ 802 } \ 803 } \ 804 } 805 806 #define MAX_PATTERN_SCALES 11 807 #define MAX_PATTERN_CANDIDATES 8 // max number of canddiates per scale 808 #define PATTERN_CANDIDATES_REF 3 // number of refinement candidates 809 810 // Calculate and return a sad+mvcost list around an integer best pel. 811 static INLINE void calc_int_cost_list(const MACROBLOCK *x, const MV *ref_mv, 812 int sadpb, 813 const vp9_variance_fn_ptr_t *fn_ptr, 814 const MV *best_mv, int *cost_list) { 815 static const MV neighbors[4] = { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } }; 816 const struct buf_2d *const what = &x->plane[0].src; 817 const struct buf_2d *const in_what = &x->e_mbd.plane[0].pre[0]; 818 const MV fcenter_mv = { ref_mv->row >> 3, ref_mv->col >> 3 }; 819 int br = best_mv->row; 820 int bc = best_mv->col; 821 MV this_mv; 822 int i; 823 unsigned int sse; 824 825 this_mv.row = br; 826 this_mv.col = bc; 827 cost_list[0] = 828 fn_ptr->vf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv), 829 in_what->stride, &sse) + 830 mvsad_err_cost(x, &this_mv, &fcenter_mv, sadpb); 831 if (check_bounds(&x->mv_limits, br, bc, 1)) { 832 for (i = 0; i < 4; i++) { 833 const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col }; 834 cost_list[i + 1] = fn_ptr->vf(what->buf, what->stride, 835 get_buf_from_mv(in_what, &this_mv), 836 in_what->stride, &sse) + 837 mv_err_cost(&this_mv, &fcenter_mv, x->nmvjointcost, 838 x->mvcost, x->errorperbit); 839 } 840 } else { 841 for (i = 0; i < 4; i++) { 842 const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col }; 843 if (!is_mv_in(&x->mv_limits, &this_mv)) 844 cost_list[i + 1] = INT_MAX; 845 else 846 cost_list[i + 1] = fn_ptr->vf(what->buf, what->stride, 847 get_buf_from_mv(in_what, &this_mv), 848 in_what->stride, &sse) + 849 mv_err_cost(&this_mv, &fcenter_mv, x->nmvjointcost, 850 x->mvcost, x->errorperbit); 851 } 852 } 853 } 854 855 // Generic pattern search function that searches over multiple scales. 856 // Each scale can have a different number of candidates and shape of 857 // candidates as indicated in the num_candidates and candidates arrays 858 // passed into this function 859 // 860 static int vp9_pattern_search( 861 const MACROBLOCK *x, MV *ref_mv, int search_param, int sad_per_bit, 862 int do_init_search, int *cost_list, const vp9_variance_fn_ptr_t *vfp, 863 int use_mvcost, const MV *center_mv, MV *best_mv, 864 const int num_candidates[MAX_PATTERN_SCALES], 865 const MV candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES]) { 866 const MACROBLOCKD *const xd = &x->e_mbd; 867 static const int search_param_to_steps[MAX_MVSEARCH_STEPS] = { 868 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 869 }; 870 int i, s, t; 871 const struct buf_2d *const what = &x->plane[0].src; 872 const struct buf_2d *const in_what = &xd->plane[0].pre[0]; 873 int br, bc; 874 int bestsad = INT_MAX; 875 int thissad; 876 int k = -1; 877 const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 }; 878 int best_init_s = search_param_to_steps[search_param]; 879 // adjust ref_mv to make sure it is within MV range 880 clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max, 881 x->mv_limits.row_min, x->mv_limits.row_max); 882 br = ref_mv->row; 883 bc = ref_mv->col; 884 885 // Work out the start point for the search 886 bestsad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv), 887 in_what->stride) + 888 mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit); 889 890 // Search all possible scales upto the search param around the center point 891 // pick the scale of the point that is best as the starting scale of 892 // further steps around it. 893 if (do_init_search) { 894 s = best_init_s; 895 best_init_s = -1; 896 for (t = 0; t <= s; ++t) { 897 int best_site = -1; 898 if (check_bounds(&x->mv_limits, br, bc, 1 << t)) { 899 for (i = 0; i < num_candidates[t]; i++) { 900 const MV this_mv = { br + candidates[t][i].row, 901 bc + candidates[t][i].col }; 902 thissad = 903 vfp->sdf(what->buf, what->stride, 904 get_buf_from_mv(in_what, &this_mv), in_what->stride); 905 CHECK_BETTER 906 } 907 } else { 908 for (i = 0; i < num_candidates[t]; i++) { 909 const MV this_mv = { br + candidates[t][i].row, 910 bc + candidates[t][i].col }; 911 if (!is_mv_in(&x->mv_limits, &this_mv)) continue; 912 thissad = 913 vfp->sdf(what->buf, what->stride, 914 get_buf_from_mv(in_what, &this_mv), in_what->stride); 915 CHECK_BETTER 916 } 917 } 918 if (best_site == -1) { 919 continue; 920 } else { 921 best_init_s = t; 922 k = best_site; 923 } 924 } 925 if (best_init_s != -1) { 926 br += candidates[best_init_s][k].row; 927 bc += candidates[best_init_s][k].col; 928 } 929 } 930 931 // If the center point is still the best, just skip this and move to 932 // the refinement step. 933 if (best_init_s != -1) { 934 int best_site = -1; 935 s = best_init_s; 936 937 do { 938 // No need to search all 6 points the 1st time if initial search was used 939 if (!do_init_search || s != best_init_s) { 940 if (check_bounds(&x->mv_limits, br, bc, 1 << s)) { 941 for (i = 0; i < num_candidates[s]; i++) { 942 const MV this_mv = { br + candidates[s][i].row, 943 bc + candidates[s][i].col }; 944 thissad = 945 vfp->sdf(what->buf, what->stride, 946 get_buf_from_mv(in_what, &this_mv), in_what->stride); 947 CHECK_BETTER 948 } 949 } else { 950 for (i = 0; i < num_candidates[s]; i++) { 951 const MV this_mv = { br + candidates[s][i].row, 952 bc + candidates[s][i].col }; 953 if (!is_mv_in(&x->mv_limits, &this_mv)) continue; 954 thissad = 955 vfp->sdf(what->buf, what->stride, 956 get_buf_from_mv(in_what, &this_mv), in_what->stride); 957 CHECK_BETTER 958 } 959 } 960 961 if (best_site == -1) { 962 continue; 963 } else { 964 br += candidates[s][best_site].row; 965 bc += candidates[s][best_site].col; 966 k = best_site; 967 } 968 } 969 970 do { 971 int next_chkpts_indices[PATTERN_CANDIDATES_REF]; 972 best_site = -1; 973 next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1; 974 next_chkpts_indices[1] = k; 975 next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1; 976 977 if (check_bounds(&x->mv_limits, br, bc, 1 << s)) { 978 for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { 979 const MV this_mv = { 980 br + candidates[s][next_chkpts_indices[i]].row, 981 bc + candidates[s][next_chkpts_indices[i]].col 982 }; 983 thissad = 984 vfp->sdf(what->buf, what->stride, 985 get_buf_from_mv(in_what, &this_mv), in_what->stride); 986 CHECK_BETTER 987 } 988 } else { 989 for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { 990 const MV this_mv = { 991 br + candidates[s][next_chkpts_indices[i]].row, 992 bc + candidates[s][next_chkpts_indices[i]].col 993 }; 994 if (!is_mv_in(&x->mv_limits, &this_mv)) continue; 995 thissad = 996 vfp->sdf(what->buf, what->stride, 997 get_buf_from_mv(in_what, &this_mv), in_what->stride); 998 CHECK_BETTER 999 } 1000 } 1001 1002 if (best_site != -1) { 1003 k = next_chkpts_indices[best_site]; 1004 br += candidates[s][k].row; 1005 bc += candidates[s][k].col; 1006 } 1007 } while (best_site != -1); 1008 } while (s--); 1009 } 1010 1011 // Returns the one-away integer pel sad values around the best as follows: 1012 // cost_list[0]: cost at the best integer pel 1013 // cost_list[1]: cost at delta {0, -1} (left) from the best integer pel 1014 // cost_list[2]: cost at delta { 1, 0} (bottom) from the best integer pel 1015 // cost_list[3]: cost at delta { 0, 1} (right) from the best integer pel 1016 // cost_list[4]: cost at delta {-1, 0} (top) from the best integer pel 1017 if (cost_list) { 1018 const MV best_mv = { br, bc }; 1019 calc_int_cost_list(x, &fcenter_mv, sad_per_bit, vfp, &best_mv, cost_list); 1020 } 1021 best_mv->row = br; 1022 best_mv->col = bc; 1023 return bestsad; 1024 } 1025 1026 // A specialized function where the smallest scale search candidates 1027 // are 4 1-away neighbors, and cost_list is non-null 1028 // TODO(debargha): Merge this function with the one above. Also remove 1029 // use_mvcost option since it is always 1, to save unnecessary branches. 1030 static int vp9_pattern_search_sad( 1031 const MACROBLOCK *x, MV *ref_mv, int search_param, int sad_per_bit, 1032 int do_init_search, int *cost_list, const vp9_variance_fn_ptr_t *vfp, 1033 int use_mvcost, const MV *center_mv, MV *best_mv, 1034 const int num_candidates[MAX_PATTERN_SCALES], 1035 const MV candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES]) { 1036 const MACROBLOCKD *const xd = &x->e_mbd; 1037 static const int search_param_to_steps[MAX_MVSEARCH_STEPS] = { 1038 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 1039 }; 1040 int i, s, t; 1041 const struct buf_2d *const what = &x->plane[0].src; 1042 const struct buf_2d *const in_what = &xd->plane[0].pre[0]; 1043 int br, bc; 1044 int bestsad = INT_MAX; 1045 int thissad; 1046 int k = -1; 1047 const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 }; 1048 int best_init_s = search_param_to_steps[search_param]; 1049 // adjust ref_mv to make sure it is within MV range 1050 clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max, 1051 x->mv_limits.row_min, x->mv_limits.row_max); 1052 br = ref_mv->row; 1053 bc = ref_mv->col; 1054 if (cost_list != NULL) { 1055 cost_list[0] = cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] = 1056 INT_MAX; 1057 } 1058 1059 // Work out the start point for the search 1060 bestsad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv), 1061 in_what->stride) + 1062 mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit); 1063 1064 // Search all possible scales upto the search param around the center point 1065 // pick the scale of the point that is best as the starting scale of 1066 // further steps around it. 1067 if (do_init_search) { 1068 s = best_init_s; 1069 best_init_s = -1; 1070 for (t = 0; t <= s; ++t) { 1071 int best_site = -1; 1072 if (check_bounds(&x->mv_limits, br, bc, 1 << t)) { 1073 for (i = 0; i < num_candidates[t]; i++) { 1074 const MV this_mv = { br + candidates[t][i].row, 1075 bc + candidates[t][i].col }; 1076 thissad = 1077 vfp->sdf(what->buf, what->stride, 1078 get_buf_from_mv(in_what, &this_mv), in_what->stride); 1079 CHECK_BETTER 1080 } 1081 } else { 1082 for (i = 0; i < num_candidates[t]; i++) { 1083 const MV this_mv = { br + candidates[t][i].row, 1084 bc + candidates[t][i].col }; 1085 if (!is_mv_in(&x->mv_limits, &this_mv)) continue; 1086 thissad = 1087 vfp->sdf(what->buf, what->stride, 1088 get_buf_from_mv(in_what, &this_mv), in_what->stride); 1089 CHECK_BETTER 1090 } 1091 } 1092 if (best_site == -1) { 1093 continue; 1094 } else { 1095 best_init_s = t; 1096 k = best_site; 1097 } 1098 } 1099 if (best_init_s != -1) { 1100 br += candidates[best_init_s][k].row; 1101 bc += candidates[best_init_s][k].col; 1102 } 1103 } 1104 1105 // If the center point is still the best, just skip this and move to 1106 // the refinement step. 1107 if (best_init_s != -1) { 1108 int do_sad = (num_candidates[0] == 4 && cost_list != NULL); 1109 int best_site = -1; 1110 s = best_init_s; 1111 1112 for (; s >= do_sad; s--) { 1113 if (!do_init_search || s != best_init_s) { 1114 if (check_bounds(&x->mv_limits, br, bc, 1 << s)) { 1115 for (i = 0; i < num_candidates[s]; i++) { 1116 const MV this_mv = { br + candidates[s][i].row, 1117 bc + candidates[s][i].col }; 1118 thissad = 1119 vfp->sdf(what->buf, what->stride, 1120 get_buf_from_mv(in_what, &this_mv), in_what->stride); 1121 CHECK_BETTER 1122 } 1123 } else { 1124 for (i = 0; i < num_candidates[s]; i++) { 1125 const MV this_mv = { br + candidates[s][i].row, 1126 bc + candidates[s][i].col }; 1127 if (!is_mv_in(&x->mv_limits, &this_mv)) continue; 1128 thissad = 1129 vfp->sdf(what->buf, what->stride, 1130 get_buf_from_mv(in_what, &this_mv), in_what->stride); 1131 CHECK_BETTER 1132 } 1133 } 1134 1135 if (best_site == -1) { 1136 continue; 1137 } else { 1138 br += candidates[s][best_site].row; 1139 bc += candidates[s][best_site].col; 1140 k = best_site; 1141 } 1142 } 1143 1144 do { 1145 int next_chkpts_indices[PATTERN_CANDIDATES_REF]; 1146 best_site = -1; 1147 next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1; 1148 next_chkpts_indices[1] = k; 1149 next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1; 1150 1151 if (check_bounds(&x->mv_limits, br, bc, 1 << s)) { 1152 for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { 1153 const MV this_mv = { 1154 br + candidates[s][next_chkpts_indices[i]].row, 1155 bc + candidates[s][next_chkpts_indices[i]].col 1156 }; 1157 thissad = 1158 vfp->sdf(what->buf, what->stride, 1159 get_buf_from_mv(in_what, &this_mv), in_what->stride); 1160 CHECK_BETTER 1161 } 1162 } else { 1163 for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { 1164 const MV this_mv = { 1165 br + candidates[s][next_chkpts_indices[i]].row, 1166 bc + candidates[s][next_chkpts_indices[i]].col 1167 }; 1168 if (!is_mv_in(&x->mv_limits, &this_mv)) continue; 1169 thissad = 1170 vfp->sdf(what->buf, what->stride, 1171 get_buf_from_mv(in_what, &this_mv), in_what->stride); 1172 CHECK_BETTER 1173 } 1174 } 1175 1176 if (best_site != -1) { 1177 k = next_chkpts_indices[best_site]; 1178 br += candidates[s][k].row; 1179 bc += candidates[s][k].col; 1180 } 1181 } while (best_site != -1); 1182 } 1183 1184 // Note: If we enter the if below, then cost_list must be non-NULL. 1185 if (s == 0) { 1186 cost_list[0] = bestsad; 1187 if (!do_init_search || s != best_init_s) { 1188 if (check_bounds(&x->mv_limits, br, bc, 1 << s)) { 1189 for (i = 0; i < num_candidates[s]; i++) { 1190 const MV this_mv = { br + candidates[s][i].row, 1191 bc + candidates[s][i].col }; 1192 cost_list[i + 1] = thissad = 1193 vfp->sdf(what->buf, what->stride, 1194 get_buf_from_mv(in_what, &this_mv), in_what->stride); 1195 CHECK_BETTER 1196 } 1197 } else { 1198 for (i = 0; i < num_candidates[s]; i++) { 1199 const MV this_mv = { br + candidates[s][i].row, 1200 bc + candidates[s][i].col }; 1201 if (!is_mv_in(&x->mv_limits, &this_mv)) continue; 1202 cost_list[i + 1] = thissad = 1203 vfp->sdf(what->buf, what->stride, 1204 get_buf_from_mv(in_what, &this_mv), in_what->stride); 1205 CHECK_BETTER 1206 } 1207 } 1208 1209 if (best_site != -1) { 1210 br += candidates[s][best_site].row; 1211 bc += candidates[s][best_site].col; 1212 k = best_site; 1213 } 1214 } 1215 while (best_site != -1) { 1216 int next_chkpts_indices[PATTERN_CANDIDATES_REF]; 1217 best_site = -1; 1218 next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1; 1219 next_chkpts_indices[1] = k; 1220 next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1; 1221 cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] = INT_MAX; 1222 cost_list[((k + 2) % 4) + 1] = cost_list[0]; 1223 cost_list[0] = bestsad; 1224 1225 if (check_bounds(&x->mv_limits, br, bc, 1 << s)) { 1226 for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { 1227 const MV this_mv = { 1228 br + candidates[s][next_chkpts_indices[i]].row, 1229 bc + candidates[s][next_chkpts_indices[i]].col 1230 }; 1231 cost_list[next_chkpts_indices[i] + 1] = thissad = 1232 vfp->sdf(what->buf, what->stride, 1233 get_buf_from_mv(in_what, &this_mv), in_what->stride); 1234 CHECK_BETTER 1235 } 1236 } else { 1237 for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { 1238 const MV this_mv = { 1239 br + candidates[s][next_chkpts_indices[i]].row, 1240 bc + candidates[s][next_chkpts_indices[i]].col 1241 }; 1242 if (!is_mv_in(&x->mv_limits, &this_mv)) { 1243 cost_list[next_chkpts_indices[i] + 1] = INT_MAX; 1244 continue; 1245 } 1246 cost_list[next_chkpts_indices[i] + 1] = thissad = 1247 vfp->sdf(what->buf, what->stride, 1248 get_buf_from_mv(in_what, &this_mv), in_what->stride); 1249 CHECK_BETTER 1250 } 1251 } 1252 1253 if (best_site != -1) { 1254 k = next_chkpts_indices[best_site]; 1255 br += candidates[s][k].row; 1256 bc += candidates[s][k].col; 1257 } 1258 } 1259 } 1260 } 1261 1262 // Returns the one-away integer pel sad values around the best as follows: 1263 // cost_list[0]: sad at the best integer pel 1264 // cost_list[1]: sad at delta {0, -1} (left) from the best integer pel 1265 // cost_list[2]: sad at delta { 1, 0} (bottom) from the best integer pel 1266 // cost_list[3]: sad at delta { 0, 1} (right) from the best integer pel 1267 // cost_list[4]: sad at delta {-1, 0} (top) from the best integer pel 1268 if (cost_list) { 1269 static const MV neighbors[4] = { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } }; 1270 if (cost_list[0] == INT_MAX) { 1271 cost_list[0] = bestsad; 1272 if (check_bounds(&x->mv_limits, br, bc, 1)) { 1273 for (i = 0; i < 4; i++) { 1274 const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col }; 1275 cost_list[i + 1] = 1276 vfp->sdf(what->buf, what->stride, 1277 get_buf_from_mv(in_what, &this_mv), in_what->stride); 1278 } 1279 } else { 1280 for (i = 0; i < 4; i++) { 1281 const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col }; 1282 if (!is_mv_in(&x->mv_limits, &this_mv)) 1283 cost_list[i + 1] = INT_MAX; 1284 else 1285 cost_list[i + 1] = 1286 vfp->sdf(what->buf, what->stride, 1287 get_buf_from_mv(in_what, &this_mv), in_what->stride); 1288 } 1289 } 1290 } else { 1291 if (use_mvcost) { 1292 for (i = 0; i < 4; i++) { 1293 const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col }; 1294 if (cost_list[i + 1] != INT_MAX) { 1295 cost_list[i + 1] += 1296 mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); 1297 } 1298 } 1299 } 1300 } 1301 } 1302 best_mv->row = br; 1303 best_mv->col = bc; 1304 return bestsad; 1305 } 1306 1307 int vp9_get_mvpred_var(const MACROBLOCK *x, const MV *best_mv, 1308 const MV *center_mv, const vp9_variance_fn_ptr_t *vfp, 1309 int use_mvcost) { 1310 const MACROBLOCKD *const xd = &x->e_mbd; 1311 const struct buf_2d *const what = &x->plane[0].src; 1312 const struct buf_2d *const in_what = &xd->plane[0].pre[0]; 1313 const MV mv = { best_mv->row * 8, best_mv->col * 8 }; 1314 uint32_t unused; 1315 #if CONFIG_VP9_HIGHBITDEPTH 1316 uint64_t err = 1317 vfp->vf(what->buf, what->stride, get_buf_from_mv(in_what, best_mv), 1318 in_what->stride, &unused); 1319 err += (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost, 1320 x->errorperbit) 1321 : 0); 1322 if (err >= INT_MAX) return INT_MAX; 1323 return (int)err; 1324 #else 1325 return vfp->vf(what->buf, what->stride, get_buf_from_mv(in_what, best_mv), 1326 in_what->stride, &unused) + 1327 (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost, 1328 x->errorperbit) 1329 : 0); 1330 #endif 1331 } 1332 1333 int vp9_get_mvpred_av_var(const MACROBLOCK *x, const MV *best_mv, 1334 const MV *center_mv, const uint8_t *second_pred, 1335 const vp9_variance_fn_ptr_t *vfp, int use_mvcost) { 1336 const MACROBLOCKD *const xd = &x->e_mbd; 1337 const struct buf_2d *const what = &x->plane[0].src; 1338 const struct buf_2d *const in_what = &xd->plane[0].pre[0]; 1339 const MV mv = { best_mv->row * 8, best_mv->col * 8 }; 1340 unsigned int unused; 1341 1342 return vfp->svaf(get_buf_from_mv(in_what, best_mv), in_what->stride, 0, 0, 1343 what->buf, what->stride, &unused, second_pred) + 1344 (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost, 1345 x->errorperbit) 1346 : 0); 1347 } 1348 1349 static int hex_search(const MACROBLOCK *x, MV *ref_mv, int search_param, 1350 int sad_per_bit, int do_init_search, int *cost_list, 1351 const vp9_variance_fn_ptr_t *vfp, int use_mvcost, 1352 const MV *center_mv, MV *best_mv) { 1353 // First scale has 8-closest points, the rest have 6 points in hex shape 1354 // at increasing scales 1355 static const int hex_num_candidates[MAX_PATTERN_SCALES] = { 8, 6, 6, 6, 6, 6, 1356 6, 6, 6, 6, 6 }; 1357 // Note that the largest candidate step at each scale is 2^scale 1358 /* clang-format off */ 1359 static const MV hex_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = { 1360 { { -1, -1 }, { 0, -1 }, { 1, -1 }, { 1, 0 }, { 1, 1 }, { 0, 1 }, { -1, 1 }, 1361 { -1, 0 } }, 1362 { { -1, -2 }, { 1, -2 }, { 2, 0 }, { 1, 2 }, { -1, 2 }, { -2, 0 } }, 1363 { { -2, -4 }, { 2, -4 }, { 4, 0 }, { 2, 4 }, { -2, 4 }, { -4, 0 } }, 1364 { { -4, -8 }, { 4, -8 }, { 8, 0 }, { 4, 8 }, { -4, 8 }, { -8, 0 } }, 1365 { { -8, -16 }, { 8, -16 }, { 16, 0 }, { 8, 16 }, { -8, 16 }, { -16, 0 } }, 1366 { { -16, -32 }, { 16, -32 }, { 32, 0 }, { 16, 32 }, { -16, 32 }, 1367 { -32, 0 } }, 1368 { { -32, -64 }, { 32, -64 }, { 64, 0 }, { 32, 64 }, { -32, 64 }, 1369 { -64, 0 } }, 1370 { { -64, -128 }, { 64, -128 }, { 128, 0 }, { 64, 128 }, { -64, 128 }, 1371 { -128, 0 } }, 1372 { { -128, -256 }, { 128, -256 }, { 256, 0 }, { 128, 256 }, { -128, 256 }, 1373 { -256, 0 } }, 1374 { { -256, -512 }, { 256, -512 }, { 512, 0 }, { 256, 512 }, { -256, 512 }, 1375 { -512, 0 } }, 1376 { { -512, -1024 }, { 512, -1024 }, { 1024, 0 }, { 512, 1024 }, 1377 { -512, 1024 }, { -1024, 0 } } 1378 }; 1379 /* clang-format on */ 1380 return vp9_pattern_search( 1381 x, ref_mv, search_param, sad_per_bit, do_init_search, cost_list, vfp, 1382 use_mvcost, center_mv, best_mv, hex_num_candidates, hex_candidates); 1383 } 1384 1385 static int bigdia_search(const MACROBLOCK *x, MV *ref_mv, int search_param, 1386 int sad_per_bit, int do_init_search, int *cost_list, 1387 const vp9_variance_fn_ptr_t *vfp, int use_mvcost, 1388 const MV *center_mv, MV *best_mv) { 1389 // First scale has 4-closest points, the rest have 8 points in diamond 1390 // shape at increasing scales 1391 static const int bigdia_num_candidates[MAX_PATTERN_SCALES] = { 1392 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 1393 }; 1394 // Note that the largest candidate step at each scale is 2^scale 1395 /* clang-format off */ 1396 static const MV 1397 bigdia_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = { 1398 { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } }, 1399 { { -1, -1 }, { 0, -2 }, { 1, -1 }, { 2, 0 }, { 1, 1 }, { 0, 2 }, 1400 { -1, 1 }, { -2, 0 } }, 1401 { { -2, -2 }, { 0, -4 }, { 2, -2 }, { 4, 0 }, { 2, 2 }, { 0, 4 }, 1402 { -2, 2 }, { -4, 0 } }, 1403 { { -4, -4 }, { 0, -8 }, { 4, -4 }, { 8, 0 }, { 4, 4 }, { 0, 8 }, 1404 { -4, 4 }, { -8, 0 } }, 1405 { { -8, -8 }, { 0, -16 }, { 8, -8 }, { 16, 0 }, { 8, 8 }, { 0, 16 }, 1406 { -8, 8 }, { -16, 0 } }, 1407 { { -16, -16 }, { 0, -32 }, { 16, -16 }, { 32, 0 }, { 16, 16 }, 1408 { 0, 32 }, { -16, 16 }, { -32, 0 } }, 1409 { { -32, -32 }, { 0, -64 }, { 32, -32 }, { 64, 0 }, { 32, 32 }, 1410 { 0, 64 }, { -32, 32 }, { -64, 0 } }, 1411 { { -64, -64 }, { 0, -128 }, { 64, -64 }, { 128, 0 }, { 64, 64 }, 1412 { 0, 128 }, { -64, 64 }, { -128, 0 } }, 1413 { { -128, -128 }, { 0, -256 }, { 128, -128 }, { 256, 0 }, { 128, 128 }, 1414 { 0, 256 }, { -128, 128 }, { -256, 0 } }, 1415 { { -256, -256 }, { 0, -512 }, { 256, -256 }, { 512, 0 }, { 256, 256 }, 1416 { 0, 512 }, { -256, 256 }, { -512, 0 } }, 1417 { { -512, -512 }, { 0, -1024 }, { 512, -512 }, { 1024, 0 }, 1418 { 512, 512 }, { 0, 1024 }, { -512, 512 }, { -1024, 0 } } 1419 }; 1420 /* clang-format on */ 1421 return vp9_pattern_search_sad( 1422 x, ref_mv, search_param, sad_per_bit, do_init_search, cost_list, vfp, 1423 use_mvcost, center_mv, best_mv, bigdia_num_candidates, bigdia_candidates); 1424 } 1425 1426 static int square_search(const MACROBLOCK *x, MV *ref_mv, int search_param, 1427 int sad_per_bit, int do_init_search, int *cost_list, 1428 const vp9_variance_fn_ptr_t *vfp, int use_mvcost, 1429 const MV *center_mv, MV *best_mv) { 1430 // All scales have 8 closest points in square shape 1431 static const int square_num_candidates[MAX_PATTERN_SCALES] = { 1432 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 1433 }; 1434 // Note that the largest candidate step at each scale is 2^scale 1435 /* clang-format off */ 1436 static const MV 1437 square_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = { 1438 { { -1, -1 }, { 0, -1 }, { 1, -1 }, { 1, 0 }, { 1, 1 }, { 0, 1 }, 1439 { -1, 1 }, { -1, 0 } }, 1440 { { -2, -2 }, { 0, -2 }, { 2, -2 }, { 2, 0 }, { 2, 2 }, { 0, 2 }, 1441 { -2, 2 }, { -2, 0 } }, 1442 { { -4, -4 }, { 0, -4 }, { 4, -4 }, { 4, 0 }, { 4, 4 }, { 0, 4 }, 1443 { -4, 4 }, { -4, 0 } }, 1444 { { -8, -8 }, { 0, -8 }, { 8, -8 }, { 8, 0 }, { 8, 8 }, { 0, 8 }, 1445 { -8, 8 }, { -8, 0 } }, 1446 { { -16, -16 }, { 0, -16 }, { 16, -16 }, { 16, 0 }, { 16, 16 }, 1447 { 0, 16 }, { -16, 16 }, { -16, 0 } }, 1448 { { -32, -32 }, { 0, -32 }, { 32, -32 }, { 32, 0 }, { 32, 32 }, 1449 { 0, 32 }, { -32, 32 }, { -32, 0 } }, 1450 { { -64, -64 }, { 0, -64 }, { 64, -64 }, { 64, 0 }, { 64, 64 }, 1451 { 0, 64 }, { -64, 64 }, { -64, 0 } }, 1452 { { -128, -128 }, { 0, -128 }, { 128, -128 }, { 128, 0 }, { 128, 128 }, 1453 { 0, 128 }, { -128, 128 }, { -128, 0 } }, 1454 { { -256, -256 }, { 0, -256 }, { 256, -256 }, { 256, 0 }, { 256, 256 }, 1455 { 0, 256 }, { -256, 256 }, { -256, 0 } }, 1456 { { -512, -512 }, { 0, -512 }, { 512, -512 }, { 512, 0 }, { 512, 512 }, 1457 { 0, 512 }, { -512, 512 }, { -512, 0 } }, 1458 { { -1024, -1024 }, { 0, -1024 }, { 1024, -1024 }, { 1024, 0 }, 1459 { 1024, 1024 }, { 0, 1024 }, { -1024, 1024 }, { -1024, 0 } } 1460 }; 1461 /* clang-format on */ 1462 return vp9_pattern_search( 1463 x, ref_mv, search_param, sad_per_bit, do_init_search, cost_list, vfp, 1464 use_mvcost, center_mv, best_mv, square_num_candidates, square_candidates); 1465 } 1466 1467 static int fast_hex_search(const MACROBLOCK *x, MV *ref_mv, int search_param, 1468 int sad_per_bit, 1469 int do_init_search, // must be zero for fast_hex 1470 int *cost_list, const vp9_variance_fn_ptr_t *vfp, 1471 int use_mvcost, const MV *center_mv, MV *best_mv) { 1472 return hex_search(x, ref_mv, VPXMAX(MAX_MVSEARCH_STEPS - 2, search_param), 1473 sad_per_bit, do_init_search, cost_list, vfp, use_mvcost, 1474 center_mv, best_mv); 1475 } 1476 1477 static int fast_dia_search(const MACROBLOCK *x, MV *ref_mv, int search_param, 1478 int sad_per_bit, int do_init_search, int *cost_list, 1479 const vp9_variance_fn_ptr_t *vfp, int use_mvcost, 1480 const MV *center_mv, MV *best_mv) { 1481 return bigdia_search(x, ref_mv, VPXMAX(MAX_MVSEARCH_STEPS - 2, search_param), 1482 sad_per_bit, do_init_search, cost_list, vfp, use_mvcost, 1483 center_mv, best_mv); 1484 } 1485 1486 #undef CHECK_BETTER 1487 1488 // Exhuastive motion search around a given centre position with a given 1489 // step size. 1490 static int exhuastive_mesh_search(const MACROBLOCK *x, MV *ref_mv, MV *best_mv, 1491 int range, int step, int sad_per_bit, 1492 const vp9_variance_fn_ptr_t *fn_ptr, 1493 const MV *center_mv) { 1494 const MACROBLOCKD *const xd = &x->e_mbd; 1495 const struct buf_2d *const what = &x->plane[0].src; 1496 const struct buf_2d *const in_what = &xd->plane[0].pre[0]; 1497 MV fcenter_mv = { center_mv->row, center_mv->col }; 1498 unsigned int best_sad = INT_MAX; 1499 int r, c, i; 1500 int start_col, end_col, start_row, end_row; 1501 int col_step = (step > 1) ? step : 4; 1502 1503 assert(step >= 1); 1504 1505 clamp_mv(&fcenter_mv, x->mv_limits.col_min, x->mv_limits.col_max, 1506 x->mv_limits.row_min, x->mv_limits.row_max); 1507 *best_mv = fcenter_mv; 1508 best_sad = 1509 fn_ptr->sdf(what->buf, what->stride, 1510 get_buf_from_mv(in_what, &fcenter_mv), in_what->stride) + 1511 mvsad_err_cost(x, &fcenter_mv, ref_mv, sad_per_bit); 1512 start_row = VPXMAX(-range, x->mv_limits.row_min - fcenter_mv.row); 1513 start_col = VPXMAX(-range, x->mv_limits.col_min - fcenter_mv.col); 1514 end_row = VPXMIN(range, x->mv_limits.row_max - fcenter_mv.row); 1515 end_col = VPXMIN(range, x->mv_limits.col_max - fcenter_mv.col); 1516 1517 for (r = start_row; r <= end_row; r += step) { 1518 for (c = start_col; c <= end_col; c += col_step) { 1519 // Step > 1 means we are not checking every location in this pass. 1520 if (step > 1) { 1521 const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c }; 1522 unsigned int sad = 1523 fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &mv), 1524 in_what->stride); 1525 if (sad < best_sad) { 1526 sad += mvsad_err_cost(x, &mv, ref_mv, sad_per_bit); 1527 if (sad < best_sad) { 1528 best_sad = sad; 1529 *best_mv = mv; 1530 } 1531 } 1532 } else { 1533 // 4 sads in a single call if we are checking every location 1534 if (c + 3 <= end_col) { 1535 unsigned int sads[4]; 1536 const uint8_t *addrs[4]; 1537 for (i = 0; i < 4; ++i) { 1538 const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i }; 1539 addrs[i] = get_buf_from_mv(in_what, &mv); 1540 } 1541 fn_ptr->sdx4df(what->buf, what->stride, addrs, in_what->stride, sads); 1542 1543 for (i = 0; i < 4; ++i) { 1544 if (sads[i] < best_sad) { 1545 const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i }; 1546 const unsigned int sad = 1547 sads[i] + mvsad_err_cost(x, &mv, ref_mv, sad_per_bit); 1548 if (sad < best_sad) { 1549 best_sad = sad; 1550 *best_mv = mv; 1551 } 1552 } 1553 } 1554 } else { 1555 for (i = 0; i < end_col - c; ++i) { 1556 const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i }; 1557 unsigned int sad = 1558 fn_ptr->sdf(what->buf, what->stride, 1559 get_buf_from_mv(in_what, &mv), in_what->stride); 1560 if (sad < best_sad) { 1561 sad += mvsad_err_cost(x, &mv, ref_mv, sad_per_bit); 1562 if (sad < best_sad) { 1563 best_sad = sad; 1564 *best_mv = mv; 1565 } 1566 } 1567 } 1568 } 1569 } 1570 } 1571 } 1572 1573 return best_sad; 1574 } 1575 1576 int vp9_diamond_search_sad_c(const MACROBLOCK *x, const search_site_config *cfg, 1577 MV *ref_mv, MV *best_mv, int search_param, 1578 int sad_per_bit, int *num00, 1579 const vp9_variance_fn_ptr_t *fn_ptr, 1580 const MV *center_mv) { 1581 int i, j, step; 1582 1583 const MACROBLOCKD *const xd = &x->e_mbd; 1584 uint8_t *what = x->plane[0].src.buf; 1585 const int what_stride = x->plane[0].src.stride; 1586 const uint8_t *in_what; 1587 const int in_what_stride = xd->plane[0].pre[0].stride; 1588 const uint8_t *best_address; 1589 1590 unsigned int bestsad = INT_MAX; 1591 int best_site = -1; 1592 int last_site = -1; 1593 1594 int ref_row; 1595 int ref_col; 1596 1597 // search_param determines the length of the initial step and hence the number 1598 // of iterations. 1599 // 0 = initial step (MAX_FIRST_STEP) pel 1600 // 1 = (MAX_FIRST_STEP/2) pel, 1601 // 2 = (MAX_FIRST_STEP/4) pel... 1602 // const search_site *ss = &cfg->ss[search_param * cfg->searches_per_step]; 1603 const MV *ss_mv = &cfg->ss_mv[search_param * cfg->searches_per_step]; 1604 const intptr_t *ss_os = &cfg->ss_os[search_param * cfg->searches_per_step]; 1605 const int tot_steps = cfg->total_steps - search_param; 1606 1607 const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 }; 1608 clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max, 1609 x->mv_limits.row_min, x->mv_limits.row_max); 1610 ref_row = ref_mv->row; 1611 ref_col = ref_mv->col; 1612 *num00 = 0; 1613 best_mv->row = ref_row; 1614 best_mv->col = ref_col; 1615 1616 // Work out the start point for the search 1617 in_what = xd->plane[0].pre[0].buf + ref_row * in_what_stride + ref_col; 1618 best_address = in_what; 1619 1620 // Check the starting position 1621 bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride) + 1622 mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit); 1623 1624 i = 0; 1625 1626 for (step = 0; step < tot_steps; step++) { 1627 int all_in = 1, t; 1628 1629 // All_in is true if every one of the points we are checking are within 1630 // the bounds of the image. 1631 all_in &= ((best_mv->row + ss_mv[i].row) > x->mv_limits.row_min); 1632 all_in &= ((best_mv->row + ss_mv[i + 1].row) < x->mv_limits.row_max); 1633 all_in &= ((best_mv->col + ss_mv[i + 2].col) > x->mv_limits.col_min); 1634 all_in &= ((best_mv->col + ss_mv[i + 3].col) < x->mv_limits.col_max); 1635 1636 // If all the pixels are within the bounds we don't check whether the 1637 // search point is valid in this loop, otherwise we check each point 1638 // for validity.. 1639 if (all_in) { 1640 unsigned int sad_array[4]; 1641 1642 for (j = 0; j < cfg->searches_per_step; j += 4) { 1643 unsigned char const *block_offset[4]; 1644 1645 for (t = 0; t < 4; t++) block_offset[t] = ss_os[i + t] + best_address; 1646 1647 fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, 1648 sad_array); 1649 1650 for (t = 0; t < 4; t++, i++) { 1651 if (sad_array[t] < bestsad) { 1652 const MV this_mv = { best_mv->row + ss_mv[i].row, 1653 best_mv->col + ss_mv[i].col }; 1654 sad_array[t] += 1655 mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); 1656 if (sad_array[t] < bestsad) { 1657 bestsad = sad_array[t]; 1658 best_site = i; 1659 } 1660 } 1661 } 1662 } 1663 } else { 1664 for (j = 0; j < cfg->searches_per_step; j++) { 1665 // Trap illegal vectors 1666 const MV this_mv = { best_mv->row + ss_mv[i].row, 1667 best_mv->col + ss_mv[i].col }; 1668 1669 if (is_mv_in(&x->mv_limits, &this_mv)) { 1670 const uint8_t *const check_here = ss_os[i] + best_address; 1671 unsigned int thissad = 1672 fn_ptr->sdf(what, what_stride, check_here, in_what_stride); 1673 1674 if (thissad < bestsad) { 1675 thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); 1676 if (thissad < bestsad) { 1677 bestsad = thissad; 1678 best_site = i; 1679 } 1680 } 1681 } 1682 i++; 1683 } 1684 } 1685 if (best_site != last_site) { 1686 best_mv->row += ss_mv[best_site].row; 1687 best_mv->col += ss_mv[best_site].col; 1688 best_address += ss_os[best_site]; 1689 last_site = best_site; 1690 #if defined(NEW_DIAMOND_SEARCH) 1691 while (1) { 1692 const MV this_mv = { best_mv->row + ss_mv[best_site].row, 1693 best_mv->col + ss_mv[best_site].col }; 1694 if (is_mv_in(&x->mv_limits, &this_mv)) { 1695 const uint8_t *const check_here = ss_os[best_site] + best_address; 1696 unsigned int thissad = 1697 fn_ptr->sdf(what, what_stride, check_here, in_what_stride); 1698 if (thissad < bestsad) { 1699 thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); 1700 if (thissad < bestsad) { 1701 bestsad = thissad; 1702 best_mv->row += ss_mv[best_site].row; 1703 best_mv->col += ss_mv[best_site].col; 1704 best_address += ss_os[best_site]; 1705 continue; 1706 } 1707 } 1708 } 1709 break; 1710 } 1711 #endif 1712 } else if (best_address == in_what) { 1713 (*num00)++; 1714 } 1715 } 1716 return bestsad; 1717 } 1718 1719 static int vector_match(int16_t *ref, int16_t *src, int bwl) { 1720 int best_sad = INT_MAX; 1721 int this_sad; 1722 int d; 1723 int center, offset = 0; 1724 int bw = 4 << bwl; // redundant variable, to be changed in the experiments. 1725 for (d = 0; d <= bw; d += 16) { 1726 this_sad = vpx_vector_var(&ref[d], src, bwl); 1727 if (this_sad < best_sad) { 1728 best_sad = this_sad; 1729 offset = d; 1730 } 1731 } 1732 center = offset; 1733 1734 for (d = -8; d <= 8; d += 16) { 1735 int this_pos = offset + d; 1736 // check limit 1737 if (this_pos < 0 || this_pos > bw) continue; 1738 this_sad = vpx_vector_var(&ref[this_pos], src, bwl); 1739 if (this_sad < best_sad) { 1740 best_sad = this_sad; 1741 center = this_pos; 1742 } 1743 } 1744 offset = center; 1745 1746 for (d = -4; d <= 4; d += 8) { 1747 int this_pos = offset + d; 1748 // check limit 1749 if (this_pos < 0 || this_pos > bw) continue; 1750 this_sad = vpx_vector_var(&ref[this_pos], src, bwl); 1751 if (this_sad < best_sad) { 1752 best_sad = this_sad; 1753 center = this_pos; 1754 } 1755 } 1756 offset = center; 1757 1758 for (d = -2; d <= 2; d += 4) { 1759 int this_pos = offset + d; 1760 // check limit 1761 if (this_pos < 0 || this_pos > bw) continue; 1762 this_sad = vpx_vector_var(&ref[this_pos], src, bwl); 1763 if (this_sad < best_sad) { 1764 best_sad = this_sad; 1765 center = this_pos; 1766 } 1767 } 1768 offset = center; 1769 1770 for (d = -1; d <= 1; d += 2) { 1771 int this_pos = offset + d; 1772 // check limit 1773 if (this_pos < 0 || this_pos > bw) continue; 1774 this_sad = vpx_vector_var(&ref[this_pos], src, bwl); 1775 if (this_sad < best_sad) { 1776 best_sad = this_sad; 1777 center = this_pos; 1778 } 1779 } 1780 1781 return (center - (bw >> 1)); 1782 } 1783 1784 static const MV search_pos[4] = { 1785 { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 }, 1786 }; 1787 1788 unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x, 1789 BLOCK_SIZE bsize, int mi_row, 1790 int mi_col) { 1791 MACROBLOCKD *xd = &x->e_mbd; 1792 MODE_INFO *mi = xd->mi[0]; 1793 struct buf_2d backup_yv12[MAX_MB_PLANE] = { { 0, 0 } }; 1794 DECLARE_ALIGNED(16, int16_t, hbuf[128]); 1795 DECLARE_ALIGNED(16, int16_t, vbuf[128]); 1796 DECLARE_ALIGNED(16, int16_t, src_hbuf[64]); 1797 DECLARE_ALIGNED(16, int16_t, src_vbuf[64]); 1798 int idx; 1799 const int bw = 4 << b_width_log2_lookup[bsize]; 1800 const int bh = 4 << b_height_log2_lookup[bsize]; 1801 const int search_width = bw << 1; 1802 const int search_height = bh << 1; 1803 const int src_stride = x->plane[0].src.stride; 1804 const int ref_stride = xd->plane[0].pre[0].stride; 1805 uint8_t const *ref_buf, *src_buf; 1806 MV *tmp_mv = &xd->mi[0]->mv[0].as_mv; 1807 unsigned int best_sad, tmp_sad, this_sad[4]; 1808 MV this_mv; 1809 const int norm_factor = 3 + (bw >> 5); 1810 const YV12_BUFFER_CONFIG *scaled_ref_frame = 1811 vp9_get_scaled_ref_frame(cpi, mi->ref_frame[0]); 1812 1813 if (scaled_ref_frame) { 1814 int i; 1815 // Swap out the reference frame for a version that's been scaled to 1816 // match the resolution of the current frame, allowing the existing 1817 // motion search code to be used without additional modifications. 1818 for (i = 0; i < MAX_MB_PLANE; i++) backup_yv12[i] = xd->plane[i].pre[0]; 1819 vp9_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL); 1820 } 1821 1822 #if CONFIG_VP9_HIGHBITDEPTH 1823 // TODO(jingning): Implement integral projection functions for high bit-depth 1824 // setting and remove this part of code. 1825 if (xd->bd != 8) { 1826 unsigned int this_sad; 1827 tmp_mv->row = 0; 1828 tmp_mv->col = 0; 1829 this_sad = cpi->fn_ptr[bsize].sdf(x->plane[0].src.buf, src_stride, 1830 xd->plane[0].pre[0].buf, ref_stride); 1831 1832 if (scaled_ref_frame) { 1833 int i; 1834 for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i]; 1835 } 1836 return this_sad; 1837 } 1838 #endif 1839 1840 // Set up prediction 1-D reference set 1841 ref_buf = xd->plane[0].pre[0].buf - (bw >> 1); 1842 for (idx = 0; idx < search_width; idx += 16) { 1843 vpx_int_pro_row(&hbuf[idx], ref_buf, ref_stride, bh); 1844 ref_buf += 16; 1845 } 1846 1847 ref_buf = xd->plane[0].pre[0].buf - (bh >> 1) * ref_stride; 1848 for (idx = 0; idx < search_height; ++idx) { 1849 vbuf[idx] = vpx_int_pro_col(ref_buf, bw) >> norm_factor; 1850 ref_buf += ref_stride; 1851 } 1852 1853 // Set up src 1-D reference set 1854 for (idx = 0; idx < bw; idx += 16) { 1855 src_buf = x->plane[0].src.buf + idx; 1856 vpx_int_pro_row(&src_hbuf[idx], src_buf, src_stride, bh); 1857 } 1858 1859 src_buf = x->plane[0].src.buf; 1860 for (idx = 0; idx < bh; ++idx) { 1861 src_vbuf[idx] = vpx_int_pro_col(src_buf, bw) >> norm_factor; 1862 src_buf += src_stride; 1863 } 1864 1865 // Find the best match per 1-D search 1866 tmp_mv->col = vector_match(hbuf, src_hbuf, b_width_log2_lookup[bsize]); 1867 tmp_mv->row = vector_match(vbuf, src_vbuf, b_height_log2_lookup[bsize]); 1868 1869 this_mv = *tmp_mv; 1870 src_buf = x->plane[0].src.buf; 1871 ref_buf = xd->plane[0].pre[0].buf + this_mv.row * ref_stride + this_mv.col; 1872 best_sad = cpi->fn_ptr[bsize].sdf(src_buf, src_stride, ref_buf, ref_stride); 1873 1874 { 1875 const uint8_t *const pos[4] = { 1876 ref_buf - ref_stride, ref_buf - 1, ref_buf + 1, ref_buf + ref_stride, 1877 }; 1878 1879 cpi->fn_ptr[bsize].sdx4df(src_buf, src_stride, pos, ref_stride, this_sad); 1880 } 1881 1882 for (idx = 0; idx < 4; ++idx) { 1883 if (this_sad[idx] < best_sad) { 1884 best_sad = this_sad[idx]; 1885 tmp_mv->row = search_pos[idx].row + this_mv.row; 1886 tmp_mv->col = search_pos[idx].col + this_mv.col; 1887 } 1888 } 1889 1890 if (this_sad[0] < this_sad[3]) 1891 this_mv.row -= 1; 1892 else 1893 this_mv.row += 1; 1894 1895 if (this_sad[1] < this_sad[2]) 1896 this_mv.col -= 1; 1897 else 1898 this_mv.col += 1; 1899 1900 ref_buf = xd->plane[0].pre[0].buf + this_mv.row * ref_stride + this_mv.col; 1901 1902 tmp_sad = cpi->fn_ptr[bsize].sdf(src_buf, src_stride, ref_buf, ref_stride); 1903 if (best_sad > tmp_sad) { 1904 *tmp_mv = this_mv; 1905 best_sad = tmp_sad; 1906 } 1907 1908 tmp_mv->row *= 8; 1909 tmp_mv->col *= 8; 1910 1911 if (scaled_ref_frame) { 1912 int i; 1913 for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i]; 1914 } 1915 1916 return best_sad; 1917 } 1918 1919 // Runs sequence of diamond searches in smaller steps for RD. 1920 /* do_refine: If last step (1-away) of n-step search doesn't pick the center 1921 point as the best match, we will do a final 1-away diamond 1922 refining search */ 1923 static int full_pixel_diamond(const VP9_COMP *cpi, MACROBLOCK *x, MV *mvp_full, 1924 int step_param, int sadpb, int further_steps, 1925 int do_refine, int *cost_list, 1926 const vp9_variance_fn_ptr_t *fn_ptr, 1927 const MV *ref_mv, MV *dst_mv) { 1928 MV temp_mv; 1929 int thissme, n, num00 = 0; 1930 int bestsme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &temp_mv, 1931 step_param, sadpb, &n, fn_ptr, ref_mv); 1932 if (bestsme < INT_MAX) 1933 bestsme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1); 1934 *dst_mv = temp_mv; 1935 1936 // If there won't be more n-step search, check to see if refining search is 1937 // needed. 1938 if (n > further_steps) do_refine = 0; 1939 1940 while (n < further_steps) { 1941 ++n; 1942 1943 if (num00) { 1944 num00--; 1945 } else { 1946 thissme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &temp_mv, 1947 step_param + n, sadpb, &num00, fn_ptr, 1948 ref_mv); 1949 if (thissme < INT_MAX) 1950 thissme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1); 1951 1952 // check to see if refining search is needed. 1953 if (num00 > further_steps - n) do_refine = 0; 1954 1955 if (thissme < bestsme) { 1956 bestsme = thissme; 1957 *dst_mv = temp_mv; 1958 } 1959 } 1960 } 1961 1962 // final 1-away diamond refining search 1963 if (do_refine) { 1964 const int search_range = 8; 1965 MV best_mv = *dst_mv; 1966 thissme = vp9_refining_search_sad(x, &best_mv, sadpb, search_range, fn_ptr, 1967 ref_mv); 1968 if (thissme < INT_MAX) 1969 thissme = vp9_get_mvpred_var(x, &best_mv, ref_mv, fn_ptr, 1); 1970 if (thissme < bestsme) { 1971 bestsme = thissme; 1972 *dst_mv = best_mv; 1973 } 1974 } 1975 1976 // Return cost list. 1977 if (cost_list) { 1978 calc_int_cost_list(x, ref_mv, sadpb, fn_ptr, dst_mv, cost_list); 1979 } 1980 return bestsme; 1981 } 1982 1983 #define MIN_RANGE 7 1984 #define MAX_RANGE 256 1985 #define MIN_INTERVAL 1 1986 // Runs an limited range exhaustive mesh search using a pattern set 1987 // according to the encode speed profile. 1988 static int full_pixel_exhaustive(VP9_COMP *cpi, MACROBLOCK *x, 1989 MV *centre_mv_full, int sadpb, int *cost_list, 1990 const vp9_variance_fn_ptr_t *fn_ptr, 1991 const MV *ref_mv, MV *dst_mv) { 1992 const SPEED_FEATURES *const sf = &cpi->sf; 1993 MV temp_mv = { centre_mv_full->row, centre_mv_full->col }; 1994 MV f_ref_mv = { ref_mv->row >> 3, ref_mv->col >> 3 }; 1995 int bestsme; 1996 int i; 1997 int interval = sf->mesh_patterns[0].interval; 1998 int range = sf->mesh_patterns[0].range; 1999 int baseline_interval_divisor; 2000 2001 // Trap illegal values for interval and range for this function. 2002 if ((range < MIN_RANGE) || (range > MAX_RANGE) || (interval < MIN_INTERVAL) || 2003 (interval > range)) 2004 return INT_MAX; 2005 2006 baseline_interval_divisor = range / interval; 2007 2008 // Check size of proposed first range against magnitude of the centre 2009 // value used as a starting point. 2010 range = VPXMAX(range, (5 * VPXMAX(abs(temp_mv.row), abs(temp_mv.col))) / 4); 2011 range = VPXMIN(range, MAX_RANGE); 2012 interval = VPXMAX(interval, range / baseline_interval_divisor); 2013 2014 // initial search 2015 bestsme = exhuastive_mesh_search(x, &f_ref_mv, &temp_mv, range, interval, 2016 sadpb, fn_ptr, &temp_mv); 2017 2018 if ((interval > MIN_INTERVAL) && (range > MIN_RANGE)) { 2019 // Progressive searches with range and step size decreasing each time 2020 // till we reach a step size of 1. Then break out. 2021 for (i = 1; i < MAX_MESH_STEP; ++i) { 2022 // First pass with coarser step and longer range 2023 bestsme = exhuastive_mesh_search( 2024 x, &f_ref_mv, &temp_mv, sf->mesh_patterns[i].range, 2025 sf->mesh_patterns[i].interval, sadpb, fn_ptr, &temp_mv); 2026 2027 if (sf->mesh_patterns[i].interval == 1) break; 2028 } 2029 } 2030 2031 if (bestsme < INT_MAX) 2032 bestsme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1); 2033 *dst_mv = temp_mv; 2034 2035 // Return cost list. 2036 if (cost_list) { 2037 calc_int_cost_list(x, ref_mv, sadpb, fn_ptr, dst_mv, cost_list); 2038 } 2039 return bestsme; 2040 } 2041 2042 int vp9_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv, 2043 int sad_per_bit, int distance, 2044 const vp9_variance_fn_ptr_t *fn_ptr, 2045 const MV *center_mv, MV *best_mv) { 2046 int r, c; 2047 const MACROBLOCKD *const xd = &x->e_mbd; 2048 const struct buf_2d *const what = &x->plane[0].src; 2049 const struct buf_2d *const in_what = &xd->plane[0].pre[0]; 2050 const int row_min = VPXMAX(ref_mv->row - distance, x->mv_limits.row_min); 2051 const int row_max = VPXMIN(ref_mv->row + distance, x->mv_limits.row_max); 2052 const int col_min = VPXMAX(ref_mv->col - distance, x->mv_limits.col_min); 2053 const int col_max = VPXMIN(ref_mv->col + distance, x->mv_limits.col_max); 2054 const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 }; 2055 int best_sad = 2056 fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv), 2057 in_what->stride) + 2058 mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit); 2059 *best_mv = *ref_mv; 2060 2061 for (r = row_min; r < row_max; ++r) { 2062 for (c = col_min; c < col_max; ++c) { 2063 const MV mv = { r, c }; 2064 const int sad = 2065 fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &mv), 2066 in_what->stride) + 2067 mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit); 2068 if (sad < best_sad) { 2069 best_sad = sad; 2070 *best_mv = mv; 2071 } 2072 } 2073 } 2074 return best_sad; 2075 } 2076 2077 int vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv, 2078 int sad_per_bit, int distance, 2079 const vp9_variance_fn_ptr_t *fn_ptr, 2080 const MV *center_mv, MV *best_mv) { 2081 int r; 2082 const MACROBLOCKD *const xd = &x->e_mbd; 2083 const struct buf_2d *const what = &x->plane[0].src; 2084 const struct buf_2d *const in_what = &xd->plane[0].pre[0]; 2085 const int row_min = VPXMAX(ref_mv->row - distance, x->mv_limits.row_min); 2086 const int row_max = VPXMIN(ref_mv->row + distance, x->mv_limits.row_max); 2087 const int col_min = VPXMAX(ref_mv->col - distance, x->mv_limits.col_min); 2088 const int col_max = VPXMIN(ref_mv->col + distance, x->mv_limits.col_max); 2089 const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 }; 2090 unsigned int best_sad = 2091 fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv), 2092 in_what->stride) + 2093 mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit); 2094 *best_mv = *ref_mv; 2095 2096 for (r = row_min; r < row_max; ++r) { 2097 int c = col_min; 2098 const uint8_t *check_here = &in_what->buf[r * in_what->stride + c]; 2099 2100 if (fn_ptr->sdx3f != NULL) { 2101 while ((c + 2) < col_max) { 2102 int i; 2103 DECLARE_ALIGNED(16, uint32_t, sads[3]); 2104 2105 fn_ptr->sdx3f(what->buf, what->stride, check_here, in_what->stride, 2106 sads); 2107 2108 for (i = 0; i < 3; ++i) { 2109 unsigned int sad = sads[i]; 2110 if (sad < best_sad) { 2111 const MV mv = { r, c }; 2112 sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit); 2113 if (sad < best_sad) { 2114 best_sad = sad; 2115 *best_mv = mv; 2116 } 2117 } 2118 ++check_here; 2119 ++c; 2120 } 2121 } 2122 } 2123 2124 while (c < col_max) { 2125 unsigned int sad = 2126 fn_ptr->sdf(what->buf, what->stride, check_here, in_what->stride); 2127 if (sad < best_sad) { 2128 const MV mv = { r, c }; 2129 sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit); 2130 if (sad < best_sad) { 2131 best_sad = sad; 2132 *best_mv = mv; 2133 } 2134 } 2135 ++check_here; 2136 ++c; 2137 } 2138 } 2139 2140 return best_sad; 2141 } 2142 2143 int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv, 2144 int sad_per_bit, int distance, 2145 const vp9_variance_fn_ptr_t *fn_ptr, 2146 const MV *center_mv, MV *best_mv) { 2147 int r; 2148 const MACROBLOCKD *const xd = &x->e_mbd; 2149 const struct buf_2d *const what = &x->plane[0].src; 2150 const struct buf_2d *const in_what = &xd->plane[0].pre[0]; 2151 const int row_min = VPXMAX(ref_mv->row - distance, x->mv_limits.row_min); 2152 const int row_max = VPXMIN(ref_mv->row + distance, x->mv_limits.row_max); 2153 const int col_min = VPXMAX(ref_mv->col - distance, x->mv_limits.col_min); 2154 const int col_max = VPXMIN(ref_mv->col + distance, x->mv_limits.col_max); 2155 const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 }; 2156 unsigned int best_sad = 2157 fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv), 2158 in_what->stride) + 2159 mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit); 2160 *best_mv = *ref_mv; 2161 2162 for (r = row_min; r < row_max; ++r) { 2163 int c = col_min; 2164 const uint8_t *check_here = &in_what->buf[r * in_what->stride + c]; 2165 2166 if (fn_ptr->sdx8f != NULL) { 2167 while ((c + 7) < col_max) { 2168 int i; 2169 DECLARE_ALIGNED(16, uint32_t, sads[8]); 2170 2171 fn_ptr->sdx8f(what->buf, what->stride, check_here, in_what->stride, 2172 sads); 2173 2174 for (i = 0; i < 8; ++i) { 2175 unsigned int sad = sads[i]; 2176 if (sad < best_sad) { 2177 const MV mv = { r, c }; 2178 sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit); 2179 if (sad < best_sad) { 2180 best_sad = sad; 2181 *best_mv = mv; 2182 } 2183 } 2184 ++check_here; 2185 ++c; 2186 } 2187 } 2188 } 2189 2190 if (fn_ptr->sdx3f != NULL) { 2191 while ((c + 2) < col_max) { 2192 int i; 2193 DECLARE_ALIGNED(16, uint32_t, sads[3]); 2194 2195 fn_ptr->sdx3f(what->buf, what->stride, check_here, in_what->stride, 2196 sads); 2197 2198 for (i = 0; i < 3; ++i) { 2199 unsigned int sad = sads[i]; 2200 if (sad < best_sad) { 2201 const MV mv = { r, c }; 2202 sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit); 2203 if (sad < best_sad) { 2204 best_sad = sad; 2205 *best_mv = mv; 2206 } 2207 } 2208 ++check_here; 2209 ++c; 2210 } 2211 } 2212 } 2213 2214 while (c < col_max) { 2215 unsigned int sad = 2216 fn_ptr->sdf(what->buf, what->stride, check_here, in_what->stride); 2217 if (sad < best_sad) { 2218 const MV mv = { r, c }; 2219 sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit); 2220 if (sad < best_sad) { 2221 best_sad = sad; 2222 *best_mv = mv; 2223 } 2224 } 2225 ++check_here; 2226 ++c; 2227 } 2228 } 2229 2230 return best_sad; 2231 } 2232 2233 int vp9_refining_search_sad(const MACROBLOCK *x, MV *ref_mv, int error_per_bit, 2234 int search_range, 2235 const vp9_variance_fn_ptr_t *fn_ptr, 2236 const MV *center_mv) { 2237 const MACROBLOCKD *const xd = &x->e_mbd; 2238 const MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } }; 2239 const struct buf_2d *const what = &x->plane[0].src; 2240 const struct buf_2d *const in_what = &xd->plane[0].pre[0]; 2241 const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 }; 2242 const uint8_t *best_address = get_buf_from_mv(in_what, ref_mv); 2243 unsigned int best_sad = 2244 fn_ptr->sdf(what->buf, what->stride, best_address, in_what->stride) + 2245 mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit); 2246 int i, j; 2247 2248 for (i = 0; i < search_range; i++) { 2249 int best_site = -1; 2250 const int all_in = ((ref_mv->row - 1) > x->mv_limits.row_min) & 2251 ((ref_mv->row + 1) < x->mv_limits.row_max) & 2252 ((ref_mv->col - 1) > x->mv_limits.col_min) & 2253 ((ref_mv->col + 1) < x->mv_limits.col_max); 2254 2255 if (all_in) { 2256 unsigned int sads[4]; 2257 const uint8_t *const positions[4] = { best_address - in_what->stride, 2258 best_address - 1, best_address + 1, 2259 best_address + in_what->stride }; 2260 2261 fn_ptr->sdx4df(what->buf, what->stride, positions, in_what->stride, sads); 2262 2263 for (j = 0; j < 4; ++j) { 2264 if (sads[j] < best_sad) { 2265 const MV mv = { ref_mv->row + neighbors[j].row, 2266 ref_mv->col + neighbors[j].col }; 2267 sads[j] += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit); 2268 if (sads[j] < best_sad) { 2269 best_sad = sads[j]; 2270 best_site = j; 2271 } 2272 } 2273 } 2274 } else { 2275 for (j = 0; j < 4; ++j) { 2276 const MV mv = { ref_mv->row + neighbors[j].row, 2277 ref_mv->col + neighbors[j].col }; 2278 2279 if (is_mv_in(&x->mv_limits, &mv)) { 2280 unsigned int sad = 2281 fn_ptr->sdf(what->buf, what->stride, 2282 get_buf_from_mv(in_what, &mv), in_what->stride); 2283 if (sad < best_sad) { 2284 sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit); 2285 if (sad < best_sad) { 2286 best_sad = sad; 2287 best_site = j; 2288 } 2289 } 2290 } 2291 } 2292 } 2293 2294 if (best_site == -1) { 2295 break; 2296 } else { 2297 ref_mv->row += neighbors[best_site].row; 2298 ref_mv->col += neighbors[best_site].col; 2299 best_address = get_buf_from_mv(in_what, ref_mv); 2300 } 2301 } 2302 2303 return best_sad; 2304 } 2305 2306 // This function is called when we do joint motion search in comp_inter_inter 2307 // mode. 2308 int vp9_refining_search_8p_c(const MACROBLOCK *x, MV *ref_mv, int error_per_bit, 2309 int search_range, 2310 const vp9_variance_fn_ptr_t *fn_ptr, 2311 const MV *center_mv, const uint8_t *second_pred) { 2312 const MV neighbors[8] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 }, 2313 { -1, -1 }, { 1, -1 }, { -1, 1 }, { 1, 1 } }; 2314 const MACROBLOCKD *const xd = &x->e_mbd; 2315 const struct buf_2d *const what = &x->plane[0].src; 2316 const struct buf_2d *const in_what = &xd->plane[0].pre[0]; 2317 const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 }; 2318 unsigned int best_sad = INT_MAX; 2319 int i, j; 2320 clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max, 2321 x->mv_limits.row_min, x->mv_limits.row_max); 2322 best_sad = 2323 fn_ptr->sdaf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv), 2324 in_what->stride, second_pred) + 2325 mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit); 2326 2327 for (i = 0; i < search_range; ++i) { 2328 int best_site = -1; 2329 2330 for (j = 0; j < 8; ++j) { 2331 const MV mv = { ref_mv->row + neighbors[j].row, 2332 ref_mv->col + neighbors[j].col }; 2333 2334 if (is_mv_in(&x->mv_limits, &mv)) { 2335 unsigned int sad = 2336 fn_ptr->sdaf(what->buf, what->stride, get_buf_from_mv(in_what, &mv), 2337 in_what->stride, second_pred); 2338 if (sad < best_sad) { 2339 sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit); 2340 if (sad < best_sad) { 2341 best_sad = sad; 2342 best_site = j; 2343 } 2344 } 2345 } 2346 } 2347 2348 if (best_site == -1) { 2349 break; 2350 } else { 2351 ref_mv->row += neighbors[best_site].row; 2352 ref_mv->col += neighbors[best_site].col; 2353 } 2354 } 2355 return best_sad; 2356 } 2357 2358 int vp9_full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, 2359 MV *mvp_full, int step_param, int search_method, 2360 int error_per_bit, int *cost_list, const MV *ref_mv, 2361 MV *tmp_mv, int var_max, int rd) { 2362 const SPEED_FEATURES *const sf = &cpi->sf; 2363 const SEARCH_METHODS method = (SEARCH_METHODS)search_method; 2364 vp9_variance_fn_ptr_t *fn_ptr = &cpi->fn_ptr[bsize]; 2365 int var = 0; 2366 if (cost_list) { 2367 cost_list[0] = INT_MAX; 2368 cost_list[1] = INT_MAX; 2369 cost_list[2] = INT_MAX; 2370 cost_list[3] = INT_MAX; 2371 cost_list[4] = INT_MAX; 2372 } 2373 2374 switch (method) { 2375 case FAST_DIAMOND: 2376 var = fast_dia_search(x, mvp_full, step_param, error_per_bit, 0, 2377 cost_list, fn_ptr, 1, ref_mv, tmp_mv); 2378 break; 2379 case FAST_HEX: 2380 var = fast_hex_search(x, mvp_full, step_param, error_per_bit, 0, 2381 cost_list, fn_ptr, 1, ref_mv, tmp_mv); 2382 break; 2383 case HEX: 2384 var = hex_search(x, mvp_full, step_param, error_per_bit, 1, cost_list, 2385 fn_ptr, 1, ref_mv, tmp_mv); 2386 break; 2387 case SQUARE: 2388 var = square_search(x, mvp_full, step_param, error_per_bit, 1, cost_list, 2389 fn_ptr, 1, ref_mv, tmp_mv); 2390 break; 2391 case BIGDIA: 2392 var = bigdia_search(x, mvp_full, step_param, error_per_bit, 1, cost_list, 2393 fn_ptr, 1, ref_mv, tmp_mv); 2394 break; 2395 case NSTEP: 2396 var = full_pixel_diamond(cpi, x, mvp_full, step_param, error_per_bit, 2397 MAX_MVSEARCH_STEPS - 1 - step_param, 1, 2398 cost_list, fn_ptr, ref_mv, tmp_mv); 2399 2400 // Should we allow a follow on exhaustive search? 2401 if ((sf->exhaustive_searches_thresh < INT_MAX) && 2402 !cpi->rc.is_src_frame_alt_ref) { 2403 int64_t exhuastive_thr = sf->exhaustive_searches_thresh; 2404 exhuastive_thr >>= 2405 8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]); 2406 2407 // Threshold variance for an exhaustive full search. 2408 if (var > exhuastive_thr) { 2409 int var_ex; 2410 MV tmp_mv_ex; 2411 var_ex = full_pixel_exhaustive(cpi, x, tmp_mv, error_per_bit, 2412 cost_list, fn_ptr, ref_mv, &tmp_mv_ex); 2413 2414 if (var_ex < var) { 2415 var = var_ex; 2416 *tmp_mv = tmp_mv_ex; 2417 } 2418 } 2419 } 2420 break; 2421 default: assert(0 && "Invalid search method."); 2422 } 2423 2424 if (method != NSTEP && rd && var < var_max) 2425 var = vp9_get_mvpred_var(x, tmp_mv, ref_mv, fn_ptr, 1); 2426 2427 return var; 2428 } 2429 2430 // Note(yunqingwang): The following 2 functions are only used in the motion 2431 // vector unit test, which return extreme motion vectors allowed by the MV 2432 // limits. 2433 #define COMMON_MV_TEST \ 2434 SETUP_SUBPEL_SEARCH; \ 2435 \ 2436 (void)error_per_bit; \ 2437 (void)vfp; \ 2438 (void)z; \ 2439 (void)src_stride; \ 2440 (void)y; \ 2441 (void)y_stride; \ 2442 (void)second_pred; \ 2443 (void)w; \ 2444 (void)h; \ 2445 (void)offset; \ 2446 (void)mvjcost; \ 2447 (void)mvcost; \ 2448 (void)sse1; \ 2449 (void)distortion; \ 2450 \ 2451 (void)halfiters; \ 2452 (void)quarteriters; \ 2453 (void)eighthiters; \ 2454 (void)whichdir; \ 2455 (void)allow_hp; \ 2456 (void)forced_stop; \ 2457 (void)hstep; \ 2458 (void)rr; \ 2459 (void)rc; \ 2460 \ 2461 (void)tr; \ 2462 (void)tc; \ 2463 (void)sse; \ 2464 (void)thismse; \ 2465 (void)cost_list; 2466 2467 // Return the maximum MV. 2468 uint32_t vp9_return_max_sub_pixel_mv( 2469 const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp, 2470 int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop, 2471 int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2], 2472 uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w, 2473 int h) { 2474 COMMON_MV_TEST; 2475 2476 (void)minr; 2477 (void)minc; 2478 2479 bestmv->row = maxr; 2480 bestmv->col = maxc; 2481 besterr = 0; 2482 2483 // In the sub-pel motion search, if hp is not used, then the last bit of mv 2484 // has to be 0. 2485 lower_mv_precision(bestmv, allow_hp && use_mv_hp(ref_mv)); 2486 2487 return besterr; 2488 } 2489 // Return the minimum MV. 2490 uint32_t vp9_return_min_sub_pixel_mv( 2491 const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp, 2492 int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop, 2493 int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2], 2494 uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w, 2495 int h) { 2496 COMMON_MV_TEST; 2497 2498 (void)maxr; 2499 (void)maxc; 2500 2501 bestmv->row = minr; 2502 bestmv->col = minc; 2503 besterr = 0; 2504 2505 // In the sub-pel motion search, if hp is not used, then the last bit of mv 2506 // has to be 0. 2507 lower_mv_precision(bestmv, allow_hp && use_mv_hp(ref_mv)); 2508 2509 return besterr; 2510 } 2511