1 /* 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 12 #include "mcomp.h" 13 #include "vpx_mem/vpx_mem.h" 14 15 #include <stdio.h> 16 #include <limits.h> 17 #include <math.h> 18 19 #ifdef ENTROPY_STATS 20 static int mv_ref_ct [31] [4] [2]; 21 static int mv_mode_cts [4] [2]; 22 #endif 23 24 static int mv_bits_sadcost[256]; 25 26 extern unsigned int vp8_sub_pixel_variance16x16s_neon 27 ( 28 unsigned char *src_ptr, 29 int src_pixels_per_line, 30 int xoffset, 31 int yoffset, 32 unsigned char *dst_ptr, 33 int dst_pixels_per_line, 34 unsigned int *sse 35 ); 36 extern unsigned int vp8_sub_pixel_variance16x16s_4_0_neon 37 ( 38 unsigned char *src_ptr, 39 int src_pixels_per_line, 40 unsigned char *dst_ptr, 41 int dst_pixels_per_line, 42 unsigned int *sse 43 ); 44 extern unsigned int vp8_sub_pixel_variance16x16s_0_4_neon 45 ( 46 unsigned char *src_ptr, 47 int src_pixels_per_line, 48 unsigned char *dst_ptr, 49 int dst_pixels_per_line, 50 unsigned int *sse 51 ); 52 extern unsigned int vp8_sub_pixel_variance16x16s_4_4_neon 53 ( 54 unsigned char *src_ptr, 55 int src_pixels_per_line, 56 unsigned char *dst_ptr, 57 int dst_pixels_per_line, 58 unsigned int *sse 59 ); 60 61 void vp8cx_init_mv_bits_sadcost() 62 { 63 int i; 64 65 for (i = 0; i < 256; i++) 66 { 67 mv_bits_sadcost[i] = (int)sqrt(i * 16); 68 } 69 } 70 71 72 int vp8_mv_bit_cost(MV *mv, MV *ref, int *mvcost[2], int Weight) 73 { 74 // MV costing is based on the distribution of vectors in the previous frame and as such will tend to 75 // over state the cost of vectors. 
In addition coding a new vector can have a knock on effect on the 76 // cost of subsequent vectors and the quality of prediction from NEAR and NEAREST for subsequent blocks. 77 // The "Weight" parameter allows, to a limited extent, for some account to be taken of these factors. 78 return ((mvcost[0][(mv->row - ref->row) >> 1] + mvcost[1][(mv->col - ref->col) >> 1]) * Weight) >> 7; 79 } 80 81 int vp8_mv_err_cost(MV *mv, MV *ref, int *mvcost[2], int error_per_bit) 82 { 83 //int i; 84 //return ((mvcost[0][(mv->row - ref->row)>>1] + mvcost[1][(mv->col - ref->col)>>1] + 128) * error_per_bit) >> 8; 85 //return ( (vp8_mv_bit_cost(mv, ref, mvcost, 100) + 128) * error_per_bit) >> 8; 86 87 //i = (vp8_mv_bit_cost(mv, ref, mvcost, 100) * error_per_bit + 128) >> 8; 88 return ((mvcost[0][(mv->row - ref->row) >> 1] + mvcost[1][(mv->col - ref->col) >> 1]) * error_per_bit + 128) >> 8; 89 //return (vp8_mv_bit_cost(mv, ref, mvcost, 128) * error_per_bit + 128) >> 8; 90 } 91 92 93 static int mv_bits(MV *mv, MV *ref, int *mvcost[2]) 94 { 95 // get the estimated number of bits for a motion vector, to be used for costing in SAD based 96 // motion estimation 97 return ((mvcost[0][(mv->row - ref->row) >> 1] + mvcost[1][(mv->col - ref->col)>> 1]) + 128) >> 8; 98 } 99 100 void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride) 101 { 102 int Len; 103 int search_site_count = 0; 104 105 106 // Generate offsets for 4 search sites per step. 107 Len = MAX_FIRST_STEP; 108 x->ss[search_site_count].mv.col = 0; 109 x->ss[search_site_count].mv.row = 0; 110 x->ss[search_site_count].offset = 0; 111 search_site_count++; 112 113 while (Len > 0) 114 { 115 116 // Compute offsets for search sites. 117 x->ss[search_site_count].mv.col = 0; 118 x->ss[search_site_count].mv.row = -Len; 119 x->ss[search_site_count].offset = -Len * stride; 120 search_site_count++; 121 122 // Compute offsets for search sites. 
123 x->ss[search_site_count].mv.col = 0; 124 x->ss[search_site_count].mv.row = Len; 125 x->ss[search_site_count].offset = Len * stride; 126 search_site_count++; 127 128 // Compute offsets for search sites. 129 x->ss[search_site_count].mv.col = -Len; 130 x->ss[search_site_count].mv.row = 0; 131 x->ss[search_site_count].offset = -Len; 132 search_site_count++; 133 134 // Compute offsets for search sites. 135 x->ss[search_site_count].mv.col = Len; 136 x->ss[search_site_count].mv.row = 0; 137 x->ss[search_site_count].offset = Len; 138 search_site_count++; 139 140 // Contract. 141 Len /= 2; 142 } 143 144 x->ss_count = search_site_count; 145 x->searches_per_step = 4; 146 } 147 148 void vp8_init3smotion_compensation(MACROBLOCK *x, int stride) 149 { 150 int Len; 151 int search_site_count = 0; 152 153 // Generate offsets for 8 search sites per step. 154 Len = MAX_FIRST_STEP; 155 x->ss[search_site_count].mv.col = 0; 156 x->ss[search_site_count].mv.row = 0; 157 x->ss[search_site_count].offset = 0; 158 search_site_count++; 159 160 while (Len > 0) 161 { 162 163 // Compute offsets for search sites. 164 x->ss[search_site_count].mv.col = 0; 165 x->ss[search_site_count].mv.row = -Len; 166 x->ss[search_site_count].offset = -Len * stride; 167 search_site_count++; 168 169 // Compute offsets for search sites. 170 x->ss[search_site_count].mv.col = 0; 171 x->ss[search_site_count].mv.row = Len; 172 x->ss[search_site_count].offset = Len * stride; 173 search_site_count++; 174 175 // Compute offsets for search sites. 176 x->ss[search_site_count].mv.col = -Len; 177 x->ss[search_site_count].mv.row = 0; 178 x->ss[search_site_count].offset = -Len; 179 search_site_count++; 180 181 // Compute offsets for search sites. 182 x->ss[search_site_count].mv.col = Len; 183 x->ss[search_site_count].mv.row = 0; 184 x->ss[search_site_count].offset = Len; 185 search_site_count++; 186 187 // Compute offsets for search sites. 
188 x->ss[search_site_count].mv.col = -Len; 189 x->ss[search_site_count].mv.row = -Len; 190 x->ss[search_site_count].offset = -Len * stride - Len; 191 search_site_count++; 192 193 // Compute offsets for search sites. 194 x->ss[search_site_count].mv.col = Len; 195 x->ss[search_site_count].mv.row = -Len; 196 x->ss[search_site_count].offset = -Len * stride + Len; 197 search_site_count++; 198 199 // Compute offsets for search sites. 200 x->ss[search_site_count].mv.col = -Len; 201 x->ss[search_site_count].mv.row = Len; 202 x->ss[search_site_count].offset = Len * stride - Len; 203 search_site_count++; 204 205 // Compute offsets for search sites. 206 x->ss[search_site_count].mv.col = Len; 207 x->ss[search_site_count].mv.row = Len; 208 x->ss[search_site_count].offset = Len * stride + Len; 209 search_site_count++; 210 211 212 // Contract. 213 Len /= 2; 214 } 215 216 x->ss_count = search_site_count; 217 x->searches_per_step = 8; 218 } 219 220 221 #define MVC(r,c) (((mvcost[0][(r)-rr] + mvcost[1][(c) - rc]) * error_per_bit + 128 )>>8 ) // estimated cost of a motion vector (r,c) 222 #define PRE(r,c) (*(d->base_pre) + d->pre + ((r)>>2) * d->pre_stride + ((c)>>2)) // pointer to predictor base of a motionvector 223 #define SP(x) (((x)&3)<<1) // convert motion vector component to offset for svf calc 224 #define DIST(r,c) svf( PRE(r,c), d->pre_stride, SP(c),SP(r), z,b->src_stride,&sse) // returns subpixel variance error function. 
// IFMVCV: execute statement s only when candidate (r,c) lies inside the
// clamped search bounds minc/maxc/minr/maxr; otherwise execute e.
#define IFMVCV(r,c,s,e) if ( c >= minc && c <= maxc && r >= minr && r <= maxr) s else e;
#define ERR(r,c) (MVC(r,c)+DIST(r,c)) // returns distortion + motion vector cost
#define CHECK_BETTER(v,r,c) IFMVCV(r,c,{if((v = ERR(r,c)) < besterr) { besterr = v; br=r; bc=c; }}, v=INT_MAX;)// checks if (r,c) has better score than previous best
#define MIN(x,y) (((x)<(y))?(x):(y))
#define MAX(x,y) (((x)>(y))?(x):(y))

//#define CHECK_BETTER(v,r,c) if((v = ERR(r,c)) < besterr) { besterr = v; br=r; bc=c; }

// Iterative sub-pixel refinement around the full-pel *bestmv.
// Works in quarter-pel units internally (br/bc); runs up to 3 half-pel
// passes then up to 3 quarter-pel passes (the counters start at 4 but the
// pre-decrement loops iterate 3 times each), checking the 4 cardinal
// neighbours plus one diagonal chosen from the better of left/right and
// up/down.  On return *bestmv is in eighth-pel units (<< 3 overall).
// Returns distortion + MV rate, or INT_MAX for an uncodable vector.
int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, vp8_subpixvariance_fn_t svf, vp8_variance_fn_t vf, int *mvcost[2])
{
    unsigned char *y = *(d->base_pre) + d->pre + (bestmv->row) * d->pre_stride + bestmv->col;
    unsigned char *z = (*(b->base_src) + b->src);

    // rr/rc: reference MV in half-pel units (for MVC); br/bc: current best
    // in quarter-pel units; tr/tc: centre of the current iteration.
    int rr = ref_mv->row >> 1, rc = ref_mv->col >> 1;
    int br = bestmv->row << 2, bc = bestmv->col << 2;
    int tr = br, tc = bc;
    unsigned int besterr = INT_MAX;
    unsigned int left, right, up, down, diag;
    unsigned int sse;
    unsigned int whichdir;
    unsigned int halfiters = 4;
    unsigned int quarteriters = 4;

    // Clamp the search window both to the frame bounds (quarter-pel) and
    // to the codable MV range around the reference vector.
    int minc = MAX(x->mv_col_min << 2, (ref_mv->col >> 1) - ((1 << mvlong_width) - 1));
    int maxc = MIN(x->mv_col_max << 2, (ref_mv->col >> 1) + ((1 << mvlong_width) - 1));
    int minr = MAX(x->mv_row_min << 2, (ref_mv->row >> 1) - ((1 << mvlong_width) - 1));
    int maxr = MIN(x->mv_row_max << 2, (ref_mv->row >> 1) + ((1 << mvlong_width) - 1));

    // central mv
    bestmv->row <<= 3;
    bestmv->col <<= 3;

    // calculate central point error
    besterr = vf(y, d->pre_stride, z, b->src_stride, &sse);
    besterr += vp8_mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);

    // TODO: Each subsequent iteration checks at least one point in common with the last iteration could be 2 ( if diag selected)
    while (--halfiters)
    {
        // 1/2 pel
        CHECK_BETTER(left, tr, tc - 2);
        CHECK_BETTER(right, tr, tc + 2);
        CHECK_BETTER(up, tr - 2, tc);
        CHECK_BETTER(down, tr + 2, tc);

        whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);

        switch (whichdir)
        {
        case 0:
            CHECK_BETTER(diag, tr - 2, tc - 2);
            break;
        case 1:
            CHECK_BETTER(diag, tr - 2, tc + 2);
            break;
        case 2:
            CHECK_BETTER(diag, tr + 2, tc - 2);
            break;
        case 3:
            CHECK_BETTER(diag, tr + 2, tc + 2);
            break;
        }

        // no reason to check the same one again.
        if (tr == br && tc == bc)
            break;

        tr = br;
        tc = bc;
    }

    // TODO: Each subsequent iteration checks at least one point in common with the last iteration could be 2 ( if diag selected)
    // 1/4 pel
    while (--quarteriters)
    {
        CHECK_BETTER(left, tr, tc - 1);
        CHECK_BETTER(right, tr, tc + 1);
        CHECK_BETTER(up, tr - 1, tc);
        CHECK_BETTER(down, tr + 1, tc);

        whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);

        switch (whichdir)
        {
        case 0:
            CHECK_BETTER(diag, tr - 1, tc - 1);
            break;
        case 1:
            CHECK_BETTER(diag, tr - 1, tc + 1);
            break;
        case 2:
            CHECK_BETTER(diag, tr + 1, tc - 1);
            break;
        case 3:
            CHECK_BETTER(diag, tr + 1, tc + 1);
            break;
        }

        // no reason to check the same one again.
        if (tr == br && tc == bc)
            break;

        tr = br;
        tc = bc;
    }

    // Convert the quarter-pel result to eighth-pel units.
    bestmv->row = br << 1;
    bestmv->col = bc << 1;

    if ((abs(bestmv->col - ref_mv->col) > MAX_FULL_PEL_VAL) || (abs(bestmv->row - ref_mv->row) > MAX_FULL_PEL_VAL))
        return INT_MAX;

    return besterr;
}
#undef MVC
#undef PRE
#undef SP
#undef DIST
#undef ERR
#undef CHECK_BETTER
#undef MIN
#undef MAX
// One-shot sub-pixel search: evaluates the 4 half-pel neighbours plus one
// diagonal using the fixed-offset NEON 16x16 kernels, then repeats the
// pattern at quarter-pel precision through the generic svf callback.
// *bestmv enters in full-pel units and leaves in eighth-pel units.
// Returns the best distortion + MV rate, or INT_MAX if the start vector
// is uncodable.  NOTE(review): despite taking generic svf/vf pointers,
// the half-pel stage hard-codes the 16x16 NEON kernels, so this routine
// appears 16x16-only -- confirm against callers.
int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, vp8_subpixvariance_fn_t svf, vp8_variance_fn_t vf, int *mvcost[2])
{
    int bestmse = INT_MAX;
    MV startmv;
    //MV this_mv;
    MV this_mv;
    unsigned char *y = *(d->base_pre) + d->pre + (bestmv->row) * d->pre_stride + bestmv->col;
    unsigned char *z = (*(b->base_src) + b->src);
    int left, right, up, down, diag;
    unsigned int sse;
    int whichdir ;


    // Trap uncodable vectors
    if ((abs((bestmv->col << 3) - ref_mv->col) > MAX_FULL_PEL_VAL) || (abs((bestmv->row << 3) - ref_mv->row) > MAX_FULL_PEL_VAL))
    {
        bestmv->row <<= 3;
        bestmv->col <<= 3;
        return INT_MAX;
    }

    // central mv
    bestmv->row <<= 3;
    bestmv->col <<= 3;
    startmv = *bestmv;

    // calculate central point error
    bestmse = vf(y, d->pre_stride, z, b->src_stride, &sse);
    bestmse += vp8_mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);

    // go left then right and check error
    // (startmv.col - 8) | 4 == startmv.col - 4 here since startmv is at
    // full-pel (low 3 bits zero): the half-pel position one to the left.
    this_mv.row = startmv.row;
    this_mv.col = ((startmv.col - 8) | 4);
    left = vp8_sub_pixel_variance16x16s_4_0_neon(y - 1, d->pre_stride, z, b->src_stride, &sse);
    left += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (left < bestmse)
    {
        *bestmv = this_mv;
        bestmse = left;
    }

    this_mv.col += 8;
    right = vp8_sub_pixel_variance16x16s_4_0_neon(y, d->pre_stride, z, b->src_stride, &sse);
    right += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (right < bestmse)
    {
        *bestmv = this_mv;
        bestmse = right;
    }

    // go up then down and check error
    this_mv.col = startmv.col;
    this_mv.row = ((startmv.row - 8) | 4);
    up = vp8_sub_pixel_variance16x16s_0_4_neon(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
    up += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (up < bestmse)
    {
        *bestmv = this_mv;
        bestmse = up;
    }

    this_mv.row += 8;
    down = vp8_sub_pixel_variance16x16s_0_4_neon(y, d->pre_stride, z, b->src_stride, &sse);
    down += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (down < bestmse)
    {
        *bestmv = this_mv;
        bestmse = down;
    }


    // now check 1 more diagonal
    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
    //for(whichdir =0;whichdir<4;whichdir++)
    //{
    this_mv = startmv;

    switch (whichdir)
    {
    case 0:
        this_mv.col = (this_mv.col - 8) | 4;
        this_mv.row = (this_mv.row - 8) | 4;
        diag = vp8_sub_pixel_variance16x16s_4_4_neon(y - 1 - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
        break;
    case 1:
        this_mv.col += 4;
        this_mv.row = (this_mv.row - 8) | 4;
        diag = vp8_sub_pixel_variance16x16s_4_4_neon(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
        break;
    case 2:
        this_mv.col = (this_mv.col - 8) | 4;
        this_mv.row += 4;
        diag = vp8_sub_pixel_variance16x16s_4_4_neon(y - 1, d->pre_stride, z, b->src_stride, &sse);
        break;
    case 3:
        this_mv.col += 4;
        this_mv.row += 4;
        diag = vp8_sub_pixel_variance16x16s_4_4_neon(y, d->pre_stride, z, b->src_stride, &sse);
        break;
    }

    diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (diag < bestmse)
    {
        *bestmv = this_mv;
        bestmse = diag;
    }

    // }


    // time to check quarter pels.
    // Re-anchor y to the integer pel at/above-left of the half-pel winner
    // so the svf offsets (low 3 bits of the MV) address the right taps.
    if (bestmv->row < startmv.row)
        y -= d->pre_stride;

    if (bestmv->col < startmv.col)
        y--;

    startmv = *bestmv;



    // go left then right and check error
    this_mv.row = startmv.row;

    if (startmv.col & 7)
    {
        this_mv.col = startmv.col - 2;
        left = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
    }
    else
    {
        // Crossing an integer-pel boundary: step the base pointer instead.
        this_mv.col = (startmv.col - 8) | 6;
        left = svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);
    }

    left += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (left < bestmse)
    {
        *bestmv = this_mv;
        bestmse = left;
    }

    this_mv.col += 4;
    right = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
    right += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (right < bestmse)
    {
        *bestmv = this_mv;
        bestmse = right;
    }

    // go up then down and check error
    this_mv.col = startmv.col;

    if (startmv.row & 7)
    {
        this_mv.row = startmv.row - 2;
        up = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
    }
    else
    {
        this_mv.row = (startmv.row - 8) | 6;
        up = svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);
    }

    up += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (up < bestmse)
    {
        *bestmv = this_mv;
        bestmse = up;
    }

    this_mv.row += 4;
    down = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
    down += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (down < bestmse)
    {
        *bestmv = this_mv;
        bestmse = down;
    }


    // now check 1 more diagonal
    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);

    // for(whichdir=0;whichdir<4;whichdir++)
    // {
    this_mv = startmv;

    switch (whichdir)
    {
    case 0:

        if (startmv.row & 7)
        {
            this_mv.row -= 2;

            if (startmv.col & 7)
            {
                this_mv.col -= 2;
                diag = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
            }
            else
            {
                this_mv.col = (startmv.col - 8) | 6;
                diag = svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);;
            }
        }
        else
        {
            this_mv.row = (startmv.row - 8) | 6;

            if (startmv.col & 7)
            {
                this_mv.col -= 2;
                diag = svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);
            }
            else
            {
                this_mv.col = (startmv.col - 8) | 6;
                diag = svf(y - d->pre_stride - 1, d->pre_stride, 6, 6, z, b->src_stride, &sse);
            }
        }

        break;
    case 1:
        this_mv.col += 2;

        if (startmv.row & 7)
        {
            this_mv.row -= 2;
            diag = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
        }
        else
        {
            this_mv.row = (startmv.row - 8) | 6;
            diag = svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);
        }

        break;
    case 2:
        this_mv.row += 2;

        if (startmv.col & 7)
        {
            this_mv.col -= 2;
            diag = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
        }
        else
        {
            this_mv.col = (startmv.col - 8) | 6;
            diag = svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);;
        }

        break;
    case 3:
        this_mv.col += 2;
        this_mv.row += 2;
        diag = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
        break;
    }

    diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (diag < bestmse)
    {
        *bestmv = this_mv;
        bestmse = diag;
    }

    // }

    return bestmse;
}

// Half-pel-only refinement: checks left/right/up/down around the full-pel
// *bestmv with the fixed-offset NEON kernels, then (in the active #else
// branch) all four half-pel diagonals unconditionally.  *bestmv enters in
// full-pel units and leaves in eighth-pel units.  Returns distortion +
// MV rate, or INT_MAX for an uncodable start vector.
int
vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, vp8_subpixvariance_fn_t svf, vp8_variance_fn_t vf, int *mvcost[2])
{
    int bestmse = INT_MAX;
    MV startmv;
    //MV this_mv;
    MV this_mv;
    unsigned char *y = *(d->base_pre) + d->pre + (bestmv->row) * d->pre_stride + bestmv->col;
    unsigned char *z = (*(b->base_src) + b->src);
    int left, right, up, down, diag;
    unsigned int sse;

    // Trap uncodable vectors
    if ((abs((bestmv->col << 3) - ref_mv->col) > MAX_FULL_PEL_VAL) || (abs((bestmv->row << 3) - ref_mv->row) > MAX_FULL_PEL_VAL))
    {
        bestmv->row <<= 3;
        bestmv->col <<= 3;
        return INT_MAX;
    }

    // central mv
    bestmv->row <<= 3;
    bestmv->col <<= 3;
    startmv = *bestmv;

    // calculate central point error
    bestmse = vf(y, d->pre_stride, z, b->src_stride, &sse);
    bestmse += vp8_mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);

    // go left then right and check error
    this_mv.row = startmv.row;
    this_mv.col = ((startmv.col - 8) | 4);
    left = vp8_sub_pixel_variance16x16s_4_0_neon(y - 1, d->pre_stride, z, b->src_stride, &sse);
    left += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (left < bestmse)
    {
        *bestmv = this_mv;
        bestmse = left;
    }

    this_mv.col += 8;
    right = vp8_sub_pixel_variance16x16s_4_0_neon(y, d->pre_stride, z, b->src_stride, &sse);
    right += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (right < bestmse)
    {
        *bestmv = this_mv;
        bestmse = right;
    }

    // go up then down and check error
    this_mv.col = startmv.col;
    this_mv.row = ((startmv.row - 8) | 4);
    up = vp8_sub_pixel_variance16x16s_0_4_neon(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
    up += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (up < bestmse)
    {
        *bestmv = this_mv;
        bestmse = up;
    }

    this_mv.row += 8;
    down = vp8_sub_pixel_variance16x16s_0_4_neon(y, d->pre_stride, z, b->src_stride, &sse);
    down += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (down < bestmse)
    {
        *bestmv = this_mv;
        bestmse = down;
    }

    // somewhat strangely not doing all the diagonals for half pel is slower than doing them.
#if 0
    // now check 1 more diagonal -
    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
    this_mv = startmv;

    switch (whichdir)
    {
    case 0:
        this_mv.col = (this_mv.col - 8) | 4;
        this_mv.row = (this_mv.row - 8) | 4;
        diag = svf(y - 1 - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse);
        break;
    case 1:
        this_mv.col += 4;
        this_mv.row = (this_mv.row - 8) | 4;
        diag = svf(y - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse);
        break;
    case 2:
        this_mv.col = (this_mv.col - 8) | 4;
        this_mv.row += 4;
        diag = svf(y - 1, d->pre_stride, 4, 4, z, b->src_stride, &sse);
        break;
    case 3:
        this_mv.col += 4;
        this_mv.row += 4;
        diag = svf(y, d->pre_stride, 4, 4, z, b->src_stride, &sse);
        break;
    }

    diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (diag < bestmse)
    {
        *bestmv = this_mv;
        bestmse = diag;
    }

#else
    // Check all four diagonals in order: up-left, up-right, down-left,
    // down-right.  this_mv still holds the "down" candidate on entry
    // (col == startmv.col, row == startmv.row + 4 effectively).
    this_mv.col = (this_mv.col - 8) | 4;
    this_mv.row = (this_mv.row - 8) | 4;
    diag = vp8_sub_pixel_variance16x16s_4_4_neon(y - 1 - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
    diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (diag < bestmse)
    {
        *bestmv = this_mv;
        bestmse = diag;
    }

    this_mv.col += 8;
    diag = vp8_sub_pixel_variance16x16s_4_4_neon(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
    diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (diag < bestmse)
    {
        *bestmv = this_mv;
        bestmse = diag;
    }

    this_mv.col = (this_mv.col - 8) | 4;
    this_mv.row = startmv.row + 4;
    diag = vp8_sub_pixel_variance16x16s_4_4_neon(y - 1, d->pre_stride, z, b->src_stride, &sse);
    diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (diag < bestmse)
    {
        *bestmv = this_mv;
        bestmse = diag;
    }

    this_mv.col += 8;
    diag = vp8_sub_pixel_variance16x16s_4_4_neon(y, d->pre_stride, z, b->src_stride, &sse);
    diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

    if (diag < bestmse)
    {
        *bestmv = this_mv;
        bestmse = diag;
    }

#endif
    return bestmse;
}

#if 1

// SAD-based cost macros for the hex search: candidate (r,c) is in
// full-pel units, rr/rc is the (halved) reference MV, hence the << 2 to
// reach the same quarter-pel index space as the mvsadcost tables.
#define MVC(r,c) (((mvsadcost[0][((r)<<2)-rr] + mvsadcost[1][((c)<<2) - rc]) * error_per_bit + 128 )>>8 ) // estimated cost of a motion vector (r,c)
#define PRE(r,c) (*(d->base_pre) + d->pre + (r) * d->pre_stride + (c)) // pointer to predictor base of a motionvector
#define DIST(r,c,v) sf( src,src_stride,PRE(r,c),d->pre_stride, v) // returns sad error score.
#define ERR(r,c,v) (MVC(r,c)+DIST(r,c,v)) // returns distortion + motion vector cost
#define CHECK_BETTER(v,r,c) if ((v = ERR(r,c,besterr)) < besterr) { besterr = v; br=r; bc=c; } // checks if (r,c) has better score than previous best
// For each of the 6 hex directions k, the 3 neighbouring hex points worth
// re-checking after moving in direction k (the other 3 were already
// covered by the previous iteration).
// NOTE(review): non-static file-scope table -- consider static to avoid
// exporting the symbol; confirm no other translation unit references it.
const MV next_chkpts[6][3] =
{
    {{ -2, 0}, { -1, -2}, {1, -2}},
    {{ -1, -2}, {1, -2}, {2, 0}},
    {{1, -2}, {2, 0}, {1, 2}},
    {{2, 0}, {1, 2}, { -1, 2}},
    {{1, 2}, { -1, 2}, { -2, 0}},
    {{ -1, 2}, { -2, 0}, { -1, -2}}
};
// Hexagon-pattern full-pel motion search using SAD.
// Starts from ref_mv (converted to full-pel and clamped to the frame),
// scans the 6 hex points once, then repeatedly re-checks only the 3
// points adjacent to the last winning direction (next_chkpts), capped at
// 127 iterations.  Finishes with a scan of the 8 immediate neighbours.
// Writes the winning full-pel vector to *best_mv and returns variance +
// MV cost at that position.  search_param, num00 and mvcost are unused
// in this implementation.
int vp8_hex_search
(
    MACROBLOCK *x,
    BLOCK *b,
    BLOCKD *d,
    MV *ref_mv,
    MV *best_mv,
    int search_param,
    int error_per_bit,
    int *num00,
    vp8_variance_fn_t vf,
    vp8_sad_fn_t sf,
    int *mvsadcost[2],
    int *mvcost[2]
)
{
    MV hex[6] = { { -1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0} } ;
    MV neighbors[8] = { { -1, -1}, { -1, 0}, { -1, 1}, {0, -1}, {0, 1}, {1, -1}, {1, 0}, {1, 1} } ;
    int i, j;
    unsigned char *src = (*(b->base_src) + b->src);
    int src_stride = b->src_stride;
    // rr/rc: reference MV (later halved for the mvsadcost lookup);
    // br/bc: current best position in full-pel units.
    int rr = ref_mv->row, rc = ref_mv->col, br = rr >> 3, bc = rc >> 3, tr, tc;
    unsigned int besterr, thiserr = 0x7fffffff;
    // k: index of the direction the search last moved in; -1 until the
    // first hex pass finds a winner.
    int k = -1, tk;

    if (bc < x->mv_col_min) bc = x->mv_col_min;

    if (bc > x->mv_col_max) bc = x->mv_col_max;

    if (br < x->mv_row_min) br = x->mv_row_min;

    if (br > x->mv_row_max) br = x->mv_row_max;

    rr >>= 1;
    rc >>= 1;

    besterr = ERR(br, bc, thiserr);

    // hex search
    //j=0
    tr = br;
    tc = bc;

    // First pass: all 6 hex points; remember the winning direction in k.
    for (i = 0; i < 6; i++)
    {
        int nr = tr + hex[i].row, nc = tc + hex[i].col;

        if (nc < x->mv_col_min) continue;

        if (nc > x->mv_col_max) continue;

        if (nr < x->mv_row_min) continue;

        if (nr > x->mv_row_max) continue;

        //CHECK_BETTER(thiserr,nr,nc);
        // Inlined CHECK_BETTER so the winning direction index k can be
        // recorded alongside the position update.
        if ((thiserr = ERR(nr, nc, besterr)) < besterr)
        {
            besterr = thiserr;
            br = nr;
            bc = nc;
            k = i;
        }
    }

    if (tr == br && tc == bc)
        goto cal_neighbors;

    // Subsequent passes: only the 3 new points around direction tk.
    // Capped at 127 iterations (see comment in the alternate version:
    // avoids the max-256 problem when stepping by 2).
    for (j = 1; j < 127; j++)
    {
        tr = br;
        tc = bc;
        tk = k;

        for (i = 0; i < 3; i++)
        {
            int nr = tr + next_chkpts[tk][i].row, nc = tc + next_chkpts[tk][i].col;

            if (nc < x->mv_col_min) continue;

            if (nc > x->mv_col_max) continue;

            if (nr < x->mv_row_min) continue;

            if (nr > x->mv_row_max) continue;

            //CHECK_BETTER(thiserr,nr,nc);
            if ((thiserr = ERR(nr, nc, besterr)) < besterr)
            {
                besterr = thiserr;
                br = nr;
                bc = nc; //k=(tk+5+i)%6;}
                // Map the checkpoint index back to a hex direction:
                // equivalent to k = (tk + 5 + i) % 6 without the divide.
                k = tk + 5 + i;

                if (k >= 12) k -= 12;
                else if (k >= 6) k -= 6;
            }
        }

        if (tr == br && tc == bc)
            break;
    }

    // check 8 1 away neighbors
cal_neighbors:
    tr = br;
    tc = bc;

    for (i = 0; i < 8; i++)
    {
        int nr = tr + neighbors[i].row, nc = tc + neighbors[i].col;

        if (nc < x->mv_col_min) continue;

        if (nc > x->mv_col_max) continue;

        if (nr < x->mv_row_min) continue;

        if (nr > x->mv_row_max) continue;

        CHECK_BETTER(thiserr, nr, nc);
    }

    best_mv->row = br;
    best_mv->col = bc;

    // Final score uses variance (vf) rather than SAD at the winner.
    return vf(src, src_stride, PRE(br, bc), d->pre_stride, &thiserr) + MVC(br, bc) ;
}
#undef MVC
#undef PRE
#undef SP
#undef DIST
#undef ERR
#undef CHECK_BETTER

#else

// Alternate (disabled) hex-search implementation -- same cost macros.
#define MVC(r,c) (((mvsadcost[0][((r)<<2)-rr] + mvsadcost[1][((c)<<2) - rc]) * error_per_bit + 128 )>>8 ) // estimated cost of a motion vector (r,c)
#define PRE(r,c) (*(d->base_pre) + d->pre + (r) * d->pre_stride + (c)) // pointer to predictor base of a motionvector
#define DIST(r,c,v) sf( src,src_stride,PRE(r,c),d->pre_stride, v) // returns sad error score.
945 #define ERR(r,c,v) (MVC(r,c)+DIST(r,c,v)) // returns distortion + motion vector cost 946 #define CHECK_BETTER(v,r,c) if ((v = ERR(r,c,besterr)) < besterr) { besterr = v; br=r; bc=c; } // checks if (r,c) has better score than previous best 947 948 int vp8_hex_search 949 ( 950 MACROBLOCK *x, 951 BLOCK *b, 952 BLOCKD *d, 953 MV *ref_mv, 954 MV *best_mv, 955 int search_param, 956 int error_per_bit, 957 int *num00, 958 vp8_variance_fn_t vf, 959 vp8_sad_fn_t sf, 960 int *mvsadcost[2], 961 int *mvcost[2] 962 ) 963 { 964 MV hex[6] = { { -2, 0}, { -1, -2}, { -1, 2}, {2, 0}, {1, 2}, {1, -2} } ; 965 MV neighbors[8] = { { -1, -1}, { -1, 0}, { -1, 1}, {0, -1}, {0, 1}, {1, -1}, {1, 0}, {1, 1} } ; 966 int i, j; 967 unsigned char *src = (*(b->base_src) + b->src); 968 int src_stride = b->src_stride; 969 //int rr= ref_mv->row,rc= ref_mv->col,br=rr,bc=rc,tr,tc; 970 int rr = ref_mv->row, rc = ref_mv->col, br = rr >> 3, bc = rc >> 3, tr, tc; 971 unsigned int besterr, thiserr = 0x7fffffff; 972 973 /* 974 if ( rc < x->mv_col_min) bc = x->mv_col_min; 975 if ( rc > x->mv_col_max) bc = x->mv_col_max; 976 if ( rr < x->mv_row_min) br = x->mv_row_min; 977 if ( rr > x->mv_row_max) br = x->mv_row_max; 978 rr>>=1; 979 rc>>=1; 980 br>>=3; 981 bc>>=3; 982 */ 983 if (bc < x->mv_col_min) bc = x->mv_col_min; 984 985 if (bc > x->mv_col_max) bc = x->mv_col_max; 986 987 if (br < x->mv_row_min) br = x->mv_row_min; 988 989 if (br > x->mv_row_max) br = x->mv_row_max; 990 991 rr >>= 1; 992 rc >>= 1; 993 994 besterr = ERR(br, bc, thiserr); 995 996 // hex search jbb changed to 127 to avoid max 256 problem steping by 2. 
    // Iterate the hexagon pattern: from the current best point (br,bc) probe
    // the six hexagon vertices and recentre on any improvement.  127 is just
    // a safety cap on the number of refinement passes; the loop exits early
    // once a full pass produces no better point.
    for (j = 0; j < 127; j++)
    {
        tr = br;
        tc = bc;

        for (i = 0; i < 6; i++)
        {
            int nr = tr + hex[i].row, nc = tc + hex[i].col;

            // Skip candidates outside the legal motion-vector range.
            if (nc < x->mv_col_min) continue;

            if (nc > x->mv_col_max) continue;

            if (nr < x->mv_row_min) continue;

            if (nr > x->mv_row_max) continue;

            CHECK_BETTER(thiserr, nr, nc);
        }

        // No vertex improved on the centre: the hexagon stage has converged.
        if (tr == br && tc == bc)
            break;
    }

    // check 8 1 away neighbors
    tr = br;
    tc = bc;

    for (i = 0; i < 8; i++)
    {
        int nr = tr + neighbors[i].row, nc = tc + neighbors[i].col;

        if (nc < x->mv_col_min) continue;

        if (nc > x->mv_col_max) continue;

        if (nr < x->mv_row_min) continue;

        if (nr > x->mv_row_max) continue;

        CHECK_BETTER(thiserr, nr, nc);
    }

    best_mv->row = br;
    best_mv->col = bc;

    // Final score is the variance of the winning position plus its MV cost.
    return vf(src, src_stride, PRE(br, bc), d->pre_stride, &thiserr) + MVC(br, bc) ;
}
#undef MVC
#undef PRE
#undef SP
#undef DIST
#undef ERR
#undef CHECK_BETTER

#endif

// SAD-driven diamond search.  Starting from ref_mv, each step probes
// x->searches_per_step offsets taken from the pre-computed search-site table
// x->ss, recentres on the best candidate, and moves on to the next (smaller)
// step until the table is exhausted.  search_param selects the initial step
// size; *num00 counts the steps in which the centre was never displaced
// (used by callers to skip repeated searches).  The best whole-pel vector is
// written to best_mv; the return value is variance + MV cost of that vector,
// or INT_MAX if no legal starting point was found.
int vp8_diamond_search_sad
(
    MACROBLOCK *x,
    BLOCK *b,
    BLOCKD *d,
    MV *ref_mv,
    MV *best_mv,
    int search_param,
    int error_per_bit,
    int *num00,
    vp8_variance_fn_ptr_t *fn_ptr,
    int *mvsadcost[2],
    int *mvcost[2]
)
{
    int i, j, step;

    unsigned char *what = (*(b->base_src) + b->src);
    int what_stride = b->src_stride;
    unsigned char *in_what;
    int in_what_stride = d->pre_stride;
    unsigned char *best_address;

    int tot_steps;
    MV this_mv;

    int bestsad = INT_MAX;
    int best_site = 0;
    int last_site = 0;

    // ref_mv is in 1/8-pel units; the SAD search works on whole pels.
    int ref_row = ref_mv->row >> 3;
    int ref_col = ref_mv->col >> 3;
    int this_row_offset;
    int this_col_offset;
    search_site *ss;

    unsigned char *check_here;
    int thissad;

    // Work out the start point for the search
    in_what = (unsigned char *)(*(d->base_pre) + d->pre + (ref_row * (d->pre_stride)) + ref_col);
    best_address = in_what;

    // We need to check that the starting point for the search (as indicated by ref_mv) is within the buffer limits
    if ((ref_col > x->mv_col_min) && (ref_col < x->mv_col_max) &&
        (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max))
    {
        // Check the starting position
        bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, ref_mv, mvsadcost, error_per_bit);
    }

    // search_param determines the length of the initial step and hence the number of iterations
    // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
    ss = &x->ss[search_param * x->searches_per_step];
    tot_steps = (x->ss_count / x->searches_per_step) - search_param;

    i = 1;
    best_mv->row = ref_row;
    best_mv->col = ref_col;

    *num00 = 0;

    for (step = 0; step < tot_steps ; step++)
    {
        for (j = 0 ; j < x->searches_per_step ; j++)
        {
            // Trap illegal vectors
            this_row_offset = best_mv->row + ss[i].mv.row;
            this_col_offset = best_mv->col + ss[i].mv.col;

            if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
                (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))

            {
                check_here = ss[i].offset + best_address;
                thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);

                if (thissad < bestsad)
                {
                    // Candidate beats the best raw SAD; add the MV rate cost
                    // (vector converted back to 1/8-pel units) before the
                    // final comparison.
                    this_mv.row = this_row_offset << 3;
                    this_mv.col = this_col_offset << 3;
                    thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);

                    if (thissad < bestsad)
                    {
                        bestsad = thissad;
                        best_site = i;
                    }
                }
            }

            i++;
        }

        if (best_site != last_site)
        {
            // Recentre the diamond on the winning site.
            best_mv->row += ss[best_site].mv.row;
            best_mv->col += ss[best_site].mv.col;
            best_address += ss[best_site].offset;
            last_site = best_site;
        }
        else if (best_address == in_what)
            (*num00)++;
    }

    this_mv.row = best_mv->row << 3;
    this_mv.col = best_mv->col << 3;

    if (bestsad == INT_MAX)
        return INT_MAX;

    // Score the winner with true variance (not SAD) plus MV coding cost.
    // NOTE(review): thissad is an int reused as the variance fn's unsigned
    // int *sse out-parameter via a cast — confirm the aliasing is intended.
    return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad))
    + vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
}

// Same diamond search as vp8_diamond_search_sad, except that when all four
// candidate sites of a group lie inside the legal MV range their SADs are
// computed with a single 4-way call (fn_ptr->sdx4df) instead of four scalar
// calls.
int vp8_diamond_search_sadx4
(
    MACROBLOCK *x,
    BLOCK *b,
    BLOCKD *d,
    MV *ref_mv,
    MV *best_mv,
    int search_param,
    int error_per_bit,
    int *num00,
    vp8_variance_fn_ptr_t *fn_ptr,
    int *mvsadcost[2],
    int *mvcost[2]
)
{
    int i, j, step;

    unsigned char *what = (*(b->base_src) + b->src);
    int what_stride = b->src_stride;
    unsigned char *in_what;
    int in_what_stride = d->pre_stride;
    unsigned char *best_address;

    int tot_steps;
    MV this_mv;

    int bestsad = INT_MAX;
    int best_site = 0;
    int last_site = 0;

    int ref_row = ref_mv->row >> 3;
    int ref_col = ref_mv->col >> 3;
    int this_row_offset;
    int this_col_offset;
    search_site *ss;

    unsigned char *check_here;
    int thissad;

    // Work out the start point for the search
    in_what = (unsigned char *)(*(d->base_pre) + d->pre + (ref_row * (d->pre_stride)) + ref_col);
    best_address = in_what;

    // We need to check that the starting point for the search (as indicated by ref_mv) is within the buffer limits
    if ((ref_col > x->mv_col_min) && (ref_col < x->mv_col_max) &&
        (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max))
    {
        // Check the starting position
        bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, ref_mv, mvsadcost, error_per_bit);
    }
    // search_param determines the length of the initial step and hence the number of iterations
    // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
    ss = &x->ss[search_param * x->searches_per_step];
    tot_steps = (x->ss_count / x->searches_per_step) - search_param;

    i = 1;
    best_mv->row = ref_row;
    best_mv->col = ref_col;

    *num00 = 0;

    for (step = 0; step < tot_steps ; step++)
    {
        int check_row_min, check_col_min, check_row_max, check_col_max;

        // Express the MV bounds relative to the current search centre so
        // each site offset can be range-checked without forming the
        // absolute vector first.
        check_row_min = x->mv_row_min - best_mv->row;
        check_row_max = x->mv_row_max - best_mv->row;
        check_col_min = x->mv_col_min - best_mv->col;
        check_col_max = x->mv_col_max - best_mv->col;

        for (j = 0 ; j < x->searches_per_step ; j += 4)
        {
            // NOTE(review): block_offset is char* while best_address is
            // unsigned char* — relies on an implicit pointer conversion;
            // verify against fn_ptr->sdx4df's declared parameter type.
            char *block_offset[4];
            unsigned int valid_block[4];
            int all_in = 1, t;

            for (t = 0; t < 4; t++)
            {
                valid_block [t] = (ss[t+i].mv.col > check_col_min);
                valid_block [t] &= (ss[t+i].mv.col < check_col_max);
                valid_block [t] &= (ss[t+i].mv.row > check_row_min);
                valid_block [t] &= (ss[t+i].mv.row < check_row_max);

                all_in &= valid_block[t];
                block_offset[t] = ss[i+t].offset + best_address;
            }

            if (all_in)
            {
                int sad_array[4];

                // All four candidates are legal: compute their SADs with one
                // 4-way call.
                fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, sad_array);

                for (t = 0; t < 4; t++, i++)
                {
                    thissad = sad_array[t];

                    if (thissad < bestsad)
                    {
                        this_mv.row = (best_mv->row + ss[i].mv.row) << 3;
                        this_mv.col = (best_mv->col + ss[i].mv.col) << 3;
                        thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);

                        if (thissad < bestsad)
                        {
                            bestsad = thissad;
                            best_site = i;
                        }
                    }
                }
            }
            else
            {
                int t;

                // Mixed legality within the group: fall back to per-candidate
                // scalar SADs, skipping illegal vectors.
                for (t = 0; t < 4; i++, t++)
                {
                    // Trap illegal vectors
                    if (valid_block[t])

                    {
                        check_here = block_offset[t];
                        thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);

                        if (thissad < bestsad)
                        {
                            this_row_offset = best_mv->row + ss[i].mv.row;
                            this_col_offset = best_mv->col + ss[i].mv.col;

                            this_mv.row = this_row_offset << 3;
                            this_mv.col = this_col_offset << 3;
                            thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);

                            if (thissad < bestsad)
                            {
                                bestsad = thissad;
                                best_site = i;
                            }
                        }
                    }
                }
            }
        }

        if (best_site != last_site)
        {
            // Recentre the diamond on the winning site.
            best_mv->row += ss[best_site].mv.row;
            best_mv->col += ss[best_site].mv.col;
            best_address += ss[best_site].offset;
            last_site = best_site;
        }
        else if (best_address == in_what)
            (*num00)++;
    }

    this_mv.row = best_mv->row << 3;
    this_mv.col = best_mv->col << 3;

    if (bestsad == INT_MAX)
        return INT_MAX;

    // Final score uses true variance plus the MV coding cost.
    return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad))
    + vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
}


#if !(CONFIG_REALTIME_ONLY)
// Exhaustive whole-pel search: evaluate every position within +/- distance
// of ref_mv (clamped to the legal MV range) and keep the one with the lowest
// SAD + MV cost.  The winning vector is written to d->bmi.mv.as_mv; the
// return value is its variance + MV cost, or INT_MAX if nothing legal was
// evaluated.
int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int *mvsadcost[2])
{
    unsigned char *what = (*(b->base_src) + b->src);
    int what_stride = b->src_stride;
    unsigned char *in_what;
    int in_what_stride = d->pre_stride;
    int mv_stride = d->pre_stride;
    unsigned char *bestaddress;
    MV *best_mv = &d->bmi.mv.as_mv;
    MV this_mv;
    int bestsad = INT_MAX;
    int r, c;

    unsigned char *check_here;
    int thissad;

    // ref_mv is in 1/8-pel units; the search grid is whole pels.
    int ref_row = ref_mv->row >> 3;
    int ref_col = ref_mv->col >> 3;

    int row_min = ref_row - distance;
    int row_max = ref_row + distance;
    int col_min = ref_col - distance;
    int col_max = ref_col + distance;

    // Work out the mid point for the search
    in_what = *(d->base_pre) + d->pre;
    bestaddress = in_what + (ref_row * d->pre_stride) + ref_col;

    best_mv->row = ref_row;
    best_mv->col = ref_col;

    // We need to check that the starting point for the search (as indicated by ref_mv) is within the buffer limits
    if ((ref_col > x->mv_col_min) && (ref_col < x->mv_col_max) &&
        (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max))
    {
        // Baseline value at the centre

        //bestsad = fn_ptr->sf( what,what_stride,bestaddress,in_what_stride) + (int)sqrt(vp8_mv_err_cost(ref_mv,ref_mv, mvcost,error_per_bit*14));
        bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, ref_mv, mvsadcost, error_per_bit);
    }

    // Apply further limits to prevent us looking using vectors that stretch beyond the UMV border
    if (col_min < x->mv_col_min)
        col_min = x->mv_col_min;

    if (col_max > x->mv_col_max)
        col_max = x->mv_col_max;

    if (row_min < x->mv_row_min)
        row_min = x->mv_row_min;

    if (row_max > x->mv_row_max)
        row_max = x->mv_row_max;

    for (r = row_min; r < row_max ; r++)
    {
        this_mv.row = r << 3;
        check_here = r * mv_stride + in_what + col_min;

        for (c = col_min; c < col_max; c++)
        {
            thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);

            // Unlike the sadx3 variant below, the MV cost is added
            // unconditionally here rather than only when the raw SAD beats
            // the current best.
            this_mv.col = c << 3;
            //thissad += (int)sqrt(vp8_mv_err_cost(&this_mv,ref_mv, mvcost,error_per_bit*14));
            //thissad += error_per_bit * mv_bits_sadcost[mv_bits(&this_mv, ref_mv, mvcost)];
            thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit); //mv_bits(error_per_bit, &this_mv, ref_mv, mvsadcost);

            if (thissad < bestsad)
            {
                bestsad = thissad;
                best_mv->row = r;
                best_mv->col = c;
                bestaddress = check_here;
            }

            check_here++;
        }
    }

    this_mv.row = best_mv->row << 3;
    this_mv.col = best_mv->col << 3;

    // Re-score the winner with true variance plus MV coding cost.
    if (bestsad < INT_MAX)
        return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad))
        + vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    else
        return INT_MAX;
}

// As vp8_full_search_sad, but where at least three columns remain in a row
// the SADs of three consecutive positions are computed with one call to
// fn_ptr->sdx3f, falling back to single-position SADs for the row tail.
int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int *mvsadcost[2])
{
    unsigned char *what = (*(b->base_src) + b->src);
    int what_stride = b->src_stride;
    unsigned char *in_what;
    int in_what_stride = d->pre_stride;
    int mv_stride = d->pre_stride;
    unsigned char *bestaddress;
    MV *best_mv = &d->bmi.mv.as_mv;
    MV this_mv;
    int bestsad = INT_MAX;
    int r, c;

    unsigned char *check_here;
    int thissad;

    int ref_row = ref_mv->row >> 3;
    int ref_col = ref_mv->col >> 3;

    int row_min = ref_row - distance;
    int row_max = ref_row + distance;
    int col_min = ref_col - distance;
    int col_max = ref_col + distance;

    int sad_array[3];

    // Work out the mid point for the search
    in_what = *(d->base_pre) + d->pre;
    bestaddress = in_what + (ref_row * d->pre_stride) + ref_col;

    best_mv->row = ref_row;
    best_mv->col = ref_col;

    // We need to check that the starting point for the search (as indicated by ref_mv) is within the buffer limits
    if ((ref_col > x->mv_col_min) && (ref_col < x->mv_col_max) &&
        (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max))
    {
        // Baseline value at the centre
        bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, ref_mv, mvsadcost, error_per_bit);
    }

    // Apply further limits to prevent us looking using vectors that stretch beyond the UMV border
    if (col_min < x->mv_col_min)
        col_min = x->mv_col_min;

    if (col_max > x->mv_col_max)
        col_max = x->mv_col_max;

    if (row_min < x->mv_row_min)
        row_min = x->mv_row_min;

    if (row_max > x->mv_row_max)
        row_max = x->mv_row_max;

    for (r = row_min; r < row_max ; r++)
    {
        this_mv.row = r << 3;
        check_here = r * mv_stride + in_what + col_min;
        c = col_min;

        // Grouped section: three column positions per sdx3f call.
        while ((c + 3) < col_max)
        {
            int i;

            fn_ptr->sdx3f(what, what_stride, check_here , in_what_stride, sad_array);

            for (i = 0; i < 3; i++)
            {
                thissad = sad_array[i];

                if (thissad < bestsad)
                {
                    this_mv.col = c << 3;
                    thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);

                    if (thissad < bestsad)
                    {
                        bestsad = thissad;
                        best_mv->row = r;
                        best_mv->col = c;
                        bestaddress = check_here;
                    }
                }

                check_here++;
                c++;
            }
        }

        // Scalar tail of the row.
        while (c < col_max)
        {
            thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);

            if (thissad < bestsad)
            {
                this_mv.col = c << 3;
                thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);

                if (thissad < bestsad)
                {
                    bestsad = thissad;
                    best_mv->row = r;
                    best_mv->col = c;
                    bestaddress = check_here;
                }
            }

            check_here ++;
            c ++;
        }

    }

    this_mv.row = best_mv->row << 3;
    this_mv.col = best_mv->col << 3;

    // Re-score the winner with true variance plus MV coding cost.
    if (bestsad < INT_MAX)
        return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad))
        + vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
    else
        return INT_MAX;
}
#endif

#ifdef ENTROPY_STATS
// Dump the accumulated mode-context statistics as a compilable C table
// (written to modecont.c), one row per context, one probability per mode.
void print_mode_context(void)
{
    FILE *f = fopen("modecont.c", "w");
    int i, j;

    fprintf(f, "#include \"entropy.h\"\n");
    fprintf(f, "const int vp8_mode_contexts[6][4] =\n");
    fprintf(f, "{\n");

    for (j = 0; j < 6; j++)
    {
        fprintf(f, " { // %d \n", j);
        fprintf(f, " ");

        for (i = 0; i < 4; i++)
        {
            int overal_prob;
            int this_prob;
            int count; // = mv_ref_ct[j][i][0]+mv_ref_ct[j][i][1];

            // Overall probs: probability of the 0-branch for this mode
            // across all contexts (computed but only used in the
            // commented-out diagnostic prints below).
            count = mv_mode_cts[i][0] + mv_mode_cts[i][1];

            if (count)
                overal_prob = 256 * mv_mode_cts[i][0] / count;
            else
                overal_prob = 128;

            if (overal_prob == 0)
                overal_prob = 1;

            // context probs: per-context probability, clamped away from 0
            // so it remains a valid arithmetic-coder probability.
            count = mv_ref_ct[j][i][0] + mv_ref_ct[j][i][1];

            if (count)
                this_prob = 256 * mv_ref_ct[j][i][0] / count;
            else
                this_prob = 128;

            if (this_prob == 0)
                this_prob = 1;

            fprintf(f, "%5d, ", this_prob);
            //fprintf(f,"%5d, %5d, %8d,", this_prob, overal_prob, (this_prob << 10)/overal_prob);
            //fprintf(f,"%8d, ", (this_prob << 10)/overal_prob);
        }

        fprintf(f, " },\n");
    }

    fprintf(f, "};\n");
    fclose(f);
}

/* MV ref count ENTROPY_STATS stats code */
#ifdef ENTROPY_STATS
// Reset the MV-reference and mode counters gathered during encoding.
void init_mv_ref_counts()
{
    vpx_memset(mv_ref_ct, 0, sizeof(mv_ref_ct));
    vpx_memset(mv_mode_cts, 0, sizeof(mv_mode_cts));
}

// Record, for prediction mode m with context counts ct[], which binary
// decision was taken at each level of the mode tree
// (ZEROMV -> NEARESTMV -> NEARMV -> NEWMV): index [context][level][taken].
void accum_mv_refs(MB_PREDICTION_MODE m, const int ct[4])
{
    if (m == ZEROMV)
    {
        ++mv_ref_ct [ct[0]] [0] [0];
        ++mv_mode_cts[0][0];
    }
    else
    {
        ++mv_ref_ct [ct[0]] [0] [1];
        ++mv_mode_cts[0][1];

        if (m == NEARESTMV)
        {
            ++mv_ref_ct [ct[1]] [1] [0];
            ++mv_mode_cts[1][0];
        }
        else
        {
            ++mv_ref_ct [ct[1]] [1] [1];
            ++mv_mode_cts[1][1];

            if (m == NEARMV)
            {
                ++mv_ref_ct [ct[2]] [2] [0];
                ++mv_mode_cts[2][0];
            }
            else
            {
                ++mv_ref_ct [ct[2]] [2] [1];
                ++mv_mode_cts[2][1];

                if (m == NEWMV)
                {
                    ++mv_ref_ct [ct[3]] [3] [0];
                    ++mv_mode_cts[3][0];
                }
                else
                {
                    ++mv_ref_ct [ct[3]] [3] [1];
                    ++mv_mode_cts[3][1];
                }
            }
        }
    }
}

#endif/* END MV ref count ENTROPY_STATS stats code */

#endif