1 /* 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 12 #include "onyxc_int.h" 13 #include "onyx_int.h" 14 #include "systemdependent.h" 15 #include "quantize.h" 16 #include "alloccommon.h" 17 #include "mcomp.h" 18 #include "firstpass.h" 19 #include "psnr.h" 20 #include "vpx_scale/vpxscale.h" 21 #include "extend.h" 22 #include "ratectrl.h" 23 #include "quant_common.h" 24 #include "segmentation.h" 25 #include "g_common.h" 26 #include "vpx_scale/yv12extend.h" 27 #include "postproc.h" 28 #include "vpx_mem/vpx_mem.h" 29 #include "swapyv12buffer.h" 30 #include "threading.h" 31 #include "vpx_ports/vpx_timer.h" 32 #include "vpxerrors.h" 33 34 #include <math.h> 35 #include <limits.h> 36 37 #define ALT_REF_MC_ENABLED 1 // dis/enable MC in AltRef filtering 38 #define ALT_REF_SUBPEL_ENABLED 1 // dis/enable subpel in MC AltRef filtering 39 40 #define USE_FILTER_LUT 1 41 #if VP8_TEMPORAL_ALT_REF 42 43 #if USE_FILTER_LUT 44 static int modifier_lut[7][19] = 45 { 46 // Strength=0 47 {16, 13, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 48 // Strength=1 49 {16, 15, 10, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 50 // Strength=2 51 {16, 15, 13, 9, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 52 // Strength=3 53 {16, 16, 15, 13, 10, 7, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 54 // Strength=4 55 {16, 16, 15, 14, 13, 11, 9, 7, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 56 // Strength=5 57 {16, 16, 16, 15, 15, 14, 13, 11, 10, 8, 7, 5, 3, 0, 0, 0, 0, 0, 0}, 58 // Strength=6 59 {16, 16, 16, 16, 15, 15, 14, 14, 13, 12, 11, 10, 9, 8, 7, 5, 4, 2, 1} 60 }; 61 #endif 62 static void build_predictors_mb 63 ( 64 MACROBLOCKD *x, 65 unsigned char *y_mb_ptr, 66 unsigned char *u_mb_ptr, 67 unsigned char *v_mb_ptr, 68 int stride, 69 int mv_row, 70 int mv_col, 71 unsigned char *pred 72 ) 73 { 74 int offset; 75 unsigned char *yptr, *uptr, *vptr; 76 77 // Y 78 yptr = y_mb_ptr + (mv_row >> 3) * stride + (mv_col >> 3); 79 80 if ((mv_row | mv_col) & 7) 81 { 82 // vp8_sixtap_predict16x16_c(yptr, stride, 83 // mv_col & 7, mv_row & 7, &pred[0], 16); 84 x->subpixel_predict16x16(yptr, stride, 85 mv_col & 7, mv_row & 7, &pred[0], 16); 86 } 87 else 88 { 89 //vp8_copy_mem16x16_c (yptr, stride, &pred[0], 16); 90 RECON_INVOKE(&x->rtcd->recon, copy16x16)(yptr, stride, &pred[0], 16); 91 } 92 93 // U & V 94 mv_row >>= 1; 95 mv_col >>= 1; 96 stride >>= 1; 97 offset = (mv_row >> 3) * stride + (mv_col >> 3); 98 uptr = u_mb_ptr + offset; 99 vptr = v_mb_ptr + offset; 100 101 if ((mv_row | mv_col) & 7) 102 { 103 x->subpixel_predict8x8(uptr, stride, 104 mv_col & 7, mv_row & 7, &pred[256], 8); 105 x->subpixel_predict8x8(vptr, stride, 106 mv_col & 7, mv_row & 7, &pred[320], 8); 107 } 108 else 109 { 110 RECON_INVOKE(&x->rtcd->recon, copy8x8)(uptr, stride, &pred[256], 8); 111 RECON_INVOKE(&x->rtcd->recon, copy8x8)(vptr, stride, &pred[320], 8); 112 } 113 } 114 static void apply_temporal_filter 115 ( 116 unsigned char *frame1, 117 unsigned int stride, 118 unsigned char *frame2, 119 unsigned int block_size, 120 int strength, 121 int filter_weight, 122 unsigned int *accumulator, 123 unsigned int *count 124 ) 125 { 126 int i, j, k; 127 int modifier; 128 int byte = 0; 129 130 #if USE_FILTER_LUT 131 int *lut = modifier_lut[strength]; 132 #endif 133 134 for (i = 0,k = 0; i < block_size; i++) 135 { 136 for (j = 0; j < block_size; j++, k++) 137 { 138 139 int src_byte = frame1[byte]; 140 int pixel_value = *frame2++; 141 142 #if USE_FILTER_LUT 143 // LUT implementation -- 144 // improves precision of filter 145 modifier = abs(src_byte-pixel_value); 146 modifier = modifier>18 ? 0 : lut[modifier]; 147 #else 148 modifier = src_byte; 149 modifier -= pixel_value; 150 modifier *= modifier; 151 modifier >>= strength; 152 modifier *= 3; 153 154 if (modifier > 16) 155 modifier = 16; 156 157 modifier = 16 - modifier; 158 #endif 159 modifier *= filter_weight; 160 161 count[k] += modifier; 162 accumulator[k] += modifier * pixel_value; 163 164 byte++; 165 } 166 167 byte += stride - block_size; 168 } 169 } 170 171 #if ALT_REF_MC_ENABLED 172 static int dummy_cost[2*mv_max+1]; 173 174 static int find_matching_mb 175 ( 176 VP8_COMP *cpi, 177 YV12_BUFFER_CONFIG *arf_frame, 178 YV12_BUFFER_CONFIG *frame_ptr, 179 int mb_offset, 180 int error_thresh 181 ) 182 { 183 MACROBLOCK *x = &cpi->mb; 184 int thissme; 185 int step_param; 186 int further_steps; 187 int n = 0; 188 int sadpb = x->sadperbit16; 189 int bestsme = INT_MAX; 190 int num00 = 0; 191 192 BLOCK *b = &x->block[0]; 193 BLOCKD *d = &x->e_mbd.block[0]; 194 MV best_ref_mv1 = {0,0}; 195 196 int *mvcost[2] = { &dummy_cost[mv_max+1], &dummy_cost[mv_max+1] }; 197 int *mvsadcost[2] = { &dummy_cost[mv_max+1], &dummy_cost[mv_max+1] }; 198 199 // Save input state 200 unsigned char **base_src = b->base_src; 201 int src = b->src; 202 int src_stride = b->src_stride; 203 unsigned char **base_pre = d->base_pre; 204 int pre = d->pre; 205 int pre_stride = d->pre_stride; 206 207 // Setup frame pointers 208 b->base_src = &arf_frame->y_buffer; 209 b->src_stride = arf_frame->y_stride; 210 b->src = mb_offset; 211 212 d->base_pre = &frame_ptr->y_buffer; 213 d->pre_stride = frame_ptr->y_stride; 214 d->pre = mb_offset; 215 216 // Further step/diamond searches as necessary 217 if (cpi->Speed < 8) 218 { 219 step_param = cpi->sf.first_step + 220 ((cpi->Speed > 5) ? 1 : 0); 221 further_steps = 222 (cpi->sf.max_step_search_steps - 1)-step_param; 223 } 224 else 225 { 226 step_param = cpi->sf.first_step + 2; 227 further_steps = 0; 228 } 229 230 if (1/*cpi->sf.search_method == HEX*/) 231 { 232 // TODO Check that the 16x16 vf & sdf are selected here 233 bestsme = vp8_hex_search(x, b, d, 234 &best_ref_mv1, &d->bmi.mv.as_mv, 235 step_param, 236 sadpb/*x->errorperbit*/, 237 &num00, &cpi->fn_ptr[BLOCK_16X16], 238 mvsadcost, mvcost); 239 } 240 else 241 { 242 int mv_x, mv_y; 243 244 bestsme = cpi->diamond_search_sad(x, b, d, 245 &best_ref_mv1, &d->bmi.mv.as_mv, 246 step_param, 247 sadpb / 2/*x->errorperbit*/, 248 &num00, &cpi->fn_ptr[BLOCK_16X16], 249 mvsadcost, mvcost); //sadpb < 9 250 251 // Further step/diamond searches as necessary 252 n = 0; 253 //further_steps = (cpi->sf.max_step_search_steps - 1) - step_param; 254 255 n = num00; 256 num00 = 0; 257 258 while (n < further_steps) 259 { 260 n++; 261 262 if (num00) 263 num00--; 264 else 265 { 266 thissme = cpi->diamond_search_sad(x, b, d, 267 &best_ref_mv1, &d->bmi.mv.as_mv, 268 step_param + n, 269 sadpb / 4/*x->errorperbit*/, 270 &num00, &cpi->fn_ptr[BLOCK_16X16], 271 mvsadcost, mvcost); //sadpb = 9 272 273 if (thissme < bestsme) 274 { 275 bestsme = thissme; 276 mv_y = d->bmi.mv.as_mv.row; 277 mv_x = d->bmi.mv.as_mv.col; 278 } 279 else 280 { 281 d->bmi.mv.as_mv.row = mv_y; 282 d->bmi.mv.as_mv.col = mv_x; 283 } 284 } 285 } 286 } 287 288 #if ALT_REF_SUBPEL_ENABLED 289 // Try sub-pixel MC? 290 //if (bestsme > error_thresh && bestsme < INT_MAX) 291 { 292 bestsme = cpi->find_fractional_mv_step(x, b, d, 293 &d->bmi.mv.as_mv, &best_ref_mv1, 294 x->errorperbit, &cpi->fn_ptr[BLOCK_16X16], 295 cpi->mb.mvcost); 296 } 297 #endif 298 299 // Save input state 300 b->base_src = base_src; 301 b->src = src; 302 b->src_stride = src_stride; 303 d->base_pre = base_pre; 304 d->pre = pre; 305 d->pre_stride = pre_stride; 306 307 return bestsme; 308 } 309 #endif 310 311 static void vp8cx_temp_blur1_c 312 ( 313 VP8_COMP *cpi, 314 int frame_count, 315 int alt_ref_index, 316 int strength 317 ) 318 { 319 int byte; 320 int frame; 321 int mb_col, mb_row; 322 unsigned int filter_weight[MAX_LAG_BUFFERS]; 323 unsigned char *mm_ptr = cpi->fp_motion_map; 324 int cols = cpi->common.mb_cols; 325 int rows = cpi->common.mb_rows; 326 int MBs = cpi->common.MBs; 327 int mb_y_offset = 0; 328 int mb_uv_offset = 0; 329 unsigned int accumulator[384]; 330 unsigned int count[384]; 331 MACROBLOCKD *mbd = &cpi->mb.e_mbd; 332 YV12_BUFFER_CONFIG *f = cpi->frames[alt_ref_index]; 333 unsigned char *dst1, *dst2; 334 DECLARE_ALIGNED(16, unsigned char, predictor[384]); 335 336 // Save input state 337 unsigned char *y_buffer = mbd->pre.y_buffer; 338 unsigned char *u_buffer = mbd->pre.u_buffer; 339 unsigned char *v_buffer = mbd->pre.v_buffer; 340 341 if (!cpi->use_weighted_temporal_filter) 342 { 343 // Temporal filtering is unweighted 344 for (frame = 0; frame < frame_count; frame++) 345 filter_weight[frame] = 1; 346 } 347 348 for (mb_row = 0; mb_row < rows; mb_row++) 349 { 350 #if ALT_REF_MC_ENABLED 351 // Reduced search extent by 3 for 6-tap filter & smaller UMV border 352 cpi->mb.mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 19)); 353 cpi->mb.mv_row_max = ((cpi->common.mb_rows - 1 - mb_row) * 16) 354 + (VP8BORDERINPIXELS - 19); 355 #endif 356 357 for (mb_col = 0; mb_col < cols; mb_col++) 358 { 359 int i, j, k, w; 360 int weight_cap; 361 int stride; 362 363 vpx_memset(accumulator, 0, 384*sizeof(unsigned int)); 364 vpx_memset(count, 0, 384*sizeof(unsigned int)); 365 366 #if ALT_REF_MC_ENABLED 367 // Reduced search extent by 3 for 6-tap filter & smaller UMV border 368 cpi->mb.mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 19)); 369 cpi->mb.mv_col_max = ((cpi->common.mb_cols - 1 - mb_col) * 16) 370 + (VP8BORDERINPIXELS - 19); 371 #endif 372 373 // Read & process macroblock weights from motion map 374 if (cpi->use_weighted_temporal_filter) 375 { 376 weight_cap = 2; 377 378 for (frame = alt_ref_index-1; frame >= 0; frame--) 379 { 380 w = *(mm_ptr + (frame+1)*MBs); 381 filter_weight[frame] = w < weight_cap ? w : weight_cap; 382 weight_cap = w; 383 } 384 385 filter_weight[alt_ref_index] = 2; 386 387 weight_cap = 2; 388 389 for (frame = alt_ref_index+1; frame < frame_count; frame++) 390 { 391 w = *(mm_ptr + frame*MBs); 392 filter_weight[frame] = w < weight_cap ? w : weight_cap; 393 weight_cap = w; 394 } 395 396 } 397 398 for (frame = 0; frame < frame_count; frame++) 399 { 400 int err; 401 402 if (cpi->frames[frame] == NULL) 403 continue; 404 405 mbd->block[0].bmi.mv.as_mv.row = 0; 406 mbd->block[0].bmi.mv.as_mv.col = 0; 407 408 #if ALT_REF_MC_ENABLED 409 //if (filter_weight[frame] == 0) 410 { 411 #define THRESH_LOW 10000 412 #define THRESH_HIGH 20000 413 414 // Correlation has been lost try MC 415 err = find_matching_mb ( cpi, 416 cpi->frames[alt_ref_index], 417 cpi->frames[frame], 418 mb_y_offset, 419 THRESH_LOW ); 420 421 if (filter_weight[frame] < 2) 422 { 423 // Set weight depending on error 424 filter_weight[frame] = err<THRESH_LOW 425 ? 2 : err<THRESH_HIGH ? 1 : 0; 426 } 427 } 428 #endif 429 if (filter_weight[frame] != 0) 430 { 431 // Construct the predictors 432 build_predictors_mb ( 433 mbd, 434 cpi->frames[frame]->y_buffer + mb_y_offset, 435 cpi->frames[frame]->u_buffer + mb_uv_offset, 436 cpi->frames[frame]->v_buffer + mb_uv_offset, 437 cpi->frames[frame]->y_stride, 438 mbd->block[0].bmi.mv.as_mv.row, 439 mbd->block[0].bmi.mv.as_mv.col, 440 predictor ); 441 442 // Apply the filter (YUV) 443 apply_temporal_filter ( f->y_buffer + mb_y_offset, 444 f->y_stride, 445 predictor, 446 16, 447 strength, 448 filter_weight[frame], 449 accumulator, 450 count ); 451 452 apply_temporal_filter ( f->u_buffer + mb_uv_offset, 453 f->uv_stride, 454 predictor + 256, 455 8, 456 strength, 457 filter_weight[frame], 458 accumulator + 256, 459 count + 256 ); 460 461 apply_temporal_filter ( f->v_buffer + mb_uv_offset, 462 f->uv_stride, 463 predictor + 320, 464 8, 465 strength, 466 filter_weight[frame], 467 accumulator + 320, 468 count + 320 ); 469 } 470 } 471 472 // Normalize filter output to produce AltRef frame 473 dst1 = cpi->alt_ref_buffer.source_buffer.y_buffer; 474 stride = cpi->alt_ref_buffer.source_buffer.y_stride; 475 byte = mb_y_offset; 476 for (i = 0,k = 0; i < 16; i++) 477 { 478 for (j = 0; j < 16; j++, k++) 479 { 480 unsigned int pval = accumulator[k] + (count[k] >> 1); 481 pval *= cpi->fixed_divide[count[k]]; 482 pval >>= 19; 483 484 dst1[byte] = (unsigned char)pval; 485 486 // move to next pixel 487 byte++; 488 } 489 490 byte += stride - 16; 491 } 492 493 dst1 = cpi->alt_ref_buffer.source_buffer.u_buffer; 494 dst2 = cpi->alt_ref_buffer.source_buffer.v_buffer; 495 stride = cpi->alt_ref_buffer.source_buffer.uv_stride; 496 byte = mb_uv_offset; 497 for (i = 0,k = 256; i < 8; i++) 498 { 499 for (j = 0; j < 8; j++, k++) 500 { 501 int m=k+64; 502 503 // U 504 unsigned int pval = accumulator[k] + (count[k] >> 1); 505 pval *= cpi->fixed_divide[count[k]]; 506 pval >>= 19; 507 dst1[byte] = (unsigned char)pval; 508 509 // V 510 pval = accumulator[m] + (count[m] >> 1); 511 pval *= cpi->fixed_divide[count[m]]; 512 pval >>= 19; 513 dst2[byte] = (unsigned char)pval; 514 515 // move to next pixel 516 byte++; 517 } 518 519 byte += stride - 8; 520 } 521 522 mm_ptr++; 523 mb_y_offset += 16; 524 mb_uv_offset += 8; 525 } 526 527 mb_y_offset += 16*f->y_stride-f->y_width; 528 mb_uv_offset += 8*f->uv_stride-f->uv_width; 529 } 530 531 // Restore input state 532 mbd->pre.y_buffer = y_buffer; 533 mbd->pre.u_buffer = u_buffer; 534 mbd->pre.v_buffer = v_buffer; 535 } 536 537 void vp8cx_temp_filter_c 538 ( 539 VP8_COMP *cpi 540 ) 541 { 542 int frame = 0; 543 544 int num_frames_backward = 0; 545 int num_frames_forward = 0; 546 int frames_to_blur_backward = 0; 547 int frames_to_blur_forward = 0; 548 int frames_to_blur = 0; 549 int start_frame = 0; 550 unsigned int filtered = 0; 551 552 int strength = cpi->oxcf.arnr_strength; 553 554 int blur_type = cpi->oxcf.arnr_type; 555 556 int max_frames = cpi->active_arnr_frames; 557 558 num_frames_backward = cpi->last_alt_ref_sei - cpi->source_encode_index; 559 560 if (num_frames_backward < 0) 561 num_frames_backward += cpi->oxcf.lag_in_frames; 562 563 num_frames_forward = cpi->oxcf.lag_in_frames - (num_frames_backward + 1); 564 565 switch (blur_type) 566 { 567 case 1: 568 ///////////////////////////////////////// 569 // Backward Blur 570 571 frames_to_blur_backward = num_frames_backward; 572 573 if (frames_to_blur_backward >= max_frames) 574 frames_to_blur_backward = max_frames - 1; 575 576 frames_to_blur = frames_to_blur_backward + 1; 577 break; 578 579 case 2: 580 ///////////////////////////////////////// 581 // Forward Blur 582 583 frames_to_blur_forward = num_frames_forward; 584 585 if (frames_to_blur_forward >= max_frames) 586 frames_to_blur_forward = max_frames - 1; 587 588 frames_to_blur = frames_to_blur_forward + 1; 589 break; 590 591 case 3: 592 default: 593 ///////////////////////////////////////// 594 // Center Blur 595 frames_to_blur_forward = num_frames_forward; 596 frames_to_blur_backward = num_frames_backward; 597 598 if (frames_to_blur_forward > frames_to_blur_backward) 599 frames_to_blur_forward = frames_to_blur_backward; 600 601 if (frames_to_blur_backward > frames_to_blur_forward) 602 frames_to_blur_backward = frames_to_blur_forward; 603 604 // When max_frames is even we have 1 more frame backward than forward 605 if (frames_to_blur_forward > (max_frames - 1) / 2) 606 frames_to_blur_forward = ((max_frames - 1) / 2); 607 608 if (frames_to_blur_backward > (max_frames / 2)) 609 frames_to_blur_backward = (max_frames / 2); 610 611 frames_to_blur = frames_to_blur_backward + frames_to_blur_forward + 1; 612 break; 613 } 614 615 start_frame = (cpi->last_alt_ref_sei 616 + frames_to_blur_forward) % cpi->oxcf.lag_in_frames; 617 618 #ifdef DEBUGFWG 619 // DEBUG FWG 620 printf("max:%d FBCK:%d FFWD:%d ftb:%d ftbbck:%d ftbfwd:%d sei:%d lasei:%d start:%d" 621 , max_frames 622 , num_frames_backward 623 , num_frames_forward 624 , frames_to_blur 625 , frames_to_blur_backward 626 , frames_to_blur_forward 627 , cpi->source_encode_index 628 , cpi->last_alt_ref_sei 629 , start_frame); 630 #endif 631 632 // Setup frame pointers, NULL indicates frame not included in filter 633 vpx_memset(cpi->frames, 0, max_frames*sizeof(YV12_BUFFER_CONFIG *)); 634 for (frame = 0; frame < frames_to_blur; frame++) 635 { 636 int which_buffer = start_frame - frame; 637 638 if (which_buffer < 0) 639 which_buffer += cpi->oxcf.lag_in_frames; 640 641 cpi->frames[frames_to_blur-1-frame] 642 = &cpi->src_buffer[which_buffer].source_buffer; 643 } 644 645 vp8cx_temp_blur1_c ( 646 cpi, 647 frames_to_blur, 648 frames_to_blur_backward, 649 strength ); 650 } 651 #endif 652