1 /* 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include <math.h> 12 #include <limits.h> 13 14 #include "vp9/common/vp9_onyxc_int.h" 15 #include "vp9/common/vp9_reconinter.h" 16 #include "vp9/encoder/vp9_onyx_int.h" 17 #include "vp9/common/vp9_systemdependent.h" 18 #include "vp9/encoder/vp9_quantize.h" 19 #include "vp9/common/vp9_alloccommon.h" 20 #include "vp9/encoder/vp9_mcomp.h" 21 #include "vp9/encoder/vp9_firstpass.h" 22 #include "vp9/encoder/vp9_psnr.h" 23 #include "vpx_scale/vpx_scale.h" 24 #include "vp9/common/vp9_extend.h" 25 #include "vp9/encoder/vp9_ratectrl.h" 26 #include "vp9/common/vp9_quant_common.h" 27 #include "vp9/encoder/vp9_segmentation.h" 28 #include "vpx_mem/vpx_mem.h" 29 #include "vpx_ports/vpx_timer.h" 30 31 #define ALT_REF_MC_ENABLED 1 // dis/enable MC in AltRef filtering 32 #define ALT_REF_SUBPEL_ENABLED 1 // dis/enable subpel in MC AltRef filtering 33 34 static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd, 35 uint8_t *y_mb_ptr, 36 uint8_t *u_mb_ptr, 37 uint8_t *v_mb_ptr, 38 int stride, 39 int mv_row, 40 int mv_col, 41 uint8_t *pred) { 42 const int which_mv = 0; 43 MV mv = { mv_row, mv_col }; 44 45 vp9_build_inter_predictor(y_mb_ptr, stride, 46 &pred[0], 16, 47 &mv, 48 &xd->scale_factor[which_mv], 49 16, 16, 50 which_mv, 51 &xd->subpix, MV_PRECISION_Q3); 52 53 stride = (stride + 1) >> 1; 54 55 vp9_build_inter_predictor(u_mb_ptr, stride, 56 &pred[256], 8, 57 &mv, 58 &xd->scale_factor[which_mv], 59 8, 8, 60 which_mv, 61 &xd->subpix, MV_PRECISION_Q4); 62 63 vp9_build_inter_predictor(v_mb_ptr, stride, 64 &pred[320], 8, 65 &mv, 66 &xd->scale_factor[which_mv], 67 8, 8, 68 which_mv, 69 &xd->subpix, MV_PRECISION_Q4); 70 } 71 72 void vp9_temporal_filter_apply_c(uint8_t *frame1, 73 unsigned int stride, 74 uint8_t *frame2, 75 unsigned int block_size, 76 int strength, 77 int filter_weight, 78 unsigned int *accumulator, 79 uint16_t *count) { 80 unsigned int i, j, k; 81 int modifier; 82 int byte = 0; 83 84 for (i = 0, k = 0; i < block_size; i++) { 85 for (j = 0; j < block_size; j++, k++) { 86 87 int src_byte = frame1[byte]; 88 int pixel_value = *frame2++; 89 90 modifier = src_byte - pixel_value; 91 // This is an integer approximation of: 92 // float coeff = (3.0 * modifer * modifier) / pow(2, strength); 93 // modifier = (int)roundf(coeff > 16 ? 0 : 16-coeff); 94 modifier *= modifier; 95 modifier *= 3; 96 modifier += 1 << (strength - 1); 97 modifier >>= strength; 98 99 if (modifier > 16) 100 modifier = 16; 101 102 modifier = 16 - modifier; 103 modifier *= filter_weight; 104 105 count[k] += modifier; 106 accumulator[k] += modifier * pixel_value; 107 108 byte++; 109 } 110 111 byte += stride - block_size; 112 } 113 } 114 115 #if ALT_REF_MC_ENABLED 116 117 static int temporal_filter_find_matching_mb_c(VP9_COMP *cpi, 118 uint8_t *arf_frame_buf, 119 uint8_t *frame_ptr_buf, 120 int stride, 121 int error_thresh) { 122 MACROBLOCK *x = &cpi->mb; 123 MACROBLOCKD* const xd = &x->e_mbd; 124 int step_param; 125 int sadpb = x->sadperbit16; 126 int bestsme = INT_MAX; 127 128 int_mv best_ref_mv1; 129 int_mv best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */ 130 int_mv *ref_mv; 131 132 // Save input state 133 struct buf_2d src = x->plane[0].src; 134 struct buf_2d pre = xd->plane[0].pre[0]; 135 136 best_ref_mv1.as_int = 0; 137 best_ref_mv1_full.as_mv.col = best_ref_mv1.as_mv.col >> 3; 138 best_ref_mv1_full.as_mv.row = best_ref_mv1.as_mv.row >> 3; 139 140 // Setup frame pointers 141 x->plane[0].src.buf = arf_frame_buf; 142 x->plane[0].src.stride = stride; 143 xd->plane[0].pre[0].buf = frame_ptr_buf; 144 xd->plane[0].pre[0].stride = stride; 145 146 // Further step/diamond searches as necessary 147 if (cpi->speed < 8) 148 step_param = cpi->sf.reduce_first_step_size + ((cpi->speed > 5) ? 1 : 0); 149 else 150 step_param = cpi->sf.reduce_first_step_size + 2; 151 step_param = MIN(step_param, (cpi->sf.max_step_search_steps - 2)); 152 153 /*cpi->sf.search_method == HEX*/ 154 // TODO Check that the 16x16 vf & sdf are selected here 155 // Ignore mv costing by sending NULL pointer instead of cost arrays 156 ref_mv = &x->e_mbd.mi_8x8[0]->bmi[0].as_mv[0]; 157 bestsme = vp9_hex_search(x, &best_ref_mv1_full, 158 step_param, sadpb, 1, 159 &cpi->fn_ptr[BLOCK_16X16], 160 0, &best_ref_mv1, ref_mv); 161 162 #if ALT_REF_SUBPEL_ENABLED 163 // Try sub-pixel MC? 164 // if (bestsme > error_thresh && bestsme < INT_MAX) 165 { 166 int distortion; 167 unsigned int sse; 168 // Ignore mv costing by sending NULL pointer instead of cost array 169 bestsme = cpi->find_fractional_mv_step(x, ref_mv, 170 &best_ref_mv1, 171 x->errorperbit, 172 &cpi->fn_ptr[BLOCK_16X16], 173 0, cpi->sf.subpel_iters_per_step, 174 NULL, NULL, 175 &distortion, &sse); 176 } 177 #endif 178 179 // Restore input state 180 x->plane[0].src = src; 181 xd->plane[0].pre[0] = pre; 182 183 return bestsme; 184 } 185 #endif 186 187 static void temporal_filter_iterate_c(VP9_COMP *cpi, 188 int frame_count, 189 int alt_ref_index, 190 int strength) { 191 int byte; 192 int frame; 193 int mb_col, mb_row; 194 unsigned int filter_weight; 195 int mb_cols = cpi->common.mb_cols; 196 int mb_rows = cpi->common.mb_rows; 197 int mb_y_offset = 0; 198 int mb_uv_offset = 0; 199 DECLARE_ALIGNED_ARRAY(16, unsigned int, accumulator, 16 * 16 + 8 * 8 + 8 * 8); 200 DECLARE_ALIGNED_ARRAY(16, uint16_t, count, 16 * 16 + 8 * 8 + 8 * 8); 201 MACROBLOCKD *mbd = &cpi->mb.e_mbd; 202 YV12_BUFFER_CONFIG *f = cpi->frames[alt_ref_index]; 203 uint8_t *dst1, *dst2; 204 DECLARE_ALIGNED_ARRAY(16, uint8_t, predictor, 16 * 16 + 8 * 8 + 8 * 8); 205 206 // Save input state 207 uint8_t* input_buffer[MAX_MB_PLANE]; 208 int i; 209 210 for (i = 0; i < MAX_MB_PLANE; i++) 211 input_buffer[i] = mbd->plane[i].pre[0].buf; 212 213 for (mb_row = 0; mb_row < mb_rows; mb_row++) { 214 #if ALT_REF_MC_ENABLED 215 // Source frames are extended to 16 pixels. This is different than 216 // L/A/G reference frames that have a border of 32 (VP9BORDERINPIXELS) 217 // A 6/8 tap filter is used for motion search. This requires 2 pixels 218 // before and 3 pixels after. So the largest Y mv on a border would 219 // then be 16 - VP9_INTERP_EXTEND. The UV blocks are half the size of the 220 // Y and therefore only extended by 8. The largest mv that a UV block 221 // can support is 8 - VP9_INTERP_EXTEND. A UV mv is half of a Y mv. 222 // (16 - VP9_INTERP_EXTEND) >> 1 which is greater than 223 // 8 - VP9_INTERP_EXTEND. 224 // To keep the mv in play for both Y and UV planes the max that it 225 // can be on a border is therefore 16 - (2*VP9_INTERP_EXTEND+1). 226 cpi->mb.mv_row_min = -((mb_row * 16) + (17 - 2 * VP9_INTERP_EXTEND)); 227 cpi->mb.mv_row_max = ((cpi->common.mb_rows - 1 - mb_row) * 16) 228 + (17 - 2 * VP9_INTERP_EXTEND); 229 #endif 230 231 for (mb_col = 0; mb_col < mb_cols; mb_col++) { 232 int i, j, k; 233 int stride; 234 235 vpx_memset(accumulator, 0, 384 * sizeof(unsigned int)); 236 vpx_memset(count, 0, 384 * sizeof(uint16_t)); 237 238 #if ALT_REF_MC_ENABLED 239 cpi->mb.mv_col_min = -((mb_col * 16) + (17 - 2 * VP9_INTERP_EXTEND)); 240 cpi->mb.mv_col_max = ((cpi->common.mb_cols - 1 - mb_col) * 16) 241 + (17 - 2 * VP9_INTERP_EXTEND); 242 #endif 243 244 for (frame = 0; frame < frame_count; frame++) { 245 if (cpi->frames[frame] == NULL) 246 continue; 247 248 mbd->mi_8x8[0]->bmi[0].as_mv[0].as_mv.row = 0; 249 mbd->mi_8x8[0]->bmi[0].as_mv[0].as_mv.col = 0; 250 251 if (frame == alt_ref_index) { 252 filter_weight = 2; 253 } else { 254 int err = 0; 255 #if ALT_REF_MC_ENABLED 256 #define THRESH_LOW 10000 257 #define THRESH_HIGH 20000 258 259 // Find best match in this frame by MC 260 err = temporal_filter_find_matching_mb_c 261 (cpi, 262 cpi->frames[alt_ref_index]->y_buffer + mb_y_offset, 263 cpi->frames[frame]->y_buffer + mb_y_offset, 264 cpi->frames[frame]->y_stride, 265 THRESH_LOW); 266 #endif 267 // Assign higher weight to matching MB if it's error 268 // score is lower. If not applying MC default behavior 269 // is to weight all MBs equal. 270 filter_weight = err < THRESH_LOW 271 ? 2 : err < THRESH_HIGH ? 1 : 0; 272 } 273 274 if (filter_weight != 0) { 275 // Construct the predictors 276 temporal_filter_predictors_mb_c 277 (mbd, 278 cpi->frames[frame]->y_buffer + mb_y_offset, 279 cpi->frames[frame]->u_buffer + mb_uv_offset, 280 cpi->frames[frame]->v_buffer + mb_uv_offset, 281 cpi->frames[frame]->y_stride, 282 mbd->mi_8x8[0]->bmi[0].as_mv[0].as_mv.row, 283 mbd->mi_8x8[0]->bmi[0].as_mv[0].as_mv.col, 284 predictor); 285 286 // Apply the filter (YUV) 287 vp9_temporal_filter_apply(f->y_buffer + mb_y_offset, f->y_stride, 288 predictor, 16, strength, filter_weight, 289 accumulator, count); 290 291 vp9_temporal_filter_apply(f->u_buffer + mb_uv_offset, f->uv_stride, 292 predictor + 256, 8, strength, filter_weight, 293 accumulator + 256, count + 256); 294 295 vp9_temporal_filter_apply(f->v_buffer + mb_uv_offset, f->uv_stride, 296 predictor + 320, 8, strength, filter_weight, 297 accumulator + 320, count + 320); 298 } 299 } 300 301 // Normalize filter output to produce AltRef frame 302 dst1 = cpi->alt_ref_buffer.y_buffer; 303 stride = cpi->alt_ref_buffer.y_stride; 304 byte = mb_y_offset; 305 for (i = 0, k = 0; i < 16; i++) { 306 for (j = 0; j < 16; j++, k++) { 307 unsigned int pval = accumulator[k] + (count[k] >> 1); 308 pval *= cpi->fixed_divide[count[k]]; 309 pval >>= 19; 310 311 dst1[byte] = (uint8_t)pval; 312 313 // move to next pixel 314 byte++; 315 } 316 317 byte += stride - 16; 318 } 319 320 dst1 = cpi->alt_ref_buffer.u_buffer; 321 dst2 = cpi->alt_ref_buffer.v_buffer; 322 stride = cpi->alt_ref_buffer.uv_stride; 323 byte = mb_uv_offset; 324 for (i = 0, k = 256; i < 8; i++) { 325 for (j = 0; j < 8; j++, k++) { 326 int m = k + 64; 327 328 // U 329 unsigned int pval = accumulator[k] + (count[k] >> 1); 330 pval *= cpi->fixed_divide[count[k]]; 331 pval >>= 19; 332 dst1[byte] = (uint8_t)pval; 333 334 // V 335 pval = accumulator[m] + (count[m] >> 1); 336 pval *= cpi->fixed_divide[count[m]]; 337 pval >>= 19; 338 dst2[byte] = (uint8_t)pval; 339 340 // move to next pixel 341 byte++; 342 } 343 344 byte += stride - 8; 345 } 346 347 mb_y_offset += 16; 348 mb_uv_offset += 8; 349 } 350 351 mb_y_offset += 16 * (f->y_stride - mb_cols); 352 mb_uv_offset += 8 * (f->uv_stride - mb_cols); 353 } 354 355 // Restore input state 356 for (i = 0; i < MAX_MB_PLANE; i++) 357 mbd->plane[i].pre[0].buf = input_buffer[i]; 358 } 359 360 void vp9_temporal_filter_prepare(VP9_COMP *cpi, int distance) { 361 VP9_COMMON *const cm = &cpi->common; 362 363 int frame = 0; 364 365 int frames_to_blur_backward = 0; 366 int frames_to_blur_forward = 0; 367 int frames_to_blur = 0; 368 int start_frame = 0; 369 370 int strength = cpi->active_arnr_strength; 371 int blur_type = cpi->oxcf.arnr_type; 372 int max_frames = cpi->active_arnr_frames; 373 374 const int num_frames_backward = distance; 375 const int num_frames_forward = vp9_lookahead_depth(cpi->lookahead) 376 - (num_frames_backward + 1); 377 378 switch (blur_type) { 379 case 1: 380 // Backward Blur 381 frames_to_blur_backward = num_frames_backward; 382 383 if (frames_to_blur_backward >= max_frames) 384 frames_to_blur_backward = max_frames - 1; 385 386 frames_to_blur = frames_to_blur_backward + 1; 387 break; 388 389 case 2: 390 // Forward Blur 391 392 frames_to_blur_forward = num_frames_forward; 393 394 if (frames_to_blur_forward >= max_frames) 395 frames_to_blur_forward = max_frames - 1; 396 397 frames_to_blur = frames_to_blur_forward + 1; 398 break; 399 400 case 3: 401 default: 402 // Center Blur 403 frames_to_blur_forward = num_frames_forward; 404 frames_to_blur_backward = num_frames_backward; 405 406 if (frames_to_blur_forward > frames_to_blur_backward) 407 frames_to_blur_forward = frames_to_blur_backward; 408 409 if (frames_to_blur_backward > frames_to_blur_forward) 410 frames_to_blur_backward = frames_to_blur_forward; 411 412 // When max_frames is even we have 1 more frame backward than forward 413 if (frames_to_blur_forward > (max_frames - 1) / 2) 414 frames_to_blur_forward = ((max_frames - 1) / 2); 415 416 if (frames_to_blur_backward > (max_frames / 2)) 417 frames_to_blur_backward = (max_frames / 2); 418 419 frames_to_blur = frames_to_blur_backward + frames_to_blur_forward + 1; 420 break; 421 } 422 423 start_frame = distance + frames_to_blur_forward; 424 425 #ifdef DEBUGFWG 426 // DEBUG FWG 427 printf("max:%d FBCK:%d FFWD:%d ftb:%d ftbbck:%d ftbfwd:%d sei:%d lasei:%d start:%d" 428 , max_frames 429 , num_frames_backward 430 , num_frames_forward 431 , frames_to_blur 432 , frames_to_blur_backward 433 , frames_to_blur_forward 434 , cpi->source_encode_index 435 , cpi->last_alt_ref_sei 436 , start_frame); 437 #endif 438 439 // Setup scaling factors. Scaling on each of the arnr frames is not supported 440 vp9_setup_scale_factors_for_frame(&cpi->mb.e_mbd.scale_factor[0], 441 cm->yv12_fb[cm->new_fb_idx].y_crop_width, 442 cm->yv12_fb[cm->new_fb_idx].y_crop_height, 443 cm->width, cm->height); 444 445 // Setup frame pointers, NULL indicates frame not included in filter 446 vpx_memset(cpi->frames, 0, max_frames * sizeof(YV12_BUFFER_CONFIG *)); 447 for (frame = 0; frame < frames_to_blur; frame++) { 448 int which_buffer = start_frame - frame; 449 struct lookahead_entry *buf = vp9_lookahead_peek(cpi->lookahead, 450 which_buffer); 451 cpi->frames[frames_to_blur - 1 - frame] = &buf->img; 452 } 453 454 temporal_filter_iterate_c(cpi, frames_to_blur, frames_to_blur_backward, 455 strength); 456 } 457 458 void configure_arnr_filter(VP9_COMP *cpi, const unsigned int this_frame, 459 const int group_boost) { 460 int half_gf_int; 461 int frames_after_arf; 462 int frames_bwd = cpi->oxcf.arnr_max_frames - 1; 463 int frames_fwd = cpi->oxcf.arnr_max_frames - 1; 464 int q; 465 466 // Define the arnr filter width for this group of frames: 467 // We only filter frames that lie within a distance of half 468 // the GF interval from the ARF frame. We also have to trap 469 // cases where the filter extends beyond the end of clip. 470 // Note: this_frame->frame has been updated in the loop 471 // so it now points at the ARF frame. 472 half_gf_int = cpi->baseline_gf_interval >> 1; 473 frames_after_arf = (int)(cpi->twopass.total_stats.count - this_frame - 1); 474 475 switch (cpi->oxcf.arnr_type) { 476 case 1: // Backward filter 477 frames_fwd = 0; 478 if (frames_bwd > half_gf_int) 479 frames_bwd = half_gf_int; 480 break; 481 482 case 2: // Forward filter 483 if (frames_fwd > half_gf_int) 484 frames_fwd = half_gf_int; 485 if (frames_fwd > frames_after_arf) 486 frames_fwd = frames_after_arf; 487 frames_bwd = 0; 488 break; 489 490 case 3: // Centered filter 491 default: 492 frames_fwd >>= 1; 493 if (frames_fwd > frames_after_arf) 494 frames_fwd = frames_after_arf; 495 if (frames_fwd > half_gf_int) 496 frames_fwd = half_gf_int; 497 498 frames_bwd = frames_fwd; 499 500 // For even length filter there is one more frame backward 501 // than forward: e.g. len=6 ==> bbbAff, len=7 ==> bbbAfff. 502 if (frames_bwd < half_gf_int) 503 frames_bwd += (cpi->oxcf.arnr_max_frames + 1) & 0x1; 504 break; 505 } 506 507 cpi->active_arnr_frames = frames_bwd + 1 + frames_fwd; 508 509 // Adjust the strength based on active max q 510 q = ((int)vp9_convert_qindex_to_q(cpi->active_worst_quality) >> 1); 511 if (q > 8) { 512 cpi->active_arnr_strength = cpi->oxcf.arnr_strength; 513 } else { 514 cpi->active_arnr_strength = cpi->oxcf.arnr_strength - (8 - q); 515 if (cpi->active_arnr_strength < 0) 516 cpi->active_arnr_strength = 0; 517 } 518 519 // Adjust number of frames in filter and strength based on gf boost level. 520 if (cpi->active_arnr_frames > (group_boost / 150)) { 521 cpi->active_arnr_frames = (group_boost / 150); 522 cpi->active_arnr_frames += !(cpi->active_arnr_frames & 1); 523 } 524 if (cpi->active_arnr_strength > (group_boost / 300)) { 525 cpi->active_arnr_strength = (group_boost / 300); 526 } 527 } 528