1 /* 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 12 #include "vpx_config.h" 13 #include "vpx_rtcd.h" 14 #include "encodemb.h" 15 #include "vp8/common/reconinter.h" 16 #include "quantize.h" 17 #include "tokenize.h" 18 #include "vp8/common/invtrans.h" 19 #include "vpx_mem/vpx_mem.h" 20 #include "rdopt.h" 21 22 void vp8_subtract_b_c(BLOCK *be, BLOCKD *bd, int pitch) 23 { 24 unsigned char *src_ptr = (*(be->base_src) + be->src); 25 short *diff_ptr = be->src_diff; 26 unsigned char *pred_ptr = bd->predictor; 27 int src_stride = be->src_stride; 28 29 int r, c; 30 31 for (r = 0; r < 4; r++) 32 { 33 for (c = 0; c < 4; c++) 34 { 35 diff_ptr[c] = src_ptr[c] - pred_ptr[c]; 36 } 37 38 diff_ptr += pitch; 39 pred_ptr += pitch; 40 src_ptr += src_stride; 41 } 42 } 43 44 void vp8_subtract_mbuv_c(short *diff, unsigned char *usrc, unsigned char *vsrc, 45 int src_stride, unsigned char *upred, 46 unsigned char *vpred, int pred_stride) 47 { 48 short *udiff = diff + 256; 49 short *vdiff = diff + 320; 50 51 int r, c; 52 53 for (r = 0; r < 8; r++) 54 { 55 for (c = 0; c < 8; c++) 56 { 57 udiff[c] = usrc[c] - upred[c]; 58 } 59 60 udiff += 8; 61 upred += pred_stride; 62 usrc += src_stride; 63 } 64 65 for (r = 0; r < 8; r++) 66 { 67 for (c = 0; c < 8; c++) 68 { 69 vdiff[c] = vsrc[c] - vpred[c]; 70 } 71 72 vdiff += 8; 73 vpred += pred_stride; 74 vsrc += src_stride; 75 } 76 } 77 78 void vp8_subtract_mby_c(short *diff, unsigned char *src, int src_stride, 79 unsigned char *pred, int pred_stride) 80 { 81 int r, c; 82 83 for (r = 0; r < 16; r++) 84 { 85 for (c = 0; c < 16; c++) 86 { 87 diff[c] = src[c] - pred[c]; 88 } 89 90 diff += 16; 91 pred += pred_stride; 92 src += src_stride; 93 } 94 } 95 96 static void vp8_subtract_mb(MACROBLOCK *x) 97 { 98 BLOCK *b = &x->block[0]; 99 100 vp8_subtract_mby(x->src_diff, *(b->base_src), 101 b->src_stride, x->e_mbd.dst.y_buffer, x->e_mbd.dst.y_stride); 102 vp8_subtract_mbuv(x->src_diff, x->src.u_buffer, 103 x->src.v_buffer, x->src.uv_stride, x->e_mbd.dst.u_buffer, 104 x->e_mbd.dst.v_buffer, x->e_mbd.dst.uv_stride); 105 } 106 107 static void build_dcblock(MACROBLOCK *x) 108 { 109 short *src_diff_ptr = &x->src_diff[384]; 110 int i; 111 112 for (i = 0; i < 16; i++) 113 { 114 src_diff_ptr[i] = x->coeff[i * 16]; 115 } 116 } 117 118 void vp8_transform_mbuv(MACROBLOCK *x) 119 { 120 int i; 121 122 for (i = 16; i < 24; i += 2) 123 { 124 x->short_fdct8x4(&x->block[i].src_diff[0], 125 &x->block[i].coeff[0], 16); 126 } 127 } 128 129 130 void vp8_transform_intra_mby(MACROBLOCK *x) 131 { 132 int i; 133 134 for (i = 0; i < 16; i += 2) 135 { 136 x->short_fdct8x4(&x->block[i].src_diff[0], 137 &x->block[i].coeff[0], 32); 138 } 139 140 /* build dc block from 16 y dc values */ 141 build_dcblock(x); 142 143 /* do 2nd order transform on the dc block */ 144 x->short_walsh4x4(&x->block[24].src_diff[0], 145 &x->block[24].coeff[0], 8); 146 147 } 148 149 150 static void transform_mb(MACROBLOCK *x) 151 { 152 int i; 153 154 for (i = 0; i < 16; i += 2) 155 { 156 x->short_fdct8x4(&x->block[i].src_diff[0], 157 &x->block[i].coeff[0], 32); 158 } 159 160 /* build dc block from 16 y dc values */ 161 if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV) 162 build_dcblock(x); 163 164 for (i = 16; i < 24; i += 2) 165 { 166 x->short_fdct8x4(&x->block[i].src_diff[0], 167 &x->block[i].coeff[0], 16); 168 } 169 170 /* do 2nd order transform on the dc block */ 171 if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV) 172 x->short_walsh4x4(&x->block[24].src_diff[0], 173 &x->block[24].coeff[0], 8); 174 175 } 176 177 178 static void transform_mby(MACROBLOCK *x) 179 { 180 int i; 181 182 for (i = 0; i < 16; i += 2) 183 { 184 x->short_fdct8x4(&x->block[i].src_diff[0], 185 &x->block[i].coeff[0], 32); 186 } 187 188 /* build dc block from 16 y dc values */ 189 if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV) 190 { 191 build_dcblock(x); 192 x->short_walsh4x4(&x->block[24].src_diff[0], 193 &x->block[24].coeff[0], 8); 194 } 195 } 196 197 198 199 #define RDTRUNC(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF ) 200 201 typedef struct vp8_token_state vp8_token_state; 202 203 struct vp8_token_state{ 204 int rate; 205 int error; 206 signed char next; 207 signed char token; 208 short qc; 209 }; 210 211 /* TODO: experiments to find optimal multiple numbers */ 212 #define Y1_RD_MULT 4 213 #define UV_RD_MULT 2 214 #define Y2_RD_MULT 16 215 216 static const int plane_rd_mult[4]= 217 { 218 Y1_RD_MULT, 219 Y2_RD_MULT, 220 UV_RD_MULT, 221 Y1_RD_MULT 222 }; 223 224 static void optimize_b(MACROBLOCK *mb, int ib, int type, 225 ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l) 226 { 227 BLOCK *b; 228 BLOCKD *d; 229 vp8_token_state tokens[17][2]; 230 unsigned best_mask[2]; 231 const short *dequant_ptr; 232 const short *coeff_ptr; 233 short *qcoeff_ptr; 234 short *dqcoeff_ptr; 235 int eob; 236 int i0; 237 int rc; 238 int x; 239 int sz = 0; 240 int next; 241 int rdmult; 242 int rddiv; 243 int final_eob; 244 int rd_cost0; 245 int rd_cost1; 246 int rate0; 247 int rate1; 248 int error0; 249 int error1; 250 int t0; 251 int t1; 252 int best; 253 int band; 254 int pt; 255 int i; 256 int err_mult = plane_rd_mult[type]; 257 258 b = &mb->block[ib]; 259 d = &mb->e_mbd.block[ib]; 260 261 /* Enable this to test the effect of RDO as a replacement for the dynamic 262 * zero bin instead of an augmentation of it. 263 */ 264 #if 0 265 vp8_strict_quantize_b(b, d); 266 #endif 267 268 dequant_ptr = d->dequant; 269 coeff_ptr = b->coeff; 270 qcoeff_ptr = d->qcoeff; 271 dqcoeff_ptr = d->dqcoeff; 272 i0 = !type; 273 eob = *d->eob; 274 275 /* Now set up a Viterbi trellis to evaluate alternative roundings. */ 276 rdmult = mb->rdmult * err_mult; 277 if(mb->e_mbd.mode_info_context->mbmi.ref_frame==INTRA_FRAME) 278 rdmult = (rdmult * 9)>>4; 279 280 rddiv = mb->rddiv; 281 best_mask[0] = best_mask[1] = 0; 282 /* Initialize the sentinel node of the trellis. */ 283 tokens[eob][0].rate = 0; 284 tokens[eob][0].error = 0; 285 tokens[eob][0].next = 16; 286 tokens[eob][0].token = DCT_EOB_TOKEN; 287 tokens[eob][0].qc = 0; 288 *(tokens[eob] + 1) = *(tokens[eob] + 0); 289 next = eob; 290 for (i = eob; i-- > i0;) 291 { 292 int base_bits; 293 int d2; 294 int dx; 295 296 rc = vp8_default_zig_zag1d[i]; 297 x = qcoeff_ptr[rc]; 298 /* Only add a trellis state for non-zero coefficients. */ 299 if (x) 300 { 301 int shortcut=0; 302 error0 = tokens[next][0].error; 303 error1 = tokens[next][1].error; 304 /* Evaluate the first possibility for this state. */ 305 rate0 = tokens[next][0].rate; 306 rate1 = tokens[next][1].rate; 307 t0 = (vp8_dct_value_tokens_ptr + x)->Token; 308 /* Consider both possible successor states. */ 309 if (next < 16) 310 { 311 band = vp8_coef_bands[i + 1]; 312 pt = vp8_prev_token_class[t0]; 313 rate0 += 314 mb->token_costs[type][band][pt][tokens[next][0].token]; 315 rate1 += 316 mb->token_costs[type][band][pt][tokens[next][1].token]; 317 } 318 rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0); 319 rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1); 320 if (rd_cost0 == rd_cost1) 321 { 322 rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0); 323 rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1); 324 } 325 /* And pick the best. */ 326 best = rd_cost1 < rd_cost0; 327 base_bits = *(vp8_dct_value_cost_ptr + x); 328 dx = dqcoeff_ptr[rc] - coeff_ptr[rc]; 329 d2 = dx*dx; 330 tokens[i][0].rate = base_bits + (best ? rate1 : rate0); 331 tokens[i][0].error = d2 + (best ? error1 : error0); 332 tokens[i][0].next = next; 333 tokens[i][0].token = t0; 334 tokens[i][0].qc = x; 335 best_mask[0] |= best << i; 336 /* Evaluate the second possibility for this state. */ 337 rate0 = tokens[next][0].rate; 338 rate1 = tokens[next][1].rate; 339 340 if((abs(x)*dequant_ptr[rc]>abs(coeff_ptr[rc])) && 341 (abs(x)*dequant_ptr[rc]<abs(coeff_ptr[rc])+dequant_ptr[rc])) 342 shortcut = 1; 343 else 344 shortcut = 0; 345 346 if(shortcut) 347 { 348 sz = -(x < 0); 349 x -= 2*sz + 1; 350 } 351 352 /* Consider both possible successor states. */ 353 if (!x) 354 { 355 /* If we reduced this coefficient to zero, check to see if 356 * we need to move the EOB back here. 357 */ 358 t0 = tokens[next][0].token == DCT_EOB_TOKEN ? 359 DCT_EOB_TOKEN : ZERO_TOKEN; 360 t1 = tokens[next][1].token == DCT_EOB_TOKEN ? 361 DCT_EOB_TOKEN : ZERO_TOKEN; 362 } 363 else 364 { 365 t0=t1 = (vp8_dct_value_tokens_ptr + x)->Token; 366 } 367 if (next < 16) 368 { 369 band = vp8_coef_bands[i + 1]; 370 if(t0!=DCT_EOB_TOKEN) 371 { 372 pt = vp8_prev_token_class[t0]; 373 rate0 += mb->token_costs[type][band][pt][ 374 tokens[next][0].token]; 375 } 376 if(t1!=DCT_EOB_TOKEN) 377 { 378 pt = vp8_prev_token_class[t1]; 379 rate1 += mb->token_costs[type][band][pt][ 380 tokens[next][1].token]; 381 } 382 } 383 384 rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0); 385 rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1); 386 if (rd_cost0 == rd_cost1) 387 { 388 rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0); 389 rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1); 390 } 391 /* And pick the best. */ 392 best = rd_cost1 < rd_cost0; 393 base_bits = *(vp8_dct_value_cost_ptr + x); 394 395 if(shortcut) 396 { 397 dx -= (dequant_ptr[rc] + sz) ^ sz; 398 d2 = dx*dx; 399 } 400 tokens[i][1].rate = base_bits + (best ? rate1 : rate0); 401 tokens[i][1].error = d2 + (best ? error1 : error0); 402 tokens[i][1].next = next; 403 tokens[i][1].token =best?t1:t0; 404 tokens[i][1].qc = x; 405 best_mask[1] |= best << i; 406 /* Finally, make this the new head of the trellis. */ 407 next = i; 408 } 409 /* There's no choice to make for a zero coefficient, so we don't 410 * add a new trellis node, but we do need to update the costs. 411 */ 412 else 413 { 414 band = vp8_coef_bands[i + 1]; 415 t0 = tokens[next][0].token; 416 t1 = tokens[next][1].token; 417 /* Update the cost of each path if we're past the EOB token. */ 418 if (t0 != DCT_EOB_TOKEN) 419 { 420 tokens[next][0].rate += mb->token_costs[type][band][0][t0]; 421 tokens[next][0].token = ZERO_TOKEN; 422 } 423 if (t1 != DCT_EOB_TOKEN) 424 { 425 tokens[next][1].rate += mb->token_costs[type][band][0][t1]; 426 tokens[next][1].token = ZERO_TOKEN; 427 } 428 /* Don't update next, because we didn't add a new node. */ 429 } 430 } 431 432 /* Now pick the best path through the whole trellis. */ 433 band = vp8_coef_bands[i + 1]; 434 VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l); 435 rate0 = tokens[next][0].rate; 436 rate1 = tokens[next][1].rate; 437 error0 = tokens[next][0].error; 438 error1 = tokens[next][1].error; 439 t0 = tokens[next][0].token; 440 t1 = tokens[next][1].token; 441 rate0 += mb->token_costs[type][band][pt][t0]; 442 rate1 += mb->token_costs[type][band][pt][t1]; 443 rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0); 444 rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1); 445 if (rd_cost0 == rd_cost1) 446 { 447 rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0); 448 rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1); 449 } 450 best = rd_cost1 < rd_cost0; 451 final_eob = i0 - 1; 452 for (i = next; i < eob; i = next) 453 { 454 x = tokens[i][best].qc; 455 if (x) 456 final_eob = i; 457 rc = vp8_default_zig_zag1d[i]; 458 qcoeff_ptr[rc] = x; 459 dqcoeff_ptr[rc] = x * dequant_ptr[rc]; 460 next = tokens[i][best].next; 461 best = (best_mask[best] >> i) & 1; 462 } 463 final_eob++; 464 465 *a = *l = (final_eob != !type); 466 *d->eob = (char)final_eob; 467 } 468 static void check_reset_2nd_coeffs(MACROBLOCKD *x, int type, 469 ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l) 470 { 471 int sum=0; 472 int i; 473 BLOCKD *bd = &x->block[24]; 474 475 if(bd->dequant[0]>=35 && bd->dequant[1]>=35) 476 return; 477 478 for(i=0;i<(*bd->eob);i++) 479 { 480 int coef = bd->dqcoeff[vp8_default_zig_zag1d[i]]; 481 sum+= (coef>=0)?coef:-coef; 482 if(sum>=35) 483 return; 484 } 485 /************************************************************************** 486 our inverse hadamard transform effectively is weighted sum of all 16 inputs 487 with weight either 1 or -1. It has a last stage scaling of (sum+3)>>3. And 488 dc only idct is (dc+4)>>3. So if all the sums are between -35 and 29, the 489 output after inverse wht and idct will be all zero. A sum of absolute value 490 smaller than 35 guarantees all 16 different (+1/-1) weighted sums in wht 491 fall between -35 and +35. 492 **************************************************************************/ 493 if(sum < 35) 494 { 495 for(i=0;i<(*bd->eob);i++) 496 { 497 int rc = vp8_default_zig_zag1d[i]; 498 bd->qcoeff[rc]=0; 499 bd->dqcoeff[rc]=0; 500 } 501 *bd->eob = 0; 502 *a = *l = (*bd->eob != !type); 503 } 504 } 505 506 static void optimize_mb(MACROBLOCK *x) 507 { 508 int b; 509 int type; 510 int has_2nd_order; 511 512 ENTROPY_CONTEXT_PLANES t_above, t_left; 513 ENTROPY_CONTEXT *ta; 514 ENTROPY_CONTEXT *tl; 515 516 vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); 517 vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); 518 519 ta = (ENTROPY_CONTEXT *)&t_above; 520 tl = (ENTROPY_CONTEXT *)&t_left; 521 522 has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED 523 && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV); 524 type = has_2nd_order ? PLANE_TYPE_Y_NO_DC : PLANE_TYPE_Y_WITH_DC; 525 526 for (b = 0; b < 16; b++) 527 { 528 optimize_b(x, b, type, 529 ta + vp8_block2above[b], tl + vp8_block2left[b]); 530 } 531 532 for (b = 16; b < 24; b++) 533 { 534 optimize_b(x, b, PLANE_TYPE_UV, 535 ta + vp8_block2above[b], tl + vp8_block2left[b]); 536 } 537 538 if (has_2nd_order) 539 { 540 b=24; 541 optimize_b(x, b, PLANE_TYPE_Y2, 542 ta + vp8_block2above[b], tl + vp8_block2left[b]); 543 check_reset_2nd_coeffs(&x->e_mbd, PLANE_TYPE_Y2, 544 ta + vp8_block2above[b], tl + vp8_block2left[b]); 545 } 546 } 547 548 549 void vp8_optimize_mby(MACROBLOCK *x) 550 { 551 int b; 552 int type; 553 int has_2nd_order; 554 555 ENTROPY_CONTEXT_PLANES t_above, t_left; 556 ENTROPY_CONTEXT *ta; 557 ENTROPY_CONTEXT *tl; 558 559 if (!x->e_mbd.above_context) 560 return; 561 562 if (!x->e_mbd.left_context) 563 return; 564 565 vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); 566 vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); 567 568 ta = (ENTROPY_CONTEXT *)&t_above; 569 tl = (ENTROPY_CONTEXT *)&t_left; 570 571 has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED 572 && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV); 573 type = has_2nd_order ? PLANE_TYPE_Y_NO_DC : PLANE_TYPE_Y_WITH_DC; 574 575 for (b = 0; b < 16; b++) 576 { 577 optimize_b(x, b, type, 578 ta + vp8_block2above[b], tl + vp8_block2left[b]); 579 } 580 581 582 if (has_2nd_order) 583 { 584 b=24; 585 optimize_b(x, b, PLANE_TYPE_Y2, 586 ta + vp8_block2above[b], tl + vp8_block2left[b]); 587 check_reset_2nd_coeffs(&x->e_mbd, PLANE_TYPE_Y2, 588 ta + vp8_block2above[b], tl + vp8_block2left[b]); 589 } 590 } 591 592 void vp8_optimize_mbuv(MACROBLOCK *x) 593 { 594 int b; 595 ENTROPY_CONTEXT_PLANES t_above, t_left; 596 ENTROPY_CONTEXT *ta; 597 ENTROPY_CONTEXT *tl; 598 599 if (!x->e_mbd.above_context) 600 return; 601 602 if (!x->e_mbd.left_context) 603 return; 604 605 vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); 606 vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); 607 608 ta = (ENTROPY_CONTEXT *)&t_above; 609 tl = (ENTROPY_CONTEXT *)&t_left; 610 611 for (b = 16; b < 24; b++) 612 { 613 optimize_b(x, b, PLANE_TYPE_UV, 614 ta + vp8_block2above[b], tl + vp8_block2left[b]); 615 } 616 } 617 618 void vp8_encode_inter16x16(MACROBLOCK *x) 619 { 620 vp8_build_inter_predictors_mb(&x->e_mbd); 621 622 vp8_subtract_mb(x); 623 624 transform_mb(x); 625 626 vp8_quantize_mb(x); 627 628 if (x->optimize) 629 optimize_mb(x); 630 } 631 632 /* this funciton is used by first pass only */ 633 void vp8_encode_inter16x16y(MACROBLOCK *x) 634 { 635 BLOCK *b = &x->block[0]; 636 637 vp8_build_inter16x16_predictors_mby(&x->e_mbd, x->e_mbd.dst.y_buffer, 638 x->e_mbd.dst.y_stride); 639 640 vp8_subtract_mby(x->src_diff, *(b->base_src), 641 b->src_stride, x->e_mbd.dst.y_buffer, x->e_mbd.dst.y_stride); 642 643 transform_mby(x); 644 645 vp8_quantize_mby(x); 646 647 vp8_inverse_transform_mby(&x->e_mbd); 648 } 649