1 /* 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 12 #include "./vp9_rtcd.h" 13 #include "./vpx_config.h" 14 15 #include "vpx_mem/vpx_mem.h" 16 17 #include "vp9/common/vp9_idct.h" 18 #include "vp9/common/vp9_reconinter.h" 19 #include "vp9/common/vp9_reconintra.h" 20 #include "vp9/common/vp9_systemdependent.h" 21 22 #include "vp9/encoder/vp9_dct.h" 23 #include "vp9/encoder/vp9_encodemb.h" 24 #include "vp9/encoder/vp9_quantize.h" 25 #include "vp9/encoder/vp9_rdopt.h" 26 #include "vp9/encoder/vp9_tokenize.h" 27 28 void vp9_subtract_block_c(int rows, int cols, 29 int16_t *diff_ptr, ptrdiff_t diff_stride, 30 const uint8_t *src_ptr, ptrdiff_t src_stride, 31 const uint8_t *pred_ptr, ptrdiff_t pred_stride) { 32 int r, c; 33 34 for (r = 0; r < rows; r++) { 35 for (c = 0; c < cols; c++) 36 diff_ptr[c] = src_ptr[c] - pred_ptr[c]; 37 38 diff_ptr += diff_stride; 39 pred_ptr += pred_stride; 40 src_ptr += src_stride; 41 } 42 } 43 44 static void subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) { 45 struct macroblock_plane *const p = &x->plane[plane]; 46 const MACROBLOCKD *const xd = &x->e_mbd; 47 const struct macroblockd_plane *const pd = &xd->plane[plane]; 48 const int bw = plane_block_width(bsize, pd); 49 const int bh = plane_block_height(bsize, pd); 50 51 vp9_subtract_block(bh, bw, p->src_diff, bw, 52 p->src.buf, p->src.stride, 53 pd->dst.buf, pd->dst.stride); 54 } 55 56 void vp9_subtract_sby(MACROBLOCK *x, BLOCK_SIZE bsize) { 57 subtract_plane(x, bsize, 0); 58 } 59 60 void vp9_subtract_sbuv(MACROBLOCK *x, BLOCK_SIZE bsize) { 61 int i; 62 63 for (i = 1; i < MAX_MB_PLANE; i++) 64 subtract_plane(x, bsize, i); 65 } 66 67 void vp9_subtract_sb(MACROBLOCK *x, BLOCK_SIZE bsize) { 68 vp9_subtract_sby(x, bsize); 69 vp9_subtract_sbuv(x, bsize); 70 } 71 72 #define RDTRUNC(RM, DM, R, D) ((128 + (R) * (RM)) & 0xFF) 73 typedef struct vp9_token_state vp9_token_state; 74 75 struct vp9_token_state { 76 int rate; 77 int error; 78 int next; 79 signed char token; 80 short qc; 81 }; 82 83 // TODO(jimbankoski): experiment to find optimal RD numbers. 84 #define Y1_RD_MULT 4 85 #define UV_RD_MULT 2 86 87 static const int plane_rd_mult[4] = { 88 Y1_RD_MULT, 89 UV_RD_MULT, 90 }; 91 92 #define UPDATE_RD_COST()\ 93 {\ 94 rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0);\ 95 rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);\ 96 if (rd_cost0 == rd_cost1) {\ 97 rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0);\ 98 rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1);\ 99 }\ 100 } 101 102 // This function is a place holder for now but may ultimately need 103 // to scan previous tokens to work out the correct context. 104 static int trellis_get_coeff_context(const int16_t *scan, 105 const int16_t *nb, 106 int idx, int token, 107 uint8_t *token_cache) { 108 int bak = token_cache[scan[idx]], pt; 109 token_cache[scan[idx]] = vp9_pt_energy_class[token]; 110 pt = get_coef_context(nb, token_cache, idx + 1); 111 token_cache[scan[idx]] = bak; 112 return pt; 113 } 114 115 static void optimize_b(MACROBLOCK *mb, 116 int plane, int block, BLOCK_SIZE plane_bsize, 117 ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, 118 TX_SIZE tx_size) { 119 MACROBLOCKD *const xd = &mb->e_mbd; 120 struct macroblockd_plane *pd = &xd->plane[plane]; 121 const int ref = is_inter_block(&xd->mi_8x8[0]->mbmi); 122 vp9_token_state tokens[1025][2]; 123 unsigned best_index[1025][2]; 124 const int16_t *coeff_ptr = BLOCK_OFFSET(mb->plane[plane].coeff, block); 125 int16_t *qcoeff_ptr; 126 int16_t *dqcoeff_ptr; 127 int eob = pd->eobs[block], final_eob, sz = 0; 128 const int i0 = 0; 129 int rc, x, next, i; 130 int64_t rdmult, rddiv, rd_cost0, rd_cost1; 131 int rate0, rate1, error0, error1, t0, t1; 132 int best, band, pt; 133 PLANE_TYPE type = pd->plane_type; 134 int err_mult = plane_rd_mult[type]; 135 const int default_eob = 16 << (tx_size << 1); 136 const int16_t *scan, *nb; 137 const int mul = 1 + (tx_size == TX_32X32); 138 uint8_t token_cache[1024]; 139 const int16_t *dequant_ptr = pd->dequant; 140 const uint8_t *const band_translate = get_band_translate(tx_size); 141 142 assert((!type && !plane) || (type && plane)); 143 dqcoeff_ptr = BLOCK_OFFSET(pd->dqcoeff, block); 144 qcoeff_ptr = BLOCK_OFFSET(pd->qcoeff, block); 145 get_scan(xd, tx_size, type, block, &scan, &nb); 146 assert(eob <= default_eob); 147 148 /* Now set up a Viterbi trellis to evaluate alternative roundings. */ 149 rdmult = mb->rdmult * err_mult; 150 if (mb->e_mbd.mi_8x8[0]->mbmi.ref_frame[0] == INTRA_FRAME) 151 rdmult = (rdmult * 9) >> 4; 152 rddiv = mb->rddiv; 153 /* Initialize the sentinel node of the trellis. */ 154 tokens[eob][0].rate = 0; 155 tokens[eob][0].error = 0; 156 tokens[eob][0].next = default_eob; 157 tokens[eob][0].token = DCT_EOB_TOKEN; 158 tokens[eob][0].qc = 0; 159 *(tokens[eob] + 1) = *(tokens[eob] + 0); 160 next = eob; 161 for (i = 0; i < eob; i++) 162 token_cache[scan[i]] = vp9_pt_energy_class[vp9_dct_value_tokens_ptr[ 163 qcoeff_ptr[scan[i]]].token]; 164 165 for (i = eob; i-- > i0;) { 166 int base_bits, d2, dx; 167 168 rc = scan[i]; 169 x = qcoeff_ptr[rc]; 170 /* Only add a trellis state for non-zero coefficients. */ 171 if (x) { 172 int shortcut = 0; 173 error0 = tokens[next][0].error; 174 error1 = tokens[next][1].error; 175 /* Evaluate the first possibility for this state. */ 176 rate0 = tokens[next][0].rate; 177 rate1 = tokens[next][1].rate; 178 t0 = (vp9_dct_value_tokens_ptr + x)->token; 179 /* Consider both possible successor states. */ 180 if (next < default_eob) { 181 band = band_translate[i + 1]; 182 pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache); 183 rate0 += 184 mb->token_costs[tx_size][type][ref][band][0][pt] 185 [tokens[next][0].token]; 186 rate1 += 187 mb->token_costs[tx_size][type][ref][band][0][pt] 188 [tokens[next][1].token]; 189 } 190 UPDATE_RD_COST(); 191 /* And pick the best. */ 192 best = rd_cost1 < rd_cost0; 193 base_bits = *(vp9_dct_value_cost_ptr + x); 194 dx = mul * (dqcoeff_ptr[rc] - coeff_ptr[rc]); 195 d2 = dx * dx; 196 tokens[i][0].rate = base_bits + (best ? rate1 : rate0); 197 tokens[i][0].error = d2 + (best ? error1 : error0); 198 tokens[i][0].next = next; 199 tokens[i][0].token = t0; 200 tokens[i][0].qc = x; 201 best_index[i][0] = best; 202 203 /* Evaluate the second possibility for this state. */ 204 rate0 = tokens[next][0].rate; 205 rate1 = tokens[next][1].rate; 206 207 if ((abs(x)*dequant_ptr[rc != 0] > abs(coeff_ptr[rc]) * mul) && 208 (abs(x)*dequant_ptr[rc != 0] < abs(coeff_ptr[rc]) * mul + 209 dequant_ptr[rc != 0])) 210 shortcut = 1; 211 else 212 shortcut = 0; 213 214 if (shortcut) { 215 sz = -(x < 0); 216 x -= 2 * sz + 1; 217 } 218 219 /* Consider both possible successor states. */ 220 if (!x) { 221 /* If we reduced this coefficient to zero, check to see if 222 * we need to move the EOB back here. 223 */ 224 t0 = tokens[next][0].token == DCT_EOB_TOKEN ? 225 DCT_EOB_TOKEN : ZERO_TOKEN; 226 t1 = tokens[next][1].token == DCT_EOB_TOKEN ? 227 DCT_EOB_TOKEN : ZERO_TOKEN; 228 } else { 229 t0 = t1 = (vp9_dct_value_tokens_ptr + x)->token; 230 } 231 if (next < default_eob) { 232 band = band_translate[i + 1]; 233 if (t0 != DCT_EOB_TOKEN) { 234 pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache); 235 rate0 += mb->token_costs[tx_size][type][ref][band][!x][pt] 236 [tokens[next][0].token]; 237 } 238 if (t1 != DCT_EOB_TOKEN) { 239 pt = trellis_get_coeff_context(scan, nb, i, t1, token_cache); 240 rate1 += mb->token_costs[tx_size][type][ref][band][!x][pt] 241 [tokens[next][1].token]; 242 } 243 } 244 245 UPDATE_RD_COST(); 246 /* And pick the best. */ 247 best = rd_cost1 < rd_cost0; 248 base_bits = *(vp9_dct_value_cost_ptr + x); 249 250 if (shortcut) { 251 dx -= (dequant_ptr[rc != 0] + sz) ^ sz; 252 d2 = dx * dx; 253 } 254 tokens[i][1].rate = base_bits + (best ? rate1 : rate0); 255 tokens[i][1].error = d2 + (best ? error1 : error0); 256 tokens[i][1].next = next; 257 tokens[i][1].token = best ? t1 : t0; 258 tokens[i][1].qc = x; 259 best_index[i][1] = best; 260 /* Finally, make this the new head of the trellis. */ 261 next = i; 262 } else { 263 /* There's no choice to make for a zero coefficient, so we don't 264 * add a new trellis node, but we do need to update the costs. 265 */ 266 band = band_translate[i + 1]; 267 t0 = tokens[next][0].token; 268 t1 = tokens[next][1].token; 269 /* Update the cost of each path if we're past the EOB token. */ 270 if (t0 != DCT_EOB_TOKEN) { 271 tokens[next][0].rate += 272 mb->token_costs[tx_size][type][ref][band][1][0][t0]; 273 tokens[next][0].token = ZERO_TOKEN; 274 } 275 if (t1 != DCT_EOB_TOKEN) { 276 tokens[next][1].rate += 277 mb->token_costs[tx_size][type][ref][band][1][0][t1]; 278 tokens[next][1].token = ZERO_TOKEN; 279 } 280 best_index[i][0] = best_index[i][1] = 0; 281 /* Don't update next, because we didn't add a new node. */ 282 } 283 } 284 285 /* Now pick the best path through the whole trellis. */ 286 band = band_translate[i + 1]; 287 pt = combine_entropy_contexts(*a, *l); 288 rate0 = tokens[next][0].rate; 289 rate1 = tokens[next][1].rate; 290 error0 = tokens[next][0].error; 291 error1 = tokens[next][1].error; 292 t0 = tokens[next][0].token; 293 t1 = tokens[next][1].token; 294 rate0 += mb->token_costs[tx_size][type][ref][band][0][pt][t0]; 295 rate1 += mb->token_costs[tx_size][type][ref][band][0][pt][t1]; 296 UPDATE_RD_COST(); 297 best = rd_cost1 < rd_cost0; 298 final_eob = i0 - 1; 299 vpx_memset(qcoeff_ptr, 0, sizeof(*qcoeff_ptr) * (16 << (tx_size * 2))); 300 vpx_memset(dqcoeff_ptr, 0, sizeof(*dqcoeff_ptr) * (16 << (tx_size * 2))); 301 for (i = next; i < eob; i = next) { 302 x = tokens[i][best].qc; 303 if (x) { 304 final_eob = i; 305 } 306 rc = scan[i]; 307 qcoeff_ptr[rc] = x; 308 dqcoeff_ptr[rc] = (x * dequant_ptr[rc != 0]) / mul; 309 310 next = tokens[i][best].next; 311 best = best_index[i][best]; 312 } 313 final_eob++; 314 315 xd->plane[plane].eobs[block] = final_eob; 316 *a = *l = (final_eob > 0); 317 } 318 319 void vp9_optimize_b(int plane, int block, BLOCK_SIZE plane_bsize, 320 TX_SIZE tx_size, MACROBLOCK *mb, struct optimize_ctx *ctx) { 321 int x, y; 322 txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x, &y); 323 optimize_b(mb, plane, block, plane_bsize, 324 &ctx->ta[plane][x], &ctx->tl[plane][y], tx_size); 325 } 326 327 static void optimize_init_b(int plane, BLOCK_SIZE bsize, 328 struct encode_b_args *args) { 329 const MACROBLOCKD *xd = &args->x->e_mbd; 330 const struct macroblockd_plane* const pd = &xd->plane[plane]; 331 const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd); 332 const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize]; 333 const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize]; 334 const MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; 335 const TX_SIZE tx_size = plane ? get_uv_tx_size(mbmi) : mbmi->tx_size; 336 337 vp9_get_entropy_contexts(tx_size, args->ctx->ta[plane], args->ctx->tl[plane], 338 pd->above_context, pd->left_context, 339 num_4x4_w, num_4x4_h); 340 } 341 342 void vp9_xform_quant(int plane, int block, BLOCK_SIZE plane_bsize, 343 TX_SIZE tx_size, void *arg) { 344 struct encode_b_args* const args = arg; 345 MACROBLOCK* const x = args->x; 346 MACROBLOCKD* const xd = &x->e_mbd; 347 struct macroblock_plane *const p = &x->plane[plane]; 348 struct macroblockd_plane *const pd = &xd->plane[plane]; 349 int16_t *coeff = BLOCK_OFFSET(p->coeff, block); 350 int16_t *qcoeff = BLOCK_OFFSET(pd->qcoeff, block); 351 int16_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); 352 const int16_t *scan, *iscan; 353 uint16_t *eob = &pd->eobs[block]; 354 const int bwl = b_width_log2(plane_bsize), bw = 1 << bwl; 355 const int twl = bwl - tx_size, twmask = (1 << twl) - 1; 356 int xoff, yoff; 357 int16_t *src_diff; 358 359 switch (tx_size) { 360 case TX_32X32: 361 scan = vp9_default_scan_32x32; 362 iscan = vp9_default_iscan_32x32; 363 block >>= 6; 364 xoff = 32 * (block & twmask); 365 yoff = 32 * (block >> twl); 366 src_diff = p->src_diff + 4 * bw * yoff + xoff; 367 if (x->use_lp32x32fdct) 368 vp9_fdct32x32_rd(src_diff, coeff, bw * 4); 369 else 370 vp9_fdct32x32(src_diff, coeff, bw * 4); 371 vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round, 372 p->quant, p->quant_shift, qcoeff, dqcoeff, 373 pd->dequant, p->zbin_extra, eob, scan, iscan); 374 break; 375 case TX_16X16: 376 scan = vp9_default_scan_16x16; 377 iscan = vp9_default_iscan_16x16; 378 block >>= 4; 379 xoff = 16 * (block & twmask); 380 yoff = 16 * (block >> twl); 381 src_diff = p->src_diff + 4 * bw * yoff + xoff; 382 vp9_fdct16x16(src_diff, coeff, bw * 4); 383 vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, 384 p->quant, p->quant_shift, qcoeff, dqcoeff, 385 pd->dequant, p->zbin_extra, eob, scan, iscan); 386 break; 387 case TX_8X8: 388 scan = vp9_default_scan_8x8; 389 iscan = vp9_default_iscan_8x8; 390 block >>= 2; 391 xoff = 8 * (block & twmask); 392 yoff = 8 * (block >> twl); 393 src_diff = p->src_diff + 4 * bw * yoff + xoff; 394 vp9_fdct8x8(src_diff, coeff, bw * 4); 395 vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, 396 p->quant, p->quant_shift, qcoeff, dqcoeff, 397 pd->dequant, p->zbin_extra, eob, scan, iscan); 398 break; 399 case TX_4X4: 400 scan = vp9_default_scan_4x4; 401 iscan = vp9_default_iscan_4x4; 402 xoff = 4 * (block & twmask); 403 yoff = 4 * (block >> twl); 404 src_diff = p->src_diff + 4 * bw * yoff + xoff; 405 x->fwd_txm4x4(src_diff, coeff, bw * 4); 406 vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, 407 p->quant, p->quant_shift, qcoeff, dqcoeff, 408 pd->dequant, p->zbin_extra, eob, scan, iscan); 409 break; 410 default: 411 assert(0); 412 } 413 } 414 415 static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize, 416 TX_SIZE tx_size, void *arg) { 417 struct encode_b_args *const args = arg; 418 MACROBLOCK *const x = args->x; 419 MACROBLOCKD *const xd = &x->e_mbd; 420 struct optimize_ctx *const ctx = args->ctx; 421 struct macroblockd_plane *const pd = &xd->plane[plane]; 422 int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); 423 int i, j; 424 uint8_t *dst; 425 txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j); 426 dst = &pd->dst.buf[4 * j * pd->dst.stride + 4 * i]; 427 428 // TODO(jingning): per transformed block zero forcing only enabled for 429 // luma component. will integrate chroma components as well. 430 if (x->zcoeff_blk[tx_size][block] && plane == 0) { 431 pd->eobs[block] = 0; 432 ctx->ta[plane][i] = 0; 433 ctx->tl[plane][j] = 0; 434 return; 435 } 436 437 if (!x->skip_recode) 438 vp9_xform_quant(plane, block, plane_bsize, tx_size, arg); 439 440 if (x->optimize && (!x->skip_recode || !x->skip_optimize)) { 441 vp9_optimize_b(plane, block, plane_bsize, tx_size, x, ctx); 442 } else { 443 ctx->ta[plane][i] = pd->eobs[block] > 0; 444 ctx->tl[plane][j] = pd->eobs[block] > 0; 445 } 446 447 if (x->skip_encode || pd->eobs[block] == 0) 448 return; 449 450 switch (tx_size) { 451 case TX_32X32: 452 vp9_idct32x32_add(dqcoeff, dst, pd->dst.stride, pd->eobs[block]); 453 break; 454 case TX_16X16: 455 vp9_idct16x16_add(dqcoeff, dst, pd->dst.stride, pd->eobs[block]); 456 break; 457 case TX_8X8: 458 vp9_idct8x8_add(dqcoeff, dst, pd->dst.stride, pd->eobs[block]); 459 break; 460 case TX_4X4: 461 // this is like vp9_short_idct4x4 but has a special case around eob<=1 462 // which is significant (not just an optimization) for the lossless 463 // case. 464 xd->itxm_add(dqcoeff, dst, pd->dst.stride, pd->eobs[block]); 465 break; 466 default: 467 assert(!"Invalid transform size"); 468 } 469 } 470 471 static void encode_block_pass1(int plane, int block, BLOCK_SIZE plane_bsize, 472 TX_SIZE tx_size, void *arg) { 473 struct encode_b_args *const args = arg; 474 MACROBLOCK *const x = args->x; 475 MACROBLOCKD *const xd = &x->e_mbd; 476 struct macroblockd_plane *const pd = &xd->plane[plane]; 477 const int raster_block = txfrm_block_to_raster_block(plane_bsize, tx_size, 478 block); 479 480 int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); 481 uint8_t *const dst = raster_block_offset_uint8(plane_bsize, raster_block, 482 pd->dst.buf, pd->dst.stride); 483 484 vp9_xform_quant(plane, block, plane_bsize, tx_size, arg); 485 486 if (pd->eobs[block] == 0) 487 return; 488 489 xd->itxm_add(dqcoeff, dst, pd->dst.stride, pd->eobs[block]); 490 } 491 492 void vp9_encode_sby(MACROBLOCK *x, BLOCK_SIZE bsize) { 493 MACROBLOCKD *const xd = &x->e_mbd; 494 struct optimize_ctx ctx; 495 struct encode_b_args arg = {x, &ctx}; 496 497 vp9_subtract_sby(x, bsize); 498 if (x->optimize) 499 optimize_init_b(0, bsize, &arg); 500 501 foreach_transformed_block_in_plane(xd, bsize, 0, encode_block_pass1, &arg); 502 } 503 504 void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) { 505 MACROBLOCKD *const xd = &x->e_mbd; 506 struct optimize_ctx ctx; 507 struct encode_b_args arg = {x, &ctx}; 508 509 if (!x->skip_recode) 510 vp9_subtract_sb(x, bsize); 511 512 if (x->optimize && (!x->skip_recode || !x->skip_optimize)) { 513 int i; 514 for (i = 0; i < MAX_MB_PLANE; ++i) 515 optimize_init_b(i, bsize, &arg); 516 } 517 518 foreach_transformed_block(xd, bsize, encode_block, &arg); 519 } 520 521 void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, 522 TX_SIZE tx_size, void *arg) { 523 struct encode_b_args* const args = arg; 524 MACROBLOCK *const x = args->x; 525 MACROBLOCKD *const xd = &x->e_mbd; 526 MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; 527 struct macroblock_plane *const p = &x->plane[plane]; 528 struct macroblockd_plane *const pd = &xd->plane[plane]; 529 int16_t *coeff = BLOCK_OFFSET(p->coeff, block); 530 int16_t *qcoeff = BLOCK_OFFSET(pd->qcoeff, block); 531 int16_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); 532 const int16_t *scan, *iscan; 533 TX_TYPE tx_type; 534 MB_PREDICTION_MODE mode; 535 const int bwl = b_width_log2(plane_bsize), bw = 1 << bwl; 536 const int twl = bwl - tx_size, twmask = (1 << twl) - 1; 537 int xoff, yoff; 538 uint8_t *src, *dst; 539 int16_t *src_diff; 540 uint16_t *eob = &pd->eobs[block]; 541 542 if (xd->mb_to_right_edge < 0 || xd->mb_to_bottom_edge < 0) 543 extend_for_intra(xd, plane_bsize, plane, block, tx_size); 544 545 // if (x->optimize) 546 // vp9_optimize_b(plane, block, plane_bsize, tx_size, x, args->ctx); 547 548 switch (tx_size) { 549 case TX_32X32: 550 scan = vp9_default_scan_32x32; 551 iscan = vp9_default_iscan_32x32; 552 mode = plane == 0 ? mbmi->mode : mbmi->uv_mode; 553 block >>= 6; 554 xoff = 32 * (block & twmask); 555 yoff = 32 * (block >> twl); 556 dst = pd->dst.buf + yoff * pd->dst.stride + xoff; 557 vp9_predict_intra_block(xd, block, bwl, TX_32X32, mode, 558 dst, pd->dst.stride, dst, pd->dst.stride); 559 560 if (!x->skip_recode) { 561 src = p->src.buf + yoff * p->src.stride + xoff; 562 src_diff = p->src_diff + 4 * bw * yoff + xoff; 563 vp9_subtract_block(32, 32, src_diff, bw * 4, 564 src, p->src.stride, dst, pd->dst.stride); 565 if (x->use_lp32x32fdct) 566 vp9_fdct32x32_rd(src_diff, coeff, bw * 4); 567 else 568 vp9_fdct32x32(src_diff, coeff, bw * 4); 569 vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round, 570 p->quant, p->quant_shift, qcoeff, dqcoeff, 571 pd->dequant, p->zbin_extra, eob, scan, iscan); 572 } 573 if (!x->skip_encode && *eob) 574 vp9_idct32x32_add(dqcoeff, dst, pd->dst.stride, *eob); 575 break; 576 case TX_16X16: 577 tx_type = get_tx_type_16x16(pd->plane_type, xd); 578 scan = get_scan_16x16(tx_type); 579 iscan = get_iscan_16x16(tx_type); 580 mode = plane == 0 ? mbmi->mode : mbmi->uv_mode; 581 block >>= 4; 582 xoff = 16 * (block & twmask); 583 yoff = 16 * (block >> twl); 584 dst = pd->dst.buf + yoff * pd->dst.stride + xoff; 585 vp9_predict_intra_block(xd, block, bwl, TX_16X16, mode, 586 dst, pd->dst.stride, dst, pd->dst.stride); 587 if (!x->skip_recode) { 588 src = p->src.buf + yoff * p->src.stride + xoff; 589 src_diff = p->src_diff + 4 * bw * yoff + xoff; 590 vp9_subtract_block(16, 16, src_diff, bw * 4, 591 src, p->src.stride, dst, pd->dst.stride); 592 vp9_fht16x16(tx_type, src_diff, coeff, bw * 4); 593 vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, 594 p->quant, p->quant_shift, qcoeff, dqcoeff, 595 pd->dequant, p->zbin_extra, eob, scan, iscan); 596 } 597 if (!x->skip_encode && *eob) 598 vp9_iht16x16_add(tx_type, dqcoeff, dst, pd->dst.stride, *eob); 599 break; 600 case TX_8X8: 601 tx_type = get_tx_type_8x8(pd->plane_type, xd); 602 scan = get_scan_8x8(tx_type); 603 iscan = get_iscan_8x8(tx_type); 604 mode = plane == 0 ? mbmi->mode : mbmi->uv_mode; 605 block >>= 2; 606 xoff = 8 * (block & twmask); 607 yoff = 8 * (block >> twl); 608 dst = pd->dst.buf + yoff * pd->dst.stride + xoff; 609 vp9_predict_intra_block(xd, block, bwl, TX_8X8, mode, 610 dst, pd->dst.stride, dst, pd->dst.stride); 611 if (!x->skip_recode) { 612 src = p->src.buf + yoff * p->src.stride + xoff; 613 src_diff = p->src_diff + 4 * bw * yoff + xoff; 614 vp9_subtract_block(8, 8, src_diff, bw * 4, 615 src, p->src.stride, dst, pd->dst.stride); 616 vp9_fht8x8(tx_type, src_diff, coeff, bw * 4); 617 vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant, 618 p->quant_shift, qcoeff, dqcoeff, 619 pd->dequant, p->zbin_extra, eob, scan, iscan); 620 } 621 if (!x->skip_encode && *eob) 622 vp9_iht8x8_add(tx_type, dqcoeff, dst, pd->dst.stride, *eob); 623 break; 624 case TX_4X4: 625 tx_type = get_tx_type_4x4(pd->plane_type, xd, block); 626 scan = get_scan_4x4(tx_type); 627 iscan = get_iscan_4x4(tx_type); 628 if (mbmi->sb_type < BLOCK_8X8 && plane == 0) 629 mode = xd->mi_8x8[0]->bmi[block].as_mode; 630 else 631 mode = plane == 0 ? mbmi->mode : mbmi->uv_mode; 632 633 xoff = 4 * (block & twmask); 634 yoff = 4 * (block >> twl); 635 dst = pd->dst.buf + yoff * pd->dst.stride + xoff; 636 vp9_predict_intra_block(xd, block, bwl, TX_4X4, mode, 637 dst, pd->dst.stride, dst, pd->dst.stride); 638 639 if (!x->skip_recode) { 640 src = p->src.buf + yoff * p->src.stride + xoff; 641 src_diff = p->src_diff + 4 * bw * yoff + xoff; 642 vp9_subtract_block(4, 4, src_diff, bw * 4, 643 src, p->src.stride, dst, pd->dst.stride); 644 if (tx_type != DCT_DCT) 645 vp9_short_fht4x4(src_diff, coeff, bw * 4, tx_type); 646 else 647 x->fwd_txm4x4(src_diff, coeff, bw * 4); 648 vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant, 649 p->quant_shift, qcoeff, dqcoeff, 650 pd->dequant, p->zbin_extra, eob, scan, iscan); 651 } 652 653 if (!x->skip_encode && *eob) { 654 if (tx_type == DCT_DCT) 655 // this is like vp9_short_idct4x4 but has a special case around eob<=1 656 // which is significant (not just an optimization) for the lossless 657 // case. 658 xd->itxm_add(dqcoeff, dst, pd->dst.stride, *eob); 659 else 660 vp9_iht4x4_16_add(dqcoeff, dst, pd->dst.stride, tx_type); 661 } 662 break; 663 default: 664 assert(0); 665 } 666 } 667 668 void vp9_encode_intra_block_y(MACROBLOCK *x, BLOCK_SIZE bsize) { 669 MACROBLOCKD* const xd = &x->e_mbd; 670 struct optimize_ctx ctx; 671 struct encode_b_args arg = {x, &ctx}; 672 673 foreach_transformed_block_in_plane(xd, bsize, 0, vp9_encode_block_intra, 674 &arg); 675 } 676 void vp9_encode_intra_block_uv(MACROBLOCK *x, BLOCK_SIZE bsize) { 677 MACROBLOCKD* const xd = &x->e_mbd; 678 struct optimize_ctx ctx; 679 struct encode_b_args arg = {x, &ctx}; 680 foreach_transformed_block_uv(xd, bsize, vp9_encode_block_intra, &arg); 681 } 682 683