/*
 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include "./vpx_config.h"
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_tokenize.h"
#include "vp9/common/vp9_reconintra.h"
#include "vpx_mem/vpx_mem.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/common/vp9_systemdependent.h"
#include "vp9_rtcd.h"

DECLARE_ALIGNED(16, extern const uint8_t,
                vp9_pt_energy_class[MAX_ENTROPY_TOKENS]);

void vp9_subtract_block_c(int rows, int cols,
                          int16_t *diff_ptr, ptrdiff_t diff_stride,
                          const uint8_t *src_ptr, ptrdiff_t src_stride,
                          const uint8_t *pred_ptr, ptrdiff_t pred_stride) {
  int r, c;

  for (r = 0; r < rows; r++) {
    for (c = 0; c < cols; c++)
      diff_ptr[c] = src_ptr[c] - pred_ptr[c];

    diff_ptr += diff_stride;
    pred_ptr += pred_stride;
    src_ptr += src_stride;
  }
}

static void inverse_transform_b_4x4_add(MACROBLOCKD *xd, int eob,
                                        int16_t *dqcoeff, uint8_t *dest,
                                        int stride) {
  if (eob <= 1)
    xd->inv_txm4x4_1_add(dqcoeff, dest, stride);
  else
    xd->inv_txm4x4_add(dqcoeff, dest, stride);
}

static void inverse_transform_b_8x8_add(int eob,
                                        int16_t *dqcoeff, uint8_t *dest,
                                        int stride) {
  if (eob <= 1)
    vp9_short_idct8x8_1_add(dqcoeff, dest, stride);
  else if (eob <= 10)
    vp9_short_idct10_8x8_add(dqcoeff, dest, stride);
  else
    vp9_short_idct8x8_add(dqcoeff, dest, stride);
}

static void inverse_transform_b_16x16_add(int eob,
                                          int16_t *dqcoeff, uint8_t *dest,
                                          int stride) {
  if (eob <= 1)
    vp9_short_idct16x16_1_add(dqcoeff, dest, stride);
  else if (eob <= 10)
    vp9_short_idct10_16x16_add(dqcoeff, dest, stride);
  else
    vp9_short_idct16x16_add(dqcoeff, dest, stride);
}

static void subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
  struct macroblock_plane *const p = &x->plane[plane];
  const MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  const int bw = plane_block_width(bsize, pd);
  const int bh = plane_block_height(bsize, pd);

  vp9_subtract_block(bh, bw, p->src_diff, bw,
                     p->src.buf, p->src.stride,
                     pd->dst.buf, pd->dst.stride);
}

void vp9_subtract_sby(MACROBLOCK *x, BLOCK_SIZE bsize) {
  subtract_plane(x, bsize, 0);
}

void vp9_subtract_sbuv(MACROBLOCK *x, BLOCK_SIZE bsize) {
  int i;

  for (i = 1; i < MAX_MB_PLANE; i++)
    subtract_plane(x, bsize, i);
}

void vp9_subtract_sb(MACROBLOCK *x, BLOCK_SIZE bsize) {
  vp9_subtract_sby(x, bsize);
  vp9_subtract_sbuv(x, bsize);
}


// Extracts the low-order bits of the weighted rate term so that ties in the
// integer RD cost can be broken deterministically; DM and D are unused.
#define RDTRUNC(RM, DM, R, D) ((128 + (R) * (RM)) & 0xFF)
typedef struct vp9_token_state vp9_token_state;

struct vp9_token_state {
  int rate;
  int error;
  int next;
  signed char token;
  short qc;
};

// TODO: run experiments to find the optimal multiplier values
#define Y1_RD_MULT 4
#define UV_RD_MULT 2

static const int plane_rd_mult[4] = {
  Y1_RD_MULT,
  UV_RD_MULT,
};

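/* Recompute both candidate RD costs; on a tie, fall back to the truncated
 * fractional rate bits from RDTRUNC as a deterministic tiebreaker.
 * (RDCOST is the usual rate-distortion cost macro from vp9_rdopt.h,
 * weighting the rate terms by rdmult/rddiv against the squared-error
 * distortion terms.)
 */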
#define UPDATE_RD_COST()\
{\
  rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0);\
  rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);\
  if (rd_cost0 == rd_cost1) {\
    rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0);\
    rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1);\
  }\
}

// This function is a placeholder for now but may ultimately need
// to scan previous tokens to work out the correct context.
static int trellis_get_coeff_context(const int16_t *scan,
                                     const int16_t *nb,
                                     int idx, int token,
                                     uint8_t *token_cache) {
  int bak = token_cache[scan[idx]], pt;
  token_cache[scan[idx]] = vp9_pt_energy_class[token];
  pt = get_coef_context(nb, token_cache, idx + 1);
  token_cache[scan[idx]] = bak;
  return pt;
}

static void optimize_b(MACROBLOCK *mb,
                       int plane, int block, BLOCK_SIZE plane_bsize,
                       ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                       TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &mb->e_mbd;
  struct macroblockd_plane *pd = &xd->plane[plane];
  const int ref = is_inter_block(&xd->this_mi->mbmi);
  vp9_token_state tokens[1025][2];
  unsigned best_index[1025][2];
  const int16_t *coeff_ptr = BLOCK_OFFSET(mb->plane[plane].coeff, block);
  int16_t *qcoeff_ptr;
  int16_t *dqcoeff_ptr;
  int eob = pd->eobs[block], final_eob, sz = 0;
  const int i0 = 0;
  int rc, x, next, i;
  int64_t rdmult, rddiv, rd_cost0, rd_cost1;
  int rate0, rate1, error0, error1, t0, t1;
  int best, band, pt;
  PLANE_TYPE type = pd->plane_type;
  int err_mult = plane_rd_mult[type];
  int default_eob;
  const int16_t *scan, *nb;
  const int mul = 1 + (tx_size == TX_32X32);
  uint8_t token_cache[1024];
  const int ib = txfrm_block_to_raster_block(plane_bsize, tx_size, block);
  const int16_t *dequant_ptr = pd->dequant;
  const uint8_t *band_translate;

  assert((!type && !plane) || (type && plane));
  dqcoeff_ptr = BLOCK_OFFSET(pd->dqcoeff, block);
  qcoeff_ptr = BLOCK_OFFSET(pd->qcoeff, block);
  switch (tx_size) {
    default:
    case TX_4X4:
      default_eob = 16;
      scan = get_scan_4x4(get_tx_type_4x4(type, xd, ib));
      band_translate = vp9_coefband_trans_4x4;
      break;
    case TX_8X8:
      scan = get_scan_8x8(get_tx_type_8x8(type, xd));
      default_eob = 64;
      band_translate = vp9_coefband_trans_8x8plus;
      break;
    case TX_16X16:
      scan = get_scan_16x16(get_tx_type_16x16(type, xd));
      default_eob = 256;
      band_translate = vp9_coefband_trans_8x8plus;
      break;
    case TX_32X32:
      scan = vp9_default_scan_32x32;
      default_eob = 1024;
      band_translate = vp9_coefband_trans_8x8plus;
      break;
  }
  assert(eob <= default_eob);

  /* Now set up a Viterbi trellis to evaluate alternative roundings. */
  rdmult = mb->rdmult * err_mult;
  if (mb->e_mbd.mi_8x8[0]->mbmi.ref_frame[0] == INTRA_FRAME)
    rdmult = (rdmult * 9) >> 4;
  rddiv = mb->rddiv;
  /* Initialize the sentinel node of the trellis. */
  tokens[eob][0].rate = 0;
  tokens[eob][0].error = 0;
  tokens[eob][0].next = default_eob;
  tokens[eob][0].token = DCT_EOB_TOKEN;
  tokens[eob][0].qc = 0;
  *(tokens[eob] + 1) = *(tokens[eob] + 0);
  next = eob;
  for (i = 0; i < eob; i++)
    token_cache[scan[i]] = vp9_pt_energy_class[vp9_dct_value_tokens_ptr[
        qcoeff_ptr[scan[i]]].token];
  nb = vp9_get_coef_neighbors_handle(scan);

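  /* Walk the coefficients backwards from the current EOB toward DC.  Each
   * scan position carries two trellis states: state 0 keeps the quantizer's
   * original value, state 1 tries the same value with its magnitude reduced
   * by one (which may turn it into ZERO_TOKEN or pull the EOB forward).
   * best_index[][] records which successor state was cheaper, so a single
   * backwards pass finds the rate-distortion-optimal rounding for the block.
   */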
  for (i = eob; i-- > i0;) {
    int base_bits, d2, dx;

    rc = scan[i];
    x = qcoeff_ptr[rc];
    /* Only add a trellis state for non-zero coefficients. */
    if (x) {
      int shortcut = 0;
      error0 = tokens[next][0].error;
      error1 = tokens[next][1].error;
      /* Evaluate the first possibility for this state. */
      rate0 = tokens[next][0].rate;
      rate1 = tokens[next][1].rate;
      t0 = (vp9_dct_value_tokens_ptr + x)->token;
      /* Consider both possible successor states. */
      if (next < default_eob) {
        band = get_coef_band(band_translate, i + 1);
        pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache);
        rate0 += mb->token_costs[tx_size][type][ref][band][0][pt]
                                [tokens[next][0].token];
        rate1 += mb->token_costs[tx_size][type][ref][band][0][pt]
                                [tokens[next][1].token];
      }
      UPDATE_RD_COST();
      /* And pick the best. */
      best = rd_cost1 < rd_cost0;
      base_bits = *(vp9_dct_value_cost_ptr + x);
      dx = mul * (dqcoeff_ptr[rc] - coeff_ptr[rc]);
      d2 = dx * dx;
      tokens[i][0].rate = base_bits + (best ? rate1 : rate0);
      tokens[i][0].error = d2 + (best ? error1 : error0);
      tokens[i][0].next = next;
      tokens[i][0].token = t0;
      tokens[i][0].qc = x;
      best_index[i][0] = best;

      /* Evaluate the second possibility for this state.  Reducing the
       * magnitude by one is only worth trying when the dequantized value
       * overshoots the original coefficient by less than one quantizer
       * step.
       */
      rate0 = tokens[next][0].rate;
      rate1 = tokens[next][1].rate;

      if ((abs(x) * dequant_ptr[rc != 0] > abs(coeff_ptr[rc]) * mul) &&
          (abs(x) * dequant_ptr[rc != 0] < abs(coeff_ptr[rc]) * mul +
                                               dequant_ptr[rc != 0]))
        shortcut = 1;
      else
        shortcut = 0;

      if (shortcut) {
        sz = -(x < 0);
        x -= 2 * sz + 1;
      }

      /* Consider both possible successor states. */
      if (!x) {
        /* If we reduced this coefficient to zero, check to see if
         * we need to move the EOB back here.
         */
        t0 = tokens[next][0].token == DCT_EOB_TOKEN ?
             DCT_EOB_TOKEN : ZERO_TOKEN;
        t1 = tokens[next][1].token == DCT_EOB_TOKEN ?
             DCT_EOB_TOKEN : ZERO_TOKEN;
      } else {
        t0 = t1 = (vp9_dct_value_tokens_ptr + x)->token;
      }
      if (next < default_eob) {
        band = get_coef_band(band_translate, i + 1);
        if (t0 != DCT_EOB_TOKEN) {
          pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache);
          rate0 += mb->token_costs[tx_size][type][ref][band][!x][pt]
                                  [tokens[next][0].token];
        }
        if (t1 != DCT_EOB_TOKEN) {
          pt = trellis_get_coeff_context(scan, nb, i, t1, token_cache);
          rate1 += mb->token_costs[tx_size][type][ref][band][!x][pt]
                                  [tokens[next][1].token];
        }
      }

      UPDATE_RD_COST();
      /* And pick the best. */
      best = rd_cost1 < rd_cost0;
      base_bits = *(vp9_dct_value_cost_ptr + x);

      if (shortcut) {
        dx -= (dequant_ptr[rc != 0] + sz) ^ sz;
        d2 = dx * dx;
      }
      tokens[i][1].rate = base_bits + (best ? rate1 : rate0);
      tokens[i][1].error = d2 + (best ? error1 : error0);
      tokens[i][1].next = next;
      tokens[i][1].token = best ? t1 : t0;
      tokens[i][1].qc = x;
      best_index[i][1] = best;
      /* Finally, make this the new head of the trellis. */
      next = i;
    } else {
      /* There's no choice to make for a zero coefficient, so we don't
       * add a new trellis node, but we do need to update the costs.
       */
      band = get_coef_band(band_translate, i + 1);
      t0 = tokens[next][0].token;
      t1 = tokens[next][1].token;
      /* Update the cost of each path if we're past the EOB token. */
      if (t0 != DCT_EOB_TOKEN) {
        tokens[next][0].rate +=
            mb->token_costs[tx_size][type][ref][band][1][0][t0];
        tokens[next][0].token = ZERO_TOKEN;
      }
      if (t1 != DCT_EOB_TOKEN) {
        tokens[next][1].rate +=
            mb->token_costs[tx_size][type][ref][band][1][0][t1];
        tokens[next][1].token = ZERO_TOKEN;
      }
      best_index[i][0] = best_index[i][1] = 0;
      /* Don't update next, because we didn't add a new node. */
    }
  }

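  /* Trace back the winning path.  The coefficient buffers are cleared first
   * because the walk below only writes positions that survive on that path,
   * and dqcoeff is recomputed from the possibly reduced qc values (the
   * division by mul undoes the doubled quantizer scale used for 32x32).
   */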
  /* Now pick the best path through the whole trellis. */
  band = get_coef_band(band_translate, i + 1);
  pt = combine_entropy_contexts(*a, *l);
  rate0 = tokens[next][0].rate;
  rate1 = tokens[next][1].rate;
  error0 = tokens[next][0].error;
  error1 = tokens[next][1].error;
  t0 = tokens[next][0].token;
  t1 = tokens[next][1].token;
  rate0 += mb->token_costs[tx_size][type][ref][band][0][pt][t0];
  rate1 += mb->token_costs[tx_size][type][ref][band][0][pt][t1];
  UPDATE_RD_COST();
  best = rd_cost1 < rd_cost0;
  final_eob = i0 - 1;
  vpx_memset(qcoeff_ptr, 0, sizeof(*qcoeff_ptr) * (16 << (tx_size * 2)));
  vpx_memset(dqcoeff_ptr, 0, sizeof(*dqcoeff_ptr) * (16 << (tx_size * 2)));
  for (i = next; i < eob; i = next) {
    x = tokens[i][best].qc;
    if (x) {
      final_eob = i;
    }
    rc = scan[i];
    qcoeff_ptr[rc] = x;
    dqcoeff_ptr[rc] = (x * dequant_ptr[rc != 0]) / mul;

    next = tokens[i][best].next;
    best = best_index[i][best];
  }
  final_eob++;

  xd->plane[plane].eobs[block] = final_eob;
  *a = *l = (final_eob > 0);
}

void vp9_optimize_b(int plane, int block, BLOCK_SIZE plane_bsize,
                    TX_SIZE tx_size, MACROBLOCK *mb, struct optimize_ctx *ctx) {
  int x, y;
  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x, &y);
  optimize_b(mb, plane, block, plane_bsize,
             &ctx->ta[plane][x], &ctx->tl[plane][y], tx_size);
}

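/* Seed the optimizer's per-plane entropy-context rows from
 * pd->above_context/pd->left_context.  For transforms larger than 4x4,
 * several 4x4 contexts cover one transform block, so each group of 2, 4 or
 * 8 contexts is collapsed into a single nonzero flag with one 16/32/64-bit
 * load.
 */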
static void optimize_init_b(int plane, BLOCK_SIZE bsize,
                            struct encode_b_args *args) {
  const MACROBLOCKD *xd = &args->x->e_mbd;
  const struct macroblockd_plane* const pd = &xd->plane[plane];
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
  const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
  const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
  const MB_MODE_INFO *mbmi = &xd->this_mi->mbmi;
  const TX_SIZE tx_size = plane ? get_uv_tx_size(mbmi) : mbmi->tx_size;
  int i;

  switch (tx_size) {
    case TX_4X4:
      vpx_memcpy(args->ctx->ta[plane], pd->above_context,
                 sizeof(ENTROPY_CONTEXT) * num_4x4_w);
      vpx_memcpy(args->ctx->tl[plane], pd->left_context,
                 sizeof(ENTROPY_CONTEXT) * num_4x4_h);
      break;
    case TX_8X8:
      for (i = 0; i < num_4x4_w; i += 2)
        args->ctx->ta[plane][i] = !!*(uint16_t *)&pd->above_context[i];
      for (i = 0; i < num_4x4_h; i += 2)
        args->ctx->tl[plane][i] = !!*(uint16_t *)&pd->left_context[i];
      break;
    case TX_16X16:
      for (i = 0; i < num_4x4_w; i += 4)
        args->ctx->ta[plane][i] = !!*(uint32_t *)&pd->above_context[i];
      for (i = 0; i < num_4x4_h; i += 4)
        args->ctx->tl[plane][i] = !!*(uint32_t *)&pd->left_context[i];
      break;
    case TX_32X32:
      for (i = 0; i < num_4x4_w; i += 8)
        args->ctx->ta[plane][i] = !!*(uint64_t *)&pd->above_context[i];
      for (i = 0; i < num_4x4_h; i += 8)
        args->ctx->tl[plane][i] = !!*(uint64_t *)&pd->left_context[i];
      break;
    default:
      assert(0);
  }
}

/* Forward transform and quantize one transform block of the residual held
 * in p->src_diff. */
void vp9_xform_quant(int plane, int block, BLOCK_SIZE plane_bsize,
                     TX_SIZE tx_size, void *arg) {
  struct encode_b_args* const args = arg;
  MACROBLOCK* const x = args->x;
  MACROBLOCKD* const xd = &x->e_mbd;
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  int16_t *coeff = BLOCK_OFFSET(p->coeff, block);
  int16_t *qcoeff = BLOCK_OFFSET(pd->qcoeff, block);
  int16_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  const int16_t *scan, *iscan;
  uint16_t *eob = &pd->eobs[block];
  const int bwl = b_width_log2(plane_bsize), bw = 1 << bwl;
  const int twl = bwl - tx_size, twmask = (1 << twl) - 1;
  int xoff, yoff;
  int16_t *src_diff;

  switch (tx_size) {
    case TX_32X32:
      scan = vp9_default_scan_32x32;
      iscan = vp9_default_iscan_32x32;
      block >>= 6;
      xoff = 32 * (block & twmask);
      yoff = 32 * (block >> twl);
      src_diff = p->src_diff + 4 * bw * yoff + xoff;
      if (x->use_lp32x32fdct)
        vp9_short_fdct32x32_rd(src_diff, coeff, bw * 8);
      else
        vp9_short_fdct32x32(src_diff, coeff, bw * 8);
      vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round,
                           p->quant, p->quant_shift, qcoeff, dqcoeff,
                           pd->dequant, p->zbin_extra, eob, scan, iscan);
      break;
    case TX_16X16:
      scan = vp9_default_scan_16x16;
      iscan = vp9_default_iscan_16x16;
      block >>= 4;
      xoff = 16 * (block & twmask);
      yoff = 16 * (block >> twl);
      src_diff = p->src_diff + 4 * bw * yoff + xoff;
      x->fwd_txm16x16(src_diff, coeff, bw * 8);
      vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
                     p->quant, p->quant_shift, qcoeff, dqcoeff,
                     pd->dequant, p->zbin_extra, eob, scan, iscan);
      break;
    case TX_8X8:
      scan = vp9_default_scan_8x8;
      iscan = vp9_default_iscan_8x8;
      block >>= 2;
      xoff = 8 * (block & twmask);
      yoff = 8 * (block >> twl);
      src_diff = p->src_diff + 4 * bw * yoff + xoff;
      x->fwd_txm8x8(src_diff, coeff, bw * 8);
      vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round,
                     p->quant, p->quant_shift, qcoeff, dqcoeff,
                     pd->dequant, p->zbin_extra, eob, scan, iscan);
      break;
    case TX_4X4:
      scan = vp9_default_scan_4x4;
      iscan = vp9_default_iscan_4x4;
      xoff = 4 * (block & twmask);
      yoff = 4 * (block >> twl);
      src_diff = p->src_diff + 4 * bw * yoff + xoff;
      x->fwd_txm4x4(src_diff, coeff, bw * 8);
      vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,
                     p->quant, p->quant_shift, qcoeff, dqcoeff,
                     pd->dequant, p->zbin_extra, eob, scan, iscan);
      break;
    default:
      assert(0);
  }
}

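/* Encode one transform block of the residual already computed by the
 * subtract step: forward transform and quantize, optionally run the trellis
 * optimizer, then reconstruct in place by adding the inverse transform of
 * the dequantized coefficients to the prediction in the dst buffer.
 */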
static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize,
                         TX_SIZE tx_size, void *arg) {
  struct encode_b_args *const args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblockd_plane *const pd = &xd->plane[plane];
  const int raster_block = txfrm_block_to_raster_block(plane_bsize, tx_size,
                                                       block);

  int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  uint8_t *const dst = raster_block_offset_uint8(plane_bsize, raster_block,
                                                 pd->dst.buf, pd->dst.stride);
  vp9_xform_quant(plane, block, plane_bsize, tx_size, arg);

  if (x->optimize)
    vp9_optimize_b(plane, block, plane_bsize, tx_size, x, args->ctx);

  if (x->skip_encode || pd->eobs[block] == 0)
    return;

  switch (tx_size) {
    case TX_32X32:
      vp9_short_idct32x32_add(dqcoeff, dst, pd->dst.stride);
      break;
    case TX_16X16:
      inverse_transform_b_16x16_add(pd->eobs[block], dqcoeff, dst,
                                    pd->dst.stride);
      break;
    case TX_8X8:
      inverse_transform_b_8x8_add(pd->eobs[block], dqcoeff, dst,
                                  pd->dst.stride);
      break;
    case TX_4X4:
      // this is like vp9_short_idct4x4 but has a special case around eob<=1
      // which is significant (not just an optimization) for the lossless
      // case.
      inverse_transform_b_4x4_add(xd, pd->eobs[block], dqcoeff,
                                  dst, pd->dst.stride);
      break;
    default:
      assert(!"Invalid transform size");
  }
}

void vp9_encode_sby(MACROBLOCK *x, BLOCK_SIZE bsize) {
  MACROBLOCKD *const xd = &x->e_mbd;
  struct optimize_ctx ctx;
  struct encode_b_args arg = {x, &ctx};

  vp9_subtract_sby(x, bsize);
  if (x->optimize)
    optimize_init_b(0, bsize, &arg);

  foreach_transformed_block_in_plane(xd, bsize, 0, encode_block, &arg);
}

void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) {
  MACROBLOCKD *const xd = &x->e_mbd;
  struct optimize_ctx ctx;
  struct encode_b_args arg = {x, &ctx};

  vp9_subtract_sb(x, bsize);

  if (x->optimize) {
    int i;
    for (i = 0; i < MAX_MB_PLANE; ++i)
      optimize_init_b(i, bsize, &arg);
  }

  foreach_transformed_block(xd, bsize, encode_block, &arg);
}

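/* Intra counterpart of encode_block().  Prediction, subtraction and
 * reconstruction all happen here, per transform block, because subsequent
 * intra blocks predict from the pixels reconstructed by earlier ones.
 */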
void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
                            TX_SIZE tx_size, void *arg) {
  struct encode_b_args* const args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->this_mi->mbmi;
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  int16_t *coeff = BLOCK_OFFSET(p->coeff, block);
  int16_t *qcoeff = BLOCK_OFFSET(pd->qcoeff, block);
  int16_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  const int16_t *scan, *iscan;
  TX_TYPE tx_type;
  MB_PREDICTION_MODE mode;
  const int bwl = b_width_log2(plane_bsize), bw = 1 << bwl;
  const int twl = bwl - tx_size, twmask = (1 << twl) - 1;
  int xoff, yoff;
  uint8_t *src, *dst;
  int16_t *src_diff;
  uint16_t *eob = &pd->eobs[block];

  if (xd->mb_to_right_edge < 0 || xd->mb_to_bottom_edge < 0)
    extend_for_intra(xd, plane_bsize, plane, block, tx_size);

  // if (x->optimize)
  //   vp9_optimize_b(plane, block, plane_bsize, tx_size, x, args->ctx);

  switch (tx_size) {
    case TX_32X32:
      scan = vp9_default_scan_32x32;
      iscan = vp9_default_iscan_32x32;
      mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
      block >>= 6;
      xoff = 32 * (block & twmask);
      yoff = 32 * (block >> twl);
      dst = pd->dst.buf + yoff * pd->dst.stride + xoff;
      src = p->src.buf + yoff * p->src.stride + xoff;
      src_diff = p->src_diff + 4 * bw * yoff + xoff;
      vp9_predict_intra_block(xd, block, bwl, TX_32X32, mode,
                              dst, pd->dst.stride, dst, pd->dst.stride);
      vp9_subtract_block(32, 32, src_diff, bw * 4,
                         src, p->src.stride, dst, pd->dst.stride);
      if (x->use_lp32x32fdct)
        vp9_short_fdct32x32_rd(src_diff, coeff, bw * 8);
      else
        vp9_short_fdct32x32(src_diff, coeff, bw * 8);
      vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round,
                           p->quant, p->quant_shift, qcoeff, dqcoeff,
                           pd->dequant, p->zbin_extra, eob, scan, iscan);
      if (!x->skip_encode && *eob)
        vp9_short_idct32x32_add(dqcoeff, dst, pd->dst.stride);
      break;
    case TX_16X16:
      tx_type = get_tx_type_16x16(pd->plane_type, xd);
      scan = get_scan_16x16(tx_type);
      iscan = get_iscan_16x16(tx_type);
      mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
      block >>= 4;
      xoff = 16 * (block & twmask);
      yoff = 16 * (block >> twl);
      dst = pd->dst.buf + yoff * pd->dst.stride + xoff;
      src = p->src.buf + yoff * p->src.stride + xoff;
      src_diff = p->src_diff + 4 * bw * yoff + xoff;
      vp9_predict_intra_block(xd, block, bwl, TX_16X16, mode,
                              dst, pd->dst.stride, dst, pd->dst.stride);
      vp9_subtract_block(16, 16, src_diff, bw * 4,
                         src, p->src.stride, dst, pd->dst.stride);
      if (tx_type != DCT_DCT)
        vp9_short_fht16x16(src_diff, coeff, bw * 4, tx_type);
      else
        x->fwd_txm16x16(src_diff, coeff, bw * 8);
      vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
                     p->quant, p->quant_shift, qcoeff, dqcoeff,
                     pd->dequant, p->zbin_extra, eob, scan, iscan);
      if (!x->skip_encode && *eob) {
        if (tx_type == DCT_DCT)
          inverse_transform_b_16x16_add(*eob, dqcoeff, dst, pd->dst.stride);
        else
          vp9_short_iht16x16_add(dqcoeff, dst, pd->dst.stride, tx_type);
      }
      break;
    case TX_8X8:
      tx_type = get_tx_type_8x8(pd->plane_type, xd);
      scan = get_scan_8x8(tx_type);
      iscan = get_iscan_8x8(tx_type);
      mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
      block >>= 2;
      xoff = 8 * (block & twmask);
      yoff = 8 * (block >> twl);
      dst = pd->dst.buf + yoff * pd->dst.stride + xoff;
      src = p->src.buf + yoff * p->src.stride + xoff;
      src_diff = p->src_diff + 4 * bw * yoff + xoff;
      vp9_predict_intra_block(xd, block, bwl, TX_8X8, mode,
                              dst, pd->dst.stride, dst, pd->dst.stride);
      vp9_subtract_block(8, 8, src_diff, bw * 4,
                         src, p->src.stride, dst, pd->dst.stride);
      if (tx_type != DCT_DCT)
        vp9_short_fht8x8(src_diff, coeff, bw * 4, tx_type);
      else
        x->fwd_txm8x8(src_diff, coeff, bw * 8);
      vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant,
                     p->quant_shift, qcoeff, dqcoeff,
                     pd->dequant, p->zbin_extra, eob, scan, iscan);
      if (!x->skip_encode && *eob) {
        if (tx_type == DCT_DCT)
          inverse_transform_b_8x8_add(*eob, dqcoeff, dst, pd->dst.stride);
        else
          vp9_short_iht8x8_add(dqcoeff, dst, pd->dst.stride, tx_type);
      }
      break;
    case TX_4X4:
      tx_type = get_tx_type_4x4(pd->plane_type, xd, block);
      scan = get_scan_4x4(tx_type);
      iscan = get_iscan_4x4(tx_type);
      if (mbmi->sb_type < BLOCK_8X8 && plane == 0)
        mode = xd->this_mi->bmi[block].as_mode;
      else
        mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;

      xoff = 4 * (block & twmask);
      yoff = 4 * (block >> twl);
      dst = pd->dst.buf + yoff * pd->dst.stride + xoff;
      src = p->src.buf + yoff * p->src.stride + xoff;
      src_diff = p->src_diff + 4 * bw * yoff + xoff;
      vp9_predict_intra_block(xd, block, bwl, TX_4X4, mode,
                              dst, pd->dst.stride, dst, pd->dst.stride);
      vp9_subtract_block(4, 4, src_diff, bw * 4,
                         src, p->src.stride, dst, pd->dst.stride);
      if (tx_type != DCT_DCT)
        vp9_short_fht4x4(src_diff, coeff, bw * 4, tx_type);
      else
        x->fwd_txm4x4(src_diff, coeff, bw * 8);
      vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant,
                     p->quant_shift, qcoeff, dqcoeff,
                     pd->dequant, p->zbin_extra, eob, scan, iscan);
      if (!x->skip_encode && *eob) {
        if (tx_type == DCT_DCT)
          // this is like vp9_short_idct4x4 but has a special case around
          // eob<=1 which is significant (not just an optimization) for the
          // lossless case.
          inverse_transform_b_4x4_add(xd, *eob, dqcoeff, dst, pd->dst.stride);
        else
          vp9_short_iht4x4_add(dqcoeff, dst, pd->dst.stride, tx_type);
      }
      break;
    default:
      assert(0);
  }
}

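/* Entry points for the intra path.  A caller that has already picked the
 * prediction modes would typically encode one block as, e.g.:
 *
 *   vp9_encode_intra_block_y(x, bsize);   // luma plane
 *   vp9_encode_intra_block_uv(x, bsize);  // both chroma planes
 *
 * whereas the inter path goes through vp9_encode_sb(), which subtracts the
 * prediction for all planes up front and then encodes every transform block.
 */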
void vp9_encode_intra_block_y(MACROBLOCK *x, BLOCK_SIZE bsize) {
  MACROBLOCKD* const xd = &x->e_mbd;
  struct optimize_ctx ctx;
  struct encode_b_args arg = {x, &ctx};

  foreach_transformed_block_in_plane(xd, bsize, 0, vp9_encode_block_intra,
                                     &arg);
}

void vp9_encode_intra_block_uv(MACROBLOCK *x, BLOCK_SIZE bsize) {
  MACROBLOCKD* const xd = &x->e_mbd;
  struct optimize_ctx ctx;
  struct encode_b_args arg = {x, &ctx};
  foreach_transformed_block_uv(xd, bsize, vp9_encode_block_intra, &arg);
}