1 /* 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "./vp9_rtcd.h" 12 #include "./vpx_config.h" 13 #include "./vpx_dsp_rtcd.h" 14 15 #include "vpx_dsp/quantize.h" 16 #include "vpx_mem/vpx_mem.h" 17 #include "vpx_ports/mem.h" 18 19 #include "vp9/common/vp9_idct.h" 20 #include "vp9/common/vp9_reconinter.h" 21 #include "vp9/common/vp9_reconintra.h" 22 #include "vp9/common/vp9_scan.h" 23 24 #include "vp9/encoder/vp9_encodemb.h" 25 #include "vp9/encoder/vp9_rd.h" 26 #include "vp9/encoder/vp9_tokenize.h" 27 28 struct optimize_ctx { 29 ENTROPY_CONTEXT ta[MAX_MB_PLANE][16]; 30 ENTROPY_CONTEXT tl[MAX_MB_PLANE][16]; 31 }; 32 33 void vp9_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) { 34 struct macroblock_plane *const p = &x->plane[plane]; 35 const struct macroblockd_plane *const pd = &x->e_mbd.plane[plane]; 36 const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd); 37 const int bw = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; 38 const int bh = 4 * num_4x4_blocks_high_lookup[plane_bsize]; 39 40 #if CONFIG_VP9_HIGHBITDEPTH 41 if (x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { 42 vpx_highbd_subtract_block(bh, bw, p->src_diff, bw, p->src.buf, 43 p->src.stride, pd->dst.buf, pd->dst.stride, 44 x->e_mbd.bd); 45 return; 46 } 47 #endif // CONFIG_VP9_HIGHBITDEPTH 48 vpx_subtract_block(bh, bw, p->src_diff, bw, p->src.buf, p->src.stride, 49 pd->dst.buf, pd->dst.stride); 50 } 51 52 typedef struct vp9_token_state { 53 int64_t error; 54 int rate; 55 int16_t next; 56 int16_t token; 57 tran_low_t qc; 58 tran_low_t dqc; 59 uint8_t best_index; 60 } vp9_token_state; 61 62 static const int plane_rd_mult[REF_TYPES][PLANE_TYPES] = { 63 { 10, 6 }, { 8, 5 }, 64 }; 65 66 #define UPDATE_RD_COST() \ 67 { \ 68 rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0); \ 69 rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1); \ 70 } 71 72 // This function is a place holder for now but may ultimately need 73 // to scan previous tokens to work out the correct context. 74 static int trellis_get_coeff_context(const int16_t *scan, const int16_t *nb, 75 int idx, int token, uint8_t *token_cache) { 76 int bak = token_cache[scan[idx]], pt; 77 token_cache[scan[idx]] = vp9_pt_energy_class[token]; 78 pt = get_coef_context(nb, token_cache, idx + 1); 79 token_cache[scan[idx]] = bak; 80 return pt; 81 } 82 83 static const int16_t band_count_table[TX_SIZES][8] = { 84 { 1, 2, 3, 4, 3, 16 - 13, 0 }, 85 { 1, 2, 3, 4, 11, 64 - 21, 0 }, 86 { 1, 2, 3, 4, 11, 256 - 21, 0 }, 87 { 1, 2, 3, 4, 11, 1024 - 21, 0 }, 88 }; 89 static const int16_t band_cum_count_table[TX_SIZES][8] = { 90 { 0, 1, 3, 6, 10, 13, 16, 0 }, 91 { 0, 1, 3, 6, 10, 21, 64, 0 }, 92 { 0, 1, 3, 6, 10, 21, 256, 0 }, 93 { 0, 1, 3, 6, 10, 21, 1024, 0 }, 94 }; 95 int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size, 96 int ctx) { 97 MACROBLOCKD *const xd = &mb->e_mbd; 98 struct macroblock_plane *const p = &mb->plane[plane]; 99 struct macroblockd_plane *const pd = &xd->plane[plane]; 100 const int ref = is_inter_block(xd->mi[0]); 101 vp9_token_state tokens[1025][2]; 102 uint8_t token_cache[1024]; 103 const tran_low_t *const coeff = BLOCK_OFFSET(mb->plane[plane].coeff, block); 104 tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block); 105 tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); 106 const int eob = p->eobs[block]; 107 const PLANE_TYPE type = get_plane_type(plane); 108 const int default_eob = 16 << (tx_size << 1); 109 const int shift = (tx_size == TX_32X32); 110 const int16_t *const dequant_ptr = pd->dequant; 111 const uint8_t *const band_translate = get_band_translate(tx_size); 112 const scan_order *const so = get_scan(xd, tx_size, type, block); 113 const int16_t *const scan = so->scan; 114 const int16_t *const nb = so->neighbors; 115 const int dq_step[2] = { dequant_ptr[0] >> shift, dequant_ptr[1] >> shift }; 116 int next = eob, sz = 0; 117 const int64_t rdmult = ((int64_t)mb->rdmult * plane_rd_mult[ref][type]) >> 1; 118 const int64_t rddiv = mb->rddiv; 119 int64_t rd_cost0, rd_cost1; 120 int rate0, rate1; 121 int64_t error0, error1; 122 int16_t t0, t1; 123 int best, band = (eob < default_eob) ? band_translate[eob] 124 : band_translate[eob - 1]; 125 int pt, i, final_eob; 126 #if CONFIG_VP9_HIGHBITDEPTH 127 const uint16_t *cat6_high_cost = vp9_get_high_cost_table(xd->bd); 128 #else 129 const uint16_t *cat6_high_cost = vp9_get_high_cost_table(8); 130 #endif 131 unsigned int(*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] = 132 mb->token_costs[tx_size][type][ref]; 133 const int16_t *band_counts = &band_count_table[tx_size][band]; 134 int16_t band_left = eob - band_cum_count_table[tx_size][band] + 1; 135 136 token_costs += band; 137 138 assert((!type && !plane) || (type && plane)); 139 assert(eob <= default_eob); 140 141 /* Now set up a Viterbi trellis to evaluate alternative roundings. */ 142 /* Initialize the sentinel node of the trellis. */ 143 tokens[eob][0].rate = 0; 144 tokens[eob][0].error = 0; 145 tokens[eob][0].next = default_eob; 146 tokens[eob][0].token = EOB_TOKEN; 147 tokens[eob][0].qc = 0; 148 tokens[eob][1] = tokens[eob][0]; 149 150 for (i = 0; i < eob; i++) { 151 const int rc = scan[i]; 152 token_cache[rc] = vp9_pt_energy_class[vp9_get_token(qcoeff[rc])]; 153 } 154 155 for (i = eob; i-- > 0;) { 156 int base_bits, d2, dx; 157 const int rc = scan[i]; 158 int x = qcoeff[rc]; 159 /* Only add a trellis state for non-zero coefficients. */ 160 if (x) { 161 error0 = tokens[next][0].error; 162 error1 = tokens[next][1].error; 163 /* Evaluate the first possibility for this state. */ 164 rate0 = tokens[next][0].rate; 165 rate1 = tokens[next][1].rate; 166 base_bits = vp9_get_token_cost(x, &t0, cat6_high_cost); 167 /* Consider both possible successor states. */ 168 if (next < default_eob) { 169 pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache); 170 rate0 += (*token_costs)[0][pt][tokens[next][0].token]; 171 rate1 += (*token_costs)[0][pt][tokens[next][1].token]; 172 } 173 UPDATE_RD_COST(); 174 /* And pick the best. */ 175 best = rd_cost1 < rd_cost0; 176 dx = (dqcoeff[rc] - coeff[rc]) * (1 << shift); 177 #if CONFIG_VP9_HIGHBITDEPTH 178 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { 179 dx >>= xd->bd - 8; 180 } 181 #endif // CONFIG_VP9_HIGHBITDEPTH 182 d2 = dx * dx; 183 tokens[i][0].rate = base_bits + (best ? rate1 : rate0); 184 tokens[i][0].error = d2 + (best ? error1 : error0); 185 tokens[i][0].next = next; 186 tokens[i][0].token = t0; 187 tokens[i][0].qc = x; 188 tokens[i][0].dqc = dqcoeff[rc]; 189 tokens[i][0].best_index = best; 190 191 /* Evaluate the second possibility for this state. */ 192 rate0 = tokens[next][0].rate; 193 rate1 = tokens[next][1].rate; 194 195 if ((abs(x) * dequant_ptr[rc != 0] > (abs(coeff[rc]) << shift)) && 196 (abs(x) * dequant_ptr[rc != 0] < 197 (abs(coeff[rc]) << shift) + dequant_ptr[rc != 0])) { 198 sz = -(x < 0); 199 x -= 2 * sz + 1; 200 } else { 201 tokens[i][1] = tokens[i][0]; 202 next = i; 203 204 if (!(--band_left)) { 205 --band_counts; 206 band_left = *band_counts; 207 --token_costs; 208 } 209 continue; 210 } 211 212 /* Consider both possible successor states. */ 213 if (!x) { 214 /* If we reduced this coefficient to zero, check to see if 215 * we need to move the EOB back here. 216 */ 217 t0 = tokens[next][0].token == EOB_TOKEN ? EOB_TOKEN : ZERO_TOKEN; 218 t1 = tokens[next][1].token == EOB_TOKEN ? EOB_TOKEN : ZERO_TOKEN; 219 base_bits = 0; 220 } else { 221 base_bits = vp9_get_token_cost(x, &t0, cat6_high_cost); 222 t1 = t0; 223 } 224 if (next < default_eob) { 225 if (t0 != EOB_TOKEN) { 226 pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache); 227 rate0 += (*token_costs)[!x][pt][tokens[next][0].token]; 228 } 229 if (t1 != EOB_TOKEN) { 230 pt = trellis_get_coeff_context(scan, nb, i, t1, token_cache); 231 rate1 += (*token_costs)[!x][pt][tokens[next][1].token]; 232 } 233 } 234 235 UPDATE_RD_COST(); 236 /* And pick the best. */ 237 best = rd_cost1 < rd_cost0; 238 239 #if CONFIG_VP9_HIGHBITDEPTH 240 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { 241 dx -= ((dequant_ptr[rc != 0] >> (xd->bd - 8)) + sz) ^ sz; 242 } else { 243 dx -= (dequant_ptr[rc != 0] + sz) ^ sz; 244 } 245 #else 246 dx -= (dequant_ptr[rc != 0] + sz) ^ sz; 247 #endif // CONFIG_VP9_HIGHBITDEPTH 248 d2 = dx * dx; 249 250 tokens[i][1].rate = base_bits + (best ? rate1 : rate0); 251 tokens[i][1].error = d2 + (best ? error1 : error0); 252 tokens[i][1].next = next; 253 tokens[i][1].token = best ? t1 : t0; 254 tokens[i][1].qc = x; 255 256 if (x) { 257 tran_low_t offset = dq_step[rc != 0]; 258 // The 32x32 transform coefficient uses half quantization step size. 259 // Account for the rounding difference in the dequantized coefficeint 260 // value when the quantization index is dropped from an even number 261 // to an odd number. 262 if (shift & x) offset += (dequant_ptr[rc != 0] & 0x01); 263 264 if (sz == 0) 265 tokens[i][1].dqc = dqcoeff[rc] - offset; 266 else 267 tokens[i][1].dqc = dqcoeff[rc] + offset; 268 } else { 269 tokens[i][1].dqc = 0; 270 } 271 272 tokens[i][1].best_index = best; 273 /* Finally, make this the new head of the trellis. */ 274 next = i; 275 } else { 276 /* There's no choice to make for a zero coefficient, so we don't 277 * add a new trellis node, but we do need to update the costs. 278 */ 279 pt = get_coef_context(nb, token_cache, i + 1); 280 t0 = tokens[next][0].token; 281 t1 = tokens[next][1].token; 282 /* Update the cost of each path if we're past the EOB token. */ 283 if (t0 != EOB_TOKEN) { 284 tokens[next][0].rate += (*token_costs)[1][pt][t0]; 285 tokens[next][0].token = ZERO_TOKEN; 286 } 287 if (t1 != EOB_TOKEN) { 288 tokens[next][1].rate += (*token_costs)[1][pt][t1]; 289 tokens[next][1].token = ZERO_TOKEN; 290 } 291 tokens[i][0].best_index = tokens[i][1].best_index = 0; 292 /* Don't update next, because we didn't add a new node. */ 293 } 294 295 if (!(--band_left)) { 296 --band_counts; 297 band_left = *band_counts; 298 --token_costs; 299 } 300 } 301 302 /* Now pick the best path through the whole trellis. */ 303 rate0 = tokens[next][0].rate; 304 rate1 = tokens[next][1].rate; 305 error0 = tokens[next][0].error; 306 error1 = tokens[next][1].error; 307 t0 = tokens[next][0].token; 308 t1 = tokens[next][1].token; 309 rate0 += (*token_costs)[0][ctx][t0]; 310 rate1 += (*token_costs)[0][ctx][t1]; 311 UPDATE_RD_COST(); 312 best = rd_cost1 < rd_cost0; 313 final_eob = -1; 314 315 for (i = next; i < eob; i = next) { 316 const int x = tokens[i][best].qc; 317 const int rc = scan[i]; 318 if (x) final_eob = i; 319 qcoeff[rc] = x; 320 dqcoeff[rc] = tokens[i][best].dqc; 321 next = tokens[i][best].next; 322 best = tokens[i][best].best_index; 323 } 324 final_eob++; 325 326 mb->plane[plane].eobs[block] = final_eob; 327 return final_eob; 328 } 329 330 static INLINE void fdct32x32(int rd_transform, const int16_t *src, 331 tran_low_t *dst, int src_stride) { 332 if (rd_transform) 333 vpx_fdct32x32_rd(src, dst, src_stride); 334 else 335 vpx_fdct32x32(src, dst, src_stride); 336 } 337 338 #if CONFIG_VP9_HIGHBITDEPTH 339 static INLINE void highbd_fdct32x32(int rd_transform, const int16_t *src, 340 tran_low_t *dst, int src_stride) { 341 if (rd_transform) 342 vpx_highbd_fdct32x32_rd(src, dst, src_stride); 343 else 344 vpx_highbd_fdct32x32(src, dst, src_stride); 345 } 346 #endif // CONFIG_VP9_HIGHBITDEPTH 347 348 void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block, int row, int col, 349 BLOCK_SIZE plane_bsize, TX_SIZE tx_size) { 350 MACROBLOCKD *const xd = &x->e_mbd; 351 const struct macroblock_plane *const p = &x->plane[plane]; 352 const struct macroblockd_plane *const pd = &xd->plane[plane]; 353 const scan_order *const scan_order = &vp9_default_scan_orders[tx_size]; 354 tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block); 355 tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block); 356 tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); 357 uint16_t *const eob = &p->eobs[block]; 358 const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; 359 const int16_t *src_diff; 360 src_diff = &p->src_diff[4 * (row * diff_stride + col)]; 361 362 #if CONFIG_VP9_HIGHBITDEPTH 363 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { 364 switch (tx_size) { 365 case TX_32X32: 366 highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride); 367 vp9_highbd_quantize_fp_32x32(coeff, 1024, x->skip_block, p->round_fp, 368 p->quant_fp, qcoeff, dqcoeff, pd->dequant, 369 eob, scan_order->scan, scan_order->iscan); 370 break; 371 case TX_16X16: 372 vpx_highbd_fdct16x16(src_diff, coeff, diff_stride); 373 vp9_highbd_quantize_fp(coeff, 256, x->skip_block, p->round_fp, 374 p->quant_fp, qcoeff, dqcoeff, pd->dequant, eob, 375 scan_order->scan, scan_order->iscan); 376 break; 377 case TX_8X8: 378 vpx_highbd_fdct8x8(src_diff, coeff, diff_stride); 379 vp9_highbd_quantize_fp(coeff, 64, x->skip_block, p->round_fp, 380 p->quant_fp, qcoeff, dqcoeff, pd->dequant, eob, 381 scan_order->scan, scan_order->iscan); 382 break; 383 case TX_4X4: 384 x->fwd_txm4x4(src_diff, coeff, diff_stride); 385 vp9_highbd_quantize_fp(coeff, 16, x->skip_block, p->round_fp, 386 p->quant_fp, qcoeff, dqcoeff, pd->dequant, eob, 387 scan_order->scan, scan_order->iscan); 388 break; 389 default: assert(0); 390 } 391 return; 392 } 393 #endif // CONFIG_VP9_HIGHBITDEPTH 394 395 switch (tx_size) { 396 case TX_32X32: 397 fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride); 398 vp9_quantize_fp_32x32(coeff, 1024, x->skip_block, p->round_fp, 399 p->quant_fp, qcoeff, dqcoeff, pd->dequant, eob, 400 scan_order->scan, scan_order->iscan); 401 break; 402 case TX_16X16: 403 vpx_fdct16x16(src_diff, coeff, diff_stride); 404 vp9_quantize_fp(coeff, 256, x->skip_block, p->round_fp, p->quant_fp, 405 qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, 406 scan_order->iscan); 407 break; 408 case TX_8X8: 409 vp9_fdct8x8_quant(src_diff, diff_stride, coeff, 64, x->skip_block, 410 p->round_fp, p->quant_fp, qcoeff, dqcoeff, pd->dequant, 411 eob, scan_order->scan, scan_order->iscan); 412 break; 413 case TX_4X4: 414 x->fwd_txm4x4(src_diff, coeff, diff_stride); 415 vp9_quantize_fp(coeff, 16, x->skip_block, p->round_fp, p->quant_fp, 416 qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, 417 scan_order->iscan); 418 break; 419 default: assert(0); break; 420 } 421 } 422 423 void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block, int row, int col, 424 BLOCK_SIZE plane_bsize, TX_SIZE tx_size) { 425 MACROBLOCKD *const xd = &x->e_mbd; 426 const struct macroblock_plane *const p = &x->plane[plane]; 427 const struct macroblockd_plane *const pd = &xd->plane[plane]; 428 tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block); 429 tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block); 430 tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); 431 uint16_t *const eob = &p->eobs[block]; 432 const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; 433 const int16_t *src_diff; 434 src_diff = &p->src_diff[4 * (row * diff_stride + col)]; 435 #if CONFIG_VP9_HIGHBITDEPTH 436 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { 437 switch (tx_size) { 438 case TX_32X32: 439 vpx_highbd_fdct32x32_1(src_diff, coeff, diff_stride); 440 vpx_highbd_quantize_dc_32x32(coeff, x->skip_block, p->round, 441 p->quant_fp[0], qcoeff, dqcoeff, 442 pd->dequant[0], eob); 443 break; 444 case TX_16X16: 445 vpx_highbd_fdct16x16_1(src_diff, coeff, diff_stride); 446 vpx_highbd_quantize_dc(coeff, 256, x->skip_block, p->round, 447 p->quant_fp[0], qcoeff, dqcoeff, pd->dequant[0], 448 eob); 449 break; 450 case TX_8X8: 451 vpx_highbd_fdct8x8_1(src_diff, coeff, diff_stride); 452 vpx_highbd_quantize_dc(coeff, 64, x->skip_block, p->round, 453 p->quant_fp[0], qcoeff, dqcoeff, pd->dequant[0], 454 eob); 455 break; 456 case TX_4X4: 457 x->fwd_txm4x4(src_diff, coeff, diff_stride); 458 vpx_highbd_quantize_dc(coeff, 16, x->skip_block, p->round, 459 p->quant_fp[0], qcoeff, dqcoeff, pd->dequant[0], 460 eob); 461 break; 462 default: assert(0); 463 } 464 return; 465 } 466 #endif // CONFIG_VP9_HIGHBITDEPTH 467 468 switch (tx_size) { 469 case TX_32X32: 470 vpx_fdct32x32_1(src_diff, coeff, diff_stride); 471 vpx_quantize_dc_32x32(coeff, x->skip_block, p->round, p->quant_fp[0], 472 qcoeff, dqcoeff, pd->dequant[0], eob); 473 break; 474 case TX_16X16: 475 vpx_fdct16x16_1(src_diff, coeff, diff_stride); 476 vpx_quantize_dc(coeff, 256, x->skip_block, p->round, p->quant_fp[0], 477 qcoeff, dqcoeff, pd->dequant[0], eob); 478 break; 479 case TX_8X8: 480 vpx_fdct8x8_1(src_diff, coeff, diff_stride); 481 vpx_quantize_dc(coeff, 64, x->skip_block, p->round, p->quant_fp[0], 482 qcoeff, dqcoeff, pd->dequant[0], eob); 483 break; 484 case TX_4X4: 485 x->fwd_txm4x4(src_diff, coeff, diff_stride); 486 vpx_quantize_dc(coeff, 16, x->skip_block, p->round, p->quant_fp[0], 487 qcoeff, dqcoeff, pd->dequant[0], eob); 488 break; 489 default: assert(0); break; 490 } 491 } 492 493 void vp9_xform_quant(MACROBLOCK *x, int plane, int block, int row, int col, 494 BLOCK_SIZE plane_bsize, TX_SIZE tx_size) { 495 MACROBLOCKD *const xd = &x->e_mbd; 496 const struct macroblock_plane *const p = &x->plane[plane]; 497 const struct macroblockd_plane *const pd = &xd->plane[plane]; 498 const scan_order *const scan_order = &vp9_default_scan_orders[tx_size]; 499 tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block); 500 tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block); 501 tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); 502 uint16_t *const eob = &p->eobs[block]; 503 const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; 504 const int16_t *src_diff; 505 src_diff = &p->src_diff[4 * (row * diff_stride + col)]; 506 507 #if CONFIG_VP9_HIGHBITDEPTH 508 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { 509 switch (tx_size) { 510 case TX_32X32: 511 highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride); 512 vpx_highbd_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, 513 p->round, p->quant, p->quant_shift, qcoeff, 514 dqcoeff, pd->dequant, eob, scan_order->scan, 515 scan_order->iscan); 516 break; 517 case TX_16X16: 518 vpx_highbd_fdct16x16(src_diff, coeff, diff_stride); 519 vpx_highbd_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, 520 p->quant, p->quant_shift, qcoeff, dqcoeff, 521 pd->dequant, eob, scan_order->scan, 522 scan_order->iscan); 523 break; 524 case TX_8X8: 525 vpx_highbd_fdct8x8(src_diff, coeff, diff_stride); 526 vpx_highbd_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, 527 p->quant, p->quant_shift, qcoeff, dqcoeff, 528 pd->dequant, eob, scan_order->scan, 529 scan_order->iscan); 530 break; 531 case TX_4X4: 532 x->fwd_txm4x4(src_diff, coeff, diff_stride); 533 vpx_highbd_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, 534 p->quant, p->quant_shift, qcoeff, dqcoeff, 535 pd->dequant, eob, scan_order->scan, 536 scan_order->iscan); 537 break; 538 default: assert(0); 539 } 540 return; 541 } 542 #endif // CONFIG_VP9_HIGHBITDEPTH 543 544 switch (tx_size) { 545 case TX_32X32: 546 fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride); 547 vpx_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round, 548 p->quant, p->quant_shift, qcoeff, dqcoeff, 549 pd->dequant, eob, scan_order->scan, 550 scan_order->iscan); 551 break; 552 case TX_16X16: 553 vpx_fdct16x16(src_diff, coeff, diff_stride); 554 vpx_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, p->quant, 555 p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob, 556 scan_order->scan, scan_order->iscan); 557 break; 558 case TX_8X8: 559 vpx_fdct8x8(src_diff, coeff, diff_stride); 560 vpx_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant, 561 p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob, 562 scan_order->scan, scan_order->iscan); 563 break; 564 case TX_4X4: 565 x->fwd_txm4x4(src_diff, coeff, diff_stride); 566 vpx_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant, 567 p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob, 568 scan_order->scan, scan_order->iscan); 569 break; 570 default: assert(0); break; 571 } 572 } 573 574 static void encode_block(int plane, int block, int row, int col, 575 BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) { 576 struct encode_b_args *const args = arg; 577 MACROBLOCK *const x = args->x; 578 MACROBLOCKD *const xd = &x->e_mbd; 579 struct macroblock_plane *const p = &x->plane[plane]; 580 struct macroblockd_plane *const pd = &xd->plane[plane]; 581 tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); 582 uint8_t *dst; 583 ENTROPY_CONTEXT *a, *l; 584 dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col]; 585 a = &args->ta[col]; 586 l = &args->tl[row]; 587 588 // TODO(jingning): per transformed block zero forcing only enabled for 589 // luma component. will integrate chroma components as well. 590 if (x->zcoeff_blk[tx_size][block] && plane == 0) { 591 p->eobs[block] = 0; 592 *a = *l = 0; 593 return; 594 } 595 596 if (!x->skip_recode) { 597 if (x->quant_fp) { 598 // Encoding process for rtc mode 599 if (x->skip_txfm[0] == SKIP_TXFM_AC_DC && plane == 0) { 600 // skip forward transform 601 p->eobs[block] = 0; 602 *a = *l = 0; 603 return; 604 } else { 605 vp9_xform_quant_fp(x, plane, block, row, col, plane_bsize, tx_size); 606 } 607 } else { 608 if (max_txsize_lookup[plane_bsize] == tx_size) { 609 int txfm_blk_index = (plane << 2) + (block >> (tx_size << 1)); 610 if (x->skip_txfm[txfm_blk_index] == SKIP_TXFM_NONE) { 611 // full forward transform and quantization 612 vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size); 613 } else if (x->skip_txfm[txfm_blk_index] == SKIP_TXFM_AC_ONLY) { 614 // fast path forward transform and quantization 615 vp9_xform_quant_dc(x, plane, block, row, col, plane_bsize, tx_size); 616 } else { 617 // skip forward transform 618 p->eobs[block] = 0; 619 *a = *l = 0; 620 return; 621 } 622 } else { 623 vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size); 624 } 625 } 626 } 627 628 if (x->optimize && (!x->skip_recode || !x->skip_optimize)) { 629 const int ctx = combine_entropy_contexts(*a, *l); 630 *a = *l = vp9_optimize_b(x, plane, block, tx_size, ctx) > 0; 631 } else { 632 *a = *l = p->eobs[block] > 0; 633 } 634 635 if (p->eobs[block]) *(args->skip) = 0; 636 637 if (x->skip_encode || p->eobs[block] == 0) return; 638 #if CONFIG_VP9_HIGHBITDEPTH 639 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { 640 uint16_t *const dst16 = CONVERT_TO_SHORTPTR(dst); 641 switch (tx_size) { 642 case TX_32X32: 643 vp9_highbd_idct32x32_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block], 644 xd->bd); 645 break; 646 case TX_16X16: 647 vp9_highbd_idct16x16_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block], 648 xd->bd); 649 break; 650 case TX_8X8: 651 vp9_highbd_idct8x8_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block], 652 xd->bd); 653 break; 654 case TX_4X4: 655 // this is like vp9_short_idct4x4 but has a special case around eob<=1 656 // which is significant (not just an optimization) for the lossless 657 // case. 658 x->highbd_itxm_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block], 659 xd->bd); 660 break; 661 default: assert(0 && "Invalid transform size"); 662 } 663 return; 664 } 665 #endif // CONFIG_VP9_HIGHBITDEPTH 666 667 switch (tx_size) { 668 case TX_32X32: 669 vp9_idct32x32_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]); 670 break; 671 case TX_16X16: 672 vp9_idct16x16_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]); 673 break; 674 case TX_8X8: 675 vp9_idct8x8_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]); 676 break; 677 case TX_4X4: 678 // this is like vp9_short_idct4x4 but has a special case around eob<=1 679 // which is significant (not just an optimization) for the lossless 680 // case. 681 x->itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]); 682 break; 683 default: assert(0 && "Invalid transform size"); break; 684 } 685 } 686 687 static void encode_block_pass1(int plane, int block, int row, int col, 688 BLOCK_SIZE plane_bsize, TX_SIZE tx_size, 689 void *arg) { 690 MACROBLOCK *const x = (MACROBLOCK *)arg; 691 MACROBLOCKD *const xd = &x->e_mbd; 692 struct macroblock_plane *const p = &x->plane[plane]; 693 struct macroblockd_plane *const pd = &xd->plane[plane]; 694 tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); 695 uint8_t *dst; 696 dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col]; 697 698 vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size); 699 700 if (p->eobs[block] > 0) { 701 #if CONFIG_VP9_HIGHBITDEPTH 702 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { 703 x->highbd_itxm_add(dqcoeff, CONVERT_TO_SHORTPTR(dst), pd->dst.stride, 704 p->eobs[block], xd->bd); 705 return; 706 } 707 #endif // CONFIG_VP9_HIGHBITDEPTH 708 x->itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]); 709 } 710 } 711 712 void vp9_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize) { 713 vp9_subtract_plane(x, bsize, 0); 714 vp9_foreach_transformed_block_in_plane(&x->e_mbd, bsize, 0, 715 encode_block_pass1, x); 716 } 717 718 void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) { 719 MACROBLOCKD *const xd = &x->e_mbd; 720 struct optimize_ctx ctx; 721 MODE_INFO *mi = xd->mi[0]; 722 struct encode_b_args arg = { x, 1, NULL, NULL, &mi->skip }; 723 int plane; 724 725 mi->skip = 1; 726 727 if (x->skip) return; 728 729 for (plane = 0; plane < MAX_MB_PLANE; ++plane) { 730 if (!x->skip_recode) vp9_subtract_plane(x, bsize, plane); 731 732 if (x->optimize && (!x->skip_recode || !x->skip_optimize)) { 733 const struct macroblockd_plane *const pd = &xd->plane[plane]; 734 const TX_SIZE tx_size = plane ? get_uv_tx_size(mi, pd) : mi->tx_size; 735 vp9_get_entropy_contexts(bsize, tx_size, pd, ctx.ta[plane], 736 ctx.tl[plane]); 737 arg.enable_coeff_opt = 1; 738 } else { 739 arg.enable_coeff_opt = 0; 740 } 741 arg.ta = ctx.ta[plane]; 742 arg.tl = ctx.tl[plane]; 743 744 vp9_foreach_transformed_block_in_plane(xd, bsize, plane, encode_block, 745 &arg); 746 } 747 } 748 749 void vp9_encode_block_intra(int plane, int block, int row, int col, 750 BLOCK_SIZE plane_bsize, TX_SIZE tx_size, 751 void *arg) { 752 struct encode_b_args *const args = arg; 753 MACROBLOCK *const x = args->x; 754 MACROBLOCKD *const xd = &x->e_mbd; 755 MODE_INFO *mi = xd->mi[0]; 756 struct macroblock_plane *const p = &x->plane[plane]; 757 struct macroblockd_plane *const pd = &xd->plane[plane]; 758 tran_low_t *coeff = BLOCK_OFFSET(p->coeff, block); 759 tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block); 760 tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); 761 const scan_order *scan_order; 762 TX_TYPE tx_type = DCT_DCT; 763 PREDICTION_MODE mode; 764 const int bwl = b_width_log2_lookup[plane_bsize]; 765 const int diff_stride = 4 * (1 << bwl); 766 uint8_t *src, *dst; 767 int16_t *src_diff; 768 uint16_t *eob = &p->eobs[block]; 769 const int src_stride = p->src.stride; 770 const int dst_stride = pd->dst.stride; 771 ENTROPY_CONTEXT *a = NULL; 772 ENTROPY_CONTEXT *l = NULL; 773 int entropy_ctx = 0; 774 dst = &pd->dst.buf[4 * (row * dst_stride + col)]; 775 src = &p->src.buf[4 * (row * src_stride + col)]; 776 src_diff = &p->src_diff[4 * (row * diff_stride + col)]; 777 if (args->enable_coeff_opt) { 778 a = &args->ta[col]; 779 l = &args->tl[row]; 780 entropy_ctx = combine_entropy_contexts(*a, *l); 781 } 782 783 if (tx_size == TX_4X4) { 784 tx_type = get_tx_type_4x4(get_plane_type(plane), xd, block); 785 scan_order = &vp9_scan_orders[TX_4X4][tx_type]; 786 mode = plane == 0 ? get_y_mode(xd->mi[0], block) : mi->uv_mode; 787 } else { 788 mode = plane == 0 ? mi->mode : mi->uv_mode; 789 if (tx_size == TX_32X32) { 790 scan_order = &vp9_default_scan_orders[TX_32X32]; 791 } else { 792 tx_type = get_tx_type(get_plane_type(plane), xd); 793 scan_order = &vp9_scan_orders[tx_size][tx_type]; 794 } 795 } 796 797 vp9_predict_intra_block( 798 xd, bwl, tx_size, mode, (x->skip_encode || x->fp_src_pred) ? src : dst, 799 (x->skip_encode || x->fp_src_pred) ? src_stride : dst_stride, dst, 800 dst_stride, col, row, plane); 801 802 #if CONFIG_VP9_HIGHBITDEPTH 803 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { 804 uint16_t *const dst16 = CONVERT_TO_SHORTPTR(dst); 805 switch (tx_size) { 806 case TX_32X32: 807 if (!x->skip_recode) { 808 vpx_highbd_subtract_block(32, 32, src_diff, diff_stride, src, 809 src_stride, dst, dst_stride, xd->bd); 810 highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride); 811 vpx_highbd_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, 812 p->round, p->quant, p->quant_shift, 813 qcoeff, dqcoeff, pd->dequant, eob, 814 scan_order->scan, scan_order->iscan); 815 } 816 if (args->enable_coeff_opt && !x->skip_recode) { 817 *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0; 818 } 819 if (!x->skip_encode && *eob) { 820 vp9_highbd_idct32x32_add(dqcoeff, dst16, dst_stride, *eob, xd->bd); 821 } 822 break; 823 case TX_16X16: 824 if (!x->skip_recode) { 825 vpx_highbd_subtract_block(16, 16, src_diff, diff_stride, src, 826 src_stride, dst, dst_stride, xd->bd); 827 if (tx_type == DCT_DCT) 828 vpx_highbd_fdct16x16(src_diff, coeff, diff_stride); 829 else 830 vp9_highbd_fht16x16(src_diff, coeff, diff_stride, tx_type); 831 vpx_highbd_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, 832 p->quant, p->quant_shift, qcoeff, dqcoeff, 833 pd->dequant, eob, scan_order->scan, 834 scan_order->iscan); 835 } 836 if (args->enable_coeff_opt && !x->skip_recode) { 837 *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0; 838 } 839 if (!x->skip_encode && *eob) { 840 vp9_highbd_iht16x16_add(tx_type, dqcoeff, dst16, dst_stride, *eob, 841 xd->bd); 842 } 843 break; 844 case TX_8X8: 845 if (!x->skip_recode) { 846 vpx_highbd_subtract_block(8, 8, src_diff, diff_stride, src, 847 src_stride, dst, dst_stride, xd->bd); 848 if (tx_type == DCT_DCT) 849 vpx_highbd_fdct8x8(src_diff, coeff, diff_stride); 850 else 851 vp9_highbd_fht8x8(src_diff, coeff, diff_stride, tx_type); 852 vpx_highbd_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, 853 p->quant, p->quant_shift, qcoeff, dqcoeff, 854 pd->dequant, eob, scan_order->scan, 855 scan_order->iscan); 856 } 857 if (args->enable_coeff_opt && !x->skip_recode) { 858 *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0; 859 } 860 if (!x->skip_encode && *eob) { 861 vp9_highbd_iht8x8_add(tx_type, dqcoeff, dst16, dst_stride, *eob, 862 xd->bd); 863 } 864 break; 865 case TX_4X4: 866 if (!x->skip_recode) { 867 vpx_highbd_subtract_block(4, 4, src_diff, diff_stride, src, 868 src_stride, dst, dst_stride, xd->bd); 869 if (tx_type != DCT_DCT) 870 vp9_highbd_fht4x4(src_diff, coeff, diff_stride, tx_type); 871 else 872 x->fwd_txm4x4(src_diff, coeff, diff_stride); 873 vpx_highbd_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, 874 p->quant, p->quant_shift, qcoeff, dqcoeff, 875 pd->dequant, eob, scan_order->scan, 876 scan_order->iscan); 877 } 878 if (args->enable_coeff_opt && !x->skip_recode) { 879 *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0; 880 } 881 if (!x->skip_encode && *eob) { 882 if (tx_type == DCT_DCT) { 883 // this is like vp9_short_idct4x4 but has a special case around 884 // eob<=1 which is significant (not just an optimization) for the 885 // lossless case. 886 x->highbd_itxm_add(dqcoeff, dst16, dst_stride, *eob, xd->bd); 887 } else { 888 vp9_highbd_iht4x4_16_add(dqcoeff, dst16, dst_stride, tx_type, 889 xd->bd); 890 } 891 } 892 break; 893 default: assert(0); return; 894 } 895 if (*eob) *(args->skip) = 0; 896 return; 897 } 898 #endif // CONFIG_VP9_HIGHBITDEPTH 899 900 switch (tx_size) { 901 case TX_32X32: 902 if (!x->skip_recode) { 903 vpx_subtract_block(32, 32, src_diff, diff_stride, src, src_stride, dst, 904 dst_stride); 905 fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride); 906 vpx_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round, 907 p->quant, p->quant_shift, qcoeff, dqcoeff, 908 pd->dequant, eob, scan_order->scan, 909 scan_order->iscan); 910 } 911 if (args->enable_coeff_opt && !x->skip_recode) { 912 *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0; 913 } 914 if (!x->skip_encode && *eob) 915 vp9_idct32x32_add(dqcoeff, dst, dst_stride, *eob); 916 break; 917 case TX_16X16: 918 if (!x->skip_recode) { 919 vpx_subtract_block(16, 16, src_diff, diff_stride, src, src_stride, dst, 920 dst_stride); 921 vp9_fht16x16(src_diff, coeff, diff_stride, tx_type); 922 vpx_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, p->quant, 923 p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob, 924 scan_order->scan, scan_order->iscan); 925 } 926 if (args->enable_coeff_opt && !x->skip_recode) { 927 *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0; 928 } 929 if (!x->skip_encode && *eob) 930 vp9_iht16x16_add(tx_type, dqcoeff, dst, dst_stride, *eob); 931 break; 932 case TX_8X8: 933 if (!x->skip_recode) { 934 vpx_subtract_block(8, 8, src_diff, diff_stride, src, src_stride, dst, 935 dst_stride); 936 vp9_fht8x8(src_diff, coeff, diff_stride, tx_type); 937 vpx_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant, 938 p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob, 939 scan_order->scan, scan_order->iscan); 940 } 941 if (args->enable_coeff_opt && !x->skip_recode) { 942 *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0; 943 } 944 if (!x->skip_encode && *eob) 945 vp9_iht8x8_add(tx_type, dqcoeff, dst, dst_stride, *eob); 946 break; 947 case TX_4X4: 948 if (!x->skip_recode) { 949 vpx_subtract_block(4, 4, src_diff, diff_stride, src, src_stride, dst, 950 dst_stride); 951 if (tx_type != DCT_DCT) 952 vp9_fht4x4(src_diff, coeff, diff_stride, tx_type); 953 else 954 x->fwd_txm4x4(src_diff, coeff, diff_stride); 955 vpx_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant, 956 p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob, 957 scan_order->scan, scan_order->iscan); 958 } 959 if (args->enable_coeff_opt && !x->skip_recode) { 960 *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0; 961 } 962 if (!x->skip_encode && *eob) { 963 if (tx_type == DCT_DCT) 964 // this is like vp9_short_idct4x4 but has a special case around eob<=1 965 // which is significant (not just an optimization) for the lossless 966 // case. 967 x->itxm_add(dqcoeff, dst, dst_stride, *eob); 968 else 969 vp9_iht4x4_16_add(dqcoeff, dst, dst_stride, tx_type); 970 } 971 break; 972 default: assert(0); break; 973 } 974 if (*eob) *(args->skip) = 0; 975 } 976 977 void vp9_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane, 978 int enable_optimize_b) { 979 const MACROBLOCKD *const xd = &x->e_mbd; 980 struct optimize_ctx ctx; 981 struct encode_b_args arg = { x, enable_optimize_b, ctx.ta[plane], 982 ctx.tl[plane], &xd->mi[0]->skip }; 983 984 if (enable_optimize_b && x->optimize && 985 (!x->skip_recode || !x->skip_optimize)) { 986 const struct macroblockd_plane *const pd = &xd->plane[plane]; 987 const TX_SIZE tx_size = 988 plane ? get_uv_tx_size(xd->mi[0], pd) : xd->mi[0]->tx_size; 989 vp9_get_entropy_contexts(bsize, tx_size, pd, ctx.ta[plane], ctx.tl[plane]); 990 } else { 991 arg.enable_coeff_opt = 0; 992 } 993 994 vp9_foreach_transformed_block_in_plane(xd, bsize, plane, 995 vp9_encode_block_intra, &arg); 996 } 997