/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "./vp9_rtcd.h"
#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"

#include "vpx_dsp/quantize.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"

#include "vp9/common/vp9_idct.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_scan.h"

#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_rd.h"
#include "vp9/encoder/vp9_tokenize.h"

struct optimize_ctx {
  ENTROPY_CONTEXT ta[MAX_MB_PLANE][16];
  ENTROPY_CONTEXT tl[MAX_MB_PLANE][16];
};

void vp9_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
  struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &x->e_mbd.plane[plane];
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
  const int bw = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int bh = 4 * num_4x4_blocks_high_lookup[plane_bsize];

#if CONFIG_VP9_HIGHBITDEPTH
  if (x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    vpx_highbd_subtract_block(bh, bw, p->src_diff, bw, p->src.buf,
                              p->src.stride, pd->dst.buf, pd->dst.stride,
                              x->e_mbd.bd);
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH
  vpx_subtract_block(bh, bw, p->src_diff, bw, p->src.buf, p->src.stride,
                     pd->dst.buf, pd->dst.stride);
}

static const int plane_rd_mult[REF_TYPES][PLANE_TYPES] = {
  { 10, 6 },
  { 8, 5 },
};

// 'num' can be negative, but 'shift' must be non-negative.
#define RIGHT_SHIFT_POSSIBLY_NEGATIVE(num, shift) \
  (((num) >= 0) ? (num) >> (shift) : -((-(num)) >> (shift)))

int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
                   int ctx) {
  MACROBLOCKD *const xd = &mb->e_mbd;
  struct macroblock_plane *const p = &mb->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  const int ref = is_inter_block(xd->mi[0]);
  uint8_t token_cache[1024];
  const tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  const int eob = p->eobs[block];
  const PLANE_TYPE plane_type = get_plane_type(plane);
  const int default_eob = 16 << (tx_size << 1);
  const int shift = (tx_size == TX_32X32);
  const int16_t *const dequant_ptr = pd->dequant;
  const uint8_t *const band_translate = get_band_translate(tx_size);
  const scan_order *const so = get_scan(xd, tx_size, plane_type, block);
  const int16_t *const scan = so->scan;
  const int16_t *const nb = so->neighbors;
  const MODE_INFO *mbmi = xd->mi[0];
  const int sharpness = mb->sharpness;
  const int64_t rdadj = (int64_t)mb->rdmult * plane_rd_mult[ref][plane_type];
  const int64_t rdmult =
      (sharpness == 0 ? rdadj >> 1
                      : (rdadj * (8 - sharpness + mbmi->segment_id)) >> 4);
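  // Note on the multiplier above: rdadj scales the block-level RD multiplier
  // by plane_rd_mult, which is indexed as [is_inter][plane_type] (intra/inter
  // rows, luma/chroma columns). With sharpness off, the effective multiplier
  // is simply rdadj / 2; with sharpness on it is scaled by
  // (8 - sharpness + segment_id) / 16, so a higher sharpness setting lowers
  // the rate weight and (presumably) biases the optimization toward keeping
  // coefficients rather than zeroing them.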

  const int64_t rddiv = mb->rddiv;
  int64_t rd_cost0, rd_cost1;
  int64_t rate0, rate1;
  int16_t t0, t1;
  int i, final_eob;
  int count_high_values_after_eob = 0;
#if CONFIG_VP9_HIGHBITDEPTH
  const uint16_t *cat6_high_cost = vp9_get_high_cost_table(xd->bd);
#else
  const uint16_t *cat6_high_cost = vp9_get_high_cost_table(8);
#endif
  unsigned int(*const token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
      mb->token_costs[tx_size][plane_type][ref];
  unsigned int(*token_costs_cur)[2][COEFF_CONTEXTS][ENTROPY_TOKENS];
  int64_t eob_cost0, eob_cost1;
  const int ctx0 = ctx;
  int64_t accu_rate = 0;
  // Initialized to the worst possible error for the largest transform size.
  // This ensures that it never goes negative.
  int64_t accu_error = ((int64_t)1) << 50;
  int64_t best_block_rd_cost = INT64_MAX;
  int x_prev = 1;
  tran_low_t before_best_eob_qc = 0;
  tran_low_t before_best_eob_dqc = 0;

  assert((!plane_type && !plane) || (plane_type && plane));
  assert(eob <= default_eob);

  for (i = 0; i < eob; i++) {
    const int rc = scan[i];
    token_cache[rc] = vp9_pt_energy_class[vp9_get_token(qcoeff[rc])];
  }
  final_eob = 0;

  // Initial RD cost.
  token_costs_cur = token_costs + band_translate[0];
  rate0 = (*token_costs_cur)[0][ctx0][EOB_TOKEN];
  best_block_rd_cost = RDCOST(rdmult, rddiv, rate0, accu_error);

  // For each token, pick one of two choices greedily:
  // (i) First candidate: Keep current quantized value, OR
  // (ii) Second candidate: Reduce quantized value by 1.
  for (i = 0; i < eob; i++) {
    const int rc = scan[i];
    const int x = qcoeff[rc];
    const int band_cur = band_translate[i];
    const int ctx_cur = (i == 0) ? ctx : get_coef_context(nb, token_cache, i);
    const int token_tree_sel_cur = (x_prev == 0);
    token_costs_cur = token_costs + band_cur;
    if (x == 0) {  // No need to search
      const int token = vp9_get_token(x);
      rate0 = (*token_costs_cur)[token_tree_sel_cur][ctx_cur][token];
      accu_rate += rate0;
      x_prev = 0;
      // Note: accu_error does not change.
    } else {
      const int dqv = dequant_ptr[rc != 0];
      // Compute the distortion for quantizing to 0.
      const int diff_for_zero_raw = (0 - coeff[rc]) * (1 << shift);
      const int diff_for_zero =
#if CONFIG_VP9_HIGHBITDEPTH
          (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
              ? RIGHT_SHIFT_POSSIBLY_NEGATIVE(diff_for_zero_raw, xd->bd - 8)
              :
#endif
              diff_for_zero_raw;
      const int64_t distortion_for_zero =
          (int64_t)diff_for_zero * diff_for_zero;

      // Compute the distortion for the first candidate
      const int diff0_raw = (dqcoeff[rc] - coeff[rc]) * (1 << shift);
      const int diff0 =
#if CONFIG_VP9_HIGHBITDEPTH
          (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
              ? RIGHT_SHIFT_POSSIBLY_NEGATIVE(diff0_raw, xd->bd - 8)
              :
#endif  // CONFIG_VP9_HIGHBITDEPTH
              diff0_raw;
      const int64_t distortion0 = (int64_t)diff0 * diff0;
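      // Note: for high-bit-depth input, diff_for_zero and diff0 above are
      // right-shifted by (bd - 8) before squaring, presumably so that the
      // squared error stays on the same 8-bit scale that the rate costs and
      // rdmult were derived for.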

      // Compute the distortion for the second candidate
      const int sign = -(x < 0);        // -1 if x is negative and 0 otherwise.
      const int x1 = x - 2 * sign - 1;  // abs(x1) = abs(x) - 1.
      int64_t distortion1;
      if (x1 != 0) {
        const int dqv_step =
#if CONFIG_VP9_HIGHBITDEPTH
            (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? dqv >> (xd->bd - 8)
                                                          :
#endif  // CONFIG_VP9_HIGHBITDEPTH
                                                          dqv;
        const int diff_step = (dqv_step + sign) ^ sign;
        const int diff1 = diff0 - diff_step;
        assert(dqv > 0);  // We aren't right shifting a negative number above.
        distortion1 = (int64_t)diff1 * diff1;
      } else {
        distortion1 = distortion_for_zero;
      }
      {
        // Calculate RDCost for current coeff for the two candidates.
        const int64_t base_bits0 = vp9_get_token_cost(x, &t0, cat6_high_cost);
        const int64_t base_bits1 = vp9_get_token_cost(x1, &t1, cat6_high_cost);
        rate0 =
            base_bits0 + (*token_costs_cur)[token_tree_sel_cur][ctx_cur][t0];
        rate1 =
            base_bits1 + (*token_costs_cur)[token_tree_sel_cur][ctx_cur][t1];
      }
      {
        int rdcost_better_for_x1, eob_rdcost_better_for_x1;
        int dqc0, dqc1;
        int64_t best_eob_cost_cur;
        int use_x1;

        // Calculate RD Cost effect on the next coeff for the two candidates.
        int64_t next_bits0 = 0;
        int64_t next_bits1 = 0;
        int64_t next_eob_bits0 = 0;
        int64_t next_eob_bits1 = 0;
        if (i < default_eob - 1) {
          int ctx_next, token_tree_sel_next;
          const int band_next = band_translate[i + 1];
          const int token_next =
              (i + 1 != eob) ? vp9_get_token(qcoeff[scan[i + 1]]) : EOB_TOKEN;
          unsigned int(*const token_costs_next)[2][COEFF_CONTEXTS]
                                                [ENTROPY_TOKENS] =
                                                    token_costs + band_next;
          token_cache[rc] = vp9_pt_energy_class[t0];
          ctx_next = get_coef_context(nb, token_cache, i + 1);
          token_tree_sel_next = (x == 0);
          next_bits0 =
              (*token_costs_next)[token_tree_sel_next][ctx_next][token_next];
          next_eob_bits0 =
              (*token_costs_next)[token_tree_sel_next][ctx_next][EOB_TOKEN];
          token_cache[rc] = vp9_pt_energy_class[t1];
          ctx_next = get_coef_context(nb, token_cache, i + 1);
          token_tree_sel_next = (x1 == 0);
          next_bits1 =
              (*token_costs_next)[token_tree_sel_next][ctx_next][token_next];
          if (x1 != 0) {
            next_eob_bits1 =
                (*token_costs_next)[token_tree_sel_next][ctx_next][EOB_TOKEN];
          }
        }

        // Compare the total RD costs for two candidates.
        rd_cost0 = RDCOST(rdmult, rddiv, (rate0 + next_bits0), distortion0);
        rd_cost1 = RDCOST(rdmult, rddiv, (rate1 + next_bits1), distortion1);
        rdcost_better_for_x1 = (rd_cost1 < rd_cost0);
        eob_cost0 = RDCOST(rdmult, rddiv, (accu_rate + rate0 + next_eob_bits0),
                           (accu_error + distortion0 - distortion_for_zero));
        eob_cost1 = eob_cost0;
        if (x1 != 0) {
          eob_cost1 =
              RDCOST(rdmult, rddiv, (accu_rate + rate1 + next_eob_bits1),
                     (accu_error + distortion1 - distortion_for_zero));
          eob_rdcost_better_for_x1 = (eob_cost1 < eob_cost0);
        } else {
          eob_rdcost_better_for_x1 = 0;
        }

        // Calculate the two candidate de-quantized values.
        dqc0 = dqcoeff[rc];
        dqc1 = 0;
        if (rdcost_better_for_x1 + eob_rdcost_better_for_x1) {
          if (x1 != 0) {
            dqc1 = RIGHT_SHIFT_POSSIBLY_NEGATIVE(x1 * dqv, shift);
          } else {
            dqc1 = 0;
          }
        }
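        // accu_rate/accu_error accumulate the rate and distortion (relative
        // to zeroing each coefficient) of everything kept so far, so
        // eob_cost0 and eob_cost1 above are the total costs of ending the
        // block right after this coefficient; the cheapest such position seen
        // so far is remembered below as the candidate final_eob.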

        // Pick and record the better quantized and de-quantized values.
        if (rdcost_better_for_x1) {
          qcoeff[rc] = x1;
          dqcoeff[rc] = dqc1;
          accu_rate += rate1;
          accu_error += distortion1 - distortion_for_zero;
          assert(distortion1 <= distortion_for_zero);
          token_cache[rc] = vp9_pt_energy_class[t1];
        } else {
          accu_rate += rate0;
          accu_error += distortion0 - distortion_for_zero;
          assert(distortion0 <= distortion_for_zero);
          token_cache[rc] = vp9_pt_energy_class[t0];
        }
        if (sharpness > 0 && abs(qcoeff[rc]) > 1) count_high_values_after_eob++;
        assert(accu_error >= 0);
        x_prev = qcoeff[rc];  // Update based on selected quantized value.

        use_x1 = (x1 != 0) && eob_rdcost_better_for_x1;
        best_eob_cost_cur = use_x1 ? eob_cost1 : eob_cost0;

        // Determine whether to move the eob position to i+1
        if (best_eob_cost_cur < best_block_rd_cost) {
          best_block_rd_cost = best_eob_cost_cur;
          final_eob = i + 1;
          count_high_values_after_eob = 0;
          if (use_x1) {
            before_best_eob_qc = x1;
            before_best_eob_dqc = dqc1;
          } else {
            before_best_eob_qc = x;
            before_best_eob_dqc = dqc0;
          }
        }
      }
    }
  }
  if (count_high_values_after_eob > 0) {
    final_eob = eob - 1;
    for (; final_eob >= 0; final_eob--) {
      const int rc = scan[final_eob];
      const int x = qcoeff[rc];
      if (x) {
        break;
      }
    }
    final_eob++;
  } else {
    assert(final_eob <= eob);
    if (final_eob > 0) {
      int rc;
      assert(before_best_eob_qc != 0);
      i = final_eob - 1;
      rc = scan[i];
      qcoeff[rc] = before_best_eob_qc;
      dqcoeff[rc] = before_best_eob_dqc;
    }
    for (i = final_eob; i < eob; i++) {
      int rc = scan[i];
      qcoeff[rc] = 0;
      dqcoeff[rc] = 0;
    }
  }
  mb->plane[plane].eobs[block] = final_eob;
  return final_eob;
}
#undef RIGHT_SHIFT_POSSIBLY_NEGATIVE

static INLINE void fdct32x32(int rd_transform, const int16_t *src,
                             tran_low_t *dst, int src_stride) {
  if (rd_transform)
    vpx_fdct32x32_rd(src, dst, src_stride);
  else
    vpx_fdct32x32(src, dst, src_stride);
}

#if CONFIG_VP9_HIGHBITDEPTH
static INLINE void highbd_fdct32x32(int rd_transform, const int16_t *src,
                                    tran_low_t *dst, int src_stride) {
  if (rd_transform)
    vpx_highbd_fdct32x32_rd(src, dst, src_stride);
  else
    vpx_highbd_fdct32x32(src, dst, src_stride);
}
#endif  // CONFIG_VP9_HIGHBITDEPTH

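// Forward transform and "fast path" quantization for one transform block.
// Note that the _fp quantizers called below take only round_fp/quant_fp (no
// zero-bin or quant_shift stage); encode_block() selects this path when
// x->quant_fp is set (the rtc mode).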
void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block, int row, int col,
                        BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  const scan_order *const scan_order = &vp9_default_scan_orders[tx_size];
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  uint16_t *const eob = &p->eobs[block];
  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int16_t *src_diff;
  src_diff = &p->src_diff[4 * (row * diff_stride + col)];
  // The skip-block condition should be handled before this is called.
  assert(!x->skip_block);

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    switch (tx_size) {
      case TX_32X32:
        highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
        vp9_highbd_quantize_fp_32x32(coeff, 1024, x->skip_block, p->round_fp,
                                     p->quant_fp, qcoeff, dqcoeff, pd->dequant,
                                     eob, scan_order->scan, scan_order->iscan);
        break;
      case TX_16X16:
        vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
        vp9_highbd_quantize_fp(coeff, 256, x->skip_block, p->round_fp,
                               p->quant_fp, qcoeff, dqcoeff, pd->dequant, eob,
                               scan_order->scan, scan_order->iscan);
        break;
      case TX_8X8:
        vpx_highbd_fdct8x8(src_diff, coeff, diff_stride);
        vp9_highbd_quantize_fp(coeff, 64, x->skip_block, p->round_fp,
                               p->quant_fp, qcoeff, dqcoeff, pd->dequant, eob,
                               scan_order->scan, scan_order->iscan);
        break;
      default:
        assert(tx_size == TX_4X4);
        x->fwd_txfm4x4(src_diff, coeff, diff_stride);
        vp9_highbd_quantize_fp(coeff, 16, x->skip_block, p->round_fp,
                               p->quant_fp, qcoeff, dqcoeff, pd->dequant, eob,
                               scan_order->scan, scan_order->iscan);
        break;
    }
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  switch (tx_size) {
    case TX_32X32:
      fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
      vp9_quantize_fp_32x32(coeff, 1024, x->skip_block, p->round_fp,
                            p->quant_fp, qcoeff, dqcoeff, pd->dequant, eob,
                            scan_order->scan, scan_order->iscan);
      break;
    case TX_16X16:
      vpx_fdct16x16(src_diff, coeff, diff_stride);
      vp9_quantize_fp(coeff, 256, x->skip_block, p->round_fp, p->quant_fp,
                      qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan,
                      scan_order->iscan);
      break;
    case TX_8X8:
      vp9_fdct8x8_quant(src_diff, diff_stride, coeff, 64, x->skip_block,
                        p->round_fp, p->quant_fp, qcoeff, dqcoeff, pd->dequant,
                        eob, scan_order->scan, scan_order->iscan);
      break;
    default:
      assert(tx_size == TX_4X4);
      x->fwd_txfm4x4(src_diff, coeff, diff_stride);
      vp9_quantize_fp(coeff, 16, x->skip_block, p->round_fp, p->quant_fp,
                      qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan,
                      scan_order->iscan);
      break;
  }
}

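// DC-only variant: the *_1 forward transforms below compute just the DC term
// and the quantize_dc helpers quantize only that coefficient. encode_block()
// uses this on the SKIP_TXFM_AC_ONLY fast path.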
void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block, int row, int col,
                        BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  uint16_t *const eob = &p->eobs[block];
  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int16_t *src_diff;
  src_diff = &p->src_diff[4 * (row * diff_stride + col)];
  // The skip-block condition should be handled before this is called.
  assert(!x->skip_block);

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    switch (tx_size) {
      case TX_32X32:
        vpx_highbd_fdct32x32_1(src_diff, coeff, diff_stride);
        vpx_highbd_quantize_dc_32x32(coeff, x->skip_block, p->round,
                                     p->quant_fp[0], qcoeff, dqcoeff,
                                     pd->dequant[0], eob);
        break;
      case TX_16X16:
        vpx_highbd_fdct16x16_1(src_diff, coeff, diff_stride);
        vpx_highbd_quantize_dc(coeff, 256, x->skip_block, p->round,
                               p->quant_fp[0], qcoeff, dqcoeff, pd->dequant[0],
                               eob);
        break;
      case TX_8X8:
        vpx_highbd_fdct8x8_1(src_diff, coeff, diff_stride);
        vpx_highbd_quantize_dc(coeff, 64, x->skip_block, p->round,
                               p->quant_fp[0], qcoeff, dqcoeff, pd->dequant[0],
                               eob);
        break;
      default:
        assert(tx_size == TX_4X4);
        x->fwd_txfm4x4(src_diff, coeff, diff_stride);
        vpx_highbd_quantize_dc(coeff, 16, x->skip_block, p->round,
                               p->quant_fp[0], qcoeff, dqcoeff, pd->dequant[0],
                               eob);
        break;
    }
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  switch (tx_size) {
    case TX_32X32:
      vpx_fdct32x32_1(src_diff, coeff, diff_stride);
      vpx_quantize_dc_32x32(coeff, x->skip_block, p->round, p->quant_fp[0],
                            qcoeff, dqcoeff, pd->dequant[0], eob);
      break;
    case TX_16X16:
      vpx_fdct16x16_1(src_diff, coeff, diff_stride);
      vpx_quantize_dc(coeff, 256, x->skip_block, p->round, p->quant_fp[0],
                      qcoeff, dqcoeff, pd->dequant[0], eob);
      break;
    case TX_8X8:
      vpx_fdct8x8_1(src_diff, coeff, diff_stride);
      vpx_quantize_dc(coeff, 64, x->skip_block, p->round, p->quant_fp[0],
                      qcoeff, dqcoeff, pd->dequant[0], eob);
      break;
    default:
      assert(tx_size == TX_4X4);
      x->fwd_txfm4x4(src_diff, coeff, diff_stride);
      vpx_quantize_dc(coeff, 16, x->skip_block, p->round, p->quant_fp[0],
                      qcoeff, dqcoeff, pd->dequant[0], eob);
      break;
  }
}

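// Full forward transform plus the regular "B" quantizer (zbin, round, quant,
// quant_shift); this is the path used when the encoder is not in the fast
// quant_fp mode.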
void vp9_xform_quant(MACROBLOCK *x, int plane, int block, int row, int col,
                     BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  const scan_order *const scan_order = &vp9_default_scan_orders[tx_size];
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  uint16_t *const eob = &p->eobs[block];
  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int16_t *src_diff;
  src_diff = &p->src_diff[4 * (row * diff_stride + col)];
  // The skip-block condition should be handled before this is called.
  assert(!x->skip_block);

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    switch (tx_size) {
      case TX_32X32:
        highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
        vpx_highbd_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin,
                                    p->round, p->quant, p->quant_shift, qcoeff,
                                    dqcoeff, pd->dequant, eob, scan_order->scan,
                                    scan_order->iscan);
        break;
      case TX_16X16:
        vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
        vpx_highbd_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
                              p->quant, p->quant_shift, qcoeff, dqcoeff,
                              pd->dequant, eob, scan_order->scan,
                              scan_order->iscan);
        break;
      case TX_8X8:
        vpx_highbd_fdct8x8(src_diff, coeff, diff_stride);
        vpx_highbd_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round,
                              p->quant, p->quant_shift, qcoeff, dqcoeff,
                              pd->dequant, eob, scan_order->scan,
                              scan_order->iscan);
        break;
      default:
        assert(tx_size == TX_4X4);
        x->fwd_txfm4x4(src_diff, coeff, diff_stride);
        vpx_highbd_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,
                              p->quant, p->quant_shift, qcoeff, dqcoeff,
                              pd->dequant, eob, scan_order->scan,
                              scan_order->iscan);
        break;
    }
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  switch (tx_size) {
    case TX_32X32:
      fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
      vpx_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round,
                           p->quant, p->quant_shift, qcoeff, dqcoeff,
                           pd->dequant, eob, scan_order->scan,
                           scan_order->iscan);
      break;
    case TX_16X16:
      vpx_fdct16x16(src_diff, coeff, diff_stride);
      vpx_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, p->quant,
                     p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
                     scan_order->scan, scan_order->iscan);
      break;
    case TX_8X8:
      vpx_fdct8x8(src_diff, coeff, diff_stride);
      vpx_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant,
                     p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
                     scan_order->scan, scan_order->iscan);
      break;
    default:
      assert(tx_size == TX_4X4);
      x->fwd_txfm4x4(src_diff, coeff, diff_stride);
      vpx_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant,
                     p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
                     scan_order->scan, scan_order->iscan);
      break;
  }
}

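// Encodes one transform block for the inter/non-intra path: decide whether
// the forward transform can be skipped (zcoeff_blk / skip_txfm hints), run
// the appropriate transform and quantization, optionally run the trellis
// optimizer, and finally inverse-transform the result into the
// reconstruction buffer.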
static void encode_block(int plane, int block, int row, int col,
                         BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) {
  struct encode_b_args *const args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  uint8_t *dst;
  ENTROPY_CONTEXT *a, *l;
  dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col];
  a = &args->ta[col];
  l = &args->tl[row];

  // TODO(jingning): Per-transform-block zero forcing is only enabled for the
  // luma component; the chroma components will be integrated as well.
  if (x->zcoeff_blk[tx_size][block] && plane == 0) {
    p->eobs[block] = 0;
    *a = *l = 0;
    return;
  }

  if (!x->skip_recode) {
    if (x->quant_fp) {
      // Encoding process for rtc mode.
      if (x->skip_txfm[0] == SKIP_TXFM_AC_DC && plane == 0) {
        // Skip the forward transform.
        p->eobs[block] = 0;
        *a = *l = 0;
        return;
      } else {
        vp9_xform_quant_fp(x, plane, block, row, col, plane_bsize, tx_size);
      }
    } else {
      if (max_txsize_lookup[plane_bsize] == tx_size) {
        int txfm_blk_index = (plane << 2) + (block >> (tx_size << 1));
        if (x->skip_txfm[txfm_blk_index] == SKIP_TXFM_NONE) {
          // Full forward transform and quantization.
          vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size);
        } else if (x->skip_txfm[txfm_blk_index] == SKIP_TXFM_AC_ONLY) {
          // Fast path: forward transform and quantization of the DC only.
          vp9_xform_quant_dc(x, plane, block, row, col, plane_bsize, tx_size);
        } else {
          // Skip the forward transform.
          p->eobs[block] = 0;
          *a = *l = 0;
          return;
        }
      } else {
        vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size);
      }
    }
  }

  if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
    const int ctx = combine_entropy_contexts(*a, *l);
    *a = *l = vp9_optimize_b(x, plane, block, tx_size, ctx) > 0;
  } else {
    *a = *l = p->eobs[block] > 0;
  }

  if (p->eobs[block]) *(args->skip) = 0;

  if (x->skip_encode || p->eobs[block] == 0) return;
#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    uint16_t *const dst16 = CONVERT_TO_SHORTPTR(dst);
    switch (tx_size) {
      case TX_32X32:
        vp9_highbd_idct32x32_add(dqcoeff, dst16, pd->dst.stride,
                                 p->eobs[block], xd->bd);
        break;
      case TX_16X16:
        vp9_highbd_idct16x16_add(dqcoeff, dst16, pd->dst.stride,
                                 p->eobs[block], xd->bd);
        break;
      case TX_8X8:
        vp9_highbd_idct8x8_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block],
                               xd->bd);
        break;
      default:
        assert(tx_size == TX_4X4);
        // This is like vp9_short_idct4x4, but it has a special case around
        // eob <= 1, which is significant (not just an optimization) for the
        // lossless case.
        x->highbd_inv_txfm_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block],
                               xd->bd);
        break;
    }
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  switch (tx_size) {
    case TX_32X32:
      vp9_idct32x32_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
      break;
    case TX_16X16:
      vp9_idct16x16_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
      break;
    case TX_8X8:
      vp9_idct8x8_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
      break;
    default:
      assert(tx_size == TX_4X4);
      // This is like vp9_short_idct4x4, but it has a special case around
      // eob <= 1, which is significant (not just an optimization) for the
      // lossless case.
      x->inv_txfm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
      break;
  }
}

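// First-pass per-block encode: always runs the full transform and
// quantization and reconstructs the block, without the skip/optimize logic
// used in encode_block().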
static void encode_block_pass1(int plane, int block, int row, int col,
                               BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
                               void *arg) {
  MACROBLOCK *const x = (MACROBLOCK *)arg;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  uint8_t *dst;
  dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col];

  vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size);

  if (p->eobs[block] > 0) {
#if CONFIG_VP9_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      x->highbd_inv_txfm_add(dqcoeff, CONVERT_TO_SHORTPTR(dst), pd->dst.stride,
                             p->eobs[block], xd->bd);
      return;
    }
#endif  // CONFIG_VP9_HIGHBITDEPTH
    x->inv_txfm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
  }
}

void vp9_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize) {
  vp9_subtract_plane(x, bsize, 0);
  vp9_foreach_transformed_block_in_plane(&x->e_mbd, bsize, 0,
                                         encode_block_pass1, x);
}

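// Encodes every plane of the given block: subtract the prediction, set up
// per-plane entropy contexts when the coefficient optimizer is enabled, and
// then run encode_block() over each transform block in the plane.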
void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) {
  MACROBLOCKD *const xd = &x->e_mbd;
  struct optimize_ctx ctx;
  MODE_INFO *mi = xd->mi[0];
  struct encode_b_args arg = { x, 1, NULL, NULL, &mi->skip };
  int plane;

  mi->skip = 1;

  if (x->skip) return;

  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
    if (!x->skip_recode) vp9_subtract_plane(x, bsize, plane);

    if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
      const struct macroblockd_plane *const pd = &xd->plane[plane];
      const TX_SIZE tx_size = plane ? get_uv_tx_size(mi, pd) : mi->tx_size;
      vp9_get_entropy_contexts(bsize, tx_size, pd, ctx.ta[plane],
                               ctx.tl[plane]);
      arg.enable_coeff_opt = 1;
    } else {
      arg.enable_coeff_opt = 0;
    }
    arg.ta = ctx.ta[plane];
    arg.tl = ctx.tl[plane];

    vp9_foreach_transformed_block_in_plane(xd, bsize, plane, encode_block,
                                           &arg);
  }
}

void vp9_encode_block_intra(int plane, int block, int row, int col,
                            BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
                            void *arg) {
  struct encode_b_args *const args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *mi = xd->mi[0];
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  tran_low_t *coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  const scan_order *scan_order;
  TX_TYPE tx_type = DCT_DCT;
  PREDICTION_MODE mode;
  const int bwl = b_width_log2_lookup[plane_bsize];
  const int diff_stride = 4 * (1 << bwl);
  uint8_t *src, *dst;
  int16_t *src_diff;
  uint16_t *eob = &p->eobs[block];
  const int src_stride = p->src.stride;
  const int dst_stride = pd->dst.stride;
  ENTROPY_CONTEXT *a = NULL;
  ENTROPY_CONTEXT *l = NULL;
  int entropy_ctx = 0;
  dst = &pd->dst.buf[4 * (row * dst_stride + col)];
  src = &p->src.buf[4 * (row * src_stride + col)];
  src_diff = &p->src_diff[4 * (row * diff_stride + col)];
  if (args->enable_coeff_opt) {
    a = &args->ta[col];
    l = &args->tl[row];
    entropy_ctx = combine_entropy_contexts(*a, *l);
  }

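  // Select the transform type, scan order, and prediction mode below: 4x4
  // blocks may use a per-block ADST/DCT hybrid derived from the intra mode,
  // 8x8 and 16x16 use the block-level transform type, and 32x32 is always
  // DCT with the default scan order.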
  if (tx_size == TX_4X4) {
    tx_type = get_tx_type_4x4(get_plane_type(plane), xd, block);
    scan_order = &vp9_scan_orders[TX_4X4][tx_type];
    mode = plane == 0 ? get_y_mode(xd->mi[0], block) : mi->uv_mode;
  } else {
    mode = plane == 0 ? mi->mode : mi->uv_mode;
    if (tx_size == TX_32X32) {
      scan_order = &vp9_default_scan_orders[TX_32X32];
    } else {
      tx_type = get_tx_type(get_plane_type(plane), xd);
      scan_order = &vp9_scan_orders[tx_size][tx_type];
    }
  }

  vp9_predict_intra_block(
      xd, bwl, tx_size, mode, (x->skip_encode || x->fp_src_pred) ? src : dst,
      (x->skip_encode || x->fp_src_pred) ? src_stride : dst_stride, dst,
      dst_stride, col, row, plane);

  // The skip-block condition should be handled before this is called.
  assert(!x->skip_block);

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    uint16_t *const dst16 = CONVERT_TO_SHORTPTR(dst);
    switch (tx_size) {
      case TX_32X32:
        if (!x->skip_recode) {
          vpx_highbd_subtract_block(32, 32, src_diff, diff_stride, src,
                                    src_stride, dst, dst_stride, xd->bd);
          highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
          vpx_highbd_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin,
                                      p->round, p->quant, p->quant_shift,
                                      qcoeff, dqcoeff, pd->dequant, eob,
                                      scan_order->scan, scan_order->iscan);
        }
        if (args->enable_coeff_opt && !x->skip_recode) {
          *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
        }
        if (!x->skip_encode && *eob) {
          vp9_highbd_idct32x32_add(dqcoeff, dst16, dst_stride, *eob, xd->bd);
        }
        break;
      case TX_16X16:
        if (!x->skip_recode) {
          vpx_highbd_subtract_block(16, 16, src_diff, diff_stride, src,
                                    src_stride, dst, dst_stride, xd->bd);
          if (tx_type == DCT_DCT)
            vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
          else
            vp9_highbd_fht16x16(src_diff, coeff, diff_stride, tx_type);
          vpx_highbd_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
                                p->quant, p->quant_shift, qcoeff, dqcoeff,
                                pd->dequant, eob, scan_order->scan,
                                scan_order->iscan);
        }
        if (args->enable_coeff_opt && !x->skip_recode) {
          *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
        }
        if (!x->skip_encode && *eob) {
          vp9_highbd_iht16x16_add(tx_type, dqcoeff, dst16, dst_stride, *eob,
                                  xd->bd);
        }
        break;
      case TX_8X8:
        if (!x->skip_recode) {
          vpx_highbd_subtract_block(8, 8, src_diff, diff_stride, src,
                                    src_stride, dst, dst_stride, xd->bd);
          if (tx_type == DCT_DCT)
            vpx_highbd_fdct8x8(src_diff, coeff, diff_stride);
          else
            vp9_highbd_fht8x8(src_diff, coeff, diff_stride, tx_type);
          vpx_highbd_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round,
                                p->quant, p->quant_shift, qcoeff, dqcoeff,
                                pd->dequant, eob, scan_order->scan,
                                scan_order->iscan);
        }
        if (args->enable_coeff_opt && !x->skip_recode) {
          *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
        }
        if (!x->skip_encode && *eob) {
          vp9_highbd_iht8x8_add(tx_type, dqcoeff, dst16, dst_stride, *eob,
                                xd->bd);
        }
        break;
      default:
        assert(tx_size == TX_4X4);
        if (!x->skip_recode) {
          vpx_highbd_subtract_block(4, 4, src_diff, diff_stride, src,
                                    src_stride, dst, dst_stride, xd->bd);
          if (tx_type != DCT_DCT)
            vp9_highbd_fht4x4(src_diff, coeff, diff_stride, tx_type);
          else
            x->fwd_txfm4x4(src_diff, coeff, diff_stride);
          vpx_highbd_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,
                                p->quant, p->quant_shift, qcoeff, dqcoeff,
                                pd->dequant, eob, scan_order->scan,
                                scan_order->iscan);
        }
        if (args->enable_coeff_opt && !x->skip_recode) {
          *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
        }
        if (!x->skip_encode && *eob) {
          if (tx_type == DCT_DCT) {
            // This is like vp9_short_idct4x4, but it has a special case
            // around eob <= 1, which is significant (not just an
            // optimization) for the lossless case.
            x->highbd_inv_txfm_add(dqcoeff, dst16, dst_stride, *eob, xd->bd);
          } else {
            vp9_highbd_iht4x4_16_add(dqcoeff, dst16, dst_stride, tx_type,
                                     xd->bd);
          }
        }
        break;
    }
    if (*eob) *(args->skip) = 0;
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  switch (tx_size) {
    case TX_32X32:
      if (!x->skip_recode) {
        vpx_subtract_block(32, 32, src_diff, diff_stride, src, src_stride, dst,
                           dst_stride);
        fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
        vpx_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round,
                             p->quant, p->quant_shift, qcoeff, dqcoeff,
                             pd->dequant, eob, scan_order->scan,
                             scan_order->iscan);
      }
      if (args->enable_coeff_opt && !x->skip_recode) {
        *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
      }
      if (!x->skip_encode && *eob)
        vp9_idct32x32_add(dqcoeff, dst, dst_stride, *eob);
      break;
    case TX_16X16:
      if (!x->skip_recode) {
        vpx_subtract_block(16, 16, src_diff, diff_stride, src, src_stride, dst,
                           dst_stride);
        vp9_fht16x16(src_diff, coeff, diff_stride, tx_type);
        vpx_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, p->quant,
                       p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
                       scan_order->scan, scan_order->iscan);
      }
      if (args->enable_coeff_opt && !x->skip_recode) {
        *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
      }
      if (!x->skip_encode && *eob)
        vp9_iht16x16_add(tx_type, dqcoeff, dst, dst_stride, *eob);
      break;
    case TX_8X8:
      if (!x->skip_recode) {
        vpx_subtract_block(8, 8, src_diff, diff_stride, src, src_stride, dst,
                           dst_stride);
        vp9_fht8x8(src_diff, coeff, diff_stride, tx_type);
        vpx_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant,
                       p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
                       scan_order->scan, scan_order->iscan);
      }
      if (args->enable_coeff_opt && !x->skip_recode) {
        *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
      }
      if (!x->skip_encode && *eob)
        vp9_iht8x8_add(tx_type, dqcoeff, dst, dst_stride, *eob);
      break;
    default:
      assert(tx_size == TX_4X4);
      if (!x->skip_recode) {
        vpx_subtract_block(4, 4, src_diff, diff_stride, src, src_stride, dst,
                           dst_stride);
        if (tx_type != DCT_DCT)
          vp9_fht4x4(src_diff, coeff, diff_stride, tx_type);
        else
          x->fwd_txfm4x4(src_diff, coeff, diff_stride);
        vpx_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant,
                       p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
                       scan_order->scan, scan_order->iscan);
      }
      if (args->enable_coeff_opt && !x->skip_recode) {
        *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
      }
      if (!x->skip_encode && *eob) {
        if (tx_type == DCT_DCT)
          // This is like vp9_short_idct4x4, but it has a special case around
          // eob <= 1, which is significant (not just an optimization) for
          // the lossless case.
          x->inv_txfm_add(dqcoeff, dst, dst_stride, *eob);
        else
          vp9_iht4x4_16_add(dqcoeff, dst, dst_stride, tx_type);
      }
      break;
  }
  if (*eob) *(args->skip) = 0;
}

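// Encodes one plane of an intra-coded block by running
// vp9_encode_block_intra over each transform block; per-plane entropy
// contexts are only set up when the trellis coefficient optimizer is enabled.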
void vp9_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane,
                                  int enable_optimize_b) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  struct optimize_ctx ctx;
  struct encode_b_args arg = { x, enable_optimize_b, ctx.ta[plane],
                               ctx.tl[plane], &xd->mi[0]->skip };

  if (enable_optimize_b && x->optimize &&
      (!x->skip_recode || !x->skip_optimize)) {
    const struct macroblockd_plane *const pd = &xd->plane[plane];
    const TX_SIZE tx_size =
        plane ? get_uv_tx_size(xd->mi[0], pd) : xd->mi[0]->tx_size;
    vp9_get_entropy_contexts(bsize, tx_size, pd, ctx.ta[plane], ctx.tl[plane]);
  } else {
    arg.enable_coeff_opt = 0;
  }

  vp9_foreach_transformed_block_in_plane(xd, bsize, plane,
                                         vp9_encode_block_intra, &arg);
}