/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "./vp9_rtcd.h"
#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"

#include "vpx_dsp/quantize.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"

#include "vp9/common/vp9_idct.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_scan.h"

#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_rd.h"
#include "vp9/encoder/vp9_tokenize.h"

// Above (ta) and left (tl) entropy contexts per plane, one entry per 4x4
// column/row; 16 entries cover a 64x64 superblock (16 4x4 units per edge).
struct optimize_ctx {
  ENTROPY_CONTEXT ta[MAX_MB_PLANE][16];
  ENTROPY_CONTEXT tl[MAX_MB_PLANE][16];
};

// Computes the pixel-domain residual (source minus current prediction) for an
// entire plane of the given block size and stores it in p->src_diff, taking
// the high-bitdepth path when the current frame buffer is high bitdepth.
void vp9_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
  struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &x->e_mbd.plane[plane];
  // Plane block size accounts for chroma subsampling of this plane.
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
  const int bw = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int bh = 4 * num_4x4_blocks_high_lookup[plane_bsize];

#if CONFIG_VP9_HIGHBITDEPTH
  if (x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    vpx_highbd_subtract_block(bh, bw, p->src_diff, bw, p->src.buf,
                              p->src.stride, pd->dst.buf, pd->dst.stride,
                              x->e_mbd.bd);
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH
  vpx_subtract_block(bh, bw, p->src_diff, bw, p->src.buf, p->src.stride,
                     pd->dst.buf, pd->dst.stride);
}

// Rate-scaling multipliers for vp9_optimize_b, indexed as
// [ref type (intra/inter)][plane type (luma/chroma)]; applied to mb->rdmult.
static const int plane_rd_mult[REF_TYPES][PLANE_TYPES] = {
  { 10, 6 }, { 8, 5 },
};

// 'num' can be negative, but 'shift' must be non-negative.
57 #define RIGHT_SHIFT_POSSIBLY_NEGATIVE(num, shift) \ 58 ((num) >= 0) ? (num) >> (shift) : -((-(num)) >> (shift)) 59 60 int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size, 61 int ctx) { 62 MACROBLOCKD *const xd = &mb->e_mbd; 63 struct macroblock_plane *const p = &mb->plane[plane]; 64 struct macroblockd_plane *const pd = &xd->plane[plane]; 65 const int ref = is_inter_block(xd->mi[0]); 66 uint8_t token_cache[1024]; 67 const tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block); 68 tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block); 69 tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); 70 const int eob = p->eobs[block]; 71 const PLANE_TYPE plane_type = get_plane_type(plane); 72 const int default_eob = 16 << (tx_size << 1); 73 const int shift = (tx_size == TX_32X32); 74 const int16_t *const dequant_ptr = pd->dequant; 75 const uint8_t *const band_translate = get_band_translate(tx_size); 76 const scan_order *const so = get_scan(xd, tx_size, plane_type, block); 77 const int16_t *const scan = so->scan; 78 const int16_t *const nb = so->neighbors; 79 const int64_t rdmult = 80 ((int64_t)mb->rdmult * plane_rd_mult[ref][plane_type]) >> 1; 81 const int64_t rddiv = mb->rddiv; 82 int64_t rd_cost0, rd_cost1; 83 int64_t rate0, rate1; 84 int16_t t0, t1; 85 int i, final_eob; 86 #if CONFIG_VP9_HIGHBITDEPTH 87 const uint16_t *cat6_high_cost = vp9_get_high_cost_table(xd->bd); 88 #else 89 const uint16_t *cat6_high_cost = vp9_get_high_cost_table(8); 90 #endif 91 unsigned int(*const token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] = 92 mb->token_costs[tx_size][plane_type][ref]; 93 unsigned int(*token_costs_cur)[2][COEFF_CONTEXTS][ENTROPY_TOKENS]; 94 int64_t eob_cost0, eob_cost1; 95 const int ctx0 = ctx; 96 int64_t accu_rate = 0; 97 // Initialized to the worst possible error for the largest transform size. 98 // This ensures that it never goes negative. 
99 int64_t accu_error = ((int64_t)1) << 50; 100 int64_t best_block_rd_cost = INT64_MAX; 101 int x_prev = 1; 102 tran_low_t before_best_eob_qc = 0; 103 tran_low_t before_best_eob_dqc = 0; 104 105 assert((!plane_type && !plane) || (plane_type && plane)); 106 assert(eob <= default_eob); 107 108 for (i = 0; i < eob; i++) { 109 const int rc = scan[i]; 110 token_cache[rc] = vp9_pt_energy_class[vp9_get_token(qcoeff[rc])]; 111 } 112 final_eob = 0; 113 114 // Initial RD cost. 115 token_costs_cur = token_costs + band_translate[0]; 116 rate0 = (*token_costs_cur)[0][ctx0][EOB_TOKEN]; 117 best_block_rd_cost = RDCOST(rdmult, rddiv, rate0, accu_error); 118 119 // For each token, pick one of two choices greedily: 120 // (i) First candidate: Keep current quantized value, OR 121 // (ii) Second candidate: Reduce quantized value by 1. 122 for (i = 0; i < eob; i++) { 123 const int rc = scan[i]; 124 const int x = qcoeff[rc]; 125 const int band_cur = band_translate[i]; 126 const int ctx_cur = (i == 0) ? ctx : get_coef_context(nb, token_cache, i); 127 const int token_tree_sel_cur = (x_prev == 0); 128 token_costs_cur = token_costs + band_cur; 129 if (x == 0) { // No need to search 130 const int token = vp9_get_token(x); 131 rate0 = (*token_costs_cur)[token_tree_sel_cur][ctx_cur][token]; 132 accu_rate += rate0; 133 x_prev = 0; 134 // Note: accu_error does not change. 135 } else { 136 const int dqv = dequant_ptr[rc != 0]; 137 // Compute the distortion for quantizing to 0. 138 const int diff_for_zero_raw = (0 - coeff[rc]) * (1 << shift); 139 const int diff_for_zero = 140 #if CONFIG_VP9_HIGHBITDEPTH 141 (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) 142 ? 
RIGHT_SHIFT_POSSIBLY_NEGATIVE(diff_for_zero_raw, xd->bd - 8) 143 : 144 #endif 145 diff_for_zero_raw; 146 const int64_t distortion_for_zero = 147 (int64_t)diff_for_zero * diff_for_zero; 148 149 // Compute the distortion for the first candidate 150 const int diff0_raw = (dqcoeff[rc] - coeff[rc]) * (1 << shift); 151 const int diff0 = 152 #if CONFIG_VP9_HIGHBITDEPTH 153 (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) 154 ? RIGHT_SHIFT_POSSIBLY_NEGATIVE(diff0_raw, xd->bd - 8) 155 : 156 #endif // CONFIG_VP9_HIGHBITDEPTH 157 diff0_raw; 158 const int64_t distortion0 = (int64_t)diff0 * diff0; 159 160 // Compute the distortion for the second candidate 161 const int sign = -(x < 0); // -1 if x is negative and 0 otherwise. 162 const int x1 = x - 2 * sign - 1; // abs(x1) = abs(x) - 1. 163 int64_t distortion1; 164 if (x1 != 0) { 165 const int dqv_step = 166 #if CONFIG_VP9_HIGHBITDEPTH 167 (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? dqv >> (xd->bd - 8) 168 : 169 #endif // CONFIG_VP9_HIGHBITDEPTH 170 dqv; 171 const int diff_step = (dqv_step + sign) ^ sign; 172 const int diff1 = diff0 - diff_step; 173 assert(dqv > 0); // We aren't right shifting a negative number above. 174 distortion1 = (int64_t)diff1 * diff1; 175 } else { 176 distortion1 = distortion_for_zero; 177 } 178 { 179 // Calculate RDCost for current coeff for the two candidates. 180 const int64_t base_bits0 = vp9_get_token_cost(x, &t0, cat6_high_cost); 181 const int64_t base_bits1 = vp9_get_token_cost(x1, &t1, cat6_high_cost); 182 rate0 = 183 base_bits0 + (*token_costs_cur)[token_tree_sel_cur][ctx_cur][t0]; 184 rate1 = 185 base_bits1 + (*token_costs_cur)[token_tree_sel_cur][ctx_cur][t1]; 186 } 187 { 188 int rdcost_better_for_x1, eob_rdcost_better_for_x1; 189 int dqc0, dqc1; 190 int64_t best_eob_cost_cur; 191 int use_x1; 192 193 // Calculate RD Cost effect on the next coeff for the two candidates. 
194 int64_t next_bits0 = 0; 195 int64_t next_bits1 = 0; 196 int64_t next_eob_bits0 = 0; 197 int64_t next_eob_bits1 = 0; 198 if (i < default_eob - 1) { 199 int ctx_next, token_tree_sel_next; 200 const int band_next = band_translate[i + 1]; 201 const int token_next = 202 (i + 1 != eob) ? vp9_get_token(qcoeff[scan[i + 1]]) : EOB_TOKEN; 203 unsigned int( 204 *const token_costs_next)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] = 205 token_costs + band_next; 206 token_cache[rc] = vp9_pt_energy_class[t0]; 207 ctx_next = get_coef_context(nb, token_cache, i + 1); 208 token_tree_sel_next = (x == 0); 209 next_bits0 = 210 (*token_costs_next)[token_tree_sel_next][ctx_next][token_next]; 211 next_eob_bits0 = 212 (*token_costs_next)[token_tree_sel_next][ctx_next][EOB_TOKEN]; 213 token_cache[rc] = vp9_pt_energy_class[t1]; 214 ctx_next = get_coef_context(nb, token_cache, i + 1); 215 token_tree_sel_next = (x1 == 0); 216 next_bits1 = 217 (*token_costs_next)[token_tree_sel_next][ctx_next][token_next]; 218 if (x1 != 0) { 219 next_eob_bits1 = 220 (*token_costs_next)[token_tree_sel_next][ctx_next][EOB_TOKEN]; 221 } 222 } 223 224 // Compare the total RD costs for two candidates. 225 rd_cost0 = RDCOST(rdmult, rddiv, (rate0 + next_bits0), distortion0); 226 rd_cost1 = RDCOST(rdmult, rddiv, (rate1 + next_bits1), distortion1); 227 rdcost_better_for_x1 = (rd_cost1 < rd_cost0); 228 eob_cost0 = RDCOST(rdmult, rddiv, (accu_rate + rate0 + next_eob_bits0), 229 (accu_error + distortion0 - distortion_for_zero)); 230 eob_cost1 = eob_cost0; 231 if (x1 != 0) { 232 eob_cost1 = 233 RDCOST(rdmult, rddiv, (accu_rate + rate1 + next_eob_bits1), 234 (accu_error + distortion1 - distortion_for_zero)); 235 eob_rdcost_better_for_x1 = (eob_cost1 < eob_cost0); 236 } else { 237 eob_rdcost_better_for_x1 = 0; 238 } 239 240 // Calculate the two candidate de-quantized values. 
241 dqc0 = dqcoeff[rc]; 242 dqc1 = 0; 243 if (rdcost_better_for_x1 + eob_rdcost_better_for_x1) { 244 if (x1 != 0) { 245 dqc1 = RIGHT_SHIFT_POSSIBLY_NEGATIVE(x1 * dqv, shift); 246 } else { 247 dqc1 = 0; 248 } 249 } 250 251 // Pick and record the better quantized and de-quantized values. 252 if (rdcost_better_for_x1) { 253 qcoeff[rc] = x1; 254 dqcoeff[rc] = dqc1; 255 accu_rate += rate1; 256 accu_error += distortion1 - distortion_for_zero; 257 assert(distortion1 <= distortion_for_zero); 258 token_cache[rc] = vp9_pt_energy_class[t1]; 259 } else { 260 accu_rate += rate0; 261 accu_error += distortion0 - distortion_for_zero; 262 assert(distortion0 <= distortion_for_zero); 263 token_cache[rc] = vp9_pt_energy_class[t0]; 264 } 265 assert(accu_error >= 0); 266 x_prev = qcoeff[rc]; // Update based on selected quantized value. 267 268 use_x1 = (x1 != 0) && eob_rdcost_better_for_x1; 269 best_eob_cost_cur = use_x1 ? eob_cost1 : eob_cost0; 270 271 // Determine whether to move the eob position to i+1 272 if (best_eob_cost_cur < best_block_rd_cost) { 273 best_block_rd_cost = best_eob_cost_cur; 274 final_eob = i + 1; 275 if (use_x1) { 276 before_best_eob_qc = x1; 277 before_best_eob_dqc = dqc1; 278 } else { 279 before_best_eob_qc = x; 280 before_best_eob_dqc = dqc0; 281 } 282 } 283 } 284 } 285 } 286 assert(final_eob <= eob); 287 if (final_eob > 0) { 288 int rc; 289 assert(before_best_eob_qc != 0); 290 i = final_eob - 1; 291 rc = scan[i]; 292 qcoeff[rc] = before_best_eob_qc; 293 dqcoeff[rc] = before_best_eob_dqc; 294 } 295 for (i = final_eob; i < eob; i++) { 296 int rc = scan[i]; 297 qcoeff[rc] = 0; 298 dqcoeff[rc] = 0; 299 } 300 mb->plane[plane].eobs[block] = final_eob; 301 return final_eob; 302 } 303 #undef RIGHT_SHIFT_POSSIBLY_NEGATIVE 304 305 static INLINE void fdct32x32(int rd_transform, const int16_t *src, 306 tran_low_t *dst, int src_stride) { 307 if (rd_transform) 308 vpx_fdct32x32_rd(src, dst, src_stride); 309 else 310 vpx_fdct32x32(src, dst, src_stride); 311 } 312 313 #if 
CONFIG_VP9_HIGHBITDEPTH 314 static INLINE void highbd_fdct32x32(int rd_transform, const int16_t *src, 315 tran_low_t *dst, int src_stride) { 316 if (rd_transform) 317 vpx_highbd_fdct32x32_rd(src, dst, src_stride); 318 else 319 vpx_highbd_fdct32x32(src, dst, src_stride); 320 } 321 #endif // CONFIG_VP9_HIGHBITDEPTH 322 323 void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block, int row, int col, 324 BLOCK_SIZE plane_bsize, TX_SIZE tx_size) { 325 MACROBLOCKD *const xd = &x->e_mbd; 326 const struct macroblock_plane *const p = &x->plane[plane]; 327 const struct macroblockd_plane *const pd = &xd->plane[plane]; 328 const scan_order *const scan_order = &vp9_default_scan_orders[tx_size]; 329 tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block); 330 tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block); 331 tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); 332 uint16_t *const eob = &p->eobs[block]; 333 const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; 334 const int16_t *src_diff; 335 src_diff = &p->src_diff[4 * (row * diff_stride + col)]; 336 // skip block condition should be handled before this is called. 
  assert(!x->skip_block);

#if CONFIG_VP9_HIGHBITDEPTH
  // High-bitdepth path: per-size forward DCT followed by fp quantization.
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    switch (tx_size) {
      case TX_32X32:
        highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
        vp9_highbd_quantize_fp_32x32(coeff, 1024, x->skip_block, p->round_fp,
                                     p->quant_fp, qcoeff, dqcoeff, pd->dequant,
                                     eob, scan_order->scan, scan_order->iscan);
        break;
      case TX_16X16:
        vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
        vp9_highbd_quantize_fp(coeff, 256, x->skip_block, p->round_fp,
                               p->quant_fp, qcoeff, dqcoeff, pd->dequant, eob,
                               scan_order->scan, scan_order->iscan);
        break;
      case TX_8X8:
        vpx_highbd_fdct8x8(src_diff, coeff, diff_stride);
        vp9_highbd_quantize_fp(coeff, 64, x->skip_block, p->round_fp,
                               p->quant_fp, qcoeff, dqcoeff, pd->dequant, eob,
                               scan_order->scan, scan_order->iscan);
        break;
      case TX_4X4:
        // 4x4 transform is indirect: fwd_txfm4x4 may be the lossless WHT.
        x->fwd_txfm4x4(src_diff, coeff, diff_stride);
        vp9_highbd_quantize_fp(coeff, 16, x->skip_block, p->round_fp,
                               p->quant_fp, qcoeff, dqcoeff, pd->dequant, eob,
                               scan_order->scan, scan_order->iscan);
        break;
      default: assert(0);
    }
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  switch (tx_size) {
    case TX_32X32:
      fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
      vp9_quantize_fp_32x32(coeff, 1024, x->skip_block, p->round_fp,
                            p->quant_fp, qcoeff, dqcoeff, pd->dequant, eob,
                            scan_order->scan, scan_order->iscan);
      break;
    case TX_16X16:
      vpx_fdct16x16(src_diff, coeff, diff_stride);
      vp9_quantize_fp(coeff, 256, x->skip_block, p->round_fp, p->quant_fp,
                      qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan,
                      scan_order->iscan);
      break;
    case TX_8X8:
      // 8x8 uses a fused transform+quantize kernel.
      vp9_fdct8x8_quant(src_diff, diff_stride, coeff, 64, x->skip_block,
                        p->round_fp, p->quant_fp, qcoeff, dqcoeff, pd->dequant,
                        eob, scan_order->scan, scan_order->iscan);
      break;
    case TX_4X4:
      x->fwd_txfm4x4(src_diff, coeff, diff_stride);
      vp9_quantize_fp(coeff, 16, x->skip_block, p->round_fp, p->quant_fp,
                      qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan,
                      scan_order->iscan);
      break;
    default: assert(0); break;
  }
}

// Forward transform + quantization of the DC coefficient only (AC terms are
// skipped); fast path used when AC energy is predicted to be negligible.
void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block, int row, int col,
                        BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  uint16_t *const eob = &p->eobs[block];
  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int16_t *src_diff;
  src_diff = &p->src_diff[4 * (row * diff_stride + col)];
  // skip block condition should be handled before this is called.
  assert(!x->skip_block);

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    switch (tx_size) {
      case TX_32X32:
        // The _1 transforms compute only the DC term of the DCT.
        vpx_highbd_fdct32x32_1(src_diff, coeff, diff_stride);
        vpx_highbd_quantize_dc_32x32(coeff, x->skip_block, p->round,
                                     p->quant_fp[0], qcoeff, dqcoeff,
                                     pd->dequant[0], eob);
        break;
      case TX_16X16:
        vpx_highbd_fdct16x16_1(src_diff, coeff, diff_stride);
        vpx_highbd_quantize_dc(coeff, 256, x->skip_block, p->round,
                               p->quant_fp[0], qcoeff, dqcoeff, pd->dequant[0],
                               eob);
        break;
      case TX_8X8:
        vpx_highbd_fdct8x8_1(src_diff, coeff, diff_stride);
        vpx_highbd_quantize_dc(coeff, 64, x->skip_block, p->round,
                               p->quant_fp[0], qcoeff, dqcoeff, pd->dequant[0],
                               eob);
        break;
      case TX_4X4:
        x->fwd_txfm4x4(src_diff, coeff, diff_stride);
        vpx_highbd_quantize_dc(coeff, 16, x->skip_block, p->round,
                               p->quant_fp[0], qcoeff, dqcoeff, pd->dequant[0],
                               eob);
        break;
      default: assert(0);
    }
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  switch (tx_size) {
    case TX_32X32:
      vpx_fdct32x32_1(src_diff, coeff, diff_stride);
      vpx_quantize_dc_32x32(coeff, x->skip_block, p->round, p->quant_fp[0],
                            qcoeff, dqcoeff, pd->dequant[0], eob);
      break;
    case TX_16X16:
      vpx_fdct16x16_1(src_diff, coeff, diff_stride);
      vpx_quantize_dc(coeff, 256, x->skip_block, p->round, p->quant_fp[0],
                      qcoeff, dqcoeff, pd->dequant[0], eob);
      break;
    case TX_8X8:
      vpx_fdct8x8_1(src_diff, coeff, diff_stride);
      vpx_quantize_dc(coeff, 64, x->skip_block, p->round, p->quant_fp[0],
                      qcoeff, dqcoeff, pd->dequant[0], eob);
      break;
    case TX_4X4:
      x->fwd_txfm4x4(src_diff, coeff, diff_stride);
      vpx_quantize_dc(coeff, 16, x->skip_block, p->round, p->quant_fp[0],
                      qcoeff, dqcoeff, pd->dequant[0], eob);
      break;
    default: assert(0); break;
  }
}

// Full-precision forward transform + "b" quantization (zbin/round/shift) for
// one transform block of the residual; the standard RD encoding path.
void vp9_xform_quant(MACROBLOCK *x, int plane, int block, int row, int col,
                     BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  const scan_order *const scan_order = &vp9_default_scan_orders[tx_size];
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  uint16_t *const eob = &p->eobs[block];
  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int16_t *src_diff;
  src_diff = &p->src_diff[4 * (row * diff_stride + col)];
  // skip block condition should be handled before this is called.
  assert(!x->skip_block);

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    switch (tx_size) {
      case TX_32X32:
        highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
        vpx_highbd_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin,
                                    p->round, p->quant, p->quant_shift, qcoeff,
                                    dqcoeff, pd->dequant, eob, scan_order->scan,
                                    scan_order->iscan);
        break;
      case TX_16X16:
        vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
        vpx_highbd_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
                              p->quant, p->quant_shift, qcoeff, dqcoeff,
                              pd->dequant, eob, scan_order->scan,
                              scan_order->iscan);
        break;
      case TX_8X8:
        vpx_highbd_fdct8x8(src_diff, coeff, diff_stride);
        vpx_highbd_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round,
                              p->quant, p->quant_shift, qcoeff, dqcoeff,
                              pd->dequant, eob, scan_order->scan,
                              scan_order->iscan);
        break;
      case TX_4X4:
        // fwd_txfm4x4 is indirect so the lossless WHT can be substituted.
        x->fwd_txfm4x4(src_diff, coeff, diff_stride);
        vpx_highbd_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,
                              p->quant, p->quant_shift, qcoeff, dqcoeff,
                              pd->dequant, eob, scan_order->scan,
                              scan_order->iscan);
        break;
      default: assert(0);
    }
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  switch (tx_size) {
    case TX_32X32:
      fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
      vpx_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round,
                           p->quant, p->quant_shift, qcoeff, dqcoeff,
                           pd->dequant, eob, scan_order->scan,
                           scan_order->iscan);
      break;
    case TX_16X16:
      vpx_fdct16x16(src_diff, coeff, diff_stride);
      vpx_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, p->quant,
                     p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
                     scan_order->scan, scan_order->iscan);
      break;
    case TX_8X8:
      vpx_fdct8x8(src_diff, coeff, diff_stride);
      vpx_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant,
                     p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
                     scan_order->scan, scan_order->iscan);
      break;
    case TX_4X4:
      x->fwd_txfm4x4(src_diff, coeff, diff_stride);
      vpx_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant,
                     p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
                     scan_order->scan, scan_order->iscan);
      break;
    default: assert(0); break;
  }
}

// Per-transform-block inter encode step (foreach callback): selects full /
// DC-only / skipped transform+quantization based on skip heuristics, runs
// optional trellis optimization, updates the above/left entropy contexts, and
// adds the inverse transform back into the reconstruction buffer.
static void encode_block(int plane, int block, int row, int col,
                         BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) {
  struct encode_b_args *const args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  uint8_t *dst;
  ENTROPY_CONTEXT *a, *l;
  dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col];
  a = &args->ta[col];
  l = &args->tl[row];

  // TODO(jingning): per transformed block zero forcing only enabled for
  // luma component. will integrate chroma components as well.
  if (x->zcoeff_blk[tx_size][block] && plane == 0) {
    // Block was pre-marked as all-zero: record empty eob and clear contexts.
    p->eobs[block] = 0;
    *a = *l = 0;
    return;
  }

  if (!x->skip_recode) {
    if (x->quant_fp) {
      // Encoding process for rtc mode
      if (x->skip_txfm[0] == SKIP_TXFM_AC_DC && plane == 0) {
        // skip forward transform
        p->eobs[block] = 0;
        *a = *l = 0;
        return;
      } else {
        vp9_xform_quant_fp(x, plane, block, row, col, plane_bsize, tx_size);
      }
    } else {
      if (max_txsize_lookup[plane_bsize] == tx_size) {
        // skip_txfm entries are per-plane (4 slots each) at max tx size.
        int txfm_blk_index = (plane << 2) + (block >> (tx_size << 1));
        if (x->skip_txfm[txfm_blk_index] == SKIP_TXFM_NONE) {
          // full forward transform and quantization
          vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size);
        } else if (x->skip_txfm[txfm_blk_index] == SKIP_TXFM_AC_ONLY) {
          // fast path forward transform and quantization
          vp9_xform_quant_dc(x, plane, block, row, col, plane_bsize, tx_size);
        } else {
          // skip forward transform
          p->eobs[block] = 0;
          *a = *l = 0;
          return;
        }
      } else {
        vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size);
      }
    }
  }

  if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
    const int ctx = combine_entropy_contexts(*a, *l);
    *a = *l = vp9_optimize_b(x, plane, block, tx_size, ctx) > 0;
  } else {
    *a = *l = p->eobs[block] > 0;
  }

  // Any nonzero coefficient means the whole macroblock cannot be skipped.
  if (p->eobs[block]) *(args->skip) = 0;

  if (x->skip_encode || p->eobs[block] == 0) return;
#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    uint16_t *const dst16 = CONVERT_TO_SHORTPTR(dst);
    switch (tx_size) {
      case TX_32X32:
        vp9_highbd_idct32x32_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block],
                                 xd->bd);
        break;
      case TX_16X16:
        vp9_highbd_idct16x16_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block],
                                 xd->bd);
        break;
      case TX_8X8:
        vp9_highbd_idct8x8_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block],
                               xd->bd);
        break;
      case TX_4X4:
        // this is like vp9_short_idct4x4 but has a special case around eob<=1
        // which is significant (not just an optimization) for the lossless
        // case.
        x->highbd_inv_txfm_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block],
                               xd->bd);
        break;
      default: assert(0 && "Invalid transform size");
    }
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  switch (tx_size) {
    case TX_32X32:
      vp9_idct32x32_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
      break;
    case TX_16X16:
      vp9_idct16x16_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
      break;
    case TX_8X8:
      vp9_idct8x8_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
      break;
    case TX_4X4:
      // this is like vp9_short_idct4x4 but has a special case around eob<=1
      // which is significant (not just an optimization) for the lossless
      // case.
      x->inv_txfm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
      break;
    default: assert(0 && "Invalid transform size"); break;
  }
}

// Simplified per-block callback for first-pass encoding: transform+quantize,
// then reconstruct; no trellis, no skip heuristics, no entropy contexts.
static void encode_block_pass1(int plane, int block, int row, int col,
                               BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
                               void *arg) {
  MACROBLOCK *const x = (MACROBLOCK *)arg;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  uint8_t *dst;
  dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col];

  vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size);

  if (p->eobs[block] > 0) {
#if CONFIG_VP9_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      x->highbd_inv_txfm_add(dqcoeff, CONVERT_TO_SHORTPTR(dst), pd->dst.stride,
                             p->eobs[block], xd->bd);
      return;
    }
#endif  // CONFIG_VP9_HIGHBITDEPTH
    x->inv_txfm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
  }
}

// First-pass encode of the luma (y) plane only.
void vp9_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize) {
  vp9_subtract_plane(x, bsize, 0);
  vp9_foreach_transformed_block_in_plane(&x->e_mbd, bsize, 0,
                                         encode_block_pass1, x);
}

// Encodes a full (inter) superblock: subtract, then per-plane per-transform
// block encode via encode_block, maintaining entropy contexts for trellis.
void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) {
  MACROBLOCKD *const xd = &x->e_mbd;
  struct optimize_ctx ctx;
  MODE_INFO *mi = xd->mi[0];
  struct encode_b_args arg = { x, 1, NULL, NULL, &mi->skip };
  int plane;

  // Assume skippable until encode_block finds a nonzero coefficient.
  mi->skip = 1;

  if (x->skip) return;

  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
    if (!x->skip_recode) vp9_subtract_plane(x, bsize, plane);

    if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
      const struct macroblockd_plane *const pd = &xd->plane[plane];
      const TX_SIZE tx_size = plane ? get_uv_tx_size(mi, pd) : mi->tx_size;
      vp9_get_entropy_contexts(bsize, tx_size, pd, ctx.ta[plane],
                               ctx.tl[plane]);
      arg.enable_coeff_opt = 1;
    } else {
      arg.enable_coeff_opt = 0;
    }
    arg.ta = ctx.ta[plane];
    arg.tl = ctx.tl[plane];

    vp9_foreach_transformed_block_in_plane(xd, bsize, plane, encode_block,
                                           &arg);
  }
}

// Per-transform-block intra encode (foreach callback): predicts the block,
// subtracts, transforms+quantizes (with optional trellis), and reconstructs.
// Intra prediction must happen per block because neighbors' reconstructions
// feed later predictions.
void vp9_encode_block_intra(int plane, int block, int row, int col,
                            BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
                            void *arg) {
  struct encode_b_args *const args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *mi = xd->mi[0];
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  tran_low_t *coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  const scan_order *scan_order;
  TX_TYPE tx_type = DCT_DCT;
  PREDICTION_MODE mode;
  const int bwl = b_width_log2_lookup[plane_bsize];
  const int diff_stride = 4 * (1 << bwl);
  uint8_t *src, *dst;
  int16_t *src_diff;
  uint16_t *eob = &p->eobs[block];
  const int src_stride = p->src.stride;
  const int dst_stride = pd->dst.stride;
  ENTROPY_CONTEXT *a = NULL;
  ENTROPY_CONTEXT *l = NULL;
  int entropy_ctx = 0;
  dst = &pd->dst.buf[4 * (row * dst_stride + col)];
  src = &p->src.buf[4 * (row * src_stride + col)];
  src_diff = &p->src_diff[4 * (row * diff_stride + col)];
  if (args->enable_coeff_opt) {
    a = &args->ta[col];
    l = &args->tl[row];
    entropy_ctx = combine_entropy_contexts(*a, *l);
  }

  // Pick prediction mode and (for ADST-capable sizes) the transform type /
  // scan order; 32x32 always uses the default DCT scan.
  if (tx_size == TX_4X4) {
    tx_type = get_tx_type_4x4(get_plane_type(plane), xd, block);
    scan_order = &vp9_scan_orders[TX_4X4][tx_type];
    mode = plane == 0 ? get_y_mode(xd->mi[0], block) : mi->uv_mode;
  } else {
    mode = plane == 0 ? mi->mode : mi->uv_mode;
    if (tx_size == TX_32X32) {
      scan_order = &vp9_default_scan_orders[TX_32X32];
    } else {
      tx_type = get_tx_type(get_plane_type(plane), xd);
      scan_order = &vp9_scan_orders[tx_size][tx_type];
    }
  }

  vp9_predict_intra_block(
      xd, bwl, tx_size, mode, (x->skip_encode || x->fp_src_pred) ? src : dst,
      (x->skip_encode || x->fp_src_pred) ? src_stride : dst_stride, dst,
      dst_stride, col, row, plane);

  // skip block condition should be handled before this is called.
  assert(!x->skip_block);

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    uint16_t *const dst16 = CONVERT_TO_SHORTPTR(dst);
    switch (tx_size) {
      case TX_32X32:
        if (!x->skip_recode) {
          vpx_highbd_subtract_block(32, 32, src_diff, diff_stride, src,
                                    src_stride, dst, dst_stride, xd->bd);
          highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
          vpx_highbd_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin,
                                      p->round, p->quant, p->quant_shift,
                                      qcoeff, dqcoeff, pd->dequant, eob,
                                      scan_order->scan, scan_order->iscan);
        }
        if (args->enable_coeff_opt && !x->skip_recode) {
          *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
        }
        if (!x->skip_encode && *eob) {
          vp9_highbd_idct32x32_add(dqcoeff, dst16, dst_stride, *eob, xd->bd);
        }
        break;
      case TX_16X16:
        if (!x->skip_recode) {
          vpx_highbd_subtract_block(16, 16, src_diff, diff_stride, src,
                                    src_stride, dst, dst_stride, xd->bd);
          if (tx_type == DCT_DCT)
            vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
          else
            vp9_highbd_fht16x16(src_diff, coeff, diff_stride, tx_type);
          vpx_highbd_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
                                p->quant, p->quant_shift, qcoeff, dqcoeff,
                                pd->dequant, eob, scan_order->scan,
                                scan_order->iscan);
        }
        if (args->enable_coeff_opt && !x->skip_recode) {
          *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
        }
        if (!x->skip_encode && *eob) {
          vp9_highbd_iht16x16_add(tx_type, dqcoeff, dst16, dst_stride, *eob,
                                  xd->bd);
        }
        break;
      case TX_8X8:
        if (!x->skip_recode) {
          vpx_highbd_subtract_block(8, 8, src_diff, diff_stride, src,
                                    src_stride, dst, dst_stride, xd->bd);
          if (tx_type == DCT_DCT)
            vpx_highbd_fdct8x8(src_diff, coeff, diff_stride);
          else
            vp9_highbd_fht8x8(src_diff, coeff, diff_stride, tx_type);
          vpx_highbd_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round,
                                p->quant, p->quant_shift, qcoeff, dqcoeff,
                                pd->dequant, eob, scan_order->scan,
                                scan_order->iscan);
        }
        if (args->enable_coeff_opt && !x->skip_recode) {
          *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
        }
        if (!x->skip_encode && *eob) {
          vp9_highbd_iht8x8_add(tx_type, dqcoeff, dst16, dst_stride, *eob,
                                xd->bd);
        }
        break;
      case TX_4X4:
        if (!x->skip_recode) {
          vpx_highbd_subtract_block(4, 4, src_diff, diff_stride, src,
                                    src_stride, dst, dst_stride, xd->bd);
          if (tx_type != DCT_DCT)
            vp9_highbd_fht4x4(src_diff, coeff, diff_stride, tx_type);
          else
            x->fwd_txfm4x4(src_diff, coeff, diff_stride);
          vpx_highbd_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,
                                p->quant, p->quant_shift, qcoeff, dqcoeff,
                                pd->dequant, eob, scan_order->scan,
                                scan_order->iscan);
        }
        if (args->enable_coeff_opt && !x->skip_recode) {
          *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
        }
        if (!x->skip_encode && *eob) {
          if (tx_type == DCT_DCT) {
            // this is like vp9_short_idct4x4 but has a special case around
            // eob<=1 which is significant (not just an optimization) for the
            // lossless case.
            x->highbd_inv_txfm_add(dqcoeff, dst16, dst_stride, *eob, xd->bd);
          } else {
            vp9_highbd_iht4x4_16_add(dqcoeff, dst16, dst_stride, tx_type,
                                     xd->bd);
          }
        }
        break;
      default: assert(0); return;
    }
    if (*eob) *(args->skip) = 0;
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  switch (tx_size) {
    case TX_32X32:
      if (!x->skip_recode) {
        vpx_subtract_block(32, 32, src_diff, diff_stride, src, src_stride, dst,
                           dst_stride);
        fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
        vpx_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round,
                             p->quant, p->quant_shift, qcoeff, dqcoeff,
                             pd->dequant, eob, scan_order->scan,
                             scan_order->iscan);
      }
      if (args->enable_coeff_opt && !x->skip_recode) {
        *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
      }
      if (!x->skip_encode && *eob)
        vp9_idct32x32_add(dqcoeff, dst, dst_stride, *eob);
      break;
    case TX_16X16:
      if (!x->skip_recode) {
        vpx_subtract_block(16, 16, src_diff, diff_stride, src, src_stride, dst,
                           dst_stride);
        vp9_fht16x16(src_diff, coeff, diff_stride, tx_type);
        vpx_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, p->quant,
                       p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
                       scan_order->scan, scan_order->iscan);
      }
      if (args->enable_coeff_opt && !x->skip_recode) {
        *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
      }
      if (!x->skip_encode && *eob)
        vp9_iht16x16_add(tx_type, dqcoeff, dst, dst_stride, *eob);
      break;
    case TX_8X8:
      if (!x->skip_recode) {
        vpx_subtract_block(8, 8, src_diff, diff_stride, src, src_stride, dst,
                           dst_stride);
        vp9_fht8x8(src_diff, coeff, diff_stride, tx_type);
        vpx_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant,
                       p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
                       scan_order->scan, scan_order->iscan);
      }
      if (args->enable_coeff_opt && !x->skip_recode) {
        *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
      }
      if (!x->skip_encode && *eob)
        vp9_iht8x8_add(tx_type, dqcoeff, dst, dst_stride, *eob);
      break;
    case TX_4X4:
      if (!x->skip_recode) {
        vpx_subtract_block(4, 4, src_diff, diff_stride, src, src_stride, dst,
                           dst_stride);
        if (tx_type != DCT_DCT)
          vp9_fht4x4(src_diff, coeff, diff_stride, tx_type);
        else
          x->fwd_txfm4x4(src_diff, coeff, diff_stride);
        vpx_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant,
                       p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
                       scan_order->scan, scan_order->iscan);
      }
      if (args->enable_coeff_opt && !x->skip_recode) {
        *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
      }
      if (!x->skip_encode && *eob) {
        if (tx_type == DCT_DCT)
          // this is like vp9_short_idct4x4 but has a special case around eob<=1
          // which is significant (not just an optimization) for the lossless
          // case.
          x->inv_txfm_add(dqcoeff, dst, dst_stride, *eob);
        else
          vp9_iht4x4_16_add(dqcoeff, dst, dst_stride, tx_type);
      }
      break;
    default: assert(0); break;
  }
  if (*eob) *(args->skip) = 0;
}

// Intra-encodes every transform block of one plane of the given block size.
// When enable_optimize_b is set (and the skip flags permit), entropy contexts
// are prepared so vp9_encode_block_intra can run trellis optimization.
void vp9_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane,
                                  int enable_optimize_b) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  struct optimize_ctx ctx;
  // NOTE(review): ctx.ta/ctx.tl are passed uninitialized when the optimize
  // path below is not taken; the callback only reads them when
  // enable_coeff_opt is set, which is presumably why this is safe — verify.
  struct encode_b_args arg = { x, enable_optimize_b, ctx.ta[plane],
                               ctx.tl[plane], &xd->mi[0]->skip };

  if (enable_optimize_b && x->optimize &&
      (!x->skip_recode || !x->skip_optimize)) {
    const struct macroblockd_plane *const pd = &xd->plane[plane];
    const TX_SIZE tx_size =
        plane ? get_uv_tx_size(xd->mi[0], pd) : xd->mi[0]->tx_size;
    vp9_get_entropy_contexts(bsize, tx_size, pd, ctx.ta[plane], ctx.tl[plane]);
  } else {
    arg.enable_coeff_opt = 0;
  }

  vp9_foreach_transformed_block_in_plane(xd, bsize, plane,
                                         vp9_encode_block_intra, &arg);
}