1 /* 2 * Copyright (c) 2017, Alliance for Open Media. All rights reserved 3 * 4 * This source code is subject to the terms of the BSD 2 Clause License and 5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License 6 * was not distributed with this source code in the LICENSE file, you can 7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open 8 * Media Patent License 1.0 was not distributed with this source code in the 9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 10 */ 11 12 #include "av1/encoder/encodetxb.h" 13 14 #include "aom_ports/mem.h" 15 #include "av1/common/blockd.h" 16 #include "av1/common/idct.h" 17 #include "av1/common/pred_common.h" 18 #include "av1/common/scan.h" 19 #include "av1/encoder/bitstream.h" 20 #include "av1/encoder/cost.h" 21 #include "av1/encoder/encodeframe.h" 22 #include "av1/encoder/hash.h" 23 #include "av1/encoder/rdopt.h" 24 #include "av1/encoder/tokenize.h" 25 26 static int hbt_needs_init = 1; 27 static CRC32C crc_calculator; 28 static const int HBT_EOB = 16; // also the length in opt_qcoeff 29 static const int HBT_TABLE_SIZE = 65536; // 16 bit: holds 65536 'arrays' 30 static const int HBT_ARRAY_LENGTH = 256; // 8 bit: 256 entries 31 // If removed in hbt_create_hashes or increased beyond int8_t, widen deltas type 32 static const int HBT_KICKOUT = 3; 33 34 typedef struct OptTxbQcoeff { 35 // Use larger type if larger/no kickout value is used in hbt_create_hashes 36 int8_t deltas[16]; 37 uint32_t hbt_qc_hash; 38 uint32_t hbt_ctx_hash; 39 int init; 40 int rate_cost; 41 } OptTxbQcoeff; 42 43 OptTxbQcoeff *hbt_hash_table; 44 45 typedef struct LevelDownStats { 46 int update; 47 tran_low_t low_qc; 48 tran_low_t low_dqc; 49 int64_t dist0; 50 int rate; 51 int rate_low; 52 int64_t dist; 53 int64_t dist_low; 54 int64_t rd; 55 int64_t rd_low; 56 int64_t nz_rd; 57 int64_t rd_diff; 58 int cost_diff; 59 int64_t dist_diff; 60 int new_eob; 61 } LevelDownStats; 62 63 void av1_alloc_txb_buf(AV1_COMP *cpi) { 64 AV1_COMMON *cm = &cpi->common; 65 int size = ((cm->mi_rows >> cm->seq_params.mib_size_log2) + 1) * 66 ((cm->mi_cols >> cm->seq_params.mib_size_log2) + 1); 67 68 av1_free_txb_buf(cpi); 69 // TODO(jingning): This should be further reduced. 70 CHECK_MEM_ERROR(cm, cpi->coeff_buffer_base, 71 aom_memalign(32, sizeof(*cpi->coeff_buffer_base) * size)); 72 } 73 74 void av1_free_txb_buf(AV1_COMP *cpi) { aom_free(cpi->coeff_buffer_base); } 75 76 void av1_set_coeff_buffer(const AV1_COMP *const cpi, MACROBLOCK *const x, 77 int mi_row, int mi_col) { 78 const AV1_COMMON *const cm = &cpi->common; 79 int mib_size_log2 = cm->seq_params.mib_size_log2; 80 int stride = (cm->mi_cols >> mib_size_log2) + 1; 81 int offset = (mi_row >> mib_size_log2) * stride + (mi_col >> mib_size_log2); 82 x->mbmi_ext->cb_coef_buff = &cpi->coeff_buffer_base[offset]; 83 x->mbmi_ext->cb_offset = x->cb_offset; 84 assert(x->cb_offset < (1 << num_pels_log2_lookup[cm->seq_params.sb_size])); 85 } 86 87 static void write_golomb(aom_writer *w, int level) { 88 int x = level + 1; 89 int i = x; 90 int length = 0; 91 92 while (i) { 93 i >>= 1; 94 ++length; 95 } 96 assert(length > 0); 97 98 for (i = 0; i < length - 1; ++i) aom_write_bit(w, 0); 99 100 for (i = length - 1; i >= 0; --i) aom_write_bit(w, (x >> i) & 0x01); 101 } 102 103 static INLINE tran_low_t get_lower_coeff(tran_low_t qc) { 104 if (qc == 0) { 105 return 0; 106 } 107 return qc > 0 ? qc - 1 : qc + 1; 108 } 109 110 static INLINE tran_low_t qcoeff_to_dqcoeff(tran_low_t qc, int coeff_idx, 111 int dqv, int shift, 112 const qm_val_t *iqmatrix) { 113 int sign = qc < 0 ? -1 : 1; 114 if (iqmatrix != NULL) 115 dqv = 116 ((iqmatrix[coeff_idx] * dqv) + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS; 117 return sign * ((abs(qc) * dqv) >> shift); 118 } 119 120 static INLINE int64_t get_coeff_dist(tran_low_t tcoeff, tran_low_t dqcoeff, 121 int shift) { 122 const int64_t diff = (tcoeff - dqcoeff) * (1 << shift); 123 const int64_t error = diff * diff; 124 return error; 125 } 126 127 static const int8_t eob_to_pos_small[33] = { 128 0, 1, 2, // 0-2 129 3, 3, // 3-4 130 4, 4, 4, 4, // 5-8 131 5, 5, 5, 5, 5, 5, 5, 5, // 9-16 132 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6 // 17-32 133 }; 134 135 static const int8_t eob_to_pos_large[17] = { 136 6, // place holder 137 7, // 33-64 138 8, 8, // 65-128 139 9, 9, 9, 9, // 129-256 140 10, 10, 10, 10, 10, 10, 10, 10, // 257-512 141 11 // 513- 142 }; 143 144 static INLINE int get_eob_pos_token(const int eob, int *const extra) { 145 int t; 146 147 if (eob < 33) { 148 t = eob_to_pos_small[eob]; 149 } else { 150 const int e = AOMMIN((eob - 1) >> 5, 16); 151 t = eob_to_pos_large[e]; 152 } 153 154 *extra = eob - k_eob_group_start[t]; 155 156 return t; 157 } 158 159 #if CONFIG_ENTROPY_STATS 160 void av1_update_eob_context(int cdf_idx, int eob, TX_SIZE tx_size, 161 TX_CLASS tx_class, PLANE_TYPE plane, 162 FRAME_CONTEXT *ec_ctx, FRAME_COUNTS *counts, 163 uint8_t allow_update_cdf) { 164 #else 165 void av1_update_eob_context(int eob, TX_SIZE tx_size, TX_CLASS tx_class, 166 PLANE_TYPE plane, FRAME_CONTEXT *ec_ctx, 167 uint8_t allow_update_cdf) { 168 #endif 169 int eob_extra; 170 const int eob_pt = get_eob_pos_token(eob, &eob_extra); 171 TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size); 172 173 const int eob_multi_size = txsize_log2_minus4[tx_size]; 174 const int eob_multi_ctx = (tx_class == TX_CLASS_2D) ? 0 : 1; 175 176 switch (eob_multi_size) { 177 case 0: 178 #if CONFIG_ENTROPY_STATS 179 ++counts->eob_multi16[cdf_idx][plane][eob_multi_ctx][eob_pt - 1]; 180 #endif 181 if (allow_update_cdf) 182 update_cdf(ec_ctx->eob_flag_cdf16[plane][eob_multi_ctx], eob_pt - 1, 5); 183 break; 184 case 1: 185 #if CONFIG_ENTROPY_STATS 186 ++counts->eob_multi32[cdf_idx][plane][eob_multi_ctx][eob_pt - 1]; 187 #endif 188 if (allow_update_cdf) 189 update_cdf(ec_ctx->eob_flag_cdf32[plane][eob_multi_ctx], eob_pt - 1, 6); 190 break; 191 case 2: 192 #if CONFIG_ENTROPY_STATS 193 ++counts->eob_multi64[cdf_idx][plane][eob_multi_ctx][eob_pt - 1]; 194 #endif 195 if (allow_update_cdf) 196 update_cdf(ec_ctx->eob_flag_cdf64[plane][eob_multi_ctx], eob_pt - 1, 7); 197 break; 198 case 3: 199 #if CONFIG_ENTROPY_STATS 200 ++counts->eob_multi128[cdf_idx][plane][eob_multi_ctx][eob_pt - 1]; 201 #endif 202 if (allow_update_cdf) { 203 update_cdf(ec_ctx->eob_flag_cdf128[plane][eob_multi_ctx], eob_pt - 1, 204 8); 205 } 206 break; 207 case 4: 208 #if CONFIG_ENTROPY_STATS 209 ++counts->eob_multi256[cdf_idx][plane][eob_multi_ctx][eob_pt - 1]; 210 #endif 211 if (allow_update_cdf) { 212 update_cdf(ec_ctx->eob_flag_cdf256[plane][eob_multi_ctx], eob_pt - 1, 213 9); 214 } 215 break; 216 case 5: 217 #if CONFIG_ENTROPY_STATS 218 ++counts->eob_multi512[cdf_idx][plane][eob_multi_ctx][eob_pt - 1]; 219 #endif 220 if (allow_update_cdf) { 221 update_cdf(ec_ctx->eob_flag_cdf512[plane][eob_multi_ctx], eob_pt - 1, 222 10); 223 } 224 break; 225 case 6: 226 default: 227 #if CONFIG_ENTROPY_STATS 228 ++counts->eob_multi1024[cdf_idx][plane][eob_multi_ctx][eob_pt - 1]; 229 #endif 230 if (allow_update_cdf) { 231 update_cdf(ec_ctx->eob_flag_cdf1024[plane][eob_multi_ctx], eob_pt - 1, 232 11); 233 } 234 break; 235 } 236 237 if (k_eob_offset_bits[eob_pt] > 0) { 238 int eob_ctx = eob_pt - 3; 239 int eob_shift = k_eob_offset_bits[eob_pt] - 1; 240 int bit = (eob_extra & (1 << eob_shift)) ? 1 : 0; 241 #if CONFIG_ENTROPY_STATS 242 counts->eob_extra[cdf_idx][txs_ctx][plane][eob_pt][bit]++; 243 #endif // CONFIG_ENTROPY_STATS 244 if (allow_update_cdf) 245 update_cdf(ec_ctx->eob_extra_cdf[txs_ctx][plane][eob_ctx], bit, 2); 246 } 247 } 248 249 static int get_eob_cost(int eob, const LV_MAP_EOB_COST *txb_eob_costs, 250 const LV_MAP_COEFF_COST *txb_costs, TX_CLASS tx_class) { 251 int eob_extra; 252 const int eob_pt = get_eob_pos_token(eob, &eob_extra); 253 int eob_cost = 0; 254 const int eob_multi_ctx = (tx_class == TX_CLASS_2D) ? 0 : 1; 255 eob_cost = txb_eob_costs->eob_cost[eob_multi_ctx][eob_pt - 1]; 256 257 if (k_eob_offset_bits[eob_pt] > 0) { 258 const int eob_ctx = eob_pt - 3; 259 const int eob_shift = k_eob_offset_bits[eob_pt] - 1; 260 const int bit = (eob_extra & (1 << eob_shift)) ? 1 : 0; 261 eob_cost += txb_costs->eob_extra_cost[eob_ctx][bit]; 262 const int offset_bits = k_eob_offset_bits[eob_pt]; 263 if (offset_bits > 1) eob_cost += av1_cost_literal(offset_bits - 1); 264 } 265 return eob_cost; 266 } 267 268 static INLINE int get_sign_bit_cost(tran_low_t qc, int coeff_idx, 269 const int (*dc_sign_cost)[2], 270 int dc_sign_ctx) { 271 if (coeff_idx == 0) { 272 const int sign = (qc < 0) ? 1 : 0; 273 return dc_sign_cost[dc_sign_ctx][sign]; 274 } 275 return av1_cost_literal(1); 276 } 277 278 static const int golomb_bits_cost[32] = { 279 0, 512, 512 * 3, 512 * 3, 512 * 5, 512 * 5, 512 * 5, 512 * 5, 280 512 * 7, 512 * 7, 512 * 7, 512 * 7, 512 * 7, 512 * 7, 512 * 7, 512 * 7, 281 512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9, 282 512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9, 512 * 9 283 }; 284 static const int golomb_cost_diff[32] = { 285 0, 512, 512 * 2, 0, 512 * 2, 0, 0, 0, 512 * 2, 0, 0, 0, 0, 0, 0, 0, 286 512 * 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 287 }; 288 289 static INLINE int get_golomb_cost(int abs_qc) { 290 if (abs_qc >= 1 + NUM_BASE_LEVELS + COEFF_BASE_RANGE) { 291 const int r = abs_qc - COEFF_BASE_RANGE - NUM_BASE_LEVELS; 292 const int length = get_msb(r) + 1; 293 return av1_cost_literal(2 * length - 1); 294 } 295 return 0; 296 } 297 298 static INLINE int get_br_cost_with_diff(tran_low_t level, const int *coeff_lps, 299 int *diff) { 300 const int base_range = AOMMIN(level - 1 - NUM_BASE_LEVELS, COEFF_BASE_RANGE); 301 int golomb_bits = 0; 302 if (level <= COEFF_BASE_RANGE + 1 + NUM_BASE_LEVELS) 303 *diff += coeff_lps[base_range + COEFF_BASE_RANGE + 1]; 304 305 if (level >= COEFF_BASE_RANGE + 1 + NUM_BASE_LEVELS) { 306 int r = level - COEFF_BASE_RANGE - NUM_BASE_LEVELS; 307 if (r < 32) { 308 golomb_bits = golomb_bits_cost[r]; 309 *diff += golomb_cost_diff[r]; 310 } else { 311 golomb_bits = get_golomb_cost(level); 312 *diff += (r & (r - 1)) == 0 ? 1024 : 0; 313 } 314 } 315 316 return coeff_lps[base_range] + golomb_bits; 317 } 318 319 static INLINE int get_br_cost(tran_low_t level, const int *coeff_lps) { 320 const int base_range = AOMMIN(level - 1 - NUM_BASE_LEVELS, COEFF_BASE_RANGE); 321 return coeff_lps[base_range] + get_golomb_cost(level); 322 } 323 324 static int get_coeff_cost(const tran_low_t qc, const int scan_idx, 325 const int is_eob, const TxbInfo *const txb_info, 326 const LV_MAP_COEFF_COST *const txb_costs, 327 const int coeff_ctx, const TX_CLASS tx_class) { 328 const TXB_CTX *const txb_ctx = txb_info->txb_ctx; 329 const int is_nz = (qc != 0); 330 const tran_low_t abs_qc = abs(qc); 331 int cost = 0; 332 const int16_t *const scan = txb_info->scan_order->scan; 333 const int pos = scan[scan_idx]; 334 335 if (is_eob) { 336 cost += txb_costs->base_eob_cost[coeff_ctx][AOMMIN(abs_qc, 3) - 1]; 337 } else { 338 cost += txb_costs->base_cost[coeff_ctx][AOMMIN(abs_qc, 3)]; 339 } 340 if (is_nz) { 341 cost += get_sign_bit_cost(qc, scan_idx, txb_costs->dc_sign_cost, 342 txb_ctx->dc_sign_ctx); 343 344 if (abs_qc > NUM_BASE_LEVELS) { 345 const int ctx = 346 get_br_ctx(txb_info->levels, pos, txb_info->bwl, tx_class); 347 cost += get_br_cost(abs_qc, txb_costs->lps_cost[ctx]); 348 } 349 } 350 return cost; 351 } 352 353 static INLINE int get_nz_map_ctx(const uint8_t *const levels, 354 const int coeff_idx, const int bwl, 355 const int height, const int scan_idx, 356 const int is_eob, const TX_SIZE tx_size, 357 const TX_CLASS tx_class) { 358 if (is_eob) { 359 if (scan_idx == 0) return 0; 360 if (scan_idx <= (height << bwl) / 8) return 1; 361 if (scan_idx <= (height << bwl) / 4) return 2; 362 return 3; 363 } 364 const int stats = 365 get_nz_mag(levels + get_padded_idx(coeff_idx, bwl), bwl, tx_class); 366 return get_nz_map_ctx_from_stats(stats, coeff_idx, bwl, tx_size, tx_class); 367 } 368 369 static void get_dist_cost_stats(LevelDownStats *const stats, const int scan_idx, 370 const int is_eob, 371 const LV_MAP_COEFF_COST *const txb_costs, 372 const TxbInfo *const txb_info, 373 const TX_CLASS tx_class) { 374 const int16_t *const scan = txb_info->scan_order->scan; 375 const int coeff_idx = scan[scan_idx]; 376 const tran_low_t qc = txb_info->qcoeff[coeff_idx]; 377 const uint8_t *const levels = txb_info->levels; 378 stats->new_eob = -1; 379 stats->update = 0; 380 stats->rd_low = 0; 381 stats->rd = 0; 382 stats->nz_rd = 0; 383 stats->dist_low = 0; 384 stats->rate_low = 0; 385 stats->low_qc = 0; 386 387 const tran_low_t tqc = txb_info->tcoeff[coeff_idx]; 388 const int dqv = txb_info->dequant[coeff_idx != 0]; 389 const int coeff_ctx = 390 get_nz_map_ctx(levels, coeff_idx, txb_info->bwl, txb_info->height, 391 scan_idx, is_eob, txb_info->tx_size, tx_class); 392 const int qc_cost = get_coeff_cost(qc, scan_idx, is_eob, txb_info, txb_costs, 393 coeff_ctx, tx_class); 394 assert(qc != 0); 395 const tran_low_t dqc = qcoeff_to_dqcoeff(qc, coeff_idx, dqv, txb_info->shift, 396 txb_info->iqmatrix); 397 const int64_t dqc_dist = get_coeff_dist(tqc, dqc, txb_info->shift); 398 399 // distortion difference when coefficient is quantized to 0 400 const tran_low_t dqc0 = 401 qcoeff_to_dqcoeff(0, coeff_idx, dqv, txb_info->shift, txb_info->iqmatrix); 402 403 stats->dist0 = get_coeff_dist(tqc, dqc0, txb_info->shift); 404 stats->dist = dqc_dist - stats->dist0; 405 stats->rate = qc_cost; 406 407 stats->rd = RDCOST(txb_info->rdmult, stats->rate, stats->dist); 408 409 stats->low_qc = get_lower_coeff(qc); 410 411 if (is_eob && stats->low_qc == 0) { 412 stats->rd_low = stats->rd; // disable selection of low_qc in this case. 413 } else { 414 if (stats->low_qc == 0) { 415 stats->dist_low = 0; 416 } else { 417 stats->low_dqc = qcoeff_to_dqcoeff(stats->low_qc, coeff_idx, dqv, 418 txb_info->shift, txb_info->iqmatrix); 419 const int64_t low_dqc_dist = 420 get_coeff_dist(tqc, stats->low_dqc, txb_info->shift); 421 stats->dist_low = low_dqc_dist - stats->dist0; 422 } 423 const int low_qc_cost = 424 get_coeff_cost(stats->low_qc, scan_idx, is_eob, txb_info, txb_costs, 425 coeff_ctx, tx_class); 426 stats->rate_low = low_qc_cost; 427 stats->rd_low = RDCOST(txb_info->rdmult, stats->rate_low, stats->dist_low); 428 } 429 } 430 431 static void get_dist_cost_stats_with_eob( 432 LevelDownStats *const stats, const int scan_idx, 433 const LV_MAP_COEFF_COST *const txb_costs, const TxbInfo *const txb_info, 434 const TX_CLASS tx_class) { 435 const int is_eob = 0; 436 get_dist_cost_stats(stats, scan_idx, is_eob, txb_costs, txb_info, tx_class); 437 438 const int16_t *const scan = txb_info->scan_order->scan; 439 const int coeff_idx = scan[scan_idx]; 440 const tran_low_t qc = txb_info->qcoeff[coeff_idx]; 441 const int coeff_ctx_temp = get_nz_map_ctx( 442 txb_info->levels, coeff_idx, txb_info->bwl, txb_info->height, scan_idx, 1, 443 txb_info->tx_size, tx_class); 444 const int qc_eob_cost = get_coeff_cost(qc, scan_idx, 1, txb_info, txb_costs, 445 coeff_ctx_temp, tx_class); 446 int64_t rd_eob = RDCOST(txb_info->rdmult, qc_eob_cost, stats->dist); 447 if (stats->low_qc != 0) { 448 const int low_qc_eob_cost = 449 get_coeff_cost(stats->low_qc, scan_idx, 1, txb_info, txb_costs, 450 coeff_ctx_temp, tx_class); 451 int64_t rd_eob_low = 452 RDCOST(txb_info->rdmult, low_qc_eob_cost, stats->dist_low); 453 rd_eob = (rd_eob > rd_eob_low) ? rd_eob_low : rd_eob; 454 } 455 456 stats->nz_rd = AOMMIN(stats->rd_low, stats->rd) - rd_eob; 457 } 458 459 static INLINE void update_qcoeff(const int coeff_idx, const tran_low_t qc, 460 const TxbInfo *const txb_info) { 461 txb_info->qcoeff[coeff_idx] = qc; 462 txb_info->levels[get_padded_idx(coeff_idx, txb_info->bwl)] = 463 (uint8_t)clamp(abs(qc), 0, INT8_MAX); 464 } 465 466 static INLINE void update_coeff(const int coeff_idx, const tran_low_t qc, 467 const TxbInfo *const txb_info) { 468 update_qcoeff(coeff_idx, qc, txb_info); 469 const int dqv = txb_info->dequant[coeff_idx != 0]; 470 txb_info->dqcoeff[coeff_idx] = qcoeff_to_dqcoeff( 471 qc, coeff_idx, dqv, txb_info->shift, txb_info->iqmatrix); 472 } 473 474 void av1_txb_init_levels_c(const tran_low_t *const coeff, const int width, 475 const int height, uint8_t *const levels) { 476 const int stride = width + TX_PAD_HOR; 477 uint8_t *ls = levels; 478 479 memset(levels + stride * height, 0, 480 sizeof(*levels) * (TX_PAD_BOTTOM * stride + TX_PAD_END)); 481 482 for (int i = 0; i < height; i++) { 483 for (int j = 0; j < width; j++) { 484 *ls++ = (uint8_t)clamp(abs(coeff[i * width + j]), 0, INT8_MAX); 485 } 486 for (int j = 0; j < TX_PAD_HOR; j++) { 487 *ls++ = 0; 488 } 489 } 490 } 491 492 void av1_get_nz_map_contexts_c(const uint8_t *const levels, 493 const int16_t *const scan, const uint16_t eob, 494 const TX_SIZE tx_size, const TX_CLASS tx_class, 495 int8_t *const coeff_contexts) { 496 const int bwl = get_txb_bwl(tx_size); 497 const int height = get_txb_high(tx_size); 498 for (int i = 0; i < eob; ++i) { 499 const int pos = scan[i]; 500 coeff_contexts[pos] = get_nz_map_ctx(levels, pos, bwl, height, i, 501 i == eob - 1, tx_size, tx_class); 502 } 503 } 504 505 void av1_write_coeffs_txb(const AV1_COMMON *const cm, MACROBLOCKD *xd, 506 aom_writer *w, int blk_row, int blk_col, int plane, 507 TX_SIZE tx_size, const tran_low_t *tcoeff, 508 uint16_t eob, TXB_CTX *txb_ctx) { 509 const TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size); 510 FRAME_CONTEXT *ec_ctx = xd->tile_ctx; 511 aom_write_symbol(w, eob == 0, 512 ec_ctx->txb_skip_cdf[txs_ctx][txb_ctx->txb_skip_ctx], 2); 513 if (eob == 0) return; 514 const PLANE_TYPE plane_type = get_plane_type(plane); 515 const TX_TYPE tx_type = av1_get_tx_type(plane_type, xd, blk_row, blk_col, 516 tx_size, cm->reduced_tx_set_used); 517 const TX_CLASS tx_class = tx_type_to_class[tx_type]; 518 const SCAN_ORDER *const scan_order = get_scan(tx_size, tx_type); 519 const int16_t *const scan = scan_order->scan; 520 int c; 521 const int bwl = get_txb_bwl(tx_size); 522 const int width = get_txb_wide(tx_size); 523 const int height = get_txb_high(tx_size); 524 525 uint8_t levels_buf[TX_PAD_2D]; 526 uint8_t *const levels = set_levels(levels_buf, width); 527 DECLARE_ALIGNED(16, int8_t, coeff_contexts[MAX_TX_SQUARE]); 528 av1_txb_init_levels(tcoeff, width, height, levels); 529 530 av1_write_tx_type(cm, xd, blk_row, blk_col, plane, tx_size, w); 531 532 int eob_extra; 533 const int eob_pt = get_eob_pos_token(eob, &eob_extra); 534 const int eob_multi_size = txsize_log2_minus4[tx_size]; 535 const int eob_multi_ctx = (tx_class == TX_CLASS_2D) ? 0 : 1; 536 switch (eob_multi_size) { 537 case 0: 538 aom_write_symbol(w, eob_pt - 1, 539 ec_ctx->eob_flag_cdf16[plane_type][eob_multi_ctx], 5); 540 break; 541 case 1: 542 aom_write_symbol(w, eob_pt - 1, 543 ec_ctx->eob_flag_cdf32[plane_type][eob_multi_ctx], 6); 544 break; 545 case 2: 546 aom_write_symbol(w, eob_pt - 1, 547 ec_ctx->eob_flag_cdf64[plane_type][eob_multi_ctx], 7); 548 break; 549 case 3: 550 aom_write_symbol(w, eob_pt - 1, 551 ec_ctx->eob_flag_cdf128[plane_type][eob_multi_ctx], 8); 552 break; 553 case 4: 554 aom_write_symbol(w, eob_pt - 1, 555 ec_ctx->eob_flag_cdf256[plane_type][eob_multi_ctx], 9); 556 break; 557 case 5: 558 aom_write_symbol(w, eob_pt - 1, 559 ec_ctx->eob_flag_cdf512[plane_type][eob_multi_ctx], 10); 560 break; 561 default: 562 aom_write_symbol(w, eob_pt - 1, 563 ec_ctx->eob_flag_cdf1024[plane_type][eob_multi_ctx], 11); 564 break; 565 } 566 567 const int eob_offset_bits = k_eob_offset_bits[eob_pt]; 568 if (eob_offset_bits > 0) { 569 const int eob_ctx = eob_pt - 3; 570 int eob_shift = eob_offset_bits - 1; 571 int bit = (eob_extra & (1 << eob_shift)) ? 1 : 0; 572 aom_write_symbol(w, bit, 573 ec_ctx->eob_extra_cdf[txs_ctx][plane_type][eob_ctx], 2); 574 for (int i = 1; i < eob_offset_bits; i++) { 575 eob_shift = eob_offset_bits - 1 - i; 576 bit = (eob_extra & (1 << eob_shift)) ? 1 : 0; 577 aom_write_bit(w, bit); 578 } 579 } 580 581 av1_get_nz_map_contexts(levels, scan, eob, tx_size, tx_class, coeff_contexts); 582 583 for (c = eob - 1; c >= 0; --c) { 584 const int pos = scan[c]; 585 const int coeff_ctx = coeff_contexts[pos]; 586 const tran_low_t v = tcoeff[pos]; 587 const tran_low_t level = abs(v); 588 589 if (c == eob - 1) { 590 aom_write_symbol( 591 w, AOMMIN(level, 3) - 1, 592 ec_ctx->coeff_base_eob_cdf[txs_ctx][plane_type][coeff_ctx], 3); 593 } else { 594 aom_write_symbol(w, AOMMIN(level, 3), 595 ec_ctx->coeff_base_cdf[txs_ctx][plane_type][coeff_ctx], 596 4); 597 } 598 if (level > NUM_BASE_LEVELS) { 599 // level is above 1. 600 const int base_range = level - 1 - NUM_BASE_LEVELS; 601 const int br_ctx = get_br_ctx(levels, pos, bwl, tx_class); 602 aom_cdf_prob *cdf = 603 ec_ctx->coeff_br_cdf[AOMMIN(txs_ctx, TX_32X32)][plane_type][br_ctx]; 604 for (int idx = 0; idx < COEFF_BASE_RANGE; idx += BR_CDF_SIZE - 1) { 605 const int k = AOMMIN(base_range - idx, BR_CDF_SIZE - 1); 606 aom_write_symbol(w, k, cdf, BR_CDF_SIZE); 607 if (k < BR_CDF_SIZE - 1) break; 608 } 609 } 610 } 611 612 // Loop to code all signs in the transform block, 613 // starting with the sign of DC (if applicable) 614 for (c = 0; c < eob; ++c) { 615 const tran_low_t v = tcoeff[scan[c]]; 616 const tran_low_t level = abs(v); 617 const int sign = (v < 0) ? 1 : 0; 618 if (level) { 619 if (c == 0) { 620 aom_write_symbol( 621 w, sign, ec_ctx->dc_sign_cdf[plane_type][txb_ctx->dc_sign_ctx], 2); 622 } else { 623 aom_write_bit(w, sign); 624 } 625 if (level > COEFF_BASE_RANGE + NUM_BASE_LEVELS) 626 write_golomb(w, level - COEFF_BASE_RANGE - 1 - NUM_BASE_LEVELS); 627 } 628 } 629 } 630 631 typedef struct encode_txb_args { 632 const AV1_COMMON *cm; 633 MACROBLOCK *x; 634 aom_writer *w; 635 } ENCODE_TXB_ARGS; 636 637 static void write_coeffs_txb_wrap(const AV1_COMMON *cm, MACROBLOCK *x, 638 aom_writer *w, int plane, int block, 639 int blk_row, int blk_col, TX_SIZE tx_size) { 640 MACROBLOCKD *xd = &x->e_mbd; 641 const int txb_offset = 642 x->mbmi_ext->cb_offset / (TX_SIZE_W_MIN * TX_SIZE_H_MIN); 643 tran_low_t *tcoeff_txb = 644 x->mbmi_ext->cb_coef_buff->tcoeff[plane] + x->mbmi_ext->cb_offset; 645 uint16_t *eob_txb = x->mbmi_ext->cb_coef_buff->eobs[plane] + txb_offset; 646 uint8_t *txb_skip_ctx_txb = 647 x->mbmi_ext->cb_coef_buff->txb_skip_ctx[plane] + txb_offset; 648 int *dc_sign_ctx_txb = 649 x->mbmi_ext->cb_coef_buff->dc_sign_ctx[plane] + txb_offset; 650 tran_low_t *tcoeff = BLOCK_OFFSET(tcoeff_txb, block); 651 uint16_t eob = eob_txb[block]; 652 TXB_CTX txb_ctx = { txb_skip_ctx_txb[block], dc_sign_ctx_txb[block] }; 653 av1_write_coeffs_txb(cm, xd, w, blk_row, blk_col, plane, tx_size, tcoeff, eob, 654 &txb_ctx); 655 } 656 657 void av1_write_coeffs_mb(const AV1_COMMON *const cm, MACROBLOCK *x, int mi_row, 658 int mi_col, aom_writer *w, BLOCK_SIZE bsize) { 659 MACROBLOCKD *xd = &x->e_mbd; 660 const int num_planes = av1_num_planes(cm); 661 int block[MAX_MB_PLANE] = { 0 }; 662 int row, col; 663 assert(bsize == get_plane_block_size(bsize, xd->plane[0].subsampling_x, 664 xd->plane[0].subsampling_y)); 665 const int max_blocks_wide = max_block_wide(xd, bsize, 0); 666 const int max_blocks_high = max_block_high(xd, bsize, 0); 667 const BLOCK_SIZE max_unit_bsize = BLOCK_64X64; 668 int mu_blocks_wide = block_size_wide[max_unit_bsize] >> tx_size_wide_log2[0]; 669 int mu_blocks_high = block_size_high[max_unit_bsize] >> tx_size_high_log2[0]; 670 mu_blocks_wide = AOMMIN(max_blocks_wide, mu_blocks_wide); 671 mu_blocks_high = AOMMIN(max_blocks_high, mu_blocks_high); 672 673 for (row = 0; row < max_blocks_high; row += mu_blocks_high) { 674 for (col = 0; col < max_blocks_wide; col += mu_blocks_wide) { 675 for (int plane = 0; plane < num_planes; ++plane) { 676 const struct macroblockd_plane *const pd = &xd->plane[plane]; 677 if (!is_chroma_reference(mi_row, mi_col, bsize, pd->subsampling_x, 678 pd->subsampling_y)) 679 continue; 680 const TX_SIZE tx_size = av1_get_tx_size(plane, xd); 681 const int stepr = tx_size_high_unit[tx_size]; 682 const int stepc = tx_size_wide_unit[tx_size]; 683 const int step = stepr * stepc; 684 685 const int unit_height = ROUND_POWER_OF_TWO( 686 AOMMIN(mu_blocks_high + row, max_blocks_high), pd->subsampling_y); 687 const int unit_width = ROUND_POWER_OF_TWO( 688 AOMMIN(mu_blocks_wide + col, max_blocks_wide), pd->subsampling_x); 689 for (int blk_row = row >> pd->subsampling_y; blk_row < unit_height; 690 blk_row += stepr) { 691 for (int blk_col = col >> pd->subsampling_x; blk_col < unit_width; 692 blk_col += stepc) { 693 write_coeffs_txb_wrap(cm, x, w, plane, block[plane], blk_row, 694 blk_col, tx_size); 695 block[plane] += step; 696 } 697 } 698 } 699 } 700 } 701 } 702 703 // TODO(angiebird): use this function whenever it's possible 704 static int get_tx_type_cost(const AV1_COMMON *cm, const MACROBLOCK *x, 705 const MACROBLOCKD *xd, int plane, TX_SIZE tx_size, 706 TX_TYPE tx_type) { 707 if (plane > 0) return 0; 708 709 const TX_SIZE square_tx_size = txsize_sqr_map[tx_size]; 710 711 const MB_MODE_INFO *mbmi = xd->mi[0]; 712 const int is_inter = is_inter_block(mbmi); 713 if (get_ext_tx_types(tx_size, is_inter, cm->reduced_tx_set_used) > 1 && 714 !xd->lossless[xd->mi[0]->segment_id]) { 715 const int ext_tx_set = 716 get_ext_tx_set(tx_size, is_inter, cm->reduced_tx_set_used); 717 if (is_inter) { 718 if (ext_tx_set > 0) 719 return x->inter_tx_type_costs[ext_tx_set][square_tx_size][tx_type]; 720 } else { 721 if (ext_tx_set > 0) { 722 PREDICTION_MODE intra_dir; 723 if (mbmi->filter_intra_mode_info.use_filter_intra) 724 intra_dir = fimode_to_intradir[mbmi->filter_intra_mode_info 725 .filter_intra_mode]; 726 else 727 intra_dir = mbmi->mode; 728 return x->intra_tx_type_costs[ext_tx_set][square_tx_size][intra_dir] 729 [tx_type]; 730 } 731 } 732 } 733 return 0; 734 } 735 736 static AOM_FORCE_INLINE int warehouse_efficients_txb( 737 const AV1_COMMON *const cm, const MACROBLOCK *x, const int plane, 738 const int block, const TX_SIZE tx_size, const TXB_CTX *const txb_ctx, 739 const struct macroblock_plane *p, const int eob, 740 const PLANE_TYPE plane_type, const LV_MAP_COEFF_COST *const coeff_costs, 741 const MACROBLOCKD *const xd, const TX_TYPE tx_type, 742 const TX_CLASS tx_class) { 743 const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block); 744 const int txb_skip_ctx = txb_ctx->txb_skip_ctx; 745 const int bwl = get_txb_bwl(tx_size); 746 const int width = get_txb_wide(tx_size); 747 const int height = get_txb_high(tx_size); 748 const SCAN_ORDER *const scan_order = get_scan(tx_size, tx_type); 749 const int16_t *const scan = scan_order->scan; 750 uint8_t levels_buf[TX_PAD_2D]; 751 uint8_t *const levels = set_levels(levels_buf, width); 752 DECLARE_ALIGNED(16, int8_t, coeff_contexts[MAX_TX_SQUARE]); 753 const int eob_multi_size = txsize_log2_minus4[tx_size]; 754 const LV_MAP_EOB_COST *const eob_costs = 755 &x->eob_costs[eob_multi_size][plane_type]; 756 int cost = coeff_costs->txb_skip_cost[txb_skip_ctx][0]; 757 758 av1_txb_init_levels(qcoeff, width, height, levels); 759 760 cost += get_tx_type_cost(cm, x, xd, plane, tx_size, tx_type); 761 762 cost += get_eob_cost(eob, eob_costs, coeff_costs, tx_class); 763 764 av1_get_nz_map_contexts(levels, scan, eob, tx_size, tx_class, coeff_contexts); 765 766 const int(*lps_cost)[COEFF_BASE_RANGE + 1 + COEFF_BASE_RANGE + 1] = 767 coeff_costs->lps_cost; 768 int c = eob - 1; 769 { 770 const int pos = scan[c]; 771 const tran_low_t v = qcoeff[pos]; 772 const int sign = v >> 31; 773 const int level = (v ^ sign) - sign; 774 const int coeff_ctx = coeff_contexts[pos]; 775 cost += coeff_costs->base_eob_cost[coeff_ctx][AOMMIN(level, 3) - 1]; 776 777 if (v) { 778 // sign bit cost 779 if (level > NUM_BASE_LEVELS) { 780 const int ctx = get_br_ctx_eob(pos, bwl, tx_class); 781 cost += get_br_cost(level, lps_cost[ctx]); 782 } 783 if (c) { 784 cost += av1_cost_literal(1); 785 } else { 786 const int sign01 = (sign ^ sign) - sign; 787 const int dc_sign_ctx = txb_ctx->dc_sign_ctx; 788 cost += coeff_costs->dc_sign_cost[dc_sign_ctx][sign01]; 789 return cost; 790 } 791 } 792 } 793 const int(*base_cost)[8] = coeff_costs->base_cost; 794 for (c = eob - 2; c >= 1; --c) { 795 const int pos = scan[c]; 796 const int coeff_ctx = coeff_contexts[pos]; 797 const tran_low_t v = qcoeff[pos]; 798 const int level = abs(v); 799 const int cost0 = base_cost[coeff_ctx][AOMMIN(level, 3)]; 800 if (v) { 801 // sign bit cost 802 cost += av1_cost_literal(1); 803 if (level > NUM_BASE_LEVELS) { 804 const int ctx = get_br_ctx(levels, pos, bwl, tx_class); 805 cost += get_br_cost(level, lps_cost[ctx]); 806 } 807 } 808 cost += cost0; 809 } 810 if (c == 0) { 811 const int pos = scan[c]; 812 const tran_low_t v = qcoeff[pos]; 813 const int coeff_ctx = coeff_contexts[pos]; 814 const int sign = v >> 31; 815 const int level = (v ^ sign) - sign; 816 cost += base_cost[coeff_ctx][AOMMIN(level, 3)]; 817 818 if (v) { 819 // sign bit cost 820 const int sign01 = (sign ^ sign) - sign; 821 const int dc_sign_ctx = txb_ctx->dc_sign_ctx; 822 cost += coeff_costs->dc_sign_cost[dc_sign_ctx][sign01]; 823 if (level > NUM_BASE_LEVELS) { 824 const int ctx = get_br_ctx(levels, pos, bwl, tx_class); 825 cost += get_br_cost(level, lps_cost[ctx]); 826 } 827 } 828 } 829 return cost; 830 } 831 832 int av1_cost_coeffs_txb(const AV1_COMMON *const cm, const MACROBLOCK *x, 833 const int plane, const int block, const TX_SIZE tx_size, 834 const TX_TYPE tx_type, const TXB_CTX *const txb_ctx) { 835 const struct macroblock_plane *p = &x->plane[plane]; 836 const int eob = p->eobs[block]; 837 const TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size); 838 const PLANE_TYPE plane_type = get_plane_type(plane); 839 const LV_MAP_COEFF_COST *const coeff_costs = 840 &x->coeff_costs[txs_ctx][plane_type]; 841 if (eob == 0) { 842 return coeff_costs->txb_skip_cost[txb_ctx->txb_skip_ctx][1]; 843 } 844 845 const MACROBLOCKD *const xd = &x->e_mbd; 846 const TX_CLASS tx_class = tx_type_to_class[tx_type]; 847 848 #define WAREHOUSE_EFFICIENTS_TXB_CASE(tx_class_literal) \ 849 case tx_class_literal: \ 850 return warehouse_efficients_txb(cm, x, plane, block, tx_size, txb_ctx, p, \ 851 eob, plane_type, coeff_costs, xd, tx_type, \ 852 tx_class_literal); 853 switch (tx_class) { 854 WAREHOUSE_EFFICIENTS_TXB_CASE(TX_CLASS_2D); 855 WAREHOUSE_EFFICIENTS_TXB_CASE(TX_CLASS_HORIZ); 856 WAREHOUSE_EFFICIENTS_TXB_CASE(TX_CLASS_VERT); 857 #undef WAREHOUSE_EFFICIENTS_TXB_CASE 858 default: assert(false); return 0; 859 } 860 } 861 862 static int optimize_txb(TxbInfo *txb_info, const LV_MAP_COEFF_COST *txb_costs, 863 const LV_MAP_EOB_COST *txb_eob_costs, int *rate_cost) { 864 int update = 0; 865 if (txb_info->eob == 0) return update; 866 const int16_t *const scan = txb_info->scan_order->scan; 867 // forward optimize the nz_map` 868 const int init_eob = txb_info->eob; 869 const TX_CLASS tx_class = tx_type_to_class[txb_info->tx_type]; 870 const int eob_cost = 871 get_eob_cost(init_eob, txb_eob_costs, txb_costs, tx_class); 872 873 // backward optimize the level-k map 874 int accu_rate = eob_cost; 875 int64_t accu_dist = 0; 876 int64_t prev_eob_rd_cost = INT64_MAX; 877 int64_t cur_eob_rd_cost = 0; 878 879 { 880 const int si = init_eob - 1; 881 const int coeff_idx = scan[si]; 882 LevelDownStats stats; 883 get_dist_cost_stats(&stats, si, si == init_eob - 1, txb_costs, txb_info, 884 tx_class); 885 if ((stats.rd_low < stats.rd) && (stats.low_qc != 0)) { 886 update = 1; 887 update_coeff(coeff_idx, stats.low_qc, txb_info); 888 accu_rate += stats.rate_low; 889 accu_dist += stats.dist_low; 890 } else { 891 accu_rate += stats.rate; 892 accu_dist += stats.dist; 893 } 894 } 895 896 int si = init_eob - 2; 897 int8_t has_nz_tail = 0; 898 // eob is not fixed 899 for (; si >= 0 && has_nz_tail < 2; --si) { 900 assert(si != init_eob - 1); 901 const int coeff_idx = scan[si]; 902 tran_low_t qc = txb_info->qcoeff[coeff_idx]; 903 904 if (qc == 0) { 905 const int coeff_ctx = 906 get_lower_levels_ctx(txb_info->levels, coeff_idx, txb_info->bwl, 907 txb_info->tx_size, tx_class); 908 accu_rate += txb_costs->base_cost[coeff_ctx][0]; 909 } else { 910 LevelDownStats stats; 911 get_dist_cost_stats_with_eob(&stats, si, txb_costs, txb_info, tx_class); 912 // check if it is better to make this the last significant coefficient 913 int cur_eob_rate = 914 get_eob_cost(si + 1, txb_eob_costs, txb_costs, tx_class); 915 cur_eob_rd_cost = RDCOST(txb_info->rdmult, cur_eob_rate, 0); 916 prev_eob_rd_cost = 917 RDCOST(txb_info->rdmult, accu_rate, accu_dist) + stats.nz_rd; 918 if (cur_eob_rd_cost <= prev_eob_rd_cost) { 919 update = 1; 920 for (int j = si + 1; j < txb_info->eob; j++) { 921 const int coeff_pos_j = scan[j]; 922 update_coeff(coeff_pos_j, 0, txb_info); 923 } 924 txb_info->eob = si + 1; 925 926 // rerun cost calculation due to change of eob 927 accu_rate = cur_eob_rate; 928 accu_dist = 0; 929 get_dist_cost_stats(&stats, si, 1, txb_costs, txb_info, tx_class); 930 if ((stats.rd_low < stats.rd) && (stats.low_qc != 0)) { 931 update = 1; 932 update_coeff(coeff_idx, stats.low_qc, txb_info); 933 accu_rate += stats.rate_low; 934 accu_dist += stats.dist_low; 935 } else { 936 accu_rate += stats.rate; 937 accu_dist += stats.dist; 938 } 939 940 // reset non zero tail when new eob is found 941 has_nz_tail = 0; 942 } else { 943 int bUpdCoeff = 0; 944 if (stats.rd_low < stats.rd) { 945 if ((si < txb_info->eob - 1)) { 946 bUpdCoeff = 1; 947 update = 1; 948 } 949 } else { 950 ++has_nz_tail; 951 } 952 953 if (bUpdCoeff) { 954 update_coeff(coeff_idx, stats.low_qc, txb_info); 955 accu_rate += stats.rate_low; 956 accu_dist += stats.dist_low; 957 } else { 958 accu_rate += stats.rate; 959 accu_dist += stats.dist; 960 } 961 } 962 } 963 } // for (si) 964 965 // eob is fixed 966 for (; si >= 0; --si) { 967 assert(si != init_eob - 1); 968 const int coeff_idx = scan[si]; 969 tran_low_t qc = txb_info->qcoeff[coeff_idx]; 970 971 if (qc == 0) { 972 const int coeff_ctx = 973 get_lower_levels_ctx(txb_info->levels, coeff_idx, txb_info->bwl, 974 txb_info->tx_size, tx_class); 975 accu_rate += txb_costs->base_cost[coeff_ctx][0]; 976 } else { 977 LevelDownStats stats; 978 get_dist_cost_stats(&stats, si, 0, txb_costs, txb_info, tx_class); 979 980 int bUpdCoeff = 0; 981 if (stats.rd_low < stats.rd) { 982 if ((si < txb_info->eob - 1)) { 983 bUpdCoeff = 1; 984 update = 1; 985 } 986 } 987 if (bUpdCoeff) { 988 update_coeff(coeff_idx, stats.low_qc, txb_info); 989 accu_rate += stats.rate_low; 990 accu_dist += stats.dist_low; 991 } else { 992 accu_rate += stats.rate; 993 accu_dist += stats.dist; 994 } 995 } 996 } // for (si) 997 998 int non_zero_blk_rate = 999 txb_costs->txb_skip_cost[txb_info->txb_ctx->txb_skip_ctx][0]; 1000 prev_eob_rd_cost = 1001 RDCOST(txb_info->rdmult, accu_rate + non_zero_blk_rate, accu_dist); 1002 1003 int zero_blk_rate = 1004 txb_costs->txb_skip_cost[txb_info->txb_ctx->txb_skip_ctx][1]; 1005 int64_t zero_blk_rd_cost = RDCOST(txb_info->rdmult, zero_blk_rate, 0); 1006 if (zero_blk_rd_cost <= prev_eob_rd_cost) { 1007 update = 1; 1008 for (int j = 0; j < txb_info->eob; j++) { 1009 const int coeff_pos_j = scan[j]; 1010 update_coeff(coeff_pos_j, 0, txb_info); 1011 } 1012 txb_info->eob = 0; 1013 } 1014 1015 // record total rate cost 1016 *rate_cost = zero_blk_rd_cost <= prev_eob_rd_cost 1017 ? zero_blk_rate 1018 : accu_rate + non_zero_blk_rate; 1019 1020 if (txb_info->eob > 0) { 1021 *rate_cost += txb_info->tx_type_cost; 1022 } 1023 1024 return update; 1025 } 1026 1027 static void hbt_init() { 1028 hbt_hash_table = 1029 aom_malloc(sizeof(OptTxbQcoeff) * HBT_TABLE_SIZE * HBT_ARRAY_LENGTH); 1030 memset(hbt_hash_table, 0, 1031 sizeof(OptTxbQcoeff) * HBT_TABLE_SIZE * HBT_ARRAY_LENGTH); 1032 av1_crc32c_calculator_init(&crc_calculator); // 31 bit: qc & ctx 1033 1034 hbt_needs_init = 0; 1035 } 1036 1037 void hbt_destroy() { aom_free(hbt_hash_table); } 1038 1039 static int hbt_hash_miss(uint32_t hbt_ctx_hash, uint32_t hbt_qc_hash, 1040 TxbInfo *txb_info, const LV_MAP_COEFF_COST *txb_costs, 1041 const LV_MAP_EOB_COST *txb_eob_costs, 1042 const struct macroblock_plane *p, int block, 1043 int fast_mode, int *rate_cost) { 1044 (void)fast_mode; 1045 const int16_t *scan = txb_info->scan_order->scan; 1046 int prev_eob = txb_info->eob; 1047 assert(HBT_EOB <= 16); // Lengthen array if allowing longer eob. 1048 int32_t prev_coeff[16]; 1049 for (int i = 0; i < prev_eob; i++) { 1050 prev_coeff[i] = txb_info->qcoeff[scan[i]]; 1051 } 1052 for (int i = prev_eob; i < HBT_EOB; i++) { 1053 prev_coeff[i] = 0; // For compiler piece of mind. 1054 } 1055 1056 av1_txb_init_levels(txb_info->qcoeff, txb_info->width, txb_info->height, 1057 txb_info->levels); 1058 1059 const int update = 1060 optimize_txb(txb_info, txb_costs, txb_eob_costs, rate_cost); 1061 1062 // Overwrite old entry 1063 uint16_t hbt_table_index = hbt_ctx_hash % HBT_TABLE_SIZE; 1064 uint16_t hbt_array_index = hbt_qc_hash % HBT_ARRAY_LENGTH; 1065 hbt_hash_table[hbt_table_index * HBT_ARRAY_LENGTH + hbt_array_index] 1066 .rate_cost = *rate_cost; 1067 hbt_hash_table[hbt_table_index * HBT_ARRAY_LENGTH + hbt_array_index].init = 1; 1068 hbt_hash_table[hbt_table_index * HBT_ARRAY_LENGTH + hbt_array_index] 1069 .hbt_qc_hash = hbt_qc_hash; 1070 hbt_hash_table[hbt_table_index * HBT_ARRAY_LENGTH + hbt_array_index] 1071 .hbt_ctx_hash = hbt_ctx_hash; 1072 assert(prev_eob >= txb_info->eob); // eob can't get longer 1073 for (int i = 0; i < txb_info->eob; i++) { 1074 // Record how coeff changed. Convention: towards zero is negative. 1075 if (txb_info->qcoeff[scan[i]] > 0) 1076 hbt_hash_table[hbt_table_index * HBT_ARRAY_LENGTH + hbt_array_index] 1077 .deltas[i] = txb_info->qcoeff[scan[i]] - prev_coeff[i]; 1078 else 1079 hbt_hash_table[hbt_table_index * HBT_ARRAY_LENGTH + hbt_array_index] 1080 .deltas[i] = prev_coeff[i] - txb_info->qcoeff[scan[i]]; 1081 } 1082 for (int i = txb_info->eob; i < prev_eob; i++) { 1083 // If eob got shorter, record that all after it changed to zero. 1084 if (prev_coeff[i] > 0) 1085 hbt_hash_table[hbt_table_index * HBT_ARRAY_LENGTH + hbt_array_index] 1086 .deltas[i] = -prev_coeff[i]; 1087 else 1088 hbt_hash_table[hbt_table_index * HBT_ARRAY_LENGTH + hbt_array_index] 1089 .deltas[i] = prev_coeff[i]; 1090 } 1091 for (int i = prev_eob; i < HBT_EOB; i++) { 1092 // Record 'no change' after optimized coefficients run out. 1093 hbt_hash_table[hbt_table_index * HBT_ARRAY_LENGTH + hbt_array_index] 1094 .deltas[i] = 0; 1095 } 1096 1097 if (update) { 1098 p->eobs[block] = txb_info->eob; 1099 p->txb_entropy_ctx[block] = av1_get_txb_entropy_context( 1100 txb_info->qcoeff, txb_info->scan_order, txb_info->eob); 1101 } 1102 return txb_info->eob; 1103 } 1104 1105 static int hbt_hash_hit(uint32_t hbt_table_index, int hbt_array_index, 1106 TxbInfo *txb_info, const struct macroblock_plane *p, 1107 int block, int *rate_cost) { 1108 const int16_t *scan = txb_info->scan_order->scan; 1109 int new_eob = 0; 1110 int update = 0; 1111 1112 for (int i = 0; i < txb_info->eob; i++) { 1113 // Delta convention is negatives go towards zero, so only apply those ones. 1114 if (hbt_hash_table[hbt_table_index * HBT_ARRAY_LENGTH + hbt_array_index] 1115 .deltas[i] < 0) { 1116 if (txb_info->qcoeff[scan[i]] > 0) 1117 txb_info->qcoeff[scan[i]] += 1118 hbt_hash_table[hbt_table_index * HBT_ARRAY_LENGTH + hbt_array_index] 1119 .deltas[i]; 1120 else 1121 txb_info->qcoeff[scan[i]] -= 1122 hbt_hash_table[hbt_table_index * HBT_ARRAY_LENGTH + hbt_array_index] 1123 .deltas[i]; 1124 1125 update = 1; 1126 update_coeff(scan[i], txb_info->qcoeff[scan[i]], txb_info); 1127 } 1128 if (txb_info->qcoeff[scan[i]]) new_eob = i + 1; 1129 } 1130 1131 // Rate_cost can be calculated here instead (av1_cost_coeffs_txb), but 1132 // it is expensive and gives little benefit as long as qc_hash is high bit 1133 *rate_cost = 1134 hbt_hash_table[hbt_table_index * HBT_ARRAY_LENGTH + hbt_array_index] 1135 .rate_cost; 1136 1137 if (update) { 1138 txb_info->eob = new_eob; 1139 p->eobs[block] = txb_info->eob; 1140 p->txb_entropy_ctx[block] = av1_get_txb_entropy_context( 1141 txb_info->qcoeff, txb_info->scan_order, txb_info->eob); 1142 } 1143 1144 return txb_info->eob; 1145 } 1146 1147 static int hbt_search_match(uint32_t hbt_ctx_hash, uint32_t hbt_qc_hash, 1148 TxbInfo *txb_info, 1149 const LV_MAP_COEFF_COST *txb_costs, 1150 const LV_MAP_EOB_COST *txb_eob_costs, 1151 const struct macroblock_plane *p, int block, 1152 int fast_mode, int *rate_cost) { 1153 // Check for qcoeff match 1154 int hbt_array_index = hbt_qc_hash % HBT_ARRAY_LENGTH; 1155 int hbt_table_index = hbt_ctx_hash % HBT_TABLE_SIZE; 1156 1157 if (hbt_hash_table[hbt_table_index * HBT_ARRAY_LENGTH + hbt_array_index] 1158 .hbt_qc_hash == hbt_qc_hash && 1159 hbt_hash_table[hbt_table_index * HBT_ARRAY_LENGTH + hbt_array_index] 1160 .hbt_ctx_hash == hbt_ctx_hash && 1161 hbt_hash_table[hbt_table_index * HBT_ARRAY_LENGTH + hbt_array_index] 1162 .init) { 1163 return hbt_hash_hit(hbt_table_index, hbt_array_index, txb_info, p, block, 1164 rate_cost); 1165 } else { 1166 return hbt_hash_miss(hbt_ctx_hash, hbt_qc_hash, txb_info, txb_costs, 1167 txb_eob_costs, p, block, fast_mode, rate_cost); 1168 } 1169 } 1170 1171 static int hbt_create_hashes(TxbInfo *txb_info, 1172 const LV_MAP_COEFF_COST *txb_costs, 1173 const LV_MAP_EOB_COST *txb_eob_costs, 1174 const struct macroblock_plane *p, int block, 1175 int fast_mode, int *rate_cost) { 1176 // Initialize hash table if needed. 1177 if (hbt_needs_init) { 1178 hbt_init(); 1179 } 1180 1181 //// Hash creation 1182 uint8_t txb_hash_data[256]; // Asserts below to ensure enough space. 1183 const int16_t *scan = txb_info->scan_order->scan; 1184 uint8_t chunk = 0; 1185 int hash_data_index = 0; 1186 1187 // Make qc_hash. 1188 int packing_index = 0; // needed for packing. 1189 for (int i = 0; i < txb_info->eob; i++) { 1190 tran_low_t prechunk = txb_info->qcoeff[scan[i]]; 1191 1192 // Softening: Improves speed. Aligns with signed deltas. 1193 if (prechunk < 0) prechunk *= -1; 1194 1195 // Early kick out: Don't apply feature if there are large coeffs: 1196 // If this kickout value is removed or raised beyond int8_t, 1197 // widen deltas type in OptTxbQcoeff struct. 1198 assert((int8_t)HBT_KICKOUT == HBT_KICKOUT); // If not, widen types. 1199 if (prechunk > HBT_KICKOUT) { 1200 av1_txb_init_levels(txb_info->qcoeff, txb_info->width, txb_info->height, 1201 txb_info->levels); 1202 1203 const int update = 1204 optimize_txb(txb_info, txb_costs, txb_eob_costs, rate_cost); 1205 1206 if (update) { 1207 p->eobs[block] = txb_info->eob; 1208 p->txb_entropy_ctx[block] = av1_get_txb_entropy_context( 1209 txb_info->qcoeff, txb_info->scan_order, txb_info->eob); 1210 } 1211 return txb_info->eob; 1212 } 1213 1214 // Since coeffs are 0 to 3, only 2 bits are needed: pack into bytes 1215 if (packing_index == 0) txb_hash_data[hash_data_index] = 0; 1216 chunk = prechunk << packing_index; 1217 packing_index += 2; 1218 txb_hash_data[hash_data_index] |= chunk; 1219 1220 // Full byte: 1221 if (packing_index == 8) { 1222 packing_index = 0; 1223 hash_data_index++; 1224 } 1225 } 1226 // Needed when packing_index != 0, to include final byte. 1227 hash_data_index++; 1228 assert(hash_data_index <= 64); 1229 // 31 bit qc_hash: index to array 1230 uint32_t hbt_qc_hash = 1231 av1_get_crc32c_value(&crc_calculator, txb_hash_data, hash_data_index); 1232 1233 // Make ctx_hash. 1234 hash_data_index = 0; 1235 tran_low_t prechunk; 1236 1237 for (int i = 0; i < txb_info->eob; i++) { 1238 // Save as magnitudes towards or away from zero. 1239 if (txb_info->tcoeff[scan[i]] >= 0) 1240 prechunk = txb_info->tcoeff[scan[i]] - txb_info->dqcoeff[scan[i]]; 1241 else 1242 prechunk = txb_info->dqcoeff[scan[i]] - txb_info->tcoeff[scan[i]]; 1243 1244 chunk = prechunk & 0xff; 1245 txb_hash_data[hash_data_index++] = chunk; 1246 } 1247 1248 // Extra ctx data: 1249 // Include dequants. 1250 txb_hash_data[hash_data_index++] = txb_info->dequant[0] & 0xff; 1251 txb_hash_data[hash_data_index++] = txb_info->dequant[1] & 0xff; 1252 chunk = txb_info->txb_ctx->txb_skip_ctx & 0xff; 1253 txb_hash_data[hash_data_index++] = chunk; 1254 chunk = txb_info->txb_ctx->dc_sign_ctx & 0xff; 1255 txb_hash_data[hash_data_index++] = chunk; 1256 // eob 1257 chunk = txb_info->eob & 0xff; 1258 txb_hash_data[hash_data_index++] = chunk; 1259 // rdmult (int64) 1260 chunk = txb_info->rdmult & 0xff; 1261 txb_hash_data[hash_data_index++] = chunk; 1262 // tx_type 1263 chunk = txb_info->tx_type & 0xff; 1264 txb_hash_data[hash_data_index++] = chunk; 1265 // base_eob_cost 1266 for (int i = 1; i < 3; i++) { // i = 0 are softened away 1267 for (int j = 0; j < SIG_COEF_CONTEXTS_EOB; j++) { 1268 chunk = (txb_costs->base_eob_cost[j][i] & 0xff00) >> 8; 1269 txb_hash_data[hash_data_index++] = chunk; 1270 } 1271 } 1272 // eob_cost 1273 for (int i = 0; i < 11; i++) { 1274 for (int j = 0; j < 2; j++) { 1275 chunk = (txb_eob_costs->eob_cost[j][i] & 0xff00) >> 8; 1276 txb_hash_data[hash_data_index++] = chunk; 1277 } 1278 } 1279 // dc_sign_cost 1280 for (int i = 0; i < 2; i++) { 1281 for (int j = 0; j < DC_SIGN_CONTEXTS; j++) { 1282 chunk = (txb_costs->dc_sign_cost[j][i] & 0xff00) >> 8; 1283 txb_hash_data[hash_data_index++] = chunk; 1284 } 1285 } 1286 1287 assert(hash_data_index <= 256); 1288 // 31 bit ctx_hash: used to index table 1289 uint32_t hbt_ctx_hash = 1290 av1_get_crc32c_value(&crc_calculator, txb_hash_data, hash_data_index); 1291 //// End hash creation 1292 1293 return hbt_search_match(hbt_ctx_hash, hbt_qc_hash, txb_info, txb_costs, 1294 txb_eob_costs, p, block, fast_mode, rate_cost); 1295 } 1296 1297 static AOM_FORCE_INLINE int get_two_coeff_cost_simple( 1298 int ci, tran_low_t abs_qc, int coeff_ctx, 1299 const LV_MAP_COEFF_COST *txb_costs, int bwl, TX_CLASS tx_class, 1300 const uint8_t *levels, int *cost_low) { 1301 // this simple version assumes the coeff's scan_idx is not DC (scan_idx != 0) 1302 // and not the last (scan_idx != eob - 1) 1303 assert(ci > 0); 1304 int cost = txb_costs->base_cost[coeff_ctx][AOMMIN(abs_qc, 3)]; 1305 int diff = 0; 1306 if (abs_qc <= 3) diff = txb_costs->base_cost[coeff_ctx][abs_qc + 4]; 1307 if (abs_qc) { 1308 cost += av1_cost_literal(1); 1309 if (abs_qc > NUM_BASE_LEVELS) { 1310 const int br_ctx = get_br_ctx(levels, ci, bwl, tx_class); 1311 int brcost_diff = 0; 1312 cost += get_br_cost_with_diff(abs_qc, txb_costs->lps_cost[br_ctx], 1313 &brcost_diff); 1314 diff += brcost_diff; 1315 } 1316 } 1317 *cost_low = cost - diff; 1318 1319 return cost; 1320 } 1321 1322 static INLINE int get_coeff_cost_eob(int ci, tran_low_t abs_qc, int sign, 1323 int coeff_ctx, int dc_sign_ctx, 1324 const LV_MAP_COEFF_COST *txb_costs, 1325 int bwl, TX_CLASS tx_class) { 1326 int cost = 0; 1327 cost += txb_costs->base_eob_cost[coeff_ctx][AOMMIN(abs_qc, 3) - 1]; 1328 if (abs_qc != 0) { 1329 if (ci == 0) { 1330 cost += txb_costs->dc_sign_cost[dc_sign_ctx][sign]; 1331 } else { 1332 cost += av1_cost_literal(1); 1333 } 1334 if (abs_qc > NUM_BASE_LEVELS) { 1335 int br_ctx; 1336 br_ctx = get_br_ctx_eob(ci, bwl, tx_class); 1337 cost += get_br_cost(abs_qc, txb_costs->lps_cost[br_ctx]); 1338 } 1339 } 1340 return cost; 1341 } 1342 1343 static INLINE int get_coeff_cost_general(int is_last, int ci, tran_low_t abs_qc, 1344 int sign, int coeff_ctx, 1345 int dc_sign_ctx, 1346 const LV_MAP_COEFF_COST *txb_costs, 1347 int bwl, TX_CLASS tx_class, 1348 const uint8_t *levels) { 1349 int cost = 0; 1350 if (is_last) { 1351 cost += txb_costs->base_eob_cost[coeff_ctx][AOMMIN(abs_qc, 3) - 1]; 1352 } else { 1353 cost += txb_costs->base_cost[coeff_ctx][AOMMIN(abs_qc, 3)]; 1354 } 1355 if (abs_qc != 0) { 1356 if (ci == 0) { 1357 cost += txb_costs->dc_sign_cost[dc_sign_ctx][sign]; 1358 } else { 1359 cost += av1_cost_literal(1); 1360 } 1361 if (abs_qc > NUM_BASE_LEVELS) { 1362 int br_ctx; 1363 if (is_last) 1364 br_ctx = get_br_ctx_eob(ci, bwl, tx_class); 1365 else 1366 br_ctx = get_br_ctx(levels, ci, bwl, tx_class); 1367 cost += get_br_cost(abs_qc, txb_costs->lps_cost[br_ctx]); 1368 } 1369 } 1370 return cost; 1371 } 1372 1373 static INLINE void get_qc_dqc_low(tran_low_t abs_qc, int sign, int dqv, 1374 int shift, tran_low_t *qc_low, 1375 tran_low_t *dqc_low) { 1376 tran_low_t abs_qc_low = abs_qc - 1; 1377 *qc_low = (-sign ^ abs_qc_low) + sign; 1378 assert((sign ? -abs_qc_low : abs_qc_low) == *qc_low); 1379 tran_low_t abs_dqc_low = (abs_qc_low * dqv) >> shift; 1380 *dqc_low = (-sign ^ abs_dqc_low) + sign; 1381 assert((sign ? -abs_dqc_low : abs_dqc_low) == *dqc_low); 1382 } 1383 1384 static INLINE void update_coeff_general( 1385 int *accu_rate, int64_t *accu_dist, int si, int eob, TX_SIZE tx_size, 1386 TX_CLASS tx_class, int bwl, int height, int64_t rdmult, int shift, 1387 int dc_sign_ctx, const int16_t *dequant, const int16_t *scan, 1388 const LV_MAP_COEFF_COST *txb_costs, const tran_low_t *tcoeff, 1389 tran_low_t *qcoeff, tran_low_t *dqcoeff, uint8_t *levels) { 1390 const int dqv = dequant[si != 0]; 1391 const int ci = scan[si]; 1392 const tran_low_t qc = qcoeff[ci]; 1393 const int is_last = si == (eob - 1); 1394 const int coeff_ctx = get_lower_levels_ctx_general( 1395 is_last, si, bwl, height, levels, ci, tx_size, tx_class); 1396 if (qc == 0) { 1397 *accu_rate += txb_costs->base_cost[coeff_ctx][0]; 1398 } else { 1399 const int sign = (qc < 0) ? 1 : 0; 1400 const tran_low_t abs_qc = abs(qc); 1401 const tran_low_t tqc = tcoeff[ci]; 1402 const tran_low_t dqc = dqcoeff[ci]; 1403 const int64_t dist = get_coeff_dist(tqc, dqc, shift); 1404 const int64_t dist0 = get_coeff_dist(tqc, 0, shift); 1405 const int rate = 1406 get_coeff_cost_general(is_last, ci, abs_qc, sign, coeff_ctx, 1407 dc_sign_ctx, txb_costs, bwl, tx_class, levels); 1408 const int64_t rd = RDCOST(rdmult, rate, dist); 1409 1410 tran_low_t qc_low, dqc_low; 1411 tran_low_t abs_qc_low; 1412 int64_t dist_low, rd_low; 1413 int rate_low; 1414 if (abs_qc == 1) { 1415 abs_qc_low = qc_low = dqc_low = 0; 1416 dist_low = dist0; 1417 rate_low = txb_costs->base_cost[coeff_ctx][0]; 1418 } else { 1419 get_qc_dqc_low(abs_qc, sign, dqv, shift, &qc_low, &dqc_low); 1420 abs_qc_low = abs_qc - 1; 1421 dist_low = get_coeff_dist(tqc, dqc_low, shift); 1422 rate_low = 1423 get_coeff_cost_general(is_last, ci, abs_qc_low, sign, coeff_ctx, 1424 dc_sign_ctx, txb_costs, bwl, tx_class, levels); 1425 } 1426 1427 rd_low = RDCOST(rdmult, rate_low, dist_low); 1428 if (rd_low < rd) { 1429 qcoeff[ci] = qc_low; 1430 dqcoeff[ci] = dqc_low; 1431 levels[get_padded_idx(ci, bwl)] = AOMMIN(abs_qc_low, INT8_MAX); 1432 *accu_rate += rate_low; 1433 *accu_dist += dist_low - dist0; 1434 } else { 1435 *accu_rate += rate; 1436 *accu_dist += dist - dist0; 1437 } 1438 } 1439 } 1440 1441 static AOM_FORCE_INLINE void update_coeff_simple( 1442 int *accu_rate, int si, int eob, TX_SIZE tx_size, TX_CLASS tx_class, 1443 int bwl, int64_t rdmult, int shift, const int16_t *dequant, 1444 const int16_t *scan, const LV_MAP_COEFF_COST *txb_costs, 1445 const tran_low_t *tcoeff, tran_low_t *qcoeff, tran_low_t *dqcoeff, 1446 uint8_t *levels) { 1447 const int dqv = dequant[1]; 1448 (void)eob; 1449 // this simple version assumes the coeff's scan_idx is not DC (scan_idx != 0) 1450 // and not the last (scan_idx != eob - 1) 1451 assert(si != eob - 1); 1452 assert(si > 0); 1453 const int ci = scan[si]; 1454 const tran_low_t qc = qcoeff[ci]; 1455 const int coeff_ctx = 1456 get_lower_levels_ctx(levels, ci, bwl, tx_size, tx_class); 1457 if (qc == 0) { 1458 *accu_rate += txb_costs->base_cost[coeff_ctx][0]; 1459 } else { 1460 const tran_low_t abs_qc = abs(qc); 1461 const tran_low_t abs_tqc = abs(tcoeff[ci]); 1462 const tran_low_t abs_dqc = abs(dqcoeff[ci]); 1463 int rate_low = 0; 1464 const int rate = get_two_coeff_cost_simple( 1465 ci, abs_qc, coeff_ctx, txb_costs, bwl, tx_class, levels, &rate_low); 1466 if (abs_dqc < abs_tqc) { 1467 *accu_rate += rate; 1468 return; 1469 } 1470 1471 const int64_t dist = get_coeff_dist(abs_tqc, abs_dqc, shift); 1472 const int64_t rd = RDCOST(rdmult, rate, dist); 1473 1474 const tran_low_t abs_qc_low = abs_qc - 1; 1475 const tran_low_t abs_dqc_low = (abs_qc_low * dqv) >> shift; 1476 const int64_t dist_low = get_coeff_dist(abs_tqc, abs_dqc_low, shift); 1477 const int64_t rd_low = RDCOST(rdmult, rate_low, dist_low); 1478 1479 if (rd_low < rd) { 1480 const int sign = (qc < 0) ? 1 : 0; 1481 qcoeff[ci] = (-sign ^ abs_qc_low) + sign; 1482 dqcoeff[ci] = (-sign ^ abs_dqc_low) + sign; 1483 levels[get_padded_idx(ci, bwl)] = AOMMIN(abs_qc_low, INT8_MAX); 1484 *accu_rate += rate_low; 1485 } else { 1486 *accu_rate += rate; 1487 } 1488 } 1489 } 1490 1491 static INLINE void update_coeff_eob_fast(int *eob, int shift, 1492 const int16_t *dequant_ptr, 1493 const int16_t *scan, 1494 const tran_low_t *coeff_ptr, 1495 tran_low_t *qcoeff_ptr, 1496 tran_low_t *dqcoeff_ptr) { 1497 // TODO(sarahparker) make this work for aomqm 1498 int eob_out = *eob; 1499 int zbin[2] = { dequant_ptr[0] + ROUND_POWER_OF_TWO(dequant_ptr[0] * 70, 7), 1500 dequant_ptr[1] + ROUND_POWER_OF_TWO(dequant_ptr[1] * 70, 7) }; 1501 1502 for (int i = *eob - 1; i >= 0; i--) { 1503 const int rc = scan[i]; 1504 const int qcoeff = qcoeff_ptr[rc]; 1505 const int coeff = coeff_ptr[rc]; 1506 const int coeff_sign = (coeff >> 31); 1507 int64_t abs_coeff = (coeff ^ coeff_sign) - coeff_sign; 1508 1509 if (((abs_coeff << (1 + shift)) < zbin[rc != 0]) || (qcoeff == 0)) { 1510 eob_out--; 1511 qcoeff_ptr[rc] = 0; 1512 dqcoeff_ptr[rc] = 0; 1513 } else { 1514 break; 1515 } 1516 } 1517 1518 *eob = eob_out; 1519 } 1520 1521 static AOM_FORCE_INLINE void update_coeff_eob( 1522 int *accu_rate, int64_t *accu_dist, int *eob, int *nz_num, int *nz_ci, 1523 int si, TX_SIZE tx_size, TX_CLASS tx_class, int bwl, int height, 1524 int dc_sign_ctx, int64_t rdmult, int shift, const int16_t *dequant, 1525 const int16_t *scan, const LV_MAP_EOB_COST *txb_eob_costs, 1526 const LV_MAP_COEFF_COST *txb_costs, const tran_low_t *tcoeff, 1527 tran_low_t *qcoeff, tran_low_t *dqcoeff, uint8_t *levels, int sharpness) { 1528 const int dqv = dequant[si != 0]; 1529 assert(si != *eob - 1); 1530 const int ci = scan[si]; 1531 const tran_low_t qc = qcoeff[ci]; 1532 const int coeff_ctx = 1533 get_lower_levels_ctx(levels, ci, bwl, tx_size, tx_class); 1534 if (qc == 0) { 1535 *accu_rate += txb_costs->base_cost[coeff_ctx][0]; 1536 } else { 1537 int lower_level = 0; 1538 const tran_low_t abs_qc = abs(qc); 1539 const tran_low_t tqc = tcoeff[ci]; 1540 const tran_low_t dqc = dqcoeff[ci]; 1541 const int sign = (qc < 0) ? 1 : 0; 1542 const int64_t dist0 = get_coeff_dist(tqc, 0, shift); 1543 int64_t dist = get_coeff_dist(tqc, dqc, shift) - dist0; 1544 int rate = 1545 get_coeff_cost_general(0, ci, abs_qc, sign, coeff_ctx, dc_sign_ctx, 1546 txb_costs, bwl, tx_class, levels); 1547 int64_t rd = RDCOST(rdmult, *accu_rate + rate, *accu_dist + dist); 1548 1549 tran_low_t qc_low, dqc_low; 1550 tran_low_t abs_qc_low; 1551 int64_t dist_low, rd_low; 1552 int rate_low; 1553 if (abs_qc == 1) { 1554 abs_qc_low = 0; 1555 dqc_low = qc_low = 0; 1556 dist_low = 0; 1557 rate_low = txb_costs->base_cost[coeff_ctx][0]; 1558 rd_low = RDCOST(rdmult, *accu_rate + rate_low, *accu_dist); 1559 } else { 1560 get_qc_dqc_low(abs_qc, sign, dqv, shift, &qc_low, &dqc_low); 1561 abs_qc_low = abs_qc - 1; 1562 dist_low = get_coeff_dist(tqc, dqc_low, shift) - dist0; 1563 rate_low = 1564 get_coeff_cost_general(0, ci, abs_qc_low, sign, coeff_ctx, 1565 dc_sign_ctx, txb_costs, bwl, tx_class, levels); 1566 rd_low = RDCOST(rdmult, *accu_rate + rate_low, *accu_dist + dist_low); 1567 } 1568 1569 int lower_level_new_eob = 0; 1570 const int new_eob = si + 1; 1571 const int coeff_ctx_new_eob = get_lower_levels_ctx_eob(bwl, height, si); 1572 const int new_eob_cost = 1573 get_eob_cost(new_eob, txb_eob_costs, txb_costs, tx_class); 1574 int rate_coeff_eob = 1575 new_eob_cost + get_coeff_cost_eob(ci, abs_qc, sign, coeff_ctx_new_eob, 1576 dc_sign_ctx, txb_costs, bwl, 1577 tx_class); 1578 int64_t dist_new_eob = dist; 1579 int64_t rd_new_eob = RDCOST(rdmult, rate_coeff_eob, dist_new_eob); 1580 1581 if (abs_qc_low > 0) { 1582 const int rate_coeff_eob_low = 1583 new_eob_cost + get_coeff_cost_eob(ci, abs_qc_low, sign, 1584 coeff_ctx_new_eob, dc_sign_ctx, 1585 txb_costs, bwl, tx_class); 1586 const int64_t dist_new_eob_low = dist_low; 1587 const int64_t rd_new_eob_low = 1588 RDCOST(rdmult, rate_coeff_eob_low, dist_new_eob_low); 1589 if (rd_new_eob_low < rd_new_eob) { 1590 lower_level_new_eob = 1; 1591 rd_new_eob = rd_new_eob_low; 1592 rate_coeff_eob = rate_coeff_eob_low; 1593 dist_new_eob = dist_new_eob_low; 1594 } 1595 } 1596 1597 if (rd_low < rd) { 1598 lower_level = 1; 1599 rd = rd_low; 1600 rate = rate_low; 1601 dist = dist_low; 1602 } 1603 1604 if (sharpness == 0 && rd_new_eob < rd) { 1605 for (int ni = 0; ni < *nz_num; ++ni) { 1606 int last_ci = nz_ci[ni]; 1607 levels[get_padded_idx(last_ci, bwl)] = 0; 1608 qcoeff[last_ci] = 0; 1609 dqcoeff[last_ci] = 0; 1610 } 1611 *eob = new_eob; 1612 *nz_num = 0; 1613 *accu_rate = rate_coeff_eob; 1614 *accu_dist = dist_new_eob; 1615 lower_level = lower_level_new_eob; 1616 } else { 1617 *accu_rate += rate; 1618 *accu_dist += dist; 1619 } 1620 1621 if (lower_level) { 1622 qcoeff[ci] = qc_low; 1623 dqcoeff[ci] = dqc_low; 1624 levels[get_padded_idx(ci, bwl)] = AOMMIN(abs_qc_low, INT8_MAX); 1625 } 1626 if (qcoeff[ci]) { 1627 nz_ci[*nz_num] = ci; 1628 ++*nz_num; 1629 } 1630 } 1631 } 1632 1633 static INLINE void update_skip(int *accu_rate, int64_t accu_dist, int *eob, 1634 int nz_num, int *nz_ci, int64_t rdmult, 1635 int skip_cost, int non_skip_cost, 1636 tran_low_t *qcoeff, tran_low_t *dqcoeff, 1637 int sharpness) { 1638 const int64_t rd = RDCOST(rdmult, *accu_rate + non_skip_cost, accu_dist); 1639 const int64_t rd_new_eob = RDCOST(rdmult, skip_cost, 0); 1640 if (sharpness == 0 && rd_new_eob < rd) { 1641 for (int i = 0; i < nz_num; ++i) { 1642 const int ci = nz_ci[i]; 1643 qcoeff[ci] = 0; 1644 dqcoeff[ci] = 0; 1645 // no need to set up levels because this is the last step 1646 // levels[get_padded_idx(ci, bwl)] = 0; 1647 } 1648 *accu_rate = 0; 1649 *eob = 0; 1650 } 1651 } 1652 1653 int av1_optimize_txb_new(const struct AV1_COMP *cpi, MACROBLOCK *x, int plane, 1654 int block, TX_SIZE tx_size, TX_TYPE tx_type, 1655 const TXB_CTX *const txb_ctx, int *rate_cost, 1656 int sharpness, int fast_mode) { 1657 MACROBLOCKD *xd = &x->e_mbd; 1658 struct macroblockd_plane *pd = &xd->plane[plane]; 1659 const struct macroblock_plane *p = &x->plane[plane]; 1660 const SCAN_ORDER *scan_order = get_scan(tx_size, tx_type); 1661 const int16_t *scan = scan_order->scan; 1662 const int shift = av1_get_tx_scale(tx_size); 1663 int eob = p->eobs[block]; 1664 const int16_t *dequant = p->dequant_QTX; 1665 tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block); 1666 tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); 1667 const tran_low_t *tcoeff = BLOCK_OFFSET(p->coeff, block); 1668 1669 if (fast_mode) { 1670 update_coeff_eob_fast(&eob, shift, dequant, scan, tcoeff, qcoeff, dqcoeff); 1671 p->eobs[block] = eob; 1672 if (eob == 0) { 1673 *rate_cost = av1_cost_skip_txb(x, txb_ctx, plane, tx_size); 1674 return eob; 1675 } 1676 } 1677 1678 const AV1_COMMON *cm = &cpi->common; 1679 const PLANE_TYPE plane_type = get_plane_type(plane); 1680 const TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size); 1681 const TX_CLASS tx_class = tx_type_to_class[tx_type]; 1682 const MB_MODE_INFO *mbmi = xd->mi[0]; 1683 const int bwl = get_txb_bwl(tx_size); 1684 const int width = get_txb_wide(tx_size); 1685 const int height = get_txb_high(tx_size); 1686 assert(width == (1 << bwl)); 1687 const int is_inter = is_inter_block(mbmi); 1688 const LV_MAP_COEFF_COST *txb_costs = &x->coeff_costs[txs_ctx][plane_type]; 1689 const int eob_multi_size = txsize_log2_minus4[tx_size]; 1690 const LV_MAP_EOB_COST *txb_eob_costs = 1691 &x->eob_costs[eob_multi_size][plane_type]; 1692 1693 const int rshift = 1694 (sharpness + 1695 (cpi->oxcf.aq_mode == VARIANCE_AQ && mbmi->segment_id < 4 1696 ? 7 - mbmi->segment_id 1697 : 2) + 1698 (cpi->oxcf.aq_mode != VARIANCE_AQ && 1699 cpi->oxcf.deltaq_mode > NO_DELTA_Q && x->sb_energy_level < 0 1700 ? (3 - x->sb_energy_level) 1701 : 0)); 1702 const int64_t rdmult = 1703 (((int64_t)x->rdmult * 1704 (plane_rd_mult[is_inter][plane_type] << (2 * (xd->bd - 8)))) + 1705 2) >> 1706 rshift; 1707 1708 uint8_t levels_buf[TX_PAD_2D]; 1709 uint8_t *const levels = set_levels(levels_buf, width); 1710 1711 if (eob > 1) av1_txb_init_levels(qcoeff, width, height, levels); 1712 1713 // TODO(angirbird): check iqmatrix 1714 1715 const int non_skip_cost = txb_costs->txb_skip_cost[txb_ctx->txb_skip_ctx][0]; 1716 const int skip_cost = txb_costs->txb_skip_cost[txb_ctx->txb_skip_ctx][1]; 1717 const int eob_cost = get_eob_cost(eob, txb_eob_costs, txb_costs, tx_class); 1718 int accu_rate = eob_cost; 1719 int64_t accu_dist = 0; 1720 int si = eob - 1; 1721 const int ci = scan[si]; 1722 const tran_low_t qc = qcoeff[ci]; 1723 const tran_low_t abs_qc = abs(qc); 1724 const int sign = qc < 0; 1725 const int max_nz_num = 2; 1726 int nz_num = 1; 1727 int nz_ci[3] = { ci, 0, 0 }; 1728 if (abs_qc >= 2) { 1729 update_coeff_general(&accu_rate, &accu_dist, si, eob, tx_size, tx_class, 1730 bwl, height, rdmult, shift, txb_ctx->dc_sign_ctx, 1731 dequant, scan, txb_costs, tcoeff, qcoeff, dqcoeff, 1732 levels); 1733 --si; 1734 } else { 1735 assert(abs_qc == 1); 1736 const int coeff_ctx = get_lower_levels_ctx_eob(bwl, height, si); 1737 accu_rate += 1738 get_coeff_cost_eob(ci, abs_qc, sign, coeff_ctx, txb_ctx->dc_sign_ctx, 1739 txb_costs, bwl, tx_class); 1740 const tran_low_t tqc = tcoeff[ci]; 1741 const tran_low_t dqc = dqcoeff[ci]; 1742 const int64_t dist = get_coeff_dist(tqc, dqc, shift); 1743 const int64_t dist0 = get_coeff_dist(tqc, 0, shift); 1744 accu_dist += dist - dist0; 1745 --si; 1746 } 1747 1748 #define UPDATE_COEFF_EOB_CASE(tx_class_literal) \ 1749 case tx_class_literal: \ 1750 for (; si >= 0 && nz_num <= max_nz_num && !fast_mode; --si) { \ 1751 update_coeff_eob(&accu_rate, &accu_dist, &eob, &nz_num, nz_ci, si, \ 1752 tx_size, tx_class_literal, bwl, height, \ 1753 txb_ctx->dc_sign_ctx, rdmult, shift, dequant, scan, \ 1754 txb_eob_costs, txb_costs, tcoeff, qcoeff, dqcoeff, \ 1755 levels, sharpness); \ 1756 } \ 1757 break; 1758 switch (tx_class) { 1759 UPDATE_COEFF_EOB_CASE(TX_CLASS_2D); 1760 UPDATE_COEFF_EOB_CASE(TX_CLASS_HORIZ); 1761 UPDATE_COEFF_EOB_CASE(TX_CLASS_VERT); 1762 #undef UPDATE_COEFF_EOB_CASE 1763 default: assert(false); 1764 } 1765 1766 if (si == -1 && nz_num <= max_nz_num) { 1767 update_skip(&accu_rate, accu_dist, &eob, nz_num, nz_ci, rdmult, skip_cost, 1768 non_skip_cost, qcoeff, dqcoeff, sharpness); 1769 } 1770 1771 #define UPDATE_COEFF_SIMPLE_CASE(tx_class_literal) \ 1772 case tx_class_literal: \ 1773 for (; si >= 1; --si) { \ 1774 update_coeff_simple(&accu_rate, si, eob, tx_size, tx_class_literal, bwl, \ 1775 rdmult, shift, dequant, scan, txb_costs, tcoeff, \ 1776 qcoeff, dqcoeff, levels); \ 1777 } \ 1778 break; 1779 switch (tx_class) { 1780 UPDATE_COEFF_SIMPLE_CASE(TX_CLASS_2D); 1781 UPDATE_COEFF_SIMPLE_CASE(TX_CLASS_HORIZ); 1782 UPDATE_COEFF_SIMPLE_CASE(TX_CLASS_VERT); 1783 #undef UPDATE_COEFF_SIMPLE_CASE 1784 default: assert(false); 1785 } 1786 1787 // DC position 1788 if (si == 0) { 1789 // no need to update accu_dist because it's not used after this point 1790 int64_t dummy_dist = 0; 1791 update_coeff_general(&accu_rate, &dummy_dist, si, eob, tx_size, tx_class, 1792 bwl, height, rdmult, shift, txb_ctx->dc_sign_ctx, 1793 dequant, scan, txb_costs, tcoeff, qcoeff, dqcoeff, 1794 levels); 1795 } 1796 1797 const int tx_type_cost = get_tx_type_cost(cm, x, xd, plane, tx_size, tx_type); 1798 if (eob == 0) 1799 accu_rate += skip_cost; 1800 else 1801 accu_rate += non_skip_cost + tx_type_cost; 1802 1803 p->eobs[block] = eob; 1804 p->txb_entropy_ctx[block] = 1805 av1_get_txb_entropy_context(qcoeff, scan_order, p->eobs[block]); 1806 1807 *rate_cost = accu_rate; 1808 return eob; 1809 } 1810 1811 // This function is deprecated, but we keep it here because hash trellis 1812 // is not integrated with av1_optimize_txb_new yet 1813 int av1_optimize_txb(const struct AV1_COMP *cpi, MACROBLOCK *x, int plane, 1814 int blk_row, int blk_col, int block, TX_SIZE tx_size, 1815 TXB_CTX *txb_ctx, int fast_mode, int *rate_cost) { 1816 const AV1_COMMON *cm = &cpi->common; 1817 MACROBLOCKD *const xd = &x->e_mbd; 1818 const PLANE_TYPE plane_type = get_plane_type(plane); 1819 const TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size); 1820 const TX_TYPE tx_type = av1_get_tx_type(plane_type, xd, blk_row, blk_col, 1821 tx_size, cm->reduced_tx_set_used); 1822 const MB_MODE_INFO *mbmi = xd->mi[0]; 1823 const struct macroblock_plane *p = &x->plane[plane]; 1824 struct macroblockd_plane *pd = &xd->plane[plane]; 1825 const int eob = p->eobs[block]; 1826 tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block); 1827 tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); 1828 const tran_low_t *tcoeff = BLOCK_OFFSET(p->coeff, block); 1829 const int16_t *dequant = p->dequant_QTX; 1830 const int seg_eob = av1_get_max_eob(tx_size); 1831 const int bwl = get_txb_bwl(tx_size); 1832 const int width = get_txb_wide(tx_size); 1833 const int height = get_txb_high(tx_size); 1834 const int is_inter = is_inter_block(mbmi); 1835 const SCAN_ORDER *const scan_order = get_scan(tx_size, tx_type); 1836 const LV_MAP_COEFF_COST *txb_costs = &x->coeff_costs[txs_ctx][plane_type]; 1837 const int eob_multi_size = txsize_log2_minus4[tx_size]; 1838 const LV_MAP_EOB_COST txb_eob_costs = 1839 x->eob_costs[eob_multi_size][plane_type]; 1840 1841 const int shift = av1_get_tx_scale(tx_size); 1842 const int64_t rdmult = 1843 (((int64_t)x->rdmult * plane_rd_mult[is_inter][plane_type] 1844 << (2 * (xd->bd - 8))) + 1845 2) >> 1846 2; 1847 uint8_t levels_buf[TX_PAD_2D]; 1848 uint8_t *const levels = set_levels(levels_buf, width); 1849 const TX_SIZE qm_tx_size = av1_get_adjusted_tx_size(tx_size); 1850 const qm_val_t *iqmatrix = 1851 IS_2D_TRANSFORM(tx_type) 1852 ? pd->seg_iqmatrix[mbmi->segment_id][qm_tx_size] 1853 : cm->giqmatrix[NUM_QM_LEVELS - 1][0][qm_tx_size]; 1854 assert(width == (1 << bwl)); 1855 const int tx_type_cost = get_tx_type_cost(cm, x, xd, plane, tx_size, tx_type); 1856 TxbInfo txb_info = { 1857 qcoeff, levels, dqcoeff, tcoeff, dequant, shift, tx_size, 1858 txs_ctx, tx_type, bwl, width, height, eob, seg_eob, 1859 scan_order, txb_ctx, rdmult, iqmatrix, tx_type_cost, 1860 }; 1861 1862 // Hash based trellis (hbt) speed feature: avoid expensive optimize_txb calls 1863 // by storing the coefficient deltas in a hash table. 1864 // Currently disabled in speedfeatures.c 1865 if (eob <= HBT_EOB && eob > 0 && cpi->sf.use_hash_based_trellis) { 1866 return hbt_create_hashes(&txb_info, txb_costs, &txb_eob_costs, p, block, 1867 fast_mode, rate_cost); 1868 } 1869 1870 av1_txb_init_levels(qcoeff, width, height, levels); 1871 1872 const int update = 1873 optimize_txb(&txb_info, txb_costs, &txb_eob_costs, rate_cost); 1874 1875 if (update) { 1876 p->eobs[block] = txb_info.eob; 1877 p->txb_entropy_ctx[block] = 1878 av1_get_txb_entropy_context(qcoeff, scan_order, txb_info.eob); 1879 } 1880 return txb_info.eob; 1881 } 1882 1883 int av1_get_txb_entropy_context(const tran_low_t *qcoeff, 1884 const SCAN_ORDER *scan_order, int eob) { 1885 const int16_t *const scan = scan_order->scan; 1886 int cul_level = 0; 1887 int c; 1888 1889 if (eob == 0) return 0; 1890 for (c = 0; c < eob; ++c) { 1891 cul_level += abs(qcoeff[scan[c]]); 1892 if (cul_level > COEFF_CONTEXT_MASK) break; 1893 } 1894 1895 cul_level = AOMMIN(COEFF_CONTEXT_MASK, cul_level); 1896 set_dc_sign(&cul_level, qcoeff[0]); 1897 1898 return cul_level; 1899 } 1900 1901 void av1_update_txb_context_b(int plane, int block, int blk_row, int blk_col, 1902 BLOCK_SIZE plane_bsize, TX_SIZE tx_size, 1903 void *arg) { 1904 struct tokenize_b_args *const args = arg; 1905 const AV1_COMP *cpi = args->cpi; 1906 const AV1_COMMON *cm = &cpi->common; 1907 ThreadData *const td = args->td; 1908 MACROBLOCK *const x = &td->mb; 1909 MACROBLOCKD *const xd = &x->e_mbd; 1910 struct macroblock_plane *p = &x->plane[plane]; 1911 struct macroblockd_plane *pd = &xd->plane[plane]; 1912 const uint16_t eob = p->eobs[block]; 1913 const tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block); 1914 const PLANE_TYPE plane_type = pd->plane_type; 1915 const TX_TYPE tx_type = av1_get_tx_type(plane_type, xd, blk_row, blk_col, 1916 tx_size, cm->reduced_tx_set_used); 1917 const SCAN_ORDER *const scan_order = get_scan(tx_size, tx_type); 1918 const int cul_level = av1_get_txb_entropy_context(qcoeff, scan_order, eob); 1919 av1_set_contexts(xd, pd, plane, plane_bsize, tx_size, cul_level, blk_col, 1920 blk_row); 1921 } 1922 1923 static void update_tx_type_count(const AV1_COMMON *cm, MACROBLOCKD *xd, 1924 int blk_row, int blk_col, int plane, 1925 TX_SIZE tx_size, FRAME_COUNTS *counts, 1926 uint8_t allow_update_cdf) { 1927 MB_MODE_INFO *mbmi = xd->mi[0]; 1928 int is_inter = is_inter_block(mbmi); 1929 FRAME_CONTEXT *fc = xd->tile_ctx; 1930 #if !CONFIG_ENTROPY_STATS 1931 (void)counts; 1932 #endif // !CONFIG_ENTROPY_STATS 1933 1934 // Only y plane's tx_type is updated 1935 if (plane > 0) return; 1936 TX_TYPE tx_type = av1_get_tx_type(PLANE_TYPE_Y, xd, blk_row, blk_col, tx_size, 1937 cm->reduced_tx_set_used); 1938 if (get_ext_tx_types(tx_size, is_inter, cm->reduced_tx_set_used) > 1 && 1939 cm->base_qindex > 0 && !mbmi->skip && 1940 !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) { 1941 const int eset = get_ext_tx_set(tx_size, is_inter, cm->reduced_tx_set_used); 1942 if (eset > 0) { 1943 const TxSetType tx_set_type = 1944 av1_get_ext_tx_set_type(tx_size, is_inter, cm->reduced_tx_set_used); 1945 if (is_inter) { 1946 if (allow_update_cdf) { 1947 update_cdf(fc->inter_ext_tx_cdf[eset][txsize_sqr_map[tx_size]], 1948 av1_ext_tx_ind[tx_set_type][tx_type], 1949 av1_num_ext_tx_set[tx_set_type]); 1950 } 1951 #if CONFIG_ENTROPY_STATS 1952 ++counts->inter_ext_tx[eset][txsize_sqr_map[tx_size]] 1953 [av1_ext_tx_ind[tx_set_type][tx_type]]; 1954 #endif // CONFIG_ENTROPY_STATS 1955 } else { 1956 PREDICTION_MODE intra_dir; 1957 if (mbmi->filter_intra_mode_info.use_filter_intra) 1958 intra_dir = fimode_to_intradir[mbmi->filter_intra_mode_info 1959 .filter_intra_mode]; 1960 else 1961 intra_dir = mbmi->mode; 1962 #if CONFIG_ENTROPY_STATS 1963 ++counts->intra_ext_tx[eset][txsize_sqr_map[tx_size]][intra_dir] 1964 [av1_ext_tx_ind[tx_set_type][tx_type]]; 1965 #endif // CONFIG_ENTROPY_STATS 1966 if (allow_update_cdf) { 1967 update_cdf( 1968 fc->intra_ext_tx_cdf[eset][txsize_sqr_map[tx_size]][intra_dir], 1969 av1_ext_tx_ind[tx_set_type][tx_type], 1970 av1_num_ext_tx_set[tx_set_type]); 1971 } 1972 } 1973 } 1974 } 1975 } 1976 1977 void av1_update_and_record_txb_context(int plane, int block, int blk_row, 1978 int blk_col, BLOCK_SIZE plane_bsize, 1979 TX_SIZE tx_size, void *arg) { 1980 struct tokenize_b_args *const args = arg; 1981 const AV1_COMP *cpi = args->cpi; 1982 const AV1_COMMON *cm = &cpi->common; 1983 ThreadData *const td = args->td; 1984 MACROBLOCK *const x = &td->mb; 1985 MACROBLOCKD *const xd = &x->e_mbd; 1986 struct macroblock_plane *p = &x->plane[plane]; 1987 struct macroblockd_plane *pd = &xd->plane[plane]; 1988 MB_MODE_INFO *mbmi = xd->mi[0]; 1989 const int eob = p->eobs[block]; 1990 TXB_CTX txb_ctx; 1991 get_txb_ctx(plane_bsize, tx_size, plane, pd->above_context + blk_col, 1992 pd->left_context + blk_row, &txb_ctx); 1993 const int bwl = get_txb_bwl(tx_size); 1994 const int width = get_txb_wide(tx_size); 1995 const int height = get_txb_high(tx_size); 1996 const uint8_t allow_update_cdf = args->allow_update_cdf; 1997 const TX_SIZE txsize_ctx = get_txsize_entropy_ctx(tx_size); 1998 FRAME_CONTEXT *ec_ctx = xd->tile_ctx; 1999 #if CONFIG_ENTROPY_STATS 2000 int cdf_idx = cm->coef_cdf_category; 2001 #endif // CONFIG_ENTROPY_STATS 2002 2003 #if CONFIG_ENTROPY_STATS 2004 ++td->counts->txb_skip[cdf_idx][txsize_ctx][txb_ctx.txb_skip_ctx][eob == 0]; 2005 #endif // CONFIG_ENTROPY_STATS 2006 if (allow_update_cdf) { 2007 update_cdf(ec_ctx->txb_skip_cdf[txsize_ctx][txb_ctx.txb_skip_ctx], eob == 0, 2008 2); 2009 } 2010 2011 const int txb_offset = 2012 x->mbmi_ext->cb_offset / (TX_SIZE_W_MIN * TX_SIZE_H_MIN); 2013 uint16_t *eob_txb = x->mbmi_ext->cb_coef_buff->eobs[plane] + txb_offset; 2014 uint8_t *txb_skip_ctx_txb = 2015 x->mbmi_ext->cb_coef_buff->txb_skip_ctx[plane] + txb_offset; 2016 txb_skip_ctx_txb[block] = txb_ctx.txb_skip_ctx; 2017 eob_txb[block] = eob; 2018 2019 if (eob == 0) { 2020 av1_set_contexts(xd, pd, plane, plane_bsize, tx_size, 0, blk_col, blk_row); 2021 return; 2022 } 2023 2024 tran_low_t *tcoeff_txb = 2025 x->mbmi_ext->cb_coef_buff->tcoeff[plane] + x->mbmi_ext->cb_offset; 2026 tran_low_t *tcoeff = BLOCK_OFFSET(tcoeff_txb, block); 2027 const int segment_id = mbmi->segment_id; 2028 const int seg_eob = av1_get_tx_eob(&cpi->common.seg, segment_id, tx_size); 2029 const tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block); 2030 memcpy(tcoeff, qcoeff, sizeof(*tcoeff) * seg_eob); 2031 2032 uint8_t levels_buf[TX_PAD_2D]; 2033 uint8_t *const levels = set_levels(levels_buf, width); 2034 av1_txb_init_levels(tcoeff, width, height, levels); 2035 update_tx_type_count(cm, xd, blk_row, blk_col, plane, tx_size, td->counts, 2036 allow_update_cdf); 2037 2038 const PLANE_TYPE plane_type = pd->plane_type; 2039 const TX_TYPE tx_type = av1_get_tx_type(plane_type, xd, blk_row, blk_col, 2040 tx_size, cm->reduced_tx_set_used); 2041 const TX_CLASS tx_class = tx_type_to_class[tx_type]; 2042 const SCAN_ORDER *const scan_order = get_scan(tx_size, tx_type); 2043 const int16_t *const scan = scan_order->scan; 2044 #if CONFIG_ENTROPY_STATS 2045 av1_update_eob_context(cdf_idx, eob, tx_size, tx_class, plane_type, ec_ctx, 2046 td->counts, allow_update_cdf); 2047 #else 2048 av1_update_eob_context(eob, tx_size, tx_class, plane_type, ec_ctx, 2049 allow_update_cdf); 2050 #endif 2051 2052 DECLARE_ALIGNED(16, int8_t, coeff_contexts[MAX_TX_SQUARE]); 2053 av1_get_nz_map_contexts(levels, scan, eob, tx_size, tx_class, coeff_contexts); 2054 2055 for (int c = eob - 1; c >= 0; --c) { 2056 const int pos = scan[c]; 2057 const int coeff_ctx = coeff_contexts[pos]; 2058 const tran_low_t v = qcoeff[pos]; 2059 const tran_low_t level = abs(v); 2060 2061 if (allow_update_cdf) { 2062 if (c == eob - 1) { 2063 assert(coeff_ctx < 4); 2064 update_cdf( 2065 ec_ctx->coeff_base_eob_cdf[txsize_ctx][plane_type][coeff_ctx], 2066 AOMMIN(level, 3) - 1, 3); 2067 } else { 2068 update_cdf(ec_ctx->coeff_base_cdf[txsize_ctx][plane_type][coeff_ctx], 2069 AOMMIN(level, 3), 4); 2070 } 2071 } 2072 { 2073 if (c == eob - 1) { 2074 assert(coeff_ctx < 4); 2075 #if CONFIG_ENTROPY_STATS 2076 ++td->counts->coeff_base_eob_multi[cdf_idx][txsize_ctx][plane_type] 2077 [coeff_ctx][AOMMIN(level, 3) - 1]; 2078 } else { 2079 ++td->counts->coeff_base_multi[cdf_idx][txsize_ctx][plane_type] 2080 [coeff_ctx][AOMMIN(level, 3)]; 2081 #endif 2082 } 2083 } 2084 if (level > NUM_BASE_LEVELS) { 2085 const int base_range = level - 1 - NUM_BASE_LEVELS; 2086 const int br_ctx = get_br_ctx(levels, pos, bwl, tx_class); 2087 for (int idx = 0; idx < COEFF_BASE_RANGE; idx += BR_CDF_SIZE - 1) { 2088 const int k = AOMMIN(base_range - idx, BR_CDF_SIZE - 1); 2089 if (allow_update_cdf) { 2090 update_cdf(ec_ctx->coeff_br_cdf[AOMMIN(txsize_ctx, TX_32X32)] 2091 [plane_type][br_ctx], 2092 k, BR_CDF_SIZE); 2093 } 2094 for (int lps = 0; lps < BR_CDF_SIZE - 1; lps++) { 2095 #if CONFIG_ENTROPY_STATS 2096 ++td->counts->coeff_lps[AOMMIN(txsize_ctx, TX_32X32)][plane_type][lps] 2097 [br_ctx][lps == k]; 2098 #endif // CONFIG_ENTROPY_STATS 2099 if (lps == k) break; 2100 } 2101 #if CONFIG_ENTROPY_STATS 2102 ++td->counts->coeff_lps_multi[cdf_idx][AOMMIN(txsize_ctx, TX_32X32)] 2103 [plane_type][br_ctx][k]; 2104 #endif 2105 if (k < BR_CDF_SIZE - 1) break; 2106 } 2107 } 2108 } 2109 2110 // Update the context needed to code the DC sign (if applicable) 2111 if (tcoeff[0] != 0) { 2112 const int dc_sign = (tcoeff[0] < 0) ? 1 : 0; 2113 const int dc_sign_ctx = txb_ctx.dc_sign_ctx; 2114 #if CONFIG_ENTROPY_STATS 2115 ++td->counts->dc_sign[plane_type][dc_sign_ctx][dc_sign]; 2116 #endif // CONFIG_ENTROPY_STATS 2117 if (allow_update_cdf) 2118 update_cdf(ec_ctx->dc_sign_cdf[plane_type][dc_sign_ctx], dc_sign, 2); 2119 int *dc_sign_ctx_txb = 2120 x->mbmi_ext->cb_coef_buff->dc_sign_ctx[plane] + txb_offset; 2121 dc_sign_ctx_txb[block] = dc_sign_ctx; 2122 } 2123 2124 const int cul_level = av1_get_txb_entropy_context(tcoeff, scan_order, eob); 2125 av1_set_contexts(xd, pd, plane, plane_bsize, tx_size, cul_level, blk_col, 2126 blk_row); 2127 } 2128 2129 void av1_update_txb_context(const AV1_COMP *cpi, ThreadData *td, 2130 RUN_TYPE dry_run, BLOCK_SIZE bsize, int *rate, 2131 int mi_row, int mi_col, uint8_t allow_update_cdf) { 2132 const AV1_COMMON *const cm = &cpi->common; 2133 const int num_planes = av1_num_planes(cm); 2134 MACROBLOCK *const x = &td->mb; 2135 MACROBLOCKD *const xd = &x->e_mbd; 2136 MB_MODE_INFO *const mbmi = xd->mi[0]; 2137 struct tokenize_b_args arg = { cpi, td, NULL, 0, allow_update_cdf }; 2138 (void)rate; 2139 (void)mi_row; 2140 (void)mi_col; 2141 if (mbmi->skip) { 2142 av1_reset_skip_context(xd, mi_row, mi_col, bsize, num_planes); 2143 return; 2144 } 2145 2146 if (!dry_run) { 2147 av1_foreach_transformed_block(xd, bsize, mi_row, mi_col, 2148 av1_update_and_record_txb_context, &arg, 2149 num_planes); 2150 } else if (dry_run == DRY_RUN_NORMAL) { 2151 av1_foreach_transformed_block(xd, bsize, mi_row, mi_col, 2152 av1_update_txb_context_b, &arg, num_planes); 2153 } else { 2154 printf("DRY_RUN_COSTCOEFFS is not supported yet\n"); 2155 assert(0); 2156 } 2157 } 2158