/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <assert.h>
#include <stdio.h>
#include <limits.h>

#include "vpx/vpx_encoder.h"
#include "vpx_dsp/bitwriter_buffer.h"
#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem_ops.h"
#include "vpx_ports/system_state.h"

#include "vp9/common/vp9_entropy.h"
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_entropymv.h"
#include "vp9/common/vp9_mvref_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_tile_common.h"

#include "vp9/encoder/vp9_cost.h"
#include "vp9/encoder/vp9_bitstream.h"
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_segmentation.h"
#include "vp9/encoder/vp9_subexp.h"
#include "vp9/encoder/vp9_tokenize.h"

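// Each vp9_token below packs a tree code as { value, length }: the 'length'
// low-order bits of 'value' are emitted most-significant bit first as the
// corresponding tree is walked from the root.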
static const struct vp9_token intra_mode_encodings[INTRA_MODES] = {
  { 0, 1 },  { 6, 3 },   { 28, 5 },  { 30, 5 }, { 58, 6 },
  { 59, 6 }, { 126, 7 }, { 127, 7 }, { 62, 6 }, { 2, 2 }
};
static const struct vp9_token switchable_interp_encodings[SWITCHABLE_FILTERS] =
    { { 0, 1 }, { 2, 2 }, { 3, 2 } };
static const struct vp9_token partition_encodings[PARTITION_TYPES] = {
  { 0, 1 }, { 2, 2 }, { 6, 3 }, { 7, 3 }
};
static const struct vp9_token inter_mode_encodings[INTER_MODES] = {
  { 2, 2 }, { 6, 3 }, { 0, 1 }, { 7, 3 }
};

static void write_intra_mode(vpx_writer *w, PREDICTION_MODE mode,
                             const vpx_prob *probs) {
  vp9_write_token(w, vp9_intra_mode_tree, probs, &intra_mode_encodings[mode]);
}

static void write_inter_mode(vpx_writer *w, PREDICTION_MODE mode,
                             const vpx_prob *probs) {
  assert(is_inter_mode(mode));
  vp9_write_token(w, vp9_inter_mode_tree, probs,
                  &inter_mode_encodings[INTER_OFFSET(mode)]);
}

static void encode_unsigned_max(struct vpx_write_bit_buffer *wb, int data,
                                int max) {
  vpx_wb_write_literal(wb, data, get_unsigned_bits(max));
}

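// Signals updated probabilities for the internal nodes of 'tree'. Branch
// counts are derived from 'counts', and vp9_cond_prob_diff_update() decides,
// node by node, whether coding a new probability is worth its cost in bits.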
static void prob_diff_update(const vpx_tree_index *tree,
                             vpx_prob probs[/*n - 1*/],
                             const unsigned int counts[/*n - 1*/], int n,
                             vpx_writer *w) {
  int i;
  unsigned int branch_ct[32][2];

  // Assuming max number of probabilities <= 32
  assert(n <= 32);

  vp9_tree_probs_from_distribution(tree, branch_ct, counts);
  for (i = 0; i < n - 1; ++i)
    vp9_cond_prob_diff_update(w, &probs[i], branch_ct[i]);
}

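// The transform size is coded as a truncated unary string of up to three
// context-coded bits, capped by the largest transform the block size allows
// (e.g. an 8x8 block can only signal TX_4X4 or TX_8X8).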
static void write_selected_tx_size(const VP9_COMMON *cm,
                                   const MACROBLOCKD *const xd, vpx_writer *w) {
  TX_SIZE tx_size = xd->mi[0]->tx_size;
  BLOCK_SIZE bsize = xd->mi[0]->sb_type;
  const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
  const vpx_prob *const tx_probs =
      get_tx_probs2(max_tx_size, xd, &cm->fc->tx_probs);
  vpx_write(w, tx_size != TX_4X4, tx_probs[0]);
  if (tx_size != TX_4X4 && max_tx_size >= TX_16X16) {
    vpx_write(w, tx_size != TX_8X8, tx_probs[1]);
    if (tx_size != TX_8X8 && max_tx_size >= TX_32X32)
      vpx_write(w, tx_size != TX_16X16, tx_probs[2]);
  }
}

static int write_skip(const VP9_COMMON *cm, const MACROBLOCKD *const xd,
                      int segment_id, const MODE_INFO *mi, vpx_writer *w) {
  if (segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)) {
    return 1;
  } else {
    const int skip = mi->skip;
    vpx_write(w, skip, vp9_get_skip_prob(cm, xd));
    return skip;
  }
}

static void update_skip_probs(VP9_COMMON *cm, vpx_writer *w,
                              FRAME_COUNTS *counts) {
  int k;

  for (k = 0; k < SKIP_CONTEXTS; ++k)
    vp9_cond_prob_diff_update(w, &cm->fc->skip_probs[k], counts->skip[k]);
}

static void update_switchable_interp_probs(VP9_COMMON *cm, vpx_writer *w,
                                           FRAME_COUNTS *counts) {
  int j;
  for (j = 0; j < SWITCHABLE_FILTER_CONTEXTS; ++j)
    prob_diff_update(vp9_switchable_interp_tree,
                     cm->fc->switchable_interp_prob[j],
                     counts->switchable_interp[j], SWITCHABLE_FILTERS, w);
}

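// Packs the coefficient tokens of one block. Roughly, per token: an
// EOB/not-EOB bit, a zero/non-zero bit, then for larger values the token
// class via the constrained token tree (driven by the Pareto table), any
// category extra bits, and finally the sign bit. EOSB_TOKEN terminates the
// block's token list.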
static void pack_mb_tokens(vpx_writer *w, TOKENEXTRA **tp,
                           const TOKENEXTRA *const stop,
                           vpx_bit_depth_t bit_depth) {
  const TOKENEXTRA *p;
  const vp9_extra_bit *const extra_bits =
#if CONFIG_VP9_HIGHBITDEPTH
      (bit_depth == VPX_BITS_12)
          ? vp9_extra_bits_high12
          : (bit_depth == VPX_BITS_10) ? vp9_extra_bits_high10 : vp9_extra_bits;
#else
      vp9_extra_bits;
  (void)bit_depth;
#endif  // CONFIG_VP9_HIGHBITDEPTH

  for (p = *tp; p < stop && p->token != EOSB_TOKEN; ++p) {
    if (p->token == EOB_TOKEN) {
      vpx_write(w, 0, p->context_tree[0]);
      continue;
    }
    vpx_write(w, 1, p->context_tree[0]);
    while (p->token == ZERO_TOKEN) {
      vpx_write(w, 0, p->context_tree[1]);
      ++p;
      if (p == stop || p->token == EOSB_TOKEN) {
        *tp = (TOKENEXTRA *)(uintptr_t)p + (p->token == EOSB_TOKEN);
        return;
      }
    }

    {
      const int t = p->token;
      const vpx_prob *const context_tree = p->context_tree;
      assert(t != ZERO_TOKEN);
      assert(t != EOB_TOKEN);
      assert(t != EOSB_TOKEN);
      vpx_write(w, 1, context_tree[1]);
      if (t == ONE_TOKEN) {
        vpx_write(w, 0, context_tree[2]);
        vpx_write_bit(w, p->extra & 1);
      } else {  // t >= TWO_TOKEN && t < EOB_TOKEN
        const struct vp9_token *const a = &vp9_coef_encodings[t];
        const int v = a->value;
        const int n = a->len;
        const int e = p->extra;
        vpx_write(w, 1, context_tree[2]);
        vp9_write_tree(w, vp9_coef_con_tree,
                       vp9_pareto8_full[context_tree[PIVOT_NODE] - 1], v,
                       n - UNCONSTRAINED_NODES, 0);
        if (t >= CATEGORY1_TOKEN) {
          const vp9_extra_bit *const b = &extra_bits[t];
          const unsigned char *pb = b->prob;
          int v = e >> 1;
          int n = b->len;  // number of bits in v, assumed nonzero
          do {
            const int bb = (v >> --n) & 1;
            vpx_write(w, bb, *pb++);
          } while (n);
        }
        vpx_write_bit(w, e & 1);
      }
    }
  }
  *tp = (TOKENEXTRA *)(uintptr_t)p + (p->token == EOSB_TOKEN);
}

static void write_segment_id(vpx_writer *w, const struct segmentation *seg,
                             int segment_id) {
  if (seg->enabled && seg->update_map)
    vp9_write_tree(w, vp9_segment_tree, seg->tree_probs, segment_id, 3, 0);
}

// This function encodes the reference frame
static void write_ref_frames(const VP9_COMMON *cm, const MACROBLOCKD *const xd,
                             vpx_writer *w) {
  const MODE_INFO *const mi = xd->mi[0];
  const int is_compound = has_second_ref(mi);
  const int segment_id = mi->segment_id;

  // If segment-level coding of this signal is active, the reference frame
  // is fully determined by the segment and nothing needs to be coded.
  if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) {
    assert(!is_compound);
    assert(mi->ref_frame[0] ==
           get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME));
  } else {
    // Signal whether the block uses compound prediction when that choice is
    // not already fixed at the frame level.
    if (cm->reference_mode == REFERENCE_MODE_SELECT) {
      vpx_write(w, is_compound, vp9_get_reference_mode_prob(cm, xd));
    } else {
      assert((!is_compound) == (cm->reference_mode == SINGLE_REFERENCE));
    }

    if (is_compound) {
      vpx_write(w, mi->ref_frame[0] == GOLDEN_FRAME,
                vp9_get_pred_prob_comp_ref_p(cm, xd));
    } else {
      const int bit0 = mi->ref_frame[0] != LAST_FRAME;
      vpx_write(w, bit0, vp9_get_pred_prob_single_ref_p1(cm, xd));
      if (bit0) {
        const int bit1 = mi->ref_frame[0] != GOLDEN_FRAME;
        vpx_write(w, bit1, vp9_get_pred_prob_single_ref_p2(cm, xd));
      }
    }
  }
}

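// Writes the mode info for one block of an inter frame. The syntax order is:
// segment id (possibly predicted temporally), skip flag, intra/inter flag,
// transform size, then either the intra modes or the reference frames, inter
// mode, interpolation filter and motion vector(s).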
static void pack_inter_mode_mvs(
    VP9_COMP *cpi, const MACROBLOCKD *const xd,
    const MB_MODE_INFO_EXT *const mbmi_ext, vpx_writer *w,
    unsigned int *const max_mv_magnitude,
    int interp_filter_selected[MAX_REF_FRAMES][SWITCHABLE]) {
  VP9_COMMON *const cm = &cpi->common;
  const nmv_context *nmvc = &cm->fc->nmvc;
  const struct segmentation *const seg = &cm->seg;
  const MODE_INFO *const mi = xd->mi[0];
  const PREDICTION_MODE mode = mi->mode;
  const int segment_id = mi->segment_id;
  const BLOCK_SIZE bsize = mi->sb_type;
  const int allow_hp = cm->allow_high_precision_mv;
  const int is_inter = is_inter_block(mi);
  const int is_compound = has_second_ref(mi);
  int skip, ref;

  if (seg->update_map) {
    if (seg->temporal_update) {
      const int pred_flag = mi->seg_id_predicted;
      vpx_prob pred_prob = vp9_get_pred_prob_seg_id(seg, xd);
      vpx_write(w, pred_flag, pred_prob);
      if (!pred_flag) write_segment_id(w, seg, segment_id);
    } else {
      write_segment_id(w, seg, segment_id);
    }
  }

  skip = write_skip(cm, xd, segment_id, mi, w);

  if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
    vpx_write(w, is_inter, vp9_get_intra_inter_prob(cm, xd));

  if (bsize >= BLOCK_8X8 && cm->tx_mode == TX_MODE_SELECT &&
      !(is_inter && skip)) {
    write_selected_tx_size(cm, xd, w);
  }

  if (!is_inter) {
    if (bsize >= BLOCK_8X8) {
      write_intra_mode(w, mode, cm->fc->y_mode_prob[size_group_lookup[bsize]]);
    } else {
      int idx, idy;
      const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize];
      const int num_4x4_h = num_4x4_blocks_high_lookup[bsize];
      for (idy = 0; idy < 2; idy += num_4x4_h) {
        for (idx = 0; idx < 2; idx += num_4x4_w) {
          const PREDICTION_MODE b_mode = mi->bmi[idy * 2 + idx].as_mode;
          write_intra_mode(w, b_mode, cm->fc->y_mode_prob[0]);
        }
      }
    }
    write_intra_mode(w, mi->uv_mode, cm->fc->uv_mode_prob[mode]);
  } else {
    const int mode_ctx = mbmi_ext->mode_context[mi->ref_frame[0]];
    const vpx_prob *const inter_probs = cm->fc->inter_mode_probs[mode_ctx];
    write_ref_frames(cm, xd, w);

    // If segment skip is not enabled, code the mode.
    if (!segfeature_active(seg, segment_id, SEG_LVL_SKIP)) {
      if (bsize >= BLOCK_8X8) {
        write_inter_mode(w, mode, inter_probs);
      }
    }

    if (cm->interp_filter == SWITCHABLE) {
      const int ctx = get_pred_context_switchable_interp(xd);
      vp9_write_token(w, vp9_switchable_interp_tree,
                      cm->fc->switchable_interp_prob[ctx],
                      &switchable_interp_encodings[mi->interp_filter]);
      ++interp_filter_selected[0][mi->interp_filter];
    } else {
      assert(mi->interp_filter == cm->interp_filter);
    }

    if (bsize < BLOCK_8X8) {
      const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize];
      const int num_4x4_h = num_4x4_blocks_high_lookup[bsize];
      int idx, idy;
      for (idy = 0; idy < 2; idy += num_4x4_h) {
        for (idx = 0; idx < 2; idx += num_4x4_w) {
          const int j = idy * 2 + idx;
          const PREDICTION_MODE b_mode = mi->bmi[j].as_mode;
          write_inter_mode(w, b_mode, inter_probs);
          if (b_mode == NEWMV) {
            for (ref = 0; ref < 1 + is_compound; ++ref)
              vp9_encode_mv(cpi, w, &mi->bmi[j].as_mv[ref].as_mv,
                            &mbmi_ext->ref_mvs[mi->ref_frame[ref]][0].as_mv,
                            nmvc, allow_hp, max_mv_magnitude);
          }
        }
      }
    } else {
      if (mode == NEWMV) {
        for (ref = 0; ref < 1 + is_compound; ++ref)
          vp9_encode_mv(cpi, w, &mi->mv[ref].as_mv,
                        &mbmi_ext->ref_mvs[mi->ref_frame[ref]][0].as_mv, nmvc,
                        allow_hp, max_mv_magnitude);
      }
    }
  }
}

static void write_mb_modes_kf(const VP9_COMMON *cm, const MACROBLOCKD *xd,
                              vpx_writer *w) {
  const struct segmentation *const seg = &cm->seg;
  const MODE_INFO *const mi = xd->mi[0];
  const MODE_INFO *const above_mi = xd->above_mi;
  const MODE_INFO *const left_mi = xd->left_mi;
  const BLOCK_SIZE bsize = mi->sb_type;

  if (seg->update_map) write_segment_id(w, seg, mi->segment_id);

  write_skip(cm, xd, mi->segment_id, mi, w);

  if (bsize >= BLOCK_8X8 && cm->tx_mode == TX_MODE_SELECT)
    write_selected_tx_size(cm, xd, w);

  if (bsize >= BLOCK_8X8) {
    write_intra_mode(w, mi->mode, get_y_mode_probs(mi, above_mi, left_mi, 0));
  } else {
    const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize];
    const int num_4x4_h = num_4x4_blocks_high_lookup[bsize];
    int idx, idy;

    for (idy = 0; idy < 2; idy += num_4x4_h) {
      for (idx = 0; idx < 2; idx += num_4x4_w) {
        const int block = idy * 2 + idx;
        write_intra_mode(w, mi->bmi[block].as_mode,
                         get_y_mode_probs(mi, above_mi, left_mi, block));
      }
    }
  }

  write_intra_mode(w, mi->uv_mode, vp9_kf_uv_mode_prob[mi->mode]);
}

static void write_modes_b(
    VP9_COMP *cpi, MACROBLOCKD *const xd, const TileInfo *const tile,
    vpx_writer *w, TOKENEXTRA **tok, const TOKENEXTRA *const tok_end,
    int mi_row, int mi_col, unsigned int *const max_mv_magnitude,
    int interp_filter_selected[MAX_REF_FRAMES][SWITCHABLE]) {
  const VP9_COMMON *const cm = &cpi->common;
  const MB_MODE_INFO_EXT *const mbmi_ext =
      cpi->td.mb.mbmi_ext_base + (mi_row * cm->mi_cols + mi_col);
  MODE_INFO *m;

  xd->mi = cm->mi_grid_visible + (mi_row * cm->mi_stride + mi_col);
  m = xd->mi[0];

  set_mi_row_col(xd, tile, mi_row, num_8x8_blocks_high_lookup[m->sb_type],
                 mi_col, num_8x8_blocks_wide_lookup[m->sb_type], cm->mi_rows,
                 cm->mi_cols);
  if (frame_is_intra_only(cm)) {
    write_mb_modes_kf(cm, xd, w);
  } else {
    pack_inter_mode_mvs(cpi, xd, mbmi_ext, w, max_mv_magnitude,
                        interp_filter_selected);
  }

  assert(*tok < tok_end);
  pack_mb_tokens(w, tok, tok_end, cm->bit_depth);
}

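// Codes the partition type for a block. At frame edges only the partitions
// that fit are legal, so the decision collapses to a single bit (HORZ vs
// SPLIT when rows are missing, VERT vs SPLIT when columns are missing), or
// is omitted entirely when both are missing and SPLIT is implied.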
static void write_partition(const VP9_COMMON *const cm,
                            const MACROBLOCKD *const xd, int hbs, int mi_row,
                            int mi_col, PARTITION_TYPE p, BLOCK_SIZE bsize,
                            vpx_writer *w) {
  const int ctx = partition_plane_context(xd, mi_row, mi_col, bsize);
  const vpx_prob *const probs = xd->partition_probs[ctx];
  const int has_rows = (mi_row + hbs) < cm->mi_rows;
  const int has_cols = (mi_col + hbs) < cm->mi_cols;

  if (has_rows && has_cols) {
    vp9_write_token(w, vp9_partition_tree, probs, &partition_encodings[p]);
  } else if (!has_rows && has_cols) {
    assert(p == PARTITION_SPLIT || p == PARTITION_HORZ);
    vpx_write(w, p == PARTITION_SPLIT, probs[1]);
  } else if (has_rows && !has_cols) {
    assert(p == PARTITION_SPLIT || p == PARTITION_VERT);
    vpx_write(w, p == PARTITION_SPLIT, probs[2]);
  } else {
    assert(p == PARTITION_SPLIT);
  }
}

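// Recursively walks the superblock partition tree: write this block's
// partition choice, then either emit the block itself or recurse into the
// sub-blocks that lie inside the visible frame area.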
static void write_modes_sb(
    VP9_COMP *cpi, MACROBLOCKD *const xd, const TileInfo *const tile,
    vpx_writer *w, TOKENEXTRA **tok, const TOKENEXTRA *const tok_end,
    int mi_row, int mi_col, BLOCK_SIZE bsize,
    unsigned int *const max_mv_magnitude,
    int interp_filter_selected[MAX_REF_FRAMES][SWITCHABLE]) {
  const VP9_COMMON *const cm = &cpi->common;
  const int bsl = b_width_log2_lookup[bsize];
  const int bs = (1 << bsl) / 4;
  PARTITION_TYPE partition;
  BLOCK_SIZE subsize;
  const MODE_INFO *m = NULL;

  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;

  m = cm->mi_grid_visible[mi_row * cm->mi_stride + mi_col];

  partition = partition_lookup[bsl][m->sb_type];
  write_partition(cm, xd, bs, mi_row, mi_col, partition, bsize, w);
  subsize = get_subsize(bsize, partition);
  if (subsize < BLOCK_8X8) {
    write_modes_b(cpi, xd, tile, w, tok, tok_end, mi_row, mi_col,
                  max_mv_magnitude, interp_filter_selected);
  } else {
    switch (partition) {
      case PARTITION_NONE:
        write_modes_b(cpi, xd, tile, w, tok, tok_end, mi_row, mi_col,
                      max_mv_magnitude, interp_filter_selected);
        break;
      case PARTITION_HORZ:
        write_modes_b(cpi, xd, tile, w, tok, tok_end, mi_row, mi_col,
                      max_mv_magnitude, interp_filter_selected);
        if (mi_row + bs < cm->mi_rows)
          write_modes_b(cpi, xd, tile, w, tok, tok_end, mi_row + bs, mi_col,
                        max_mv_magnitude, interp_filter_selected);
        break;
      case PARTITION_VERT:
        write_modes_b(cpi, xd, tile, w, tok, tok_end, mi_row, mi_col,
                      max_mv_magnitude, interp_filter_selected);
        if (mi_col + bs < cm->mi_cols)
          write_modes_b(cpi, xd, tile, w, tok, tok_end, mi_row, mi_col + bs,
                        max_mv_magnitude, interp_filter_selected);
        break;
      case PARTITION_SPLIT:
        write_modes_sb(cpi, xd, tile, w, tok, tok_end, mi_row, mi_col, subsize,
                       max_mv_magnitude, interp_filter_selected);
        write_modes_sb(cpi, xd, tile, w, tok, tok_end, mi_row, mi_col + bs,
                       subsize, max_mv_magnitude, interp_filter_selected);
        write_modes_sb(cpi, xd, tile, w, tok, tok_end, mi_row + bs, mi_col,
                       subsize, max_mv_magnitude, interp_filter_selected);
        write_modes_sb(cpi, xd, tile, w, tok, tok_end, mi_row + bs, mi_col + bs,
                       subsize, max_mv_magnitude, interp_filter_selected);
        break;
      default: assert(0);
    }
  }

  // update partition context
  if (bsize >= BLOCK_8X8 &&
      (bsize == BLOCK_8X8 || partition != PARTITION_SPLIT))
    update_partition_context(xd, mi_row, mi_col, subsize, bsize);
}

static void write_modes(
    VP9_COMP *cpi, MACROBLOCKD *const xd, const TileInfo *const tile,
    vpx_writer *w, int tile_row, int tile_col,
    unsigned int *const max_mv_magnitude,
    int interp_filter_selected[MAX_REF_FRAMES][SWITCHABLE]) {
  const VP9_COMMON *const cm = &cpi->common;
  int mi_row, mi_col, tile_sb_row;
  TOKENEXTRA *tok = NULL;
  TOKENEXTRA *tok_end = NULL;

  set_partition_probs(cm, xd);

  for (mi_row = tile->mi_row_start; mi_row < tile->mi_row_end;
       mi_row += MI_BLOCK_SIZE) {
    tile_sb_row = mi_cols_aligned_to_sb(mi_row - tile->mi_row_start) >>
                  MI_BLOCK_SIZE_LOG2;
    tok = cpi->tplist[tile_row][tile_col][tile_sb_row].start;
    tok_end = tok + cpi->tplist[tile_row][tile_col][tile_sb_row].count;

    vp9_zero(xd->left_seg_context);
    for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end;
         mi_col += MI_BLOCK_SIZE)
      write_modes_sb(cpi, xd, tile, w, &tok, tok_end, mi_row, mi_col,
                     BLOCK_64X64, max_mv_magnitude, interp_filter_selected);

    assert(tok == cpi->tplist[tile_row][tile_col][tile_sb_row].stop);
  }
}

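// Turns the coefficient-token counts gathered during encoding into per-node
// branch counts and candidate probabilities for the unconstrained tree
// nodes; the EOB branch count is patched in from the separately tracked
// eob_branch counters.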
static void build_tree_distribution(VP9_COMP *cpi, TX_SIZE tx_size,
                                    vp9_coeff_stats *coef_branch_ct,
                                    vp9_coeff_probs_model *coef_probs) {
  vp9_coeff_count *coef_counts = cpi->td.rd_counts.coef_counts[tx_size];
  unsigned int(*eob_branch_ct)[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS] =
      cpi->common.counts.eob_branch[tx_size];
  int i, j, k, l, m;

  for (i = 0; i < PLANE_TYPES; ++i) {
    for (j = 0; j < REF_TYPES; ++j) {
      for (k = 0; k < COEF_BANDS; ++k) {
        for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
          vp9_tree_probs_from_distribution(vp9_coef_tree,
                                           coef_branch_ct[i][j][k][l],
                                           coef_counts[i][j][k][l]);
          coef_branch_ct[i][j][k][l][0][1] =
              eob_branch_ct[i][j][k][l] - coef_branch_ct[i][j][k][l][0][0];
          for (m = 0; m < UNCONSTRAINED_NODES; ++m)
            coef_probs[i][j][k][l][m] =
                get_binary_prob(coef_branch_ct[i][j][k][l][m][0],
                                coef_branch_ct[i][j][k][l][m][1]);
        }
      }
    }
  }
}

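// Two update strategies are used below. TWO_LOOP first does a dry run to
// check whether any probability update pays for its own signalling cost and
// only then writes the updates. ONE_LOOP_REDUCED writes in a single pass,
// buffering the count of skipped zero-update flags until the first real
// update forces them to be emitted.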
static void update_coef_probs_common(vpx_writer *const bc, VP9_COMP *cpi,
                                     TX_SIZE tx_size,
                                     vp9_coeff_stats *frame_branch_ct,
                                     vp9_coeff_probs_model *new_coef_probs) {
  vp9_coeff_probs_model *old_coef_probs = cpi->common.fc->coef_probs[tx_size];
  const vpx_prob upd = DIFF_UPDATE_PROB;
  const int entropy_nodes_update = UNCONSTRAINED_NODES;
  int i, j, k, l, t;
  int stepsize = cpi->sf.coeff_prob_appx_step;

  switch (cpi->sf.use_fast_coef_updates) {
    case TWO_LOOP: {
      /* dry run to see if there is any update at all needed */
      int savings = 0;
      int update[2] = { 0, 0 };
      for (i = 0; i < PLANE_TYPES; ++i) {
        for (j = 0; j < REF_TYPES; ++j) {
          for (k = 0; k < COEF_BANDS; ++k) {
            for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
              for (t = 0; t < entropy_nodes_update; ++t) {
                vpx_prob newp = new_coef_probs[i][j][k][l][t];
                const vpx_prob oldp = old_coef_probs[i][j][k][l][t];
                int s;
                int u = 0;
                if (t == PIVOT_NODE)
                  s = vp9_prob_diff_update_savings_search_model(
                      frame_branch_ct[i][j][k][l][0], oldp, &newp, upd,
                      stepsize);
                else
                  s = vp9_prob_diff_update_savings_search(
                      frame_branch_ct[i][j][k][l][t], oldp, &newp, upd);
                if (s > 0 && newp != oldp) u = 1;
                if (u)
                  savings += s - (int)(vp9_cost_zero(upd));
                else
                  savings -= (int)(vp9_cost_zero(upd));
                update[u]++;
              }
            }
          }
        }
      }

      // printf("Update %d %d, savings %d\n", update[0], update[1], savings);
      /* Is coef updated at all */
      if (update[1] == 0 || savings < 0) {
        vpx_write_bit(bc, 0);
        return;
      }
      vpx_write_bit(bc, 1);
      for (i = 0; i < PLANE_TYPES; ++i) {
        for (j = 0; j < REF_TYPES; ++j) {
          for (k = 0; k < COEF_BANDS; ++k) {
            for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
              // calc probs and branch cts for this frame only
              for (t = 0; t < entropy_nodes_update; ++t) {
                vpx_prob newp = new_coef_probs[i][j][k][l][t];
                vpx_prob *oldp = old_coef_probs[i][j][k][l] + t;
                const vpx_prob upd = DIFF_UPDATE_PROB;
                int s;
                int u = 0;
                if (t == PIVOT_NODE)
                  s = vp9_prob_diff_update_savings_search_model(
                      frame_branch_ct[i][j][k][l][0], *oldp, &newp, upd,
                      stepsize);
                else
                  s = vp9_prob_diff_update_savings_search(
                      frame_branch_ct[i][j][k][l][t], *oldp, &newp, upd);
                if (s > 0 && newp != *oldp) u = 1;
                vpx_write(bc, u, upd);
                if (u) {
                  /* send/use new probability */
                  vp9_write_prob_diff_update(bc, newp, *oldp);
                  *oldp = newp;
                }
              }
            }
          }
        }
      }
      return;
    }

    case ONE_LOOP_REDUCED: {
      int updates = 0;
      int noupdates_before_first = 0;
      for (i = 0; i < PLANE_TYPES; ++i) {
        for (j = 0; j < REF_TYPES; ++j) {
          for (k = 0; k < COEF_BANDS; ++k) {
            for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
              // calc probs and branch cts for this frame only
              for (t = 0; t < entropy_nodes_update; ++t) {
                vpx_prob newp = new_coef_probs[i][j][k][l][t];
                vpx_prob *oldp = old_coef_probs[i][j][k][l] + t;
                int s;
                int u = 0;

                if (t == PIVOT_NODE) {
                  s = vp9_prob_diff_update_savings_search_model(
                      frame_branch_ct[i][j][k][l][0], *oldp, &newp, upd,
                      stepsize);
                } else {
                  s = vp9_prob_diff_update_savings_search(
                      frame_branch_ct[i][j][k][l][t], *oldp, &newp, upd);
                }

                if (s > 0 && newp != *oldp) u = 1;
                updates += u;
                if (u == 0 && updates == 0) {
                  noupdates_before_first++;
                  continue;
                }
                if (u == 1 && updates == 1) {
                  int v;
                  // first update
                  vpx_write_bit(bc, 1);
                  for (v = 0; v < noupdates_before_first; ++v)
                    vpx_write(bc, 0, upd);
                }
                vpx_write(bc, u, upd);
                if (u) {
                  /* send/use new probability */
                  vp9_write_prob_diff_update(bc, newp, *oldp);
                  *oldp = newp;
                }
              }
            }
          }
        }
      }
      if (updates == 0) {
        vpx_write_bit(bc, 0);  // no updates
      }
      return;
    }
    default: assert(0);
  }
}

static void update_coef_probs(VP9_COMP *cpi, vpx_writer *w) {
  const TX_MODE tx_mode = cpi->common.tx_mode;
  const TX_SIZE max_tx_size = tx_mode_to_biggest_tx_size[tx_mode];
  TX_SIZE tx_size;
  for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size) {
    vp9_coeff_stats frame_branch_ct[PLANE_TYPES];
    vp9_coeff_probs_model frame_coef_probs[PLANE_TYPES];
    if (cpi->td.counts->tx.tx_totals[tx_size] <= 20 ||
        (tx_size >= TX_16X16 && cpi->sf.tx_size_search_method == USE_TX_8X8)) {
      vpx_write_bit(w, 0);
    } else {
      build_tree_distribution(cpi, tx_size, frame_branch_ct, frame_coef_probs);
      update_coef_probs_common(w, cpi, tx_size, frame_branch_ct,
                               frame_coef_probs);
    }
  }
}

static void encode_loopfilter(struct loopfilter *lf,
                              struct vpx_write_bit_buffer *wb) {
  int i;

  // Encode the loop filter level and sharpness.
  vpx_wb_write_literal(wb, lf->filter_level, 6);
  vpx_wb_write_literal(wb, lf->sharpness_level, 3);

  // Write out loop filter deltas applied at the MB level based on mode or
  // ref frame (if they are enabled).
  vpx_wb_write_bit(wb, lf->mode_ref_delta_enabled);

  if (lf->mode_ref_delta_enabled) {
    vpx_wb_write_bit(wb, lf->mode_ref_delta_update);
    if (lf->mode_ref_delta_update) {
      for (i = 0; i < MAX_REF_LF_DELTAS; i++) {
        const int delta = lf->ref_deltas[i];
        const int changed = delta != lf->last_ref_deltas[i];
        vpx_wb_write_bit(wb, changed);
        if (changed) {
          lf->last_ref_deltas[i] = delta;
          vpx_wb_write_literal(wb, abs(delta) & 0x3F, 6);
          vpx_wb_write_bit(wb, delta < 0);
        }
      }

      for (i = 0; i < MAX_MODE_LF_DELTAS; i++) {
        const int delta = lf->mode_deltas[i];
        const int changed = delta != lf->last_mode_deltas[i];
        vpx_wb_write_bit(wb, changed);
        if (changed) {
          lf->last_mode_deltas[i] = delta;
          vpx_wb_write_literal(wb, abs(delta) & 0x3F, 6);
          vpx_wb_write_bit(wb, delta < 0);
        }
      }
    }
  }
}

static void write_delta_q(struct vpx_write_bit_buffer *wb, int delta_q) {
  if (delta_q != 0) {
    vpx_wb_write_bit(wb, 1);
    vpx_wb_write_literal(wb, abs(delta_q), 4);
    vpx_wb_write_bit(wb, delta_q < 0);
  } else {
    vpx_wb_write_bit(wb, 0);
  }
}

static void encode_quantization(const VP9_COMMON *const cm,
                                struct vpx_write_bit_buffer *wb) {
  vpx_wb_write_literal(wb, cm->base_qindex, QINDEX_BITS);
  write_delta_q(wb, cm->y_dc_delta_q);
  write_delta_q(wb, cm->uv_dc_delta_q);
  write_delta_q(wb, cm->uv_ac_delta_q);
}

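// Writes the segmentation syntax: the enabled flag, then (optionally) the
// segment-map probabilities, including the temporal-prediction probs, and
// (optionally) the per-segment feature data.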
static void encode_segmentation(VP9_COMMON *cm, MACROBLOCKD *xd,
                                struct vpx_write_bit_buffer *wb) {
  int i, j;

  const struct segmentation *seg = &cm->seg;

  vpx_wb_write_bit(wb, seg->enabled);
  if (!seg->enabled) return;

  // Segmentation map
  vpx_wb_write_bit(wb, seg->update_map);
  if (seg->update_map) {
    // Select the coding strategy (temporal or spatial)
    vp9_choose_segmap_coding_method(cm, xd);
    // Write out probabilities used to decode unpredicted macroblock segments
    for (i = 0; i < SEG_TREE_PROBS; i++) {
      const int prob = seg->tree_probs[i];
      const int update = prob != MAX_PROB;
      vpx_wb_write_bit(wb, update);
      if (update) vpx_wb_write_literal(wb, prob, 8);
    }

    // Write out the chosen coding method.
    vpx_wb_write_bit(wb, seg->temporal_update);
    if (seg->temporal_update) {
      for (i = 0; i < PREDICTION_PROBS; i++) {
        const int prob = seg->pred_probs[i];
        const int update = prob != MAX_PROB;
        vpx_wb_write_bit(wb, update);
        if (update) vpx_wb_write_literal(wb, prob, 8);
      }
    }
  }

  // Segmentation data
  vpx_wb_write_bit(wb, seg->update_data);
  if (seg->update_data) {
    vpx_wb_write_bit(wb, seg->abs_delta);

    for (i = 0; i < MAX_SEGMENTS; i++) {
      for (j = 0; j < SEG_LVL_MAX; j++) {
        const int active = segfeature_active(seg, i, j);
        vpx_wb_write_bit(wb, active);
        if (active) {
          const int data = get_segdata(seg, i, j);
          const int data_max = vp9_seg_feature_data_max(j);

          if (vp9_is_segfeature_signed(j)) {
            encode_unsigned_max(wb, abs(data), data_max);
            vpx_wb_write_bit(wb, data < 0);
          } else {
            encode_unsigned_max(wb, data, data_max);
          }
        }
      }
    }
  }
}

static void encode_txfm_probs(VP9_COMMON *cm, vpx_writer *w,
                              FRAME_COUNTS *counts) {
  // Mode
  vpx_write_literal(w, VPXMIN(cm->tx_mode, ALLOW_32X32), 2);
  if (cm->tx_mode >= ALLOW_32X32)
    vpx_write_bit(w, cm->tx_mode == TX_MODE_SELECT);

  // Probabilities
  if (cm->tx_mode == TX_MODE_SELECT) {
    int i, j;
    unsigned int ct_8x8p[TX_SIZES - 3][2];
    unsigned int ct_16x16p[TX_SIZES - 2][2];
    unsigned int ct_32x32p[TX_SIZES - 1][2];

    for (i = 0; i < TX_SIZE_CONTEXTS; i++) {
      tx_counts_to_branch_counts_8x8(counts->tx.p8x8[i], ct_8x8p);
      for (j = 0; j < TX_SIZES - 3; j++)
        vp9_cond_prob_diff_update(w, &cm->fc->tx_probs.p8x8[i][j], ct_8x8p[j]);
    }

    for (i = 0; i < TX_SIZE_CONTEXTS; i++) {
      tx_counts_to_branch_counts_16x16(counts->tx.p16x16[i], ct_16x16p);
      for (j = 0; j < TX_SIZES - 2; j++)
        vp9_cond_prob_diff_update(w, &cm->fc->tx_probs.p16x16[i][j],
                                  ct_16x16p[j]);
    }

    for (i = 0; i < TX_SIZE_CONTEXTS; i++) {
      tx_counts_to_branch_counts_32x32(counts->tx.p32x32[i], ct_32x32p);
      for (j = 0; j < TX_SIZES - 1; j++)
        vp9_cond_prob_diff_update(w, &cm->fc->tx_probs.p32x32[i][j],
                                  ct_32x32p[j]);
    }
  }
}

static void write_interp_filter(INTERP_FILTER filter,
                                struct vpx_write_bit_buffer *wb) {
  const int filter_to_literal[] = { 1, 0, 2, 3 };

  vpx_wb_write_bit(wb, filter == SWITCHABLE);
  if (filter != SWITCHABLE)
    vpx_wb_write_literal(wb, filter_to_literal[filter], 2);
}

static void fix_interp_filter(VP9_COMMON *cm, FRAME_COUNTS *counts) {
  if (cm->interp_filter == SWITCHABLE) {
    // Check to see if only one of the filters is actually used
    int count[SWITCHABLE_FILTERS];
    int i, j, c = 0;
    for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
      count[i] = 0;
      for (j = 0; j < SWITCHABLE_FILTER_CONTEXTS; ++j)
        count[i] += counts->switchable_interp[j][i];
      c += (count[i] > 0);
    }
    if (c == 1) {
      // Only one filter is used. So set the filter at frame level
      for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
        if (count[i]) {
          cm->interp_filter = i;
          break;
        }
      }
    }
  }
}

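// Tile columns are coded relative to the minimum: one '1' bit per extra
// doubling beyond min_log2_tile_cols, terminated by a '0' unless the
// maximum is reached. Rows use at most two bits for 0, 1 or 2 log2 rows.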
static void write_tile_info(const VP9_COMMON *const cm,
                            struct vpx_write_bit_buffer *wb) {
  int min_log2_tile_cols, max_log2_tile_cols, ones;
  vp9_get_tile_n_bits(cm->mi_cols, &min_log2_tile_cols, &max_log2_tile_cols);

  // columns
  ones = cm->log2_tile_cols - min_log2_tile_cols;
  while (ones--) vpx_wb_write_bit(wb, 1);

  if (cm->log2_tile_cols < max_log2_tile_cols) vpx_wb_write_bit(wb, 0);

  // rows
  vpx_wb_write_bit(wb, cm->log2_tile_rows != 0);
  if (cm->log2_tile_rows != 0) vpx_wb_write_bit(wb, cm->log2_tile_rows != 1);
}

int vp9_get_refresh_mask(VP9_COMP *cpi) {
  if (vp9_preserve_existing_gf(cpi)) {
    // We have decided to preserve the previously existing golden frame as our
    // new ARF frame. However, in the short term we leave it in the GF slot and,
    // if we're updating the GF with the current decoded frame, we save it
    // instead to the ARF slot.
    // Later, in the function vp9_encoder.c:vp9_update_reference_frames() we
    // will swap gld_fb_idx and alt_fb_idx to achieve our objective. We do it
    // there so that it can be done outside of the recode loop.
    // Note: This is highly specific to the use of ARF as a forward reference,
    // and this needs to be generalized as other uses are implemented
    // (like RTC/temporal scalability).
    return (cpi->refresh_last_frame << cpi->lst_fb_idx) |
           (cpi->refresh_golden_frame << cpi->alt_fb_idx);
  } else {
    int arf_idx = cpi->alt_fb_idx;
    if ((cpi->oxcf.pass == 2) && cpi->multi_arf_allowed) {
      const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
      arf_idx = gf_group->arf_update_idx[gf_group->index];
    }
    return (cpi->refresh_last_frame << cpi->lst_fb_idx) |
           (cpi->refresh_golden_frame << cpi->gld_fb_idx) |
           (cpi->refresh_alt_ref_frame << arf_idx);
  }
}

static int encode_tile_worker(void *arg1, void *arg2) {
  VP9_COMP *cpi = (VP9_COMP *)arg1;
  VP9BitstreamWorkerData *data = (VP9BitstreamWorkerData *)arg2;
  MACROBLOCKD *const xd = &data->xd;
  const int tile_row = 0;
  vpx_start_encode(&data->bit_writer, data->dest);
  write_modes(cpi, xd, &cpi->tile_data[data->tile_idx].tile_info,
              &data->bit_writer, tile_row, data->tile_idx,
              &data->max_mv_magnitude, data->interp_filter_selected);
  vpx_stop_encode(&data->bit_writer);
  return 1;
}

void vp9_bitstream_encode_tiles_buffer_dealloc(VP9_COMP *const cpi) {
  if (cpi->vp9_bitstream_worker_data) {
    int i;
    for (i = 1; i < cpi->num_workers; ++i) {
      vpx_free(cpi->vp9_bitstream_worker_data[i].dest);
    }
    vpx_free(cpi->vp9_bitstream_worker_data);
    cpi->vp9_bitstream_worker_data = NULL;
  }
}

static int encode_tiles_buffer_alloc(VP9_COMP *const cpi) {
  int i;
  const size_t worker_data_size =
      cpi->num_workers * sizeof(*cpi->vp9_bitstream_worker_data);
  cpi->vp9_bitstream_worker_data = vpx_memalign(16, worker_data_size);
  // Check the allocation before touching the memory: the original order
  // (memset first, NULL check second) dereferenced a potentially NULL
  // pointer.
  if (!cpi->vp9_bitstream_worker_data) return 1;
  memset(cpi->vp9_bitstream_worker_data, 0, worker_data_size);
  for (i = 1; i < cpi->num_workers; ++i) {
    cpi->vp9_bitstream_worker_data[i].dest_size =
        cpi->oxcf.width * cpi->oxcf.height;
    cpi->vp9_bitstream_worker_data[i].dest =
        vpx_malloc(cpi->vp9_bitstream_worker_data[i].dest_size);
    if (!cpi->vp9_bitstream_worker_data[i].dest) return 1;
  }
  return 0;
}

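// Multi-threaded tile packing. Tiles are handed to workers in batches of
// num_workers; worker 0 writes straight into the output buffer (offset by 4
// bytes for the tile-size field unless it holds the last tile), while the
// other workers pack into scratch buffers that are copied out after sync.
// Every tile except the last is prefixed with its size as a 32-bit
// big-endian value.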
static size_t encode_tiles_mt(VP9_COMP *cpi, uint8_t *data_ptr) {
  const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
  VP9_COMMON *const cm = &cpi->common;
  const int tile_cols = 1 << cm->log2_tile_cols;
  const int num_workers = cpi->num_workers;
  size_t total_size = 0;
  int tile_col = 0;

  if (!cpi->vp9_bitstream_worker_data ||
      cpi->vp9_bitstream_worker_data[1].dest_size >
          (cpi->oxcf.width * cpi->oxcf.height)) {
    vp9_bitstream_encode_tiles_buffer_dealloc(cpi);
    if (encode_tiles_buffer_alloc(cpi)) return 0;
  }

  while (tile_col < tile_cols) {
    int i, j;
    for (i = 0; i < num_workers && tile_col < tile_cols; ++i) {
      VPxWorker *const worker = &cpi->workers[i];
      VP9BitstreamWorkerData *const data = &cpi->vp9_bitstream_worker_data[i];

      // Populate the worker data.
      data->xd = cpi->td.mb.e_mbd;
      data->tile_idx = tile_col;
      data->max_mv_magnitude = cpi->max_mv_magnitude;
      memset(data->interp_filter_selected, 0,
             sizeof(data->interp_filter_selected[0][0]) * SWITCHABLE);

      // First thread can directly write into the output buffer.
      if (i == 0) {
        // If this worker happens to be for the last tile, then do not offset it
        // by 4 for the tile size.
        data->dest =
            data_ptr + total_size + (tile_col == tile_cols - 1 ? 0 : 4);
      }
      worker->data1 = cpi;
      worker->data2 = data;
      worker->hook = encode_tile_worker;
      worker->had_error = 0;

      if (i < num_workers - 1) {
        winterface->launch(worker);
      } else {
        winterface->execute(worker);
      }
      ++tile_col;
    }
    for (j = 0; j < i; ++j) {
      VPxWorker *const worker = &cpi->workers[j];
      VP9BitstreamWorkerData *const data =
          (VP9BitstreamWorkerData *)worker->data2;
      uint32_t tile_size;
      int k;

      if (!winterface->sync(worker)) return 0;
      tile_size = data->bit_writer.pos;

      // Aggregate per-thread bitstream stats.
      cpi->max_mv_magnitude =
          VPXMAX(cpi->max_mv_magnitude, data->max_mv_magnitude);
      for (k = 0; k < SWITCHABLE; ++k) {
        cpi->interp_filter_selected[0][k] += data->interp_filter_selected[0][k];
      }

      // Prefix the size of the tile on all but the last.
      if (tile_col != tile_cols || j < i - 1) {
        mem_put_be32(data_ptr + total_size, tile_size);
        total_size += 4;
      }
      if (j > 0) {
        memcpy(data_ptr + total_size, data->dest, tile_size);
      }
      total_size += tile_size;
    }
  }
  return total_size;
}

static size_t encode_tiles(VP9_COMP *cpi, uint8_t *data_ptr) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
  vpx_writer residual_bc;
  int tile_row, tile_col;
  size_t total_size = 0;
  const int tile_cols = 1 << cm->log2_tile_cols;
  const int tile_rows = 1 << cm->log2_tile_rows;

  memset(cm->above_seg_context, 0,
         sizeof(*cm->above_seg_context) * mi_cols_aligned_to_sb(cm->mi_cols));

  // Encoding tiles in parallel is done only for realtime mode for now. In
  // other modes the speed-up is insignificant and requires further testing
  // to ensure that it does not make the overall process worse in any case.
  if (cpi->oxcf.mode == REALTIME && cpi->num_workers > 1 && tile_rows == 1 &&
      tile_cols > 1) {
    return encode_tiles_mt(cpi, data_ptr);
  }

  for (tile_row = 0; tile_row < tile_rows; tile_row++) {
    for (tile_col = 0; tile_col < tile_cols; tile_col++) {
      int tile_idx = tile_row * tile_cols + tile_col;

      if (tile_col < tile_cols - 1 || tile_row < tile_rows - 1)
        vpx_start_encode(&residual_bc, data_ptr + total_size + 4);
      else
        vpx_start_encode(&residual_bc, data_ptr + total_size);

      write_modes(cpi, xd, &cpi->tile_data[tile_idx].tile_info, &residual_bc,
                  tile_row, tile_col, &cpi->max_mv_magnitude,
                  cpi->interp_filter_selected);

      vpx_stop_encode(&residual_bc);
      if (tile_col < tile_cols - 1 || tile_row < tile_rows - 1) {
        // size of this tile
        mem_put_be32(data_ptr + total_size, residual_bc.pos);
        total_size += 4;
      }

      total_size += residual_bc.pos;
    }
  }
  return total_size;
}

static void write_render_size(const VP9_COMMON *cm,
                              struct vpx_write_bit_buffer *wb) {
  const int scaling_active =
      cm->width != cm->render_width || cm->height != cm->render_height;
  vpx_wb_write_bit(wb, scaling_active);
  if (scaling_active) {
    vpx_wb_write_literal(wb, cm->render_width - 1, 16);
    vpx_wb_write_literal(wb, cm->render_height - 1, 16);
  }
}

static void write_frame_size(const VP9_COMMON *cm,
                             struct vpx_write_bit_buffer *wb) {
  vpx_wb_write_literal(wb, cm->width - 1, 16);
  vpx_wb_write_literal(wb, cm->height - 1, 16);

  write_render_size(cm, wb);
}

static void write_frame_size_with_refs(VP9_COMP *cpi,
                                       struct vpx_write_bit_buffer *wb) {
  VP9_COMMON *const cm = &cpi->common;
  int found = 0;

  MV_REFERENCE_FRAME ref_frame;
  for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
    YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer(cpi, ref_frame);

    // Set "found" to 0 for temporal svc and for spatial svc key frame
    if (cpi->use_svc &&
        ((cpi->svc.number_temporal_layers > 1 &&
          cpi->oxcf.rc_mode == VPX_CBR) ||
         (cpi->svc.number_spatial_layers > 1 &&
          cpi->svc.layer_context[cpi->svc.spatial_layer_id].is_key_frame) ||
         (is_two_pass_svc(cpi) &&
          cpi->svc.encode_empty_frame_state == ENCODING &&
          cpi->svc.layer_context[0].frames_from_key_frame <
              cpi->svc.number_temporal_layers + 1))) {
      found = 0;
    } else if (cfg != NULL) {
      found =
          cm->width == cfg->y_crop_width && cm->height == cfg->y_crop_height;
    }
    vpx_wb_write_bit(wb, found);
    if (found) {
      break;
    }
  }

  if (!found) {
    vpx_wb_write_literal(wb, cm->width - 1, 16);
    vpx_wb_write_literal(wb, cm->height - 1, 16);
  }

  write_render_size(cm, wb);
}

static void write_sync_code(struct vpx_write_bit_buffer *wb) {
  vpx_wb_write_literal(wb, VP9_SYNC_CODE_0, 8);
  vpx_wb_write_literal(wb, VP9_SYNC_CODE_1, 8);
  vpx_wb_write_literal(wb, VP9_SYNC_CODE_2, 8);
}

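// The profile is coded in two bits, low bit first (0 -> "00", 1 -> "10",
// 2 -> "01"); profile 3 additionally carries a reserved third bit ("110").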
static void write_profile(BITSTREAM_PROFILE profile,
                          struct vpx_write_bit_buffer *wb) {
  switch (profile) {
    case PROFILE_0: vpx_wb_write_literal(wb, 0, 2); break;
    case PROFILE_1: vpx_wb_write_literal(wb, 2, 2); break;
    case PROFILE_2: vpx_wb_write_literal(wb, 1, 2); break;
    case PROFILE_3: vpx_wb_write_literal(wb, 6, 3); break;
    default: assert(0);
  }
}

static void write_bitdepth_colorspace_sampling(
    VP9_COMMON *const cm, struct vpx_write_bit_buffer *wb) {
  if (cm->profile >= PROFILE_2) {
    assert(cm->bit_depth > VPX_BITS_8);
    vpx_wb_write_bit(wb, cm->bit_depth == VPX_BITS_10 ? 0 : 1);
  }
  vpx_wb_write_literal(wb, cm->color_space, 3);
  if (cm->color_space != VPX_CS_SRGB) {
    // 0: [16, 235] (i.e. xvYCC), 1: [0, 255]
    vpx_wb_write_bit(wb, cm->color_range);
    if (cm->profile == PROFILE_1 || cm->profile == PROFILE_3) {
      assert(cm->subsampling_x != 1 || cm->subsampling_y != 1);
      vpx_wb_write_bit(wb, cm->subsampling_x);
      vpx_wb_write_bit(wb, cm->subsampling_y);
      vpx_wb_write_bit(wb, 0);  // unused
    } else {
      assert(cm->subsampling_x == 1 && cm->subsampling_y == 1);
    }
  } else {
    assert(cm->profile == PROFILE_1 || cm->profile == PROFILE_3);
    vpx_wb_write_bit(wb, 0);  // unused
  }
}

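// Uncompressed frame header, written with the raw bit buffer: frame marker,
// profile, frame-type/show/error-resilient flags, then for key frames the
// sync code, bit depth, color space and frame size (for inter frames the
// refresh mask and reference configuration), followed by the loop filter,
// quantizer, segmentation and tile parameters.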
static void write_uncompressed_header(VP9_COMP *cpi,
                                      struct vpx_write_bit_buffer *wb) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;

  vpx_wb_write_literal(wb, VP9_FRAME_MARKER, 2);

  write_profile(cm->profile, wb);

  vpx_wb_write_bit(wb, 0);  // show_existing_frame
  vpx_wb_write_bit(wb, cm->frame_type);
  vpx_wb_write_bit(wb, cm->show_frame);
  vpx_wb_write_bit(wb, cm->error_resilient_mode);

  if (cm->frame_type == KEY_FRAME) {
    write_sync_code(wb);
    write_bitdepth_colorspace_sampling(cm, wb);
    write_frame_size(cm, wb);
  } else {
    // In spatial svc, if it's not error_resilient_mode, then we need to code
    // all visible frames as invisible. But we need to keep the show_frame
    // flag so that the publisher knows whether the frame is supposed to be
    // visible. So we code the show_frame flag as-is and then code the
    // intra_only bit here. This makes the bitstream incompatible. In the
    // player we will change the show_frame flag to 0, then add a one-byte
    // frame with the show_existing_frame flag set, which tells the decoder
    // which frame we want to show.
    if (!cm->show_frame) vpx_wb_write_bit(wb, cm->intra_only);

    if (!cm->error_resilient_mode)
      vpx_wb_write_literal(wb, cm->reset_frame_context, 2);

    if (cm->intra_only) {
      write_sync_code(wb);

      // Note for profile 0, 420 8bpp is assumed.
      if (cm->profile > PROFILE_0) {
        write_bitdepth_colorspace_sampling(cm, wb);
      }

      vpx_wb_write_literal(wb, vp9_get_refresh_mask(cpi), REF_FRAMES);
      write_frame_size(cm, wb);
    } else {
      MV_REFERENCE_FRAME ref_frame;
      vpx_wb_write_literal(wb, vp9_get_refresh_mask(cpi), REF_FRAMES);
      for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
        assert(get_ref_frame_map_idx(cpi, ref_frame) != INVALID_IDX);
        vpx_wb_write_literal(wb, get_ref_frame_map_idx(cpi, ref_frame),
                             REF_FRAMES_LOG2);
        vpx_wb_write_bit(wb, cm->ref_frame_sign_bias[ref_frame]);
      }

      write_frame_size_with_refs(cpi, wb);

      vpx_wb_write_bit(wb, cm->allow_high_precision_mv);

      fix_interp_filter(cm, cpi->td.counts);
      write_interp_filter(cm->interp_filter, wb);
    }
  }

  if (!cm->error_resilient_mode) {
    vpx_wb_write_bit(wb, cm->refresh_frame_context);
    vpx_wb_write_bit(wb, cm->frame_parallel_decoding_mode);
  }

  vpx_wb_write_literal(wb, cm->frame_context_idx, FRAME_CONTEXTS_LOG2);

  encode_loopfilter(&cm->lf, wb);
  encode_quantization(cm, wb);
  encode_segmentation(cm, xd, wb);

  write_tile_info(cm, wb);
}

static size_t write_compressed_header(VP9_COMP *cpi, uint8_t *data) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
  FRAME_CONTEXT *const fc = cm->fc;
  FRAME_COUNTS *counts = cpi->td.counts;
  vpx_writer header_bc;

  vpx_start_encode(&header_bc, data);

  if (xd->lossless)
    cm->tx_mode = ONLY_4X4;
  else
    encode_txfm_probs(cm, &header_bc, counts);

  update_coef_probs(cpi, &header_bc);
  update_skip_probs(cm, &header_bc, counts);

  if (!frame_is_intra_only(cm)) {
    int i;

    for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
      prob_diff_update(vp9_inter_mode_tree, cm->fc->inter_mode_probs[i],
                       counts->inter_mode[i], INTER_MODES, &header_bc);

    if (cm->interp_filter == SWITCHABLE)
      update_switchable_interp_probs(cm, &header_bc, counts);

    for (i = 0; i < INTRA_INTER_CONTEXTS; i++)
      vp9_cond_prob_diff_update(&header_bc, &fc->intra_inter_prob[i],
                                counts->intra_inter[i]);

    if (cpi->allow_comp_inter_inter) {
      const int use_compound_pred = cm->reference_mode != SINGLE_REFERENCE;
      const int use_hybrid_pred = cm->reference_mode == REFERENCE_MODE_SELECT;

      vpx_write_bit(&header_bc, use_compound_pred);
      if (use_compound_pred) {
        vpx_write_bit(&header_bc, use_hybrid_pred);
        if (use_hybrid_pred)
          for (i = 0; i < COMP_INTER_CONTEXTS; i++)
            vp9_cond_prob_diff_update(&header_bc, &fc->comp_inter_prob[i],
                                      counts->comp_inter[i]);
      }
    }

    if (cm->reference_mode != COMPOUND_REFERENCE) {
      for (i = 0; i < REF_CONTEXTS; i++) {
        vp9_cond_prob_diff_update(&header_bc, &fc->single_ref_prob[i][0],
                                  counts->single_ref[i][0]);
        vp9_cond_prob_diff_update(&header_bc, &fc->single_ref_prob[i][1],
                                  counts->single_ref[i][1]);
      }
    }

    if (cm->reference_mode != SINGLE_REFERENCE)
      for (i = 0; i < REF_CONTEXTS; i++)
        vp9_cond_prob_diff_update(&header_bc, &fc->comp_ref_prob[i],
                                  counts->comp_ref[i]);

    for (i = 0; i < BLOCK_SIZE_GROUPS; ++i)
      prob_diff_update(vp9_intra_mode_tree, cm->fc->y_mode_prob[i],
                       counts->y_mode[i], INTRA_MODES, &header_bc);

    for (i = 0; i < PARTITION_CONTEXTS; ++i)
      prob_diff_update(vp9_partition_tree, fc->partition_prob[i],
                       counts->partition[i], PARTITION_TYPES, &header_bc);

    vp9_write_nmv_probs(cm, cm->allow_high_precision_mv, &header_bc,
                        &counts->mv);
  }

  vpx_stop_encode(&header_bc);
  assert(header_bc.pos <= 0xffff);

  return header_bc.pos;
}

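// Final frame layout: uncompressed header | 16-bit size of the compressed
// header | compressed header | tile data. The size field is written as a
// placeholder first and patched through a saved copy of the bit buffer once
// the compressed header length is known.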
void vp9_pack_bitstream(VP9_COMP *cpi, uint8_t *dest, size_t *size) {
  uint8_t *data = dest;
  size_t first_part_size, uncompressed_hdr_size;
  struct vpx_write_bit_buffer wb = { data, 0 };
  struct vpx_write_bit_buffer saved_wb;

  write_uncompressed_header(cpi, &wb);
  saved_wb = wb;
  vpx_wb_write_literal(&wb, 0, 16);  // placeholder: first part size not known

  uncompressed_hdr_size = vpx_wb_bytes_written(&wb);
  data += uncompressed_hdr_size;

  vpx_clear_system_state();

  first_part_size = write_compressed_header(cpi, data);
  data += first_part_size;
  // TODO(jbb): Figure out what to do if first_part_size does not fit in 16
  // bits.
  vpx_wb_write_literal(&saved_wb, (int)first_part_size, 16);

  data += encode_tiles(cpi, data);

  *size = data - dest;
}
   1361