/* Home | History | Annotate | Download | only in encoder */
      1 /*
      2  *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #include "vp9/encoder/vp9_encodeframe.h"
     12 #include "vp9/encoder/vp9_encoder.h"
     13 #include "vp9/encoder/vp9_ethread.h"
     14 #include "vp9/encoder/vp9_firstpass.h"
     15 #include "vp9/encoder/vp9_multi_thread.h"
     16 #include "vp9/encoder/vp9_temporal_filter.h"
     17 #include "vpx_dsp/vpx_dsp_common.h"
     18 
     19 static void accumulate_rd_opt(ThreadData *td, ThreadData *td_t) {
     20   int i, j, k, l, m, n;
     21 
     22   for (i = 0; i < REFERENCE_MODES; i++)
     23     td->rd_counts.comp_pred_diff[i] += td_t->rd_counts.comp_pred_diff[i];
     24 
     25   for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
     26     td->rd_counts.filter_diff[i] += td_t->rd_counts.filter_diff[i];
     27 
     28   for (i = 0; i < TX_SIZES; i++)
     29     for (j = 0; j < PLANE_TYPES; j++)
     30       for (k = 0; k < REF_TYPES; k++)
     31         for (l = 0; l < COEF_BANDS; l++)
     32           for (m = 0; m < COEFF_CONTEXTS; m++)
     33             for (n = 0; n < ENTROPY_TOKENS; n++)
     34               td->rd_counts.coef_counts[i][j][k][l][m][n] +=
     35                   td_t->rd_counts.coef_counts[i][j][k][l][m][n];
     36 }
     37 
     38 static int enc_worker_hook(void *arg1, void *unused) {
     39   EncWorkerData *const thread_data = (EncWorkerData *)arg1;
     40   VP9_COMP *const cpi = thread_data->cpi;
     41   const VP9_COMMON *const cm = &cpi->common;
     42   const int tile_cols = 1 << cm->log2_tile_cols;
     43   const int tile_rows = 1 << cm->log2_tile_rows;
     44   int t;
     45 
     46   (void)unused;
     47 
     48   for (t = thread_data->start; t < tile_rows * tile_cols;
     49        t += cpi->num_workers) {
     50     int tile_row = t / tile_cols;
     51     int tile_col = t % tile_cols;
     52 
     53     vp9_encode_tile(cpi, thread_data->td, tile_row, tile_col);
     54   }
     55 
     56   return 0;
     57 }
     58 
     59 static int get_max_tile_cols(VP9_COMP *cpi) {
     60   const int aligned_width = ALIGN_POWER_OF_TWO(cpi->oxcf.width, MI_SIZE_LOG2);
     61   int mi_cols = aligned_width >> MI_SIZE_LOG2;
     62   int min_log2_tile_cols, max_log2_tile_cols;
     63   int log2_tile_cols;
     64 
     65   vp9_get_tile_n_bits(mi_cols, &min_log2_tile_cols, &max_log2_tile_cols);
     66   log2_tile_cols =
     67       clamp(cpi->oxcf.tile_columns, min_log2_tile_cols, max_log2_tile_cols);
     68   if (cpi->oxcf.target_level == LEVEL_AUTO) {
     69     const int level_tile_cols =
     70         log_tile_cols_from_picsize_level(cpi->common.width, cpi->common.height);
     71     if (log2_tile_cols > level_tile_cols) {
     72       log2_tile_cols = VPXMAX(level_tile_cols, min_log2_tile_cols);
     73     }
     74   }
     75   return (1 << log2_tile_cols);
     76 }
     77 
// Lazily create the encoder worker pool and per-worker thread data. Runs
// only on the first call (cpi->num_workers == 0); later calls are no-ops.
// The last worker slot is reserved for the main thread, which reuses
// cpi->td rather than allocating its own thread data.
static void create_enc_workers(VP9_COMP *cpi, int num_workers) {
  VP9_COMMON *const cm = &cpi->common;
  const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
  int i;

  // Only run once to create threads and allocate thread data.
  if (cpi->num_workers == 0) {
    int allocated_workers = num_workers;

    // While using SVC, we need to allocate threads according to the highest
    // resolution. When row based multithreading is enabled, it is OK to
    // allocate more threads than the number of max tile columns.
    if (cpi->use_svc && !cpi->row_mt) {
      int max_tile_cols = get_max_tile_cols(cpi);
      allocated_workers = VPXMIN(cpi->oxcf.max_threads, max_tile_cols);
    }

    CHECK_MEM_ERROR(cm, cpi->workers,
                    vpx_malloc(allocated_workers * sizeof(*cpi->workers)));

    CHECK_MEM_ERROR(cm, cpi->tile_thr_data,
                    vpx_calloc(allocated_workers, sizeof(*cpi->tile_thr_data)));

    for (i = 0; i < allocated_workers; i++) {
      VPxWorker *const worker = &cpi->workers[i];
      EncWorkerData *thread_data = &cpi->tile_thr_data[i];

      // Count the slot before any allocation so cpi->num_workers always
      // reflects how many workers have been initialized.
      ++cpi->num_workers;
      winterface->init(worker);

      if (i < allocated_workers - 1) {
        thread_data->cpi = cpi;

        // Allocate thread data.
        CHECK_MEM_ERROR(cm, thread_data->td,
                        vpx_memalign(32, sizeof(*thread_data->td)));
        vp9_zero(*thread_data->td);

        // Set up pc_tree.
        thread_data->td->leaf_tree = NULL;
        thread_data->td->pc_tree = NULL;
        vp9_setup_pc_tree(cm, thread_data->td);

        // Allocate frame counters in thread data.
        CHECK_MEM_ERROR(cm, thread_data->td->counts,
                        vpx_calloc(1, sizeof(*thread_data->td->counts)));

        // Create threads
        if (!winterface->reset(worker))
          vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
                             "Tile encoder thread creation failed");
      } else {
        // Main thread acts as a worker and uses the thread data in cpi.
        thread_data->cpi = cpi;
        thread_data->td = &cpi->td;
      }
      winterface->sync(worker);
    }
  }
}
    138 
// Run |hook| on the first |num_workers| workers and block until all have
// finished. |data2| is forwarded to the hook as its second argument; each
// worker's data1 points at its EncWorkerData slot.
static void launch_enc_workers(VP9_COMP *cpi, VPxWorkerHook hook, void *data2,
                               int num_workers) {
  const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
  int i;

  // Phase 1: wire up every worker before any of them starts running.
  for (i = 0; i < num_workers; i++) {
    VPxWorker *const worker = &cpi->workers[i];
    worker->hook = hook;
    worker->data1 = &cpi->tile_thr_data[i];
    worker->data2 = data2;
  }

  // Encode a frame
  for (i = 0; i < num_workers; i++) {
    VPxWorker *const worker = &cpi->workers[i];
    EncWorkerData *const thread_data = (EncWorkerData *)worker->data1;

    // Set the starting tile for each thread.
    thread_data->start = i;

    // The last allocated slot belongs to the main thread (see
    // create_enc_workers), so it is executed synchronously on this thread
    // instead of being launched on a new one.
    if (i == cpi->num_workers - 1)
      winterface->execute(worker);
    else
      winterface->launch(worker);
  }

  // Encoding ends.
  for (i = 0; i < num_workers; i++) {
    VPxWorker *const worker = &cpi->workers[i];
    winterface->sync(worker);
  }
}
    171 
// Encode the current frame with tile-level parallelism: one worker per
// tile column, capped by the configured max_threads. Per-thread data is
// seeded from cpi->td before launch, and frame/RD counters from the side
// workers are folded back into cm->counts and cpi->td afterwards.
void vp9_encode_tiles_mt(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  const int tile_cols = 1 << cm->log2_tile_cols;
  const int num_workers = VPXMIN(cpi->oxcf.max_threads, tile_cols);
  int i;

  vp9_init_tile_data(cpi);

  create_enc_workers(cpi, num_workers);

  for (i = 0; i < num_workers; i++) {
    EncWorkerData *thread_data;
    thread_data = &cpi->tile_thr_data[i];

    // Before encoding a frame, copy the thread data from cpi. The main
    // thread's slot aliases cpi->td, so it skips the self-copy.
    if (thread_data->td != &cpi->td) {
      thread_data->td->mb = cpi->td.mb;
      thread_data->td->rd_counts = cpi->td.rd_counts;
    }
    if (thread_data->td->counts != &cpi->common.counts) {
      memcpy(thread_data->td->counts, &cpi->common.counts,
             sizeof(cpi->common.counts));
    }

    // Handle use_nonrd_pick_mode case: point the per-plane coefficient
    // buffers at this thread's own pc_root context buffers.
    if (cpi->sf.use_nonrd_pick_mode) {
      MACROBLOCK *const x = &thread_data->td->mb;
      MACROBLOCKD *const xd = &x->e_mbd;
      struct macroblock_plane *const p = x->plane;
      struct macroblockd_plane *const pd = xd->plane;
      PICK_MODE_CONTEXT *ctx = &thread_data->td->pc_root->none;
      int j;

      for (j = 0; j < MAX_MB_PLANE; ++j) {
        p[j].coeff = ctx->coeff_pbuf[j][0];
        p[j].qcoeff = ctx->qcoeff_pbuf[j][0];
        pd[j].dqcoeff = ctx->dqcoeff_pbuf[j][0];
        p[j].eobs = ctx->eobs_pbuf[j][0];
      }
    }
  }

  launch_enc_workers(cpi, enc_worker_hook, NULL, num_workers);

  for (i = 0; i < num_workers; i++) {
    VPxWorker *const worker = &cpi->workers[i];
    EncWorkerData *const thread_data = (EncWorkerData *)worker->data1;

    // Accumulate counters. The last worker is the main thread, whose
    // counters already live in cpi->td / cm->counts.
    if (i < cpi->num_workers - 1) {
      vp9_accumulate_frame_counts(&cm->counts, thread_data->td->counts, 0);
      accumulate_rd_opt(&cpi->td, thread_data->td);
    }
  }
}
    227 
    228 #if !CONFIG_REALTIME_ONLY
// Add the first-pass statistics collected for |tile_data_t| into the
// accumulator tile |tile_data| (used to merge all tiles' stats into the
// first tile after first-pass row-mt encoding).
static void accumulate_fp_tile_stat(TileDataEnc *tile_data,
                                    TileDataEnc *tile_data_t) {
  tile_data->fp_data.intra_factor += tile_data_t->fp_data.intra_factor;
  tile_data->fp_data.brightness_factor +=
      tile_data_t->fp_data.brightness_factor;
  tile_data->fp_data.coded_error += tile_data_t->fp_data.coded_error;
  tile_data->fp_data.sr_coded_error += tile_data_t->fp_data.sr_coded_error;
  tile_data->fp_data.frame_noise_energy +=
      tile_data_t->fp_data.frame_noise_energy;
  tile_data->fp_data.intra_error += tile_data_t->fp_data.intra_error;
  tile_data->fp_data.intercount += tile_data_t->fp_data.intercount;
  tile_data->fp_data.second_ref_count += tile_data_t->fp_data.second_ref_count;
  tile_data->fp_data.neutral_count += tile_data_t->fp_data.neutral_count;
  tile_data->fp_data.intra_count_low += tile_data_t->fp_data.intra_count_low;
  tile_data->fp_data.intra_count_high += tile_data_t->fp_data.intra_count_high;
  tile_data->fp_data.intra_skip_count += tile_data_t->fp_data.intra_skip_count;
  tile_data->fp_data.mvcount += tile_data_t->fp_data.mvcount;
  tile_data->fp_data.sum_mvr += tile_data_t->fp_data.sum_mvr;
  tile_data->fp_data.sum_mvr_abs += tile_data_t->fp_data.sum_mvr_abs;
  tile_data->fp_data.sum_mvc += tile_data_t->fp_data.sum_mvc;
  tile_data->fp_data.sum_mvc_abs += tile_data_t->fp_data.sum_mvc_abs;
  tile_data->fp_data.sum_mvrs += tile_data_t->fp_data.sum_mvrs;
  tile_data->fp_data.sum_mvcs += tile_data_t->fp_data.sum_mvcs;
  tile_data->fp_data.sum_in_vectors += tile_data_t->fp_data.sum_in_vectors;
  tile_data->fp_data.intra_smooth_count +=
      tile_data_t->fp_data.intra_smooth_count;
  // image_data_start_row is merged by taking the smaller of the two
  // values, unless that minimum is INVALID_ROW: then at most one side is
  // valid, so take the maximum (the valid one, or INVALID_ROW if neither
  // side has been set).
  tile_data->fp_data.image_data_start_row =
      VPXMIN(tile_data->fp_data.image_data_start_row,
             tile_data_t->fp_data.image_data_start_row) == INVALID_ROW
          ? VPXMAX(tile_data->fp_data.image_data_start_row,
                   tile_data_t->fp_data.image_data_start_row)
          : VPXMIN(tile_data->fp_data.image_data_start_row,
                   tile_data_t->fp_data.image_data_start_row);
}
    263 #endif  // !CONFIG_REALTIME_ONLY
    264 
    265 // Allocate memory for row synchronization
// Allocate memory for row synchronization: one mutex and one condition
// variable per row (when CONFIG_MULTITHREAD), plus the per-row progress
// array cur_col. Allocation failures go through CHECK_MEM_ERROR on |cm|.
void vp9_row_mt_sync_mem_alloc(VP9RowMTSync *row_mt_sync, VP9_COMMON *cm,
                               int rows) {
  row_mt_sync->rows = rows;
#if CONFIG_MULTITHREAD
  {
    int i;

    CHECK_MEM_ERROR(cm, row_mt_sync->mutex,
                    vpx_malloc(sizeof(*row_mt_sync->mutex) * rows));
    if (row_mt_sync->mutex) {
      for (i = 0; i < rows; ++i) {
        pthread_mutex_init(&row_mt_sync->mutex[i], NULL);
      }
    }

    CHECK_MEM_ERROR(cm, row_mt_sync->cond,
                    vpx_malloc(sizeof(*row_mt_sync->cond) * rows));
    if (row_mt_sync->cond) {
      for (i = 0; i < rows; ++i) {
        pthread_cond_init(&row_mt_sync->cond[i], NULL);
      }
    }
  }
#endif  // CONFIG_MULTITHREAD

  CHECK_MEM_ERROR(cm, row_mt_sync->cur_col,
                  vpx_malloc(sizeof(*row_mt_sync->cur_col) * rows));

  // Set up nsync (synchronization granularity, in column units).
  row_mt_sync->sync_range = 1;
}
    297 
    298 // Deallocate row based multi-threading synchronization related mutex and data
    299 void vp9_row_mt_sync_mem_dealloc(VP9RowMTSync *row_mt_sync) {
    300   if (row_mt_sync != NULL) {
    301 #if CONFIG_MULTITHREAD
    302     int i;
    303 
    304     if (row_mt_sync->mutex != NULL) {
    305       for (i = 0; i < row_mt_sync->rows; ++i) {
    306         pthread_mutex_destroy(&row_mt_sync->mutex[i]);
    307       }
    308       vpx_free(row_mt_sync->mutex);
    309     }
    310     if (row_mt_sync->cond != NULL) {
    311       for (i = 0; i < row_mt_sync->rows; ++i) {
    312         pthread_cond_destroy(&row_mt_sync->cond[i]);
    313       }
    314       vpx_free(row_mt_sync->cond);
    315     }
    316 #endif  // CONFIG_MULTITHREAD
    317     vpx_free(row_mt_sync->cur_col);
    318     // clear the structure as the source of this call may be dynamic change
    319     // in tiles in which case this call will be followed by an _alloc()
    320     // which may fail.
    321     vp9_zero(*row_mt_sync);
    322   }
    323 }
    324 
// Block until the row above (r - 1) has made enough progress for (r, c) to
// proceed. Checks are performed only at every nsync-th column to reduce
// lock traffic. Row 0 never waits.
void vp9_row_mt_sync_read(VP9RowMTSync *const row_mt_sync, int r, int c) {
#if CONFIG_MULTITHREAD
  const int nsync = row_mt_sync->sync_range;

  // nsync is expected to be a power of two here (c & (nsync - 1) masks the
  // low bits), so the wait only happens at nsync-aligned columns.
  if (r && !(c & (nsync - 1))) {
    pthread_mutex_t *const mutex = &row_mt_sync->mutex[r - 1];
    pthread_mutex_lock(mutex);

    // Wait until the row above has advanced to at least c + nsync - 1.
    while (c > row_mt_sync->cur_col[r - 1] - nsync + 1) {
      pthread_cond_wait(&row_mt_sync->cond[r - 1], mutex);
    }
    pthread_mutex_unlock(mutex);
  }
#else
  (void)row_mt_sync;
  (void)r;
  (void)c;
#endif  // CONFIG_MULTITHREAD
}
    344 
    345 void vp9_row_mt_sync_read_dummy(VP9RowMTSync *const row_mt_sync, int r, int c) {
    346   (void)row_mt_sync;
    347   (void)r;
    348   (void)c;
    349   return;
    350 }
    351 
// Publish encoding progress for row |r|: record that the row has reached
// column |c| and wake any thread waiting on this row in
// vp9_row_mt_sync_read(). |cols| is the total column count of the row.
void vp9_row_mt_sync_write(VP9RowMTSync *const row_mt_sync, int r, int c,
                           const int cols) {
#if CONFIG_MULTITHREAD
  const int nsync = row_mt_sync->sync_range;
  int cur;
  // Only signal when there are enough encoded blocks for next row to run.
  int sig = 1;

  if (c < cols - 1) {
    cur = c;
    // Skip signaling at columns that are not at an nsync boundary.
    if (c % nsync != nsync - 1) sig = 0;
  } else {
    // Row complete: advance the marker past the end so readers of this
    // row never block again.
    cur = cols + nsync;
  }

  if (sig) {
    pthread_mutex_lock(&row_mt_sync->mutex[r]);

    row_mt_sync->cur_col[r] = cur;

    pthread_cond_signal(&row_mt_sync->cond[r]);
    pthread_mutex_unlock(&row_mt_sync->mutex[r]);
  }
#else
  (void)row_mt_sync;
  (void)r;
  (void)c;
  (void)cols;
#endif  // CONFIG_MULTITHREAD
}
    382 
    383 void vp9_row_mt_sync_write_dummy(VP9RowMTSync *const row_mt_sync, int r, int c,
    384                                  const int cols) {
    385   (void)row_mt_sync;
    386   (void)r;
    387   (void)c;
    388   (void)cols;
    389   return;
    390 }
    391 
    392 #if !CONFIG_REALTIME_ONLY
// Job-queue worker for first-pass statistics gathering. The worker pulls
// (tile, mb_row) jobs from its assigned tile's queue; when that queue is
// empty it queries the other tiles' completion status, either migrating to
// an unfinished tile or exiting once the whole frame has been processed.
static int first_pass_worker_hook(void *arg1, void *arg2) {
  EncWorkerData *const thread_data = (EncWorkerData *)arg1;
  MultiThreadHandle *multi_thread_ctxt = (MultiThreadHandle *)arg2;
  VP9_COMP *const cpi = thread_data->cpi;
  const VP9_COMMON *const cm = &cpi->common;
  const int tile_cols = 1 << cm->log2_tile_cols;
  int tile_row, tile_col;
  TileDataEnc *this_tile;
  int end_of_frame;
  int thread_id = thread_data->thread_id;
  int cur_tile_id = multi_thread_ctxt->thread_id_to_tile_id[thread_id];
  JobNode *proc_job = NULL;
  FIRSTPASS_DATA fp_acc_data;
  MV zero_mv = { 0, 0 };
  MV best_ref_mv;
  int mb_row;

  end_of_frame = 0;
  while (0 == end_of_frame) {
    // Get the next job in the queue
    proc_job =
        (JobNode *)vp9_enc_grp_get_next_job(multi_thread_ctxt, cur_tile_id);
    if (NULL == proc_job) {
      // Query for the status of other tiles
      end_of_frame = vp9_get_tiles_proc_status(
          multi_thread_ctxt, thread_data->tile_completion_status, &cur_tile_id,
          tile_cols);
    } else {
      tile_col = proc_job->tile_col_id;
      tile_row = proc_job->tile_row_id;

      this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
      mb_row = proc_job->vert_unit_row_num;

      // Reset the per-row accumulators before encoding the MB row.
      best_ref_mv = zero_mv;
      vp9_zero(fp_acc_data);
      fp_acc_data.image_data_start_row = INVALID_ROW;
      vp9_first_pass_encode_tile_mb_row(cpi, thread_data->td, &fp_acc_data,
                                        this_tile, &best_ref_mv, mb_row);
    }
  }
  return 0;
}
    436 
// Run the first pass with row-based multi-threading: (re)allocate the
// row-mt buffers if the tile configuration grew, build the first-pass job
// queue, run the workers, and merge all tiles' first-pass stats into the
// first tile column.
void vp9_encode_fp_row_mt(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  const int tile_cols = 1 << cm->log2_tile_cols;
  const int tile_rows = 1 << cm->log2_tile_rows;
  MultiThreadHandle *multi_thread_ctxt = &cpi->multi_thread_ctxt;
  TileDataEnc *first_tile_col;
  int num_workers = VPXMAX(cpi->oxcf.max_threads, 1);
  int i;

  // Reallocate row-mt memory only when the current allocation is too small
  // for the new tile/row configuration.
  if (multi_thread_ctxt->allocated_tile_cols < tile_cols ||
      multi_thread_ctxt->allocated_tile_rows < tile_rows ||
      multi_thread_ctxt->allocated_vert_unit_rows < cm->mb_rows) {
    vp9_row_mt_mem_dealloc(cpi);
    vp9_init_tile_data(cpi);
    vp9_row_mt_mem_alloc(cpi);
  } else {
    vp9_init_tile_data(cpi);
  }

  create_enc_workers(cpi, num_workers);

  vp9_assign_tile_to_thread(multi_thread_ctxt, tile_cols, cpi->num_workers);

  vp9_prepare_job_queue(cpi, FIRST_PASS_JOB);

  vp9_multi_thread_tile_init(cpi);

  for (i = 0; i < num_workers; i++) {
    EncWorkerData *thread_data;
    thread_data = &cpi->tile_thr_data[i];

    // Before encoding a frame, copy the thread data from cpi.
    if (thread_data->td != &cpi->td) {
      thread_data->td->mb = cpi->td.mb;
    }
  }

  launch_enc_workers(cpi, first_pass_worker_hook, multi_thread_ctxt,
                     num_workers);

  // Accumulate every tile's first-pass stats into the first tile.
  first_tile_col = &cpi->tile_data[0];
  for (i = 1; i < tile_cols; i++) {
    TileDataEnc *this_tile = &cpi->tile_data[i];
    accumulate_fp_tile_stat(first_tile_col, this_tile);
  }
}
    483 
// Job-queue worker for ARNR temporal filtering. Pulls (tile, mb_row) jobs
// from the worker's assigned tile queue and filters one macroblock row per
// job; when the queue is empty it migrates to another unfinished tile or
// exits once the frame is done.
static int temporal_filter_worker_hook(void *arg1, void *arg2) {
  EncWorkerData *const thread_data = (EncWorkerData *)arg1;
  MultiThreadHandle *multi_thread_ctxt = (MultiThreadHandle *)arg2;
  VP9_COMP *const cpi = thread_data->cpi;
  const VP9_COMMON *const cm = &cpi->common;
  const int tile_cols = 1 << cm->log2_tile_cols;
  int tile_row, tile_col;
  int mb_col_start, mb_col_end;
  TileDataEnc *this_tile;
  int end_of_frame;
  int thread_id = thread_data->thread_id;
  int cur_tile_id = multi_thread_ctxt->thread_id_to_tile_id[thread_id];
  JobNode *proc_job = NULL;
  int mb_row;

  end_of_frame = 0;
  while (0 == end_of_frame) {
    // Get the next job in the queue
    proc_job =
        (JobNode *)vp9_enc_grp_get_next_job(multi_thread_ctxt, cur_tile_id);
    if (NULL == proc_job) {
      // Query for the status of other tiles
      end_of_frame = vp9_get_tiles_proc_status(
          multi_thread_ctxt, thread_data->tile_completion_status, &cur_tile_id,
          tile_cols);
    } else {
      tile_col = proc_job->tile_col_id;
      tile_row = proc_job->tile_row_id;
      this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
      // Convert the tile's mi-unit column bounds to macroblock units.
      mb_col_start = (this_tile->tile_info.mi_col_start) >> TF_SHIFT;
      mb_col_end = (this_tile->tile_info.mi_col_end + TF_ROUND) >> TF_SHIFT;
      mb_row = proc_job->vert_unit_row_num;

      vp9_temporal_filter_iterate_row_c(cpi, thread_data->td, mb_row,
                                        mb_col_start, mb_col_end);
    }
  }
  return 0;
}
    523 
// Run ARNR temporal filtering with row-based multi-threading:
// (re)allocate row-mt buffers if the tile configuration grew, build the
// ARNR job queue, then run the temporal-filter workers.
void vp9_temporal_filter_row_mt(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  const int tile_cols = 1 << cm->log2_tile_cols;
  const int tile_rows = 1 << cm->log2_tile_rows;
  MultiThreadHandle *multi_thread_ctxt = &cpi->multi_thread_ctxt;
  // Reuse the existing worker pool size if one was already created.
  int num_workers = cpi->num_workers ? cpi->num_workers : 1;
  int i;

  // Reallocate row-mt memory only when the current allocation is too small
  // for the new tile/row configuration.
  if (multi_thread_ctxt->allocated_tile_cols < tile_cols ||
      multi_thread_ctxt->allocated_tile_rows < tile_rows ||
      multi_thread_ctxt->allocated_vert_unit_rows < cm->mb_rows) {
    vp9_row_mt_mem_dealloc(cpi);
    vp9_init_tile_data(cpi);
    vp9_row_mt_mem_alloc(cpi);
  } else {
    vp9_init_tile_data(cpi);
  }

  create_enc_workers(cpi, num_workers);

  vp9_assign_tile_to_thread(multi_thread_ctxt, tile_cols, cpi->num_workers);

  vp9_prepare_job_queue(cpi, ARNR_JOB);

  for (i = 0; i < num_workers; i++) {
    EncWorkerData *thread_data;
    thread_data = &cpi->tile_thr_data[i];

    // Before encoding a frame, copy the thread data from cpi.
    if (thread_data->td != &cpi->td) {
      thread_data->td->mb = cpi->td.mb;
    }
  }

  launch_enc_workers(cpi, temporal_filter_worker_hook, multi_thread_ctxt,
                     num_workers);
}
    561 #endif  // !CONFIG_REALTIME_ONLY
    562 
// Job-queue worker for row-based multi-threaded encoding. Pulls
// (tile, sb_row) jobs from the worker's assigned tile queue and encodes
// one superblock row per job; when the queue is empty it migrates to
// another unfinished tile or exits once the frame is done.
static int enc_row_mt_worker_hook(void *arg1, void *arg2) {
  EncWorkerData *const thread_data = (EncWorkerData *)arg1;
  MultiThreadHandle *multi_thread_ctxt = (MultiThreadHandle *)arg2;
  VP9_COMP *const cpi = thread_data->cpi;
  const VP9_COMMON *const cm = &cpi->common;
  const int tile_cols = 1 << cm->log2_tile_cols;
  int tile_row, tile_col;
  int end_of_frame;
  int thread_id = thread_data->thread_id;
  int cur_tile_id = multi_thread_ctxt->thread_id_to_tile_id[thread_id];
  JobNode *proc_job = NULL;
  int mi_row;

  end_of_frame = 0;
  while (0 == end_of_frame) {
    // Get the next job in the queue
    proc_job =
        (JobNode *)vp9_enc_grp_get_next_job(multi_thread_ctxt, cur_tile_id);
    if (NULL == proc_job) {
      // Query for the status of other tiles
      end_of_frame = vp9_get_tiles_proc_status(
          multi_thread_ctxt, thread_data->tile_completion_status, &cur_tile_id,
          tile_cols);
    } else {
      tile_col = proc_job->tile_col_id;
      tile_row = proc_job->tile_row_id;
      // Jobs are indexed in superblock rows; convert to mi units.
      mi_row = proc_job->vert_unit_row_num * MI_BLOCK_SIZE;

      vp9_encode_sb_row(cpi, thread_data->td, tile_row, tile_col, mi_row);
    }
  }
  return 0;
}
    596 
// Encode the current frame with row-based multi-threading: (re)allocate
// row-mt buffers if the tile configuration grew, build the encode job
// queue, seed per-thread data from cpi->td, run the workers, and fold the
// side workers' frame/RD counters back into cm->counts and cpi->td.
void vp9_encode_tiles_row_mt(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  const int tile_cols = 1 << cm->log2_tile_cols;
  const int tile_rows = 1 << cm->log2_tile_rows;
  MultiThreadHandle *multi_thread_ctxt = &cpi->multi_thread_ctxt;
  int num_workers = VPXMAX(cpi->oxcf.max_threads, 1);
  int i;

  // Reallocate row-mt memory only when the current allocation is too small
  // for the new tile/row configuration.
  if (multi_thread_ctxt->allocated_tile_cols < tile_cols ||
      multi_thread_ctxt->allocated_tile_rows < tile_rows ||
      multi_thread_ctxt->allocated_vert_unit_rows < cm->mb_rows) {
    vp9_row_mt_mem_dealloc(cpi);
    vp9_init_tile_data(cpi);
    vp9_row_mt_mem_alloc(cpi);
  } else {
    vp9_init_tile_data(cpi);
  }

  create_enc_workers(cpi, num_workers);

  vp9_assign_tile_to_thread(multi_thread_ctxt, tile_cols, cpi->num_workers);

  vp9_prepare_job_queue(cpi, ENCODE_JOB);

  vp9_multi_thread_tile_init(cpi);

  for (i = 0; i < num_workers; i++) {
    EncWorkerData *thread_data;
    thread_data = &cpi->tile_thr_data[i];
    // Before encoding a frame, copy the thread data from cpi. The main
    // thread's slot aliases cpi->td, so it skips the self-copy.
    if (thread_data->td != &cpi->td) {
      thread_data->td->mb = cpi->td.mb;
      thread_data->td->rd_counts = cpi->td.rd_counts;
    }
    if (thread_data->td->counts != &cpi->common.counts) {
      memcpy(thread_data->td->counts, &cpi->common.counts,
             sizeof(cpi->common.counts));
    }

    // Handle use_nonrd_pick_mode case: point the per-plane coefficient
    // buffers at this thread's own pc_root context buffers.
    if (cpi->sf.use_nonrd_pick_mode) {
      MACROBLOCK *const x = &thread_data->td->mb;
      MACROBLOCKD *const xd = &x->e_mbd;
      struct macroblock_plane *const p = x->plane;
      struct macroblockd_plane *const pd = xd->plane;
      PICK_MODE_CONTEXT *ctx = &thread_data->td->pc_root->none;
      int j;

      for (j = 0; j < MAX_MB_PLANE; ++j) {
        p[j].coeff = ctx->coeff_pbuf[j][0];
        p[j].qcoeff = ctx->qcoeff_pbuf[j][0];
        pd[j].dqcoeff = ctx->dqcoeff_pbuf[j][0];
        p[j].eobs = ctx->eobs_pbuf[j][0];
      }
    }
  }

  launch_enc_workers(cpi, enc_row_mt_worker_hook, multi_thread_ctxt,
                     num_workers);

  for (i = 0; i < num_workers; i++) {
    VPxWorker *const worker = &cpi->workers[i];
    EncWorkerData *const thread_data = (EncWorkerData *)worker->data1;

    // Accumulate counters. The last worker is the main thread, whose
    // counters already live in cpi->td / cm->counts.
    if (i < cpi->num_workers - 1) {
      vp9_accumulate_frame_counts(&cm->counts, thread_data->td->counts, 0);
      accumulate_rd_opt(&cpi->td, thread_data->td);
    }
  }
}
    668