Home | History | Annotate | Download | only in decoder
      1 /*
      2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 
     12 #ifndef WIN32
     13 # include <unistd.h>
     14 #endif
     15 #ifdef __APPLE__
     16 #include <mach/mach_init.h>
     17 #endif
     18 #include "onyxd_int.h"
     19 #include "vpx_mem/vpx_mem.h"
     20 #include "threading.h"
     21 
     22 #include "loopfilter.h"
     23 #include "extend.h"
     24 #include "vpx_ports/vpx_timer.h"
     25 #include "detokenize.h"
     26 #include "reconinter.h"
     27 #include "reconintra_mt.h"
     28 
     29 extern void mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd);
     30 extern void clamp_mvs(MACROBLOCKD *xd);
     31 extern void vp8_build_uvmvs(MACROBLOCKD *x, int fullpixel);
     32 
     33 #if CONFIG_RUNTIME_CPU_DETECT
     34 #define RTCD_VTABLE(x) (&(pbi)->common.rtcd.x)
     35 #else
     36 #define RTCD_VTABLE(x) NULL
     37 #endif
     38 
     39 void vp8_setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC *mbrd, int count)
     40 {
     41 #if CONFIG_MULTITHREAD
     42     VP8_COMMON *const pc = & pbi->common;
     43     int i, j;
     44 
     45     for (i = 0; i < count; i++)
     46     {
     47         MACROBLOCKD *mbd = &mbrd[i].mbd;
     48 #if CONFIG_RUNTIME_CPU_DETECT
     49         mbd->rtcd = xd->rtcd;
     50 #endif
     51         mbd->subpixel_predict        = xd->subpixel_predict;
     52         mbd->subpixel_predict8x4     = xd->subpixel_predict8x4;
     53         mbd->subpixel_predict8x8     = xd->subpixel_predict8x8;
     54         mbd->subpixel_predict16x16   = xd->subpixel_predict16x16;
     55 
     56         mbd->mode_info_context = pc->mi   + pc->mode_info_stride * (i + 1);
     57         mbd->mode_info_stride  = pc->mode_info_stride;
     58 
     59         mbd->frame_type = pc->frame_type;
     60         mbd->frames_since_golden      = pc->frames_since_golden;
     61         mbd->frames_till_alt_ref_frame  = pc->frames_till_alt_ref_frame;
     62 
     63         mbd->pre = pc->yv12_fb[pc->lst_fb_idx];
     64         mbd->dst = pc->yv12_fb[pc->new_fb_idx];
     65 
     66         vp8_setup_block_dptrs(mbd);
     67         vp8_build_block_doffsets(mbd);
     68         mbd->segmentation_enabled    = xd->segmentation_enabled;
     69         mbd->mb_segement_abs_delta     = xd->mb_segement_abs_delta;
     70         vpx_memcpy(mbd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data));
     71 
     72         /*signed char ref_lf_deltas[MAX_REF_LF_DELTAS];*/
     73         vpx_memcpy(mbd->ref_lf_deltas, xd->ref_lf_deltas, sizeof(xd->ref_lf_deltas));
     74         /*signed char mode_lf_deltas[MAX_MODE_LF_DELTAS];*/
     75         vpx_memcpy(mbd->mode_lf_deltas, xd->mode_lf_deltas, sizeof(xd->mode_lf_deltas));
     76         /*unsigned char mode_ref_lf_delta_enabled;
     77         unsigned char mode_ref_lf_delta_update;*/
     78         mbd->mode_ref_lf_delta_enabled    = xd->mode_ref_lf_delta_enabled;
     79         mbd->mode_ref_lf_delta_update    = xd->mode_ref_lf_delta_update;
     80 
     81         mbd->current_bc = &pbi->bc2;
     82 
     83         for (j = 0; j < 25; j++)
     84         {
     85             mbd->block[j].dequant = xd->block[j].dequant;
     86         }
     87     }
     88 
     89     for (i=0; i< pc->mb_rows; i++)
     90         pbi->mt_current_mb_col[i]=-1;
     91 #else
     92     (void) pbi;
     93     (void) xd;
     94     (void) mbrd;
     95     (void) count;
     96 #endif
     97 }
     98 
     99 
    100 void vp8mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int mb_col)
    101 {
    102 #if CONFIG_MULTITHREAD
    103     int eobtotal = 0;
    104     int i, do_clamp = xd->mode_info_context->mbmi.need_to_clamp_mvs;
    105     VP8_COMMON *pc = &pbi->common;
    106 
    107     if (xd->mode_info_context->mbmi.mb_skip_coeff)
    108     {
    109         vp8_reset_mb_tokens_context(xd);
    110     }
    111     else
    112     {
    113         eobtotal = vp8_decode_mb_tokens(pbi, xd);
    114     }
    115 
    116     /* Perform temporary clamping of the MV to be used for prediction */
    117     if (do_clamp)
    118     {
    119         clamp_mvs(xd);
    120     }
    121 
    122     xd->mode_info_context->mbmi.dc_diff = 1;
    123 
    124     if (xd->mode_info_context->mbmi.mode != B_PRED && xd->mode_info_context->mbmi.mode != SPLITMV && eobtotal == 0)
    125     {
    126         xd->mode_info_context->mbmi.dc_diff = 0;
    127 
    128         /*mt_skip_recon_mb(pbi, xd, mb_row, mb_col);*/
    129         if (xd->frame_type == KEY_FRAME  ||  xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
    130         {
    131             vp8mt_build_intra_predictors_mbuv_s(pbi, xd, mb_row, mb_col);
    132             vp8mt_build_intra_predictors_mby_s(pbi, xd, mb_row, mb_col);
    133         }
    134         else
    135         {
    136             vp8_build_inter_predictors_mb_s(xd);
    137         }
    138         return;
    139     }
    140 
    141     if (xd->segmentation_enabled)
    142         mb_init_dequantizer(pbi, xd);
    143 
    144     /* do prediction */
    145     if (xd->frame_type == KEY_FRAME  ||  xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
    146     {
    147         vp8mt_build_intra_predictors_mbuv(pbi, xd, mb_row, mb_col);
    148 
    149         if (xd->mode_info_context->mbmi.mode != B_PRED)
    150         {
    151             vp8mt_build_intra_predictors_mby(pbi, xd, mb_row, mb_col);
    152         } else {
    153             vp8mt_intra_prediction_down_copy(pbi, xd, mb_row, mb_col);
    154         }
    155     }
    156     else
    157     {
    158         vp8_build_inter_predictors_mb(xd);
    159     }
    160 
    161     /* dequantization and idct */
    162     if (xd->mode_info_context->mbmi.mode != B_PRED && xd->mode_info_context->mbmi.mode != SPLITMV)
    163     {
    164         BLOCKD *b = &xd->block[24];
    165         DEQUANT_INVOKE(&pbi->dequant, block)(b);
    166 
    167         /* do 2nd order transform on the dc block */
    168         if (xd->eobs[24] > 1)
    169         {
    170             IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh16)(&b->dqcoeff[0], b->diff);
    171             ((int *)b->qcoeff)[0] = 0;
    172             ((int *)b->qcoeff)[1] = 0;
    173             ((int *)b->qcoeff)[2] = 0;
    174             ((int *)b->qcoeff)[3] = 0;
    175             ((int *)b->qcoeff)[4] = 0;
    176             ((int *)b->qcoeff)[5] = 0;
    177             ((int *)b->qcoeff)[6] = 0;
    178             ((int *)b->qcoeff)[7] = 0;
    179         }
    180         else
    181         {
    182             IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh1)(&b->dqcoeff[0], b->diff);
    183             ((int *)b->qcoeff)[0] = 0;
    184         }
    185 
    186         DEQUANT_INVOKE (&pbi->dequant, dc_idct_add_y_block)
    187                         (xd->qcoeff, xd->block[0].dequant,
    188                          xd->predictor, xd->dst.y_buffer,
    189                          xd->dst.y_stride, xd->eobs, xd->block[24].diff);
    190     }
    191     else if ((xd->frame_type == KEY_FRAME  ||  xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) && xd->mode_info_context->mbmi.mode == B_PRED)
    192     {
    193         for (i = 0; i < 16; i++)
    194         {
    195             BLOCKD *b = &xd->block[i];
    196             vp8mt_predict_intra4x4(pbi, xd, b->bmi.mode, b->predictor, mb_row, mb_col, i);
    197 
    198             if (xd->eobs[i] > 1)
    199             {
    200                 DEQUANT_INVOKE(&pbi->dequant, idct_add)
    201                     (b->qcoeff, b->dequant,  b->predictor,
    202                     *(b->base_dst) + b->dst, 16, b->dst_stride);
    203             }
    204             else
    205             {
    206                 IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar_add)
    207                     (b->qcoeff[0] * b->dequant[0], b->predictor,
    208                     *(b->base_dst) + b->dst, 16, b->dst_stride);
    209                 ((int *)b->qcoeff)[0] = 0;
    210             }
    211         }
    212     }
    213     else
    214     {
    215         DEQUANT_INVOKE (&pbi->dequant, idct_add_y_block)
    216                         (xd->qcoeff, xd->block[0].dequant,
    217                          xd->predictor, xd->dst.y_buffer,
    218                          xd->dst.y_stride, xd->eobs);
    219     }
    220 
    221     DEQUANT_INVOKE (&pbi->dequant, idct_add_uv_block)
    222                     (xd->qcoeff+16*16, xd->block[16].dequant,
    223                      xd->predictor+16*16, xd->dst.u_buffer, xd->dst.v_buffer,
    224                      xd->dst.uv_stride, xd->eobs+16);
    225 #else
    226     (void) pbi;
    227     (void) xd;
    228     (void) mb_row;
    229     (void) mb_col;
    230 #endif
    231 }
    232 
    233 
    234 THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
    235 {
    236 #if CONFIG_MULTITHREAD
    237     int ithread = ((DECODETHREAD_DATA *)p_data)->ithread;
    238     VP8D_COMP *pbi = (VP8D_COMP *)(((DECODETHREAD_DATA *)p_data)->ptr1);
    239     MB_ROW_DEC *mbrd = (MB_ROW_DEC *)(((DECODETHREAD_DATA *)p_data)->ptr2);
    240     ENTROPY_CONTEXT_PLANES mb_row_left_context;
    241 
    242     while (1)
    243     {
    244         if (pbi->b_multithreaded_rd == 0)
    245             break;
    246 
    247         /*if(WaitForSingleObject(pbi->h_event_start_decoding[ithread], INFINITE) == WAIT_OBJECT_0)*/
    248         if (sem_wait(&pbi->h_event_start_decoding[ithread]) == 0)
    249         {
    250             if (pbi->b_multithreaded_rd == 0)
    251                 break;
    252             else
    253             {
    254                 VP8_COMMON *pc = &pbi->common;
    255                 MACROBLOCKD *xd = &mbrd->mbd;
    256 
    257                 int mb_row;
    258                 int num_part = 1 << pbi->common.multi_token_partition;
    259                 volatile int *last_row_current_mb_col;
    260                 int nsync = pbi->sync_range;
    261 
    262                 for (mb_row = ithread+1; mb_row < pc->mb_rows; mb_row += (pbi->decoding_thread_count + 1))
    263                 {
    264                     int i;
    265                     int recon_yoffset, recon_uvoffset;
    266                     int mb_col;
    267                     int ref_fb_idx = pc->lst_fb_idx;
    268                     int dst_fb_idx = pc->new_fb_idx;
    269                     int recon_y_stride = pc->yv12_fb[ref_fb_idx].y_stride;
    270                     int recon_uv_stride = pc->yv12_fb[ref_fb_idx].uv_stride;
    271 
    272                     int filter_level;
    273                     loop_filter_info *lfi = pc->lf_info;
    274                     int alt_flt_enabled = xd->segmentation_enabled;
    275                     int Segment;
    276 
    277                     pbi->mb_row_di[ithread].mb_row = mb_row;
    278                     pbi->mb_row_di[ithread].mbd.current_bc =  &pbi->mbc[mb_row%num_part];
    279 
    280                     last_row_current_mb_col = &pbi->mt_current_mb_col[mb_row -1];
    281 
    282                     recon_yoffset = mb_row * recon_y_stride * 16;
    283                     recon_uvoffset = mb_row * recon_uv_stride * 8;
    284                     /* reset above block coeffs */
    285 
    286                     xd->above_context = pc->above_context;
    287                     xd->left_context = &mb_row_left_context;
    288                     vpx_memset(&mb_row_left_context, 0, sizeof(mb_row_left_context));
    289                     xd->up_available = (mb_row != 0);
    290 
    291                     xd->mb_to_top_edge = -((mb_row * 16)) << 3;
    292                     xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;
    293 
    294                     for (mb_col = 0; mb_col < pc->mb_cols; mb_col++)
    295                     {
    296                         if ((mb_col & (nsync-1)) == 0)
    297                         {
    298                             while (mb_col > (*last_row_current_mb_col - nsync) && *last_row_current_mb_col != pc->mb_cols - 1)
    299                             {
    300                                 x86_pause_hint();
    301                                 thread_sleep(0);
    302                             }
    303                         }
    304 
    305                         if (xd->mode_info_context->mbmi.mode == SPLITMV || xd->mode_info_context->mbmi.mode == B_PRED)
    306                         {
    307                             for (i = 0; i < 16; i++)
    308                             {
    309                                 BLOCKD *d = &xd->block[i];
    310                                 vpx_memcpy(&d->bmi, &xd->mode_info_context->bmi[i], sizeof(B_MODE_INFO));
    311                             }
    312                         }
    313 
    314                         if(pbi->common.filter_level)
    315                         {
    316                             /*update loopfilter info*/
    317                             Segment = (alt_flt_enabled) ? xd->mode_info_context->mbmi.segment_id : 0;
    318                             filter_level = pbi->mt_baseline_filter_level[Segment];
    319                             /* Distance of Mb to the various image edges.
    320                              * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
    321                              * Apply any context driven MB level adjustment
    322                              */
    323                             vp8_adjust_mb_lf_value(xd, &filter_level);
    324                         }
    325 
    326                         /* Distance of Mb to the various image edges.
    327                          * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
    328                          */
    329                         xd->mb_to_left_edge = -((mb_col * 16) << 3);
    330                         xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
    331 
    332                         xd->dst.y_buffer = pc->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
    333                         xd->dst.u_buffer = pc->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
    334                         xd->dst.v_buffer = pc->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
    335 
    336                         xd->left_available = (mb_col != 0);
    337 
    338                         /* Select the appropriate reference frame for this MB */
    339                         if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
    340                             ref_fb_idx = pc->lst_fb_idx;
    341                         else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
    342                             ref_fb_idx = pc->gld_fb_idx;
    343                         else
    344                             ref_fb_idx = pc->alt_fb_idx;
    345 
    346                         xd->pre.y_buffer = pc->yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
    347                         xd->pre.u_buffer = pc->yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
    348                         xd->pre.v_buffer = pc->yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;
    349 
    350                         vp8_build_uvmvs(xd, pc->full_pixel);
    351                         vp8mt_decode_macroblock(pbi, xd, mb_row, mb_col);
    352 
    353                         if (pbi->common.filter_level)
    354                         {
    355                             if( mb_row != pc->mb_rows-1 )
    356                             {
    357                                 /* Save decoded MB last row data for next-row decoding */
    358                                 vpx_memcpy((pbi->mt_yabove_row[mb_row + 1] + 32 + mb_col*16), (xd->dst.y_buffer + 15 * recon_y_stride), 16);
    359                                 vpx_memcpy((pbi->mt_uabove_row[mb_row + 1] + 16 + mb_col*8), (xd->dst.u_buffer + 7 * recon_uv_stride), 8);
    360                                 vpx_memcpy((pbi->mt_vabove_row[mb_row + 1] + 16 + mb_col*8), (xd->dst.v_buffer + 7 * recon_uv_stride), 8);
    361                             }
    362 
    363                             /* save left_col for next MB decoding */
    364                             if(mb_col != pc->mb_cols-1)
    365                             {
    366                                 MODE_INFO *next = xd->mode_info_context +1;
    367 
    368                                 if (xd->frame_type == KEY_FRAME  ||  next->mbmi.ref_frame == INTRA_FRAME)
    369                                 {
    370                                     for (i = 0; i < 16; i++)
    371                                         pbi->mt_yleft_col[mb_row][i] = xd->dst.y_buffer [i* recon_y_stride + 15];
    372                                     for (i = 0; i < 8; i++)
    373                                     {
    374                                         pbi->mt_uleft_col[mb_row][i] = xd->dst.u_buffer [i* recon_uv_stride + 7];
    375                                         pbi->mt_vleft_col[mb_row][i] = xd->dst.v_buffer [i* recon_uv_stride + 7];
    376                                     }
    377                                 }
    378                             }
    379 
    380                           /* loopfilter on this macroblock. */
    381                             if (filter_level)
    382                             {
    383                                 if (mb_col > 0)
    384                                     pc->lf_mbv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
    385 
    386                                 if (xd->mode_info_context->mbmi.dc_diff > 0)
    387                                     pc->lf_bv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
    388 
    389                                 /* don't apply across umv border */
    390                                 if (mb_row > 0)
    391                                     pc->lf_mbh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
    392 
    393                                 if (xd->mode_info_context->mbmi.dc_diff > 0)
    394                                     pc->lf_bh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
    395                             }
    396                         }
    397 
    398                         recon_yoffset += 16;
    399                         recon_uvoffset += 8;
    400 
    401                         ++xd->mode_info_context;  /* next mb */
    402 
    403                         xd->above_context++;
    404 
    405                         /*pbi->mb_row_di[ithread].current_mb_col = mb_col;*/
    406                         pbi->mt_current_mb_col[mb_row] = mb_col;
    407                     }
    408 
    409                     /* adjust to the next row of mbs */
    410                     if (pbi->common.filter_level)
    411                     {
    412                         if(mb_row != pc->mb_rows-1)
    413                         {
    414                             int lasty = pc->yv12_fb[ref_fb_idx].y_width + VP8BORDERINPIXELS;
    415                             int lastuv = (pc->yv12_fb[ref_fb_idx].y_width>>1) + (VP8BORDERINPIXELS>>1);
    416 
    417                             for (i = 0; i < 4; i++)
    418                             {
    419                                 pbi->mt_yabove_row[mb_row +1][lasty + i] = pbi->mt_yabove_row[mb_row +1][lasty -1];
    420                                 pbi->mt_uabove_row[mb_row +1][lastuv + i] = pbi->mt_uabove_row[mb_row +1][lastuv -1];
    421                                 pbi->mt_vabove_row[mb_row +1][lastuv + i] = pbi->mt_vabove_row[mb_row +1][lastuv -1];
    422                             }
    423                         }
    424                     } else
    425                         vp8_extend_mb_row(&pc->yv12_fb[dst_fb_idx], xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8);
    426 
    427                     ++xd->mode_info_context;      /* skip prediction column */
    428 
    429                     /* since we have multithread */
    430                     xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count;
    431                 }
    432             }
    433         }
    434         /*  add this to each frame */
    435         if ((mbrd->mb_row == pbi->common.mb_rows-1) || ((mbrd->mb_row == pbi->common.mb_rows-2) && (pbi->common.mb_rows % (pbi->decoding_thread_count+1))==1))
    436         {
    437             /*SetEvent(pbi->h_event_end_decoding);*/
    438             sem_post(&pbi->h_event_end_decoding);
    439         }
    440     }
    441 #else
    442     (void) p_data;
    443 #endif
    444 
    445     return 0 ;
    446 }
    447 
    448 
    449 void vp8_decoder_create_threads(VP8D_COMP *pbi)
    450 {
    451 #if CONFIG_MULTITHREAD
    452     int core_count = 0;
    453     int ithread;
    454     int i;
    455 
    456     pbi->b_multithreaded_rd = 0;
    457     pbi->allocated_decoding_thread_count = 0;
    458     core_count = (pbi->max_threads > 16) ? 16 : pbi->max_threads;
    459 
    460     if (core_count > 1)
    461     {
    462         pbi->b_multithreaded_rd = 1;
    463         pbi->decoding_thread_count = core_count -1;
    464 
    465         CHECK_MEM_ERROR(pbi->h_decoding_thread, vpx_malloc(sizeof(pthread_t) * pbi->decoding_thread_count));
    466         CHECK_MEM_ERROR(pbi->h_event_start_decoding, vpx_malloc(sizeof(sem_t) * pbi->decoding_thread_count));
    467         CHECK_MEM_ERROR(pbi->mb_row_di, vpx_memalign(32, sizeof(MB_ROW_DEC) * pbi->decoding_thread_count));
    468         vpx_memset(pbi->mb_row_di, 0, sizeof(MB_ROW_DEC) * pbi->decoding_thread_count);
    469         CHECK_MEM_ERROR(pbi->de_thread_data, vpx_malloc(sizeof(DECODETHREAD_DATA) * pbi->decoding_thread_count));
    470 
    471         for (ithread = 0; ithread < pbi->decoding_thread_count; ithread++)
    472         {
    473             sem_init(&pbi->h_event_start_decoding[ithread], 0, 0);
    474 
    475             pbi->de_thread_data[ithread].ithread  = ithread;
    476             pbi->de_thread_data[ithread].ptr1     = (void *)pbi;
    477             pbi->de_thread_data[ithread].ptr2     = (void *) &pbi->mb_row_di[ithread];
    478 
    479             pthread_create(&pbi->h_decoding_thread[ithread], 0, vp8_thread_decoding_proc, (&pbi->de_thread_data[ithread]));
    480         }
    481 
    482         sem_init(&pbi->h_event_end_decoding, 0, 0);
    483 
    484         pbi->allocated_decoding_thread_count = pbi->decoding_thread_count;
    485     }
    486 
    487 #else
    488     (void) pbi;
    489 #endif
    490 }
    491 
    492 
    493 void vp8mt_de_alloc_temp_buffers(VP8D_COMP *pbi, int mb_rows)
    494 {
    495 #if CONFIG_MULTITHREAD
    496     VP8_COMMON *const pc = & pbi->common;
    497     int i;
    498 
    499     if (pbi->b_multithreaded_rd)
    500     {
    501         if (pbi->mt_current_mb_col)
    502         {
    503             vpx_free(pbi->mt_current_mb_col);
    504             pbi->mt_current_mb_col = NULL ;
    505         }
    506 
    507         /* Free above_row buffers. */
    508         if (pbi->mt_yabove_row)
    509         {
    510             for (i=0; i< mb_rows; i++)
    511             {
    512                 if (pbi->mt_yabove_row[i])
    513                 {
    514                     vpx_free(pbi->mt_yabove_row[i]);
    515                     pbi->mt_yabove_row[i] = NULL ;
    516                 }
    517             }
    518             vpx_free(pbi->mt_yabove_row);
    519             pbi->mt_yabove_row = NULL ;
    520         }
    521 
    522         if (pbi->mt_uabove_row)
    523         {
    524             for (i=0; i< mb_rows; i++)
    525             {
    526                 if (pbi->mt_uabove_row[i])
    527                 {
    528                     vpx_free(pbi->mt_uabove_row[i]);
    529                     pbi->mt_uabove_row[i] = NULL ;
    530                 }
    531             }
    532             vpx_free(pbi->mt_uabove_row);
    533             pbi->mt_uabove_row = NULL ;
    534         }
    535 
    536         if (pbi->mt_vabove_row)
    537         {
    538             for (i=0; i< mb_rows; i++)
    539             {
    540                 if (pbi->mt_vabove_row[i])
    541                 {
    542                     vpx_free(pbi->mt_vabove_row[i]);
    543                     pbi->mt_vabove_row[i] = NULL ;
    544                 }
    545             }
    546             vpx_free(pbi->mt_vabove_row);
    547             pbi->mt_vabove_row = NULL ;
    548         }
    549 
    550         /* Free left_col buffers. */
    551         if (pbi->mt_yleft_col)
    552         {
    553             for (i=0; i< mb_rows; i++)
    554             {
    555                 if (pbi->mt_yleft_col[i])
    556                 {
    557                     vpx_free(pbi->mt_yleft_col[i]);
    558                     pbi->mt_yleft_col[i] = NULL ;
    559                 }
    560             }
    561             vpx_free(pbi->mt_yleft_col);
    562             pbi->mt_yleft_col = NULL ;
    563         }
    564 
    565         if (pbi->mt_uleft_col)
    566         {
    567             for (i=0; i< mb_rows; i++)
    568             {
    569                 if (pbi->mt_uleft_col[i])
    570                 {
    571                     vpx_free(pbi->mt_uleft_col[i]);
    572                     pbi->mt_uleft_col[i] = NULL ;
    573                 }
    574             }
    575             vpx_free(pbi->mt_uleft_col);
    576             pbi->mt_uleft_col = NULL ;
    577         }
    578 
    579         if (pbi->mt_vleft_col)
    580         {
    581             for (i=0; i< mb_rows; i++)
    582             {
    583                 if (pbi->mt_vleft_col[i])
    584                 {
    585                     vpx_free(pbi->mt_vleft_col[i]);
    586                     pbi->mt_vleft_col[i] = NULL ;
    587                 }
    588             }
    589             vpx_free(pbi->mt_vleft_col);
    590             pbi->mt_vleft_col = NULL ;
    591         }
    592     }
    593 #else
    594     (void) pbi;
    595 #endif
    596 }
    597 
    598 
    599 int vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows)
    600 {
    601 #if CONFIG_MULTITHREAD
    602     VP8_COMMON *const pc = & pbi->common;
    603     int i;
    604     int uv_width;
    605 
    606     if (pbi->b_multithreaded_rd)
    607     {
    608         vp8mt_de_alloc_temp_buffers(pbi, prev_mb_rows);
    609 
    610         /* our internal buffers are always multiples of 16 */
    611         if ((width & 0xf) != 0)
    612             width += 16 - (width & 0xf);
    613 
    614         if (width < 640) pbi->sync_range = 1;
    615         else if (width <= 1280) pbi->sync_range = 8;
    616         else if (width <= 2560) pbi->sync_range =16;
    617         else pbi->sync_range = 32;
    618 
    619         uv_width = width >>1;
    620 
    621         /* Allocate an int for each mb row. */
    622         CHECK_MEM_ERROR(pbi->mt_current_mb_col, vpx_malloc(sizeof(int) * pc->mb_rows));
    623 
    624         /* Allocate memory for above_row buffers. */
    625         CHECK_MEM_ERROR(pbi->mt_yabove_row, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
    626         for (i=0; i< pc->mb_rows; i++)
    627             CHECK_MEM_ERROR(pbi->mt_yabove_row[i], vpx_calloc(sizeof(unsigned char) * (width + (VP8BORDERINPIXELS<<1)), 1));
    628 
    629         CHECK_MEM_ERROR(pbi->mt_uabove_row, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
    630         for (i=0; i< pc->mb_rows; i++)
    631             CHECK_MEM_ERROR(pbi->mt_uabove_row[i], vpx_calloc(sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS), 1));
    632 
    633         CHECK_MEM_ERROR(pbi->mt_vabove_row, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
    634         for (i=0; i< pc->mb_rows; i++)
    635             CHECK_MEM_ERROR(pbi->mt_vabove_row[i], vpx_calloc(sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS), 1));
    636 
    637         /* Allocate memory for left_col buffers. */
    638         CHECK_MEM_ERROR(pbi->mt_yleft_col, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
    639         for (i=0; i< pc->mb_rows; i++)
    640             CHECK_MEM_ERROR(pbi->mt_yleft_col[i], vpx_calloc(sizeof(unsigned char) * 16, 1));
    641 
    642         CHECK_MEM_ERROR(pbi->mt_uleft_col, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
    643         for (i=0; i< pc->mb_rows; i++)
    644             CHECK_MEM_ERROR(pbi->mt_uleft_col[i], vpx_calloc(sizeof(unsigned char) * 8, 1));
    645 
    646         CHECK_MEM_ERROR(pbi->mt_vleft_col, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
    647         for (i=0; i< pc->mb_rows; i++)
    648             CHECK_MEM_ERROR(pbi->mt_vleft_col[i], vpx_calloc(sizeof(unsigned char) * 8, 1));
    649     }
    650     return 0;
    651 #else
    652     (void) pbi;
    653     (void) width;
    654 #endif
    655 }
    656 
    657 
    658 void vp8_decoder_remove_threads(VP8D_COMP *pbi)
    659 {
    660 #if CONFIG_MULTITHREAD
    661 
    662     /* shutdown MB Decoding thread; */
    663     if (pbi->b_multithreaded_rd)
    664     {
    665         int i;
    666 
    667         pbi->b_multithreaded_rd = 0;
    668 
    669         /* allow all threads to exit */
    670         for (i = 0; i < pbi->allocated_decoding_thread_count; i++)
    671         {
    672             sem_post(&pbi->h_event_start_decoding[i]);
    673             pthread_join(pbi->h_decoding_thread[i], NULL);
    674         }
    675 
    676         for (i = 0; i < pbi->allocated_decoding_thread_count; i++)
    677         {
    678             sem_destroy(&pbi->h_event_start_decoding[i]);
    679         }
    680 
    681         sem_destroy(&pbi->h_event_end_decoding);
    682 
    683         if (pbi->h_decoding_thread)
    684         {
    685             vpx_free(pbi->h_decoding_thread);
    686             pbi->h_decoding_thread = NULL;
    687         }
    688 
    689         if (pbi->h_event_start_decoding)
    690         {
    691             vpx_free(pbi->h_event_start_decoding);
    692             pbi->h_event_start_decoding = NULL;
    693         }
    694 
    695         if (pbi->mb_row_di)
    696         {
    697             vpx_free(pbi->mb_row_di);
    698             pbi->mb_row_di = NULL ;
    699         }
    700 
    701         if (pbi->de_thread_data)
    702         {
    703             vpx_free(pbi->de_thread_data);
    704             pbi->de_thread_data = NULL;
    705         }
    706     }
    707 #else
    708     (void) pbi;
    709 #endif
    710 }
    711 
    712 
    713 void vp8mt_lpf_init( VP8D_COMP *pbi, int default_filt_lvl)
    714 {
    715 #if CONFIG_MULTITHREAD
    716     VP8_COMMON *cm  = &pbi->common;
    717     MACROBLOCKD *mbd = &pbi->mb;
    718     /*YV12_BUFFER_CONFIG *post = &cm->new_frame;*/  /*frame_to_show;*/
    719     loop_filter_info *lfi = cm->lf_info;
    720     FRAME_TYPE frame_type = cm->frame_type;
    721 
    722     /*int mb_row;
    723     int mb_col;
    724     int baseline_filter_level[MAX_MB_SEGMENTS];*/
    725     int filter_level;
    726     int alt_flt_enabled = mbd->segmentation_enabled;
    727 
    728     int i;
    729     /*unsigned char *y_ptr, *u_ptr, *v_ptr;*/
    730 
    731     /* Note the baseline filter values for each segment */
    732     if (alt_flt_enabled)
    733     {
    734         for (i = 0; i < MAX_MB_SEGMENTS; i++)
    735         {
    736             /* Abs value */
    737             if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
    738                 pbi->mt_baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
    739             /* Delta Value */
    740             else
    741             {
    742                 pbi->mt_baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i];
    743                 pbi->mt_baseline_filter_level[i] = (pbi->mt_baseline_filter_level[i] >= 0) ? ((pbi->mt_baseline_filter_level[i] <= MAX_LOOP_FILTER) ? pbi->mt_baseline_filter_level[i] : MAX_LOOP_FILTER) : 0;  /* Clamp to valid range */
    744             }
    745         }
    746     }
    747     else
    748     {
    749         for (i = 0; i < MAX_MB_SEGMENTS; i++)
    750             pbi->mt_baseline_filter_level[i] = default_filt_lvl;
    751     }
    752 
    753     /* Initialize the loop filter for this frame. */
    754     if ((cm->last_filter_type != cm->filter_type) || (cm->last_sharpness_level != cm->sharpness_level))
    755         vp8_init_loop_filter(cm);
    756     else if (frame_type != cm->last_frame_type)
    757         vp8_frame_init_loop_filter(lfi, frame_type);
    758 #else
    759     (void) pbi;
    760     (void) default_filt_lvl;
    761 #endif
    762 }
    763 
    764 
    765 void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
    766 {
    767 #if CONFIG_MULTITHREAD
    768     int mb_row;
    769     VP8_COMMON *pc = &pbi->common;
    770 
    771     int ibc = 0;
    772     int num_part = 1 << pbi->common.multi_token_partition;
    773     int i, j;
    774     volatile int *last_row_current_mb_col = NULL;
    775     int nsync = pbi->sync_range;
    776 
    777     int filter_level;
    778     loop_filter_info *lfi = pc->lf_info;
    779     int alt_flt_enabled = xd->segmentation_enabled;
    780     int Segment;
    781 
    782     if(pbi->common.filter_level)
    783     {
    784         /* Set above_row buffer to 127 for decoding first MB row */
    785         vpx_memset(pbi->mt_yabove_row[0] + VP8BORDERINPIXELS-1, 127, pc->yv12_fb[pc->lst_fb_idx].y_width + 5);
    786         vpx_memset(pbi->mt_uabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (pc->yv12_fb[pc->lst_fb_idx].y_width>>1) +5);
    787         vpx_memset(pbi->mt_vabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (pc->yv12_fb[pc->lst_fb_idx].y_width>>1) +5);
    788 
    789         for (i=1; i<pc->mb_rows; i++)
    790         {
    791             vpx_memset(pbi->mt_yabove_row[i] + VP8BORDERINPIXELS-1, (unsigned char)129, 1);
    792             vpx_memset(pbi->mt_uabove_row[i] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1);
    793             vpx_memset(pbi->mt_vabove_row[i] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1);
    794         }
    795 
    796         /* Set left_col to 129 initially */
    797         for (i=0; i<pc->mb_rows; i++)
    798         {
    799             vpx_memset(pbi->mt_yleft_col[i], (unsigned char)129, 16);
    800             vpx_memset(pbi->mt_uleft_col[i], (unsigned char)129, 8);
    801             vpx_memset(pbi->mt_vleft_col[i], (unsigned char)129, 8);
    802         }
    803         vp8mt_lpf_init(pbi, pc->filter_level);
    804     }
    805 
    806     vp8_setup_decoding_thread_data(pbi, xd, pbi->mb_row_di, pbi->decoding_thread_count);
    807 
    808     for (i = 0; i < pbi->decoding_thread_count; i++)
    809         sem_post(&pbi->h_event_start_decoding[i]);
    810 
    811     for (mb_row = 0; mb_row < pc->mb_rows; mb_row += (pbi->decoding_thread_count + 1))
    812     {
    813         int i;
    814 
    815         xd->current_bc = &pbi->mbc[mb_row%num_part];
    816 
    817         /* vp8_decode_mb_row(pbi, pc, mb_row, xd); */
    818         {
    819             int i;
    820             int recon_yoffset, recon_uvoffset;
    821             int mb_col;
    822             int ref_fb_idx = pc->lst_fb_idx;
    823             int dst_fb_idx = pc->new_fb_idx;
    824             int recon_y_stride = pc->yv12_fb[ref_fb_idx].y_stride;
    825             int recon_uv_stride = pc->yv12_fb[ref_fb_idx].uv_stride;
    826 
    827            /* volatile int *last_row_current_mb_col = NULL; */
    828             if (mb_row > 0)
    829                 last_row_current_mb_col = &pbi->mt_current_mb_col[mb_row -1];
    830 
    831             vpx_memset(&pc->left_context, 0, sizeof(pc->left_context));
    832             recon_yoffset = mb_row * recon_y_stride * 16;
    833             recon_uvoffset = mb_row * recon_uv_stride * 8;
    834             /* reset above block coeffs */
    835 
    836             xd->above_context = pc->above_context;
    837             xd->up_available = (mb_row != 0);
    838 
    839             xd->mb_to_top_edge = -((mb_row * 16)) << 3;
    840             xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;
    841 
    842             for (mb_col = 0; mb_col < pc->mb_cols; mb_col++)
    843             {
    844                 if ( mb_row > 0 && (mb_col & (nsync-1)) == 0){
    845                     while (mb_col > (*last_row_current_mb_col - nsync) && *last_row_current_mb_col != pc->mb_cols - 1)
    846                     {
    847                         x86_pause_hint();
    848                         thread_sleep(0);
    849                     }
    850                 }
    851 
    852                 if (xd->mode_info_context->mbmi.mode == SPLITMV || xd->mode_info_context->mbmi.mode == B_PRED)
    853                 {
    854                     for (i = 0; i < 16; i++)
    855                     {
    856                         BLOCKD *d = &xd->block[i];
    857                         vpx_memcpy(&d->bmi, &xd->mode_info_context->bmi[i], sizeof(B_MODE_INFO));
    858                     }
    859                 }
    860 
    861                 if(pbi->common.filter_level)
    862                 {
    863                     /* update loopfilter info */
    864                     Segment = (alt_flt_enabled) ? xd->mode_info_context->mbmi.segment_id : 0;
    865                     filter_level = pbi->mt_baseline_filter_level[Segment];
    866                     /* Distance of Mb to the various image edges.
    867                      * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
    868                      * Apply any context driven MB level adjustment
    869                      */
    870                     vp8_adjust_mb_lf_value(xd, &filter_level);
    871                 }
    872 
    873                 /* Distance of Mb to the various image edges.
    874                  * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
    875                  */
    876                 xd->mb_to_left_edge = -((mb_col * 16) << 3);
    877                 xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
    878 
    879                 xd->dst.y_buffer = pc->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
    880                 xd->dst.u_buffer = pc->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
    881                 xd->dst.v_buffer = pc->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
    882 
    883                 xd->left_available = (mb_col != 0);
    884 
    885                 /* Select the appropriate reference frame for this MB */
    886                 if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
    887                     ref_fb_idx = pc->lst_fb_idx;
    888                 else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
    889                     ref_fb_idx = pc->gld_fb_idx;
    890                 else
    891                     ref_fb_idx = pc->alt_fb_idx;
    892 
    893                 xd->pre.y_buffer = pc->yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
    894                 xd->pre.u_buffer = pc->yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
    895                 xd->pre.v_buffer = pc->yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;
    896 
    897                 vp8_build_uvmvs(xd, pc->full_pixel);
    898                 vp8mt_decode_macroblock(pbi, xd, mb_row, mb_col);
    899 
    900                 if (pbi->common.filter_level)
    901                 {
    902                     /* Save decoded MB last row data for next-row decoding */
    903                     if(mb_row != pc->mb_rows-1)
    904                     {
    905                         vpx_memcpy((pbi->mt_yabove_row[mb_row +1] + 32 + mb_col*16), (xd->dst.y_buffer + 15 * recon_y_stride), 16);
    906                         vpx_memcpy((pbi->mt_uabove_row[mb_row +1] + 16 + mb_col*8), (xd->dst.u_buffer + 7 * recon_uv_stride), 8);
    907                         vpx_memcpy((pbi->mt_vabove_row[mb_row +1] + 16 + mb_col*8), (xd->dst.v_buffer + 7 * recon_uv_stride), 8);
    908                     }
    909 
    910                     /* save left_col for next MB decoding */
    911                     if(mb_col != pc->mb_cols-1)
    912                     {
    913                         MODE_INFO *next = xd->mode_info_context +1;
    914 
    915                         if (xd->frame_type == KEY_FRAME  ||  next->mbmi.ref_frame == INTRA_FRAME)
    916                         {
    917                             for (i = 0; i < 16; i++)
    918                                 pbi->mt_yleft_col[mb_row][i] = xd->dst.y_buffer [i* recon_y_stride + 15];
    919                             for (i = 0; i < 8; i++)
    920                             {
    921                                 pbi->mt_uleft_col[mb_row][i] = xd->dst.u_buffer [i* recon_uv_stride + 7];
    922                                 pbi->mt_vleft_col[mb_row][i] = xd->dst.v_buffer [i* recon_uv_stride + 7];
    923                             }
    924                         }
    925                     }
    926 
    927                     /* loopfilter on this macroblock. */
    928                     if (filter_level)
    929                     {
    930                         if (mb_col > 0)
    931                             pc->lf_mbv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
    932 
    933                         if (xd->mode_info_context->mbmi.dc_diff > 0)
    934                             pc->lf_bv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
    935 
    936                         /* don't apply across umv border */
    937                         if (mb_row > 0)
    938                             pc->lf_mbh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
    939 
    940                         if (xd->mode_info_context->mbmi.dc_diff > 0)
    941                             pc->lf_bh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
    942                     }
    943                 }
    944 
    945                 recon_yoffset += 16;
    946                 recon_uvoffset += 8;
    947 
    948                 ++xd->mode_info_context;  /* next mb */
    949 
    950                 xd->above_context++;
    951 
    952                 pbi->mt_current_mb_col[mb_row] = mb_col;
    953             }
    954 
    955             /* adjust to the next row of mbs */
    956             if (pbi->common.filter_level)
    957             {
    958                 if(mb_row != pc->mb_rows-1)
    959                 {
    960                     int lasty = pc->yv12_fb[ref_fb_idx].y_width + VP8BORDERINPIXELS;
    961                     int lastuv = (pc->yv12_fb[ref_fb_idx].y_width>>1) + (VP8BORDERINPIXELS>>1);
    962 
    963                     for (i = 0; i < 4; i++)
    964                     {
    965                         pbi->mt_yabove_row[mb_row +1][lasty + i] = pbi->mt_yabove_row[mb_row +1][lasty -1];
    966                         pbi->mt_uabove_row[mb_row +1][lastuv + i] = pbi->mt_uabove_row[mb_row +1][lastuv -1];
    967                         pbi->mt_vabove_row[mb_row +1][lastuv + i] = pbi->mt_vabove_row[mb_row +1][lastuv -1];
    968                     }
    969                 }
    970             }else
    971                 vp8_extend_mb_row(&pc->yv12_fb[dst_fb_idx], xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8);
    972 
    973             ++xd->mode_info_context;      /* skip prediction column */
    974         }
    975         xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count;
    976     }
    977 
    978     sem_wait(&pbi->h_event_end_decoding);   /* add back for each frame */
    979 #else
    980     (void) pbi;
    981     (void) xd;
    982 #endif
    983 }
    984