Home | History | Annotate | Download | only in decoder
      1 /*
      2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 
     12 #if !defined(WIN32) && CONFIG_OS_SUPPORT == 1
     13 # include <unistd.h>
     14 #endif
     15 #include "onyxd_int.h"
     16 #include "vpx_mem/vpx_mem.h"
     17 #include "vp8/common/threading.h"
     18 
     19 #include "vp8/common/loopfilter.h"
     20 #include "vp8/common/extend.h"
     21 #include "vpx_ports/vpx_timer.h"
     22 #include "detokenize.h"
     23 #include "vp8/common/reconinter.h"
     24 #include "reconintra_mt.h"
     25 
/* Helpers used below whose definitions are not in this file. */
extern void mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd);
extern void clamp_mvs(MACROBLOCKD *xd);
extern void vp8_build_uvmvs(MACROBLOCKD *x, int fullpixel);

/*
 * RTCD_VTABLE(x) yields the address of member `x` of (pbi)->common.rtcd when
 * CONFIG_RUNTIME_CPU_DETECT is enabled, and NULL otherwise.
 * Note: the expansion names `pbi` directly, so a variable called `pbi` must
 * be in scope wherever the macro is used.
 */
#if CONFIG_RUNTIME_CPU_DETECT
#define RTCD_VTABLE(x) (&(pbi)->common.rtcd.x)
#else
#define RTCD_VTABLE(x) NULL
#endif
     35 
     36 static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC *mbrd, int count)
     37 {
     38     VP8_COMMON *const pc = & pbi->common;
     39     int i, j;
     40 
     41     for (i = 0; i < count; i++)
     42     {
     43         MACROBLOCKD *mbd = &mbrd[i].mbd;
     44 #if CONFIG_RUNTIME_CPU_DETECT
     45         mbd->rtcd = xd->rtcd;
     46 #endif
     47         mbd->subpixel_predict        = xd->subpixel_predict;
     48         mbd->subpixel_predict8x4     = xd->subpixel_predict8x4;
     49         mbd->subpixel_predict8x8     = xd->subpixel_predict8x8;
     50         mbd->subpixel_predict16x16   = xd->subpixel_predict16x16;
     51 
     52         mbd->mode_info_context = pc->mi   + pc->mode_info_stride * (i + 1);
     53         mbd->mode_info_stride  = pc->mode_info_stride;
     54 
     55         mbd->frame_type = pc->frame_type;
     56         mbd->frames_since_golden      = pc->frames_since_golden;
     57         mbd->frames_till_alt_ref_frame  = pc->frames_till_alt_ref_frame;
     58 
     59         mbd->pre = pc->yv12_fb[pc->lst_fb_idx];
     60         mbd->dst = pc->yv12_fb[pc->new_fb_idx];
     61 
     62         vp8_setup_block_dptrs(mbd);
     63         vp8_build_block_doffsets(mbd);
     64         mbd->segmentation_enabled    = xd->segmentation_enabled;
     65         mbd->mb_segement_abs_delta     = xd->mb_segement_abs_delta;
     66         vpx_memcpy(mbd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data));
     67 
     68         /*signed char ref_lf_deltas[MAX_REF_LF_DELTAS];*/
     69         vpx_memcpy(mbd->ref_lf_deltas, xd->ref_lf_deltas, sizeof(xd->ref_lf_deltas));
     70         /*signed char mode_lf_deltas[MAX_MODE_LF_DELTAS];*/
     71         vpx_memcpy(mbd->mode_lf_deltas, xd->mode_lf_deltas, sizeof(xd->mode_lf_deltas));
     72         /*unsigned char mode_ref_lf_delta_enabled;
     73         unsigned char mode_ref_lf_delta_update;*/
     74         mbd->mode_ref_lf_delta_enabled    = xd->mode_ref_lf_delta_enabled;
     75         mbd->mode_ref_lf_delta_update    = xd->mode_ref_lf_delta_update;
     76 
     77         mbd->current_bc = &pbi->bc2;
     78 
     79         for (j = 0; j < 25; j++)
     80         {
     81             mbd->block[j].dequant = xd->block[j].dequant;
     82         }
     83     }
     84 
     85     for (i=0; i< pc->mb_rows; i++)
     86         pbi->mt_current_mb_col[i]=-1;
     87 }
     88 
     89 
     90 static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int mb_col)
     91 {
     92     int eobtotal = 0;
     93     int i, do_clamp = xd->mode_info_context->mbmi.need_to_clamp_mvs;
     94     VP8_COMMON *pc = &pbi->common;
     95 
     96     if (xd->mode_info_context->mbmi.mb_skip_coeff)
     97     {
     98         vp8_reset_mb_tokens_context(xd);
     99     }
    100     else
    101     {
    102         eobtotal = vp8_decode_mb_tokens(pbi, xd);
    103     }
    104 
    105     /* Perform temporary clamping of the MV to be used for prediction */
    106     if (do_clamp)
    107     {
    108         clamp_mvs(xd);
    109     }
    110 
    111     xd->mode_info_context->mbmi.dc_diff = 1;
    112 
    113     if (xd->mode_info_context->mbmi.mode != B_PRED && xd->mode_info_context->mbmi.mode != SPLITMV && eobtotal == 0)
    114     {
    115         xd->mode_info_context->mbmi.dc_diff = 0;
    116 
    117         /*mt_skip_recon_mb(pbi, xd, mb_row, mb_col);*/
    118         if (xd->frame_type == KEY_FRAME  ||  xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
    119         {
    120             vp8mt_build_intra_predictors_mbuv_s(pbi, xd, mb_row, mb_col);
    121             vp8mt_build_intra_predictors_mby_s(pbi, xd, mb_row, mb_col);
    122         }
    123         else
    124         {
    125             vp8_build_inter_predictors_mb_s(xd);
    126         }
    127         return;
    128     }
    129 
    130     if (xd->segmentation_enabled)
    131         mb_init_dequantizer(pbi, xd);
    132 
    133     /* do prediction */
    134     if (xd->frame_type == KEY_FRAME  ||  xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
    135     {
    136         vp8mt_build_intra_predictors_mbuv(pbi, xd, mb_row, mb_col);
    137 
    138         if (xd->mode_info_context->mbmi.mode != B_PRED)
    139         {
    140             vp8mt_build_intra_predictors_mby(pbi, xd, mb_row, mb_col);
    141         } else {
    142             vp8mt_intra_prediction_down_copy(pbi, xd, mb_row, mb_col);
    143         }
    144     }
    145     else
    146     {
    147         vp8_build_inter_predictors_mb(xd);
    148     }
    149 
    150     /* dequantization and idct */
    151     if (xd->mode_info_context->mbmi.mode != B_PRED && xd->mode_info_context->mbmi.mode != SPLITMV)
    152     {
    153         BLOCKD *b = &xd->block[24];
    154         DEQUANT_INVOKE(&pbi->dequant, block)(b);
    155 
    156         /* do 2nd order transform on the dc block */
    157         if (xd->eobs[24] > 1)
    158         {
    159             IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh16)(&b->dqcoeff[0], b->diff);
    160             ((int *)b->qcoeff)[0] = 0;
    161             ((int *)b->qcoeff)[1] = 0;
    162             ((int *)b->qcoeff)[2] = 0;
    163             ((int *)b->qcoeff)[3] = 0;
    164             ((int *)b->qcoeff)[4] = 0;
    165             ((int *)b->qcoeff)[5] = 0;
    166             ((int *)b->qcoeff)[6] = 0;
    167             ((int *)b->qcoeff)[7] = 0;
    168         }
    169         else
    170         {
    171             IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh1)(&b->dqcoeff[0], b->diff);
    172             ((int *)b->qcoeff)[0] = 0;
    173         }
    174 
    175         DEQUANT_INVOKE (&pbi->dequant, dc_idct_add_y_block)
    176                         (xd->qcoeff, xd->block[0].dequant,
    177                          xd->predictor, xd->dst.y_buffer,
    178                          xd->dst.y_stride, xd->eobs, xd->block[24].diff);
    179     }
    180     else if ((xd->frame_type == KEY_FRAME  ||  xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) && xd->mode_info_context->mbmi.mode == B_PRED)
    181     {
    182         for (i = 0; i < 16; i++)
    183         {
    184             BLOCKD *b = &xd->block[i];
    185             vp8mt_predict_intra4x4(pbi, xd, b->bmi.mode, b->predictor, mb_row, mb_col, i);
    186 
    187             if (xd->eobs[i] > 1)
    188             {
    189                 DEQUANT_INVOKE(&pbi->dequant, idct_add)
    190                     (b->qcoeff, b->dequant,  b->predictor,
    191                     *(b->base_dst) + b->dst, 16, b->dst_stride);
    192             }
    193             else
    194             {
    195                 IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar_add)
    196                     (b->qcoeff[0] * b->dequant[0], b->predictor,
    197                     *(b->base_dst) + b->dst, 16, b->dst_stride);
    198                 ((int *)b->qcoeff)[0] = 0;
    199             }
    200         }
    201     }
    202     else
    203     {
    204         DEQUANT_INVOKE (&pbi->dequant, idct_add_y_block)
    205                         (xd->qcoeff, xd->block[0].dequant,
    206                          xd->predictor, xd->dst.y_buffer,
    207                          xd->dst.y_stride, xd->eobs);
    208     }
    209 
    210     DEQUANT_INVOKE (&pbi->dequant, idct_add_uv_block)
    211                     (xd->qcoeff+16*16, xd->block[16].dequant,
    212                      xd->predictor+16*16, xd->dst.u_buffer, xd->dst.v_buffer,
    213                      xd->dst.uv_stride, xd->eobs+16);
    214 }
    215 
    216 
    217 static THREAD_FUNCTION thread_decoding_proc(void *p_data)
    218 {
    219     int ithread = ((DECODETHREAD_DATA *)p_data)->ithread;
    220     VP8D_COMP *pbi = (VP8D_COMP *)(((DECODETHREAD_DATA *)p_data)->ptr1);
    221     MB_ROW_DEC *mbrd = (MB_ROW_DEC *)(((DECODETHREAD_DATA *)p_data)->ptr2);
    222     ENTROPY_CONTEXT_PLANES mb_row_left_context;
    223 
    224     while (1)
    225     {
    226         if (pbi->b_multithreaded_rd == 0)
    227             break;
    228 
    229         /*if(WaitForSingleObject(pbi->h_event_start_decoding[ithread], INFINITE) == WAIT_OBJECT_0)*/
    230         if (sem_wait(&pbi->h_event_start_decoding[ithread]) == 0)
    231         {
    232             if (pbi->b_multithreaded_rd == 0)
    233                 break;
    234             else
    235             {
    236                 VP8_COMMON *pc = &pbi->common;
    237                 MACROBLOCKD *xd = &mbrd->mbd;
    238 
    239                 int mb_row;
    240                 int num_part = 1 << pbi->common.multi_token_partition;
    241                 volatile int *last_row_current_mb_col;
    242                 int nsync = pbi->sync_range;
    243 
    244                 for (mb_row = ithread+1; mb_row < pc->mb_rows; mb_row += (pbi->decoding_thread_count + 1))
    245                 {
    246                     int i;
    247                     int recon_yoffset, recon_uvoffset;
    248                     int mb_col;
    249                     int ref_fb_idx = pc->lst_fb_idx;
    250                     int dst_fb_idx = pc->new_fb_idx;
    251                     int recon_y_stride = pc->yv12_fb[ref_fb_idx].y_stride;
    252                     int recon_uv_stride = pc->yv12_fb[ref_fb_idx].uv_stride;
    253 
    254                     int filter_level;
    255                     loop_filter_info *lfi = pc->lf_info;
    256                     int alt_flt_enabled = xd->segmentation_enabled;
    257                     int Segment;
    258 
    259                     pbi->mb_row_di[ithread].mb_row = mb_row;
    260                     pbi->mb_row_di[ithread].mbd.current_bc =  &pbi->mbc[mb_row%num_part];
    261 
    262                     last_row_current_mb_col = &pbi->mt_current_mb_col[mb_row -1];
    263 
    264                     recon_yoffset = mb_row * recon_y_stride * 16;
    265                     recon_uvoffset = mb_row * recon_uv_stride * 8;
    266                     /* reset above block coeffs */
    267 
    268                     xd->above_context = pc->above_context;
    269                     xd->left_context = &mb_row_left_context;
    270                     vpx_memset(&mb_row_left_context, 0, sizeof(mb_row_left_context));
    271                     xd->up_available = (mb_row != 0);
    272 
    273                     xd->mb_to_top_edge = -((mb_row * 16)) << 3;
    274                     xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;
    275 
    276                     for (mb_col = 0; mb_col < pc->mb_cols; mb_col++)
    277                     {
    278                         if ((mb_col & (nsync-1)) == 0)
    279                         {
    280                             while (mb_col > (*last_row_current_mb_col - nsync) && *last_row_current_mb_col != pc->mb_cols - 1)
    281                             {
    282                                 x86_pause_hint();
    283                                 thread_sleep(0);
    284                             }
    285                         }
    286 
    287                         if (xd->mode_info_context->mbmi.mode == SPLITMV || xd->mode_info_context->mbmi.mode == B_PRED)
    288                         {
    289                             for (i = 0; i < 16; i++)
    290                             {
    291                                 BLOCKD *d = &xd->block[i];
    292                                 vpx_memcpy(&d->bmi, &xd->mode_info_context->bmi[i], sizeof(B_MODE_INFO));
    293                             }
    294                         }
    295 
    296                         /* Distance of Mb to the various image edges.
    297                          * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
    298                          */
    299                         xd->mb_to_left_edge = -((mb_col * 16) << 3);
    300                         xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
    301 
    302                         xd->dst.y_buffer = pc->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
    303                         xd->dst.u_buffer = pc->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
    304                         xd->dst.v_buffer = pc->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
    305 
    306                         xd->left_available = (mb_col != 0);
    307 
    308                         /* Select the appropriate reference frame for this MB */
    309                         if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
    310                             ref_fb_idx = pc->lst_fb_idx;
    311                         else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
    312                             ref_fb_idx = pc->gld_fb_idx;
    313                         else
    314                             ref_fb_idx = pc->alt_fb_idx;
    315 
    316                         xd->pre.y_buffer = pc->yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
    317                         xd->pre.u_buffer = pc->yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
    318                         xd->pre.v_buffer = pc->yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;
    319 
    320                         vp8_build_uvmvs(xd, pc->full_pixel);
    321                         decode_macroblock(pbi, xd, mb_row, mb_col);
    322 
    323                         if (pbi->common.filter_level)
    324                         {
    325                             if( mb_row != pc->mb_rows-1 )
    326                             {
    327                                 /* Save decoded MB last row data for next-row decoding */
    328                                 vpx_memcpy((pbi->mt_yabove_row[mb_row + 1] + 32 + mb_col*16), (xd->dst.y_buffer + 15 * recon_y_stride), 16);
    329                                 vpx_memcpy((pbi->mt_uabove_row[mb_row + 1] + 16 + mb_col*8), (xd->dst.u_buffer + 7 * recon_uv_stride), 8);
    330                                 vpx_memcpy((pbi->mt_vabove_row[mb_row + 1] + 16 + mb_col*8), (xd->dst.v_buffer + 7 * recon_uv_stride), 8);
    331                             }
    332 
    333                             /* save left_col for next MB decoding */
    334                             if(mb_col != pc->mb_cols-1)
    335                             {
    336                                 MODE_INFO *next = xd->mode_info_context +1;
    337 
    338                                 if (xd->frame_type == KEY_FRAME  ||  next->mbmi.ref_frame == INTRA_FRAME)
    339                                 {
    340                                     for (i = 0; i < 16; i++)
    341                                         pbi->mt_yleft_col[mb_row][i] = xd->dst.y_buffer [i* recon_y_stride + 15];
    342                                     for (i = 0; i < 8; i++)
    343                                     {
    344                                         pbi->mt_uleft_col[mb_row][i] = xd->dst.u_buffer [i* recon_uv_stride + 7];
    345                                         pbi->mt_vleft_col[mb_row][i] = xd->dst.v_buffer [i* recon_uv_stride + 7];
    346                                     }
    347                                 }
    348                             }
    349 
    350                             /* update loopfilter info */
    351                             Segment = (alt_flt_enabled) ? xd->mode_info_context->mbmi.segment_id : 0;
    352                             filter_level = pbi->mt_baseline_filter_level[Segment];
    353                             /* Distance of Mb to the various image edges.
    354                              * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
    355                              * Apply any context driven MB level adjustment
    356                              */
    357                             filter_level = vp8_adjust_mb_lf_value(xd, filter_level);
    358 
    359                             /* loopfilter on this macroblock. */
    360                             if (filter_level)
    361                             {
    362                                 if (mb_col > 0)
    363                                     pc->lf_mbv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
    364 
    365                                 if (xd->mode_info_context->mbmi.dc_diff > 0)
    366                                     pc->lf_bv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
    367 
    368                                 /* don't apply across umv border */
    369                                 if (mb_row > 0)
    370                                     pc->lf_mbh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
    371 
    372                                 if (xd->mode_info_context->mbmi.dc_diff > 0)
    373                                     pc->lf_bh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
    374                             }
    375                         }
    376 
    377                         recon_yoffset += 16;
    378                         recon_uvoffset += 8;
    379 
    380                         ++xd->mode_info_context;  /* next mb */
    381 
    382                         xd->above_context++;
    383 
    384                         /*pbi->mb_row_di[ithread].current_mb_col = mb_col;*/
    385                         pbi->mt_current_mb_col[mb_row] = mb_col;
    386                     }
    387 
    388                     /* adjust to the next row of mbs */
    389                     if (pbi->common.filter_level)
    390                     {
    391                         if(mb_row != pc->mb_rows-1)
    392                         {
    393                             int lasty = pc->yv12_fb[ref_fb_idx].y_width + VP8BORDERINPIXELS;
    394                             int lastuv = (pc->yv12_fb[ref_fb_idx].y_width>>1) + (VP8BORDERINPIXELS>>1);
    395 
    396                             for (i = 0; i < 4; i++)
    397                             {
    398                                 pbi->mt_yabove_row[mb_row +1][lasty + i] = pbi->mt_yabove_row[mb_row +1][lasty -1];
    399                                 pbi->mt_uabove_row[mb_row +1][lastuv + i] = pbi->mt_uabove_row[mb_row +1][lastuv -1];
    400                                 pbi->mt_vabove_row[mb_row +1][lastuv + i] = pbi->mt_vabove_row[mb_row +1][lastuv -1];
    401                             }
    402                         }
    403                     } else
    404                         vp8_extend_mb_row(&pc->yv12_fb[dst_fb_idx], xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8);
    405 
    406                     ++xd->mode_info_context;      /* skip prediction column */
    407 
    408                     /* since we have multithread */
    409                     xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count;
    410                 }
    411             }
    412         }
    413         /*  add this to each frame */
    414         if ((mbrd->mb_row == pbi->common.mb_rows-1) || ((mbrd->mb_row == pbi->common.mb_rows-2) && (pbi->common.mb_rows % (pbi->decoding_thread_count+1))==1))
    415         {
    416             /*SetEvent(pbi->h_event_end_decoding);*/
    417             sem_post(&pbi->h_event_end_decoding);
    418         }
    419     }
    420 
    421     return 0 ;
    422 }
    423 
    424 
    425 void vp8_decoder_create_threads(VP8D_COMP *pbi)
    426 {
    427     int core_count = 0;
    428     int ithread;
    429 
    430     pbi->b_multithreaded_rd = 0;
    431     pbi->allocated_decoding_thread_count = 0;
    432     core_count = (pbi->max_threads > 16) ? 16 : pbi->max_threads;
    433 
    434     if (core_count > 1)
    435     {
    436         pbi->b_multithreaded_rd = 1;
    437         pbi->decoding_thread_count = core_count -1;
    438 
    439         CHECK_MEM_ERROR(pbi->h_decoding_thread, vpx_malloc(sizeof(pthread_t) * pbi->decoding_thread_count));
    440         CHECK_MEM_ERROR(pbi->h_event_start_decoding, vpx_malloc(sizeof(sem_t) * pbi->decoding_thread_count));
    441         CHECK_MEM_ERROR(pbi->mb_row_di, vpx_memalign(32, sizeof(MB_ROW_DEC) * pbi->decoding_thread_count));
    442         vpx_memset(pbi->mb_row_di, 0, sizeof(MB_ROW_DEC) * pbi->decoding_thread_count);
    443         CHECK_MEM_ERROR(pbi->de_thread_data, vpx_malloc(sizeof(DECODETHREAD_DATA) * pbi->decoding_thread_count));
    444 
    445         for (ithread = 0; ithread < pbi->decoding_thread_count; ithread++)
    446         {
    447             sem_init(&pbi->h_event_start_decoding[ithread], 0, 0);
    448 
    449             pbi->de_thread_data[ithread].ithread  = ithread;
    450             pbi->de_thread_data[ithread].ptr1     = (void *)pbi;
    451             pbi->de_thread_data[ithread].ptr2     = (void *) &pbi->mb_row_di[ithread];
    452 
    453             pthread_create(&pbi->h_decoding_thread[ithread], 0, thread_decoding_proc, (&pbi->de_thread_data[ithread]));
    454         }
    455 
    456         sem_init(&pbi->h_event_end_decoding, 0, 0);
    457 
    458         pbi->allocated_decoding_thread_count = pbi->decoding_thread_count;
    459     }
    460 }
    461 
    462 
    463 void vp8mt_de_alloc_temp_buffers(VP8D_COMP *pbi, int mb_rows)
    464 {
    465     VP8_COMMON *const pc = & pbi->common;
    466     int i;
    467 
    468     if (pbi->b_multithreaded_rd)
    469     {
    470             vpx_free(pbi->mt_current_mb_col);
    471             pbi->mt_current_mb_col = NULL ;
    472 
    473         /* Free above_row buffers. */
    474         if (pbi->mt_yabove_row)
    475         {
    476             for (i=0; i< mb_rows; i++)
    477             {
    478                     vpx_free(pbi->mt_yabove_row[i]);
    479                     pbi->mt_yabove_row[i] = NULL ;
    480             }
    481             vpx_free(pbi->mt_yabove_row);
    482             pbi->mt_yabove_row = NULL ;
    483         }
    484 
    485         if (pbi->mt_uabove_row)
    486         {
    487             for (i=0; i< mb_rows; i++)
    488             {
    489                     vpx_free(pbi->mt_uabove_row[i]);
    490                     pbi->mt_uabove_row[i] = NULL ;
    491             }
    492             vpx_free(pbi->mt_uabove_row);
    493             pbi->mt_uabove_row = NULL ;
    494         }
    495 
    496         if (pbi->mt_vabove_row)
    497         {
    498             for (i=0; i< mb_rows; i++)
    499             {
    500                     vpx_free(pbi->mt_vabove_row[i]);
    501                     pbi->mt_vabove_row[i] = NULL ;
    502             }
    503             vpx_free(pbi->mt_vabove_row);
    504             pbi->mt_vabove_row = NULL ;
    505         }
    506 
    507         /* Free left_col buffers. */
    508         if (pbi->mt_yleft_col)
    509         {
    510             for (i=0; i< mb_rows; i++)
    511             {
    512                     vpx_free(pbi->mt_yleft_col[i]);
    513                     pbi->mt_yleft_col[i] = NULL ;
    514             }
    515             vpx_free(pbi->mt_yleft_col);
    516             pbi->mt_yleft_col = NULL ;
    517         }
    518 
    519         if (pbi->mt_uleft_col)
    520         {
    521             for (i=0; i< mb_rows; i++)
    522             {
    523                     vpx_free(pbi->mt_uleft_col[i]);
    524                     pbi->mt_uleft_col[i] = NULL ;
    525             }
    526             vpx_free(pbi->mt_uleft_col);
    527             pbi->mt_uleft_col = NULL ;
    528         }
    529 
    530         if (pbi->mt_vleft_col)
    531         {
    532             for (i=0; i< mb_rows; i++)
    533             {
    534                     vpx_free(pbi->mt_vleft_col[i]);
    535                     pbi->mt_vleft_col[i] = NULL ;
    536             }
    537             vpx_free(pbi->mt_vleft_col);
    538             pbi->mt_vleft_col = NULL ;
    539         }
    540     }
    541 }
    542 
    543 
    544 void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows)
    545 {
    546     VP8_COMMON *const pc = & pbi->common;
    547     int i;
    548     int uv_width;
    549 
    550     if (pbi->b_multithreaded_rd)
    551     {
    552         vp8mt_de_alloc_temp_buffers(pbi, prev_mb_rows);
    553 
    554         /* our internal buffers are always multiples of 16 */
    555         if ((width & 0xf) != 0)
    556             width += 16 - (width & 0xf);
    557 
    558         if (width < 640) pbi->sync_range = 1;
    559         else if (width <= 1280) pbi->sync_range = 8;
    560         else if (width <= 2560) pbi->sync_range =16;
    561         else pbi->sync_range = 32;
    562 
    563         uv_width = width >>1;
    564 
    565         /* Allocate an int for each mb row. */
    566         CHECK_MEM_ERROR(pbi->mt_current_mb_col, vpx_malloc(sizeof(int) * pc->mb_rows));
    567 
    568         /* Allocate memory for above_row buffers. */
    569         CHECK_MEM_ERROR(pbi->mt_yabove_row, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
    570         for (i=0; i< pc->mb_rows; i++)
    571             CHECK_MEM_ERROR(pbi->mt_yabove_row[i], vpx_calloc(sizeof(unsigned char) * (width + (VP8BORDERINPIXELS<<1)), 1));
    572 
    573         CHECK_MEM_ERROR(pbi->mt_uabove_row, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
    574         for (i=0; i< pc->mb_rows; i++)
    575             CHECK_MEM_ERROR(pbi->mt_uabove_row[i], vpx_calloc(sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS), 1));
    576 
    577         CHECK_MEM_ERROR(pbi->mt_vabove_row, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
    578         for (i=0; i< pc->mb_rows; i++)
    579             CHECK_MEM_ERROR(pbi->mt_vabove_row[i], vpx_calloc(sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS), 1));
    580 
    581         /* Allocate memory for left_col buffers. */
    582         CHECK_MEM_ERROR(pbi->mt_yleft_col, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
    583         for (i=0; i< pc->mb_rows; i++)
    584             CHECK_MEM_ERROR(pbi->mt_yleft_col[i], vpx_calloc(sizeof(unsigned char) * 16, 1));
    585 
    586         CHECK_MEM_ERROR(pbi->mt_uleft_col, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
    587         for (i=0; i< pc->mb_rows; i++)
    588             CHECK_MEM_ERROR(pbi->mt_uleft_col[i], vpx_calloc(sizeof(unsigned char) * 8, 1));
    589 
    590         CHECK_MEM_ERROR(pbi->mt_vleft_col, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
    591         for (i=0; i< pc->mb_rows; i++)
    592             CHECK_MEM_ERROR(pbi->mt_vleft_col[i], vpx_calloc(sizeof(unsigned char) * 8, 1));
    593     }
    594 }
    595 
    596 
    597 void vp8_decoder_remove_threads(VP8D_COMP *pbi)
    598 {
    599     /* shutdown MB Decoding thread; */
    600     if (pbi->b_multithreaded_rd)
    601     {
    602         int i;
    603 
    604         pbi->b_multithreaded_rd = 0;
    605 
    606         /* allow all threads to exit */
    607         for (i = 0; i < pbi->allocated_decoding_thread_count; i++)
    608         {
    609             sem_post(&pbi->h_event_start_decoding[i]);
    610             pthread_join(pbi->h_decoding_thread[i], NULL);
    611         }
    612 
    613         for (i = 0; i < pbi->allocated_decoding_thread_count; i++)
    614         {
    615             sem_destroy(&pbi->h_event_start_decoding[i]);
    616         }
    617 
    618         sem_destroy(&pbi->h_event_end_decoding);
    619 
    620             vpx_free(pbi->h_decoding_thread);
    621             pbi->h_decoding_thread = NULL;
    622 
    623             vpx_free(pbi->h_event_start_decoding);
    624             pbi->h_event_start_decoding = NULL;
    625 
    626             vpx_free(pbi->mb_row_di);
    627             pbi->mb_row_di = NULL ;
    628 
    629             vpx_free(pbi->de_thread_data);
    630             pbi->de_thread_data = NULL;
    631     }
    632 }
    633 
    634 
    635 static void lpf_init( VP8D_COMP *pbi, int default_filt_lvl)
    636 {
    637     VP8_COMMON *cm  = &pbi->common;
    638     MACROBLOCKD *mbd = &pbi->mb;
    639     /*YV12_BUFFER_CONFIG *post = &cm->new_frame;*/  /*frame_to_show;*/
    640     loop_filter_info *lfi = cm->lf_info;
    641     FRAME_TYPE frame_type = cm->frame_type;
    642 
    643     /*int mb_row;
    644     int mb_col;
    645     int baseline_filter_level[MAX_MB_SEGMENTS];*/
    646     int alt_flt_enabled = mbd->segmentation_enabled;
    647 
    648     int i;
    649     /*unsigned char *y_ptr, *u_ptr, *v_ptr;*/
    650 
    651     /* Note the baseline filter values for each segment */
    652     if (alt_flt_enabled)
    653     {
    654         for (i = 0; i < MAX_MB_SEGMENTS; i++)
    655         {
    656             /* Abs value */
    657             if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
    658                 pbi->mt_baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
    659             /* Delta Value */
    660             else
    661             {
    662                 pbi->mt_baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i];
    663                 pbi->mt_baseline_filter_level[i] = (pbi->mt_baseline_filter_level[i] >= 0) ? ((pbi->mt_baseline_filter_level[i] <= MAX_LOOP_FILTER) ? pbi->mt_baseline_filter_level[i] : MAX_LOOP_FILTER) : 0;  /* Clamp to valid range */
    664             }
    665         }
    666     }
    667     else
    668     {
    669         for (i = 0; i < MAX_MB_SEGMENTS; i++)
    670             pbi->mt_baseline_filter_level[i] = default_filt_lvl;
    671     }
    672 
    673     /* Initialize the loop filter for this frame. */
    674     if ((cm->last_filter_type != cm->filter_type) || (cm->last_sharpness_level != cm->sharpness_level))
    675         vp8_init_loop_filter(cm);
    676     else if (frame_type != cm->last_frame_type)
    677         vp8_frame_init_loop_filter(lfi, frame_type);
    678 }
    679 
    680 
    681 void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
    682 {
    683     int mb_row;
    684     VP8_COMMON *pc = &pbi->common;
    685 
    686     int ibc = 0;
    687     int num_part = 1 << pbi->common.multi_token_partition;
    688     int i;
    689     volatile int *last_row_current_mb_col = NULL;
    690     int nsync = pbi->sync_range;
    691 
    692     int filter_level;
    693     loop_filter_info *lfi = pc->lf_info;
    694     int alt_flt_enabled = xd->segmentation_enabled;
    695     int Segment;
    696 
    697     if(pbi->common.filter_level)
    698     {
    699         /* Set above_row buffer to 127 for decoding first MB row */
    700         vpx_memset(pbi->mt_yabove_row[0] + VP8BORDERINPIXELS-1, 127, pc->yv12_fb[pc->lst_fb_idx].y_width + 5);
    701         vpx_memset(pbi->mt_uabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (pc->yv12_fb[pc->lst_fb_idx].y_width>>1) +5);
    702         vpx_memset(pbi->mt_vabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (pc->yv12_fb[pc->lst_fb_idx].y_width>>1) +5);
    703 
    704         for (i=1; i<pc->mb_rows; i++)
    705         {
    706             vpx_memset(pbi->mt_yabove_row[i] + VP8BORDERINPIXELS-1, (unsigned char)129, 1);
    707             vpx_memset(pbi->mt_uabove_row[i] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1);
    708             vpx_memset(pbi->mt_vabove_row[i] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1);
    709         }
    710 
    711         /* Set left_col to 129 initially */
    712         for (i=0; i<pc->mb_rows; i++)
    713         {
    714             vpx_memset(pbi->mt_yleft_col[i], (unsigned char)129, 16);
    715             vpx_memset(pbi->mt_uleft_col[i], (unsigned char)129, 8);
    716             vpx_memset(pbi->mt_vleft_col[i], (unsigned char)129, 8);
    717         }
    718         lpf_init(pbi, pc->filter_level);
    719     }
    720 
    721     setup_decoding_thread_data(pbi, xd, pbi->mb_row_di, pbi->decoding_thread_count);
    722 
    723     for (i = 0; i < pbi->decoding_thread_count; i++)
    724         sem_post(&pbi->h_event_start_decoding[i]);
    725 
    726     for (mb_row = 0; mb_row < pc->mb_rows; mb_row += (pbi->decoding_thread_count + 1))
    727     {
    728 
    729         xd->current_bc = &pbi->mbc[mb_row%num_part];
    730 
    731         /* vp8_decode_mb_row(pbi, pc, mb_row, xd); */
    732         {
    733             int i;
    734             int recon_yoffset, recon_uvoffset;
    735             int mb_col;
    736             int ref_fb_idx = pc->lst_fb_idx;
    737             int dst_fb_idx = pc->new_fb_idx;
    738             int recon_y_stride = pc->yv12_fb[ref_fb_idx].y_stride;
    739             int recon_uv_stride = pc->yv12_fb[ref_fb_idx].uv_stride;
    740 
    741            /* volatile int *last_row_current_mb_col = NULL; */
    742             if (mb_row > 0)
    743                 last_row_current_mb_col = &pbi->mt_current_mb_col[mb_row -1];
    744 
    745             vpx_memset(&pc->left_context, 0, sizeof(pc->left_context));
    746             recon_yoffset = mb_row * recon_y_stride * 16;
    747             recon_uvoffset = mb_row * recon_uv_stride * 8;
    748             /* reset above block coeffs */
    749 
    750             xd->above_context = pc->above_context;
    751             xd->up_available = (mb_row != 0);
    752 
    753             xd->mb_to_top_edge = -((mb_row * 16)) << 3;
    754             xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;
    755 
    756             for (mb_col = 0; mb_col < pc->mb_cols; mb_col++)
    757             {
    758                 if ( mb_row > 0 && (mb_col & (nsync-1)) == 0){
    759                     while (mb_col > (*last_row_current_mb_col - nsync) && *last_row_current_mb_col != pc->mb_cols - 1)
    760                     {
    761                         x86_pause_hint();
    762                         thread_sleep(0);
    763                     }
    764                 }
    765 
    766                 if (xd->mode_info_context->mbmi.mode == SPLITMV || xd->mode_info_context->mbmi.mode == B_PRED)
    767                 {
    768                     for (i = 0; i < 16; i++)
    769                     {
    770                         BLOCKD *d = &xd->block[i];
    771                         vpx_memcpy(&d->bmi, &xd->mode_info_context->bmi[i], sizeof(B_MODE_INFO));
    772                     }
    773                 }
    774 
    775                 /* Distance of Mb to the various image edges.
    776                  * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
    777                  */
    778                 xd->mb_to_left_edge = -((mb_col * 16) << 3);
    779                 xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
    780 
    781                 xd->dst.y_buffer = pc->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
    782                 xd->dst.u_buffer = pc->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
    783                 xd->dst.v_buffer = pc->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
    784 
    785                 xd->left_available = (mb_col != 0);
    786 
    787                 /* Select the appropriate reference frame for this MB */
    788                 if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
    789                     ref_fb_idx = pc->lst_fb_idx;
    790                 else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
    791                     ref_fb_idx = pc->gld_fb_idx;
    792                 else
    793                     ref_fb_idx = pc->alt_fb_idx;
    794 
    795                 xd->pre.y_buffer = pc->yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
    796                 xd->pre.u_buffer = pc->yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
    797                 xd->pre.v_buffer = pc->yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;
    798 
    799                 if (xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME)
    800                 {
    801                     /* propagate errors from reference frames */
    802                     xd->corrupted |= pc->yv12_fb[ref_fb_idx].corrupted;
    803                 }
    804 
    805                 vp8_build_uvmvs(xd, pc->full_pixel);
    806                 decode_macroblock(pbi, xd, mb_row, mb_col);
    807 
    808                 /* check if the boolean decoder has suffered an error */
    809                 xd->corrupted |= vp8dx_bool_error(xd->current_bc);
    810 
    811                 if (pbi->common.filter_level)
    812                 {
    813                     /* Save decoded MB last row data for next-row decoding */
    814                     if(mb_row != pc->mb_rows-1)
    815                     {
    816                         vpx_memcpy((pbi->mt_yabove_row[mb_row +1] + 32 + mb_col*16), (xd->dst.y_buffer + 15 * recon_y_stride), 16);
    817                         vpx_memcpy((pbi->mt_uabove_row[mb_row +1] + 16 + mb_col*8), (xd->dst.u_buffer + 7 * recon_uv_stride), 8);
    818                         vpx_memcpy((pbi->mt_vabove_row[mb_row +1] + 16 + mb_col*8), (xd->dst.v_buffer + 7 * recon_uv_stride), 8);
    819                     }
    820 
    821                     /* save left_col for next MB decoding */
    822                     if(mb_col != pc->mb_cols-1)
    823                     {
    824                         MODE_INFO *next = xd->mode_info_context +1;
    825 
    826                         if (xd->frame_type == KEY_FRAME  ||  next->mbmi.ref_frame == INTRA_FRAME)
    827                         {
    828                             for (i = 0; i < 16; i++)
    829                                 pbi->mt_yleft_col[mb_row][i] = xd->dst.y_buffer [i* recon_y_stride + 15];
    830                             for (i = 0; i < 8; i++)
    831                             {
    832                                 pbi->mt_uleft_col[mb_row][i] = xd->dst.u_buffer [i* recon_uv_stride + 7];
    833                                 pbi->mt_vleft_col[mb_row][i] = xd->dst.v_buffer [i* recon_uv_stride + 7];
    834                             }
    835                         }
    836                     }
    837 
    838                     /* update loopfilter info */
    839                     Segment = (alt_flt_enabled) ? xd->mode_info_context->mbmi.segment_id : 0;
    840                     filter_level = pbi->mt_baseline_filter_level[Segment];
    841                     /* Distance of Mb to the various image edges.
    842                      * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
    843                      * Apply any context driven MB level adjustment
    844                      */
    845                     filter_level = vp8_adjust_mb_lf_value(xd, filter_level);
    846 
    847                     /* loopfilter on this macroblock. */
    848                     if (filter_level)
    849                     {
    850                         if (mb_col > 0)
    851                             pc->lf_mbv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
    852 
    853                         if (xd->mode_info_context->mbmi.dc_diff > 0)
    854                             pc->lf_bv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
    855 
    856                         /* don't apply across umv border */
    857                         if (mb_row > 0)
    858                             pc->lf_mbh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
    859 
    860                         if (xd->mode_info_context->mbmi.dc_diff > 0)
    861                             pc->lf_bh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
    862                     }
    863                 }
    864 
    865                 recon_yoffset += 16;
    866                 recon_uvoffset += 8;
    867 
    868                 ++xd->mode_info_context;  /* next mb */
    869 
    870                 xd->above_context++;
    871 
    872                 pbi->mt_current_mb_col[mb_row] = mb_col;
    873             }
    874 
    875             /* adjust to the next row of mbs */
    876             if (pbi->common.filter_level)
    877             {
    878                 if(mb_row != pc->mb_rows-1)
    879                 {
    880                     int lasty = pc->yv12_fb[ref_fb_idx].y_width + VP8BORDERINPIXELS;
    881                     int lastuv = (pc->yv12_fb[ref_fb_idx].y_width>>1) + (VP8BORDERINPIXELS>>1);
    882 
    883                     for (i = 0; i < 4; i++)
    884                     {
    885                         pbi->mt_yabove_row[mb_row +1][lasty + i] = pbi->mt_yabove_row[mb_row +1][lasty -1];
    886                         pbi->mt_uabove_row[mb_row +1][lastuv + i] = pbi->mt_uabove_row[mb_row +1][lastuv -1];
    887                         pbi->mt_vabove_row[mb_row +1][lastuv + i] = pbi->mt_vabove_row[mb_row +1][lastuv -1];
    888                     }
    889                 }
    890             }else
    891                 vp8_extend_mb_row(&pc->yv12_fb[dst_fb_idx], xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8);
    892 
    893             ++xd->mode_info_context;      /* skip prediction column */
    894         }
    895         xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count;
    896     }
    897 
    898     sem_wait(&pbi->h_event_end_decoding);   /* add back for each frame */
    899 }
    900