Home | History | Annotate | Download | only in decoder
      1 /*
      2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 
     12 #ifndef WIN32
     13 # include <unistd.h>
     14 #endif
     15 #ifdef __APPLE__
     16 #include <mach/mach_init.h>
     17 #endif
     18 #include "onyxd_int.h"
     19 #include "vpx_mem/vpx_mem.h"
     20 #include "threading.h"
     21 
     22 #include "loopfilter.h"
     23 #include "extend.h"
     24 #include "vpx_ports/vpx_timer.h"
     25 
/* Number of int entries allocated for pbi->current_mb_col by
 * vp8_decoder_create_threads(); each entry holds the index of the last
 * macroblock column completed in one macroblock row. */
     26 #define MAX_ROWS 256
     27 
/* Single-threaded row decoder defined elsewhere. In the portion of this file
 * reviewed here it is only referenced from a commented-out call in
 * vp8_mtdecode_mb_rows(). */
     28 extern void vp8_decode_mb_row(VP8D_COMP *pbi,
     29                               VP8_COMMON *pc,
     30                               int mb_row,
     31                               MACROBLOCKD *xd);
     32 
/* Per-macroblock helpers defined elsewhere; both are called once per
 * macroblock by the row-decoding loops in this file. */
     33 extern void vp8_build_uvmvs(MACROBLOCKD *x, int fullpixel);
     34 extern void vp8_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd);
     35 
/* Forward declaration: defined later in this file and called from
 * vp8_thread_decoding_proc(). */
     36 void vp8_thread_loop_filter(VP8D_COMP *pbi, MB_ROW_DEC *mbrd, int ithread);
     37 
     38 void vp8_setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC *mbrd, int count)
     39 {
     40 #if CONFIG_MULTITHREAD
     41     VP8_COMMON *const pc = & pbi->common;
     42     int i, j;
     43 
     44     for (i = 0; i < count; i++)
     45     {
     46         MACROBLOCKD *mbd = &mbrd[i].mbd;
     47 #if CONFIG_RUNTIME_CPU_DETECT
     48         mbd->rtcd = xd->rtcd;
     49 #endif
     50         mbd->subpixel_predict        = xd->subpixel_predict;
     51         mbd->subpixel_predict8x4     = xd->subpixel_predict8x4;
     52         mbd->subpixel_predict8x8     = xd->subpixel_predict8x8;
     53         mbd->subpixel_predict16x16   = xd->subpixel_predict16x16;
     54 
     55         mbd->mode_info_context = pc->mi   + pc->mode_info_stride * (i + 1);
     56         mbd->mode_info_stride  = pc->mode_info_stride;
     57 
     58         mbd->frame_type = pc->frame_type;
     59         mbd->frames_since_golden      = pc->frames_since_golden;
     60         mbd->frames_till_alt_ref_frame  = pc->frames_till_alt_ref_frame;
     61 
     62         mbd->pre = pc->yv12_fb[pc->lst_fb_idx];
     63         mbd->dst = pc->yv12_fb[pc->new_fb_idx];
     64 
     65         vp8_setup_block_dptrs(mbd);
     66         vp8_build_block_doffsets(mbd);
     67         mbd->segmentation_enabled    = xd->segmentation_enabled;
     68         mbd->mb_segement_abs_delta     = xd->mb_segement_abs_delta;
     69         vpx_memcpy(mbd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data));
     70 
     71         mbd->current_bc = &pbi->bc2;
     72 
     73         for (j = 0; j < 25; j++)
     74         {
     75             mbd->block[j].dequant = xd->block[j].dequant;
     76         }
     77     }
     78 
     79     for (i=0; i< pc->mb_rows; i++)
     80         pbi->current_mb_col[i]=-1;
     81 #else
     82     (void) pbi;
     83     (void) xd;
     84     (void) mbrd;
     85     (void) count;
     86 #endif
     87 }
     88 
     89 void vp8_setup_loop_filter_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC *mbrd, int count)
     90 {
     91 #if CONFIG_MULTITHREAD
     92     VP8_COMMON *const pc = & pbi->common;
     93     int i, j;
     94 
     95     for (i = 0; i < count; i++)
     96     {
     97         MACROBLOCKD *mbd = &mbrd[i].mbd;
     98 //#if CONFIG_RUNTIME_CPU_DETECT
     99 //        mbd->rtcd = xd->rtcd;
    100 //#endif
    101 
    102         //mbd->subpixel_predict        = xd->subpixel_predict;
    103         //mbd->subpixel_predict8x4     = xd->subpixel_predict8x4;
    104         //mbd->subpixel_predict8x8     = xd->subpixel_predict8x8;
    105         //mbd->subpixel_predict16x16   = xd->subpixel_predict16x16;
    106 
    107         mbd->mode_info_context = pc->mi   + pc->mode_info_stride * (i + 1);
    108         mbd->mode_info_stride  = pc->mode_info_stride;
    109 
    110         //mbd->frame_type = pc->frame_type;
    111         //mbd->frames_since_golden      = pc->frames_since_golden;
    112         //mbd->frames_till_alt_ref_frame  = pc->frames_till_alt_ref_frame;
    113 
    114         //mbd->pre = pc->yv12_fb[pc->lst_fb_idx];
    115         //mbd->dst = pc->yv12_fb[pc->new_fb_idx];
    116 
    117         //vp8_setup_block_dptrs(mbd);
    118         //vp8_build_block_doffsets(mbd);
    119         mbd->segmentation_enabled    = xd->segmentation_enabled;  //
    120         mbd->mb_segement_abs_delta     = xd->mb_segement_abs_delta;  //
    121         vpx_memcpy(mbd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data));   //
    122 
    123         //signed char ref_lf_deltas[MAX_REF_LF_DELTAS];
    124         vpx_memcpy(mbd->ref_lf_deltas, xd->ref_lf_deltas, sizeof(xd->ref_lf_deltas));
    125         //signed char mode_lf_deltas[MAX_MODE_LF_DELTAS];
    126         vpx_memcpy(mbd->mode_lf_deltas, xd->mode_lf_deltas, sizeof(xd->mode_lf_deltas));
    127         //unsigned char mode_ref_lf_delta_enabled;
    128         //unsigned char mode_ref_lf_delta_update;
    129         mbd->mode_ref_lf_delta_enabled    = xd->mode_ref_lf_delta_enabled;
    130         mbd->mode_ref_lf_delta_update    = xd->mode_ref_lf_delta_update;
    131 
    132         //mbd->mbmi.mode = DC_PRED;
    133         //mbd->mbmi.uv_mode = DC_PRED;
    134         //mbd->current_bc = &pbi->bc2;
    135 
    136         //for (j = 0; j < 25; j++)
    137         //{
    138         //    mbd->block[j].dequant = xd->block[j].dequant;
    139         //}
    140     }
    141 
    142     for (i=0; i< pc->mb_rows; i++)
    143         pbi->current_mb_col[i]=-1;
    144 #else
    145     (void) pbi;
    146     (void) xd;
    147     (void) mbrd;
    148     (void) count;
    149 #endif
    150 }
    151 
    152 THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
    153 {
    154 #if CONFIG_MULTITHREAD
    155     int ithread = ((DECODETHREAD_DATA *)p_data)->ithread;
    156     VP8D_COMP *pbi = (VP8D_COMP *)(((DECODETHREAD_DATA *)p_data)->ptr1);
    157     MB_ROW_DEC *mbrd = (MB_ROW_DEC *)(((DECODETHREAD_DATA *)p_data)->ptr2);
    158     ENTROPY_CONTEXT_PLANES mb_row_left_context;
    159 
    160     while (1)
    161     {
    162         int current_filter_level = 0;
    163 
    164         if (pbi->b_multithreaded_rd == 0)
    165             break;
    166 
    167         //if(WaitForSingleObject(pbi->h_event_start_decoding[ithread], INFINITE) == WAIT_OBJECT_0)
    168         if (sem_wait(&pbi->h_event_start_decoding[ithread]) == 0)
    169         {
    170             if (pbi->b_multithreaded_rd == 0)
    171                 break;
    172             else
    173             {
    174                 VP8_COMMON *pc = &pbi->common;
    175                 MACROBLOCKD *xd = &mbrd->mbd;
    176 
    177                 int mb_row;
    178                 int num_part = 1 << pbi->common.multi_token_partition;
    179                 volatile int *last_row_current_mb_col;
    180 
    181                 for (mb_row = ithread+1; mb_row < pc->mb_rows; mb_row += (pbi->decoding_thread_count + 1))
    182                 {
    183                     int i;
    184                     int recon_yoffset, recon_uvoffset;
    185                     int mb_col;
    186                     int ref_fb_idx = pc->lst_fb_idx;
    187                     int dst_fb_idx = pc->new_fb_idx;
    188                     int recon_y_stride = pc->yv12_fb[ref_fb_idx].y_stride;
    189                     int recon_uv_stride = pc->yv12_fb[ref_fb_idx].uv_stride;
    190 
    191                     pbi->mb_row_di[ithread].mb_row = mb_row;
    192                     pbi->mb_row_di[ithread].mbd.current_bc =  &pbi->mbc[mb_row%num_part];
    193 
    194                     last_row_current_mb_col = &pbi->current_mb_col[mb_row -1];
    195 
    196                     recon_yoffset = mb_row * recon_y_stride * 16;
    197                     recon_uvoffset = mb_row * recon_uv_stride * 8;
    198                     // reset above block coeffs
    199 
    200                     xd->above_context = pc->above_context;
    201                     xd->left_context = &mb_row_left_context;
    202                     vpx_memset(&mb_row_left_context, 0, sizeof(mb_row_left_context));
    203                     xd->up_available = (mb_row != 0);
    204 
    205                     xd->mb_to_top_edge = -((mb_row * 16)) << 3;
    206                     xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;
    207 
    208                     for (mb_col = 0; mb_col < pc->mb_cols; mb_col++)
    209                     {
    210                         if ((mb_col & 7) == 0)
    211                         {
    212                             while (mb_col > (*last_row_current_mb_col - 8) && *last_row_current_mb_col != pc->mb_cols - 1)
    213                             {
    214                                 x86_pause_hint();
    215                                 thread_sleep(0);
    216                             }
    217                         }
    218 
    219                         if (xd->mode_info_context->mbmi.mode == SPLITMV || xd->mode_info_context->mbmi.mode == B_PRED)
    220                         {
    221                             for (i = 0; i < 16; i++)
    222                             {
    223                                 BLOCKD *d = &xd->block[i];
    224                                 vpx_memcpy(&d->bmi, &xd->mode_info_context->bmi[i], sizeof(B_MODE_INFO));
    225                             }
    226                         }
    227 
    228                         // Distance of Mb to the various image edges.
    229                         // These specified to 8th pel as they are always compared to values that are in 1/8th pel units
    230                         xd->mb_to_left_edge = -((mb_col * 16) << 3);
    231                         xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
    232 
    233                         xd->dst.y_buffer = pc->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
    234                         xd->dst.u_buffer = pc->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
    235                         xd->dst.v_buffer = pc->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
    236 
    237                         xd->left_available = (mb_col != 0);
    238 
    239                         // Select the appropriate reference frame for this MB
    240                         if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
    241                             ref_fb_idx = pc->lst_fb_idx;
    242                         else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
    243                             ref_fb_idx = pc->gld_fb_idx;
    244                         else
    245                             ref_fb_idx = pc->alt_fb_idx;
    246 
    247                         xd->pre.y_buffer = pc->yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
    248                         xd->pre.u_buffer = pc->yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
    249                         xd->pre.v_buffer = pc->yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;
    250 
    251                         vp8_build_uvmvs(xd, pc->full_pixel);
    252 
    253                         vp8_decode_macroblock(pbi, xd);
    254 
    255                         recon_yoffset += 16;
    256                         recon_uvoffset += 8;
    257 
    258                         ++xd->mode_info_context;  /* next mb */
    259 
    260                         xd->above_context++;
    261 
    262                         //pbi->mb_row_di[ithread].current_mb_col = mb_col;
    263                         pbi->current_mb_col[mb_row] = mb_col;
    264                     }
    265 
    266                     // adjust to the next row of mbs
    267                     vp8_extend_mb_row(
    268                     &pc->yv12_fb[dst_fb_idx],
    269                     xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8
    270                     );
    271 
    272                     ++xd->mode_info_context;      /* skip prediction column */
    273 
    274                     // since we have multithread
    275                     xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count;
    276 
    277                     pbi->last_mb_row_decoded = mb_row;
    278 
    279                 }
    280             }
    281         }
    282 
    283         // If |pbi->common.filter_level| is 0 the value can change in-between
    284         // the sem_post and the check to call vp8_thread_loop_filter.
    285         current_filter_level = pbi->common.filter_level;
    286 
    287         //  add this to each frame
    288         if ((mbrd->mb_row == pbi->common.mb_rows-1) || ((mbrd->mb_row == pbi->common.mb_rows-2) && (pbi->common.mb_rows % (pbi->decoding_thread_count+1))==1))
    289         {
    290             //SetEvent(pbi->h_event_end_decoding);
    291             sem_post(&pbi->h_event_end_decoding);
    292         }
    293 
    294         if ((pbi->b_multithreaded_lf) && (current_filter_level))
    295             vp8_thread_loop_filter(pbi, mbrd, ithread);
    296 
    297     }
    298 #else
    299     (void) p_data;
    300 #endif
    301 
    302     return 0 ;
    303 }
    304 
    305 
    306 void vp8_thread_loop_filter(VP8D_COMP *pbi, MB_ROW_DEC *mbrd, int ithread)
    307 {
    308 #if CONFIG_MULTITHREAD
    309 
    310         if (sem_wait(&pbi->h_event_start_lpf[ithread]) == 0)
    311         {
    312            // if (pbi->b_multithreaded_lf == 0) // we're shutting down      ????
    313            //     break;
    314            // else
    315             {
    316                 VP8_COMMON *cm  = &pbi->common;
    317                 MACROBLOCKD *mbd = &mbrd->mbd;
    318                 int default_filt_lvl = pbi->common.filter_level;
    319 
    320                 YV12_BUFFER_CONFIG *post = cm->frame_to_show;
    321                 loop_filter_info *lfi = cm->lf_info;
    322                 //int frame_type = cm->frame_type;
    323 
    324                 int mb_row;
    325                 int mb_col;
    326 
    327                 int filter_level;
    328                 int alt_flt_enabled = mbd->segmentation_enabled;
    329 
    330                 int i;
    331                 unsigned char *y_ptr, *u_ptr, *v_ptr;
    332 
    333                 volatile int *last_row_current_mb_col;
    334 
    335                 // Set up the buffer pointers
    336                 y_ptr = post->y_buffer + post->y_stride  * 16 * (ithread +1);
    337                 u_ptr = post->u_buffer + post->uv_stride *  8 * (ithread +1);
    338                 v_ptr = post->v_buffer + post->uv_stride *  8 * (ithread +1);
    339 
    340                 // vp8_filter each macro block
    341                 for (mb_row = ithread+1; mb_row < cm->mb_rows; mb_row+= (pbi->decoding_thread_count + 1))
    342                 {
    343                     last_row_current_mb_col = &pbi->current_mb_col[mb_row -1];
    344 
    345                     for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
    346                     {
    347                         int Segment = (alt_flt_enabled) ? mbd->mode_info_context->mbmi.segment_id : 0;
    348 
    349                         if ((mb_col & 7) == 0)
    350                         {
    351                             while (mb_col > (*last_row_current_mb_col-8) && *last_row_current_mb_col != cm->mb_cols - 1)
    352                             {
    353                                 x86_pause_hint();
    354                                 thread_sleep(0);
    355                             }
    356                         }
    357 
    358                         filter_level = pbi->mt_baseline_filter_level[Segment];
    359 
    360                         // Apply any context driven MB level adjustment
    361                         vp8_adjust_mb_lf_value(mbd, &filter_level);
    362 
    363                         if (filter_level)
    364                         {
    365                             if (mb_col > 0)
    366                                 cm->lf_mbv(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
    367 
    368                             if (mbd->mode_info_context->mbmi.dc_diff > 0)
    369                                 cm->lf_bv(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
    370 
    371                             // don't apply across umv border
    372                             if (mb_row > 0)
    373                                 cm->lf_mbh(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
    374 
    375                             if (mbd->mode_info_context->mbmi.dc_diff > 0)
    376                                 cm->lf_bh(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
    377                         }
    378 
    379                         y_ptr += 16;
    380                         u_ptr += 8;
    381                         v_ptr += 8;
    382 
    383                         mbd->mode_info_context++;     // step to next MB
    384                         pbi->current_mb_col[mb_row] = mb_col;
    385                     }
    386 
    387                     mbd->mode_info_context++;         // Skip border mb
    388 
    389                     y_ptr += post->y_stride  * 16 * (pbi->decoding_thread_count + 1) - post->y_width;
    390                     u_ptr += post->uv_stride *  8 * (pbi->decoding_thread_count + 1) - post->uv_width;
    391                     v_ptr += post->uv_stride *  8 * (pbi->decoding_thread_count + 1) - post->uv_width;
    392 
    393                     mbd->mode_info_context += pbi->decoding_thread_count * mbd->mode_info_stride;         // Skip border mb
    394                 }
    395             }
    396         }
    397 
    398         //  add this to each frame
    399         if ((mbrd->mb_row == pbi->common.mb_rows-1) || ((mbrd->mb_row == pbi->common.mb_rows-2) && (pbi->common.mb_rows % (pbi->decoding_thread_count+1))==1))
    400         {
    401           sem_post(&pbi->h_event_end_lpf);
    402         }
    403 #else
    404     (void) pbi;
    405 #endif
    406 }
    407 
    408 void vp8_decoder_create_threads(VP8D_COMP *pbi)
    409 {
    410 #if CONFIG_MULTITHREAD
    411     int core_count = 0;
    412     int ithread;
    413 
    414     pbi->b_multithreaded_rd = 0;
    415     pbi->b_multithreaded_lf = 0;
    416     pbi->allocated_decoding_thread_count = 0;
    417     core_count = (pbi->max_threads > 16) ? 16 : pbi->max_threads;
    418 
    419     if (core_count > 1)
    420     {
    421         pbi->b_multithreaded_rd = 1;
    422         pbi->b_multithreaded_lf = 1;  // this can be merged with pbi->b_multithreaded_rd ?
    423         pbi->decoding_thread_count = core_count -1;
    424 
    425         CHECK_MEM_ERROR(pbi->h_decoding_thread, vpx_malloc(sizeof(pthread_t) * pbi->decoding_thread_count));
    426         CHECK_MEM_ERROR(pbi->h_event_start_decoding, vpx_malloc(sizeof(sem_t) * pbi->decoding_thread_count));
    427         CHECK_MEM_ERROR(pbi->mb_row_di, vpx_memalign(32, sizeof(MB_ROW_DEC) * pbi->decoding_thread_count));
    428         vpx_memset(pbi->mb_row_di, 0, sizeof(MB_ROW_DEC) * pbi->decoding_thread_count);
    429         CHECK_MEM_ERROR(pbi->de_thread_data, vpx_malloc(sizeof(DECODETHREAD_DATA) * pbi->decoding_thread_count));
    430 
    431         CHECK_MEM_ERROR(pbi->current_mb_col, vpx_malloc(sizeof(int) * MAX_ROWS));  // pc->mb_rows));
    432         CHECK_MEM_ERROR(pbi->h_event_start_lpf, vpx_malloc(sizeof(sem_t) * pbi->decoding_thread_count));
    433 
    434         for (ithread = 0; ithread < pbi->decoding_thread_count; ithread++)
    435         {
    436             sem_init(&pbi->h_event_start_decoding[ithread], 0, 0);
    437             sem_init(&pbi->h_event_start_lpf[ithread], 0, 0);
    438 
    439             pbi->de_thread_data[ithread].ithread  = ithread;
    440             pbi->de_thread_data[ithread].ptr1     = (void *)pbi;
    441             pbi->de_thread_data[ithread].ptr2     = (void *) &pbi->mb_row_di[ithread];
    442 
    443             pthread_create(&pbi->h_decoding_thread[ithread], 0, vp8_thread_decoding_proc, (&pbi->de_thread_data[ithread]));
    444         }
    445 
    446         sem_init(&pbi->h_event_end_decoding, 0, 0);
    447         sem_init(&pbi->h_event_end_lpf, 0, 0);
    448 
    449         pbi->allocated_decoding_thread_count = pbi->decoding_thread_count;
    450     }
    451 
    452 #else
    453     (void) pbi;
    454 #endif
    455 }
    456 
    457 void vp8_decoder_remove_threads(VP8D_COMP *pbi)
    458 {
    459 #if CONFIG_MULTITHREAD
    460 
    461     if (pbi->b_multithreaded_lf)
    462     {
    463         int i;
    464         pbi->b_multithreaded_lf = 0;
    465 
    466         for (i = 0; i < pbi->allocated_decoding_thread_count; i++)
    467             sem_destroy(&pbi->h_event_start_lpf[i]);
    468 
    469         sem_destroy(&pbi->h_event_end_lpf);
    470     }
    471 
    472     //shutdown MB Decoding thread;
    473     if (pbi->b_multithreaded_rd)
    474     {
    475         int i;
    476 
    477         pbi->b_multithreaded_rd = 0;
    478 
    479         // allow all threads to exit
    480         for (i = 0; i < pbi->allocated_decoding_thread_count; i++)
    481         {
    482             sem_post(&pbi->h_event_start_decoding[i]);
    483             pthread_join(pbi->h_decoding_thread[i], NULL);
    484         }
    485 
    486         for (i = 0; i < pbi->allocated_decoding_thread_count; i++)
    487         {
    488             sem_destroy(&pbi->h_event_start_decoding[i]);
    489         }
    490 
    491         sem_destroy(&pbi->h_event_end_decoding);
    492 
    493         if (pbi->h_decoding_thread)
    494         {
    495             vpx_free(pbi->h_decoding_thread);
    496             pbi->h_decoding_thread = NULL;
    497         }
    498 
    499         if (pbi->h_event_start_decoding)
    500         {
    501             vpx_free(pbi->h_event_start_decoding);
    502             pbi->h_event_start_decoding = NULL;
    503         }
    504 
    505         if (pbi->h_event_start_lpf)
    506         {
    507             vpx_free(pbi->h_event_start_lpf);
    508             pbi->h_event_start_lpf = NULL;
    509         }
    510 
    511         if (pbi->mb_row_di)
    512         {
    513             vpx_free(pbi->mb_row_di);
    514             pbi->mb_row_di = NULL ;
    515         }
    516 
    517         if (pbi->de_thread_data)
    518         {
    519             vpx_free(pbi->de_thread_data);
    520             pbi->de_thread_data = NULL;
    521         }
    522 
    523         if (pbi->current_mb_col)
    524         {
    525             vpx_free(pbi->current_mb_col);
    526             pbi->current_mb_col = NULL ;
    527         }
    528     }
    529 #else
    530     (void) pbi;
    531 #endif
    532 }
    533 
    534 
    535 void vp8_start_lfthread(VP8D_COMP *pbi)
    536 {
    537 #if CONFIG_MULTITHREAD
    538   /*
    539     memcpy(&pbi->lpfmb, &pbi->mb, sizeof(pbi->mb));
    540     pbi->last_mb_row_decoded = 0;
    541     sem_post(&pbi->h_event_start_lpf);
    542     */
    543     (void) pbi;
    544 #else
    545     (void) pbi;
    546 #endif
    547 }
    548 
    549 void vp8_stop_lfthread(VP8D_COMP *pbi)
    550 {
    551 #if CONFIG_MULTITHREAD
    552   /*
    553     struct vpx_usec_timer timer;
    554 
    555     vpx_usec_timer_start(&timer);
    556 
    557     sem_wait(&pbi->h_event_end_lpf);
    558 
    559     vpx_usec_timer_mark(&timer);
    560     pbi->time_loop_filtering += vpx_usec_timer_elapsed(&timer);
    561     */
    562     (void) pbi;
    563 #else
    564     (void) pbi;
    565 #endif
    566 }
    567 
    568 
    569 void vp8_mtdecode_mb_rows(VP8D_COMP *pbi,
    570                           MACROBLOCKD *xd)
    571 {
    572 #if CONFIG_MULTITHREAD
    573     int mb_row;
    574     VP8_COMMON *pc = &pbi->common;
    575 
    576     int ibc = 0;
    577     int num_part = 1 << pbi->common.multi_token_partition;
    578     int i;
    579     volatile int *last_row_current_mb_col = NULL;
    580 
    581     vp8_setup_decoding_thread_data(pbi, xd, pbi->mb_row_di, pbi->decoding_thread_count);
    582 
    583     for (i = 0; i < pbi->decoding_thread_count; i++)
    584         sem_post(&pbi->h_event_start_decoding[i]);
    585 
    586     for (mb_row = 0; mb_row < pc->mb_rows; mb_row += (pbi->decoding_thread_count + 1))
    587     {
    588         int i;
    589 
    590         xd->current_bc = &pbi->mbc[mb_row%num_part];
    591 
    592         //vp8_decode_mb_row(pbi, pc, mb_row, xd);
    593         {
    594             int i;
    595             int recon_yoffset, recon_uvoffset;
    596             int mb_col;
    597             int ref_fb_idx = pc->lst_fb_idx;
    598             int dst_fb_idx = pc->new_fb_idx;
    599             int recon_y_stride = pc->yv12_fb[ref_fb_idx].y_stride;
    600             int recon_uv_stride = pc->yv12_fb[ref_fb_idx].uv_stride;
    601 
    602            // volatile int *last_row_current_mb_col = NULL;
    603             if (mb_row > 0)
    604                 last_row_current_mb_col = &pbi->current_mb_col[mb_row -1];
    605 
    606             vpx_memset(&pc->left_context, 0, sizeof(pc->left_context));
    607             recon_yoffset = mb_row * recon_y_stride * 16;
    608             recon_uvoffset = mb_row * recon_uv_stride * 8;
    609             // reset above block coeffs
    610 
    611             xd->above_context = pc->above_context;
    612             xd->up_available = (mb_row != 0);
    613 
    614             xd->mb_to_top_edge = -((mb_row * 16)) << 3;
    615             xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;
    616 
    617             for (mb_col = 0; mb_col < pc->mb_cols; mb_col++)
    618             {
    619                 if ( mb_row > 0 && (mb_col & 7) == 0){
    620                     while (mb_col > (*last_row_current_mb_col - 8) && *last_row_current_mb_col != pc->mb_cols - 1)
    621                     {
    622                         x86_pause_hint();
    623                         thread_sleep(0);
    624                     }
    625                 }
    626 
    627                 if (xd->mode_info_context->mbmi.mode == SPLITMV || xd->mode_info_context->mbmi.mode == B_PRED)
    628                 {
    629                     for (i = 0; i < 16; i++)
    630                     {
    631                         BLOCKD *d = &xd->block[i];
    632                         vpx_memcpy(&d->bmi, &xd->mode_info_context->bmi[i], sizeof(B_MODE_INFO));
    633                     }
    634                 }
    635 
    636                 // Distance of Mb to the various image edges.
    637                 // These specified to 8th pel as they are always compared to values that are in 1/8th pel units
    638                 xd->mb_to_left_edge = -((mb_col * 16) << 3);
    639                 xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
    640 
    641                 xd->dst.y_buffer = pc->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
    642                 xd->dst.u_buffer = pc->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
    643                 xd->dst.v_buffer = pc->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
    644 
    645                 xd->left_available = (mb_col != 0);
    646 
    647                 // Select the appropriate reference frame for this MB
    648                 if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
    649                     ref_fb_idx = pc->lst_fb_idx;
    650                 else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
    651                     ref_fb_idx = pc->gld_fb_idx;
    652                 else
    653                     ref_fb_idx = pc->alt_fb_idx;
    654 
    655                 xd->pre.y_buffer = pc->yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
    656                 xd->pre.u_buffer = pc->yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
    657                 xd->pre.v_buffer = pc->yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;
    658 
    659                 vp8_build_uvmvs(xd, pc->full_pixel);
    660 
    661                 vp8_decode_macroblock(pbi, xd);
    662 
    663                 recon_yoffset += 16;
    664                 recon_uvoffset += 8;
    665 
    666                 ++xd->mode_info_context;  /* next mb */
    667 
    668                 xd->above_context++;
    669 
    670                 //pbi->current_mb_col_main = mb_col;
    671                 pbi->current_mb_col[mb_row] = mb_col;
    672             }
    673 
    674             // adjust to the next row of mbs
    675             vp8_extend_mb_row(
    676                 &pc->yv12_fb[dst_fb_idx],
    677                 xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8
    678             );
    679 
    680             ++xd->mode_info_context;      /* skip prediction column */
    681 
    682             pbi->last_mb_row_decoded = mb_row;
    683         }
    684         xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count;
    685     }
    686 
    687     sem_wait(&pbi->h_event_end_decoding);   // add back for each frame
    688 #else
    689     (void) pbi;
    690     (void) xd;
    691 #endif
    692 }
    693 
    694 
    695 void vp8_mt_loop_filter_frame( VP8D_COMP *pbi)
    696 {
    697 #if CONFIG_MULTITHREAD
    698     VP8_COMMON *cm  = &pbi->common;
    699     MACROBLOCKD *mbd = &pbi->mb;
    700     int default_filt_lvl = pbi->common.filter_level;
    701 
    702     YV12_BUFFER_CONFIG *post = cm->frame_to_show;
    703     loop_filter_info *lfi = cm->lf_info;
    704     int frame_type = cm->frame_type;
    705 
    706     int mb_row;
    707     int mb_col;
    708 
    709     int filter_level;
    710     int alt_flt_enabled = mbd->segmentation_enabled;
    711 
    712     int i;
    713     unsigned char *y_ptr, *u_ptr, *v_ptr;
    714 
    715     volatile int *last_row_current_mb_col=NULL;
    716 
    717     vp8_setup_loop_filter_thread_data(pbi, mbd, pbi->mb_row_di, pbi->decoding_thread_count);
    718 
    719     mbd->mode_info_context = cm->mi;          // Point at base of Mb MODE_INFO list
    720 
    721     // Note the baseline filter values for each segment
    722     if (alt_flt_enabled)
    723     {
    724         for (i = 0; i < MAX_MB_SEGMENTS; i++)
    725         {
    726             // Abs value
    727             if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
    728                 pbi->mt_baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
    729             // Delta Value
    730             else
    731             {
    732                 pbi->mt_baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i];
    733                 pbi->mt_baseline_filter_level[i] = (pbi->mt_baseline_filter_level[i] >= 0) ? ((pbi->mt_baseline_filter_level[i] <= MAX_LOOP_FILTER) ? pbi->mt_baseline_filter_level[i] : MAX_LOOP_FILTER) : 0;  // Clamp to valid range
    734             }
    735         }
    736     }
    737     else
    738     {
    739         for (i = 0; i < MAX_MB_SEGMENTS; i++)
    740             pbi->mt_baseline_filter_level[i] = default_filt_lvl;
    741     }
    742 
    743     // Initialize the loop filter for this frame.
    744     if ((cm->last_filter_type != cm->filter_type) || (cm->last_sharpness_level != cm->sharpness_level))
    745         vp8_init_loop_filter(cm);
    746     else if (frame_type != cm->last_frame_type)
    747         vp8_frame_init_loop_filter(lfi, frame_type);
    748 
    749     for (i = 0; i < pbi->decoding_thread_count; i++)
    750         sem_post(&pbi->h_event_start_lpf[i]);
    751         // sem_post(&pbi->h_event_start_lpf);
    752 
    753     // Set up the buffer pointers
    754     y_ptr = post->y_buffer;
    755     u_ptr = post->u_buffer;
    756     v_ptr = post->v_buffer;
    757 
    758     // vp8_filter each macro block
    759     for (mb_row = 0; mb_row < cm->mb_rows; mb_row+= (pbi->decoding_thread_count + 1))
    760     {
    761         if (mb_row > 0)
    762             last_row_current_mb_col = &pbi->current_mb_col[mb_row -1];
    763 
    764         for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
    765         {
    766             int Segment = (alt_flt_enabled) ? mbd->mode_info_context->mbmi.segment_id : 0;
    767 
    768             if ( mb_row > 0 && (mb_col & 7) == 0){
    769             // if ( mb_row > 0 ){
    770                 while (mb_col > (*last_row_current_mb_col-8) && *last_row_current_mb_col != cm->mb_cols - 1)
    771                 {
    772                     x86_pause_hint();
    773                     thread_sleep(0);
    774                 }
    775             }
    776 
    777             filter_level = pbi->mt_baseline_filter_level[Segment];
    778 
    779             // Distance of Mb to the various image edges.
    780             // These specified to 8th pel as they are always compared to values that are in 1/8th pel units
    781             // Apply any context driven MB level adjustment
    782             vp8_adjust_mb_lf_value(mbd, &filter_level);
    783 
    784             if (filter_level)
    785             {
    786                 if (mb_col > 0)
    787                     cm->lf_mbv(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
    788 
    789                 if (mbd->mode_info_context->mbmi.dc_diff > 0)
    790                     cm->lf_bv(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
    791 
    792                 // don't apply across umv border
    793                 if (mb_row > 0)
    794                     cm->lf_mbh(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
    795 
    796                 if (mbd->mode_info_context->mbmi.dc_diff > 0)
    797                     cm->lf_bh(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
    798             }
    799 
    800             y_ptr += 16;
    801             u_ptr += 8;
    802             v_ptr += 8;
    803 
    804             mbd->mode_info_context++;     // step to next MB
    805             pbi->current_mb_col[mb_row] = mb_col;
    806         }
    807         mbd->mode_info_context++;         // Skip border mb
    808 
    809         //update for multi-thread
    810         y_ptr += post->y_stride  * 16 * (pbi->decoding_thread_count + 1) - post->y_width;
    811         u_ptr += post->uv_stride *  8 * (pbi->decoding_thread_count + 1) - post->uv_width;
    812         v_ptr += post->uv_stride *  8 * (pbi->decoding_thread_count + 1) - post->uv_width;
    813         mbd->mode_info_context += pbi->decoding_thread_count * mbd->mode_info_stride;
    814     }
    815 
    816     sem_wait(&pbi->h_event_end_lpf);
    817 #else
    818     (void) pbi;
    819 #endif
    820 }
    821