/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */


#include "vpx_ports/config.h"
#include "encodemb.h"
#include "encodemv.h"
#include "vp8/common/common.h"
#include "onyx_int.h"
#include "vp8/common/extend.h"
#include "vp8/common/entropymode.h"
#include "vp8/common/quant_common.h"
#include "segmentation.h"
#include "vp8/common/setupintrarecon.h"
#include "encodeintra.h"
#include "vp8/common/reconinter.h"
#include "rdopt.h"
#include "pickinter.h"
#include "vp8/common/findnearmv.h"
#include "vp8/common/reconintra.h"
#include <stdio.h>
#include <limits.h>
#include "vp8/common/subpixel.h"
#include "vpx_ports/vpx_timer.h"

#if CONFIG_RUNTIME_CPU_DETECT
#define RTCD(x)     &cpi->common.rtcd.x
#define IF_RTCD(x)  (x)
#else
#define RTCD(x)     NULL
#define IF_RTCD(x)  NULL
#endif
extern void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t);

extern void vp8cx_initialize_me_consts(VP8_COMP *cpi, int QIndex);
extern void vp8_auto_select_speed(VP8_COMP *cpi);
extern void vp8cx_init_mbrthread_data(VP8_COMP *cpi,
                                      MACROBLOCK *x,
                                      MB_ROW_COMP *mbr_ei,
                                      int mb_row,
                                      int count);
void vp8_build_block_offsets(MACROBLOCK *x);
void vp8_setup_block_ptrs(MACROBLOCK *x);
int vp8cx_encode_inter_macroblock(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t, int recon_yoffset, int recon_uvoffset);
int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t);

#ifdef MODE_STATS
unsigned int inter_y_modes[10] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
unsigned int inter_uv_modes[4] = {0, 0, 0, 0};
unsigned int inter_b_modes[15]  = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
unsigned int y_modes[5]   = {0, 0, 0, 0, 0};
unsigned int uv_modes[4]  = {0, 0, 0, 0};
unsigned int b_modes[14]  = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
#endif

static const int qrounding_factors[129] =
{
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48
};

static const int qzbin_factors[129] =
{
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80
};

static const int qrounding_factors_y2[129] =
{
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48
};

static const int qzbin_factors_y2[129] =
{
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80
};

#define EXACT_QUANT
#ifdef EXACT_QUANT
static void vp8cx_invert_quant(int improved_quant, short *quant,
                               short *shift, short d)
{
    if(improved_quant)
    {
        unsigned t;
        int l;
        t = d;
        for(l = 0; t > 1; l++)
            t>>=1;
        t = 1 + (1<<(16+l))/d;
        *quant = (short)(t - (1<<16));
        *shift = l;
    }
    else
    {
        *quant = (1 << 16) / d;
        *shift = 0;
    }
}
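
/* Editor's note (illustrative, not from the original source): for d = 124
 * the loop above finds l = 6 (124 >> 6 == 1), so
 *     t      = 1 + (1 << 22) / 124 = 33826
 *     *quant = 33826 - 65536       = -31710   (the low 16 bits of t)
 *     *shift = 6
 * The exact quantizer can then form y = (((x * quant) >> 16) + x) >> shift,
 * which equals (x * t) >> (16 + l) ~= x / d, replacing a per-coefficient
 * divide with a multiply and two shifts.
 */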

void vp8cx_init_quantizer(VP8_COMP *cpi)
{
    int i;
    int quant_val;
    int Q;

    int zbin_boost[16] = {0, 0, 8, 10, 12, 14, 16, 20, 24, 28, 32, 36, 40, 44, 44, 44};

    for (Q = 0; Q < QINDEX_RANGE; Q++)
    {
        // dc values
        quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q);
        cpi->Y1quant_fast[Q][0] = (1 << 16) / quant_val;
        vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + 0,
                           cpi->Y1quant_shift[Q] + 0, quant_val);
        cpi->Y1zbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
        cpi->Y1round[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
        cpi->common.Y1dequant[Q][0] = quant_val;
        cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7;

        quant_val = vp8_dc2quant(Q, cpi->common.y2dc_delta_q);
        cpi->Y2quant_fast[Q][0] = (1 << 16) / quant_val;
        vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y2quant[Q] + 0,
                           cpi->Y2quant_shift[Q] + 0, quant_val);
        cpi->Y2zbin[Q][0] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
        cpi->Y2round[Q][0] = (qrounding_factors_y2[Q] * quant_val) >> 7;
        cpi->common.Y2dequant[Q][0] = quant_val;
        cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7;

        quant_val = vp8_dc_uv_quant(Q, cpi->common.uvdc_delta_q);
        cpi->UVquant_fast[Q][0] = (1 << 16) / quant_val;
        vp8cx_invert_quant(cpi->sf.improved_quant, cpi->UVquant[Q] + 0,
                           cpi->UVquant_shift[Q] + 0, quant_val);
        cpi->UVzbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
        cpi->UVround[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
        cpi->common.UVdequant[Q][0] = quant_val;
        cpi->zrun_zbin_boost_uv[Q][0] = (quant_val * zbin_boost[0]) >> 7;

        // all the ac values
        for (i = 1; i < 16; i++)
        {
            int rc = vp8_default_zig_zag1d[i];

            quant_val = vp8_ac_yquant(Q);
            cpi->Y1quant_fast[Q][rc] = (1 << 16) / quant_val;
            vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + rc,
                               cpi->Y1quant_shift[Q] + rc, quant_val);
            cpi->Y1zbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
            cpi->Y1round[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
            cpi->common.Y1dequant[Q][rc] = quant_val;
            cpi->zrun_zbin_boost_y1[Q][i] = (quant_val * zbin_boost[i]) >> 7;

            quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q);
            cpi->Y2quant_fast[Q][rc] = (1 << 16) / quant_val;
            vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y2quant[Q] + rc,
                               cpi->Y2quant_shift[Q] + rc, quant_val);
            cpi->Y2zbin[Q][rc] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
            cpi->Y2round[Q][rc] = (qrounding_factors_y2[Q] * quant_val) >> 7;
            cpi->common.Y2dequant[Q][rc] = quant_val;
            cpi->zrun_zbin_boost_y2[Q][i] = (quant_val * zbin_boost[i]) >> 7;

            quant_val = vp8_ac_uv_quant(Q, cpi->common.uvac_delta_q);
            cpi->UVquant_fast[Q][rc] = (1 << 16) / quant_val;
            vp8cx_invert_quant(cpi->sf.improved_quant, cpi->UVquant[Q] + rc,
                               cpi->UVquant_shift[Q] + rc, quant_val);
            cpi->UVzbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
            cpi->UVround[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
            cpi->common.UVdequant[Q][rc] = quant_val;
            cpi->zrun_zbin_boost_uv[Q][i] = (quant_val * zbin_boost[i]) >> 7;
        }
    }
}
#else
void vp8cx_init_quantizer(VP8_COMP *cpi)
{
    int i;
    int quant_val;
    int Q;

    int zbin_boost[16] = {0, 0, 8, 10, 12, 14, 16, 20, 24, 28, 32, 36, 40, 44, 44, 44};

    for (Q = 0; Q < QINDEX_RANGE; Q++)
    {
        // dc values
        quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q);
        cpi->Y1quant[Q][0] = (1 << 16) / quant_val;
        cpi->Y1zbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
        cpi->Y1round[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
        cpi->common.Y1dequant[Q][0] = quant_val;
        cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7;

        quant_val = vp8_dc2quant(Q, cpi->common.y2dc_delta_q);
        cpi->Y2quant[Q][0] = (1 << 16) / quant_val;
        cpi->Y2zbin[Q][0] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
        cpi->Y2round[Q][0] = (qrounding_factors_y2[Q] * quant_val) >> 7;
        cpi->common.Y2dequant[Q][0] = quant_val;
        cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7;

        quant_val = vp8_dc_uv_quant(Q, cpi->common.uvdc_delta_q);
        cpi->UVquant[Q][0] = (1 << 16) / quant_val;
        cpi->UVzbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
        cpi->UVround[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
        cpi->common.UVdequant[Q][0] = quant_val;
        cpi->zrun_zbin_boost_uv[Q][0] = (quant_val * zbin_boost[0]) >> 7;

        // all the ac values
        for (i = 1; i < 16; i++)
        {
            int rc = vp8_default_zig_zag1d[i];

            quant_val = vp8_ac_yquant(Q);
            cpi->Y1quant[Q][rc] = (1 << 16) / quant_val;
            cpi->Y1zbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
            cpi->Y1round[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
            cpi->common.Y1dequant[Q][rc] = quant_val;
            cpi->zrun_zbin_boost_y1[Q][i] = (quant_val * zbin_boost[i]) >> 7;

            quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q);
            cpi->Y2quant[Q][rc] = (1 << 16) / quant_val;
            cpi->Y2zbin[Q][rc] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
            cpi->Y2round[Q][rc] = (qrounding_factors_y2[Q] * quant_val) >> 7;
            cpi->common.Y2dequant[Q][rc] = quant_val;
            cpi->zrun_zbin_boost_y2[Q][i] = (quant_val * zbin_boost[i]) >> 7;

            quant_val = vp8_ac_uv_quant(Q, cpi->common.uvac_delta_q);
            cpi->UVquant[Q][rc] = (1 << 16) / quant_val;
            cpi->UVzbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
            cpi->UVround[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
            cpi->common.UVdequant[Q][rc] = quant_val;
            cpi->zrun_zbin_boost_uv[Q][i] = (quant_val * zbin_boost[i]) >> 7;
        }
    }
}
#endif
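
/* Editor's note (illustrative): at Q = 0 with no delta, vp8_dc_quant()
 * returns 4, so the Y1 DC entries above become
 *     zbin  = ((84 * 4) + 64) >> 7 = 3
 *     round = (48 * 4) >> 7        = 1
 * i.e. qzbin_factors[] and qrounding_factors[] are 1/128th fractions
 * (84/128 and 48/128) of the dequant step size, and zrun_zbin_boost
 * scales the same step by the position-dependent zbin_boost[] entries.
 */
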
void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x)
{
    int i;
    int QIndex;
    MACROBLOCKD *xd = &x->e_mbd;
    int zbin_extra;

    // Select the baseline MB Q index.
    if (xd->segmentation_enabled)
    {
        // Abs Value
        if (xd->mb_segement_abs_delta == SEGMENT_ABSDATA)
            QIndex = xd->segment_feature_data[MB_LVL_ALT_Q][xd->mode_info_context->mbmi.segment_id];
        // Delta Value
        else
        {
            QIndex = cpi->common.base_qindex + xd->segment_feature_data[MB_LVL_ALT_Q][xd->mode_info_context->mbmi.segment_id];
            QIndex = (QIndex >= 0) ? ((QIndex <= MAXQ) ? QIndex : MAXQ) : 0;    // Clamp to valid range
        }
    }
    else
        QIndex = cpi->common.base_qindex;

    // Y
    zbin_extra = (cpi->common.Y1dequant[QIndex][1] * (cpi->zbin_over_quant + cpi->zbin_mode_boost)) >> 7;

    for (i = 0; i < 16; i++)
    {
        x->block[i].quant = cpi->Y1quant[QIndex];
        x->block[i].quant_fast = cpi->Y1quant_fast[QIndex];
        x->block[i].quant_shift = cpi->Y1quant_shift[QIndex];
        x->block[i].zbin = cpi->Y1zbin[QIndex];
        x->block[i].round = cpi->Y1round[QIndex];
        x->e_mbd.block[i].dequant = cpi->common.Y1dequant[QIndex];
        x->block[i].zrun_zbin_boost = cpi->zrun_zbin_boost_y1[QIndex];
        x->block[i].zbin_extra = (short)zbin_extra;
    }

    // UV
    zbin_extra = (cpi->common.UVdequant[QIndex][1] * (cpi->zbin_over_quant + cpi->zbin_mode_boost)) >> 7;

    for (i = 16; i < 24; i++)
    {
        x->block[i].quant = cpi->UVquant[QIndex];
        x->block[i].quant_fast = cpi->UVquant_fast[QIndex];
        x->block[i].quant_shift = cpi->UVquant_shift[QIndex];
        x->block[i].zbin = cpi->UVzbin[QIndex];
        x->block[i].round = cpi->UVround[QIndex];
        x->e_mbd.block[i].dequant = cpi->common.UVdequant[QIndex];
        x->block[i].zrun_zbin_boost = cpi->zrun_zbin_boost_uv[QIndex];
        x->block[i].zbin_extra = (short)zbin_extra;
    }

    // Y2
    zbin_extra = (cpi->common.Y2dequant[QIndex][1] * ((cpi->zbin_over_quant / 2) + cpi->zbin_mode_boost)) >> 7;
    x->block[24].quant_fast = cpi->Y2quant_fast[QIndex];
    x->block[24].quant = cpi->Y2quant[QIndex];
    x->block[24].quant_shift = cpi->Y2quant_shift[QIndex];
    x->block[24].zbin = cpi->Y2zbin[QIndex];
    x->block[24].round = cpi->Y2round[QIndex];
    x->e_mbd.block[24].dequant = cpi->common.Y2dequant[QIndex];
    x->block[24].zrun_zbin_boost = cpi->zrun_zbin_boost_y2[QIndex];
    x->block[24].zbin_extra = (short)zbin_extra;

    /* save this macroblock QIndex for vp8_update_zbin_extra() */
    x->q_index = QIndex;
}
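
/* Editor's note (illustrative): with base_qindex = 60, a delta-mode segment
 * feature of -20 gives this MB QIndex = 40 above, while +80 would give 140
 * and be clamped to MAXQ (127); in absolute mode the segment feature data
 * is used directly as the index.
 */
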
void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x)
{
    int i;
    int QIndex = x->q_index;
    int zbin_extra;

    // Y
    zbin_extra = (cpi->common.Y1dequant[QIndex][1] * (cpi->zbin_over_quant + cpi->zbin_mode_boost)) >> 7;
    for (i = 0; i < 16; i++)
    {
        x->block[i].zbin_extra = (short)zbin_extra;
    }

    // UV
    zbin_extra = (cpi->common.UVdequant[QIndex][1] * (cpi->zbin_over_quant + cpi->zbin_mode_boost)) >> 7;
    for (i = 16; i < 24; i++)
    {
        x->block[i].zbin_extra = (short)zbin_extra;
    }

    // Y2
    zbin_extra = (cpi->common.Y2dequant[QIndex][1] * ((cpi->zbin_over_quant / 2) + cpi->zbin_mode_boost)) >> 7;
    x->block[24].zbin_extra = (short)zbin_extra;
}

void vp8cx_frame_init_quantizer(VP8_COMP *cpi)
{
    // Clear Zbin mode boost for default case
    cpi->zbin_mode_boost = 0;

    // MB level quantizer setup
    vp8cx_mb_init_quantizer(cpi, &cpi->mb);
}


/* activity_avg must be positive, or flat regions could get a zero weight
 *  (infinite lambda), which confounds analysis.
 * This also avoids the need for divide by zero checks in
 *  vp8_activity_masking().
 */
#define VP8_ACTIVITY_AVG_MIN (64)

/* This is used as a reference when computing the source variance for the
 *  purposes of activity masking.
 * Eventually this should be replaced by custom no-reference routines,
 *  which will be faster.
 */
static const unsigned char VP8_VAR_OFFS[16]=
{
    128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128
};

unsigned int vp8_activity_masking(VP8_COMP *cpi, MACROBLOCK *x)
{
    unsigned int act;
    unsigned int sse;
    int sum;
    unsigned int a;
    unsigned int b;
    /* TODO: This could also be done over smaller areas (8x8), but that would
     *  require extensive changes elsewhere, as lambda is assumed to be fixed
     *  over an entire MB in most of the code.
     * Another option is to compute four 8x8 variances, and pick a single
     *  lambda using a non-linear combination (e.g., the smallest, or second
     *  smallest, etc.).
     */
    VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16var)(x->src.y_buffer,
     x->src.y_stride, VP8_VAR_OFFS, 0, &sse, &sum);
    /* This requires a full 32 bits of precision. */
    act = (sse<<8) - sum*sum;
    /* Drop 4 to give us some headroom to work with. */
    act = (act + 8) >> 4;
    /* If the region is flat, lower the activity some more. */
    if (act < 8<<12)
        act = act < 5<<12 ? act : 5<<12;
    /* TODO: For non-flat regions, edge regions should receive less masking
     *  than textured regions, but identifying edge regions quickly and
     *  reliably enough is still a subject of experimentation.
     * This will be most noticeable near edges with a complex shape (e.g.,
     *  text), but the 4x4 transform size should make this less of a problem
     *  than it would be for an 8x8 transform.
     */
    /* Apply the masking to the RD multiplier. */
    a = act + 4*cpi->activity_avg;
    b = 4*act + cpi->activity_avg;
    x->rdmult = (unsigned int)(((INT64)x->rdmult*b + (a>>1))/a);
    return act;
}
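
/* Editor's note (illustrative): the multiplier applied to rdmult above is
 * b/a = (4*act + avg) / (act + 4*avg). A block whose activity equals
 * activity_avg keeps rdmult unchanged; one 4x as active gets roughly
 * 17/8 ~= 2.1x (coarser), one at 1/4 the average gets 8/17 ~= 0.47x
 * (finer), and the ratio is bounded to the open interval (1/4, 4).
 */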


static
void encode_mb_row(VP8_COMP *cpi,
                   VP8_COMMON *cm,
                   int mb_row,
                   MACROBLOCK  *x,
                   MACROBLOCKD *xd,
                   TOKENEXTRA **tp,
                   int *segment_counts,
                   int *totalrate)
{
    INT64 activity_sum = 0;
    int i;
    int recon_yoffset, recon_uvoffset;
    int mb_col;
    int ref_fb_idx = cm->lst_fb_idx;
    int dst_fb_idx = cm->new_fb_idx;
    int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride;
    int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride;
    int seg_map_index = (mb_row * cpi->common.mb_cols);

#if CONFIG_MULTITHREAD
    const int nsync = cpi->mt_sync_range;
    const int rightmost_col = cm->mb_cols - 1;
    volatile const int *last_row_current_mb_col;

    if ((cpi->b_multi_threaded != 0) && (mb_row != 0))
        last_row_current_mb_col = &cpi->mt_current_mb_col[mb_row - 1];
    else
        last_row_current_mb_col = &rightmost_col;
#endif

    // reset above block coeffs
    xd->above_context = cm->above_context;

    xd->up_available = (mb_row != 0);
    recon_yoffset = (mb_row * recon_y_stride * 16);
    recon_uvoffset = (mb_row * recon_uv_stride * 8);

    cpi->tplist[mb_row].start = *tp;
    //printf("Main mb_row = %d\n", mb_row);

    // Distance of Mb to the top & bottom edges, specified in 1/8th pel
    // units as they are always compared to values that are in 1/8th pel units
    xd->mb_to_top_edge = -((mb_row * 16) << 3);
    xd->mb_to_bottom_edge = ((cm->mb_rows - 1 - mb_row) * 16) << 3;

    // Set up limit values for vertical motion vector components
    // to prevent them extending beyond the UMV borders
    x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16));
    x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16)
                        + (VP8BORDERINPIXELS - 16);
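
    /* Editor's note (illustrative): with VP8BORDERINPIXELS = 32 the border
     * term above is 16, so for mb_row = 0 of a 10-row frame
     *     mv_row_min = -16 and mv_row_max = 9*16 + 16 = 160
     * (full-pel units here; later search stages rescale to the MV precision
     * in use).
     */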

    // for each macroblock col in image
    for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
    {
        // Distance of Mb to the left & right edges, specified in
        // 1/8th pel units as they are always compared to values
        // that are in 1/8th pel units
        xd->mb_to_left_edge = -((mb_col * 16) << 3);
        xd->mb_to_right_edge = ((cm->mb_cols - 1 - mb_col) * 16) << 3;

        // Set up limit values for horizontal motion vector components
        // to prevent them extending beyond the UMV borders
        x->mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 16));
        x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16)
                            + (VP8BORDERINPIXELS - 16);

        xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
        xd->dst.u_buffer = cm->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
        xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
        xd->left_available = (mb_col != 0);

        x->rddiv = cpi->RDDIV;
        x->rdmult = cpi->RDMULT;

#if CONFIG_MULTITHREAD
        if ((cpi->b_multi_threaded != 0) && (mb_row != 0))
        {
            if ((mb_col & (nsync - 1)) == 0)
            {
                while (mb_col > (*last_row_current_mb_col - nsync)
                        && (*last_row_current_mb_col) != (cm->mb_cols - 1))
                {
                    x86_pause_hint();
                    thread_sleep(0);
                }
            }
        }
#endif
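
        /* Editor's note: the wait above only fires every nsync columns (the
         * mask assumes nsync is a power of two) and then spins until the row
         * above is at least nsync MBs ahead or has finished. E.g. with
         * nsync = 4, column 8 of this row cannot start until the row above
         * has passed column 12.
         */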

        if(cpi->oxcf.tuning == VP8_TUNE_SSIM)
            activity_sum += vp8_activity_masking(cpi, x);

        // Is segmentation enabled
        // MB level adjustment to quantizer
        if (xd->segmentation_enabled)
        {
            // Code to set segment id in xd->mbmi.segment_id for current MB (with range checking)
            if (cpi->segmentation_map[seg_map_index+mb_col] <= 3)
                xd->mode_info_context->mbmi.segment_id = cpi->segmentation_map[seg_map_index+mb_col];
            else
                xd->mode_info_context->mbmi.segment_id = 0;

            vp8cx_mb_init_quantizer(cpi, x);
        }
        else
            xd->mode_info_context->mbmi.segment_id = 0;         // Set to Segment 0 by default

        x->active_ptr = cpi->active_map + seg_map_index + mb_col;

        if (cm->frame_type == KEY_FRAME)
        {
            *totalrate += vp8cx_encode_intra_macro_block(cpi, x, tp);
#ifdef MODE_STATS
            y_modes[xd->mbmi.mode] ++;
#endif
        }
        else
        {
            *totalrate += vp8cx_encode_inter_macroblock(cpi, x, tp, recon_yoffset, recon_uvoffset);

#ifdef MODE_STATS
            inter_y_modes[xd->mbmi.mode] ++;

            if (xd->mbmi.mode == SPLITMV)
            {
                int b;

                for (b = 0; b < xd->mbmi.partition_count; b++)
                {
                    inter_b_modes[x->partition->bmi[b].mode] ++;
                }
            }

#endif

            // Count of last ref frame 0,0 usage
            if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME))
                cpi->inter_zz_count ++;

            // Special case code for cyclic refresh
            // If cyclic update enabled then copy xd->mbmi.segment_id (which may have been updated based on mode
            // during vp8cx_encode_inter_macroblock()) back into the global segmentation map
            if (cpi->cyclic_refresh_mode_enabled && xd->segmentation_enabled)
            {
                cpi->segmentation_map[seg_map_index+mb_col] = xd->mode_info_context->mbmi.segment_id;

                // If the block has been refreshed, mark it as clean (the magnitude of the -ve value
                // influences how long it will be before we consider another refresh);
                // else if it was coded (last frame 0,0) and has not already been refreshed, mark it
                // as a candidate for cleanup next time (marked 0); else mark it as dirty (1).
                if (xd->mode_info_context->mbmi.segment_id)
                    cpi->cyclic_refresh_map[seg_map_index+mb_col] = -1;
                else if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME))
                {
                    if (cpi->cyclic_refresh_map[seg_map_index+mb_col] == 1)
                        cpi->cyclic_refresh_map[seg_map_index+mb_col] = 0;
                }
                else
                    cpi->cyclic_refresh_map[seg_map_index+mb_col] = 1;

            }
        }

        cpi->tplist[mb_row].stop = *tp;

        x->gf_active_ptr++;      // Increment pointer into gf usage flags structure for next mb

        for (i = 0; i < 16; i++)
            vpx_memcpy(&xd->mode_info_context->bmi[i], &xd->block[i].bmi, sizeof(xd->block[i].bmi));

        // adjust to the next column of macroblocks
        x->src.y_buffer += 16;
        x->src.u_buffer += 8;
        x->src.v_buffer += 8;

        recon_yoffset += 16;
        recon_uvoffset += 8;

        // Keep track of segment usage
        segment_counts[xd->mode_info_context->mbmi.segment_id] ++;

        // skip to next mb
        xd->mode_info_context++;
        x->partition_info++;

        xd->above_context++;
#if CONFIG_MULTITHREAD
        if (cpi->b_multi_threaded != 0)
        {
            cpi->mt_current_mb_col[mb_row] = mb_col;
        }
#endif
    }

    //extend the recon for intra prediction
    vp8_extend_mb_row(
        &cm->yv12_fb[dst_fb_idx],
        xd->dst.y_buffer + 16,
        xd->dst.u_buffer + 8,
        xd->dst.v_buffer + 8);

    // this is to account for the border
    xd->mode_info_context++;
    x->partition_info++;
    x->activity_sum += activity_sum;

#if CONFIG_MULTITHREAD
    if ((cpi->b_multi_threaded != 0) && (mb_row == cm->mb_rows - 1))
    {
        sem_post(&cpi->h_event_end_encoding); /* signal frame encoding end */
    }
#endif
}

void vp8_encode_frame(VP8_COMP *cpi)
{
    int mb_row;
    MACROBLOCK *const x = & cpi->mb;
    VP8_COMMON *const cm = & cpi->common;
    MACROBLOCKD *const xd = & x->e_mbd;

    TOKENEXTRA *tp = cpi->tok;
    int segment_counts[MAX_MB_SEGMENTS];
    int totalrate;

    // Functions setup for all frame types so we can use MC in AltRef
    if (cm->mcomp_filter_type == SIXTAP)
    {
        xd->subpixel_predict        = SUBPIX_INVOKE(
                                        &cpi->common.rtcd.subpix, sixtap4x4);
        xd->subpixel_predict8x4     = SUBPIX_INVOKE(
                                        &cpi->common.rtcd.subpix, sixtap8x4);
        xd->subpixel_predict8x8     = SUBPIX_INVOKE(
                                        &cpi->common.rtcd.subpix, sixtap8x8);
        xd->subpixel_predict16x16   = SUBPIX_INVOKE(
                                        &cpi->common.rtcd.subpix, sixtap16x16);
    }
    else
    {
        xd->subpixel_predict        = SUBPIX_INVOKE(
                                        &cpi->common.rtcd.subpix, bilinear4x4);
        xd->subpixel_predict8x4     = SUBPIX_INVOKE(
                                        &cpi->common.rtcd.subpix, bilinear8x4);
        xd->subpixel_predict8x8     = SUBPIX_INVOKE(
                                        &cpi->common.rtcd.subpix, bilinear8x8);
        xd->subpixel_predict16x16   = SUBPIX_INVOKE(
                                      &cpi->common.rtcd.subpix, bilinear16x16);
    }

    x->gf_active_ptr = (signed char *)cpi->gf_active_flags;     // Point to base of GF active flags data structure

    x->vector_range = 32;

    // Count of MBs using the alternate Q if any
    cpi->alt_qcount = 0;

    // Reset frame count of inter 0,0 motion vector usage.
    cpi->inter_zz_count = 0;

    vpx_memset(segment_counts, 0, sizeof(segment_counts));

    cpi->prediction_error = 0;
    cpi->intra_error = 0;
    cpi->skip_true_count = 0;
    cpi->skip_false_count = 0;

#if 0
    // Experimental code
    cpi->frame_distortion = 0;
    cpi->last_mb_distortion = 0;
#endif

    totalrate = 0;

    x->partition_info = x->pi;

    xd->mode_info_context = cm->mi;
    xd->mode_info_stride = cm->mode_info_stride;

    xd->frame_type = cm->frame_type;

    xd->frames_since_golden = cm->frames_since_golden;
    xd->frames_till_alt_ref_frame = cm->frames_till_alt_ref_frame;
    vp8_zero(cpi->MVcount);
    // vp8_zero( Contexts)
    vp8_zero(cpi->coef_counts);

    // reset intra mode contexts
    if (cm->frame_type == KEY_FRAME)
        vp8_init_mbmode_probs(cm);

    vp8cx_frame_init_quantizer(cpi);

    if (cpi->compressor_speed == 2)
    {
        if (cpi->oxcf.cpu_used < 0)
            cpi->Speed = -(cpi->oxcf.cpu_used);
        else
            vp8_auto_select_speed(cpi);
    }

    vp8_initialize_rd_consts(cpi, vp8_dc_quant(cm->base_qindex, cm->y1dc_delta_q));
    vp8cx_initialize_me_consts(cpi, cm->base_qindex);

    // Copy data over into macro block data structures.

    x->src = * cpi->Source;
    xd->pre = cm->yv12_fb[cm->lst_fb_idx];
    xd->dst = cm->yv12_fb[cm->new_fb_idx];

    // set up the new frame for intra coded blocks

    vp8_setup_intra_recon(&cm->yv12_fb[cm->new_fb_idx]);

    vp8_build_block_offsets(x);

    vp8_setup_block_dptrs(&x->e_mbd);

    vp8_setup_block_ptrs(x);

    x->activity_sum = 0;

    xd->mode_info_context->mbmi.mode = DC_PRED;
    xd->mode_info_context->mbmi.uv_mode = DC_PRED;

    xd->left_context = &cm->left_context;

    vp8_zero(cpi->count_mb_ref_frame_usage)
    vp8_zero(cpi->ymode_count)
    vp8_zero(cpi->uv_mode_count)

    x->mvc = cm->fc.mvc;

    vpx_memset(cm->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * cm->mb_cols);

    {
        struct vpx_usec_timer  emr_timer;
        vpx_usec_timer_start(&emr_timer);

#if CONFIG_MULTITHREAD
        if (cpi->b_multi_threaded)
        {
            int i;

            vp8cx_init_mbrthread_data(cpi, x, cpi->mb_row_ei, 1,  cpi->encoding_thread_count);

            for (i = 0; i < cm->mb_rows; i++)
                cpi->mt_current_mb_col[i] = -1;

            for (i = 0; i < cpi->encoding_thread_count; i++)
            {
                sem_post(&cpi->h_event_start_encoding[i]);
            }

            for (mb_row = 0; mb_row < cm->mb_rows; mb_row += (cpi->encoding_thread_count + 1))
            {
                vp8_zero(cm->left_context)

                tp = cpi->tok + mb_row * (cm->mb_cols * 16 * 24);

                encode_mb_row(cpi, cm, mb_row, x, xd, &tp, segment_counts, &totalrate);

                // adjust to the next row of mbs
                x->src.y_buffer += 16 * x->src.y_stride * (cpi->encoding_thread_count + 1) - 16 * cm->mb_cols;
                x->src.u_buffer +=  8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols;
                x->src.v_buffer +=  8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols;

                xd->mode_info_context += xd->mode_info_stride * cpi->encoding_thread_count;
                x->partition_info  += xd->mode_info_stride * cpi->encoding_thread_count;

            }

            sem_wait(&cpi->h_event_end_encoding); /* wait for other threads to finish */

            cpi->tok_count = 0;

            for (mb_row = 0; mb_row < cm->mb_rows; mb_row ++)
            {
                cpi->tok_count += cpi->tplist[mb_row].stop - cpi->tplist[mb_row].start;
            }

            if (xd->segmentation_enabled)
            {
                int i, j;

                for (i = 0; i < cpi->encoding_thread_count; i++)
                {
                    for (j = 0; j < 4; j++)
                        segment_counts[j] += cpi->mb_row_ei[i].segment_counts[j];
                }
            }

            for (i = 0; i < cpi->encoding_thread_count; i++)
            {
                totalrate += cpi->mb_row_ei[i].totalrate;
            }

            for (i = 0; i < cpi->encoding_thread_count; i++)
            {
                x->activity_sum += cpi->mb_row_ei[i].mb.activity_sum;
            }

        }
        else
#endif
        {
            // for each macroblock row in image
            for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
            {
                vp8_zero(cm->left_context)

                encode_mb_row(cpi, cm, mb_row, x, xd, &tp, segment_counts, &totalrate);

                // adjust to the next row of mbs
                x->src.y_buffer += 16 * x->src.y_stride - 16 * cm->mb_cols;
                x->src.u_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols;
                x->src.v_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols;
            }

            cpi->tok_count = tp - cpi->tok;

        }

        vpx_usec_timer_mark(&emr_timer);
        cpi->time_encode_mb_row += vpx_usec_timer_elapsed(&emr_timer);

    }


    // Work out the segment probabilities if segmentation is enabled
    if (xd->segmentation_enabled)
    {
        int tot_count;
        int i;

        // Set to defaults
        vpx_memset(xd->mb_segment_tree_probs, 255 , sizeof(xd->mb_segment_tree_probs));

        tot_count = segment_counts[0] + segment_counts[1] + segment_counts[2] + segment_counts[3];

        if (tot_count)
        {
            xd->mb_segment_tree_probs[0] = ((segment_counts[0] + segment_counts[1]) * 255) / tot_count;

            tot_count = segment_counts[0] + segment_counts[1];

            if (tot_count > 0)
            {
                xd->mb_segment_tree_probs[1] = (segment_counts[0] * 255) / tot_count;
            }

            tot_count = segment_counts[2] + segment_counts[3];

            if (tot_count > 0)
                xd->mb_segment_tree_probs[2] = (segment_counts[2] * 255) / tot_count;

            // Zero probabilities not allowed
            for (i = 0; i < MB_FEATURE_TREE_PROBS; i ++)
            {
                if (xd->mb_segment_tree_probs[i] == 0)
                    xd->mb_segment_tree_probs[i] = 1;
            }
        }
    }
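
    /* Editor's note (illustrative): with segment_counts = {40, 20, 30, 10}
     * over 100 MBs the tree probabilities above become
     *     probs[0] = (40+20)*255/100 = 153   P(segment 0 or 1)
     *     probs[1] = 40*255/(40+20)  = 170   P(0 | 0 or 1)
     *     probs[2] = 30*255/(30+10)  = 191   P(2 | 2 or 3)
     * Zero entries are forced to 1 because the boolean coder cannot
     * represent a probability of 0/255.
     */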

    // 256 rate units to the bit
    cpi->projected_frame_size = totalrate >> 8;   // projected_frame_size in units of BYTES

    // Make a note of the percentage MBs coded Intra.
    if (cm->frame_type == KEY_FRAME)
    {
        cpi->this_frame_percent_intra = 100;
    }
    else
    {
        int tot_modes;

        tot_modes = cpi->count_mb_ref_frame_usage[INTRA_FRAME]
                    + cpi->count_mb_ref_frame_usage[LAST_FRAME]
                    + cpi->count_mb_ref_frame_usage[GOLDEN_FRAME]
                    + cpi->count_mb_ref_frame_usage[ALTREF_FRAME];

        if (tot_modes)
            cpi->this_frame_percent_intra = cpi->count_mb_ref_frame_usage[INTRA_FRAME] * 100 / tot_modes;

    }

#if 0
    {
        int cnt = 0;
        int flag[2] = {0, 0};

        for (cnt = 0; cnt < MVPcount; cnt++)
        {
            if (cm->fc.pre_mvc[0][cnt] != cm->fc.mvc[0][cnt])
            {
                flag[0] = 1;
                vpx_memcpy(cm->fc.pre_mvc[0], cm->fc.mvc[0], MVPcount);
                break;
            }
        }

        for (cnt = 0; cnt < MVPcount; cnt++)
        {
            if (cm->fc.pre_mvc[1][cnt] != cm->fc.mvc[1][cnt])
            {
                flag[1] = 1;
                vpx_memcpy(cm->fc.pre_mvc[1], cm->fc.mvc[1], MVPcount);
                break;
            }
        }

        if (flag[0] || flag[1])
            vp8_build_component_cost_table(cpi->mb.mvcost, cpi->mb.mvsadcost, (const MV_CONTEXT *) cm->fc.mvc, flag);
    }
#endif

    // Adjust the projected reference frame usage probability numbers to reflect
    // what we have just seen. This may be useful when we make multiple iterations
    // of the recode loop rather than continuing to use values from the previous frame.
    if ((cm->frame_type != KEY_FRAME) && !cm->refresh_alt_ref_frame && !cm->refresh_golden_frame)
    {
        const int *const rfct = cpi->count_mb_ref_frame_usage;
        const int rf_intra = rfct[INTRA_FRAME];
        const int rf_inter = rfct[LAST_FRAME] + rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME];

        if ((rf_intra + rf_inter) > 0)
        {
            cpi->prob_intra_coded = (rf_intra * 255) / (rf_intra + rf_inter);

            if (cpi->prob_intra_coded < 1)
                cpi->prob_intra_coded = 1;

            if ((cm->frames_since_golden > 0) || cpi->source_alt_ref_active)
            {
                cpi->prob_last_coded = rf_inter ? (rfct[LAST_FRAME] * 255) / rf_inter : 128;

                if (cpi->prob_last_coded < 1)
                    cpi->prob_last_coded = 1;

                cpi->prob_gf_coded = (rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME])
                                     ? (rfct[GOLDEN_FRAME] * 255) / (rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME]) : 128;

                if (cpi->prob_gf_coded < 1)
                    cpi->prob_gf_coded = 1;
            }
        }
    }

#if 0
    // Keep record of the total distortion this time around for future use
    cpi->last_frame_distortion = cpi->frame_distortion;
#endif

    /* Update the average activity for the next frame.
     * This is feed-forward for now; it could also be saved in two-pass, or
     *  done during lookahead when that is eventually added.
     */
    cpi->activity_avg = (unsigned int)(x->activity_sum / cpi->common.MBs);
    if (cpi->activity_avg < VP8_ACTIVITY_AVG_MIN)
        cpi->activity_avg = VP8_ACTIVITY_AVG_MIN;
}

void vp8_setup_block_ptrs(MACROBLOCK *x)
{
    int r, c;
    int i;

    for (r = 0; r < 4; r++)
    {
        for (c = 0; c < 4; c++)
        {
            x->block[r*4+c].src_diff = x->src_diff + r * 4 * 16 + c * 4;
        }
    }

    for (r = 0; r < 2; r++)
    {
        for (c = 0; c < 2; c++)
        {
            x->block[16 + r*2+c].src_diff = x->src_diff + 256 + r * 4 * 8 + c * 4;
        }
    }

    for (r = 0; r < 2; r++)
    {
        for (c = 0; c < 2; c++)
        {
            x->block[20 + r*2+c].src_diff = x->src_diff + 320 + r * 4 * 8 + c * 4;
        }
    }

    x->block[24].src_diff = x->src_diff + 384;

    for (i = 0; i < 25; i++)
    {
        x->block[i].coeff = x->coeff + i * 16;
    }
}
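
/* Editor's note: the layout implied by the pointers above is a 400-entry
 * (25 blocks x 16) src_diff buffer:
 *     [  0..255]  sixteen 4x4 Y blocks stored as a 16x16 map (stride 16)
 *     [256..319]  four 4x4 U blocks stored as an 8x8 map (stride 8)
 *     [320..383]  four 4x4 V blocks stored as an 8x8 map (stride 8)
 *     [384..399]  the 4x4 Y2 (second order DC) block
 * while block[i].coeff simply takes 16 transform coefficients per block in
 * block order.
 */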

void vp8_build_block_offsets(MACROBLOCK *x)
{
    int block = 0;
    int br, bc;

    vp8_build_block_doffsets(&x->e_mbd);

    // y blocks
    for (br = 0; br < 4; br++)
    {
        for (bc = 0; bc < 4; bc++)
        {
            BLOCK *this_block = &x->block[block];
            this_block->base_src = &x->src.y_buffer;
            this_block->src_stride = x->src.y_stride;
            this_block->src = 4 * br * this_block->src_stride + 4 * bc;
            ++block;
        }
    }

    // u blocks
    for (br = 0; br < 2; br++)
    {
        for (bc = 0; bc < 2; bc++)
        {
            BLOCK *this_block = &x->block[block];
            this_block->base_src = &x->src.u_buffer;
            this_block->src_stride = x->src.uv_stride;
            this_block->src = 4 * br * this_block->src_stride + 4 * bc;
            ++block;
        }
    }

    // v blocks
    for (br = 0; br < 2; br++)
    {
        for (bc = 0; bc < 2; bc++)
        {
            BLOCK *this_block = &x->block[block];
            this_block->base_src = &x->src.v_buffer;
            this_block->src_stride = x->src.uv_stride;
            this_block->src = 4 * br * this_block->src_stride + 4 * bc;
            ++block;
        }
    }
}
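
/* Editor's note (illustrative): y block 5 above (br = 1, bc = 1) gets
 * src = 4 * src_stride + 4, i.e. the 4x4 block whose top-left pixel is
 * (4,4) within the macroblock. base_src stores the address of the buffer
 * pointer itself, so the per-row and per-column advances applied to
 * x->src.y_buffer in encode_mb_row() are picked up automatically.
 */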

static void sum_intra_stats(VP8_COMP *cpi, MACROBLOCK *x)
{
    const MACROBLOCKD *xd = & x->e_mbd;
    const MB_PREDICTION_MODE m = xd->mode_info_context->mbmi.mode;
    const MB_PREDICTION_MODE uvm = xd->mode_info_context->mbmi.uv_mode;

#ifdef MODE_STATS
    const int is_key = cpi->common.frame_type == KEY_FRAME;

    ++ (is_key ? uv_modes : inter_uv_modes)[uvm];

    if (m == B_PRED)
    {
        unsigned int *const bct = is_key ? b_modes : inter_b_modes;

        int b = 0;

        do
        {
            ++ bct[xd->block[b].bmi.mode];
        }
        while (++b < 16);
    }

#endif

    ++cpi->ymode_count[m];
    ++cpi->uv_mode_count[uvm];
}

int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t)
{
    int Error4x4, Error16x16, error_uv;
    int rate4x4, rate16x16, rateuv;
    int dist4x4, dist16x16, distuv;
    int rate = 0;
    int rate4x4_tokenonly = 0;
    int rate16x16_tokenonly = 0;
    int rateuv_tokenonly = 0;

    x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;

#if !(CONFIG_REALTIME_ONLY)
    if (cpi->sf.RD && cpi->compressor_speed != 2)
    {
        error_uv = vp8_rd_pick_intra_mbuv_mode(cpi, x, &rateuv, &rateuv_tokenonly, &distuv);
        rate += rateuv;

        Error16x16 = vp8_rd_pick_intra16x16mby_mode(cpi, x, &rate16x16, &rate16x16_tokenonly, &dist16x16);

        Error4x4 = vp8_rd_pick_intra4x4mby_modes(cpi, x, &rate4x4, &rate4x4_tokenonly, &dist4x4, Error16x16);

        rate += (Error4x4 < Error16x16) ? rate4x4 : rate16x16;
    }
    else
#endif
    {
        int rate2, best_distortion;
        MB_PREDICTION_MODE mode, best_mode = DC_PRED;
        int this_rd;
        Error16x16 = INT_MAX;

        vp8_pick_intra_mbuv_mode(x);

        for (mode = DC_PRED; mode <= TM_PRED; mode ++)
        {
            int distortion2;

            x->e_mbd.mode_info_context->mbmi.mode = mode;
            RECON_INVOKE(&cpi->common.rtcd.recon, build_intra_predictors_mby)
                (&x->e_mbd);
            distortion2 = VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16prederror)(x->src.y_buffer, x->src.y_stride, x->e_mbd.predictor, 16, 0x7fffffff);
            rate2  = x->mbmode_cost[x->e_mbd.frame_type][mode];
            this_rd = RD_ESTIMATE(x->rdmult, x->rddiv, rate2, distortion2);

            if (Error16x16 > this_rd)
            {
                Error16x16 = this_rd;
                best_mode = mode;
                best_distortion = distortion2;
            }
        }
        x->e_mbd.mode_info_context->mbmi.mode = best_mode;

        Error4x4 = vp8_pick_intra4x4mby_modes(IF_RTCD(&cpi->rtcd), x, &rate2, &best_distortion);
    }

    if (Error4x4 < Error16x16)
    {
        x->e_mbd.mode_info_context->mbmi.mode = B_PRED;
        vp8_encode_intra4x4mby(IF_RTCD(&cpi->rtcd), x);
    }
    else
    {
        vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x);
    }

    vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x);
    sum_intra_stats(cpi, x);
    vp8_tokenize_mb(cpi, &x->e_mbd, t);

    return rate;
}

#ifdef SPEEDSTATS
extern int cnt_pm;
#endif

extern void vp8_fix_contexts(MACROBLOCKD *x);

int vp8cx_encode_inter_macroblock
(
    VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t,
    int recon_yoffset, int recon_uvoffset
)
{
    MACROBLOCKD *const xd = &x->e_mbd;
    int inter_error;
    int intra_error = 0;
    int rate;
    int distortion;

    x->skip = 0;

    if (xd->segmentation_enabled)
        x->encode_breakout = cpi->segment_encode_breakout[xd->mode_info_context->mbmi.segment_id];
    else
        x->encode_breakout = cpi->oxcf.encode_breakout;

#if !(CONFIG_REALTIME_ONLY)

    if (cpi->sf.RD)
    {
        int zbin_mode_boost_enabled = cpi->zbin_mode_boost_enabled;

        /* Are we using the fast quantizer for the mode selection? */
        if(cpi->sf.use_fastquant_for_pick)
        {
            cpi->mb.quantize_b      = QUANTIZE_INVOKE(&cpi->rtcd.quantize, fastquantb);

            /* the fast quantizer does not use zbin_extra, so
             * do not recalculate */
            cpi->zbin_mode_boost_enabled = 0;
        }
        inter_error = vp8_rd_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate, &distortion, &intra_error);

        /* switch back to the regular quantizer for the encode */
        if (cpi->sf.improved_quant)
        {
            cpi->mb.quantize_b    = QUANTIZE_INVOKE(&cpi->rtcd.quantize, quantb);
        }

        /* restore cpi->zbin_mode_boost_enabled */
        cpi->zbin_mode_boost_enabled = zbin_mode_boost_enabled;

    }
    else
#endif
        inter_error = vp8_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate, &distortion, &intra_error);


    cpi->prediction_error += inter_error;
    cpi->intra_error += intra_error;

#if 0
    // Experimental RD code
    cpi->frame_distortion += distortion;
    cpi->last_mb_distortion = distortion;
#endif

    // MB level adjustment to quantizer setup
    if (xd->segmentation_enabled)
    {
        // If cyclic update enabled
        if (cpi->cyclic_refresh_mode_enabled)
        {
            // Clear segment_id back to 0 if not coded (last frame 0,0)
            if ((xd->mode_info_context->mbmi.segment_id == 1) &&
                ((xd->mode_info_context->mbmi.ref_frame != LAST_FRAME) || (xd->mode_info_context->mbmi.mode != ZEROMV)))
            {
                xd->mode_info_context->mbmi.segment_id = 0;

                /* segment_id changed, so update */
                vp8cx_mb_init_quantizer(cpi, x);
            }
        }
    }

    {
        // Experimental code: special case for gf and arf zeromv modes;
        // increase zbin size to suppress noise.
        if (cpi->zbin_mode_boost_enabled)
        {
            if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
                cpi->zbin_mode_boost = 0;
            else
            {
                if (xd->mode_info_context->mbmi.mode == ZEROMV)
                {
                    if (xd->mode_info_context->mbmi.ref_frame != LAST_FRAME)
                        cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST;
                    else
                        cpi->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST;
                }
                else if (xd->mode_info_context->mbmi.mode == SPLITMV)
                    cpi->zbin_mode_boost = 0;
                else
                    cpi->zbin_mode_boost = MV_ZBIN_BOOST;
            }
        }
        else
            cpi->zbin_mode_boost = 0;

        vp8_update_zbin_extra(cpi, x);
    }

    cpi->count_mb_ref_frame_usage[xd->mode_info_context->mbmi.ref_frame] ++;

    if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
    {
        vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x);

        if (xd->mode_info_context->mbmi.mode == B_PRED)
        {
            vp8_encode_intra4x4mby(IF_RTCD(&cpi->rtcd), x);
        }
        else
        {
            vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x);
        }

        sum_intra_stats(cpi, x);
    }
    else
    {
        MV best_ref_mv;
        MV nearest, nearby;
        int mdcounts[4];
        int ref_fb_idx;

        vp8_find_near_mvs(xd, xd->mode_info_context,
                          &nearest, &nearby, &best_ref_mv, mdcounts, xd->mode_info_context->mbmi.ref_frame, cpi->common.ref_frame_sign_bias);

        vp8_build_uvmvs(xd, cpi->common.full_pixel);

        if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
            ref_fb_idx = cpi->common.lst_fb_idx;
        else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
            ref_fb_idx = cpi->common.gld_fb_idx;
        else
            ref_fb_idx = cpi->common.alt_fb_idx;

        xd->pre.y_buffer = cpi->common.yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
        xd->pre.u_buffer = cpi->common.yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
        xd->pre.v_buffer = cpi->common.yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;

        if (xd->mode_info_context->mbmi.mode == SPLITMV)
        {
            int i;

            for (i = 0; i < 16; i++)
            {
                if (xd->block[i].bmi.mode == NEW4X4)
                {
                    cpi->MVcount[0][mv_max+((xd->block[i].bmi.mv.as_mv.row - best_ref_mv.row) >> 1)]++;
                    cpi->MVcount[1][mv_max+((xd->block[i].bmi.mv.as_mv.col - best_ref_mv.col) >> 1)]++;
                }
            }
        }
        else if (xd->mode_info_context->mbmi.mode == NEWMV)
        {
            cpi->MVcount[0][mv_max+((xd->block[0].bmi.mv.as_mv.row - best_ref_mv.row) >> 1)]++;
            cpi->MVcount[1][mv_max+((xd->block[0].bmi.mv.as_mv.col - best_ref_mv.col) >> 1)]++;
        }

        if (!x->skip && !x->e_mbd.mode_info_context->mbmi.force_no_skip)
        {
            vp8_encode_inter16x16(IF_RTCD(&cpi->rtcd), x);

            // Clear mb_skip_coeff if mb_no_coeff_skip is not set
            if (!cpi->common.mb_no_coeff_skip)
                xd->mode_info_context->mbmi.mb_skip_coeff = 0;

        }
        else
            vp8_stuff_inter16x16(x);
    }

    if (!x->skip)
        vp8_tokenize_mb(cpi, xd, t);
    else
    {
        if (cpi->common.mb_no_coeff_skip)
        {
            if (xd->mode_info_context->mbmi.mode != B_PRED && xd->mode_info_context->mbmi.mode != SPLITMV)
                xd->mode_info_context->mbmi.dc_diff = 0;
            else
                xd->mode_info_context->mbmi.dc_diff = 1;

            xd->mode_info_context->mbmi.mb_skip_coeff = 1;
            cpi->skip_true_count ++;
            vp8_fix_contexts(xd);
        }
        else
        {
            vp8_stuff_mb(cpi, xd, t);
            xd->mode_info_context->mbmi.mb_skip_coeff = 0;
            cpi->skip_false_count ++;
        }
    }

    return rate;
}
   1429