/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <assert.h>  // assert() is used by the selective filter helpers below.

#include "vpx_config.h"
#include "vp9/common/vp9_loopfilter.h"
#include "vp9/common/vp9_onyxc_int.h"
#include "vp9/common/vp9_reconinter.h"
#include "vpx_mem/vpx_mem.h"

#include "vp9/common/vp9_seg_common.h"

struct loop_filter_info {
  const uint8_t *mblim;
  const uint8_t *lim;
  const uint8_t *hev_thr;
};

// This structure holds bit masks for all 8x8 blocks in a 64x64 region.
// Each 1 bit represents a position in which we want to apply the loop filter.
// Left_ entries refer to whether we apply a filter on the border to the
// left of the block.  Above_ entries refer to whether or not to apply a
// filter on the above border.  Int_ entries refer to whether or not to
// apply a filter on the internal 4x4 edges within the 8x8 block that each
// bit represents.
// Since each transform is accompanied by a potentially different type of
// loop filter there is a different entry in the array for each transform size.
typedef struct {
  uint64_t left_y[TX_SIZES];
  uint64_t above_y[TX_SIZES];
  uint64_t int_4x4_y;
  uint16_t left_uv[TX_SIZES];
  uint16_t above_uv[TX_SIZES];
  uint16_t int_4x4_uv;
} LOOP_FILTER_MASK;
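
// The masks above are indexed by 8x8 position inside the 64x64 region: bit
// (row * 8 + col) in the 64 bit y masks and bit ((row >> 1) * 4 + (col >> 1))
// in the 16 bit uv masks.  The helpers below are a minimal illustrative
// sketch of that mapping; they are not used by the code in this file and the
// names y_mask_bit/uv_mask_bit are our own.
static int y_mask_bit(int mi_row_in_sb, int mi_col_in_sb) {
  // One bit per 8x8 block, 8 blocks per row of the 64x64 region.
  return (mi_row_in_sb << 3) + mi_col_in_sb;
}

static int uv_mask_bit(int mi_row_in_sb, int mi_col_in_sb) {
  // With 4:2:0 subsampling the uv planes have a 4x4 grid of such blocks.
  return ((mi_row_in_sb >> 1) << 2) + (mi_col_in_sb >> 1);
}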

// 64 bit masks for left transform size.  Each 1 represents a position where
// we should apply a loop filter across the left border of an 8x8 block
// boundary.
//
// In the case of TX_16X16 (low order byte first) we end up with
// a mask that looks like this:
//
//    10101010
//    10101010
//    10101010
//    10101010
//    10101010
//    10101010
//    10101010
//    10101010
//
// A loopfilter should be applied to every other 8x8 horizontally.
static const uint64_t left_64x64_txform_mask[TX_SIZES] = {
    0xffffffffffffffff,  // TX_4X4
    0xffffffffffffffff,  // TX_8x8
    0x5555555555555555,  // TX_16x16
    0x1111111111111111,  // TX_32x32
};

// 64 bit masks for above transform size.  Each 1 represents a position where
// we should apply a loop filter across the top border of an 8x8 block
// boundary.
//
// In the case of TX_32X32 (low order byte first) we end up with
// a mask that looks like this:
//
//    11111111
//    00000000
//    00000000
//    00000000
//    11111111
//    00000000
//    00000000
//    00000000
//
// A loopfilter should be applied to every fourth row vertically.
static const uint64_t above_64x64_txform_mask[TX_SIZES] = {
    0xffffffffffffffff,  // TX_4X4
    0xffffffffffffffff,  // TX_8x8
    0x00ff00ff00ff00ff,  // TX_16x16
    0x000000ff000000ff,  // TX_32x32
};
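
// For reference, the two tables above can be derived directly from the
// transform size: a filtered left edge lands on every column of 8x8 blocks
// that is a multiple of the transform width in 8x8 units, and a filtered top
// edge on every such row.  The sketch below is illustrative only (not used
// elsewhere in this file) and assumes the square transform sizes used here.
static uint64_t build_y_txform_mask_sketch(TX_SIZE tx_size, int is_left) {
  // 8x8 blocks per transform: TX_4X4/TX_8X8 -> 1, TX_16X16 -> 2, TX_32X32 -> 4.
  const int step = tx_size <= TX_8X8 ? 1 : 1 << (tx_size - TX_8X8);
  uint64_t mask = 0;
  int row, col;
  for (row = 0; row < 8; ++row)
    for (col = 0; col < 8; ++col)
      if ((is_left ? col : row) % step == 0)
        mask |= (uint64_t)1 << (row * 8 + col);
  return mask;  // Matches left_64x64_txform_mask / above_64x64_txform_mask.
}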

// 64 bit masks for prediction sizes (left).  Each 1 represents a position
// on the left border of an 8x8 block where we apply a loop filter.  These
// are aligned to the lowest bits and then shifted into place.
//
// In the case of BLOCK_16X32 (low order byte first) we end up with
// a mask that looks like this:
//
//  10000000
//  10000000
//  10000000
//  10000000
//  00000000
//  00000000
//  00000000
//  00000000
static const uint64_t left_prediction_mask[BLOCK_SIZES] = {
    0x0000000000000001,  // BLOCK_4X4,
    0x0000000000000001,  // BLOCK_4X8,
    0x0000000000000001,  // BLOCK_8X4,
    0x0000000000000001,  // BLOCK_8X8,
    0x0000000000000101,  // BLOCK_8X16,
    0x0000000000000001,  // BLOCK_16X8,
    0x0000000000000101,  // BLOCK_16X16,
    0x0000000001010101,  // BLOCK_16X32,
    0x0000000000000101,  // BLOCK_32X16,
    0x0000000001010101,  // BLOCK_32X32,
    0x0101010101010101,  // BLOCK_32X64,
    0x0000000001010101,  // BLOCK_64X32,
    0x0101010101010101,  // BLOCK_64X64
};

// 64 bit mask to shift and set for each prediction size (above).
static const uint64_t above_prediction_mask[BLOCK_SIZES] = {
    0x0000000000000001,  // BLOCK_4X4
    0x0000000000000001,  // BLOCK_4X8
    0x0000000000000001,  // BLOCK_8X4
    0x0000000000000001,  // BLOCK_8X8
    0x0000000000000001,  // BLOCK_8X16,
    0x0000000000000003,  // BLOCK_16X8
    0x0000000000000003,  // BLOCK_16X16
    0x0000000000000003,  // BLOCK_16X32,
    0x000000000000000f,  // BLOCK_32X16,
    0x000000000000000f,  // BLOCK_32X32,
    0x000000000000000f,  // BLOCK_32X64,
    0x00000000000000ff,  // BLOCK_64X32,
    0x00000000000000ff,  // BLOCK_64X64
};
// 64 bit mask to shift and set for each prediction size.  A bit is set for
// each 8x8 block that the given prediction block covers within the 64x64
// region (before shifting into place).
static const uint64_t size_mask[BLOCK_SIZES] = {
    0x0000000000000001,  // BLOCK_4X4
    0x0000000000000001,  // BLOCK_4X8
    0x0000000000000001,  // BLOCK_8X4
    0x0000000000000001,  // BLOCK_8X8
    0x0000000000000101,  // BLOCK_8X16,
    0x0000000000000003,  // BLOCK_16X8
    0x0000000000000303,  // BLOCK_16X16
    0x0000000003030303,  // BLOCK_16X32,
    0x0000000000000f0f,  // BLOCK_32X16,
    0x000000000f0f0f0f,  // BLOCK_32X32,
    0x0f0f0f0f0f0f0f0f,  // BLOCK_32X64,
    0x00000000ffffffff,  // BLOCK_64X32,
    0xffffffffffffffff,  // BLOCK_64X64
};
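
// As an illustrative cross-check (this helper is not part of the original
// file), size_mask[bsize] is simply a rows x cols rectangle of 1s, where rows
// and cols are the block's height and width in 8x8 units (minimum 1 for the
// sub-8x8 sizes).
static uint64_t build_size_mask_sketch(int cols_8x8, int rows_8x8) {
  const uint64_t row_bits = ((uint64_t)1 << cols_8x8) - 1;
  uint64_t mask = 0;
  int row;
  for (row = 0; row < rows_8x8; ++row)
    mask |= row_bits << (row * 8);
  return mask;  // e.g. cols = 2, rows = 4 gives 0x03030303 (BLOCK_16X32).
}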

// These are used for masking the left and above borders.
static const uint64_t left_border =  0x1111111111111111;
static const uint64_t above_border = 0x000000ff000000ff;

// 16 bit masks for uv transform sizes.
static const uint16_t left_64x64_txform_mask_uv[TX_SIZES] = {
    0xffff,  // TX_4X4
    0xffff,  // TX_8x8
    0x5555,  // TX_16x16
    0x1111,  // TX_32x32
};

static const uint16_t above_64x64_txform_mask_uv[TX_SIZES] = {
    0xffff,  // TX_4X4
    0xffff,  // TX_8x8
    0x0f0f,  // TX_16x16
    0x000f,  // TX_32x32
};

// 16 bit left mask to shift and set for each uv prediction size.
static const uint16_t left_prediction_mask_uv[BLOCK_SIZES] = {
    0x0001,  // BLOCK_4X4,
    0x0001,  // BLOCK_4X8,
    0x0001,  // BLOCK_8X4,
    0x0001,  // BLOCK_8X8,
    0x0001,  // BLOCK_8X16,
    0x0001,  // BLOCK_16X8,
    0x0001,  // BLOCK_16X16,
    0x0011,  // BLOCK_16X32,
    0x0001,  // BLOCK_32X16,
    0x0011,  // BLOCK_32X32,
    0x1111,  // BLOCK_32X64,
    0x0011,  // BLOCK_64X32,
    0x1111,  // BLOCK_64X64
};
// 16 bit above mask to shift and set for each uv prediction size.
static const uint16_t above_prediction_mask_uv[BLOCK_SIZES] = {
    0x0001,  // BLOCK_4X4
    0x0001,  // BLOCK_4X8
    0x0001,  // BLOCK_8X4
    0x0001,  // BLOCK_8X8
    0x0001,  // BLOCK_8X16,
    0x0001,  // BLOCK_16X8
    0x0001,  // BLOCK_16X16
    0x0001,  // BLOCK_16X32,
    0x0003,  // BLOCK_32X16,
    0x0003,  // BLOCK_32X32,
    0x0003,  // BLOCK_32X64,
    0x000f,  // BLOCK_64X32,
    0x000f,  // BLOCK_64X64
};

// 16 bit mask to shift and set for each uv prediction size.
static const uint16_t size_mask_uv[BLOCK_SIZES] = {
    0x0001,  // BLOCK_4X4
    0x0001,  // BLOCK_4X8
    0x0001,  // BLOCK_8X4
    0x0001,  // BLOCK_8X8
    0x0001,  // BLOCK_8X16,
    0x0001,  // BLOCK_16X8
    0x0001,  // BLOCK_16X16
    0x0011,  // BLOCK_16X32,
    0x0003,  // BLOCK_32X16,
    0x0033,  // BLOCK_32X32,
    0x3333,  // BLOCK_32X64,
    0x00ff,  // BLOCK_64X32,
    0xffff,  // BLOCK_64X64
};
static const uint16_t left_border_uv =  0x1111;
static const uint16_t above_border_uv = 0x000f;


static void lf_init_lut(loop_filter_info_n *lfi) {
  lfi->mode_lf_lut[DC_PRED] = 0;
  lfi->mode_lf_lut[D45_PRED] = 0;
  lfi->mode_lf_lut[D135_PRED] = 0;
  lfi->mode_lf_lut[D117_PRED] = 0;
  lfi->mode_lf_lut[D153_PRED] = 0;
  lfi->mode_lf_lut[D207_PRED] = 0;
  lfi->mode_lf_lut[D63_PRED] = 0;
  lfi->mode_lf_lut[V_PRED] = 0;
  lfi->mode_lf_lut[H_PRED] = 0;
  lfi->mode_lf_lut[TM_PRED] = 0;
  lfi->mode_lf_lut[ZEROMV]  = 0;
  lfi->mode_lf_lut[NEARESTMV] = 1;
  lfi->mode_lf_lut[NEARMV] = 1;
  lfi->mode_lf_lut[NEWMV] = 1;
}

static void update_sharpness(loop_filter_info_n *lfi, int sharpness_lvl) {
  int lvl;

  // For each possible value of the loop filter level fill out the limits.
  for (lvl = 0; lvl <= MAX_LOOP_FILTER; lvl++) {
    // Set loop filter parameters that control sharpness.
    int block_inside_limit = lvl >> ((sharpness_lvl > 0) + (sharpness_lvl > 4));

    if (sharpness_lvl > 0) {
      if (block_inside_limit > (9 - sharpness_lvl))
        block_inside_limit = (9 - sharpness_lvl);
    }

    if (block_inside_limit < 1)
      block_inside_limit = 1;

    vpx_memset(lfi->lim[lvl], block_inside_limit, SIMD_WIDTH);
    vpx_memset(lfi->mblim[lvl], (2 * (lvl + 2) + block_inside_limit),
               SIMD_WIDTH);
  }
}
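
// Worked example (illustrative, not part of the original source): with
// lvl = 32 and sharpness_lvl = 5 the computation above gives
//   block_inside_limit = 32 >> ((5 > 0) + (5 > 4)) = 32 >> 2 = 8,
// which is then clamped to 9 - 5 = 4, so lim[32] is filled with 4 and
// mblim[32] with 2 * (32 + 2) + 4 = 72.  Higher sharpness therefore tightens
// the inside limit and lets less filtering through.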

void vp9_loop_filter_init(VP9_COMMON *cm) {
  loop_filter_info_n *lfi = &cm->lf_info;
  struct loopfilter *lf = &cm->lf;
  int i;

  // init limits for given sharpness
  update_sharpness(lfi, lf->sharpness_level);
  lf->last_sharpness_level = lf->sharpness_level;

  // init LUT for lvl and hev thr picking
  lf_init_lut(lfi);

  // init hev threshold const vectors
  for (i = 0; i < 4; i++)
    vpx_memset(lfi->hev_thr[i], i, SIMD_WIDTH);
}

void vp9_loop_filter_frame_init(VP9_COMMON *cm, int default_filt_lvl) {
  int seg_id;
  // n_shift is the shift applied to lf_deltas: the resulting multiplier is 1
  // when filter_lvl is between 0 and 31, and 2 when filter_lvl is between
  // 32 and 63.
  const int n_shift = default_filt_lvl >> 5;
  loop_filter_info_n *const lfi = &cm->lf_info;
  struct loopfilter *const lf = &cm->lf;
  struct segmentation *const seg = &cm->seg;

  // update limits if sharpness has changed
  if (lf->last_sharpness_level != lf->sharpness_level) {
    update_sharpness(lfi, lf->sharpness_level);
    lf->last_sharpness_level = lf->sharpness_level;
  }

  for (seg_id = 0; seg_id < MAX_SEGMENTS; seg_id++) {
    int lvl_seg = default_filt_lvl, ref, mode, intra_lvl;

    // Set the baseline filter values for each segment.
    if (vp9_segfeature_active(seg, seg_id, SEG_LVL_ALT_LF)) {
      const int data = vp9_get_segdata(seg, seg_id, SEG_LVL_ALT_LF);
      lvl_seg = seg->abs_delta == SEGMENT_ABSDATA
                  ? data
                  : clamp(default_filt_lvl + data, 0, MAX_LOOP_FILTER);
    }

    if (!lf->mode_ref_delta_enabled) {
      // We could get rid of this if we assume that deltas are set to
      // zero when not in use; the encoder always uses deltas.
      vpx_memset(lfi->lvl[seg_id], lvl_seg, sizeof(lfi->lvl[seg_id]));
      continue;
    }

    intra_lvl = lvl_seg + (lf->ref_deltas[INTRA_FRAME] << n_shift);
    lfi->lvl[seg_id][INTRA_FRAME][0] = clamp(intra_lvl, 0, MAX_LOOP_FILTER);

    for (ref = LAST_FRAME; ref < MAX_REF_FRAMES; ++ref)
      for (mode = 0; mode < MAX_MODE_LF_DELTAS; ++mode) {
        const int inter_lvl = lvl_seg + (lf->ref_deltas[ref] << n_shift)
                                      + (lf->mode_deltas[mode] << n_shift);
        lfi->lvl[seg_id][ref][mode] = clamp(inter_lvl, 0, MAX_LOOP_FILTER);
      }
  }
}
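
// Worked example (illustrative values, not from the original source): with
// default_filt_lvl = 40 we get n_shift = 40 >> 5 = 1, so every delta is
// doubled.  If a segment has no SEG_LVL_ALT_LF feature, lvl_seg stays 40;
// with ref_deltas[LAST_FRAME] = -2 and mode_deltas[1] = 1 the inter level
// for that reference and mode becomes
//   clamp(40 + (-2 << 1) + (1 << 1), 0, MAX_LOOP_FILTER) = 38.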

static int build_lfi(const loop_filter_info_n *lfi_n,
                     const MB_MODE_INFO *mbmi,
                     struct loop_filter_info *lfi) {
  const int seg = mbmi->segment_id;
  const int ref = mbmi->ref_frame[0];
  const int mode = lfi_n->mode_lf_lut[mbmi->mode];
  const int filter_level = lfi_n->lvl[seg][ref][mode];

  if (filter_level > 0) {
    lfi->mblim = lfi_n->mblim[filter_level];
    lfi->lim = lfi_n->lim[filter_level];
    lfi->hev_thr = lfi_n->hev_thr[filter_level >> 4];
    return 1;
  } else {
    return 0;
  }
}
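
// Note on the hev_thr lookup above (our reading of the code, added for
// clarity): filter_level >> 4 buckets the level into one of the four constant
// high-edge-variance thresholds set up in vp9_loop_filter_init, i.e. levels
// 0-15 use threshold 0, 16-31 use 1, 32-47 use 2 and 48-63 use 3.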

static void filter_selectively_vert(uint8_t *s, int pitch,
                                    unsigned int mask_16x16,
                                    unsigned int mask_8x8,
                                    unsigned int mask_4x4,
                                    unsigned int mask_4x4_int,
                                    const struct loop_filter_info *lfi) {
  unsigned int mask;

  for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int;
       mask; mask >>= 1) {
    if (mask & 1) {
      if (mask_16x16 & 1) {
        vp9_mb_lpf_vertical_edge_w(s, pitch, lfi->mblim, lfi->lim,
                                   lfi->hev_thr);
        assert(!(mask_8x8 & 1));
        assert(!(mask_4x4 & 1));
        assert(!(mask_4x4_int & 1));
      } else if (mask_8x8 & 1) {
        vp9_mbloop_filter_vertical_edge(s, pitch, lfi->mblim, lfi->lim,
                                        lfi->hev_thr, 1);
        assert(!(mask_16x16 & 1));
        assert(!(mask_4x4 & 1));
      } else if (mask_4x4 & 1) {
        vp9_loop_filter_vertical_edge(s, pitch, lfi->mblim, lfi->lim,
                                      lfi->hev_thr, 1);
        assert(!(mask_16x16 & 1));
        assert(!(mask_8x8 & 1));
      }
    }
    if (mask_4x4_int & 1)
      vp9_loop_filter_vertical_edge(s + 4, pitch, lfi->mblim, lfi->lim,
                                    lfi->hev_thr, 1);
    s += 8;
    lfi++;
    mask_16x16 >>= 1;
    mask_8x8 >>= 1;
    mask_4x4 >>= 1;
    mask_4x4_int >>= 1;
  }
}

static void filter_selectively_horiz(uint8_t *s, int pitch,
                                     unsigned int mask_16x16,
                                     unsigned int mask_8x8,
                                     unsigned int mask_4x4,
                                     unsigned int mask_4x4_int,
                                     int only_4x4_1,
                                     const struct loop_filter_info *lfi) {
  unsigned int mask;
  int count;

  for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int;
       mask; mask >>= count) {
    count = 1;
    if (mask & 1) {
      if (!only_4x4_1) {
        if (mask_16x16 & 1) {
          if ((mask_16x16 & 3) == 3) {
            vp9_mb_lpf_horizontal_edge_w(s, pitch, lfi->mblim, lfi->lim,
                                         lfi->hev_thr, 2);
            count = 2;
          } else {
            vp9_mb_lpf_horizontal_edge_w(s, pitch, lfi->mblim, lfi->lim,
                                         lfi->hev_thr, 1);
          }
          assert(!(mask_8x8 & 1));
          assert(!(mask_4x4 & 1));
          assert(!(mask_4x4_int & 1));
        } else if (mask_8x8 & 1) {
          vp9_mbloop_filter_horizontal_edge(s, pitch, lfi->mblim, lfi->lim,
                                            lfi->hev_thr, 1);
          assert(!(mask_16x16 & 1));
          assert(!(mask_4x4 & 1));
        } else if (mask_4x4 & 1) {
          vp9_loop_filter_horizontal_edge(s, pitch, lfi->mblim, lfi->lim,
                                          lfi->hev_thr, 1);
          assert(!(mask_16x16 & 1));
          assert(!(mask_8x8 & 1));
        }
      }

      if (mask_4x4_int & 1)
        vp9_loop_filter_horizontal_edge(s + 4 * pitch, pitch, lfi->mblim,
                                        lfi->lim, lfi->hev_thr, 1);
    }
    s += 8 * count;
    lfi += count;
    mask_16x16 >>= count;
    mask_8x8 >>= count;
    mask_4x4 >>= count;
    mask_4x4_int >>= count;
  }
}
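
// Example of how the masks drive the horizontal pass above (illustrative
// numbers only): with mask_16x16 = 0x03 and the other masks zero, the first
// iteration sees (mask_16x16 & 3) == 3, so the wide filter is called once
// with count = 2 and covers two adjacent 8x8 columns (16 pixels) in a single
// call; the loop then advances s by 16 pixels and shifts every mask right by
// two.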

// This function ors into the current lfm structure where to do loop
// filters for the specific mi we are looking at.  It uses information
// including the block_size_type (32x16, 32x32, etc.), the transform size,
// whether there were any coefficients encoded, and the loop filter strength
// of the block we are currently looking at.  Shift is used to position the
// 1's we produce.
// TODO(JBB) Need another function for different resolution color.
static void build_masks(const loop_filter_info_n *const lfi_n,
                        const MODE_INFO *mi, const int shift_y,
                        const int shift_uv,
                        LOOP_FILTER_MASK *lfm) {
  const BLOCK_SIZE block_size = mi->mbmi.sb_type;
  const TX_SIZE tx_size_y = mi->mbmi.tx_size;
  const TX_SIZE tx_size_uv = get_uv_tx_size(&mi->mbmi);
  const int skip = mi->mbmi.skip_coeff;
  const int seg = mi->mbmi.segment_id;
  const int ref = mi->mbmi.ref_frame[0];
  const int mode = lfi_n->mode_lf_lut[mi->mbmi.mode];
  const int filter_level = lfi_n->lvl[seg][ref][mode];
  uint64_t *left_y = &lfm->left_y[tx_size_y];
  uint64_t *above_y = &lfm->above_y[tx_size_y];
  uint64_t *int_4x4_y = &lfm->int_4x4_y;
  uint16_t *left_uv = &lfm->left_uv[tx_size_uv];
  uint16_t *above_uv = &lfm->above_uv[tx_size_uv];
  uint16_t *int_4x4_uv = &lfm->int_4x4_uv;

  // If the filter level is 0 we don't loop filter.
  if (!filter_level)
    return;

  // These set a 1 in the current block size for each block size edge.
  // For instance if the block size is 32x16, we'll set:
  //    above =   1111
  //              0000
  //    and
  //    left  =   1000
  //          =   1000
  // NOTE : In this example the low bit is the left most bit, so ( 1000 ) is
  //        stored as 1, not 8.
  //
  // U and V set the same information on a 16 bit scale.
  //
  *above_y |= above_prediction_mask[block_size] << shift_y;
  *above_uv |= above_prediction_mask_uv[block_size] << shift_uv;
  *left_y |= left_prediction_mask[block_size] << shift_y;
  *left_uv |= left_prediction_mask_uv[block_size] << shift_uv;

  // If the block has no coefficients and is not intra we skip applying
  // the loop filter on block edges.
  if (skip && ref > INTRA_FRAME)
    return;

  // Here we are adding a mask for the transform size.  The transform
  // size mask is set to be correct for a 64x64 prediction block size.  We
  // mask it to match the size of the block we are working on and then shift
  // it into place.
  *above_y |= (size_mask[block_size] &
               above_64x64_txform_mask[tx_size_y]) << shift_y;
  *above_uv |= (size_mask_uv[block_size] &
                above_64x64_txform_mask_uv[tx_size_uv]) << shift_uv;

  *left_y |= (size_mask[block_size] &
              left_64x64_txform_mask[tx_size_y]) << shift_y;
  *left_uv |= (size_mask_uv[block_size] &
               left_64x64_txform_mask_uv[tx_size_uv]) << shift_uv;

  // Here we are trying to determine what to do with the internal 4x4 block
  // boundaries.  These differ from the 4x4 boundaries on the outside edge of
  // an 8x8 in that the internal ones can be skipped and don't depend on
  // the prediction block size.
  if (tx_size_y == TX_4X4) {
    *int_4x4_y |= (size_mask[block_size] & 0xffffffffffffffff) << shift_y;
  }
  if (tx_size_uv == TX_4X4) {
    *int_4x4_uv |= (size_mask_uv[block_size] & 0xffff) << shift_uv;
  }
}
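
// Worked example of the shifting above (illustrative values, not from the
// original source): a BLOCK_16X16 whose top-left 8x8 block sits at mi offset
// (row 2, col 4) inside the 64x64 region uses shift_y = 2 * 8 + 4 = 20 and
// shift_uv = (2 >> 1) * 4 + (4 >> 1) = 6.  left_prediction_mask[BLOCK_16X16]
// = 0x0101 shifted left by 20 marks the two 8x8 positions on the block's
// left edge, and the uv masks mark the single corresponding chroma position.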

// This function does the same thing as the one above with the exception that
// it only affects the y masks.  It exists because for blocks < 16x16 in size,
// we only update the u and v masks on the first block.
static void build_y_mask(const loop_filter_info_n *const lfi_n,
                         const MODE_INFO *mi, const int shift_y,
                         LOOP_FILTER_MASK *lfm) {
  const BLOCK_SIZE block_size = mi->mbmi.sb_type;
  const TX_SIZE tx_size_y = mi->mbmi.tx_size;
  const int skip = mi->mbmi.skip_coeff;
  const int seg = mi->mbmi.segment_id;
  const int ref = mi->mbmi.ref_frame[0];
  const int mode = lfi_n->mode_lf_lut[mi->mbmi.mode];
  const int filter_level = lfi_n->lvl[seg][ref][mode];
  uint64_t *left_y = &lfm->left_y[tx_size_y];
  uint64_t *above_y = &lfm->above_y[tx_size_y];
  uint64_t *int_4x4_y = &lfm->int_4x4_y;

  if (!filter_level)
    return;

  *above_y |= above_prediction_mask[block_size] << shift_y;
  *left_y |= left_prediction_mask[block_size] << shift_y;

  if (skip && ref > INTRA_FRAME)
    return;

  *above_y |= (size_mask[block_size] &
               above_64x64_txform_mask[tx_size_y]) << shift_y;

  *left_y |= (size_mask[block_size] &
              left_64x64_txform_mask[tx_size_y]) << shift_y;

  if (tx_size_y == TX_4X4) {
    *int_4x4_y |= (size_mask[block_size] & 0xffffffffffffffff) << shift_y;
  }
}

// This function sets up the bit masks for the entire 64x64 region represented
// by mi_row, mi_col.
// TODO(JBB): This function only works for yv12.
static void setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col,
                       MODE_INFO **mi_8x8, const int mode_info_stride,
                       LOOP_FILTER_MASK *lfm) {
  int idx_32, idx_16, idx_8;
  const loop_filter_info_n *const lfi_n = &cm->lf_info;
  MODE_INFO **mip = mi_8x8;
  MODE_INFO **mip2 = mi_8x8;

  // These are offsets to the next mi in the 64x64 block.  They are what gets
  // added to the mi ptr as we go through each loop.  They help us avoid
  // setting up special row and column counters for each index.  The last step
  // brings us back to the starting position.
  const int offset_32[] = {4, (mode_info_stride << 2) - 4, 4,
                           -(mode_info_stride << 2) - 4};
  const int offset_16[] = {2, (mode_info_stride << 1) - 2, 2,
                           -(mode_info_stride << 1) - 2};
  const int offset[] = {1, mode_info_stride - 1, 1, -mode_info_stride - 1};

  // The following variables represent shifts that position the current block's
  // mask over the appropriate block.  A left shift of 36 moves the bits for
  // the final 32 by 32 block in the 64x64 down 4 rows and over 4 columns, to
  // the appropriate spot.
  const int shift_32_y[] = {0, 4, 32, 36};
  const int shift_16_y[] = {0, 2, 16, 18};
  const int shift_8_y[] = {0, 1, 8, 9};
  const int shift_32_uv[] = {0, 2, 8, 10};
  const int shift_16_uv[] = {0, 1, 4, 5};
  int i;
  const int max_rows = (mi_row + MI_BLOCK_SIZE > cm->mi_rows ?
                        cm->mi_rows - mi_row : MI_BLOCK_SIZE);
  const int max_cols = (mi_col + MI_BLOCK_SIZE > cm->mi_cols ?
                        cm->mi_cols - mi_col : MI_BLOCK_SIZE);

  vp9_zero(*lfm);

  // TODO(jimbankoski): Try moving most of the following code into the decode
  // loop and storing lfm in the mbmi structure so that we don't have to go
  // through the recursive loop structure multiple times.
  switch (mip[0]->mbmi.sb_type) {
    case BLOCK_64X64:
      build_masks(lfi_n, mip[0], 0, 0, lfm);
      break;
    case BLOCK_64X32:
      build_masks(lfi_n, mip[0], 0, 0, lfm);
      mip2 = mip + mode_info_stride * 4;
      if (4 >= max_rows)
        break;
      build_masks(lfi_n, mip2[0], 32, 8, lfm);
      break;
    case BLOCK_32X64:
      build_masks(lfi_n, mip[0], 0, 0, lfm);
      mip2 = mip + 4;
      if (4 >= max_cols)
        break;
      build_masks(lfi_n, mip2[0], 4, 2, lfm);
      break;
    default:
      for (idx_32 = 0; idx_32 < 4; mip += offset_32[idx_32], ++idx_32) {
        const int shift_y = shift_32_y[idx_32];
        const int shift_uv = shift_32_uv[idx_32];
        const int mi_32_col_offset = ((idx_32 & 1) << 2);
        const int mi_32_row_offset = ((idx_32 >> 1) << 2);
        if (mi_32_col_offset >= max_cols || mi_32_row_offset >= max_rows)
          continue;
        switch (mip[0]->mbmi.sb_type) {
          case BLOCK_32X32:
            build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
            break;
          case BLOCK_32X16:
            build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
            if (mi_32_row_offset + 2 >= max_rows)
              continue;
            mip2 = mip + mode_info_stride * 2;
            build_masks(lfi_n, mip2[0], shift_y + 16, shift_uv + 4, lfm);
            break;
          case BLOCK_16X32:
            build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
            if (mi_32_col_offset + 2 >= max_cols)
              continue;
            mip2 = mip + 2;
            build_masks(lfi_n, mip2[0], shift_y + 2, shift_uv + 1, lfm);
            break;
          default:
            for (idx_16 = 0; idx_16 < 4; mip += offset_16[idx_16], ++idx_16) {
              const int shift_y = shift_32_y[idx_32] + shift_16_y[idx_16];
              const int shift_uv = shift_32_uv[idx_32] + shift_16_uv[idx_16];
              const int mi_16_col_offset = mi_32_col_offset +
                  ((idx_16 & 1) << 1);
              const int mi_16_row_offset = mi_32_row_offset +
                  ((idx_16 >> 1) << 1);

              if (mi_16_col_offset >= max_cols || mi_16_row_offset >= max_rows)
                continue;

              switch (mip[0]->mbmi.sb_type) {
                case BLOCK_16X16:
                  build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
                  break;
                case BLOCK_16X8:
                  build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
                  if (mi_16_row_offset + 1 >= max_rows)
                    continue;
                  mip2 = mip + mode_info_stride;
                  build_y_mask(lfi_n, mip2[0], shift_y + 8, lfm);
                  break;
                case BLOCK_8X16:
                  build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
                  if (mi_16_col_offset + 1 >= max_cols)
                    continue;
                  mip2 = mip + 1;
                  build_y_mask(lfi_n, mip2[0], shift_y + 1, lfm);
                  break;
                default: {
                  const int shift_y = shift_32_y[idx_32] +
                                      shift_16_y[idx_16] +
                                      shift_8_y[0];
                  build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
                  mip += offset[0];
                  for (idx_8 = 1; idx_8 < 4; mip += offset[idx_8], ++idx_8) {
                    const int shift_y = shift_32_y[idx_32] +
                                        shift_16_y[idx_16] +
                                        shift_8_y[idx_8];
                    const int mi_8_col_offset = mi_16_col_offset +
                        ((idx_8 & 1));
                    const int mi_8_row_offset = mi_16_row_offset +
                        ((idx_8 >> 1));

                    if (mi_8_col_offset >= max_cols ||
                        mi_8_row_offset >= max_rows)
                      continue;
                    build_y_mask(lfi_n, mip[0], shift_y, lfm);
                  }
                  break;
                }
              }
            }
            break;
        }
      }
      break;
  }
  // The largest loopfilter we have is 16x16 so we use the 16x16 mask
  // for 32x32 transforms as well.
  lfm->left_y[TX_16X16] |= lfm->left_y[TX_32X32];
  lfm->above_y[TX_16X16] |= lfm->above_y[TX_32X32];
  lfm->left_uv[TX_16X16] |= lfm->left_uv[TX_32X32];
  lfm->above_uv[TX_16X16] |= lfm->above_uv[TX_32X32];

  // We do at least an 8 tap filter on every 32x32 boundary even if the
  // transform size is 4x4.  So if the 4x4 is set on a border pixel add it to
  // the 8x8 and remove it from the 4x4.
  lfm->left_y[TX_8X8] |= lfm->left_y[TX_4X4] & left_border;
  lfm->left_y[TX_4X4] &= ~left_border;
  lfm->above_y[TX_8X8] |= lfm->above_y[TX_4X4] & above_border;
  lfm->above_y[TX_4X4] &= ~above_border;
  lfm->left_uv[TX_8X8] |= lfm->left_uv[TX_4X4] & left_border_uv;
  lfm->left_uv[TX_4X4] &= ~left_border_uv;
  lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_4X4] & above_border_uv;
  lfm->above_uv[TX_4X4] &= ~above_border_uv;

  // We do some special edge handling.
  if (mi_row + MI_BLOCK_SIZE > cm->mi_rows) {
    const uint64_t rows = cm->mi_rows - mi_row;

    // Each 8x8 position inside the picture gets a 1.
    const uint64_t mask_y = (((uint64_t) 1 << (rows << 3)) - 1);
    const uint16_t mask_uv = (((uint16_t) 1 << (((rows + 1) >> 1) << 2)) - 1);
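    // Worked example (illustrative): if only 3 mi rows remain below mi_row,
    // rows = 3, so mask_y = (1 << 24) - 1 keeps the first three 8-bit rows of
    // the y masks and mask_uv = (1 << 8) - 1 keeps the first two 4-bit rows
    // of the uv masks.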

    // Remove values completely outside our border.
    for (i = 0; i < TX_32X32; i++) {
      lfm->left_y[i] &= mask_y;
      lfm->above_y[i] &= mask_y;
      lfm->left_uv[i] &= mask_uv;
      lfm->above_uv[i] &= mask_uv;
    }
    lfm->int_4x4_y &= mask_y;
    lfm->int_4x4_uv &= mask_uv;

    // We don't apply a wide loop filter on the last uv block row.  If set,
    // apply the shorter one instead.
    if (rows == 1) {
      lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_16X16];
      lfm->above_uv[TX_16X16] = 0;
    }
    if (rows == 5) {
      lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_16X16] & 0xff00;
      lfm->above_uv[TX_16X16] &= ~(lfm->above_uv[TX_16X16] & 0xff00);
    }
  }

  if (mi_col + MI_BLOCK_SIZE > cm->mi_cols) {
    const uint64_t columns = cm->mi_cols - mi_col;

    // Each 8x8 position inside the border gets a 1; the multiply copies the
    // single-row mask to every row.
    const uint64_t mask_y  = (((1 << columns) - 1)) * 0x0101010101010101;
    const uint16_t mask_uv = ((1 << ((columns + 1) >> 1)) - 1) * 0x1111;

    // Internal edges are not applied on the last column of the image, so
    // we mask off one more column for the internal edges.
    const uint16_t mask_uv_int = ((1 << (columns >> 1)) - 1) * 0x1111;
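    // Worked example (illustrative): with 3 mi columns remaining,
    // mask_y = ((1 << 3) - 1) * 0x0101010101010101 = 0x0707070707070707,
    // mask_uv = ((1 << 2) - 1) * 0x1111 = 0x3333 and
    // mask_uv_int = ((1 << 1) - 1) * 0x1111 = 0x1111.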

    // Remove the bits outside the image edge.
    for (i = 0; i < TX_32X32; i++) {
      lfm->left_y[i] &= mask_y;
      lfm->above_y[i] &= mask_y;
      lfm->left_uv[i] &= mask_uv;
      lfm->above_uv[i] &= mask_uv;
    }
    lfm->int_4x4_y &= mask_y;
    lfm->int_4x4_uv &= mask_uv_int;

    // We don't apply a wide loop filter on the last uv column.  If set,
    // apply the shorter one instead.
    if (columns == 1) {
      lfm->left_uv[TX_8X8] |= lfm->left_uv[TX_16X16];
      lfm->left_uv[TX_16X16] = 0;
    }
    if (columns == 5) {
      lfm->left_uv[TX_8X8] |= (lfm->left_uv[TX_16X16] & 0xcccc);
      lfm->left_uv[TX_16X16] &= ~(lfm->left_uv[TX_16X16] & 0xcccc);
    }
  }
  // We don't apply a loop filter on the first column in the image.  Mask that
  // out.
  if (mi_col == 0) {
    for (i = 0; i < TX_32X32; i++) {
      lfm->left_y[i] &= 0xfefefefefefefefe;
      lfm->left_uv[i] &= 0xeeee;
    }
  }
}
#if CONFIG_NON420
static void filter_block_plane_non420(VP9_COMMON *cm,
                                      struct macroblockd_plane *plane,
                                      MODE_INFO **mi_8x8,
                                      int mi_row, int mi_col) {
  const int ss_x = plane->subsampling_x;
  const int ss_y = plane->subsampling_y;
  const int row_step = 1 << ss_x;
  const int col_step = 1 << ss_y;
  const int row_step_stride = cm->mode_info_stride * row_step;
  struct buf_2d *const dst = &plane->dst;
  uint8_t* const dst0 = dst->buf;
  unsigned int mask_16x16[MI_BLOCK_SIZE] = {0};
  unsigned int mask_8x8[MI_BLOCK_SIZE] = {0};
  unsigned int mask_4x4[MI_BLOCK_SIZE] = {0};
  unsigned int mask_4x4_int[MI_BLOCK_SIZE] = {0};
  struct loop_filter_info lfi[MI_BLOCK_SIZE][MI_BLOCK_SIZE];
  int r, c;

  for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += row_step) {
    unsigned int mask_16x16_c = 0;
    unsigned int mask_8x8_c = 0;
    unsigned int mask_4x4_c = 0;
    unsigned int border_mask;

    // Determine the vertical edges that need filtering
    for (c = 0; c < MI_BLOCK_SIZE && mi_col + c < cm->mi_cols; c += col_step) {
      const MODE_INFO *mi = mi_8x8[c];
      const int skip_this = mi[0].mbmi.skip_coeff
                            && is_inter_block(&mi[0].mbmi);
      // left edge of current unit is block/partition edge -> no skip
      const int block_edge_left = b_width_log2(mi[0].mbmi.sb_type) ?
          !(c & ((1 << (b_width_log2(mi[0].mbmi.sb_type) - 1)) - 1)) : 1;
      const int skip_this_c = skip_this && !block_edge_left;
      // top edge of current unit is block/partition edge -> no skip
      const int block_edge_above = b_height_log2(mi[0].mbmi.sb_type) ?
          !(r & ((1 << (b_height_log2(mi[0].mbmi.sb_type) - 1)) - 1)) : 1;
      const int skip_this_r = skip_this && !block_edge_above;
      const TX_SIZE tx_size = (plane->plane_type == PLANE_TYPE_UV)
                            ? get_uv_tx_size(&mi[0].mbmi)
                            : mi[0].mbmi.tx_size;
      const int skip_border_4x4_c = ss_x && mi_col + c == cm->mi_cols - 1;
      const int skip_border_4x4_r = ss_y && mi_row + r == cm->mi_rows - 1;

      // Filter level can vary per MI
      if (!build_lfi(&cm->lf_info, &mi[0].mbmi, lfi[r] + (c >> ss_x)))
        continue;

      // Build masks based on the transform size of each block
      if (tx_size == TX_32X32) {
        if (!skip_this_c && ((c >> ss_x) & 3) == 0) {
          if (!skip_border_4x4_c)
            mask_16x16_c |= 1 << (c >> ss_x);
          else
            mask_8x8_c |= 1 << (c >> ss_x);
        }
        if (!skip_this_r && ((r >> ss_y) & 3) == 0) {
          if (!skip_border_4x4_r)
            mask_16x16[r] |= 1 << (c >> ss_x);
          else
            mask_8x8[r] |= 1 << (c >> ss_x);
        }
      } else if (tx_size == TX_16X16) {
        if (!skip_this_c && ((c >> ss_x) & 1) == 0) {
          if (!skip_border_4x4_c)
            mask_16x16_c |= 1 << (c >> ss_x);
          else
            mask_8x8_c |= 1 << (c >> ss_x);
        }
        if (!skip_this_r && ((r >> ss_y) & 1) == 0) {
          if (!skip_border_4x4_r)
            mask_16x16[r] |= 1 << (c >> ss_x);
          else
            mask_8x8[r] |= 1 << (c >> ss_x);
        }
      } else {
        // force 8x8 filtering on 32x32 boundaries
        if (!skip_this_c) {
          if (tx_size == TX_8X8 || ((c >> ss_x) & 3) == 0)
            mask_8x8_c |= 1 << (c >> ss_x);
          else
            mask_4x4_c |= 1 << (c >> ss_x);
        }

        if (!skip_this_r) {
          if (tx_size == TX_8X8 || ((r >> ss_y) & 3) == 0)
            mask_8x8[r] |= 1 << (c >> ss_x);
          else
            mask_4x4[r] |= 1 << (c >> ss_x);
        }

        if (!skip_this && tx_size < TX_8X8 && !skip_border_4x4_c)
          mask_4x4_int[r] |= 1 << (c >> ss_x);
      }
    }

    // Disable filtering on the leftmost column
    border_mask = ~(mi_col == 0);
    filter_selectively_vert(dst->buf, dst->stride,
                            mask_16x16_c & border_mask,
                            mask_8x8_c & border_mask,
                            mask_4x4_c & border_mask,
                            mask_4x4_int[r], lfi[r]);
    dst->buf += 8 * dst->stride;
    mi_8x8 += row_step_stride;
  }

  // Now do horizontal pass
  dst->buf = dst0;
  for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += row_step) {
    const int skip_border_4x4_r = ss_y && mi_row + r == cm->mi_rows - 1;
    const unsigned int mask_4x4_int_r = skip_border_4x4_r ? 0 : mask_4x4_int[r];

    filter_selectively_horiz(dst->buf, dst->stride,
                             mask_16x16[r],
                             mask_8x8[r],
                             mask_4x4[r],
                             mask_4x4_int_r, mi_row + r == 0, lfi[r]);
    dst->buf += 8 * dst->stride;
  }
}
#endif

static void filter_block_plane(VP9_COMMON *const cm,
                               struct macroblockd_plane *const plane,
                               MODE_INFO **mi_8x8,
                               int mi_row, int mi_col,
                               LOOP_FILTER_MASK *lfm) {
  const int ss_x = plane->subsampling_x;
  const int ss_y = plane->subsampling_y;
  const int row_step = 1 << ss_x;
  const int col_step = 1 << ss_y;
  const int row_step_stride = cm->mode_info_stride * row_step;
  struct buf_2d *const dst = &plane->dst;
  uint8_t* const dst0 = dst->buf;
  unsigned int mask_4x4_int[MI_BLOCK_SIZE] = {0};
  struct loop_filter_info lfi[MI_BLOCK_SIZE][MI_BLOCK_SIZE];
  int r, c;
  int row_shift = 3 - ss_x;
  int row_mask = 0xff >> (ss_x << 2);

#define MASK_ROW(value) ((value >> (r_sampled << row_shift)) & row_mask)
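// MASK_ROW pulls the bits for one row of 8x8 blocks out of a 64 or 16 bit
// mask.  For the y plane (ss_x == 0) row_shift is 3 and row_mask is 0xff, so
// it extracts byte r of the 64 bit mask; for a 4:2:0 chroma plane (ss_x == 1)
// row_shift is 2 and row_mask is 0x0f, so it extracts nibble r_sampled of the
// 16 bit mask.  (This note is added for clarity; it is our reading of the
// macro.)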

  for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += row_step) {
    int r_sampled = r >> ss_x;

    // Determine the vertical edges that need filtering
    for (c = 0; c < MI_BLOCK_SIZE && mi_col + c < cm->mi_cols; c += col_step) {
      const MODE_INFO *mi = mi_8x8[c];
      if (!build_lfi(&cm->lf_info, &mi[0].mbmi, lfi[r] + (c >> ss_x)))
        continue;
    }
    if (!plane->plane_type) {
      mask_4x4_int[r] = MASK_ROW(lfm->int_4x4_y);
      // Disable filtering on the leftmost column
      filter_selectively_vert(dst->buf, dst->stride,
                              MASK_ROW(lfm->left_y[TX_16X16]),
                              MASK_ROW(lfm->left_y[TX_8X8]),
                              MASK_ROW(lfm->left_y[TX_4X4]),
                              MASK_ROW(lfm->int_4x4_y),
                              lfi[r]);
    } else {
      mask_4x4_int[r] = MASK_ROW(lfm->int_4x4_uv);
      // Disable filtering on the leftmost column
      filter_selectively_vert(dst->buf, dst->stride,
                              MASK_ROW(lfm->left_uv[TX_16X16]),
                              MASK_ROW(lfm->left_uv[TX_8X8]),
                              MASK_ROW(lfm->left_uv[TX_4X4]),
                              MASK_ROW(lfm->int_4x4_uv),
                              lfi[r]);
    }
    dst->buf += 8 * dst->stride;
    mi_8x8 += row_step_stride;
  }

  // Now do horizontal pass
  dst->buf = dst0;
  for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += row_step) {
    const int skip_border_4x4_r = ss_y && mi_row + r == cm->mi_rows - 1;
    const unsigned int mask_4x4_int_r = skip_border_4x4_r ? 0 : mask_4x4_int[r];
    int r_sampled = r >> ss_x;

    if (!plane->plane_type) {
      filter_selectively_horiz(dst->buf, dst->stride,
                               MASK_ROW(lfm->above_y[TX_16X16]),
                               MASK_ROW(lfm->above_y[TX_8X8]),
                               MASK_ROW(lfm->above_y[TX_4X4]),
                               MASK_ROW(lfm->int_4x4_y),
                               mi_row + r == 0, lfi[r]);
    } else {
      filter_selectively_horiz(dst->buf, dst->stride,
                               MASK_ROW(lfm->above_uv[TX_16X16]),
                               MASK_ROW(lfm->above_uv[TX_8X8]),
                               MASK_ROW(lfm->above_uv[TX_4X4]),
                               mask_4x4_int_r,
                               mi_row + r == 0, lfi[r]);
    }
    dst->buf += 8 * dst->stride;
  }
#undef MASK_ROW
}

void vp9_loop_filter_rows(const YV12_BUFFER_CONFIG *frame_buffer,
                          VP9_COMMON *cm, MACROBLOCKD *xd,
                          int start, int stop, int y_only) {
  const int num_planes = y_only ? 1 : MAX_MB_PLANE;
  int mi_row, mi_col;
  LOOP_FILTER_MASK lfm;
#if CONFIG_NON420
  int use_420 = y_only || (xd->plane[1].subsampling_y == 1 &&
      xd->plane[1].subsampling_x == 1);
#endif

  for (mi_row = start; mi_row < stop; mi_row += MI_BLOCK_SIZE) {
    MODE_INFO **mi_8x8 = cm->mi_grid_visible + mi_row * cm->mode_info_stride;

    for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) {
      int plane;

      setup_dst_planes(xd, frame_buffer, mi_row, mi_col);

      // TODO(JBB): Make setup_mask work for non 420.
#if CONFIG_NON420
      if (use_420)
#endif
        setup_mask(cm, mi_row, mi_col, mi_8x8 + mi_col, cm->mode_info_stride,
                   &lfm);

      for (plane = 0; plane < num_planes; ++plane) {
#if CONFIG_NON420
        if (use_420)
#endif
          filter_block_plane(cm, &xd->plane[plane], mi_8x8 + mi_col, mi_row,
                             mi_col, &lfm);
#if CONFIG_NON420
        else
          filter_block_plane_non420(cm, &xd->plane[plane], mi_8x8 + mi_col,
                                    mi_row, mi_col);
#endif
      }
    }
  }
}

void vp9_loop_filter_frame(VP9_COMMON *cm, MACROBLOCKD *xd,
                           int frame_filter_level,
                           int y_only, int partial) {
  int start_mi_row, end_mi_row, mi_rows_to_filter;
  if (!frame_filter_level) return;
  start_mi_row = 0;
  mi_rows_to_filter = cm->mi_rows;
  if (partial && cm->mi_rows > 8) {
    start_mi_row = cm->mi_rows >> 1;
    start_mi_row &= 0xfffffff8;
    mi_rows_to_filter = MAX(cm->mi_rows / 8, 8);
  }
  end_mi_row = start_mi_row + mi_rows_to_filter;
  vp9_loop_filter_frame_init(cm, frame_filter_level);
  vp9_loop_filter_rows(cm->frame_to_show, cm, xd,
                       start_mi_row, end_mi_row,
                       y_only);
}

int vp9_loop_filter_worker(void *arg1, void *arg2) {
  LFWorkerData *const lf_data = (LFWorkerData*)arg1;
  (void)arg2;
  vp9_loop_filter_rows(lf_data->frame_buffer, lf_data->cm, &lf_data->xd,
                       lf_data->start, lf_data->stop, lf_data->y_only);
  return 1;
}