/*
 *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <assert.h>
#include <string.h>

#include "./vpx_config.h"
#include "./vp9_rtcd.h"
#include "./vpx_dsp_rtcd.h"
#include "./vpx_scale_rtcd.h"

#include "vp9/common/vp9_onyxc_int.h"
#include "vp9/common/vp9_postproc.h"

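// In-loop multiframe quality enhancement (MFQE): when the current frame is
// coded at a clearly lower quality than the previous frame, blend its
// low-motion blocks with the co-located blocks of the previous decoded
// frame to reduce frame-to-frame quality fluctuation.
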
// TODO(jackychen): Replace this function with SSE2 code. There is one SSE2
// implementation in vp8, so consider how to share it between vp8 and vp9.
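// Fixed-point blend of co-located pixels:
//   dst = (src * src_weight + dst * ((1 << MFQE_PRECISION) - src_weight)
//          + rounding_bit) >> MFQE_PRECISION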
static void filter_by_weight(const uint8_t *src, int src_stride, uint8_t *dst,
                             int dst_stride, int block_size, int src_weight) {
  const int dst_weight = (1 << MFQE_PRECISION) - src_weight;
  const int rounding_bit = 1 << (MFQE_PRECISION - 1);
  int r, c;

  for (r = 0; r < block_size; r++) {
    for (c = 0; c < block_size; c++) {
      dst[c] = (src[c] * src_weight + dst[c] * dst_weight + rounding_bit) >>
               MFQE_PRECISION;
    }
    src += src_stride;
    dst += dst_stride;
  }
}

void vp9_filter_by_weight8x8_c(const uint8_t *src, int src_stride, uint8_t *dst,
                               int dst_stride, int src_weight) {
  filter_by_weight(src, src_stride, dst, dst_stride, 8, src_weight);
}

void vp9_filter_by_weight16x16_c(const uint8_t *src, int src_stride,
                                 uint8_t *dst, int dst_stride, int src_weight) {
  filter_by_weight(src, src_stride, dst, dst_stride, 16, src_weight);
}

static void filter_by_weight32x32(const uint8_t *src, int src_stride,
                                  uint8_t *dst, int dst_stride, int weight) {
  vp9_filter_by_weight16x16(src, src_stride, dst, dst_stride, weight);
  vp9_filter_by_weight16x16(src + 16, src_stride, dst + 16, dst_stride, weight);
  vp9_filter_by_weight16x16(src + src_stride * 16, src_stride,
                            dst + dst_stride * 16, dst_stride, weight);
  vp9_filter_by_weight16x16(src + src_stride * 16 + 16, src_stride,
                            dst + dst_stride * 16 + 16, dst_stride, weight);
}

static void filter_by_weight64x64(const uint8_t *src, int src_stride,
                                  uint8_t *dst, int dst_stride, int weight) {
  filter_by_weight32x32(src, src_stride, dst, dst_stride, weight);
  filter_by_weight32x32(src + 32, src_stride, dst + 32, dst_stride, weight);
  filter_by_weight32x32(src + src_stride * 32, src_stride,
                        dst + dst_stride * 32, dst_stride, weight);
  filter_by_weight32x32(src + src_stride * 32 + 32, src_stride,
                        dst + dst_stride * 32 + 32, dst_stride, weight);
}

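// Blend the luma plane and both chroma planes of one block with the given
// source weight. Chroma block sizes are half the luma dimensions, assuming
// 4:2:0 subsampling.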
static void apply_ifactor(const uint8_t *y, int y_stride, uint8_t *yd,
                          int yd_stride, const uint8_t *u, const uint8_t *v,
                          int uv_stride, uint8_t *ud, uint8_t *vd,
                          int uvd_stride, BLOCK_SIZE block_size, int weight) {
  if (block_size == BLOCK_16X16) {
    vp9_filter_by_weight16x16(y, y_stride, yd, yd_stride, weight);
    vp9_filter_by_weight8x8(u, uv_stride, ud, uvd_stride, weight);
    vp9_filter_by_weight8x8(v, uv_stride, vd, uvd_stride, weight);
  } else if (block_size == BLOCK_32X32) {
    filter_by_weight32x32(y, y_stride, yd, yd_stride, weight);
    vp9_filter_by_weight16x16(u, uv_stride, ud, uvd_stride, weight);
    vp9_filter_by_weight16x16(v, uv_stride, vd, uvd_stride, weight);
  } else if (block_size == BLOCK_64X64) {
    filter_by_weight64x64(y, y_stride, yd, yd_stride, weight);
    filter_by_weight32x32(u, uv_stride, ud, uvd_stride, weight);
    filter_by_weight32x32(v, uv_stride, vd, uvd_stride, weight);
  }
}

// TODO(jackychen): Determine whether to replace this with assembly code.
static void copy_mem8x8(const uint8_t *src, int src_stride, uint8_t *dst,
                        int dst_stride) {
  int r;
  for (r = 0; r < 8; r++) {
    memcpy(dst, src, 8);
    src += src_stride;
    dst += dst_stride;
  }
}

static void copy_mem16x16(const uint8_t *src, int src_stride, uint8_t *dst,
                          int dst_stride) {
  int r;
  for (r = 0; r < 16; r++) {
    memcpy(dst, src, 16);
    src += src_stride;
    dst += dst_stride;
  }
}

static void copy_mem32x32(const uint8_t *src, int src_stride, uint8_t *dst,
                          int dst_stride) {
  copy_mem16x16(src, src_stride, dst, dst_stride);
  copy_mem16x16(src + 16, src_stride, dst + 16, dst_stride);
  copy_mem16x16(src + src_stride * 16, src_stride, dst + dst_stride * 16,
                dst_stride);
  copy_mem16x16(src + src_stride * 16 + 16, src_stride,
                dst + dst_stride * 16 + 16, dst_stride);
}

static void copy_mem64x64(const uint8_t *src, int src_stride, uint8_t *dst,
                          int dst_stride) {
  copy_mem32x32(src, src_stride, dst, dst_stride);
  copy_mem32x32(src + 32, src_stride, dst + 32, dst_stride);
  copy_mem32x32(src + src_stride * 32, src_stride, dst + dst_stride * 32,
                dst_stride);
  copy_mem32x32(src + src_stride * 32 + 32, src_stride,
                dst + dst_stride * 32 + 32, dst_stride);
}

static void copy_block(const uint8_t *y, const uint8_t *u, const uint8_t *v,
                       int y_stride, int uv_stride, uint8_t *yd, uint8_t *ud,
                       uint8_t *vd, int yd_stride, int uvd_stride,
                       BLOCK_SIZE bs) {
  if (bs == BLOCK_16X16) {
    copy_mem16x16(y, y_stride, yd, yd_stride);
    copy_mem8x8(u, uv_stride, ud, uvd_stride);
    copy_mem8x8(v, uv_stride, vd, uvd_stride);
  } else if (bs == BLOCK_32X32) {
    copy_mem32x32(y, y_stride, yd, yd_stride);
    copy_mem16x16(u, uv_stride, ud, uvd_stride);
    copy_mem16x16(v, uv_stride, vd, uvd_stride);
  } else {
    copy_mem64x64(y, y_stride, yd, yd_stride);
    copy_mem32x32(u, uv_stride, ud, uvd_stride);
    copy_mem32x32(v, uv_stride, vd, uvd_stride);
  }
}

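// Derive the SAD and variance thresholds that scale the blend factor in
// mfqe_block(). Both thresholds grow with qdiff, the quantizer step-up of
// the current frame relative to the previous one, so a larger quality drop
// leads to stronger filtering.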
static void get_thr(BLOCK_SIZE bs, int qdiff, int *sad_thr, int *vdiff_thr) {
  const int adj = qdiff >> MFQE_PRECISION;
  if (bs == BLOCK_16X16) {
    *sad_thr = 7 + adj;
  } else if (bs == BLOCK_32X32) {
    *sad_thr = 6 + adj;
  } else {  // BLOCK_64X64
    *sad_thr = 5 + adj;
  }
  *vdiff_thr = 125 + qdiff;
}

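// Filter one square block: measure how closely the block matches its
// co-located block in the previous frame, then either blend the two blocks
// or copy the current block unchanged.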
    158 
    159 static void mfqe_block(BLOCK_SIZE bs, const uint8_t *y, const uint8_t *u,
    160                        const uint8_t *v, int y_stride, int uv_stride,
    161                        uint8_t *yd, uint8_t *ud, uint8_t *vd, int yd_stride,
    162                        int uvd_stride, int qdiff) {
    163   int sad, sad_thr, vdiff, vdiff_thr;
    164   uint32_t sse;
    165 
    166   get_thr(bs, qdiff, &sad_thr, &vdiff_thr);
    167 
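  // Normalize SAD and variance to per-pixel values: for an NxN block, add
  // half the pixel count and shift by log2(N * N), e.g. >> 8 for the 256
  // pixels of a 16x16 block.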
  if (bs == BLOCK_16X16) {
    vdiff = (vpx_variance16x16(y, y_stride, yd, yd_stride, &sse) + 128) >> 8;
    sad = (vpx_sad16x16(y, y_stride, yd, yd_stride) + 128) >> 8;
  } else if (bs == BLOCK_32X32) {
    vdiff = (vpx_variance32x32(y, y_stride, yd, yd_stride, &sse) + 512) >> 10;
    sad = (vpx_sad32x32(y, y_stride, yd, yd_stride) + 512) >> 10;
  } else /* if (bs == BLOCK_64X64) */ {
    vdiff = (vpx_variance64x64(y, y_stride, yd, yd_stride, &sse) + 2048) >> 12;
    sad = (vpx_sad64x64(y, y_stride, yd, yd_stride) + 2048) >> 12;
  }

  // Requiring vdiff > sad * 3 keeps vdiff from being too small relative to
  // sad; a small vdiff with a large sad suggests a lighting change in a
  // smooth area, where applying MFQE is dangerous.
  if (sad > 1 && vdiff > sad * 3) {
    const int weight = 1 << MFQE_PRECISION;
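    // The current-frame weight scales with the mismatch: the larger
    // sad * vdiff is relative to the thresholds, the more the result leans
    // on the current frame and the weaker the filtering.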
    int ifactor = weight * sad * vdiff / (sad_thr * vdiff_thr);
    // When ifactor reaches weight, the blend keeps only the current frame,
    // i.e. no MFQE is done.
    if (ifactor > weight) {
      ifactor = weight;
    }
    apply_ifactor(y, y_stride, yd, yd_stride, u, v, uv_stride, ud, vd,
                  uvd_stride, bs, ifactor);
  } else {
    // Copy the block from the current frame, i.e. no MFQE is done.
    copy_block(y, u, v, y_stride, uv_stride, yd, ud, vd, yd_stride, uvd_stride,
               bs);
  }
}

static int mfqe_decision(MODE_INFO *mi, BLOCK_SIZE cur_bs) {
  // Check the motion in the current block (for an inter frame), or the
  // motion in the co-located block of the last frame (for a key frame).
  const int mv_len_square = mi->mv[0].as_mv.row * mi->mv[0].as_mv.row +
                            mi->mv[0].as_mv.col * mi->mv[0].as_mv.col;
  const int mv_threshold = 100;
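  // With VP9 motion vectors in eighth-pel units, this threshold admits
  // motion of at most 1.25 pixels (sqrt(100) = 10 eighth-pels).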
  return mi->mode >= NEARESTMV &&  // Not an intra block
         cur_bs >= BLOCK_16X16 && mv_len_square <= mv_threshold;
}

// Process each partition in a superblock, recursively.
static void mfqe_partition(VP9_COMMON *cm, MODE_INFO *mi, BLOCK_SIZE bs,
                           const uint8_t *y, const uint8_t *u, const uint8_t *v,
                           int y_stride, int uv_stride, uint8_t *yd,
                           uint8_t *ud, uint8_t *vd, int yd_stride,
                           int uvd_stride) {
  int mi_offset, y_offset, uv_offset;
  const BLOCK_SIZE cur_bs = mi->sb_type;
  const int qdiff = cm->base_qindex - cm->postproc_state.last_base_qindex;
  const int bsl = b_width_log2_lookup[bs];
  PARTITION_TYPE partition = partition_lookup[bsl][cur_bs];
  const BLOCK_SIZE subsize = get_subsize(bs, partition);

  if (cur_bs < BLOCK_8X8) {
    // A block smaller than 8x8 must be on the boundary; skip it.
    return;
  }
  // No MFQE on blocks smaller than 16x16, so stop subdividing at 16x16.
  if (bs == BLOCK_16X16) {
    partition = PARTITION_NONE;
  }
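  // Offsets to the second half of the partition: mi_offset is in 8x8
  // mode-info units, y_offset in luma pixels, and uv_offset in chroma
  // pixels (half of y_offset under 4:2:0 subsampling).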
  if (bs == BLOCK_64X64) {
    mi_offset = 4;
    y_offset = 32;
    uv_offset = 16;
  } else {
    mi_offset = 2;
    y_offset = 16;
    uv_offset = 8;
  }
  switch (partition) {
    BLOCK_SIZE mfqe_bs, bs_tmp;
    case PARTITION_HORZ:
      if (bs == BLOCK_64X64) {
        mfqe_bs = BLOCK_64X32;
        bs_tmp = BLOCK_32X32;
      } else {
        mfqe_bs = BLOCK_32X16;
        bs_tmp = BLOCK_16X16;
      }
      if (mfqe_decision(mi, mfqe_bs)) {
        // Do MFQE on the first square partition.
        mfqe_block(bs_tmp, y, u, v, y_stride, uv_stride, yd, ud, vd, yd_stride,
                   uvd_stride, qdiff);
        // Do MFQE on the second square partition.
        mfqe_block(bs_tmp, y + y_offset, u + uv_offset, v + uv_offset, y_stride,
                   uv_stride, yd + y_offset, ud + uv_offset, vd + uv_offset,
                   yd_stride, uvd_stride, qdiff);
      }
      if (mfqe_decision(mi + mi_offset * cm->mi_stride, mfqe_bs)) {
        // Do MFQE on the first square partition.
        mfqe_block(bs_tmp, y + y_offset * y_stride, u + uv_offset * uv_stride,
                   v + uv_offset * uv_stride, y_stride, uv_stride,
                   yd + y_offset * yd_stride, ud + uv_offset * uvd_stride,
                   vd + uv_offset * uvd_stride, yd_stride, uvd_stride, qdiff);
        // Do MFQE on the second square partition.
        mfqe_block(bs_tmp, y + y_offset * y_stride + y_offset,
                   u + uv_offset * uv_stride + uv_offset,
                   v + uv_offset * uv_stride + uv_offset, y_stride, uv_stride,
                   yd + y_offset * yd_stride + y_offset,
                   ud + uv_offset * uvd_stride + uv_offset,
                   vd + uv_offset * uvd_stride + uv_offset, yd_stride,
                   uvd_stride, qdiff);
      }
      break;
    case PARTITION_VERT:
      if (bs == BLOCK_64X64) {
        mfqe_bs = BLOCK_32X64;
        bs_tmp = BLOCK_32X32;
      } else {
        mfqe_bs = BLOCK_16X32;
        bs_tmp = BLOCK_16X16;
      }
      if (mfqe_decision(mi, mfqe_bs)) {
        // Do MFQE on the first square partition.
        mfqe_block(bs_tmp, y, u, v, y_stride, uv_stride, yd, ud, vd, yd_stride,
                   uvd_stride, qdiff);
        // Do MFQE on the second square partition.
        mfqe_block(bs_tmp, y + y_offset * y_stride, u + uv_offset * uv_stride,
                   v + uv_offset * uv_stride, y_stride, uv_stride,
                   yd + y_offset * yd_stride, ud + uv_offset * uvd_stride,
                   vd + uv_offset * uvd_stride, yd_stride, uvd_stride, qdiff);
      }
      if (mfqe_decision(mi + mi_offset, mfqe_bs)) {
        // Do MFQE on the first square partition.
        mfqe_block(bs_tmp, y + y_offset, u + uv_offset, v + uv_offset, y_stride,
                   uv_stride, yd + y_offset, ud + uv_offset, vd + uv_offset,
                   yd_stride, uvd_stride, qdiff);
        // Do MFQE on the second square partition.
        mfqe_block(bs_tmp, y + y_offset * y_stride + y_offset,
                   u + uv_offset * uv_stride + uv_offset,
                   v + uv_offset * uv_stride + uv_offset, y_stride, uv_stride,
                   yd + y_offset * yd_stride + y_offset,
                   ud + uv_offset * uvd_stride + uv_offset,
                   vd + uv_offset * uvd_stride + uv_offset, yd_stride,
                   uvd_stride, qdiff);
      }
      break;
    case PARTITION_NONE:
      if (mfqe_decision(mi, cur_bs)) {
        // Do MFQE on this partition.
        mfqe_block(cur_bs, y, u, v, y_stride, uv_stride, yd, ud, vd, yd_stride,
                   uvd_stride, qdiff);
      } else {
        // Copy the block from the current frame, i.e. no MFQE is done.
        copy_block(y, u, v, y_stride, uv_stride, yd, ud, vd, yd_stride,
                   uvd_stride, bs);
      }
      break;
    case PARTITION_SPLIT:
      // Recurse into the four square sub-partitions; e.g. if bs is 64X64,
      // process the four 32X32 blocks inside it.
      mfqe_partition(cm, mi, subsize, y, u, v, y_stride, uv_stride, yd, ud, vd,
                     yd_stride, uvd_stride);
      mfqe_partition(cm, mi + mi_offset, subsize, y + y_offset, u + uv_offset,
                     v + uv_offset, y_stride, uv_stride, yd + y_offset,
                     ud + uv_offset, vd + uv_offset, yd_stride, uvd_stride);
      mfqe_partition(cm, mi + mi_offset * cm->mi_stride, subsize,
                     y + y_offset * y_stride, u + uv_offset * uv_stride,
                     v + uv_offset * uv_stride, y_stride, uv_stride,
                     yd + y_offset * yd_stride, ud + uv_offset * uvd_stride,
                     vd + uv_offset * uvd_stride, yd_stride, uvd_stride);
      mfqe_partition(cm, mi + mi_offset * cm->mi_stride + mi_offset, subsize,
                     y + y_offset * y_stride + y_offset,
                     u + uv_offset * uv_stride + uv_offset,
                     v + uv_offset * uv_stride + uv_offset, y_stride, uv_stride,
                     yd + y_offset * yd_stride + y_offset,
                     ud + uv_offset * uvd_stride + uv_offset,
                     vd + uv_offset * uvd_stride + uv_offset, yd_stride,
                     uvd_stride);
      break;
    default: assert(0);
  }
}

void vp9_mfqe(VP9_COMMON *cm) {
  int mi_row, mi_col;
  // Current decoded frame.
  const YV12_BUFFER_CONFIG *show = cm->frame_to_show;
  // Last decoded frame, which will also store the MFQE result.
  YV12_BUFFER_CONFIG *dest = &cm->post_proc_buffer;
  // Loop through each superblock.
  for (mi_row = 0; mi_row < cm->mi_rows; mi_row += MI_BLOCK_SIZE) {
    for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) {
      MODE_INFO *mi;
      MODE_INFO *mi_local = cm->mi + (mi_row * cm->mi_stride + mi_col);
      // Motion info from the last frame.
      MODE_INFO *mi_prev =
          cm->postproc_state.prev_mi + (mi_row * cm->mi_stride + mi_col);
      const uint32_t y_stride = show->y_stride;
      const uint32_t uv_stride = show->uv_stride;
      const uint32_t yd_stride = dest->y_stride;
      const uint32_t uvd_stride = dest->uv_stride;
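      // Translate the mode-info position into pixel offsets: one mi unit
      // covers 8 luma pixels (<< 3) and, under 4:2:0 subsampling, 4 chroma
      // pixels (<< 2).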
      const uint32_t row_offset_y = mi_row << 3;
      const uint32_t row_offset_uv = mi_row << 2;
      const uint32_t col_offset_y = mi_col << 3;
      const uint32_t col_offset_uv = mi_col << 2;
      const uint8_t *y =
          show->y_buffer + row_offset_y * y_stride + col_offset_y;
      const uint8_t *u =
          show->u_buffer + row_offset_uv * uv_stride + col_offset_uv;
      const uint8_t *v =
          show->v_buffer + row_offset_uv * uv_stride + col_offset_uv;
      uint8_t *yd = dest->y_buffer + row_offset_y * yd_stride + col_offset_y;
      uint8_t *ud = dest->u_buffer + row_offset_uv * uvd_stride + col_offset_uv;
      uint8_t *vd = dest->v_buffer + row_offset_uv * uvd_stride + col_offset_uv;
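      // Intra-only (key) frames carry no motion vectors, so base the MFQE
      // decision on the motion info of the co-located blocks in the
      // previous frame.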
      if (frame_is_intra_only(cm)) {
        mi = mi_prev;
      } else {
        mi = mi_local;
      }
      mfqe_partition(cm, mi, BLOCK_64X64, y, u, v, y_stride, uv_stride, yd, ud,
                     vd, yd_stride, uvd_stride);
    }
  }
}