/*
 *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "./vpx_config.h"
#include "./vp9_rtcd.h"
#include "./vpx_dsp_rtcd.h"
#include "./vpx_scale_rtcd.h"

#include "vp9/common/vp9_onyxc_int.h"
#include "vp9/common/vp9_postproc.h"

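// MFQE (multi-frame quality enhancement) is a post-processing pass: when
// the current frame is coded at much lower quality than the last one,
// blocks with little motion are blended with the co-located blocks of the
// last frame, using per-block SAD and variance to decide how much of each
// frame to keep.
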
// TODO(jackychen): Replace this function with SSE2 code. There is
// one SSE2 implementation in vp8, so consider how to share it
// between vp8 and vp9.
static void filter_by_weight(const uint8_t *src, int src_stride,
                             uint8_t *dst, int dst_stride,
                             int block_size, int src_weight) {
  const int dst_weight = (1 << MFQE_PRECISION) - src_weight;
  const int rounding_bit = 1 << (MFQE_PRECISION - 1);
  int r, c;

  for (r = 0; r < block_size; r++) {
    for (c = 0; c < block_size; c++) {
      dst[c] = (src[c] * src_weight + dst[c] * dst_weight + rounding_bit)
               >> MFQE_PRECISION;
    }
    src += src_stride;
    dst += dst_stride;
  }
}
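
// A worked example of the fixed-point blend above, assuming MFQE_PRECISION
// is 4: the total weight is 1 << 4 = 16 and the rounding bit is 8, so
// src_weight = 12 gives dst_weight = 16 - 12 = 4 and
//   dst[c] = (src[c] * 12 + dst[c] * 4 + 8) >> 4,
// i.e. a rounded 75%/25% mix of the source and destination pixels.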

void vp9_filter_by_weight8x8_c(const uint8_t *src, int src_stride,
                               uint8_t *dst, int dst_stride, int src_weight) {
  filter_by_weight(src, src_stride, dst, dst_stride, 8, src_weight);
}

void vp9_filter_by_weight16x16_c(const uint8_t *src, int src_stride,
                                 uint8_t *dst, int dst_stride,
                                 int src_weight) {
  filter_by_weight(src, src_stride, dst, dst_stride, 16, src_weight);
}

static void filter_by_weight32x32(const uint8_t *src, int src_stride,
                                  uint8_t *dst, int dst_stride, int weight) {
  vp9_filter_by_weight16x16(src, src_stride, dst, dst_stride, weight);
  vp9_filter_by_weight16x16(src + 16, src_stride, dst + 16, dst_stride,
                            weight);
  vp9_filter_by_weight16x16(src + src_stride * 16, src_stride,
                            dst + dst_stride * 16, dst_stride, weight);
  vp9_filter_by_weight16x16(src + src_stride * 16 + 16, src_stride,
                            dst + dst_stride * 16 + 16, dst_stride, weight);
}

static void filter_by_weight64x64(const uint8_t *src, int src_stride,
                                  uint8_t *dst, int dst_stride, int weight) {
  filter_by_weight32x32(src, src_stride, dst, dst_stride, weight);
  filter_by_weight32x32(src + 32, src_stride, dst + 32,
                        dst_stride, weight);
  filter_by_weight32x32(src + src_stride * 32, src_stride,
                        dst + dst_stride * 32, dst_stride, weight);
  filter_by_weight32x32(src + src_stride * 32 + 32, src_stride,
                        dst + dst_stride * 32 + 32, dst_stride, weight);
}

static void apply_ifactor(const uint8_t *y, int y_stride, uint8_t *yd,
                          int yd_stride, const uint8_t *u, const uint8_t *v,
                          int uv_stride, uint8_t *ud, uint8_t *vd,
                          int uvd_stride, BLOCK_SIZE block_size,
                          int weight) {
  if (block_size == BLOCK_16X16) {
    vp9_filter_by_weight16x16(y, y_stride, yd, yd_stride, weight);
    vp9_filter_by_weight8x8(u, uv_stride, ud, uvd_stride, weight);
    vp9_filter_by_weight8x8(v, uv_stride, vd, uvd_stride, weight);
  } else if (block_size == BLOCK_32X32) {
    filter_by_weight32x32(y, y_stride, yd, yd_stride, weight);
    vp9_filter_by_weight16x16(u, uv_stride, ud, uvd_stride, weight);
    vp9_filter_by_weight16x16(v, uv_stride, vd, uvd_stride, weight);
  } else if (block_size == BLOCK_64X64) {
    filter_by_weight64x64(y, y_stride, yd, yd_stride, weight);
    filter_by_weight32x32(u, uv_stride, ud, uvd_stride, weight);
    filter_by_weight32x32(v, uv_stride, vd, uvd_stride, weight);
  }
}
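
// The chroma blocks above are half the luma dimensions in each direction,
// which matches the 4:2:0 subsampling this postprocessor assumes.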

// TODO(jackychen): Determine whether to replace this with assembly code.
static void copy_mem8x8(const uint8_t *src, int src_stride,
                        uint8_t *dst, int dst_stride) {
  int r;
  for (r = 0; r < 8; r++) {
    memcpy(dst, src, 8);
    src += src_stride;
    dst += dst_stride;
  }
}

static void copy_mem16x16(const uint8_t *src, int src_stride,
                          uint8_t *dst, int dst_stride) {
  int r;
  for (r = 0; r < 16; r++) {
    memcpy(dst, src, 16);
    src += src_stride;
    dst += dst_stride;
  }
}

static void copy_mem32x32(const uint8_t *src, int src_stride,
                          uint8_t *dst, int dst_stride) {
  copy_mem16x16(src, src_stride, dst, dst_stride);
  copy_mem16x16(src + 16, src_stride, dst + 16, dst_stride);
  copy_mem16x16(src + src_stride * 16, src_stride,
                dst + dst_stride * 16, dst_stride);
  copy_mem16x16(src + src_stride * 16 + 16, src_stride,
                dst + dst_stride * 16 + 16, dst_stride);
}

static void copy_mem64x64(const uint8_t *src, int src_stride,
                          uint8_t *dst, int dst_stride) {
  copy_mem32x32(src, src_stride, dst, dst_stride);
  copy_mem32x32(src + 32, src_stride, dst + 32, dst_stride);
  // Advance the destination by dst_stride, not src_stride; the two
  // buffers can have different strides.
  copy_mem32x32(src + src_stride * 32, src_stride,
                dst + dst_stride * 32, dst_stride);
  copy_mem32x32(src + src_stride * 32 + 32, src_stride,
                dst + dst_stride * 32 + 32, dst_stride);
}

static void copy_block(const uint8_t *y, const uint8_t *u, const uint8_t *v,
                       int y_stride, int uv_stride, uint8_t *yd, uint8_t *ud,
                       uint8_t *vd, int yd_stride, int uvd_stride,
                       BLOCK_SIZE bs) {
  if (bs == BLOCK_16X16) {
    copy_mem16x16(y, y_stride, yd, yd_stride);
    copy_mem8x8(u, uv_stride, ud, uvd_stride);
    copy_mem8x8(v, uv_stride, vd, uvd_stride);
  } else if (bs == BLOCK_32X32) {
    copy_mem32x32(y, y_stride, yd, yd_stride);
    copy_mem16x16(u, uv_stride, ud, uvd_stride);
    copy_mem16x16(v, uv_stride, vd, uvd_stride);
  } else {
    copy_mem64x64(y, y_stride, yd, yd_stride);
    copy_mem32x32(u, uv_stride, ud, uvd_stride);
    copy_mem32x32(v, uv_stride, vd, uvd_stride);
  }
}

static void get_thr(BLOCK_SIZE bs, int qdiff, int *sad_thr, int *vdiff_thr) {
  const int adj = qdiff >> MFQE_PRECISION;
  if (bs == BLOCK_16X16) {
    *sad_thr = 7 + adj;
  } else if (bs == BLOCK_32X32) {
    *sad_thr = 6 + adj;
  } else {  // BLOCK_64X64
    *sad_thr = 5 + adj;
  }
  *vdiff_thr = 125 + qdiff;
}
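
// The thresholds scale with qdiff, the quantizer gap between this frame
// and the last one: a larger gap means a bigger quality drop, and the
// larger thresholds shrink ifactor below so the blend leans further toward
// the last frame. For example, assuming MFQE_PRECISION is 4, qdiff = 48
// gives adj = 48 >> 4 = 3, so a 16x16 block uses sad_thr = 7 + 3 = 10 and
// vdiff_thr = 125 + 48 = 173.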

static void mfqe_block(BLOCK_SIZE bs, const uint8_t *y, const uint8_t *u,
                       const uint8_t *v, int y_stride, int uv_stride,
                       uint8_t *yd, uint8_t *ud, uint8_t *vd, int yd_stride,
                       int uvd_stride, int qdiff) {
  int sad, sad_thr, vdiff, vdiff_thr;
  uint32_t sse;

  get_thr(bs, qdiff, &sad_thr, &vdiff_thr);

  // Normalize SAD and variance to rounded per-pixel averages: a 16x16
  // block has 256 pixels, a 32x32 block 1024, and a 64x64 block 4096.
  if (bs == BLOCK_16X16) {
    vdiff = (vpx_variance16x16(y, y_stride, yd, yd_stride, &sse) + 128) >> 8;
    sad = (vpx_sad16x16(y, y_stride, yd, yd_stride) + 128) >> 8;
  } else if (bs == BLOCK_32X32) {
    vdiff = (vpx_variance32x32(y, y_stride, yd, yd_stride, &sse) + 512) >> 10;
    sad = (vpx_sad32x32(y, y_stride, yd, yd_stride) + 512) >> 10;
  } else /* if (bs == BLOCK_64X64) */ {
    vdiff = (vpx_variance64x64(y, y_stride, yd, yd_stride, &sse) + 2048) >> 12;
    sad = (vpx_sad64x64(y, y_stride, yd, yd_stride) + 2048) >> 12;
  }

  // Requiring vdiff > sad * 3 keeps vdiff from being too small; a small
  // vdiff with a large sad is typical of a lighting change over a smooth
  // area, where applying MFQE is risky.
  if (sad > 1 && vdiff > sad * 3) {
    const int weight = 1 << MFQE_PRECISION;
    int ifactor = weight * sad * vdiff / (sad_thr * vdiff_thr);
    // When ifactor equals weight, no MFQE is done.
    if (ifactor > weight) {
      ifactor = weight;
    }
    apply_ifactor(y, y_stride, yd, yd_stride, u, v, uv_stride, ud, vd,
                  uvd_stride, bs, ifactor);
  } else {
    // Copy the block from the current frame (i.e., no MFQE is done).
    copy_block(y, u, v, y_stride, uv_stride, yd, ud, vd,
               yd_stride, uvd_stride, bs);
  }
}
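
// ifactor is the fixed-point weight given to the current frame: blocks
// that differ little from the last frame (sad and vdiff well below their
// thresholds) get a small ifactor and take most of their pixels from the
// last frame, while blocks that differ a lot saturate at ifactor == weight
// and are left unchanged.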

static int mfqe_decision(MODE_INFO *mi, BLOCK_SIZE cur_bs) {
  // Check the motion in the current block (for an inter frame), or the
  // motion in the co-located block of the last frame (for a keyframe).
  const int mv_len_square = mi->mbmi.mv[0].as_mv.row *
                            mi->mbmi.mv[0].as_mv.row +
                            mi->mbmi.mv[0].as_mv.col *
                            mi->mbmi.mv[0].as_mv.col;
  const int mv_threshold = 100;
  return mi->mbmi.mode >= NEARESTMV &&  // Not an intra block
         cur_bs >= BLOCK_16X16 &&
         mv_len_square <= mv_threshold;
}
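
// VP9 motion vectors are stored in 1/8-pel units, so mv_threshold = 100
// admits motion up to sqrt(100) = 10 eighth-pels, i.e. about 1.25 pixels;
// MFQE is only attempted on nearly static blocks.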

// Process each partition in a superblock, recursively.
static void mfqe_partition(VP9_COMMON *cm, MODE_INFO *mi, BLOCK_SIZE bs,
                           const uint8_t *y, const uint8_t *u,
                           const uint8_t *v, int y_stride, int uv_stride,
                           uint8_t *yd, uint8_t *ud, uint8_t *vd,
                           int yd_stride, int uvd_stride) {
  int mi_offset, y_offset, uv_offset;
  const BLOCK_SIZE cur_bs = mi->mbmi.sb_type;
  const int qdiff = cm->base_qindex - cm->postproc_state.last_base_qindex;
  const int bsl = b_width_log2_lookup[bs];
  PARTITION_TYPE partition = partition_lookup[bsl][cur_bs];
  const BLOCK_SIZE subsize = get_subsize(bs, partition);

  if (cur_bs < BLOCK_8X8) {
    // Blocks smaller than 8x8 can only occur on the frame boundary.
    return;
  }
  // No MFQE on blocks smaller than 16x16.
  if (bs == BLOCK_16X16) {
    partition = PARTITION_NONE;
  }
  if (bs == BLOCK_64X64) {
    mi_offset = 4;
    y_offset = 32;
    uv_offset = 16;
  } else {
    mi_offset = 2;
    y_offset = 16;
    uv_offset = 8;
  }
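  // The offsets step to the second half of the current block: mi_offset is
  // in MODE_INFO units (8 luma pixels each), y_offset in luma pixels, and
  // uv_offset in chroma pixels (half of y_offset under 4:2:0).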
  switch (partition) {
    BLOCK_SIZE mfqe_bs, bs_tmp;
    case PARTITION_HORZ:
      if (bs == BLOCK_64X64) {
        mfqe_bs = BLOCK_64X32;
        bs_tmp = BLOCK_32X32;
      } else {
        mfqe_bs = BLOCK_32X16;
        bs_tmp = BLOCK_16X16;
      }
      if (mfqe_decision(mi, mfqe_bs)) {
        // Do mfqe on the first square partition.
        mfqe_block(bs_tmp, y, u, v, y_stride, uv_stride,
                   yd, ud, vd, yd_stride, uvd_stride, qdiff);
        // Do mfqe on the second square partition.
        mfqe_block(bs_tmp, y + y_offset, u + uv_offset, v + uv_offset,
                   y_stride, uv_stride, yd + y_offset, ud + uv_offset,
                   vd + uv_offset, yd_stride, uvd_stride, qdiff);
      }
      if (mfqe_decision(mi + mi_offset * cm->mi_stride, mfqe_bs)) {
        // Do mfqe on the first square partition.
        mfqe_block(bs_tmp, y + y_offset * y_stride, u + uv_offset * uv_stride,
                   v + uv_offset * uv_stride, y_stride, uv_stride,
                   yd + y_offset * yd_stride, ud + uv_offset * uvd_stride,
                   vd + uv_offset * uvd_stride, yd_stride, uvd_stride, qdiff);
        // Do mfqe on the second square partition.
        mfqe_block(bs_tmp, y + y_offset * y_stride + y_offset,
                   u + uv_offset * uv_stride + uv_offset,
                   v + uv_offset * uv_stride + uv_offset, y_stride,
                   uv_stride, yd + y_offset * yd_stride + y_offset,
                   ud + uv_offset * uvd_stride + uv_offset,
                   vd + uv_offset * uvd_stride + uv_offset,
                   yd_stride, uvd_stride, qdiff);
      }
      break;
    case PARTITION_VERT:
      if (bs == BLOCK_64X64) {
        mfqe_bs = BLOCK_32X64;
        bs_tmp = BLOCK_32X32;
      } else {
        mfqe_bs = BLOCK_16X32;
        bs_tmp = BLOCK_16X16;
      }
      if (mfqe_decision(mi, mfqe_bs)) {
        // Do mfqe on the first square partition.
        mfqe_block(bs_tmp, y, u, v, y_stride, uv_stride,
                   yd, ud, vd, yd_stride, uvd_stride, qdiff);
        // Do mfqe on the second square partition.
        mfqe_block(bs_tmp, y + y_offset * y_stride, u + uv_offset * uv_stride,
                   v + uv_offset * uv_stride, y_stride, uv_stride,
                   yd + y_offset * yd_stride, ud + uv_offset * uvd_stride,
                   vd + uv_offset * uvd_stride, yd_stride, uvd_stride, qdiff);
      }
      if (mfqe_decision(mi + mi_offset, mfqe_bs)) {
        // Do mfqe on the first square partition.
        mfqe_block(bs_tmp, y + y_offset, u + uv_offset, v + uv_offset,
                   y_stride, uv_stride, yd + y_offset, ud + uv_offset,
                   vd + uv_offset, yd_stride, uvd_stride, qdiff);
        // Do mfqe on the second square partition.
        mfqe_block(bs_tmp, y + y_offset * y_stride + y_offset,
                   u + uv_offset * uv_stride + uv_offset,
                   v + uv_offset * uv_stride + uv_offset, y_stride,
                   uv_stride, yd + y_offset * yd_stride + y_offset,
                   ud + uv_offset * uvd_stride + uv_offset,
                   vd + uv_offset * uvd_stride + uv_offset,
                   yd_stride, uvd_stride, qdiff);
      }
      break;
    case PARTITION_NONE:
      if (mfqe_decision(mi, cur_bs)) {
        // Do mfqe on this partition.
        mfqe_block(cur_bs, y, u, v, y_stride, uv_stride,
                   yd, ud, vd, yd_stride, uvd_stride, qdiff);
      } else {
        // Copy the block from the current frame (i.e., no MFQE is done).
        copy_block(y, u, v, y_stride, uv_stride, yd, ud, vd,
                   yd_stride, uvd_stride, bs);
      }
      break;
    case PARTITION_SPLIT:
      // Recurse into the four square sub-partitions, e.g. if bs is 64X64,
      // look into the four 32X32 blocks inside it.
      mfqe_partition(cm, mi, subsize, y, u, v, y_stride, uv_stride, yd, ud, vd,
                     yd_stride, uvd_stride);
      mfqe_partition(cm, mi + mi_offset, subsize, y + y_offset, u + uv_offset,
                     v + uv_offset, y_stride, uv_stride, yd + y_offset,
                     ud + uv_offset, vd + uv_offset, yd_stride, uvd_stride);
      mfqe_partition(cm, mi + mi_offset * cm->mi_stride, subsize,
                     y + y_offset * y_stride, u + uv_offset * uv_stride,
                     v + uv_offset * uv_stride, y_stride, uv_stride,
                     yd + y_offset * yd_stride, ud + uv_offset * uvd_stride,
                     vd + uv_offset * uvd_stride, yd_stride, uvd_stride);
      mfqe_partition(cm, mi + mi_offset * cm->mi_stride + mi_offset,
                     subsize, y + y_offset * y_stride + y_offset,
                     u + uv_offset * uv_stride + uv_offset,
                     v + uv_offset * uv_stride + uv_offset, y_stride,
                     uv_stride, yd + y_offset * yd_stride + y_offset,
                     ud + uv_offset * uvd_stride + uv_offset,
                     vd + uv_offset * uvd_stride + uv_offset,
                     yd_stride, uvd_stride);
      break;
    default:
      assert(0);
  }
}
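
// mfqe_block only handles square sizes, so the HORZ and VERT cases above
// process a rectangular partition as two square halves (bs_tmp) after one
// motion decision taken at the rectangular size (mfqe_bs).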

void vp9_mfqe(VP9_COMMON *cm) {
  int mi_row, mi_col;
  // Current decoded frame.
  const YV12_BUFFER_CONFIG *show = cm->frame_to_show;
  // Last decoded frame; it also receives the MFQE result.
  YV12_BUFFER_CONFIG *dest = &cm->post_proc_buffer;
  // Loop through each super block.
  for (mi_row = 0; mi_row < cm->mi_rows; mi_row += MI_BLOCK_SIZE) {
    for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) {
      MODE_INFO *mi;
      MODE_INFO *mi_local = cm->mi + (mi_row * cm->mi_stride + mi_col);
      // Motion info from the last frame.
      MODE_INFO *mi_prev = cm->postproc_state.prev_mi +
                           (mi_row * cm->mi_stride + mi_col);
      const uint32_t y_stride = show->y_stride;
      const uint32_t uv_stride = show->uv_stride;
      const uint32_t yd_stride = dest->y_stride;
      const uint32_t uvd_stride = dest->uv_stride;
      // Convert mi units (8 luma pixels each) into pixel offsets; chroma
      // offsets are halved for 4:2:0.
      const uint32_t row_offset_y = mi_row << 3;
      const uint32_t row_offset_uv = mi_row << 2;
      const uint32_t col_offset_y = mi_col << 3;
      const uint32_t col_offset_uv = mi_col << 2;
      const uint8_t *y = show->y_buffer + row_offset_y * y_stride +
                         col_offset_y;
      const uint8_t *u = show->u_buffer + row_offset_uv * uv_stride +
                         col_offset_uv;
      const uint8_t *v = show->v_buffer + row_offset_uv * uv_stride +
                         col_offset_uv;
      uint8_t *yd = dest->y_buffer + row_offset_y * yd_stride + col_offset_y;
      uint8_t *ud = dest->u_buffer + row_offset_uv * uvd_stride +
                    col_offset_uv;
      uint8_t *vd = dest->v_buffer + row_offset_uv * uvd_stride +
                    col_offset_uv;
      // Keyframes carry no motion info of their own, so use the motion
      // info saved from the last frame instead.
      if (frame_is_intra_only(cm)) {
        mi = mi_prev;
      } else {
        mi = mi_local;
      }
      mfqe_partition(cm, mi, BLOCK_64X64, y, u, v, y_stride, uv_stride, yd, ud,
                     vd, yd_stride, uvd_stride);
    }
  }
}
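
// A minimal sketch of how this entry point is typically driven from the
// postprocessor; the surrounding control flow and the `ppflags` variable
// are illustrative assumptions, not code from this tree:
//
//   VP9_COMMON *cm = ...;  // decoder state, after the frame is decoded
//   if (ppflags & VP9D_MFQE) {
//     vp9_mfqe(cm);  // writes the enhanced frame into cm->post_proc_buffer
//     // The caller is expected to save this frame's base_qindex and
//     // MODE_INFO array into cm->postproc_state so the next frame's MFQE
//     // pass can compare against them.
//   }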