/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#ifndef AOM_AV1_COMMON_CFL_H_
#define AOM_AV1_COMMON_CFL_H_

#include "av1/common/blockd.h"
#include "av1/common/onyxc_int.h"

// Can we use CfL for the current block?
static INLINE CFL_ALLOWED_TYPE is_cfl_allowed(const MACROBLOCKD *xd) {
  const MB_MODE_INFO *mbmi = xd->mi[0];
  const BLOCK_SIZE bsize = mbmi->sb_type;
  assert(bsize < BLOCK_SIZES_ALL);
  if (xd->lossless[mbmi->segment_id]) {
    // In lossless, CfL is available when the partition size is equal to the
    // transform size.
    const int ssx = xd->plane[AOM_PLANE_U].subsampling_x;
    const int ssy = xd->plane[AOM_PLANE_U].subsampling_y;
    const int plane_bsize = get_plane_block_size(bsize, ssx, ssy);
    return (CFL_ALLOWED_TYPE)(plane_bsize == BLOCK_4X4);
  }
  // Spec: CfL is available to luma partitions no larger than 32x32.
  return (CFL_ALLOWED_TYPE)(block_size_wide[bsize] <= 32 &&
                            block_size_high[bsize] <= 32);
}
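
// For illustration (an informal reading of the checks above, not a
// restatement of the spec): a 32x32 or 16x8 luma partition may use CfL,
// while a 64x32 one may not. In lossless mode with 4:2:0 subsampling, an
// 8x8 luma partition maps to a 4x4 chroma block and is therefore allowed.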

// Do we need to save the luma pixels from the current block,
// for a possible future CfL prediction?
static INLINE CFL_ALLOWED_TYPE store_cfl_required(const AV1_COMMON *cm,
                                                  const MACROBLOCKD *xd) {
  const MB_MODE_INFO *mbmi = xd->mi[0];

  if (cm->seq_params.monochrome) return CFL_DISALLOWED;

  if (!xd->cfl.is_chroma_reference) {
    // For non-chroma-reference blocks, we should always store the luma pixels,
    // in case the corresponding chroma-reference block uses CfL.
    // Note that this can only happen for block sizes which are <8 on
    // their shortest side, as otherwise they would be chroma reference
    // blocks.
    return CFL_ALLOWED;
  }

  // If this block has chroma information, we know whether we're
  // actually going to perform a CfL prediction
  return (CFL_ALLOWED_TYPE)(!is_inter_block(mbmi) &&
                            mbmi->uv_mode == UV_CFL_PRED);
}

static INLINE int get_scaled_luma_q0(int alpha_q3, int16_t pred_buf_q3) {
  int scaled_luma_q6 = alpha_q3 * pred_buf_q3;
  return ROUND_POWER_OF_TWO_SIGNED(scaled_luma_q6, 6);
}
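
// Worked example of the Q3 fixed-point math above (illustrative values only):
// alpha_q3 = 8 represents an alpha of 1.0 and pred_buf_q3 = 24 represents a
// luma AC contribution of 3.0, so scaled_luma_q6 = 8 * 24 = 192 in Q6, and
// rounding away the 6 fractional bits yields 192 / 64 = 3 in Q0.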

static INLINE CFL_PRED_TYPE get_cfl_pred_type(PLANE_TYPE plane) {
  assert(plane > 0);
  return (CFL_PRED_TYPE)(plane - 1);
}

void cfl_predict_block(MACROBLOCKD *const xd, uint8_t *dst, int dst_stride,
                       TX_SIZE tx_size, int plane);

void cfl_store_block(MACROBLOCKD *const xd, BLOCK_SIZE bsize, TX_SIZE tx_size);

void cfl_store_tx(MACROBLOCKD *const xd, int row, int col, TX_SIZE tx_size,
                  BLOCK_SIZE bsize);

void cfl_store_dc_pred(MACROBLOCKD *const xd, const uint8_t *input,
                       CFL_PRED_TYPE pred_plane, int width);

void cfl_load_dc_pred(MACROBLOCKD *const xd, uint8_t *dst, int dst_stride,
                      TX_SIZE tx_size, CFL_PRED_TYPE pred_plane);

// Allows the functions declared by CFL_SUBSAMPLE to switch their input
// pointer type depending on the bitdepth.
#define CFL_lbd_TYPE uint8_t *cfl_type
#define CFL_hbd_TYPE uint16_t *cfl_type

// Declare a size-specific wrapper for the size-generic function. The compiler
// will inline the size-generic function here; the advantage is that the size
// is constant, which allows loop unrolling and other constant-propagation
// optimizations.
#define CFL_SUBSAMPLE(arch, sub, bd, width, height)                       \
  void subsample_##bd##_##sub##_##width##x##height##_##arch(              \
      const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) {     \
    cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride,    \
                                               output_q3, width, height); \
  }
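
// For illustration, CFL_SUBSAMPLE(ssse3, 420, lbd, 4, 4) would expand to
// roughly the following (the ssse3 suffix is only an example here):
//
//   void subsample_lbd_420_4x4_ssse3(const uint8_t *cfl_type,
//                                    int input_stride, uint16_t *output_q3) {
//     cfl_luma_subsampling_420_lbd_ssse3(cfl_type, input_stride, output_q3,
//                                        4, 4);
//   }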

// Declare size-specific wrappers for all valid CfL sizes.
#define CFL_SUBSAMPLE_FUNCTIONS(arch, sub, bd)                            \
  CFL_SUBSAMPLE(arch, sub, bd, 4, 4)                                      \
  CFL_SUBSAMPLE(arch, sub, bd, 8, 8)                                      \
  CFL_SUBSAMPLE(arch, sub, bd, 16, 16)                                    \
  CFL_SUBSAMPLE(arch, sub, bd, 32, 32)                                    \
  CFL_SUBSAMPLE(arch, sub, bd, 4, 8)                                      \
  CFL_SUBSAMPLE(arch, sub, bd, 8, 4)                                      \
  CFL_SUBSAMPLE(arch, sub, bd, 8, 16)                                     \
  CFL_SUBSAMPLE(arch, sub, bd, 16, 8)                                     \
  CFL_SUBSAMPLE(arch, sub, bd, 16, 32)                                    \
  CFL_SUBSAMPLE(arch, sub, bd, 32, 16)                                    \
  CFL_SUBSAMPLE(arch, sub, bd, 4, 16)                                     \
  CFL_SUBSAMPLE(arch, sub, bd, 16, 4)                                     \
  CFL_SUBSAMPLE(arch, sub, bd, 8, 32)                                     \
  CFL_SUBSAMPLE(arch, sub, bd, 32, 8)                                     \
  cfl_subsample_##bd##_fn cfl_get_luma_subsampling_##sub##_##bd##_##arch( \
      TX_SIZE tx_size) {                                                  \
    CFL_SUBSAMPLE_FUNCTION_ARRAY(arch, sub, bd)                           \
    return subfn_##sub[tx_size];                                          \
  }

// Declare an architecture-specific array of function pointers for size-specific
// wrappers.
#define CFL_SUBSAMPLE_FUNCTION_ARRAY(arch, sub, bd)                       \
  static const cfl_subsample_##bd##_fn subfn_##sub[TX_SIZES_ALL] = {      \
    subsample_##bd##_##sub##_4x4_##arch,   /* 4x4 */                      \
    subsample_##bd##_##sub##_8x8_##arch,   /* 8x8 */                      \
    subsample_##bd##_##sub##_16x16_##arch, /* 16x16 */                    \
    subsample_##bd##_##sub##_32x32_##arch, /* 32x32 */                    \
    NULL,                                  /* 64x64 (invalid CFL size) */ \
    subsample_##bd##_##sub##_4x8_##arch,   /* 4x8 */                      \
    subsample_##bd##_##sub##_8x4_##arch,   /* 8x4 */                      \
    subsample_##bd##_##sub##_8x16_##arch,  /* 8x16 */                     \
    subsample_##bd##_##sub##_16x8_##arch,  /* 16x8 */                     \
    subsample_##bd##_##sub##_16x32_##arch, /* 16x32 */                    \
    subsample_##bd##_##sub##_32x16_##arch, /* 32x16 */                    \
    NULL,                                  /* 32x64 (invalid CFL size) */ \
    NULL,                                  /* 64x32 (invalid CFL size) */ \
    subsample_##bd##_##sub##_4x16_##arch,  /* 4x16  */                    \
    subsample_##bd##_##sub##_16x4_##arch,  /* 16x4  */                    \
    subsample_##bd##_##sub##_8x32_##arch,  /* 8x32  */                    \
    subsample_##bd##_##sub##_32x8_##arch,  /* 32x8  */                    \
    NULL,                                  /* 16x64 (invalid CFL size) */ \
    NULL,                                  /* 64x16 (invalid CFL size) */ \
  };

// The RTCD script does not support passing in an array, so we wrap it in this
// function.
#define CFL_GET_SUBSAMPLE_FUNCTION(arch)  \
  CFL_SUBSAMPLE_FUNCTIONS(arch, 420, lbd) \
  CFL_SUBSAMPLE_FUNCTIONS(arch, 422, lbd) \
  CFL_SUBSAMPLE_FUNCTIONS(arch, 444, lbd) \
  CFL_SUBSAMPLE_FUNCTIONS(arch, 420, hbd) \
  CFL_SUBSAMPLE_FUNCTIONS(arch, 422, hbd) \
  CFL_SUBSAMPLE_FUNCTIONS(arch, 444, hbd)
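
// For illustration, an architecture-specific translation unit might
// instantiate all six getters at once (the ssse3 suffix is only an example):
//
//   CFL_GET_SUBSAMPLE_FUNCTION(ssse3)
//
// giving e.g. cfl_get_luma_subsampling_420_lbd_ssse3(tx_size), which the RTCD
// dispatch can then call to obtain the wrapper for a given TX_SIZE.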

// Declare a size-specific wrapper for the size-generic function. The compiler
// will inline the size-generic function here; the advantage is that the size
// is constant, which allows loop unrolling and other constant-propagation
// optimizations.
#define CFL_SUB_AVG_X(arch, width, height, round_offset, num_pel_log2)   \
  void subtract_average_##width##x##height##_##arch(const uint16_t *src, \
                                                    int16_t *dst) {      \
    subtract_average_##arch(src, dst, width, height, round_offset,       \
                            num_pel_log2);                               \
  }
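
// The last two arguments encode the averaging divisor: num_pel_log2 is
// log2(width * height) and round_offset is (width * height) / 2, presumably
// so the size-generic subtract_average_##arch can compute the block average
// as (sum + round_offset) >> num_pel_log2. For example, for an 8x16 block,
// width * height = 128, so num_pel_log2 = 7 and round_offset = 64, matching
// CFL_SUB_AVG_X(arch, 8, 16, 64, 7) below.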

// Declare size-specific wrappers for all valid CfL sizes.
#define CFL_SUB_AVG_FN(arch)                                                \
  CFL_SUB_AVG_X(arch, 4, 4, 8, 4)                                           \
  CFL_SUB_AVG_X(arch, 4, 8, 16, 5)                                          \
  CFL_SUB_AVG_X(arch, 4, 16, 32, 6)                                         \
  CFL_SUB_AVG_X(arch, 8, 4, 16, 5)                                          \
  CFL_SUB_AVG_X(arch, 8, 8, 32, 6)                                          \
  CFL_SUB_AVG_X(arch, 8, 16, 64, 7)                                         \
  CFL_SUB_AVG_X(arch, 8, 32, 128, 8)                                        \
  CFL_SUB_AVG_X(arch, 16, 4, 32, 6)                                         \
  CFL_SUB_AVG_X(arch, 16, 8, 64, 7)                                         \
  CFL_SUB_AVG_X(arch, 16, 16, 128, 8)                                       \
  CFL_SUB_AVG_X(arch, 16, 32, 256, 9)                                       \
  CFL_SUB_AVG_X(arch, 32, 8, 128, 8)                                        \
  CFL_SUB_AVG_X(arch, 32, 16, 256, 9)                                       \
  CFL_SUB_AVG_X(arch, 32, 32, 512, 10)                                      \
  cfl_subtract_average_fn get_subtract_average_fn_##arch(TX_SIZE tx_size) { \
    static const cfl_subtract_average_fn sub_avg[TX_SIZES_ALL] = {          \
      subtract_average_4x4_##arch,   /* 4x4 */                              \
      subtract_average_8x8_##arch,   /* 8x8 */                              \
      subtract_average_16x16_##arch, /* 16x16 */                            \
      subtract_average_32x32_##arch, /* 32x32 */                            \
      NULL,                          /* 64x64 (invalid CFL size) */         \
      subtract_average_4x8_##arch,   /* 4x8 */                              \
      subtract_average_8x4_##arch,   /* 8x4 */                              \
      subtract_average_8x16_##arch,  /* 8x16 */                             \
      subtract_average_16x8_##arch,  /* 16x8 */                             \
      subtract_average_16x32_##arch, /* 16x32 */                            \
      subtract_average_32x16_##arch, /* 32x16 */                            \
      NULL,                          /* 32x64 (invalid CFL size) */         \
      NULL,                          /* 64x32 (invalid CFL size) */         \
      subtract_average_4x16_##arch,  /* 4x16 */                             \
      subtract_average_16x4_##arch,  /* 16x4 */                             \
      subtract_average_8x32_##arch,  /* 8x32 */                             \
      subtract_average_32x8_##arch,  /* 32x8 */                             \
      NULL,                          /* 16x64 (invalid CFL size) */         \
      NULL,                          /* 64x16 (invalid CFL size) */         \
    };                                                                      \
    /* Modulo TX_SIZES_ALL to ensure that an attacker won't be able to */   \
    /* index the function pointer array out of bounds. */                   \
    return sub_avg[tx_size % TX_SIZES_ALL];                                 \
  }

// For VSX SIMD optimization, the C versions of the width == 4 subtract are
// faster than the VSX ones, so the VSX code calls the C versions.
void subtract_average_4x4_c(const uint16_t *src, int16_t *dst);
void subtract_average_4x8_c(const uint16_t *src, int16_t *dst);
void subtract_average_4x16_c(const uint16_t *src, int16_t *dst);

#define CFL_PREDICT_lbd(arch, width, height)                                 \
  void predict_lbd_##width##x##height##_##arch(const int16_t *pred_buf_q3,   \
                                               uint8_t *dst, int dst_stride, \
                                               int alpha_q3) {               \
    cfl_predict_lbd_##arch(pred_buf_q3, dst, dst_stride, alpha_q3, width,    \
                           height);                                          \
  }

#define CFL_PREDICT_hbd(arch, width, height)                                  \
  void predict_hbd_##width##x##height##_##arch(const int16_t *pred_buf_q3,    \
                                               uint16_t *dst, int dst_stride, \
                                               int alpha_q3, int bd) {        \
    cfl_predict_hbd_##arch(pred_buf_q3, dst, dst_stride, alpha_q3, bd, width, \
                           height);                                           \
  }

// This wrapper exists because clang-format does not like calling macros with
// lowercase letters.
#define CFL_PREDICT_X(arch, width, height, bd) \
  CFL_PREDICT_##bd(arch, width, height)
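
// For illustration, CFL_PREDICT_X(ssse3, 4, 4, lbd) selects CFL_PREDICT_lbd
// and would expand to roughly the following (the ssse3 suffix is only an
// example here):
//
//   void predict_lbd_4x4_ssse3(const int16_t *pred_buf_q3, uint8_t *dst,
//                              int dst_stride, int alpha_q3) {
//     cfl_predict_lbd_ssse3(pred_buf_q3, dst, dst_stride, alpha_q3, 4, 4);
//   }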

#define CFL_PREDICT_FN(arch, bd)                                          \
  CFL_PREDICT_X(arch, 4, 4, bd)                                           \
  CFL_PREDICT_X(arch, 4, 8, bd)                                           \
  CFL_PREDICT_X(arch, 4, 16, bd)                                          \
  CFL_PREDICT_X(arch, 8, 4, bd)                                           \
  CFL_PREDICT_X(arch, 8, 8, bd)                                           \
  CFL_PREDICT_X(arch, 8, 16, bd)                                          \
  CFL_PREDICT_X(arch, 8, 32, bd)                                          \
  CFL_PREDICT_X(arch, 16, 4, bd)                                          \
  CFL_PREDICT_X(arch, 16, 8, bd)                                          \
  CFL_PREDICT_X(arch, 16, 16, bd)                                         \
  CFL_PREDICT_X(arch, 16, 32, bd)                                         \
  CFL_PREDICT_X(arch, 32, 8, bd)                                          \
  CFL_PREDICT_X(arch, 32, 16, bd)                                         \
  CFL_PREDICT_X(arch, 32, 32, bd)                                         \
  cfl_predict_##bd##_fn get_predict_##bd##_fn_##arch(TX_SIZE tx_size) {   \
    static const cfl_predict_##bd##_fn pred[TX_SIZES_ALL] = {             \
      predict_##bd##_4x4_##arch,   /* 4x4 */                              \
      predict_##bd##_8x8_##arch,   /* 8x8 */                              \
      predict_##bd##_16x16_##arch, /* 16x16 */                            \
      predict_##bd##_32x32_##arch, /* 32x32 */                            \
      NULL,                        /* 64x64 (invalid CFL size) */         \
      predict_##bd##_4x8_##arch,   /* 4x8 */                              \
      predict_##bd##_8x4_##arch,   /* 8x4 */                              \
      predict_##bd##_8x16_##arch,  /* 8x16 */                             \
      predict_##bd##_16x8_##arch,  /* 16x8 */                             \
      predict_##bd##_16x32_##arch, /* 16x32 */                            \
      predict_##bd##_32x16_##arch, /* 32x16 */                            \
      NULL,                        /* 32x64 (invalid CFL size) */         \
      NULL,                        /* 64x32 (invalid CFL size) */         \
      predict_##bd##_4x16_##arch,  /* 4x16  */                            \
      predict_##bd##_16x4_##arch,  /* 16x4  */                            \
      predict_##bd##_8x32_##arch,  /* 8x32  */                            \
      predict_##bd##_32x8_##arch,  /* 32x8  */                            \
      NULL,                        /* 16x64 (invalid CFL size) */         \
      NULL,                        /* 64x16 (invalid CFL size) */         \
    };                                                                    \
    /* Modulo TX_SIZES_ALL to ensure that an attacker won't be able to */ \
    /* index the function pointer array out of bounds. */                 \
    return pred[tx_size % TX_SIZES_ALL];                                  \
  }
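
// For illustration, a caller in an ssse3-specific path (again, only an
// example suffix) could fetch and invoke the low-bitdepth predictor like so:
//
//   cfl_predict_lbd_fn fn = get_predict_lbd_fn_ssse3(tx_size);
//   fn(pred_buf_q3, dst, dst_stride, alpha_q3);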

#endif  // AOM_AV1_COMMON_CFL_H_