Home | History | Annotate | Download | only in aom_dsp
      1 /*
      2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
      3  *
      4  * This source code is subject to the terms of the BSD 2 Clause License and
      5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
      6  * was not distributed with this source code in the LICENSE file, you can
      7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
      8  * Media Patent License 1.0 was not distributed with this source code in the
      9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
     10  */
     11 
     12 #include <assert.h>
     13 
     14 #include "aom/aom_integer.h"
     15 #include "aom_ports/mem.h"
     16 #include "aom_dsp/blend.h"
     17 #include "aom_dsp/aom_dsp_common.h"
     18 
     19 #include "config/aom_dsp_rtcd.h"
     20 
     21 // Blending with alpha mask. Mask values come from the range [0, 64],
     22 // as described for AOM_BLEND_A64 in aom_dsp/blend.h. src0 or src1 can
     23 // be the same as dst, or dst can be different from both sources.
     24 
// NOTE(david.barker): The inputs of aom_lowbd_blend_a64_d16_mask_c() and
// aom_highbd_blend_a64_d16_mask_c() are in a higher intermediate precision,
// and the blended value is only rounded down to pixel precision afterwards
// (by the final ROUND_POWER_OF_TWO(res, round_bits) in each loop).
// Thus, in order to avoid double-rounding, we want to use normal right shifts
// for the blend itself within these functions, not ROUND_POWER_OF_TWO.
// This works because of the identity:
// ROUND_POWER_OF_TWO(x >> y, z) == ROUND_POWER_OF_TWO(x, y+z)
     32 //
     33 // In contrast, the output of the non-d16 functions will not be further rounded,
     34 // so we *should* use ROUND_POWER_OF_TWO there.
     35 
     36 void aom_lowbd_blend_a64_d16_mask_c(
     37     uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0,
     38     uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride,
     39     const uint8_t *mask, uint32_t mask_stride, int w, int h, int subw, int subh,
     40     ConvolveParams *conv_params) {
     41   int i, j;
     42   const int bd = 8;
     43   const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
     44   const int round_offset = (1 << (offset_bits - conv_params->round_1)) +
     45                            (1 << (offset_bits - conv_params->round_1 - 1));
     46   const int round_bits =
     47       2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
     48 
     49   assert(IMPLIES((void *)src0 == dst, src0_stride == dst_stride));
     50   assert(IMPLIES((void *)src1 == dst, src1_stride == dst_stride));
     51 
     52   assert(h >= 4);
     53   assert(w >= 4);
     54   assert(IS_POWER_OF_TWO(h));
     55   assert(IS_POWER_OF_TWO(w));
     56 
     57   if (subw == 0 && subh == 0) {
     58     for (i = 0; i < h; ++i) {
     59       for (j = 0; j < w; ++j) {
     60         int32_t res;
     61         const int m = mask[i * mask_stride + j];
     62         res = ((m * (int32_t)src0[i * src0_stride + j] +
     63                 (AOM_BLEND_A64_MAX_ALPHA - m) *
     64                     (int32_t)src1[i * src1_stride + j]) >>
     65                AOM_BLEND_A64_ROUND_BITS);
     66         res -= round_offset;
     67         dst[i * dst_stride + j] =
     68             clip_pixel(ROUND_POWER_OF_TWO(res, round_bits));
     69       }
     70     }
     71   } else if (subw == 1 && subh == 1) {
     72     for (i = 0; i < h; ++i) {
     73       for (j = 0; j < w; ++j) {
     74         int32_t res;
     75         const int m = ROUND_POWER_OF_TWO(
     76             mask[(2 * i) * mask_stride + (2 * j)] +
     77                 mask[(2 * i + 1) * mask_stride + (2 * j)] +
     78                 mask[(2 * i) * mask_stride + (2 * j + 1)] +
     79                 mask[(2 * i + 1) * mask_stride + (2 * j + 1)],
     80             2);
     81         res = ((m * (int32_t)src0[i * src0_stride + j] +
     82                 (AOM_BLEND_A64_MAX_ALPHA - m) *
     83                     (int32_t)src1[i * src1_stride + j]) >>
     84                AOM_BLEND_A64_ROUND_BITS);
     85         res -= round_offset;
     86         dst[i * dst_stride + j] =
     87             clip_pixel(ROUND_POWER_OF_TWO(res, round_bits));
     88       }
     89     }
     90   } else if (subw == 1 && subh == 0) {
     91     for (i = 0; i < h; ++i) {
     92       for (j = 0; j < w; ++j) {
     93         int32_t res;
     94         const int m = AOM_BLEND_AVG(mask[i * mask_stride + (2 * j)],
     95                                     mask[i * mask_stride + (2 * j + 1)]);
     96         res = ((m * (int32_t)src0[i * src0_stride + j] +
     97                 (AOM_BLEND_A64_MAX_ALPHA - m) *
     98                     (int32_t)src1[i * src1_stride + j]) >>
     99                AOM_BLEND_A64_ROUND_BITS);
    100         res -= round_offset;
    101         dst[i * dst_stride + j] =
    102             clip_pixel(ROUND_POWER_OF_TWO(res, round_bits));
    103       }
    104     }
    105   } else {
    106     for (i = 0; i < h; ++i) {
    107       for (j = 0; j < w; ++j) {
    108         int32_t res;
    109         const int m = AOM_BLEND_AVG(mask[(2 * i) * mask_stride + j],
    110                                     mask[(2 * i + 1) * mask_stride + j]);
    111         res = ((int32_t)(m * (int32_t)src0[i * src0_stride + j] +
    112                          (AOM_BLEND_A64_MAX_ALPHA - m) *
    113                              (int32_t)src1[i * src1_stride + j]) >>
    114                AOM_BLEND_A64_ROUND_BITS);
    115         res -= round_offset;
    116         dst[i * dst_stride + j] =
    117             clip_pixel(ROUND_POWER_OF_TWO(res, round_bits));
    118       }
    119     }
    120   }
    121 }
    122 
    123 void aom_highbd_blend_a64_d16_mask_c(
    124     uint8_t *dst_8, uint32_t dst_stride, const CONV_BUF_TYPE *src0,
    125     uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride,
    126     const uint8_t *mask, uint32_t mask_stride, int w, int h, int subw, int subh,
    127     ConvolveParams *conv_params, const int bd) {
    128   const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
    129   const int round_offset = (1 << (offset_bits - conv_params->round_1)) +
    130                            (1 << (offset_bits - conv_params->round_1 - 1));
    131   const int round_bits =
    132       2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
    133   uint16_t *dst = CONVERT_TO_SHORTPTR(dst_8);
    134 
    135   assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
    136   assert(IMPLIES(src1 == dst, src1_stride == dst_stride));
    137 
    138   assert(h >= 1);
    139   assert(w >= 1);
    140   assert(IS_POWER_OF_TWO(h));
    141   assert(IS_POWER_OF_TWO(w));
    142 
    143   // excerpt from clip_pixel_highbd()
    144   // set saturation_value to (1 << bd) - 1
    145   unsigned int saturation_value;
    146   switch (bd) {
    147     case 8:
    148     default: saturation_value = 255; break;
    149     case 10: saturation_value = 1023; break;
    150     case 12: saturation_value = 4095; break;
    151   }
    152 
    153   if (subw == 0 && subh == 0) {
    154     for (int i = 0; i < h; ++i) {
    155       for (int j = 0; j < w; ++j) {
    156         int32_t res;
    157         const int m = mask[j];
    158         res = ((m * src0[j] + (AOM_BLEND_A64_MAX_ALPHA - m) * src1[j]) >>
    159                AOM_BLEND_A64_ROUND_BITS);
    160         res -= round_offset;
    161         unsigned int v = negative_to_zero(ROUND_POWER_OF_TWO(res, round_bits));
    162         dst[j] = AOMMIN(v, saturation_value);
    163       }
    164       mask += mask_stride;
    165       src0 += src0_stride;
    166       src1 += src1_stride;
    167       dst += dst_stride;
    168     }
    169   } else if (subw == 1 && subh == 1) {
    170     for (int i = 0; i < h; ++i) {
    171       for (int j = 0; j < w; ++j) {
    172         int32_t res;
    173         const int m = ROUND_POWER_OF_TWO(
    174             mask[2 * j] + mask[mask_stride + 2 * j] + mask[2 * j + 1] +
    175                 mask[mask_stride + 2 * j + 1],
    176             2);
    177         res = (m * src0[j] + (AOM_BLEND_A64_MAX_ALPHA - m) * src1[j]) >>
    178               AOM_BLEND_A64_ROUND_BITS;
    179         res -= round_offset;
    180         unsigned int v = negative_to_zero(ROUND_POWER_OF_TWO(res, round_bits));
    181         dst[j] = AOMMIN(v, saturation_value);
    182       }
    183       mask += 2 * mask_stride;
    184       src0 += src0_stride;
    185       src1 += src1_stride;
    186       dst += dst_stride;
    187     }
    188   } else if (subw == 1 && subh == 0) {
    189     for (int i = 0; i < h; ++i) {
    190       for (int j = 0; j < w; ++j) {
    191         int32_t res;
    192         const int m = AOM_BLEND_AVG(mask[2 * j], mask[2 * j + 1]);
    193         res = (m * src0[j] + (AOM_BLEND_A64_MAX_ALPHA - m) * src1[j]) >>
    194               AOM_BLEND_A64_ROUND_BITS;
    195         res -= round_offset;
    196         unsigned int v = negative_to_zero(ROUND_POWER_OF_TWO(res, round_bits));
    197         dst[j] = AOMMIN(v, saturation_value);
    198       }
    199       mask += mask_stride;
    200       src0 += src0_stride;
    201       src1 += src1_stride;
    202       dst += dst_stride;
    203     }
    204   } else {
    205     for (int i = 0; i < h; ++i) {
    206       for (int j = 0; j < w; ++j) {
    207         int32_t res;
    208         const int m = AOM_BLEND_AVG(mask[j], mask[mask_stride + j]);
    209         res = (m * src0[j] + (AOM_BLEND_A64_MAX_ALPHA - m) * src1[j]) >>
    210               AOM_BLEND_A64_ROUND_BITS;
    211         res -= round_offset;
    212         unsigned int v = negative_to_zero(ROUND_POWER_OF_TWO(res, round_bits));
    213         dst[j] = AOMMIN(v, saturation_value);
    214       }
    215       mask += 2 * mask_stride;
    216       src0 += src0_stride;
    217       src1 += src1_stride;
    218       dst += dst_stride;
    219     }
    220   }
    221 }
    222 
    223 // Blending with alpha mask. Mask values come from the range [0, 64],
    224 // as described for AOM_BLEND_A64 in aom_dsp/blend.h. src0 or src1 can
    225 // be the same as dst, or dst can be different from both sources.
    226 
    227 void aom_blend_a64_mask_c(uint8_t *dst, uint32_t dst_stride,
    228                           const uint8_t *src0, uint32_t src0_stride,
    229                           const uint8_t *src1, uint32_t src1_stride,
    230                           const uint8_t *mask, uint32_t mask_stride, int w,
    231                           int h, int subw, int subh) {
    232   int i, j;
    233 
    234   assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
    235   assert(IMPLIES(src1 == dst, src1_stride == dst_stride));
    236 
    237   assert(h >= 1);
    238   assert(w >= 1);
    239   assert(IS_POWER_OF_TWO(h));
    240   assert(IS_POWER_OF_TWO(w));
    241 
    242   if (subw == 0 && subh == 0) {
    243     for (i = 0; i < h; ++i) {
    244       for (j = 0; j < w; ++j) {
    245         const int m = mask[i * mask_stride + j];
    246         dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j],
    247                                                 src1[i * src1_stride + j]);
    248       }
    249     }
    250   } else if (subw == 1 && subh == 1) {
    251     for (i = 0; i < h; ++i) {
    252       for (j = 0; j < w; ++j) {
    253         const int m = ROUND_POWER_OF_TWO(
    254             mask[(2 * i) * mask_stride + (2 * j)] +
    255                 mask[(2 * i + 1) * mask_stride + (2 * j)] +
    256                 mask[(2 * i) * mask_stride + (2 * j + 1)] +
    257                 mask[(2 * i + 1) * mask_stride + (2 * j + 1)],
    258             2);
    259         dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j],
    260                                                 src1[i * src1_stride + j]);
    261       }
    262     }
    263   } else if (subw == 1 && subh == 0) {
    264     for (i = 0; i < h; ++i) {
    265       for (j = 0; j < w; ++j) {
    266         const int m = AOM_BLEND_AVG(mask[i * mask_stride + (2 * j)],
    267                                     mask[i * mask_stride + (2 * j + 1)]);
    268         dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j],
    269                                                 src1[i * src1_stride + j]);
    270       }
    271     }
    272   } else {
    273     for (i = 0; i < h; ++i) {
    274       for (j = 0; j < w; ++j) {
    275         const int m = AOM_BLEND_AVG(mask[(2 * i) * mask_stride + j],
    276                                     mask[(2 * i + 1) * mask_stride + j]);
    277         dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j],
    278                                                 src1[i * src1_stride + j]);
    279       }
    280     }
    281   }
    282 }
    283 
    284 void aom_highbd_blend_a64_mask_c(uint8_t *dst_8, uint32_t dst_stride,
    285                                  const uint8_t *src0_8, uint32_t src0_stride,
    286                                  const uint8_t *src1_8, uint32_t src1_stride,
    287                                  const uint8_t *mask, uint32_t mask_stride,
    288                                  int w, int h, int subw, int subh, int bd) {
    289   int i, j;
    290   uint16_t *dst = CONVERT_TO_SHORTPTR(dst_8);
    291   const uint16_t *src0 = CONVERT_TO_SHORTPTR(src0_8);
    292   const uint16_t *src1 = CONVERT_TO_SHORTPTR(src1_8);
    293   (void)bd;
    294 
    295   assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
    296   assert(IMPLIES(src1 == dst, src1_stride == dst_stride));
    297 
    298   assert(h >= 1);
    299   assert(w >= 1);
    300   assert(IS_POWER_OF_TWO(h));
    301   assert(IS_POWER_OF_TWO(w));
    302 
    303   assert(bd == 8 || bd == 10 || bd == 12);
    304 
    305   if (subw == 0 && subh == 0) {
    306     for (i = 0; i < h; ++i) {
    307       for (j = 0; j < w; ++j) {
    308         const int m = mask[i * mask_stride + j];
    309         dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j],
    310                                                 src1[i * src1_stride + j]);
    311       }
    312     }
    313   } else if (subw == 1 && subh == 1) {
    314     for (i = 0; i < h; ++i) {
    315       for (j = 0; j < w; ++j) {
    316         const int m = ROUND_POWER_OF_TWO(
    317             mask[(2 * i) * mask_stride + (2 * j)] +
    318                 mask[(2 * i + 1) * mask_stride + (2 * j)] +
    319                 mask[(2 * i) * mask_stride + (2 * j + 1)] +
    320                 mask[(2 * i + 1) * mask_stride + (2 * j + 1)],
    321             2);
    322         dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j],
    323                                                 src1[i * src1_stride + j]);
    324       }
    325     }
    326   } else if (subw == 1 && subh == 0) {
    327     for (i = 0; i < h; ++i) {
    328       for (j = 0; j < w; ++j) {
    329         const int m = AOM_BLEND_AVG(mask[i * mask_stride + (2 * j)],
    330                                     mask[i * mask_stride + (2 * j + 1)]);
    331         dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j],
    332                                                 src1[i * src1_stride + j]);
    333       }
    334     }
    335   } else {
    336     for (i = 0; i < h; ++i) {
    337       for (j = 0; j < w; ++j) {
    338         const int m = AOM_BLEND_AVG(mask[(2 * i) * mask_stride + j],
    339                                     mask[(2 * i + 1) * mask_stride + j]);
    340         dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j],
    341                                                 src1[i * src1_stride + j]);
    342       }
    343     }
    344   }
    345 }
    346