1 /* 2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved 3 * 4 * This source code is subject to the terms of the BSD 2 Clause License and 5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License 6 * was not distributed with this source code in the LICENSE file, you can 7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open 8 * Media Patent License 1.0 was not distributed with this source code in the 9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 10 */ 11 12 #include <assert.h> 13 14 #include "aom/aom_integer.h" 15 #include "aom_ports/mem.h" 16 #include "aom_dsp/blend.h" 17 #include "aom_dsp/aom_dsp_common.h" 18 19 #include "config/aom_dsp_rtcd.h" 20 21 // Blending with alpha mask. Mask values come from the range [0, 64], 22 // as described for AOM_BLEND_A64 in aom_dsp/blend.h. src0 or src1 can 23 // be the same as dst, or dst can be different from both sources. 24 25 // NOTE(david.barker): The input and output of aom_blend_a64_d16_mask_c() are 26 // in a higher intermediate precision, and will later be rounded down to pixel 27 // precision. 28 // Thus, in order to avoid double-rounding, we want to use normal right shifts 29 // within this function, not ROUND_POWER_OF_TWO. 30 // This works because of the identity: 31 // ROUND_POWER_OF_TWO(x >> y, z) == ROUND_POWER_OF_TWO(x, y+z) 32 // 33 // In contrast, the output of the non-d16 functions will not be further rounded, 34 // so we *should* use ROUND_POWER_OF_TWO there. 35 36 void aom_lowbd_blend_a64_d16_mask_c( 37 uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0, 38 uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride, 39 const uint8_t *mask, uint32_t mask_stride, int w, int h, int subw, int subh, 40 ConvolveParams *conv_params) { 41 int i, j; 42 const int bd = 8; 43 const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0; 44 const int round_offset = (1 << (offset_bits - conv_params->round_1)) + 45 (1 << (offset_bits - conv_params->round_1 - 1)); 46 const int round_bits = 47 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1; 48 49 assert(IMPLIES((void *)src0 == dst, src0_stride == dst_stride)); 50 assert(IMPLIES((void *)src1 == dst, src1_stride == dst_stride)); 51 52 assert(h >= 4); 53 assert(w >= 4); 54 assert(IS_POWER_OF_TWO(h)); 55 assert(IS_POWER_OF_TWO(w)); 56 57 if (subw == 0 && subh == 0) { 58 for (i = 0; i < h; ++i) { 59 for (j = 0; j < w; ++j) { 60 int32_t res; 61 const int m = mask[i * mask_stride + j]; 62 res = ((m * (int32_t)src0[i * src0_stride + j] + 63 (AOM_BLEND_A64_MAX_ALPHA - m) * 64 (int32_t)src1[i * src1_stride + j]) >> 65 AOM_BLEND_A64_ROUND_BITS); 66 res -= round_offset; 67 dst[i * dst_stride + j] = 68 clip_pixel(ROUND_POWER_OF_TWO(res, round_bits)); 69 } 70 } 71 } else if (subw == 1 && subh == 1) { 72 for (i = 0; i < h; ++i) { 73 for (j = 0; j < w; ++j) { 74 int32_t res; 75 const int m = ROUND_POWER_OF_TWO( 76 mask[(2 * i) * mask_stride + (2 * j)] + 77 mask[(2 * i + 1) * mask_stride + (2 * j)] + 78 mask[(2 * i) * mask_stride + (2 * j + 1)] + 79 mask[(2 * i + 1) * mask_stride + (2 * j + 1)], 80 2); 81 res = ((m * (int32_t)src0[i * src0_stride + j] + 82 (AOM_BLEND_A64_MAX_ALPHA - m) * 83 (int32_t)src1[i * src1_stride + j]) >> 84 AOM_BLEND_A64_ROUND_BITS); 85 res -= round_offset; 86 dst[i * dst_stride + j] = 87 clip_pixel(ROUND_POWER_OF_TWO(res, round_bits)); 88 } 89 } 90 } else if (subw == 1 && subh == 0) { 91 for (i = 0; i < h; ++i) { 92 for (j = 0; j < w; ++j) { 93 int32_t res; 94 const int m = AOM_BLEND_AVG(mask[i * mask_stride + (2 * j)], 95 mask[i * mask_stride + (2 * j + 1)]); 96 res = ((m * (int32_t)src0[i * src0_stride + j] + 97 (AOM_BLEND_A64_MAX_ALPHA - m) * 98 (int32_t)src1[i * src1_stride + j]) >> 99 AOM_BLEND_A64_ROUND_BITS); 100 res -= round_offset; 101 dst[i * dst_stride + j] = 102 clip_pixel(ROUND_POWER_OF_TWO(res, round_bits)); 103 } 104 } 105 } else { 106 for (i = 0; i < h; ++i) { 107 for (j = 0; j < w; ++j) { 108 int32_t res; 109 const int m = AOM_BLEND_AVG(mask[(2 * i) * mask_stride + j], 110 mask[(2 * i + 1) * mask_stride + j]); 111 res = ((int32_t)(m * (int32_t)src0[i * src0_stride + j] + 112 (AOM_BLEND_A64_MAX_ALPHA - m) * 113 (int32_t)src1[i * src1_stride + j]) >> 114 AOM_BLEND_A64_ROUND_BITS); 115 res -= round_offset; 116 dst[i * dst_stride + j] = 117 clip_pixel(ROUND_POWER_OF_TWO(res, round_bits)); 118 } 119 } 120 } 121 } 122 123 void aom_highbd_blend_a64_d16_mask_c( 124 uint8_t *dst_8, uint32_t dst_stride, const CONV_BUF_TYPE *src0, 125 uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride, 126 const uint8_t *mask, uint32_t mask_stride, int w, int h, int subw, int subh, 127 ConvolveParams *conv_params, const int bd) { 128 const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0; 129 const int round_offset = (1 << (offset_bits - conv_params->round_1)) + 130 (1 << (offset_bits - conv_params->round_1 - 1)); 131 const int round_bits = 132 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1; 133 uint16_t *dst = CONVERT_TO_SHORTPTR(dst_8); 134 135 assert(IMPLIES(src0 == dst, src0_stride == dst_stride)); 136 assert(IMPLIES(src1 == dst, src1_stride == dst_stride)); 137 138 assert(h >= 1); 139 assert(w >= 1); 140 assert(IS_POWER_OF_TWO(h)); 141 assert(IS_POWER_OF_TWO(w)); 142 143 // excerpt from clip_pixel_highbd() 144 // set saturation_value to (1 << bd) - 1 145 unsigned int saturation_value; 146 switch (bd) { 147 case 8: 148 default: saturation_value = 255; break; 149 case 10: saturation_value = 1023; break; 150 case 12: saturation_value = 4095; break; 151 } 152 153 if (subw == 0 && subh == 0) { 154 for (int i = 0; i < h; ++i) { 155 for (int j = 0; j < w; ++j) { 156 int32_t res; 157 const int m = mask[j]; 158 res = ((m * src0[j] + (AOM_BLEND_A64_MAX_ALPHA - m) * src1[j]) >> 159 AOM_BLEND_A64_ROUND_BITS); 160 res -= round_offset; 161 unsigned int v = negative_to_zero(ROUND_POWER_OF_TWO(res, round_bits)); 162 dst[j] = AOMMIN(v, saturation_value); 163 } 164 mask += mask_stride; 165 src0 += src0_stride; 166 src1 += src1_stride; 167 dst += dst_stride; 168 } 169 } else if (subw == 1 && subh == 1) { 170 for (int i = 0; i < h; ++i) { 171 for (int j = 0; j < w; ++j) { 172 int32_t res; 173 const int m = ROUND_POWER_OF_TWO( 174 mask[2 * j] + mask[mask_stride + 2 * j] + mask[2 * j + 1] + 175 mask[mask_stride + 2 * j + 1], 176 2); 177 res = (m * src0[j] + (AOM_BLEND_A64_MAX_ALPHA - m) * src1[j]) >> 178 AOM_BLEND_A64_ROUND_BITS; 179 res -= round_offset; 180 unsigned int v = negative_to_zero(ROUND_POWER_OF_TWO(res, round_bits)); 181 dst[j] = AOMMIN(v, saturation_value); 182 } 183 mask += 2 * mask_stride; 184 src0 += src0_stride; 185 src1 += src1_stride; 186 dst += dst_stride; 187 } 188 } else if (subw == 1 && subh == 0) { 189 for (int i = 0; i < h; ++i) { 190 for (int j = 0; j < w; ++j) { 191 int32_t res; 192 const int m = AOM_BLEND_AVG(mask[2 * j], mask[2 * j + 1]); 193 res = (m * src0[j] + (AOM_BLEND_A64_MAX_ALPHA - m) * src1[j]) >> 194 AOM_BLEND_A64_ROUND_BITS; 195 res -= round_offset; 196 unsigned int v = negative_to_zero(ROUND_POWER_OF_TWO(res, round_bits)); 197 dst[j] = AOMMIN(v, saturation_value); 198 } 199 mask += mask_stride; 200 src0 += src0_stride; 201 src1 += src1_stride; 202 dst += dst_stride; 203 } 204 } else { 205 for (int i = 0; i < h; ++i) { 206 for (int j = 0; j < w; ++j) { 207 int32_t res; 208 const int m = AOM_BLEND_AVG(mask[j], mask[mask_stride + j]); 209 res = (m * src0[j] + (AOM_BLEND_A64_MAX_ALPHA - m) * src1[j]) >> 210 AOM_BLEND_A64_ROUND_BITS; 211 res -= round_offset; 212 unsigned int v = negative_to_zero(ROUND_POWER_OF_TWO(res, round_bits)); 213 dst[j] = AOMMIN(v, saturation_value); 214 } 215 mask += 2 * mask_stride; 216 src0 += src0_stride; 217 src1 += src1_stride; 218 dst += dst_stride; 219 } 220 } 221 } 222 223 // Blending with alpha mask. Mask values come from the range [0, 64], 224 // as described for AOM_BLEND_A64 in aom_dsp/blend.h. src0 or src1 can 225 // be the same as dst, or dst can be different from both sources. 226 227 void aom_blend_a64_mask_c(uint8_t *dst, uint32_t dst_stride, 228 const uint8_t *src0, uint32_t src0_stride, 229 const uint8_t *src1, uint32_t src1_stride, 230 const uint8_t *mask, uint32_t mask_stride, int w, 231 int h, int subw, int subh) { 232 int i, j; 233 234 assert(IMPLIES(src0 == dst, src0_stride == dst_stride)); 235 assert(IMPLIES(src1 == dst, src1_stride == dst_stride)); 236 237 assert(h >= 1); 238 assert(w >= 1); 239 assert(IS_POWER_OF_TWO(h)); 240 assert(IS_POWER_OF_TWO(w)); 241 242 if (subw == 0 && subh == 0) { 243 for (i = 0; i < h; ++i) { 244 for (j = 0; j < w; ++j) { 245 const int m = mask[i * mask_stride + j]; 246 dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j], 247 src1[i * src1_stride + j]); 248 } 249 } 250 } else if (subw == 1 && subh == 1) { 251 for (i = 0; i < h; ++i) { 252 for (j = 0; j < w; ++j) { 253 const int m = ROUND_POWER_OF_TWO( 254 mask[(2 * i) * mask_stride + (2 * j)] + 255 mask[(2 * i + 1) * mask_stride + (2 * j)] + 256 mask[(2 * i) * mask_stride + (2 * j + 1)] + 257 mask[(2 * i + 1) * mask_stride + (2 * j + 1)], 258 2); 259 dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j], 260 src1[i * src1_stride + j]); 261 } 262 } 263 } else if (subw == 1 && subh == 0) { 264 for (i = 0; i < h; ++i) { 265 for (j = 0; j < w; ++j) { 266 const int m = AOM_BLEND_AVG(mask[i * mask_stride + (2 * j)], 267 mask[i * mask_stride + (2 * j + 1)]); 268 dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j], 269 src1[i * src1_stride + j]); 270 } 271 } 272 } else { 273 for (i = 0; i < h; ++i) { 274 for (j = 0; j < w; ++j) { 275 const int m = AOM_BLEND_AVG(mask[(2 * i) * mask_stride + j], 276 mask[(2 * i + 1) * mask_stride + j]); 277 dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j], 278 src1[i * src1_stride + j]); 279 } 280 } 281 } 282 } 283 284 void aom_highbd_blend_a64_mask_c(uint8_t *dst_8, uint32_t dst_stride, 285 const uint8_t *src0_8, uint32_t src0_stride, 286 const uint8_t *src1_8, uint32_t src1_stride, 287 const uint8_t *mask, uint32_t mask_stride, 288 int w, int h, int subw, int subh, int bd) { 289 int i, j; 290 uint16_t *dst = CONVERT_TO_SHORTPTR(dst_8); 291 const uint16_t *src0 = CONVERT_TO_SHORTPTR(src0_8); 292 const uint16_t *src1 = CONVERT_TO_SHORTPTR(src1_8); 293 (void)bd; 294 295 assert(IMPLIES(src0 == dst, src0_stride == dst_stride)); 296 assert(IMPLIES(src1 == dst, src1_stride == dst_stride)); 297 298 assert(h >= 1); 299 assert(w >= 1); 300 assert(IS_POWER_OF_TWO(h)); 301 assert(IS_POWER_OF_TWO(w)); 302 303 assert(bd == 8 || bd == 10 || bd == 12); 304 305 if (subw == 0 && subh == 0) { 306 for (i = 0; i < h; ++i) { 307 for (j = 0; j < w; ++j) { 308 const int m = mask[i * mask_stride + j]; 309 dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j], 310 src1[i * src1_stride + j]); 311 } 312 } 313 } else if (subw == 1 && subh == 1) { 314 for (i = 0; i < h; ++i) { 315 for (j = 0; j < w; ++j) { 316 const int m = ROUND_POWER_OF_TWO( 317 mask[(2 * i) * mask_stride + (2 * j)] + 318 mask[(2 * i + 1) * mask_stride + (2 * j)] + 319 mask[(2 * i) * mask_stride + (2 * j + 1)] + 320 mask[(2 * i + 1) * mask_stride + (2 * j + 1)], 321 2); 322 dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j], 323 src1[i * src1_stride + j]); 324 } 325 } 326 } else if (subw == 1 && subh == 0) { 327 for (i = 0; i < h; ++i) { 328 for (j = 0; j < w; ++j) { 329 const int m = AOM_BLEND_AVG(mask[i * mask_stride + (2 * j)], 330 mask[i * mask_stride + (2 * j + 1)]); 331 dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j], 332 src1[i * src1_stride + j]); 333 } 334 } 335 } else { 336 for (i = 0; i < h; ++i) { 337 for (j = 0; j < w; ++j) { 338 const int m = AOM_BLEND_AVG(mask[(2 * i) * mask_stride + j], 339 mask[(2 * i + 1) * mask_stride + j]); 340 dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j], 341 src1[i * src1_stride + j]); 342 } 343 } 344 } 345 } 346