/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <assert.h>
#include <stdio.h>
#include <limits.h>

#include "config/aom_config.h"
#include "config/aom_dsp_rtcd.h"
#include "config/aom_scale_rtcd.h"

#include "aom/aom_integer.h"
#include "aom_dsp/blend.h"

#include "av1/common/blockd.h"
#include "av1/common/mvref_common.h"
#include "av1/common/reconinter.h"
#include "av1/common/reconintra.h"
#include "av1/common/onyxc_int.h"
#include "av1/common/obmc.h"

// When set to 1, the wedge master masks / wedge sign tables defined below are
// baked in at compile time; otherwise av1_init_wedge_masks() computes them at
// runtime (see the #else branches further down).
#define USE_PRECOMPUTED_WEDGE_MASK 1
#define USE_PRECOMPUTED_WEDGE_SIGN 1

// This function will determine whether or not to create a warped
// prediction.
//
// Returns 1 (and fills *final_warp_params, if non-NULL) when a warped
// prediction should be built, 0 otherwise. Local warp (per-block wm_params)
// takes precedence over the global motion parameters.
int av1_allow_warp(const MB_MODE_INFO *const mbmi,
                   const WarpTypesAllowed *const warp_types,
                   const WarpedMotionParams *const gm_params,
                   int build_for_obmc, const struct scale_factors *const sf,
                   WarpedMotionParams *final_warp_params) {
  // Note: As per the spec, we must test the fixed point scales here, which are
  // at a higher precision (1 << 14) than the xs and ys in subpel_params (that
  // have 1 << 10 precision).
  if (av1_is_scaled(sf)) return 0;

  // Default-initialize the output so callers always get valid params even
  // when no warp is selected.
  if (final_warp_params != NULL) *final_warp_params = default_warp_params;

  // OBMC predictions are never warped.
  if (build_for_obmc) return 0;

  if (warp_types->local_warp_allowed && !mbmi->wm_params.invalid) {
    if (final_warp_params != NULL)
      memcpy(final_warp_params, &mbmi->wm_params, sizeof(*final_warp_params));
    return 1;
  } else if (warp_types->global_warp_allowed && !gm_params->invalid) {
    if (final_warp_params != NULL)
      memcpy(final_warp_params, gm_params, sizeof(*final_warp_params));
    return 1;
  }

  return 0;
}

// Build a single inter prediction of size w x h into dst. Chooses between a
// warped prediction (when allowed and the block is at least 8x8), a high
// bit-depth subpel prediction, or a regular 8-bit subpel prediction.
void av1_make_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst,
                              int dst_stride, const SubpelParams *subpel_params,
                              const struct scale_factors *sf, int w, int h,
                              ConvolveParams *conv_params,
                              InterpFilters interp_filters,
                              const WarpTypesAllowed *warp_types, int p_col,
                              int p_row, int plane, int ref,
                              const MB_MODE_INFO *mi, int build_for_obmc,
                              const MACROBLOCKD *xd, int can_use_previous) {
  // Make sure the selected motion mode is valid for this configuration
  assert_motion_mode_valid(mi->motion_mode, xd->global_motion, xd, mi,
                           can_use_previous);
  assert(IMPLIES(conv_params->is_compound, conv_params->dst != NULL));

  WarpedMotionParams final_warp_params;
  // Warping is only considered for blocks of at least 8x8 pixels.
  const int do_warp =
      (w >= 8 && h >= 8 &&
       av1_allow_warp(mi, warp_types, &xd->global_motion[mi->ref_frame[ref]],
                      build_for_obmc, sf, &final_warp_params));
  const int is_intrabc = mi->use_intrabc;
  // IntraBC blocks copy from the current frame and must never be warped.
  assert(IMPLIES(is_intrabc, !do_warp));

  if (do_warp && xd->cur_frame_force_integer_mv == 0) {
    const struct macroblockd_plane *const pd = &xd->plane[plane];
    const struct buf_2d *const pre_buf = &pd->pre[ref];
    av1_warp_plane(&final_warp_params, is_cur_buf_hbd(xd), xd->bd,
                   pre_buf->buf0, pre_buf->width, pre_buf->height,
                   pre_buf->stride, dst, p_col, p_row, w, h, dst_stride,
                   pd->subsampling_x, pd->subsampling_y, conv_params);
  } else if (is_cur_buf_hbd(xd)) {
    highbd_inter_predictor(src, src_stride, dst, dst_stride, subpel_params, sf,
                           w, h, conv_params, interp_filters, is_intrabc,
                           xd->bd);
  } else {
    inter_predictor(src, src_stride, dst, dst_stride, subpel_params, sf, w, h,
                    conv_params, interp_filters, is_intrabc);
  }
}

#if USE_PRECOMPUTED_WEDGE_MASK
// Precomputed 1-D profiles (one row of the master mask) for the oblique and
// vertical wedge directions. Values ramp from 0 to 64 (AOM_BLEND_A64 weights);
// the "odd"/"even" variants are interleaved row-by-row to realize the
// half-pel-per-row slope of the oblique edge.
static const uint8_t wedge_master_oblique_odd[MASK_MASTER_SIZE] = {
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  2,  6,  18,
  37, 53, 60, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
};
static const uint8_t wedge_master_oblique_even[MASK_MASTER_SIZE] = {
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  4,  11, 27,
  46, 58, 62, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
};
static const uint8_t wedge_master_vertical[MASK_MASTER_SIZE] = {
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  2,  7,  21,
  43, 57, 62, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
};

// Copy 'width' bytes of src into dst shifted horizontally by 'shift'
// (positive = right, negative = left), replicating the edge value into the
// bytes uncovered by the shift.
static void shift_copy(const uint8_t *src, uint8_t *dst, int shift, int width) {
  if (shift >= 0) {
    memcpy(dst + shift, src, width - shift);
    memset(dst, src[0], shift);
  } else {
    shift = -shift;
    memcpy(dst, src + shift, width - shift);
    memset(dst + width - shift, src[width - 1], shift);
  }
}
#endif  // USE_PRECOMPUTED_WEDGE_MASK

#if USE_PRECOMPUTED_WEDGE_SIGN
// Per-block-size, per-wedge-type sign flip flags. A 1 means the stored master
// mask must be complemented to obtain the canonical sign for that wedge.
// Rows follow the BLOCK_SIZES_ALL enumeration order.
/* clang-format off */
DECLARE_ALIGNED(16, static uint8_t,
                wedge_signflip_lookup[BLOCK_SIZES_ALL][MAX_WEDGE_TYPES]) = {
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, },
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
};
/* clang-format on */
#else
DECLARE_ALIGNED(16, static uint8_t,
                wedge_signflip_lookup[BLOCK_SIZES_ALL][MAX_WEDGE_TYPES]);
#endif  // USE_PRECOMPUTED_WEDGE_SIGN

// Master wedge masks, indexed as [negative][direction]. Index [0] stores the
// master weights, [1] the complement (64 - weight).
DECLARE_ALIGNED(
    16, static uint8_t,
    wedge_mask_obl[2][WEDGE_DIRECTIONS][MASK_MASTER_SIZE * MASK_MASTER_SIZE]);

// 4 * MAX_WEDGE_SQUARE is an easy to compute and fairly tight upper bound
// on the sum of all mask sizes up to and including MAX_WEDGE_SQUARE.
DECLARE_ALIGNED(16, static uint8_t,
                wedge_mask_buf[2 * MAX_WEDGE_TYPES * 4 * MAX_WEDGE_SQUARE]);

// Per-block-size, per-sign pointers into wedge_mask_buf, filled in by
// init_wedge_masks().
static wedge_masks_type wedge_masks[BLOCK_SIZES_ALL][2];

// Wedge codebooks: each entry is { direction, x_offset/8, y_offset/8 } with
// offsets expressed in eighths of the block dimensions.
// Codebook for blocks with height > width.
static const wedge_code_type wedge_codebook_16_hgtw[16] = {
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
  { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 4 },
  { WEDGE_HORIZONTAL, 4, 6 }, { WEDGE_VERTICAL, 4, 4 },
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
};

// Codebook for blocks with height < width.
static const wedge_code_type wedge_codebook_16_hltw[16] = {
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
  { WEDGE_VERTICAL, 2, 4 },   { WEDGE_VERTICAL, 4, 4 },
  { WEDGE_VERTICAL, 6, 4 },   { WEDGE_HORIZONTAL, 4, 4 },
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
};

// Codebook for square blocks (height == width).
static const wedge_code_type wedge_codebook_16_heqw[16] = {
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
  { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 6 },
  { WEDGE_VERTICAL, 2, 4 },   { WEDGE_VERTICAL, 6, 4 },
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
};

// Per-block-size wedge parameters: { bits, codebook, signflip, masks }.
// A bits value of 0 means wedge prediction is not supported for that size.
const wedge_params_type wedge_params_lookup[BLOCK_SIZES_ALL] = {
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 4, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_8X8],
    wedge_masks[BLOCK_8X8] },
  { 4, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X16],
    wedge_masks[BLOCK_8X16] },
  { 4, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_16X8],
    wedge_masks[BLOCK_16X8] },
  { 4, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_16X16],
    wedge_masks[BLOCK_16X16] },
  { 4, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_16X32],
    wedge_masks[BLOCK_16X32] },
  { 4, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X16],
    wedge_masks[BLOCK_32X16] },
  { 4, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_32X32],
    wedge_masks[BLOCK_32X32] },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 4, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X32],
    wedge_masks[BLOCK_8X32] },
  { 4, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X8],
    wedge_masks[BLOCK_32X8] },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
};

// Return a pointer into the master mask for the given wedge, positioned so
// that reading bw x bh bytes with stride MASK_MASTER_STRIDE yields the wedge
// mask for the block (no copy is made, hence "inplace").
static const uint8_t *get_wedge_mask_inplace(int wedge_index, int neg,
                                             BLOCK_SIZE sb_type) {
  const uint8_t *master;
  const int bh = block_size_high[sb_type];
  const int bw = block_size_wide[sb_type];
  const wedge_code_type *a =
      wedge_params_lookup[sb_type].codebook + wedge_index;
  int woff, hoff;
  const uint8_t wsignflip = wedge_params_lookup[sb_type].signflip[wedge_index];

  assert(wedge_index >= 0 &&
         wedge_index < (1 << get_wedge_bits_lookup(sb_type)));
  // Offsets are stored in eighths of the block dimensions.
  woff = (a->x_offset * bw) >> 3;
  hoff = (a->y_offset * bh) >> 3;
  master = wedge_mask_obl[neg ^ wsignflip][a->direction] +
           MASK_MASTER_STRIDE * (MASK_MASTER_SIZE / 2 - hoff) +
           MASK_MASTER_SIZE / 2 - woff;
  return master;
}

// Return the blending mask for a masked compound mode: the contiguous wedge
// mask for COMPOUND_WEDGE, or the per-block difference-weighted mask for
// COMPOUND_DIFFWTD.
const uint8_t *av1_get_compound_type_mask(
    const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type) {
  assert(is_masked_compound_type(comp_data->type));
  (void)sb_type;
  switch (comp_data->type) {
    case COMPOUND_WEDGE:
      return av1_get_contiguous_soft_mask(comp_data->wedge_index,
                                          comp_data->wedge_sign, sb_type);
    case COMPOUND_DIFFWTD: return comp_data->seg_mask;
    default: assert(0); return NULL;
  }
}

// Build a DIFFWTD mask from two intermediate (d16) predictions: the per-pixel
// absolute difference, rounded back to pixel precision, scales the blend
// weight away from mask_base (clamped to [0, 64]).
static void diffwtd_mask_d16(uint8_t *mask, int which_inverse, int mask_base,
                             const CONV_BUF_TYPE *src0, int src0_stride,
                             const CONV_BUF_TYPE *src1, int src1_stride, int h,
                             int w, ConvolveParams *conv_params, int bd) {
  // Undo the compound rounding stages plus the bit-depth scaling so that
  // 'diff' is in 8-bit pixel units.
  int round =
      2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1 + (bd - 8);
  int i, j, m, diff;
  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
      diff = abs(src0[i * src0_stride + j] - src1[i * src1_stride + j]);
      diff = ROUND_POWER_OF_TWO(diff, round);
      m = clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA);
      mask[i * w + j] = which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
    }
  }
}

// C reference implementation (rtcd): dispatch on the DIFFWTD mask type.
void av1_build_compound_diffwtd_mask_d16_c(
    uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const CONV_BUF_TYPE *src0,
    int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w,
    ConvolveParams *conv_params, int bd) {
  switch (mask_type) {
    case DIFFWTD_38:
      diffwtd_mask_d16(mask, 0, 38, src0, src0_stride, src1, src1_stride, h, w,
                       conv_params, bd);
      break;
    case DIFFWTD_38_INV:
      diffwtd_mask_d16(mask, 1, 38, src0, src0_stride, src1, src1_stride, h, w,
                       conv_params, bd);
      break;
    default: assert(0);
  }
}

// As diffwtd_mask_d16, but operating directly on 8-bit pixel predictions
// (no rounding stage to undo).
static void diffwtd_mask(uint8_t *mask, int which_inverse, int mask_base,
                         const uint8_t *src0, int src0_stride,
                         const uint8_t *src1, int src1_stride, int h, int w) {
  int i, j, m, diff;
  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
      diff =
          abs((int)src0[i * src0_stride + j] - (int)src1[i * src1_stride + j]);
      m = clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA);
      mask[i * w + j] = which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
    }
  }
}

// C reference implementation (rtcd): dispatch on the DIFFWTD mask type for
// 8-bit pixel inputs.
void av1_build_compound_diffwtd_mask_c(uint8_t *mask,
                                       DIFFWTD_MASK_TYPE mask_type,
                                       const uint8_t *src0, int src0_stride,
                                       const uint8_t *src1, int src1_stride,
                                       int h, int w) {
  switch (mask_type) {
    case DIFFWTD_38:
      diffwtd_mask(mask, 0, 38, src0, src0_stride, src1, src1_stride, h, w);
      break;
    case DIFFWTD_38_INV:
      diffwtd_mask(mask, 1, 38, src0, src0_stride, src1, src1_stride, h, w);
      break;
    default: assert(0);
  }
}

// High bit-depth DIFFWTD mask. The bd==8 / which_inverse branches are split
// out of the inner loops so each hot loop stays branch-free.
static AOM_FORCE_INLINE void diffwtd_mask_highbd(
    uint8_t *mask, int which_inverse, int mask_base, const uint16_t *src0,
    int src0_stride, const uint16_t *src1, int src1_stride, int h, int w,
    const unsigned int bd) {
  assert(bd >= 8);
  if (bd == 8) {
    if (which_inverse) {
      for (int i = 0; i < h; ++i) {
        for (int j = 0; j < w; ++j) {
          int diff = abs((int)src0[j] - (int)src1[j]) / DIFF_FACTOR;
          unsigned int m = negative_to_zero(mask_base + diff);
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
          mask[j] = AOM_BLEND_A64_MAX_ALPHA - m;
        }
        src0 += src0_stride;
        src1 += src1_stride;
        mask += w;
      }
    } else {
      for (int i = 0; i < h; ++i) {
        for (int j = 0; j < w; ++j) {
          int diff = abs((int)src0[j] - (int)src1[j]) / DIFF_FACTOR;
          unsigned int m = negative_to_zero(mask_base + diff);
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
          mask[j] = m;
        }
        src0 += src0_stride;
        src1 += src1_stride;
        mask += w;
      }
    }
  } else {
    // Scale the difference down to 8-bit units before applying DIFF_FACTOR.
    const unsigned int bd_shift = bd - 8;
    if (which_inverse) {
      for (int i = 0; i < h; ++i) {
        for (int j = 0; j < w; ++j) {
          int diff =
              (abs((int)src0[j] - (int)src1[j]) >> bd_shift) / DIFF_FACTOR;
          unsigned int m = negative_to_zero(mask_base + diff);
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
          mask[j] = AOM_BLEND_A64_MAX_ALPHA - m;
        }
        src0 += src0_stride;
        src1 += src1_stride;
        mask += w;
      }
    } else {
      for (int i = 0; i < h; ++i) {
        for (int j = 0; j < w; ++j) {
          int diff =
              (abs((int)src0[j] - (int)src1[j]) >> bd_shift) / DIFF_FACTOR;
          unsigned int m = negative_to_zero(mask_base + diff);
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
          mask[j] = m;
        }
        src0 += src0_stride;
        src1 += src1_stride;
        mask += w;
      }
    }
  }
}

// C reference implementation (rtcd): high bit-depth DIFFWTD mask dispatch.
void av1_build_compound_diffwtd_mask_highbd_c(
    uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0,
    int src0_stride, const uint8_t *src1, int src1_stride, int h, int w,
    int bd) {
  switch (mask_type) {
    case DIFFWTD_38:
      diffwtd_mask_highbd(mask, 0, 38, CONVERT_TO_SHORTPTR(src0), src0_stride,
                          CONVERT_TO_SHORTPTR(src1), src1_stride, h, w, bd);
      break;
    case DIFFWTD_38_INV:
      diffwtd_mask_highbd(mask, 1, 38, CONVERT_TO_SHORTPTR(src0), src0_stride,
                          CONVERT_TO_SHORTPTR(src1), src1_stride, h, w, bd);
      break;
    default: assert(0);
  }
}

// Fill wedge_mask_obl with the master masks for all wedge directions and both
// signs. The OBLIQUE63 and VERTICAL masters are built first (copied from the
// precomputed tables, or generated analytically), then the remaining
// directions are derived by transpose / reflection / complement.
static void init_wedge_master_masks() {
  int i, j;
  const int w = MASK_MASTER_SIZE;
  const int h = MASK_MASTER_SIZE;
  const int stride = MASK_MASTER_STRIDE;
  // Note: index [0] stores the masters, and [1] its complement.
#if USE_PRECOMPUTED_WEDGE_MASK
  // Generate prototype by shifting the masters
  int shift = h / 4;
  for (i = 0; i < h; i += 2) {
    shift_copy(wedge_master_oblique_even,
               &wedge_mask_obl[0][WEDGE_OBLIQUE63][i * stride], shift,
               MASK_MASTER_SIZE);
    shift--;
    shift_copy(wedge_master_oblique_odd,
               &wedge_mask_obl[0][WEDGE_OBLIQUE63][(i + 1) * stride], shift,
               MASK_MASTER_SIZE);
    memcpy(&wedge_mask_obl[0][WEDGE_VERTICAL][i * stride],
           wedge_master_vertical,
           MASK_MASTER_SIZE * sizeof(wedge_master_vertical[0]));
    memcpy(&wedge_mask_obl[0][WEDGE_VERTICAL][(i + 1) * stride],
           wedge_master_vertical,
           MASK_MASTER_SIZE * sizeof(wedge_master_vertical[0]));
  }
#else
  // Analytic generation. Equation of the wedge boundary line:
  // f(x, y) = a[0]*(x - a[2]*w/8) + a[1]*(y - a[3]*h/8) = 0; the mask value
  // is a smoothed (tanh) step across the signed distance to that line.
  static const double smoother_param = 2.85;
  const int a[2] = { 2, 1 };
  const double asqrt = sqrt(a[0] * a[0] + a[1] * a[1]);
  for (i = 0; i < h; i++) {
    for (j = 0; j < w; ++j) {
      int x = (2 * j + 1 - w);
      int y = (2 * i + 1 - h);
      double d = (a[0] * x + a[1] * y) / asqrt;
      const int msk = (int)rint((1.0 + tanh(d / smoother_param)) * 32);
      wedge_mask_obl[0][WEDGE_OBLIQUE63][i * stride + j] = msk;
      const int mskx = (int)rint((1.0 + tanh(x / smoother_param)) * 32);
      wedge_mask_obl[0][WEDGE_VERTICAL][i * stride + j] = mskx;
    }
  }
#endif  // USE_PRECOMPUTED_WEDGE_MASK
  // Derive the remaining directions and the complement ([1]) masks:
  // OBLIQUE27 is the transpose of OBLIQUE63; OBLIQUE117/153 are reflected
  // complements; HORIZONTAL is the transpose of VERTICAL.
  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
      const int msk = wedge_mask_obl[0][WEDGE_OBLIQUE63][i * stride + j];
      wedge_mask_obl[0][WEDGE_OBLIQUE27][j * stride + i] = msk;
      wedge_mask_obl[0][WEDGE_OBLIQUE117][i * stride + w - 1 - j] =
          wedge_mask_obl[0][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] =
              (1 << WEDGE_WEIGHT_BITS) - msk;
      wedge_mask_obl[1][WEDGE_OBLIQUE63][i * stride + j] =
          wedge_mask_obl[1][WEDGE_OBLIQUE27][j * stride + i] =
              (1 << WEDGE_WEIGHT_BITS) - msk;
      wedge_mask_obl[1][WEDGE_OBLIQUE117][i * stride + w - 1 - j] =
          wedge_mask_obl[1][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] = msk;
      const int mskx = wedge_mask_obl[0][WEDGE_VERTICAL][i * stride + j];
      wedge_mask_obl[0][WEDGE_HORIZONTAL][j * stride + i] = mskx;
      wedge_mask_obl[1][WEDGE_VERTICAL][i * stride + j] =
          wedge_mask_obl[1][WEDGE_HORIZONTAL][j * stride + i] =
              (1 << WEDGE_WEIGHT_BITS) - mskx;
    }
  }
}

#if !USE_PRECOMPUTED_WEDGE_SIGN
// If the signs for the wedges for various blocksizes are
// inconsistent flip the sign flag. Do it only once for every
// wedge codebook.
static void init_wedge_signs() {
  BLOCK_SIZE sb_type;
  memset(wedge_signflip_lookup, 0, sizeof(wedge_signflip_lookup));
  for (sb_type = BLOCK_4X4; sb_type < BLOCK_SIZES_ALL; ++sb_type) {
    const int bw = block_size_wide[sb_type];
    const int bh = block_size_high[sb_type];
    const wedge_params_type wedge_params = wedge_params_lookup[sb_type];
    const int wbits = wedge_params.bits;
    const int wtypes = 1 << wbits;
    int i, w;
    if (wbits) {
      for (w = 0; w < wtypes; ++w) {
        // Get the mask master, i.e. index [0]
        const uint8_t *mask = get_wedge_mask_inplace(w, 0, sb_type);
        // Average the mask over the block's top row and left column only —
        // a cheap proxy for the full-block average.
        int avg = 0;
        for (i = 0; i < bw; ++i) avg += mask[i];
        for (i = 1; i < bh; ++i) avg += mask[i * MASK_MASTER_STRIDE];
        avg = (avg + (bw + bh - 1) / 2) / (bw + bh - 1);
        // Default sign of this wedge is 1 if the average < 32, 0 otherwise.
        // If default sign is 1:
        //   If sign requested is 0, we need to flip the sign and return
        //   the complement i.e. index [1] instead. If sign requested is 1
        //   we need to flip the sign and return index [0] instead.
        // If default sign is 0:
        //   If sign requested is 0, we need to return index [0] the master
        //   if sign requested is 1, we need to return the complement index [1]
        //   instead.
        wedge_params.signflip[w] = (avg < 32);
      }
    }
  }
}
#endif  // !USE_PRECOMPUTED_WEDGE_SIGN

// Copy every wedge mask (both signs) out of the master masks into the packed
// contiguous buffer wedge_mask_buf, and record the per-wedge pointers in
// wedge_masks via wedge_params_lookup.
static void init_wedge_masks() {
  uint8_t *dst = wedge_mask_buf;
  BLOCK_SIZE bsize;
  memset(wedge_masks, 0, sizeof(wedge_masks));
  for (bsize = BLOCK_4X4; bsize < BLOCK_SIZES_ALL; ++bsize) {
    const uint8_t *mask;
    const int bw = block_size_wide[bsize];
    const int bh = block_size_high[bsize];
    const wedge_params_type *wedge_params = &wedge_params_lookup[bsize];
    const int wbits = wedge_params->bits;
    const int wtypes = 1 << wbits;
    int w;
    if (wbits == 0) continue;
    for (w = 0; w < wtypes; ++w) {
      mask = get_wedge_mask_inplace(w, 0, bsize);
      aom_convolve_copy(mask, MASK_MASTER_STRIDE, dst, bw, NULL, 0, NULL, 0, bw,
                        bh);
      wedge_params->masks[0][w] = dst;
      dst += bw * bh;

      mask = get_wedge_mask_inplace(w, 1, bsize);
      aom_convolve_copy(mask, MASK_MASTER_STRIDE, dst, bw, NULL, 0, NULL, 0, bw,
                        bh);
      wedge_params->masks[1][w] = dst;
      dst += bw * bh;
    }
    assert(sizeof(wedge_mask_buf) >= (size_t)(dst - wedge_mask_buf));
  }
}

// One-time initialization of all wedge mask tables. Must be called before any
// wedge compound prediction is built.
void av1_init_wedge_masks() {
  init_wedge_master_masks();
#if !USE_PRECOMPUTED_WEDGE_SIGN
  init_wedge_signs();
#endif  // !USE_PRECOMPUTED_WEDGE_SIGN
  init_wedge_masks();
}

// Blend two intermediate (d16) predictions into dst using the compound mask,
// choosing the high or low bit-depth blend kernel as appropriate.
static void build_masked_compound_no_round(
    uint8_t *dst, int dst_stride, const CONV_BUF_TYPE *src0, int src0_stride,
    const CONV_BUF_TYPE *src1, int src1_stride,
    const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type, int h,
    int w, ConvolveParams *conv_params, MACROBLOCKD *xd) {
  // Derive subsampling from h and w passed in. May be refactored to
  // pass in subsampling factors directly.
  const int subh = (2 << mi_size_high_log2[sb_type]) == h;
  const int subw = (2 << mi_size_wide_log2[sb_type]) == w;
  const uint8_t *mask = av1_get_compound_type_mask(comp_data, sb_type);
  if (is_cur_buf_hbd(xd)) {
    aom_highbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
                                  src1_stride, mask, block_size_wide[sb_type],
                                  w, h, subw, subh, conv_params, xd->bd);
  } else {
    aom_lowbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
                                 src1_stride, mask, block_size_wide[sb_type], w,
                                 h, subw, subh, conv_params);
  }
}

// Build the second reference's prediction of a masked compound block into a
// temporary buffer, derive the DIFFWTD mask if needed (luma only), and blend
// it with the first reference's prediction (already in conv_params->dst).
void av1_make_masked_inter_predictor(
    const uint8_t *pre, int pre_stride, uint8_t *dst, int dst_stride,
    const SubpelParams *subpel_params, const struct scale_factors *sf, int w,
    int h, ConvolveParams *conv_params, InterpFilters interp_filters, int plane,
    const WarpTypesAllowed *warp_types, int p_col, int p_row, int ref,
    MACROBLOCKD *xd, int can_use_previous) {
  MB_MODE_INFO *mi = xd->mi[0];
  (void)dst;
  (void)dst_stride;
  mi->interinter_comp.seg_mask = xd->seg_mask;
  const INTERINTER_COMPOUND_DATA *comp_data = &mi->interinter_comp;

  // We're going to call av1_make_inter_predictor to generate a prediction into
  // a temporary buffer, then will blend that temporary buffer with that from
  // the other reference.
  //
  // 2 bytes per pixel: tmp_buf is reinterpreted as CONV_BUF_TYPE (16-bit).
#define INTER_PRED_BYTES_PER_PIXEL 2

  DECLARE_ALIGNED(32, uint8_t,
                  tmp_buf[INTER_PRED_BYTES_PER_PIXEL * MAX_SB_SQUARE]);
#undef INTER_PRED_BYTES_PER_PIXEL

  uint8_t *tmp_dst = get_buf_by_bd(xd, tmp_buf);

  const int tmp_buf_stride = MAX_SB_SIZE;
  // Save the caller's destination, then redirect the convolve output into the
  // temporary buffer for the second reference's prediction.
  CONV_BUF_TYPE *org_dst = conv_params->dst;
  int org_dst_stride = conv_params->dst_stride;
  CONV_BUF_TYPE *tmp_buf16 = (CONV_BUF_TYPE *)tmp_buf;
  conv_params->dst = tmp_buf16;
  conv_params->dst_stride = tmp_buf_stride;
  assert(conv_params->do_average == 0);

  // This will generate a prediction in tmp_buf for the second reference
  av1_make_inter_predictor(pre, pre_stride, tmp_dst, MAX_SB_SIZE, subpel_params,
                           sf, w, h, conv_params, interp_filters, warp_types,
                           p_col, p_row, plane, ref, mi, 0, xd,
                           can_use_previous);

  // The DIFFWTD mask is derived once, from luma, and reused for chroma.
  if (!plane && comp_data->type == COMPOUND_DIFFWTD) {
    av1_build_compound_diffwtd_mask_d16(
        comp_data->seg_mask, comp_data->mask_type, org_dst, org_dst_stride,
        tmp_buf16, tmp_buf_stride, h, w, conv_params, xd->bd);
  }
  build_masked_compound_no_round(dst, dst_stride, org_dst, org_dst_stride,
                                 tmp_buf16, tmp_buf_stride, comp_data,
                                 mi->sb_type, h, w, conv_params, xd);
}

// Compute the distance-weighted compound prediction weights (fwd/bck offsets)
// from the temporal distances of the two references to the current frame, or
// disable distance weighting when compound_idx says to use plain averaging.
void av1_dist_wtd_comp_weight_assign(const AV1_COMMON *cm,
                                     const MB_MODE_INFO *mbmi, int order_idx,
                                     int *fwd_offset, int *bck_offset,
                                     int *use_dist_wtd_comp_avg,
                                     int is_compound) {
  assert(fwd_offset != NULL && bck_offset != NULL);
  if (!is_compound || mbmi->compound_idx) {
    *use_dist_wtd_comp_avg = 0;
    return;
  }

  *use_dist_wtd_comp_avg = 1;
  const RefCntBuffer *const bck_buf = get_ref_frame_buf(cm, mbmi->ref_frame[0]);
  const RefCntBuffer *const fwd_buf = get_ref_frame_buf(cm, mbmi->ref_frame[1]);
  const int cur_frame_index = cm->cur_frame->order_hint;
  int bck_frame_index = 0, fwd_frame_index = 0;

  if (bck_buf != NULL) bck_frame_index = bck_buf->order_hint;
  if (fwd_buf != NULL) fwd_frame_index = fwd_buf->order_hint;

  int d0 = clamp(abs(get_relative_dist(&cm->seq_params.order_hint_info,
                                       fwd_frame_index, cur_frame_index)),
                 0, MAX_FRAME_DISTANCE);
  int d1 = clamp(abs(get_relative_dist(&cm->seq_params.order_hint_info,
                                       cur_frame_index, bck_frame_index)),
                 0, MAX_FRAME_DISTANCE);

  const int order = d0 <= d1;

  // A zero distance means one reference coincides with the current frame;
  // use the most lopsided weight pair.
  if (d0 == 0 || d1 == 0) {
    *fwd_offset = quant_dist_lookup_table[order_idx][3][order];
    *bck_offset = quant_dist_lookup_table[order_idx][3][1 - order];
    return;
  }

  // Find the first quantized weight ratio that the actual distance ratio does
  // not exceed; if none matches, i ends at 3 (the final table row).
  int i;
  for (i = 0; i < 3; ++i) {
    int c0 = quant_dist_weight[i][order];
    int c1 = quant_dist_weight[i][!order];
    int d0_c0 = d0 * c0;
    int d1_c1 = d1 * c1;
    if ((d0 > d1 && d0_c0 < d1_c1) || (d0 <= d1 && d0_c0 > d1_c1)) break;
  }

  *fwd_offset = quant_dist_lookup_table[order_idx][i][order];
  *bck_offset = quant_dist_lookup_table[order_idx][i][1 - order];
}

// Point each plane's dst buffer at the right offset inside the destination
// frame buffer for the block at (mi_row, mi_col).
void av1_setup_dst_planes(struct macroblockd_plane *planes, BLOCK_SIZE bsize,
                          const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
                          const int plane_start, const int plane_end) {
  // We use AOMMIN(num_planes, MAX_MB_PLANE) instead of num_planes to quiet
  // the static analysis warnings.
  for (int i = plane_start; i < AOMMIN(plane_end, MAX_MB_PLANE); ++i) {
    struct macroblockd_plane *const pd = &planes[i];
    const int is_uv = i > 0;
    setup_pred_plane(&pd->dst, bsize, src->buffers[i], src->crop_widths[is_uv],
                     src->crop_heights[is_uv], src->strides[is_uv], mi_row,
                     mi_col, NULL, pd->subsampling_x, pd->subsampling_y);
  }
}

// Point each plane's pre[idx] buffer at the reference frame 'src' for the
// block at (mi_row, mi_col), with scaling factors sf.
void av1_setup_pre_planes(MACROBLOCKD *xd, int idx,
                          const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
                          const struct scale_factors *sf,
                          const int num_planes) {
  if (src != NULL) {
    // We use AOMMIN(num_planes, MAX_MB_PLANE) instead of num_planes to quiet
    // the static analysis warnings.
    for (int i = 0; i < AOMMIN(num_planes, MAX_MB_PLANE); ++i) {
      struct macroblockd_plane *const pd = &xd->plane[i];
      const int is_uv = i > 0;
      setup_pred_plane(&pd->pre[idx], xd->mi[0]->sb_type, src->buffers[i],
                       src->crop_widths[is_uv], src->crop_heights[is_uv],
                       src->strides[is_uv], mi_row, mi_col, sf,
                       pd->subsampling_x, pd->subsampling_y);
    }
  }
}

// OBMC blending weights for the current block, indexed by distance from the
// overlapped edge: obmc_mask_N[overlap_position]. Weights ramp up to 64
// (AOM_BLEND_A64), i.e. farther from the edge the neighbor contributes less.
static const uint8_t obmc_mask_1[1] = { 64 };
DECLARE_ALIGNED(2, static const uint8_t, obmc_mask_2[2]) = { 45, 64 };

DECLARE_ALIGNED(4, static const uint8_t, obmc_mask_4[4]) = { 39, 50, 59, 64 };

static const uint8_t obmc_mask_8[8] = { 36, 42, 48, 53, 57, 61, 64, 64 };

static const uint8_t obmc_mask_16[16] = { 34, 37, 40, 43, 46, 49, 52, 54,
                                          56, 58, 60, 61, 64, 64, 64, 64 };

static const uint8_t obmc_mask_32[32] = { 33, 35, 36, 38, 40, 41, 43, 44,
                                          45, 47, 48, 50, 51, 52, 53, 55,
                                          56, 57, 58, 59, 60, 60, 61, 62,
                                          64, 64, 64, 64, 64, 64, 64, 64 };

static const uint8_t obmc_mask_64[64] = {
  33, 34, 35, 35, 36, 37, 38, 39, 40, 40, 41, 42, 43, 44, 44, 44,
  45, 46, 47, 47, 48, 49, 50, 51, 51, 51, 52, 52, 53, 54, 55, 56,
  56, 56, 57, 57, 58, 58, 59, 60, 60, 60, 60, 60, 61, 62, 62, 62,
  62, 62, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
};

// Select the OBMC blending mask for the given overlap length (must be a
// power of two between 1 and 64).
const uint8_t *av1_get_obmc_mask(int length) {
  switch (length) {
    case 1: return obmc_mask_1;
    case 2: return obmc_mask_2;
    case 4: return obmc_mask_4;
    case 8: return obmc_mask_8;
    case 16: return obmc_mask_16;
    case 32: return obmc_mask_32;
    case 64: return obmc_mask_64;
    default: assert(0); return NULL;
  }
}

// foreach_overlappable_nb_* visitor that simply counts neighbors: fun_ctxt
// points at the int counter to increment.
static INLINE void increment_int_ptr(MACROBLOCKD *xd, int rel_mi_rc,
                                     uint8_t mi_hw, MB_MODE_INFO *mi,
                                     void *fun_ctxt, const int num_planes) {
  (void)xd;
  (void)rel_mi_rc;
  (void)mi_hw;
  (void)mi;
  ++*(int *)fun_ctxt;
  (void)num_planes;
}

// Count the overlappable neighboring blocks above and to the left of the
// current block and store the counts in mbmi->overlappable_neighbors[0]
// (above) and [1] (left). Block sizes that do not allow motion-variation
// modes always report 0 for both.
void av1_count_overlappable_neighbors(const AV1_COMMON *cm, MACROBLOCKD *xd,
                                      int mi_row, int mi_col) {
  MB_MODE_INFO *mbmi = xd->mi[0];

  mbmi->overlappable_neighbors[0] = 0;
  mbmi->overlappable_neighbors[1] = 0;

  if (!is_motion_variation_allowed_bsize(mbmi->sb_type)) return;

  // INT_MAX: visit every neighbor (no cap); increment_int_ptr bumps the
  // counter once per overlappable neighbor.
  foreach_overlappable_nb_above(cm, xd, mi_col, INT_MAX, increment_int_ptr,
                                &mbmi->overlappable_neighbors[0]);
  foreach_overlappable_nb_left(cm, xd, mi_row, INT_MAX, increment_int_ptr,
                               &mbmi->overlappable_neighbors[1]);
}

// HW does not support < 4x4 prediction. To limit the bandwidth requirement, if
// block-size of current plane is smaller than 8x8, always only blend with the
// left neighbor(s) (skip blending with the above side).
#define DISABLE_CHROMA_U8X8_OBMC 0  // 0: one-sided obmc; 1: disable

// Return nonzero if OBMC blending should be skipped for this plane in the
// given direction. dir == 0 is the above-neighbor pass and dir == 1 the
// left-neighbor pass (see build_obmc_inter_pred_{above,left} below). With the
// default DISABLE_CHROMA_U8X8_OBMC == 0, sub-8x8 plane blocks skip only the
// above pass ("one-sided" OBMC); with 1 they skip both.
int av1_skip_u4x4_pred_in_obmc(BLOCK_SIZE bsize,
                               const struct macroblockd_plane *pd, int dir) {
  assert(is_motion_variation_allowed_bsize(bsize));

  const BLOCK_SIZE bsize_plane =
      get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
  switch (bsize_plane) {
#if DISABLE_CHROMA_U8X8_OBMC
    case BLOCK_4X4:
    case BLOCK_8X4:
    case BLOCK_4X8: return 1; break;
#else
    case BLOCK_4X4:
    case BLOCK_8X4:
    case BLOCK_4X8: return dir == 0; break;
#endif
    default: return 0;
  }
}

// Rewrite a neighbor's mode info so its prediction is built as a
// single-reference, plain-average block: drop the second reference frame and
// force the inter-inter compound type to COMPOUND_AVERAGE.
void av1_modify_neighbor_predictor_for_obmc(MB_MODE_INFO *mbmi) {
  mbmi->ref_frame[1] = NONE_FRAME;
  mbmi->interinter_comp.type = COMPOUND_AVERAGE;

  return;
}

// Context handed to the OBMC blend visitors: per-plane pointers to the
// neighbor predictions (adjacent) and their strides (adjacent_stride).
struct obmc_inter_pred_ctxt {
  uint8_t **adjacent;
  int *adjacent_stride;
};

// Blend the top overlap strip of the current block with the prediction built
// from one above neighbor. The overlap height is half the block height,
// capped at half of 64; blending uses a vertical mask so weights vary by row.
static INLINE void build_obmc_inter_pred_above(MACROBLOCKD *xd, int rel_mi_col,
                                               uint8_t above_mi_width,
                                               MB_MODE_INFO *above_mi,
                                               void *fun_ctxt,
                                               const int num_planes) {
  (void)above_mi;
  struct obmc_inter_pred_ctxt *ctxt = (struct obmc_inter_pred_ctxt *)fun_ctxt;
  const BLOCK_SIZE bsize = xd->mi[0]->sb_type;
  const int is_hbd = is_cur_buf_hbd(xd);
  const int overlap =
      AOMMIN(block_size_high[bsize], block_size_high[BLOCK_64X64]) >> 1;

  for (int plane = 0; plane < num_planes; ++plane) {
    const struct macroblockd_plane *pd = &xd->plane[plane];
    const int bw = (above_mi_width * MI_SIZE) >> pd->subsampling_x;
    const int bh = overlap >> pd->subsampling_y;
    const int plane_col = (rel_mi_col * MI_SIZE) >> pd->subsampling_x;

    if (av1_skip_u4x4_pred_in_obmc(bsize, pd, 0)) continue;

    const int dst_stride = pd->dst.stride;
    uint8_t *const dst = &pd->dst.buf[plane_col];
    const int tmp_stride = ctxt->adjacent_stride[plane];
    const uint8_t *const tmp = &ctxt->adjacent[plane][plane_col];
    // Mask length matches the overlap height (one weight per row).
    const uint8_t *const mask = av1_get_obmc_mask(bh);

    // Blend in place: dst is both the first source and the destination.
    if (is_hbd)
      aom_highbd_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp,
                                 tmp_stride, mask, bw, bh, xd->bd);
    else
      aom_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride,
                          mask, bw, bh);
  }
}

// Blend the left overlap strip of the current block with the prediction built
// from one left neighbor. Mirrors build_obmc_inter_pred_above: the overlap
// width is half the block width capped at 32, with a horizontal mask so
// weights vary by column.
static INLINE void build_obmc_inter_pred_left(MACROBLOCKD *xd, int rel_mi_row,
                                              uint8_t left_mi_height,
                                              MB_MODE_INFO *left_mi,
                                              void *fun_ctxt,
                                              const int num_planes) {
  (void)left_mi;
  struct obmc_inter_pred_ctxt *ctxt = (struct obmc_inter_pred_ctxt *)fun_ctxt;
  const BLOCK_SIZE bsize = xd->mi[0]->sb_type;
  const int overlap =
      AOMMIN(block_size_wide[bsize], block_size_wide[BLOCK_64X64]) >> 1;
  const int is_hbd = is_cur_buf_hbd(xd);

  for (int plane = 0; plane < num_planes; ++plane) {
    const struct macroblockd_plane *pd = &xd->plane[plane];
    const int bw = overlap >> pd->subsampling_x;
    const int bh = (left_mi_height * MI_SIZE) >> pd->subsampling_y;
    const int plane_row = (rel_mi_row * MI_SIZE) >> pd->subsampling_y;

    if (av1_skip_u4x4_pred_in_obmc(bsize, pd, 1)) continue;

    const int dst_stride = pd->dst.stride;
    uint8_t *const dst = &pd->dst.buf[plane_row * dst_stride];
    const int tmp_stride = ctxt->adjacent_stride[plane];
    const uint8_t *const tmp = &ctxt->adjacent[plane][plane_row * tmp_stride];
    // Mask length matches the overlap width (one weight per column).
    const uint8_t *const mask = av1_get_obmc_mask(bw);

    if (is_hbd)
      aom_highbd_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp,
                                 tmp_stride, mask, bw, bh, xd->bd);
    else
      aom_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride,
                          mask, bw, bh);
  }
}

// This function combines motion compensated predictions that are generated by
// top/left neighboring blocks' inter predictors with the regular inter
// prediction. We assume the original prediction (bmc) is stored in
// xd->plane[].dst.buf
void av1_build_obmc_inter_prediction(const AV1_COMMON *cm, MACROBLOCKD *xd,
                                     int mi_row, int mi_col,
                                     uint8_t *above[MAX_MB_PLANE],
                                     int above_stride[MAX_MB_PLANE],
                                     uint8_t *left[MAX_MB_PLANE],
                                     int left_stride[MAX_MB_PLANE]) {
  const BLOCK_SIZE bsize = xd->mi[0]->sb_type;

  // handle above row
  struct obmc_inter_pred_ctxt ctxt_above = { above, above_stride };
  foreach_overlappable_nb_above(cm, xd, mi_col,
                                max_neighbor_obmc[mi_size_wide_log2[bsize]],
                                build_obmc_inter_pred_above, &ctxt_above);

  // handle left column
  struct obmc_inter_pred_ctxt ctxt_left = { left, left_stride };
  foreach_overlappable_nb_left(cm, xd, mi_row,
                               max_neighbor_obmc[mi_size_high_log2[bsize]],
                               build_obmc_inter_pred_left, &ctxt_left);
}

// Prepare xd so a prediction can be built FROM one above neighbor: rewrite
// the neighbor's mode info for OBMC, aim the per-plane dst buffers at the
// caller's temporary buffers, set up the neighbor's reference planes/scale
// factors, and recompute the horizontal prediction-boundary edges.
// Errors out via xd->error_info if a reference has invalid scale dimensions.
void av1_setup_build_prediction_by_above_pred(
    MACROBLOCKD *xd, int rel_mi_col, uint8_t above_mi_width,
    MB_MODE_INFO *above_mbmi, struct build_prediction_ctxt *ctxt,
    const int num_planes) {
  // Neighbors are predicted at a minimum granularity of 8x8.
  const BLOCK_SIZE a_bsize = AOMMAX(BLOCK_8X8, above_mbmi->sb_type);
  const int above_mi_col = ctxt->mi_col + rel_mi_col;

  av1_modify_neighbor_predictor_for_obmc(above_mbmi);

  for (int j = 0; j < num_planes; ++j) {
    struct macroblockd_plane *const pd = &xd->plane[j];
    setup_pred_plane(&pd->dst, a_bsize, ctxt->tmp_buf[j], ctxt->tmp_width[j],
                     ctxt->tmp_height[j], ctxt->tmp_stride[j], 0, rel_mi_col,
                     NULL, pd->subsampling_x, pd->subsampling_y);
  }

  const int num_refs = 1 + has_second_ref(above_mbmi);

  for (int ref = 0; ref < num_refs; ++ref) {
    const MV_REFERENCE_FRAME frame = above_mbmi->ref_frame[ref];

    const RefCntBuffer *const ref_buf = get_ref_frame_buf(ctxt->cm, frame);
    const struct scale_factors *const sf =
        get_ref_scale_factors_const(ctxt->cm, frame);
    xd->block_ref_scale_factors[ref] = sf;
    if ((!av1_is_valid_scale(sf)))
      aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
                         "Reference frame has invalid dimensions");
    av1_setup_pre_planes(xd, ref, &ref_buf->buf, ctxt->mi_row, above_mi_col, sf,
                         num_planes);
  }

  // Edges are expressed in 1/8-pel units (MI_SIZE * 8 per mi unit).
  xd->mb_to_left_edge = 8 * MI_SIZE * (-above_mi_col);
  xd->mb_to_right_edge = ctxt->mb_to_far_edge +
                         (xd->n4_w - rel_mi_col - above_mi_width) * MI_SIZE * 8;
}

// Prepare xd so a prediction can be built FROM one left neighbor. Mirrors
// av1_setup_build_prediction_by_above_pred with rows/columns swapped and the
// vertical prediction-boundary edges recomputed instead.
void av1_setup_build_prediction_by_left_pred(MACROBLOCKD *xd, int rel_mi_row,
                                             uint8_t left_mi_height,
                                             MB_MODE_INFO *left_mbmi,
                                             struct build_prediction_ctxt *ctxt,
                                             const int num_planes) {
  const BLOCK_SIZE l_bsize = AOMMAX(BLOCK_8X8, left_mbmi->sb_type);
  const int left_mi_row = ctxt->mi_row + rel_mi_row;

  av1_modify_neighbor_predictor_for_obmc(left_mbmi);

  for (int j = 0; j < num_planes; ++j) {
    struct macroblockd_plane *const pd = &xd->plane[j];
    setup_pred_plane(&pd->dst, l_bsize, ctxt->tmp_buf[j], ctxt->tmp_width[j],
                     ctxt->tmp_height[j], ctxt->tmp_stride[j], rel_mi_row, 0,
                     NULL, pd->subsampling_x, pd->subsampling_y);
  }

  const int num_refs = 1 + has_second_ref(left_mbmi);

  for (int ref = 0; ref < num_refs; ++ref) {
    const MV_REFERENCE_FRAME frame = left_mbmi->ref_frame[ref];

    const RefCntBuffer *const ref_buf = get_ref_frame_buf(ctxt->cm, frame);
    const struct scale_factors *const ref_scale_factors =
        get_ref_scale_factors_const(ctxt->cm, frame);

    xd->block_ref_scale_factors[ref] = ref_scale_factors;
    if ((!av1_is_valid_scale(ref_scale_factors)))
      aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
                         "Reference frame has invalid dimensions");
    av1_setup_pre_planes(xd, ref, &ref_buf->buf, left_mi_row, ctxt->mi_col,
                         ref_scale_factors, num_planes);
  }

  xd->mb_to_top_edge = 8 * MI_SIZE * (-left_mi_row);
  xd->mb_to_bottom_edge =
      ctxt->mb_to_far_edge +
      (xd->n4_h - rel_mi_row - left_mi_height) * MI_SIZE * 8;
}

/* clang-format off */
// 1-D inter-intra blending weights, indexed by (scaled) distance from the
// intra edge; weights decay monotonically with distance. NOTE(review):
// presumably in 1/64 units consumed by aom_blend_a64_mask -- confirm in
// aom_dsp/blend.h.
static const uint8_t ii_weights1d[MAX_SB_SIZE] = {
  60, 58, 56, 54, 52, 50, 48, 47, 45, 44, 42, 41, 39, 38, 37, 35, 34, 33, 32,
  31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 22, 21, 20, 19, 19, 18, 18, 17, 16,
  16, 15, 15, 14, 14, 13, 13, 12, 12, 12, 11, 11, 10, 10, 10,  9,  9,  9,  8,
   8,  8,  8,  7,  7,  7,  7,  6,  6,  6,  6,  6,  5,  5,  5,  5,  5,  4,  4,
   4,  4,  4,  4,  4,  4,  3,  3,  3,  3,  3,  3,  3,  3,  3,  2,  2,  2,  2,
   2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  1,  1,  1,  1,  1,  1,  1,  1,
   1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1
};
// Per-block-size step into ii_weights1d (index used is i * size_scale), so
// smaller blocks traverse the weight ramp faster.
// NOTE(review): not declared const -- appears to be read-only in this file;
// worth confirming nothing writes it.
static uint8_t ii_size_scales[BLOCK_SIZES_ALL] = {
    32, 16, 16, 16, 8, 8, 8, 4,
    4,  4,  2,  2,  2, 1, 1, 1,
    8,  8,  4,  4,  2, 2
};
/* clang-format on */

// Fill `mask` (bw x bh, row stride `stride`) with the smooth inter-intra
// blending weights for the given mode:
//   II_V_PRED:      weights vary by row (vertical ramp),
//   II_H_PRED:      weights vary by column (horizontal ramp),
//   II_SMOOTH_PRED: min(row, col) ramp,
//   II_DC_PRED / default: constant 32 everywhere.
static void build_smooth_interintra_mask(uint8_t *mask, int stride,
                                         BLOCK_SIZE plane_bsize,
                                         INTERINTRA_MODE mode) {
  int i, j;
  const int bw = block_size_wide[plane_bsize];
  const int bh = block_size_high[plane_bsize];
  const int size_scale = ii_size_scales[plane_bsize];

  switch (mode) {
    case II_V_PRED:
      for (i = 0; i < bh; ++i) {
        memset(mask, ii_weights1d[i * size_scale], bw * sizeof(mask[0]));
        mask += stride;
      }
      break;

    case II_H_PRED:
      for (i = 0; i < bh; ++i) {
        for (j = 0; j < bw; ++j) mask[j] = ii_weights1d[j * size_scale];
        mask += stride;
      }
      break;

    case II_SMOOTH_PRED:
      for (i = 0; i < bh; ++i) {
        for (j = 0; j < bw; ++j)
          mask[j] = ii_weights1d[(i < j ? i : j) * size_scale];
        mask += stride;
      }
      break;

    case II_DC_PRED:
    default:
      for (i = 0; i < bh; ++i) {
        memset(mask, 32, bw * sizeof(mask[0]));
        mask += stride;
      }
      break;
  }
}

// Combine an inter prediction with an intra prediction into `comppred`
// (8-bit path). With wedge inter-intra, the precomputed wedge mask for the
// *luma* block size is used (note the mask row stride is
// block_size_wide[bsize], not bw, with subw/subh compensating for chroma
// subsampling); otherwise a smooth mask is built from the inter-intra mode.
static void combine_interintra(INTERINTRA_MODE mode,
                               int8_t use_wedge_interintra, int wedge_index,
                               int wedge_sign, BLOCK_SIZE bsize,
                               BLOCK_SIZE plane_bsize, uint8_t *comppred,
                               int compstride, const uint8_t *interpred,
                               int interstride, const uint8_t *intrapred,
                               int intrastride) {
  const int bw = block_size_wide[plane_bsize];
  const int bh = block_size_high[plane_bsize];

  if (use_wedge_interintra) {
    if (is_interintra_wedge_used(bsize)) {
      const uint8_t *mask =
          av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
      // subw/subh flag whether the plane is subsampled relative to bsize.
      const int subw = 2 * mi_size_wide[bsize] == bw;
      const int subh = 2 * mi_size_high[bsize] == bh;
      aom_blend_a64_mask(comppred, compstride, intrapred, intrastride,
                         interpred, interstride, mask, block_size_wide[bsize],
                         bw, bh, subw, subh);
    }
    return;
  }

  uint8_t mask[MAX_SB_SQUARE];
  build_smooth_interintra_mask(mask, bw, plane_bsize, mode);
  aom_blend_a64_mask(comppred, compstride, intrapred, intrastride, interpred,
                     interstride, mask, bw, bw, bh, 0, 0);
}

// High-bit-depth counterpart of combine_interintra. Buffers are CONVERT_TO_
// BYTEPTR-style uint8_t* handles; the blend mask itself stays 8-bit.
static void combine_interintra_highbd(
    INTERINTRA_MODE mode, int8_t use_wedge_interintra, int wedge_index,
    int wedge_sign, BLOCK_SIZE bsize, BLOCK_SIZE plane_bsize,
    uint8_t *comppred8, int compstride, const uint8_t *interpred8,
    int interstride, const uint8_t *intrapred8, int intrastride, int bd) {
  const int bw = block_size_wide[plane_bsize];
  const int bh = block_size_high[plane_bsize];

  if (use_wedge_interintra) {
    if (is_interintra_wedge_used(bsize)) {
      const uint8_t *mask =
          av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
      const int subh = 2 * mi_size_high[bsize] == bh;
      const int subw = 2 * mi_size_wide[bsize] == bw;
      aom_highbd_blend_a64_mask(comppred8, compstride, intrapred8, intrastride,
                                interpred8, interstride, mask,
                                block_size_wide[bsize], bw, bh, subw, subh, bd);
    }
    return;
  }

  uint8_t mask[MAX_SB_SQUARE];
  build_smooth_interintra_mask(mask, bw, plane_bsize, mode);
  aom_highbd_blend_a64_mask(comppred8, compstride, intrapred8, intrastride,
                            interpred8, interstride, mask, bw, bw, bh, 0, 0,
                            bd);
}

// Build the intra half of an inter-intra prediction for one plane into
// dst/dst_stride, using the mode mapped from the block's interintra_mode.
// The asserts pin the inter-intra constraints this path relies on: no angle
// deltas, no filter-intra, no intra block copy.
void av1_build_intra_predictors_for_interintra(const AV1_COMMON *cm,
                                               MACROBLOCKD *xd,
                                               BLOCK_SIZE bsize, int plane,
                                               const BUFFER_SET *ctx,
                                               uint8_t *dst, int dst_stride) {
  struct macroblockd_plane *const pd = &xd->plane[plane];
  const int ssx = xd->plane[plane].subsampling_x;
  const int ssy = xd->plane[plane].subsampling_y;
  BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, ssx, ssy);
  PREDICTION_MODE mode = interintra_to_intra_mode[xd->mi[0]->interintra_mode];
  assert(xd->mi[0]->angle_delta[PLANE_TYPE_Y] == 0);
  assert(xd->mi[0]->angle_delta[PLANE_TYPE_UV] == 0);
  assert(xd->mi[0]->filter_intra_mode_info.use_filter_intra == 0);
  assert(xd->mi[0]->use_intrabc == 0);

  av1_predict_intra_block(cm, xd, pd->width, pd->height,
                          max_txsize_rect_lookup[plane_bsize], mode, 0, 0,
                          FILTER_INTRA_MODES, ctx->plane[plane],
                          ctx->stride[plane], dst, dst_stride, 0, 0, plane);
}

// Blend inter and intra predictions for one plane into the plane's dst
// buffer, dispatching to the high-bit-depth or 8-bit combiner based on the
// current buffer type.
void av1_combine_interintra(MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane,
                            const uint8_t *inter_pred, int inter_stride,
                            const uint8_t *intra_pred, int intra_stride) {
  const int ssx = xd->plane[plane].subsampling_x;
  const int ssy = xd->plane[plane].subsampling_y;
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, ssx, ssy);
  if (is_cur_buf_hbd(xd)) {
    combine_interintra_highbd(
        xd->mi[0]->interintra_mode, xd->mi[0]->use_wedge_interintra,
        xd->mi[0]->interintra_wedge_index, xd->mi[0]->interintra_wedge_sign,
        bsize, plane_bsize, xd->plane[plane].dst.buf,
        xd->plane[plane].dst.stride, inter_pred, inter_stride, intra_pred,
        intra_stride, xd->bd);
    return;
  }
  combine_interintra(
      xd->mi[0]->interintra_mode, xd->mi[0]->use_wedge_interintra,
      xd->mi[0]->interintra_wedge_index, xd->mi[0]->interintra_wedge_sign,
      bsize, plane_bsize, xd->plane[plane].dst.buf, xd->plane[plane].dst.stride,
      inter_pred, inter_stride, intra_pred, intra_stride);
}

// build interintra_predictors for one plane
// Generates the intra prediction into a stack-local MAX_SB_SIZE-stride
// scratch buffer, then blends it with the given inter prediction in place.
void av1_build_interintra_predictors_sbp(const AV1_COMMON *cm, MACROBLOCKD *xd,
                                         uint8_t *pred, int stride,
                                         const BUFFER_SET *ctx, int plane,
                                         BLOCK_SIZE bsize) {
  if (is_cur_buf_hbd(xd)) {
    DECLARE_ALIGNED(16, uint16_t, intrapredictor[MAX_SB_SQUARE]);
    av1_build_intra_predictors_for_interintra(
        cm, xd, bsize, plane, ctx, CONVERT_TO_BYTEPTR(intrapredictor),
        MAX_SB_SIZE);
    av1_combine_interintra(xd, bsize, plane, pred, stride,
                           CONVERT_TO_BYTEPTR(intrapredictor), MAX_SB_SIZE);
  } else {
    DECLARE_ALIGNED(16, uint8_t, intrapredictor[MAX_SB_SQUARE]);
    av1_build_intra_predictors_for_interintra(cm, xd, bsize, plane, ctx,
                                              intrapredictor, MAX_SB_SIZE);
    av1_combine_interintra(xd, bsize, plane, pred, stride, intrapredictor,
                           MAX_SB_SIZE);
  }
}

// Convenience wrapper: build inter-intra predictors for both chroma planes
// (U = plane 1, V = plane 2).
void av1_build_interintra_predictors_sbuv(const AV1_COMMON *cm, MACROBLOCKD *xd,
                                          uint8_t *upred, uint8_t *vpred,
                                          int ustride, int vstride,
                                          const BUFFER_SET *ctx,
                                          BLOCK_SIZE bsize) {
  av1_build_interintra_predictors_sbp(cm, xd, upred, ustride, ctx, 1, bsize);
  av1_build_interintra_predictors_sbp(cm, xd, vpred, vstride, ctx, 2, bsize);
}