/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"
#include "vp9/common/vp9_loopfilter.h"
#include "vp9/common/vp9_onyxc_int.h"
#include "vp9/common/vp9_reconinter.h"
#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"

#include "vp9/common/vp9_seg_common.h"

// 64 bit masks for left transform size. Each 1 represents a position where
// we should apply a loop filter across the left border of an 8x8 block
// boundary.
//
// In the case of TX_16X16 (low-order byte first) we end up with a mask that
// looks like this:
//
//    10101010
//    10101010
//    10101010
//    10101010
//    10101010
//    10101010
//    10101010
//    10101010
//
// A loopfilter should be applied to every other 8x8 horizontally.
static const uint64_t left_64x64_txform_mask[TX_SIZES] = {
  0xffffffffffffffffULL,  // TX_4X4
  0xffffffffffffffffULL,  // TX_8x8
  0x5555555555555555ULL,  // TX_16x16
  0x1111111111111111ULL,  // TX_32x32
};

// 64 bit masks for above transform size. Each 1 represents a position where
// we should apply a loop filter across the top border of an 8x8 block
// boundary.
//
// In the case of TX_32x32 (low-order byte first) we end up with a mask that
// looks like this:
//
//    11111111
//    00000000
//    00000000
//    00000000
//    11111111
//    00000000
//    00000000
//    00000000
//
// A loopfilter should be applied to every fourth row vertically.
static const uint64_t above_64x64_txform_mask[TX_SIZES] = {
  0xffffffffffffffffULL,  // TX_4X4
  0xffffffffffffffffULL,  // TX_8x8
  0x00ff00ff00ff00ffULL,  // TX_16x16
  0x000000ff000000ffULL,  // TX_32x32
};

// 64 bit masks for prediction sizes (left). Each 1 represents a position
// on the left border of an 8x8 block. These are aligned to the right-most
// appropriate bit, and then shifted into place.
//
// In the case of BLOCK_16X32 (low-order byte first) we end up with a mask
// that looks like this:
//
//    10000000
//    10000000
//    10000000
//    10000000
//    00000000
//    00000000
//    00000000
//    00000000
static const uint64_t left_prediction_mask[BLOCK_SIZES] = {
  0x0000000000000001ULL,  // BLOCK_4X4,
  0x0000000000000001ULL,  // BLOCK_4X8,
  0x0000000000000001ULL,  // BLOCK_8X4,
  0x0000000000000001ULL,  // BLOCK_8X8,
  0x0000000000000101ULL,  // BLOCK_8X16,
  0x0000000000000001ULL,  // BLOCK_16X8,
  0x0000000000000101ULL,  // BLOCK_16X16,
  0x0000000001010101ULL,  // BLOCK_16X32,
  0x0000000000000101ULL,  // BLOCK_32X16,
  0x0000000001010101ULL,  // BLOCK_32X32,
  0x0101010101010101ULL,  // BLOCK_32X64,
  0x0000000001010101ULL,  // BLOCK_64X32,
  0x0101010101010101ULL,  // BLOCK_64X64
};
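// Illustrative example (not part of the bitstream spec): the 64 bits of a Y
// mask map to the 8x8 grid of 8x8 blocks in a superblock, with bit
// (r << 3) + c covering block row r, column c. For instance,
// left_prediction_mask[BLOCK_16X32] == 0x0000000001010101ULL sets bits 0, 8,
// 16 and 24 (column 0 of rows 0..3); shifted left by a hypothetical
// shift_y == 2 it instead marks the left edge of a 16x32 prediction whose
// top-left 8x8 block sits at row 0, column 2.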
// 64 bit mask to shift and set for each prediction size.
static const uint64_t above_prediction_mask[BLOCK_SIZES] = {
  0x0000000000000001ULL,  // BLOCK_4X4
  0x0000000000000001ULL,  // BLOCK_4X8
  0x0000000000000001ULL,  // BLOCK_8X4
  0x0000000000000001ULL,  // BLOCK_8X8
  0x0000000000000001ULL,  // BLOCK_8X16,
  0x0000000000000003ULL,  // BLOCK_16X8
  0x0000000000000003ULL,  // BLOCK_16X16
  0x0000000000000003ULL,  // BLOCK_16X32,
  0x000000000000000fULL,  // BLOCK_32X16,
  0x000000000000000fULL,  // BLOCK_32X32,
  0x000000000000000fULL,  // BLOCK_32X64,
  0x00000000000000ffULL,  // BLOCK_64X32,
  0x00000000000000ffULL,  // BLOCK_64X64
};
// 64 bit mask to shift and set for each prediction size. A bit is set for
// each 8x8 block that would be covered by a block of the given size placed
// at the top left of the 64x64 block.
static const uint64_t size_mask[BLOCK_SIZES] = {
  0x0000000000000001ULL,  // BLOCK_4X4
  0x0000000000000001ULL,  // BLOCK_4X8
  0x0000000000000001ULL,  // BLOCK_8X4
  0x0000000000000001ULL,  // BLOCK_8X8
  0x0000000000000101ULL,  // BLOCK_8X16,
  0x0000000000000003ULL,  // BLOCK_16X8
  0x0000000000000303ULL,  // BLOCK_16X16
  0x0000000003030303ULL,  // BLOCK_16X32,
  0x0000000000000f0fULL,  // BLOCK_32X16,
  0x000000000f0f0f0fULL,  // BLOCK_32X32,
  0x0f0f0f0f0f0f0f0fULL,  // BLOCK_32X64,
  0x00000000ffffffffULL,  // BLOCK_64X32,
  0xffffffffffffffffULL,  // BLOCK_64X64
};

// These are used for masking the left and above borders.
static const uint64_t left_border = 0x1111111111111111ULL;
static const uint64_t above_border = 0x000000ff000000ffULL;

// 16 bit masks for uv transform sizes.
static const uint16_t left_64x64_txform_mask_uv[TX_SIZES] = {
  0xffff,  // TX_4X4
  0xffff,  // TX_8x8
  0x5555,  // TX_16x16
  0x1111,  // TX_32x32
};

static const uint16_t above_64x64_txform_mask_uv[TX_SIZES] = {
  0xffff,  // TX_4X4
  0xffff,  // TX_8x8
  0x0f0f,  // TX_16x16
  0x000f,  // TX_32x32
};

// 16 bit left mask to shift and set for each uv prediction size.
static const uint16_t left_prediction_mask_uv[BLOCK_SIZES] = {
  0x0001,  // BLOCK_4X4,
  0x0001,  // BLOCK_4X8,
  0x0001,  // BLOCK_8X4,
  0x0001,  // BLOCK_8X8,
  0x0001,  // BLOCK_8X16,
  0x0001,  // BLOCK_16X8,
  0x0001,  // BLOCK_16X16,
  0x0011,  // BLOCK_16X32,
  0x0001,  // BLOCK_32X16,
  0x0011,  // BLOCK_32X32,
  0x1111,  // BLOCK_32X64
  0x0011,  // BLOCK_64X32,
  0x1111,  // BLOCK_64X64
};
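// Illustrative example: UV masks use a 4x4 grid of chroma 8x8 blocks (each
// covering a 16x16 luma area), with bit (r << 2) + c for row r, column c.
// left_prediction_mask_uv[BLOCK_32X64] == 0x1111 therefore sets column 0 of
// all four rows.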
// 16 bit above mask to shift and set for each uv prediction size.
static const uint16_t above_prediction_mask_uv[BLOCK_SIZES] = {
  0x0001,  // BLOCK_4X4
  0x0001,  // BLOCK_4X8
  0x0001,  // BLOCK_8X4
  0x0001,  // BLOCK_8X8
  0x0001,  // BLOCK_8X16,
  0x0001,  // BLOCK_16X8
  0x0001,  // BLOCK_16X16
  0x0001,  // BLOCK_16X32,
  0x0003,  // BLOCK_32X16,
  0x0003,  // BLOCK_32X32,
  0x0003,  // BLOCK_32X64,
  0x000f,  // BLOCK_64X32,
  0x000f,  // BLOCK_64X64
};

// 16 bit mask to shift and set for each uv prediction size.
static const uint16_t size_mask_uv[BLOCK_SIZES] = {
  0x0001,  // BLOCK_4X4
  0x0001,  // BLOCK_4X8
  0x0001,  // BLOCK_8X4
  0x0001,  // BLOCK_8X8
  0x0001,  // BLOCK_8X16,
  0x0001,  // BLOCK_16X8
  0x0001,  // BLOCK_16X16
  0x0011,  // BLOCK_16X32,
  0x0003,  // BLOCK_32X16,
  0x0033,  // BLOCK_32X32,
  0x3333,  // BLOCK_32X64,
  0x00ff,  // BLOCK_64X32,
  0xffff,  // BLOCK_64X64
};
static const uint16_t left_border_uv = 0x1111;
static const uint16_t above_border_uv = 0x000f;

static const int mode_lf_lut[MB_MODE_COUNT] = {
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // INTRA_MODES
  1, 1, 0, 1                     // INTER_MODES (ZEROMV == 0)
};

static void update_sharpness(loop_filter_info_n *lfi, int sharpness_lvl) {
  int lvl;

  // For each possible value for the loop filter fill out limits
  for (lvl = 0; lvl <= MAX_LOOP_FILTER; lvl++) {
    // Set loop filter parameters that control sharpness.
    int block_inside_limit = lvl >> ((sharpness_lvl > 0) + (sharpness_lvl > 4));

    if (sharpness_lvl > 0) {
      if (block_inside_limit > (9 - sharpness_lvl))
        block_inside_limit = (9 - sharpness_lvl);
    }

    if (block_inside_limit < 1) block_inside_limit = 1;

    memset(lfi->lfthr[lvl].lim, block_inside_limit, SIMD_WIDTH);
    memset(lfi->lfthr[lvl].mblim, (2 * (lvl + 2) + block_inside_limit),
           SIMD_WIDTH);
  }
}
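// Worked example (illustrative): with sharpness_lvl == 0 and lvl == 10 the
// inside limit stays at 10, giving lim == 10 and mblim == 2 * (10 + 2) + 10
// == 34. With sharpness_lvl == 5 and lvl == 10 the limit is first reduced to
// 10 >> 2 == 2 and the cap of 9 - 5 == 4 has no further effect, giving
// lim == 2 and mblim == 26.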
static uint8_t get_filter_level(const loop_filter_info_n *lfi_n,
                                const MODE_INFO *mi) {
  return lfi_n->lvl[mi->segment_id][mi->ref_frame[0]][mode_lf_lut[mi->mode]];
}

void vp9_loop_filter_init(VP9_COMMON *cm) {
  loop_filter_info_n *lfi = &cm->lf_info;
  struct loopfilter *lf = &cm->lf;
  int lvl;

  // init limits for given sharpness
  update_sharpness(lfi, lf->sharpness_level);
  lf->last_sharpness_level = lf->sharpness_level;

  // init hev threshold const vectors
  for (lvl = 0; lvl <= MAX_LOOP_FILTER; lvl++)
    memset(lfi->lfthr[lvl].hev_thr, (lvl >> 4), SIMD_WIDTH);
}

void vp9_loop_filter_frame_init(VP9_COMMON *cm, int default_filt_lvl) {
  int seg_id;
  // scale is the multiplier for lf_deltas:
  // 1 when filter_lvl is between 0 and 31;
  // 2 when filter_lvl is between 32 and 63.
  const int scale = 1 << (default_filt_lvl >> 5);
  loop_filter_info_n *const lfi = &cm->lf_info;
  struct loopfilter *const lf = &cm->lf;
  const struct segmentation *const seg = &cm->seg;

  // update limits if sharpness has changed
  if (lf->last_sharpness_level != lf->sharpness_level) {
    update_sharpness(lfi, lf->sharpness_level);
    lf->last_sharpness_level = lf->sharpness_level;
  }

  for (seg_id = 0; seg_id < MAX_SEGMENTS; seg_id++) {
    int lvl_seg = default_filt_lvl;
    if (segfeature_active(seg, seg_id, SEG_LVL_ALT_LF)) {
      const int data = get_segdata(seg, seg_id, SEG_LVL_ALT_LF);
      lvl_seg = clamp(
          seg->abs_delta == SEGMENT_ABSDATA ? data : default_filt_lvl + data, 0,
          MAX_LOOP_FILTER);
    }

    if (!lf->mode_ref_delta_enabled) {
      // we could get rid of this if we assume that deltas are set to
      // zero when not in use; encoder always uses deltas
      memset(lfi->lvl[seg_id], lvl_seg, sizeof(lfi->lvl[seg_id]));
    } else {
      int ref, mode;
      const int intra_lvl = lvl_seg + lf->ref_deltas[INTRA_FRAME] * scale;
      lfi->lvl[seg_id][INTRA_FRAME][0] = clamp(intra_lvl, 0, MAX_LOOP_FILTER);

      for (ref = LAST_FRAME; ref < MAX_REF_FRAMES; ++ref) {
        for (mode = 0; mode < MAX_MODE_LF_DELTAS; ++mode) {
          const int inter_lvl = lvl_seg + lf->ref_deltas[ref] * scale +
                                lf->mode_deltas[mode] * scale;
          lfi->lvl[seg_id][ref][mode] = clamp(inter_lvl, 0, MAX_LOOP_FILTER);
        }
      }
    }
  }
}
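// Worked example (illustrative values): with default_filt_lvl == 36,
// scale == 2 (since 36 >> 5 == 1). For an inter block with
// ref_deltas[ref] == -2 and mode_deltas[mode] == 1, the stored level is
// clamp(36 + (-2) * 2 + 1 * 2, 0, MAX_LOOP_FILTER) == 34.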
static void filter_selectively_vert_row2(
    int subsampling_factor, uint8_t *s, int pitch, unsigned int mask_16x16,
    unsigned int mask_8x8, unsigned int mask_4x4, unsigned int mask_4x4_int,
    const loop_filter_thresh *lfthr, const uint8_t *lfl) {
  const int dual_mask_cutoff = subsampling_factor ? 0xff : 0xffff;
  const int lfl_forward = subsampling_factor ? 4 : 8;
  const unsigned int dual_one = 1 | (1 << lfl_forward);
  unsigned int mask;
  uint8_t *ss[2];
  ss[0] = s;

  for (mask =
           (mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int) & dual_mask_cutoff;
       mask; mask = (mask & ~dual_one) >> 1) {
    if (mask & dual_one) {
      const loop_filter_thresh *lfis[2];
      lfis[0] = lfthr + *lfl;
      lfis[1] = lfthr + *(lfl + lfl_forward);
      ss[1] = ss[0] + 8 * pitch;

      if (mask_16x16 & dual_one) {
        if ((mask_16x16 & dual_one) == dual_one) {
          vpx_lpf_vertical_16_dual(ss[0], pitch, lfis[0]->mblim, lfis[0]->lim,
                                   lfis[0]->hev_thr);
        } else {
          const loop_filter_thresh *lfi = lfis[!(mask_16x16 & 1)];
          vpx_lpf_vertical_16(ss[!(mask_16x16 & 1)], pitch, lfi->mblim,
                              lfi->lim, lfi->hev_thr);
        }
      }

      if (mask_8x8 & dual_one) {
        if ((mask_8x8 & dual_one) == dual_one) {
          vpx_lpf_vertical_8_dual(ss[0], pitch, lfis[0]->mblim, lfis[0]->lim,
                                  lfis[0]->hev_thr, lfis[1]->mblim,
                                  lfis[1]->lim, lfis[1]->hev_thr);
        } else {
          const loop_filter_thresh *lfi = lfis[!(mask_8x8 & 1)];
          vpx_lpf_vertical_8(ss[!(mask_8x8 & 1)], pitch, lfi->mblim, lfi->lim,
                             lfi->hev_thr);
        }
      }

      if (mask_4x4 & dual_one) {
        if ((mask_4x4 & dual_one) == dual_one) {
          vpx_lpf_vertical_4_dual(ss[0], pitch, lfis[0]->mblim, lfis[0]->lim,
                                  lfis[0]->hev_thr, lfis[1]->mblim,
                                  lfis[1]->lim, lfis[1]->hev_thr);
        } else {
          const loop_filter_thresh *lfi = lfis[!(mask_4x4 & 1)];
          vpx_lpf_vertical_4(ss[!(mask_4x4 & 1)], pitch, lfi->mblim, lfi->lim,
                             lfi->hev_thr);
        }
      }

      if (mask_4x4_int & dual_one) {
        if ((mask_4x4_int & dual_one) == dual_one) {
          vpx_lpf_vertical_4_dual(
              ss[0] + 4, pitch, lfis[0]->mblim, lfis[0]->lim, lfis[0]->hev_thr,
              lfis[1]->mblim, lfis[1]->lim, lfis[1]->hev_thr);
        } else {
          const loop_filter_thresh *lfi = lfis[!(mask_4x4_int & 1)];
          vpx_lpf_vertical_4(ss[!(mask_4x4_int & 1)] + 4, pitch, lfi->mblim,
                             lfi->lim, lfi->hev_thr);
        }
      }
    }

    ss[0] += 8;
    lfl += 1;
    mask_16x16 >>= 1;
    mask_8x8 >>= 1;
    mask_4x4 >>= 1;
    mask_4x4_int >>= 1;
  }
}

#if CONFIG_VP9_HIGHBITDEPTH
static void highbd_filter_selectively_vert_row2(
    int subsampling_factor, uint16_t *s, int pitch, unsigned int mask_16x16,
    unsigned int mask_8x8, unsigned int mask_4x4, unsigned int mask_4x4_int,
    const loop_filter_thresh *lfthr, const uint8_t *lfl, int bd) {
  const int dual_mask_cutoff = subsampling_factor ? 0xff : 0xffff;
  const int lfl_forward = subsampling_factor ? 4 : 8;
  const unsigned int dual_one = 1 | (1 << lfl_forward);
  unsigned int mask;
  uint16_t *ss[2];
  ss[0] = s;

  for (mask =
           (mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int) & dual_mask_cutoff;
       mask; mask = (mask & ~dual_one) >> 1) {
    if (mask & dual_one) {
      const loop_filter_thresh *lfis[2];
      lfis[0] = lfthr + *lfl;
      lfis[1] = lfthr + *(lfl + lfl_forward);
      ss[1] = ss[0] + 8 * pitch;

      if (mask_16x16 & dual_one) {
        if ((mask_16x16 & dual_one) == dual_one) {
          vpx_highbd_lpf_vertical_16_dual(ss[0], pitch, lfis[0]->mblim,
                                          lfis[0]->lim, lfis[0]->hev_thr, bd);
        } else {
          const loop_filter_thresh *lfi = lfis[!(mask_16x16 & 1)];
          vpx_highbd_lpf_vertical_16(ss[!(mask_16x16 & 1)], pitch, lfi->mblim,
                                     lfi->lim, lfi->hev_thr, bd);
        }
      }

      if (mask_8x8 & dual_one) {
        if ((mask_8x8 & dual_one) == dual_one) {
          vpx_highbd_lpf_vertical_8_dual(
              ss[0], pitch, lfis[0]->mblim, lfis[0]->lim, lfis[0]->hev_thr,
              lfis[1]->mblim, lfis[1]->lim, lfis[1]->hev_thr, bd);
        } else {
          const loop_filter_thresh *lfi = lfis[!(mask_8x8 & 1)];
          vpx_highbd_lpf_vertical_8(ss[!(mask_8x8 & 1)], pitch, lfi->mblim,
                                    lfi->lim, lfi->hev_thr, bd);
        }
      }

      if (mask_4x4 & dual_one) {
        if ((mask_4x4 & dual_one) == dual_one) {
          vpx_highbd_lpf_vertical_4_dual(
              ss[0], pitch, lfis[0]->mblim, lfis[0]->lim, lfis[0]->hev_thr,
              lfis[1]->mblim, lfis[1]->lim, lfis[1]->hev_thr, bd);
        } else {
          const loop_filter_thresh *lfi = lfis[!(mask_4x4 & 1)];
          vpx_highbd_lpf_vertical_4(ss[!(mask_4x4 & 1)], pitch, lfi->mblim,
                                    lfi->lim, lfi->hev_thr, bd);
        }
      }

      if (mask_4x4_int & dual_one) {
        if ((mask_4x4_int & dual_one) == dual_one) {
          vpx_highbd_lpf_vertical_4_dual(
              ss[0] + 4, pitch, lfis[0]->mblim, lfis[0]->lim, lfis[0]->hev_thr,
              lfis[1]->mblim, lfis[1]->lim, lfis[1]->hev_thr, bd);
        } else {
          const loop_filter_thresh *lfi = lfis[!(mask_4x4_int & 1)];
          vpx_highbd_lpf_vertical_4(ss[!(mask_4x4_int & 1)] + 4, pitch,
                                    lfi->mblim, lfi->lim, lfi->hev_thr, bd);
        }
      }
    }

    ss[0] += 8;
    lfl += 1;
    mask_16x16 >>= 1;
    mask_8x8 >>= 1;
    mask_4x4 >>= 1;
    mask_4x4_int >>= 1;
  }
}
#endif  // CONFIG_VP9_HIGHBITDEPTH
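// Illustrative note on the row2 helpers above: with subsampling_factor == 0,
// lfl_forward == 8 and dual_one == 0x101, so each loop iteration pairs the
// 8x8 block at mask bit b with the block one mask row (8 pixel rows) below it
// at bit b + 8, letting the *_dual kernels filter both rows in one call.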
static void filter_selectively_horiz(
    uint8_t *s, int pitch, unsigned int mask_16x16, unsigned int mask_8x8,
    unsigned int mask_4x4, unsigned int mask_4x4_int,
    const loop_filter_thresh *lfthr, const uint8_t *lfl) {
  unsigned int mask;
  int count;

  for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; mask;
       mask >>= count) {
    count = 1;
    if (mask & 1) {
      const loop_filter_thresh *lfi = lfthr + *lfl;

      if (mask_16x16 & 1) {
        if ((mask_16x16 & 3) == 3) {
          vpx_lpf_horizontal_16_dual(s, pitch, lfi->mblim, lfi->lim,
                                     lfi->hev_thr);
          count = 2;
        } else {
          vpx_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
        }
      } else if (mask_8x8 & 1) {
        if ((mask_8x8 & 3) == 3) {
          // Next block's thresholds.
          const loop_filter_thresh *lfin = lfthr + *(lfl + 1);

          vpx_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim,
                                    lfi->hev_thr, lfin->mblim, lfin->lim,
                                    lfin->hev_thr);

          if ((mask_4x4_int & 3) == 3) {
            vpx_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim,
                                      lfi->lim, lfi->hev_thr, lfin->mblim,
                                      lfin->lim, lfin->hev_thr);
          } else {
            if (mask_4x4_int & 1)
              vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
                                   lfi->hev_thr);
            else if (mask_4x4_int & 2)
              vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
                                   lfin->lim, lfin->hev_thr);
          }
          count = 2;
        } else {
          vpx_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);

          if (mask_4x4_int & 1)
            vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
                                 lfi->hev_thr);
        }
      } else if (mask_4x4 & 1) {
        if ((mask_4x4 & 3) == 3) {
          // Next block's thresholds.
          const loop_filter_thresh *lfin = lfthr + *(lfl + 1);

          vpx_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim,
                                    lfi->hev_thr, lfin->mblim, lfin->lim,
                                    lfin->hev_thr);
          if ((mask_4x4_int & 3) == 3) {
            vpx_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim,
                                      lfi->lim, lfi->hev_thr, lfin->mblim,
                                      lfin->lim, lfin->hev_thr);
          } else {
            if (mask_4x4_int & 1)
              vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
                                   lfi->hev_thr);
            else if (mask_4x4_int & 2)
              vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
                                   lfin->lim, lfin->hev_thr);
          }
          count = 2;
        } else {
          vpx_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);

          if (mask_4x4_int & 1)
            vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
                                 lfi->hev_thr);
        }
      } else {
        vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
                             lfi->hev_thr);
      }
    }
    s += 8 * count;
    lfl += count;
    mask_16x16 >>= count;
    mask_8x8 >>= count;
    mask_4x4 >>= count;
    mask_4x4_int >>= count;
  }
}
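// Illustrative note: in filter_selectively_horiz() above, when two
// horizontally adjacent blocks need the same size of filter ((mask & 3) == 3
// for that size), a single *_dual call covers both, count becomes 2, and the
// loop advances s by 16 pixels and the masks by two bits.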
#if CONFIG_VP9_HIGHBITDEPTH
static void highbd_filter_selectively_horiz(
    uint16_t *s, int pitch, unsigned int mask_16x16, unsigned int mask_8x8,
    unsigned int mask_4x4, unsigned int mask_4x4_int,
    const loop_filter_thresh *lfthr, const uint8_t *lfl, int bd) {
  unsigned int mask;
  int count;

  for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; mask;
       mask >>= count) {
    count = 1;
    if (mask & 1) {
      const loop_filter_thresh *lfi = lfthr + *lfl;

      if (mask_16x16 & 1) {
        if ((mask_16x16 & 3) == 3) {
          vpx_highbd_lpf_horizontal_16_dual(s, pitch, lfi->mblim, lfi->lim,
                                            lfi->hev_thr, bd);
          count = 2;
        } else {
          vpx_highbd_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
                                       lfi->hev_thr, bd);
        }
      } else if (mask_8x8 & 1) {
        if ((mask_8x8 & 3) == 3) {
          // Next block's thresholds.
          const loop_filter_thresh *lfin = lfthr + *(lfl + 1);

          vpx_highbd_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim,
                                           lfi->hev_thr, lfin->mblim, lfin->lim,
                                           lfin->hev_thr, bd);

          if ((mask_4x4_int & 3) == 3) {
            vpx_highbd_lpf_horizontal_4_dual(
                s + 4 * pitch, pitch, lfi->mblim, lfi->lim, lfi->hev_thr,
                lfin->mblim, lfin->lim, lfin->hev_thr, bd);
          } else {
            if (mask_4x4_int & 1) {
              vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
                                          lfi->lim, lfi->hev_thr, bd);
            } else if (mask_4x4_int & 2) {
              vpx_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
                                          lfin->lim, lfin->hev_thr, bd);
            }
          }
          count = 2;
        } else {
          vpx_highbd_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim,
                                      lfi->hev_thr, bd);

          if (mask_4x4_int & 1) {
            vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
                                        lfi->lim, lfi->hev_thr, bd);
          }
        }
      } else if (mask_4x4 & 1) {
        if ((mask_4x4 & 3) == 3) {
          // Next block's thresholds.
          const loop_filter_thresh *lfin = lfthr + *(lfl + 1);

          vpx_highbd_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim,
                                           lfi->hev_thr, lfin->mblim, lfin->lim,
                                           lfin->hev_thr, bd);
          if ((mask_4x4_int & 3) == 3) {
            vpx_highbd_lpf_horizontal_4_dual(
                s + 4 * pitch, pitch, lfi->mblim, lfi->lim, lfi->hev_thr,
                lfin->mblim, lfin->lim, lfin->hev_thr, bd);
          } else {
            if (mask_4x4_int & 1) {
              vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
                                          lfi->lim, lfi->hev_thr, bd);
            } else if (mask_4x4_int & 2) {
              vpx_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
                                          lfin->lim, lfin->hev_thr, bd);
            }
          }
          count = 2;
        } else {
          vpx_highbd_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim,
                                      lfi->hev_thr, bd);

          if (mask_4x4_int & 1) {
            vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
                                        lfi->lim, lfi->hev_thr, bd);
          }
        }
      } else {
        vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
                                    lfi->hev_thr, bd);
      }
    }
    s += 8 * count;
    lfl += count;
    mask_16x16 >>= count;
    mask_8x8 >>= count;
    mask_4x4 >>= count;
    mask_4x4_int >>= count;
  }
}
#endif  // CONFIG_VP9_HIGHBITDEPTH

// This function ors into the current lfm structure where to do loop
// filters for the specific mi we are looking at. It uses information
// including the block size (32x16, 32x32, etc.), the transform size,
// whether there were any coefficients encoded, and the loop filter strength
// of the block we are currently looking at. The shifts are used to position
// the 1's we produce.
static void build_masks(const loop_filter_info_n *const lfi_n,
                        const MODE_INFO *mi, const int shift_y,
                        const int shift_uv, LOOP_FILTER_MASK *lfm) {
  const BLOCK_SIZE block_size = mi->sb_type;
  const TX_SIZE tx_size_y = mi->tx_size;
  const TX_SIZE tx_size_uv = uv_txsize_lookup[block_size][tx_size_y][1][1];
  const int filter_level = get_filter_level(lfi_n, mi);
  uint64_t *const left_y = &lfm->left_y[tx_size_y];
  uint64_t *const above_y = &lfm->above_y[tx_size_y];
  uint64_t *const int_4x4_y = &lfm->int_4x4_y;
  uint16_t *const left_uv = &lfm->left_uv[tx_size_uv];
  uint16_t *const above_uv = &lfm->above_uv[tx_size_uv];
  uint16_t *const int_4x4_uv = &lfm->int_4x4_uv;
  int i;

  // If filter level is 0 we don't loop filter.
  if (!filter_level) {
    return;
  } else {
    const int w = num_8x8_blocks_wide_lookup[block_size];
    const int h = num_8x8_blocks_high_lookup[block_size];
    int index = shift_y;
    for (i = 0; i < h; i++) {
      memset(&lfm->lfl_y[index], filter_level, w);
      index += 8;
    }
  }

  // These set 1 in the current block size for the block size edges.
  // For instance if the block size is 32x16, we'll set:
  //    above =   1111
  //              0000
  //    and
  //    left  =   1000
  //              1000
  // NOTE: In this example the low bit is left-most, so the left pattern
  // ( 1000 ) is stored as 1, not 8...
  //
  // U and V set things on a 16 bit scale.
  //
  *above_y |= above_prediction_mask[block_size] << shift_y;
  *above_uv |= above_prediction_mask_uv[block_size] << shift_uv;
  *left_y |= left_prediction_mask[block_size] << shift_y;
  *left_uv |= left_prediction_mask_uv[block_size] << shift_uv;

  // If the block has no coefficients and is not intra we skip applying
  // the loop filter on block edges.
  if (mi->skip && is_inter_block(mi)) return;

  // Here we are adding a mask for the transform size. The transform
  // size mask is set to be correct for a 64x64 prediction block size. We
  // mask to match the size of the block we are working on and then shift it
  // into place.
  *above_y |= (size_mask[block_size] & above_64x64_txform_mask[tx_size_y])
              << shift_y;
  *above_uv |=
      (size_mask_uv[block_size] & above_64x64_txform_mask_uv[tx_size_uv])
      << shift_uv;

  *left_y |= (size_mask[block_size] & left_64x64_txform_mask[tx_size_y])
             << shift_y;
  *left_uv |= (size_mask_uv[block_size] & left_64x64_txform_mask_uv[tx_size_uv])
              << shift_uv;

  // Here we are trying to determine what to do with the internal 4x4 block
  // boundaries. These differ from the 4x4 boundaries on the outside edge of
  // an 8x8 in that the internal ones can be skipped and don't depend on
  // the prediction block size.
  if (tx_size_y == TX_4X4) *int_4x4_y |= size_mask[block_size] << shift_y;

  if (tx_size_uv == TX_4X4)
    *int_4x4_uv |= (size_mask_uv[block_size] & 0xffff) << shift_uv;
}
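// Illustrative example for build_masks(): a BLOCK_16X16 with TX_8X8 at
// shift_y == 18 (8x8 block row 2, column 2) ors
// above_prediction_mask[BLOCK_16X16] == 0x3 shifted to bits 18..19 into
// above_y; if it also has coefficients,
// size_mask[BLOCK_16X16] & above_64x64_txform_mask[TX_8X8] == 0x0303 is
// shifted to mark above edges at rows 2..3, columns 2..3.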
// This function does the same thing as the one above with the exception that
// it only affects the y masks. It exists because for blocks < 16x16 in size,
// we only update u and v masks on the first block.
static void build_y_mask(const loop_filter_info_n *const lfi_n,
                         const MODE_INFO *mi, const int shift_y,
                         LOOP_FILTER_MASK *lfm) {
  const BLOCK_SIZE block_size = mi->sb_type;
  const TX_SIZE tx_size_y = mi->tx_size;
  const int filter_level = get_filter_level(lfi_n, mi);
  uint64_t *const left_y = &lfm->left_y[tx_size_y];
  uint64_t *const above_y = &lfm->above_y[tx_size_y];
  uint64_t *const int_4x4_y = &lfm->int_4x4_y;
  int i;

  if (!filter_level) {
    return;
  } else {
    const int w = num_8x8_blocks_wide_lookup[block_size];
    const int h = num_8x8_blocks_high_lookup[block_size];
    int index = shift_y;
    for (i = 0; i < h; i++) {
      memset(&lfm->lfl_y[index], filter_level, w);
      index += 8;
    }
  }

  *above_y |= above_prediction_mask[block_size] << shift_y;
  *left_y |= left_prediction_mask[block_size] << shift_y;

  if (mi->skip && is_inter_block(mi)) return;

  *above_y |= (size_mask[block_size] & above_64x64_txform_mask[tx_size_y])
              << shift_y;

  *left_y |= (size_mask[block_size] & left_64x64_txform_mask[tx_size_y])
             << shift_y;

  if (tx_size_y == TX_4X4) *int_4x4_y |= size_mask[block_size] << shift_y;
}
void vp9_adjust_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col,
                     LOOP_FILTER_MASK *lfm) {
  int i;

  // The largest loopfilter we have is 16x16 so we use the 16x16 mask
  // for 32x32 transforms also.
  lfm->left_y[TX_16X16] |= lfm->left_y[TX_32X32];
  lfm->above_y[TX_16X16] |= lfm->above_y[TX_32X32];
  lfm->left_uv[TX_16X16] |= lfm->left_uv[TX_32X32];
  lfm->above_uv[TX_16X16] |= lfm->above_uv[TX_32X32];

  // We do at least an 8 tap filter on every 32x32 boundary even if the
  // transform size is 4x4. So if the 4x4 is set on a border pixel add it to
  // the 8x8 and remove it from the 4x4.
  lfm->left_y[TX_8X8] |= lfm->left_y[TX_4X4] & left_border;
  lfm->left_y[TX_4X4] &= ~left_border;
  lfm->above_y[TX_8X8] |= lfm->above_y[TX_4X4] & above_border;
  lfm->above_y[TX_4X4] &= ~above_border;
  lfm->left_uv[TX_8X8] |= lfm->left_uv[TX_4X4] & left_border_uv;
  lfm->left_uv[TX_4X4] &= ~left_border_uv;
  lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_4X4] & above_border_uv;
  lfm->above_uv[TX_4X4] &= ~above_border_uv;

  // We do some special edge handling.
  if (mi_row + MI_BLOCK_SIZE > cm->mi_rows) {
    const uint64_t rows = cm->mi_rows - mi_row;

    // Each pixel inside the border gets a 1.
    const uint64_t mask_y = (((uint64_t)1 << (rows << 3)) - 1);
    const uint16_t mask_uv = (((uint16_t)1 << (((rows + 1) >> 1) << 2)) - 1);

    // Remove values completely outside our border.
    for (i = 0; i < TX_32X32; i++) {
      lfm->left_y[i] &= mask_y;
      lfm->above_y[i] &= mask_y;
      lfm->left_uv[i] &= mask_uv;
      lfm->above_uv[i] &= mask_uv;
    }
    lfm->int_4x4_y &= mask_y;
    lfm->int_4x4_uv &= mask_uv;

    // We don't apply a wide loop filter on the last uv block row. If set
    // apply the shorter one instead.
    if (rows == 1) {
      lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_16X16];
      lfm->above_uv[TX_16X16] = 0;
    }
    if (rows == 5) {
      lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_16X16] & 0xff00;
      lfm->above_uv[TX_16X16] &= ~(lfm->above_uv[TX_16X16] & 0xff00);
    }
  }

  if (mi_col + MI_BLOCK_SIZE > cm->mi_cols) {
    const uint64_t columns = cm->mi_cols - mi_col;

    // Each pixel inside the border gets a 1, the multiply copies the border
    // to where we need it.
    const uint64_t mask_y = (((1 << columns) - 1)) * 0x0101010101010101ULL;
    const uint16_t mask_uv = ((1 << ((columns + 1) >> 1)) - 1) * 0x1111;

    // Internal edges are not applied on the last column of the image so
    // we mask 1 more for the internal edges.
    const uint16_t mask_uv_int = ((1 << (columns >> 1)) - 1) * 0x1111;

    // Remove the bits outside the image edge.
    for (i = 0; i < TX_32X32; i++) {
      lfm->left_y[i] &= mask_y;
      lfm->above_y[i] &= mask_y;
      lfm->left_uv[i] &= mask_uv;
      lfm->above_uv[i] &= mask_uv;
    }
    lfm->int_4x4_y &= mask_y;
    lfm->int_4x4_uv &= mask_uv_int;

    // We don't apply a wide loop filter on the last uv column. If set
    // apply the shorter one instead.
    if (columns == 1) {
      lfm->left_uv[TX_8X8] |= lfm->left_uv[TX_16X16];
      lfm->left_uv[TX_16X16] = 0;
    }
    if (columns == 5) {
      lfm->left_uv[TX_8X8] |= (lfm->left_uv[TX_16X16] & 0xcccc);
      lfm->left_uv[TX_16X16] &= ~(lfm->left_uv[TX_16X16] & 0xcccc);
    }
  }
  // We don't apply a loop filter on the first column in the image, mask that
  // out.
  if (mi_col == 0) {
    for (i = 0; i < TX_32X32; i++) {
      lfm->left_y[i] &= 0xfefefefefefefefeULL;
      lfm->left_uv[i] &= 0xeeee;
    }
  }

  // Assert if we try to apply 2 different loop filters at the same position.
  assert(!(lfm->left_y[TX_16X16] & lfm->left_y[TX_8X8]));
  assert(!(lfm->left_y[TX_16X16] & lfm->left_y[TX_4X4]));
  assert(!(lfm->left_y[TX_8X8] & lfm->left_y[TX_4X4]));
  assert(!(lfm->int_4x4_y & lfm->left_y[TX_16X16]));
  assert(!(lfm->left_uv[TX_16X16] & lfm->left_uv[TX_8X8]));
  assert(!(lfm->left_uv[TX_16X16] & lfm->left_uv[TX_4X4]));
  assert(!(lfm->left_uv[TX_8X8] & lfm->left_uv[TX_4X4]));
  assert(!(lfm->int_4x4_uv & lfm->left_uv[TX_16X16]));
  assert(!(lfm->above_y[TX_16X16] & lfm->above_y[TX_8X8]));
  assert(!(lfm->above_y[TX_16X16] & lfm->above_y[TX_4X4]));
  assert(!(lfm->above_y[TX_8X8] & lfm->above_y[TX_4X4]));
  assert(!(lfm->int_4x4_y & lfm->above_y[TX_16X16]));
  assert(!(lfm->above_uv[TX_16X16] & lfm->above_uv[TX_8X8]));
  assert(!(lfm->above_uv[TX_16X16] & lfm->above_uv[TX_4X4]));
  assert(!(lfm->above_uv[TX_8X8] & lfm->above_uv[TX_4X4]));
  assert(!(lfm->int_4x4_uv & lfm->above_uv[TX_16X16]));
}
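// Worked example (illustrative): for a frame whose last superblock row has
// rows == 2 (16 luma pixels), vp9_adjust_mask() computes
// mask_y == (1ULL << 16) - 1, keeping only the top two 8-bit mask rows, and
// mask_uv == (1 << 4) - 1 == 0xf, keeping the single remaining chroma mask
// row.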
// This function sets up the bit masks for the entire 64x64 region represented
// by mi_row, mi_col.
void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col,
                    MODE_INFO **mi8x8, const int mode_info_stride,
                    LOOP_FILTER_MASK *lfm) {
  int idx_32, idx_16, idx_8;
  const loop_filter_info_n *const lfi_n = &cm->lf_info;
  MODE_INFO **mip = mi8x8;
  MODE_INFO **mip2 = mi8x8;

  // These are offsets to the next mi in the 64x64 block. It is what gets
  // added to the mi ptr as we go through each loop. It helps us to avoid
  // setting up special row and column counters for each index. The last step
  // brings us out back to the starting position.
  const int offset_32[] = { 4, (mode_info_stride << 2) - 4, 4,
                            -(mode_info_stride << 2) - 4 };
  const int offset_16[] = { 2, (mode_info_stride << 1) - 2, 2,
                            -(mode_info_stride << 1) - 2 };
  const int offset[] = { 1, mode_info_stride - 1, 1, -mode_info_stride - 1 };

  // Following variables represent shifts to position the current block
  // mask over the appropriate block. A shift of 36 to the left will move
  // the bits for the final 32 by 32 block in the 64x64 down 4 rows and right
  // 4 columns to the appropriate spot.
  const int shift_32_y[] = { 0, 4, 32, 36 };
  const int shift_16_y[] = { 0, 2, 16, 18 };
  const int shift_8_y[] = { 0, 1, 8, 9 };
  const int shift_32_uv[] = { 0, 2, 8, 10 };
  const int shift_16_uv[] = { 0, 1, 4, 5 };
  const int max_rows =
      (mi_row + MI_BLOCK_SIZE > cm->mi_rows ? cm->mi_rows - mi_row
                                            : MI_BLOCK_SIZE);
  const int max_cols =
      (mi_col + MI_BLOCK_SIZE > cm->mi_cols ? cm->mi_cols - mi_col
                                            : MI_BLOCK_SIZE);

  vp9_zero(*lfm);
  assert(mip[0] != NULL);

  switch (mip[0]->sb_type) {
    case BLOCK_64X64: build_masks(lfi_n, mip[0], 0, 0, lfm); break;
    case BLOCK_64X32:
      build_masks(lfi_n, mip[0], 0, 0, lfm);
      mip2 = mip + mode_info_stride * 4;
      if (4 >= max_rows) break;
      build_masks(lfi_n, mip2[0], 32, 8, lfm);
      break;
    case BLOCK_32X64:
      build_masks(lfi_n, mip[0], 0, 0, lfm);
      mip2 = mip + 4;
      if (4 >= max_cols) break;
      build_masks(lfi_n, mip2[0], 4, 2, lfm);
      break;
    default:
      for (idx_32 = 0; idx_32 < 4; mip += offset_32[idx_32], ++idx_32) {
        const int shift_y = shift_32_y[idx_32];
        const int shift_uv = shift_32_uv[idx_32];
        const int mi_32_col_offset = ((idx_32 & 1) << 2);
        const int mi_32_row_offset = ((idx_32 >> 1) << 2);
        if (mi_32_col_offset >= max_cols || mi_32_row_offset >= max_rows)
          continue;
        switch (mip[0]->sb_type) {
          case BLOCK_32X32:
            build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
            break;
          case BLOCK_32X16:
            build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
            if (mi_32_row_offset + 2 >= max_rows) continue;
            mip2 = mip + mode_info_stride * 2;
            build_masks(lfi_n, mip2[0], shift_y + 16, shift_uv + 4, lfm);
            break;
          case BLOCK_16X32:
            build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
            if (mi_32_col_offset + 2 >= max_cols) continue;
            mip2 = mip + 2;
            build_masks(lfi_n, mip2[0], shift_y + 2, shift_uv + 1, lfm);
            break;
          default:
            for (idx_16 = 0; idx_16 < 4; mip += offset_16[idx_16], ++idx_16) {
              const int shift_y = shift_32_y[idx_32] + shift_16_y[idx_16];
              const int shift_uv = shift_32_uv[idx_32] + shift_16_uv[idx_16];
              const int mi_16_col_offset =
                  mi_32_col_offset + ((idx_16 & 1) << 1);
              const int mi_16_row_offset =
                  mi_32_row_offset + ((idx_16 >> 1) << 1);

              if (mi_16_col_offset >= max_cols || mi_16_row_offset >= max_rows)
                continue;

              switch (mip[0]->sb_type) {
                case BLOCK_16X16:
                  build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
                  break;
                case BLOCK_16X8:
                  build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
                  if (mi_16_row_offset + 1 >= max_rows) continue;
                  mip2 = mip + mode_info_stride;
                  build_y_mask(lfi_n, mip2[0], shift_y + 8, lfm);
                  break;
                case BLOCK_8X16:
                  build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
                  if (mi_16_col_offset + 1 >= max_cols) continue;
                  mip2 = mip + 1;
                  build_y_mask(lfi_n, mip2[0], shift_y + 1, lfm);
                  break;
                default: {
                  const int shift_y =
                      shift_32_y[idx_32] + shift_16_y[idx_16] + shift_8_y[0];
                  build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
                  mip += offset[0];
                  for (idx_8 = 1; idx_8 < 4; mip += offset[idx_8], ++idx_8) {
                    const int shift_y = shift_32_y[idx_32] +
                                        shift_16_y[idx_16] + shift_8_y[idx_8];
                    const int mi_8_col_offset =
                        mi_16_col_offset + ((idx_8 & 1));
                    const int mi_8_row_offset =
                        mi_16_row_offset + ((idx_8 >> 1));
                    if (mi_8_col_offset >= max_cols ||
                        mi_8_row_offset >= max_rows)
                      continue;
                    build_y_mask(lfi_n, mip[0], shift_y, lfm);
                  }
                  break;
                }
              }
            }
            break;
        }
      }
      break;
  }
}

static void filter_selectively_vert(
    uint8_t *s, int pitch, unsigned int mask_16x16, unsigned int mask_8x8,
    unsigned int mask_4x4, unsigned int mask_4x4_int,
    const loop_filter_thresh *lfthr, const uint8_t *lfl) {
  unsigned int mask;

  for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; mask;
       mask >>= 1) {
    const loop_filter_thresh *lfi = lfthr + *lfl;

    if (mask & 1) {
      if (mask_16x16 & 1) {
        vpx_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
      } else if (mask_8x8 & 1) {
        vpx_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
      } else if (mask_4x4 & 1) {
        vpx_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
      }
    }
    if (mask_4x4_int & 1)
      vpx_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
    s += 8;
    lfl += 1;
    mask_16x16 >>= 1;
    mask_8x8 >>= 1;
    mask_4x4 >>= 1;
    mask_4x4_int >>= 1;
  }
}

#if CONFIG_VP9_HIGHBITDEPTH
static void highbd_filter_selectively_vert(
    uint16_t *s, int pitch, unsigned int mask_16x16, unsigned int mask_8x8,
    unsigned int mask_4x4, unsigned int mask_4x4_int,
    const loop_filter_thresh *lfthr, const uint8_t *lfl, int bd) {
  unsigned int mask;

  for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; mask;
       mask >>= 1) {
    const loop_filter_thresh *lfi = lfthr + *lfl;

    if (mask & 1) {
      if (mask_16x16 & 1) {
        vpx_highbd_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr,
                                   bd);
      } else if (mask_8x8 & 1) {
        vpx_highbd_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr,
                                  bd);
      } else if (mask_4x4 & 1) {
        vpx_highbd_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr,
                                  bd);
      }
    }
    if (mask_4x4_int & 1)
      vpx_highbd_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim,
                                lfi->hev_thr, bd);
    s += 8;
    lfl += 1;
    mask_16x16 >>= 1;
    mask_8x8 >>= 1;
    mask_4x4 >>= 1;
    mask_4x4_int >>= 1;
  }
}
#endif  // CONFIG_VP9_HIGHBITDEPTH
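// Illustrative example: filter_selectively_vert() above walks one 8-bit mask
// row left to right; a mask_16x16 value of 0x5 (bits 0 and 2 set) filters the
// left edges of the 8x8 blocks at columns 0 and 2, each with its own filter
// level from lfl[].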
void vp9_filter_block_plane_non420(VP9_COMMON *cm,
                                   struct macroblockd_plane *plane,
                                   MODE_INFO **mi_8x8, int mi_row, int mi_col) {
  const int ss_x = plane->subsampling_x;
  const int ss_y = plane->subsampling_y;
  const int row_step = 1 << ss_y;
  const int col_step = 1 << ss_x;
  const int row_step_stride = cm->mi_stride * row_step;
  struct buf_2d *const dst = &plane->dst;
  uint8_t *const dst0 = dst->buf;
  unsigned int mask_16x16[MI_BLOCK_SIZE];
  unsigned int mask_8x8[MI_BLOCK_SIZE];
  unsigned int mask_4x4[MI_BLOCK_SIZE];
  unsigned int mask_4x4_int[MI_BLOCK_SIZE];
  uint8_t lfl[MI_BLOCK_SIZE * MI_BLOCK_SIZE];
  int r, c;

  vp9_zero(mask_16x16);
  vp9_zero(mask_8x8);
  vp9_zero(mask_4x4);
  vp9_zero(mask_4x4_int);
  vp9_zero(lfl);

  for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += row_step) {
    unsigned int mask_16x16_c = 0;
    unsigned int mask_8x8_c = 0;
    unsigned int mask_4x4_c = 0;
    unsigned int border_mask;

    // Determine the vertical edges that need filtering
    for (c = 0; c < MI_BLOCK_SIZE && mi_col + c < cm->mi_cols; c += col_step) {
      const MODE_INFO *mi = mi_8x8[c];
      const BLOCK_SIZE sb_type = mi[0].sb_type;
      const int skip_this = mi[0].skip && is_inter_block(mi);
      // left edge of current unit is block/partition edge -> no skip
      const int block_edge_left =
          (num_4x4_blocks_wide_lookup[sb_type] > 1)
              ? !(c & (num_8x8_blocks_wide_lookup[sb_type] - 1))
              : 1;
      const int skip_this_c = skip_this && !block_edge_left;
      // top edge of current unit is block/partition edge -> no skip
      const int block_edge_above =
          (num_4x4_blocks_high_lookup[sb_type] > 1)
              ? !(r & (num_8x8_blocks_high_lookup[sb_type] - 1))
              : 1;
      const int skip_this_r = skip_this && !block_edge_above;
      const TX_SIZE tx_size = get_uv_tx_size(mi, plane);
      const int skip_border_4x4_c = ss_x && mi_col + c == cm->mi_cols - 1;
      const int skip_border_4x4_r = ss_y && mi_row + r == cm->mi_rows - 1;

      // Filter level can vary per MI
      if (!(lfl[(r << 3) + (c >> ss_x)] = get_filter_level(&cm->lf_info, mi)))
        continue;

      // Build masks based on the transform size of each block
      if (tx_size == TX_32X32) {
        if (!skip_this_c && ((c >> ss_x) & 3) == 0) {
          if (!skip_border_4x4_c)
            mask_16x16_c |= 1 << (c >> ss_x);
          else
            mask_8x8_c |= 1 << (c >> ss_x);
        }
        if (!skip_this_r && ((r >> ss_y) & 3) == 0) {
          if (!skip_border_4x4_r)
            mask_16x16[r] |= 1 << (c >> ss_x);
          else
            mask_8x8[r] |= 1 << (c >> ss_x);
        }
      } else if (tx_size == TX_16X16) {
        if (!skip_this_c && ((c >> ss_x) & 1) == 0) {
          if (!skip_border_4x4_c)
            mask_16x16_c |= 1 << (c >> ss_x);
          else
            mask_8x8_c |= 1 << (c >> ss_x);
        }
        if (!skip_this_r && ((r >> ss_y) & 1) == 0) {
          if (!skip_border_4x4_r)
            mask_16x16[r] |= 1 << (c >> ss_x);
          else
            mask_8x8[r] |= 1 << (c >> ss_x);
        }
      } else {
        // force 8x8 filtering on 32x32 boundaries
        if (!skip_this_c) {
          if (tx_size == TX_8X8 || ((c >> ss_x) & 3) == 0)
            mask_8x8_c |= 1 << (c >> ss_x);
          else
            mask_4x4_c |= 1 << (c >> ss_x);
        }

        if (!skip_this_r) {
          if (tx_size == TX_8X8 || ((r >> ss_y) & 3) == 0)
            mask_8x8[r] |= 1 << (c >> ss_x);
          else
            mask_4x4[r] |= 1 << (c >> ss_x);
        }

        if (!skip_this && tx_size < TX_8X8 && !skip_border_4x4_c)
          mask_4x4_int[r] |= 1 << (c >> ss_x);
      }
    }

    // Disable filtering on the leftmost column
    border_mask = ~(mi_col == 0 ? 1 : 0);
#if CONFIG_VP9_HIGHBITDEPTH
    if (cm->use_highbitdepth) {
      highbd_filter_selectively_vert(
          CONVERT_TO_SHORTPTR(dst->buf), dst->stride,
          mask_16x16_c & border_mask, mask_8x8_c & border_mask,
          mask_4x4_c & border_mask, mask_4x4_int[r], cm->lf_info.lfthr,
          &lfl[r << 3], (int)cm->bit_depth);
    } else {
#endif  // CONFIG_VP9_HIGHBITDEPTH
      filter_selectively_vert(dst->buf, dst->stride, mask_16x16_c & border_mask,
                              mask_8x8_c & border_mask,
                              mask_4x4_c & border_mask, mask_4x4_int[r],
                              cm->lf_info.lfthr, &lfl[r << 3]);
#if CONFIG_VP9_HIGHBITDEPTH
    }
#endif  // CONFIG_VP9_HIGHBITDEPTH
    dst->buf += 8 * dst->stride;
    mi_8x8 += row_step_stride;
  }
  // Now do horizontal pass
  dst->buf = dst0;
  for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += row_step) {
    const int skip_border_4x4_r = ss_y && mi_row + r == cm->mi_rows - 1;
    const unsigned int mask_4x4_int_r = skip_border_4x4_r ? 0 : mask_4x4_int[r];

    unsigned int mask_16x16_r;
    unsigned int mask_8x8_r;
    unsigned int mask_4x4_r;

    if (mi_row + r == 0) {
      mask_16x16_r = 0;
      mask_8x8_r = 0;
      mask_4x4_r = 0;
    } else {
      mask_16x16_r = mask_16x16[r];
      mask_8x8_r = mask_8x8[r];
      mask_4x4_r = mask_4x4[r];
    }
#if CONFIG_VP9_HIGHBITDEPTH
    if (cm->use_highbitdepth) {
      highbd_filter_selectively_horiz(
          CONVERT_TO_SHORTPTR(dst->buf), dst->stride, mask_16x16_r, mask_8x8_r,
          mask_4x4_r, mask_4x4_int_r, cm->lf_info.lfthr, &lfl[r << 3],
          (int)cm->bit_depth);
    } else {
#endif  // CONFIG_VP9_HIGHBITDEPTH
      filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
                               mask_4x4_r, mask_4x4_int_r, cm->lf_info.lfthr,
                               &lfl[r << 3]);
#if CONFIG_VP9_HIGHBITDEPTH
    }
#endif  // CONFIG_VP9_HIGHBITDEPTH
    dst->buf += 8 * dst->stride;
  }
}
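// Note (illustrative): the ss00 path below filters two 8-pixel luma rows per
// call to filter_selectively_vert_row2(), so each iteration advances dst->buf
// by 16 rows and consumes 16 bits (two 8-bit mask rows) of each 64-bit mask.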
void vp9_filter_block_plane_ss00(VP9_COMMON *const cm,
                                 struct macroblockd_plane *const plane,
                                 int mi_row, LOOP_FILTER_MASK *lfm) {
  struct buf_2d *const dst = &plane->dst;
  uint8_t *const dst0 = dst->buf;
  int r;
  uint64_t mask_16x16 = lfm->left_y[TX_16X16];
  uint64_t mask_8x8 = lfm->left_y[TX_8X8];
  uint64_t mask_4x4 = lfm->left_y[TX_4X4];
  uint64_t mask_4x4_int = lfm->int_4x4_y;

  assert(plane->subsampling_x == 0 && plane->subsampling_y == 0);

  // Vertical pass: do 2 rows at one time
  for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 2) {
#if CONFIG_VP9_HIGHBITDEPTH
    if (cm->use_highbitdepth) {
      // Disable filtering on the leftmost column.
      highbd_filter_selectively_vert_row2(
          plane->subsampling_x, CONVERT_TO_SHORTPTR(dst->buf), dst->stride,
          (unsigned int)mask_16x16, (unsigned int)mask_8x8,
          (unsigned int)mask_4x4, (unsigned int)mask_4x4_int, cm->lf_info.lfthr,
          &lfm->lfl_y[r << 3], (int)cm->bit_depth);
    } else {
#endif  // CONFIG_VP9_HIGHBITDEPTH
      // Disable filtering on the leftmost column.
      filter_selectively_vert_row2(
          plane->subsampling_x, dst->buf, dst->stride, (unsigned int)mask_16x16,
          (unsigned int)mask_8x8, (unsigned int)mask_4x4,
          (unsigned int)mask_4x4_int, cm->lf_info.lfthr, &lfm->lfl_y[r << 3]);
#if CONFIG_VP9_HIGHBITDEPTH
    }
#endif  // CONFIG_VP9_HIGHBITDEPTH
    dst->buf += 16 * dst->stride;
    mask_16x16 >>= 16;
    mask_8x8 >>= 16;
    mask_4x4 >>= 16;
    mask_4x4_int >>= 16;
  }

  // Horizontal pass
  dst->buf = dst0;
  mask_16x16 = lfm->above_y[TX_16X16];
  mask_8x8 = lfm->above_y[TX_8X8];
  mask_4x4 = lfm->above_y[TX_4X4];
  mask_4x4_int = lfm->int_4x4_y;

  for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r++) {
    unsigned int mask_16x16_r;
    unsigned int mask_8x8_r;
    unsigned int mask_4x4_r;

    if (mi_row + r == 0) {
      mask_16x16_r = 0;
      mask_8x8_r = 0;
      mask_4x4_r = 0;
    } else {
      mask_16x16_r = mask_16x16 & 0xff;
      mask_8x8_r = mask_8x8 & 0xff;
      mask_4x4_r = mask_4x4 & 0xff;
    }

#if CONFIG_VP9_HIGHBITDEPTH
    if (cm->use_highbitdepth) {
      highbd_filter_selectively_horiz(
          CONVERT_TO_SHORTPTR(dst->buf), dst->stride, mask_16x16_r, mask_8x8_r,
          mask_4x4_r, mask_4x4_int & 0xff, cm->lf_info.lfthr,
          &lfm->lfl_y[r << 3], (int)cm->bit_depth);
    } else {
#endif  // CONFIG_VP9_HIGHBITDEPTH
      filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
                               mask_4x4_r, mask_4x4_int & 0xff,
                               cm->lf_info.lfthr, &lfm->lfl_y[r << 3]);
#if CONFIG_VP9_HIGHBITDEPTH
    }
#endif  // CONFIG_VP9_HIGHBITDEPTH

    dst->buf += 8 * dst->stride;
    mask_16x16 >>= 8;
    mask_8x8 >>= 8;
    mask_4x4 >>= 8;
    mask_4x4_int >>= 8;
  }
}

void vp9_filter_block_plane_ss11(VP9_COMMON *const cm,
                                 struct macroblockd_plane *const plane,
                                 int mi_row, LOOP_FILTER_MASK *lfm) {
  struct buf_2d *const dst = &plane->dst;
  uint8_t *const dst0 = dst->buf;
  int r, c;
  uint8_t lfl_uv[16];

  uint16_t mask_16x16 = lfm->left_uv[TX_16X16];
  uint16_t mask_8x8 = lfm->left_uv[TX_8X8];
  uint16_t mask_4x4 = lfm->left_uv[TX_4X4];
  uint16_t mask_4x4_int = lfm->int_4x4_uv;

  vp9_zero(lfl_uv);

  assert(plane->subsampling_x == 1 && plane->subsampling_y == 1);

  // Vertical pass: do 2 rows at one time
  for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 4) {
    for (c = 0; c < (MI_BLOCK_SIZE >> 1); c++) {
      lfl_uv[(r << 1) + c] = lfm->lfl_y[(r << 3) + (c << 1)];
      lfl_uv[((r + 2) << 1) + c] = lfm->lfl_y[((r + 2) << 3) + (c << 1)];
    }

#if CONFIG_VP9_HIGHBITDEPTH
    if (cm->use_highbitdepth) {
      // Disable filtering on the leftmost column.
      highbd_filter_selectively_vert_row2(
          plane->subsampling_x, CONVERT_TO_SHORTPTR(dst->buf), dst->stride,
          (unsigned int)mask_16x16, (unsigned int)mask_8x8,
          (unsigned int)mask_4x4, (unsigned int)mask_4x4_int, cm->lf_info.lfthr,
          &lfl_uv[r << 1], (int)cm->bit_depth);
    } else {
#endif  // CONFIG_VP9_HIGHBITDEPTH
      // Disable filtering on the leftmost column.
      filter_selectively_vert_row2(
          plane->subsampling_x, dst->buf, dst->stride, (unsigned int)mask_16x16,
          (unsigned int)mask_8x8, (unsigned int)mask_4x4,
          (unsigned int)mask_4x4_int, cm->lf_info.lfthr, &lfl_uv[r << 1]);
#if CONFIG_VP9_HIGHBITDEPTH
    }
#endif  // CONFIG_VP9_HIGHBITDEPTH

    dst->buf += 16 * dst->stride;
    mask_16x16 >>= 8;
    mask_8x8 >>= 8;
    mask_4x4 >>= 8;
    mask_4x4_int >>= 8;
  }

  // Horizontal pass
  dst->buf = dst0;
  mask_16x16 = lfm->above_uv[TX_16X16];
  mask_8x8 = lfm->above_uv[TX_8X8];
  mask_4x4 = lfm->above_uv[TX_4X4];
  mask_4x4_int = lfm->int_4x4_uv;

  for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 2) {
    const int skip_border_4x4_r = mi_row + r == cm->mi_rows - 1;
    const unsigned int mask_4x4_int_r =
        skip_border_4x4_r ? 0 : (mask_4x4_int & 0xf);
    unsigned int mask_16x16_r;
    unsigned int mask_8x8_r;
    unsigned int mask_4x4_r;

    if (mi_row + r == 0) {
      mask_16x16_r = 0;
      mask_8x8_r = 0;
      mask_4x4_r = 0;
    } else {
      mask_16x16_r = mask_16x16 & 0xf;
      mask_8x8_r = mask_8x8 & 0xf;
      mask_4x4_r = mask_4x4 & 0xf;
    }

#if CONFIG_VP9_HIGHBITDEPTH
    if (cm->use_highbitdepth) {
      highbd_filter_selectively_horiz(
          CONVERT_TO_SHORTPTR(dst->buf), dst->stride, mask_16x16_r, mask_8x8_r,
          mask_4x4_r, mask_4x4_int_r, cm->lf_info.lfthr, &lfl_uv[r << 1],
          (int)cm->bit_depth);
    } else {
#endif  // CONFIG_VP9_HIGHBITDEPTH
      filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
                               mask_4x4_r, mask_4x4_int_r, cm->lf_info.lfthr,
                               &lfl_uv[r << 1]);
#if CONFIG_VP9_HIGHBITDEPTH
    }
#endif  // CONFIG_VP9_HIGHBITDEPTH

    dst->buf += 8 * dst->stride;
    mask_16x16 >>= 4;
    mask_8x8 >>= 4;
    mask_4x4 >>= 4;
    mask_4x4_int >>= 4;
  }
}
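// Note (illustrative): in the ss11 path above, lfl_uv[(r << 1) + c] takes the
// luma filter level of the top-left 8x8 block of each 16x16 luma area
// (lfm->lfl_y[(r << 3) + (c << 1)]), since the corresponding chroma 8x8 block
// shares that level.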
static void loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, VP9_COMMON *cm,
                             struct macroblockd_plane planes[MAX_MB_PLANE],
                             int start, int stop, int y_only) {
  const int num_planes = y_only ? 1 : MAX_MB_PLANE;
  enum lf_path path;
  int mi_row, mi_col;

  if (y_only)
    path = LF_PATH_444;
  else if (planes[1].subsampling_y == 1 && planes[1].subsampling_x == 1)
    path = LF_PATH_420;
  else if (planes[1].subsampling_y == 0 && planes[1].subsampling_x == 0)
    path = LF_PATH_444;
  else
    path = LF_PATH_SLOW;

  for (mi_row = start; mi_row < stop; mi_row += MI_BLOCK_SIZE) {
    MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
    LOOP_FILTER_MASK *lfm = get_lfm(&cm->lf, mi_row, 0);

    for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE, ++lfm) {
      int plane;

      vp9_setup_dst_planes(planes, frame_buffer, mi_row, mi_col);

      // TODO(jimbankoski): For 444 only need to do y mask.
      vp9_adjust_mask(cm, mi_row, mi_col, lfm);

      vp9_filter_block_plane_ss00(cm, &planes[0], mi_row, lfm);
      for (plane = 1; plane < num_planes; ++plane) {
        switch (path) {
          case LF_PATH_420:
            vp9_filter_block_plane_ss11(cm, &planes[plane], mi_row, lfm);
            break;
          case LF_PATH_444:
            vp9_filter_block_plane_ss00(cm, &planes[plane], mi_row, lfm);
            break;
          case LF_PATH_SLOW:
            vp9_filter_block_plane_non420(cm, &planes[plane], mi + mi_col,
                                          mi_row, mi_col);
            break;
        }
      }
    }
  }
}

void vp9_loop_filter_frame(YV12_BUFFER_CONFIG *frame, VP9_COMMON *cm,
                           MACROBLOCKD *xd, int frame_filter_level, int y_only,
                           int partial_frame) {
  int start_mi_row, end_mi_row, mi_rows_to_filter;
  if (!frame_filter_level) return;
  start_mi_row = 0;
  mi_rows_to_filter = cm->mi_rows;
  if (partial_frame && cm->mi_rows > 8) {
    start_mi_row = cm->mi_rows >> 1;
    start_mi_row &= 0xfffffff8;
    mi_rows_to_filter = VPXMAX(cm->mi_rows / 8, 8);
  }
  end_mi_row = start_mi_row + mi_rows_to_filter;
  loop_filter_rows(frame, cm, xd->plane, start_mi_row, end_mi_row, y_only);
}

// Used by the encoder to build the loopfilter masks.
// TODO(slavarnway): Do the encoder the same way the decoder does it and
//                   build the masks in line as part of the encode process.
void vp9_build_mask_frame(VP9_COMMON *cm, int frame_filter_level,
                          int partial_frame) {
  int start_mi_row, end_mi_row, mi_rows_to_filter;
  int mi_col, mi_row;
  if (!frame_filter_level) return;
  start_mi_row = 0;
  mi_rows_to_filter = cm->mi_rows;
  if (partial_frame && cm->mi_rows > 8) {
    start_mi_row = cm->mi_rows >> 1;
    start_mi_row &= 0xfffffff8;
    mi_rows_to_filter = VPXMAX(cm->mi_rows / 8, 8);
  }
  end_mi_row = start_mi_row + mi_rows_to_filter;

  vp9_loop_filter_frame_init(cm, frame_filter_level);

  for (mi_row = start_mi_row; mi_row < end_mi_row; mi_row += MI_BLOCK_SIZE) {
    MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
    for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) {
      // vp9_setup_mask() zeros lfm
      vp9_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride,
                     get_lfm(&cm->lf, mi_row, mi_col));
    }
  }
}
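// Worked example (illustrative): for a 720p frame, cm->mi_rows == 90, so a
// partial-frame pass starts at start_mi_row == (90 >> 1) & ~7 == 40 and
// filters mi_rows_to_filter == VPXMAX(90 / 8, 8) == 11 mi rows.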
// 8x8 blocks in a superblock. A "1" represents the first block in a 16x16
// or greater area.
static const uint8_t first_block_in_16x16[8][8] = {
  { 1, 0, 1, 0, 1, 0, 1, 0 }, { 0, 0, 0, 0, 0, 0, 0, 0 },
  { 1, 0, 1, 0, 1, 0, 1, 0 }, { 0, 0, 0, 0, 0, 0, 0, 0 },
  { 1, 0, 1, 0, 1, 0, 1, 0 }, { 0, 0, 0, 0, 0, 0, 0, 0 },
  { 1, 0, 1, 0, 1, 0, 1, 0 }, { 0, 0, 0, 0, 0, 0, 0, 0 }
};

// This function sets up the bit masks for a block represented
// by mi_row, mi_col in a 64x64 region.
// TODO(SJL): This function only works for yv12.
void vp9_build_mask(VP9_COMMON *cm, const MODE_INFO *mi, int mi_row, int mi_col,
                    int bw, int bh) {
  const BLOCK_SIZE block_size = mi->sb_type;
  const TX_SIZE tx_size_y = mi->tx_size;
  const loop_filter_info_n *const lfi_n = &cm->lf_info;
  const int filter_level = get_filter_level(lfi_n, mi);
  const TX_SIZE tx_size_uv = uv_txsize_lookup[block_size][tx_size_y][1][1];
  LOOP_FILTER_MASK *const lfm = get_lfm(&cm->lf, mi_row, mi_col);
  uint64_t *const left_y = &lfm->left_y[tx_size_y];
  uint64_t *const above_y = &lfm->above_y[tx_size_y];
  uint64_t *const int_4x4_y = &lfm->int_4x4_y;
  uint16_t *const left_uv = &lfm->left_uv[tx_size_uv];
  uint16_t *const above_uv = &lfm->above_uv[tx_size_uv];
  uint16_t *const int_4x4_uv = &lfm->int_4x4_uv;
  const int row_in_sb = (mi_row & 7);
  const int col_in_sb = (mi_col & 7);
  const int shift_y = col_in_sb + (row_in_sb << 3);
  const int shift_uv = (col_in_sb >> 1) + ((row_in_sb >> 1) << 2);
  const int build_uv = first_block_in_16x16[row_in_sb][col_in_sb];

  if (!filter_level) {
    return;
  } else {
    int index = shift_y;
    int i;
    for (i = 0; i < bh; i++) {
      memset(&lfm->lfl_y[index], filter_level, bw);
      index += 8;
    }
  }

  // These set 1 in the current block size for the block size edges.
  // For instance if the block size is 32x16, we'll set:
  //    above =   1111
  //              0000
  //    and
  //    left  =   1000
  //              1000
  // NOTE: In this example the low bit is left-most, so the left pattern
  // ( 1000 ) is stored as 1, not 8...
  //
  // U and V set things on a 16 bit scale.
  //
  *above_y |= above_prediction_mask[block_size] << shift_y;
  *left_y |= left_prediction_mask[block_size] << shift_y;

  if (build_uv) {
    *above_uv |= above_prediction_mask_uv[block_size] << shift_uv;
    *left_uv |= left_prediction_mask_uv[block_size] << shift_uv;
  }

  // If the block has no coefficients and is not intra we skip applying
  // the loop filter on block edges.
  if (mi->skip && is_inter_block(mi)) return;

  // Add a mask for the transform size. The transform size mask is set to
  // be correct for a 64x64 prediction block size. Mask to match the size of
  // the block we are working on and then shift it into place.
  *above_y |= (size_mask[block_size] & above_64x64_txform_mask[tx_size_y])
              << shift_y;
  *left_y |= (size_mask[block_size] & left_64x64_txform_mask[tx_size_y])
             << shift_y;

  if (build_uv) {
    *above_uv |=
        (size_mask_uv[block_size] & above_64x64_txform_mask_uv[tx_size_uv])
        << shift_uv;

    *left_uv |=
        (size_mask_uv[block_size] & left_64x64_txform_mask_uv[tx_size_uv])
        << shift_uv;
  }

  // Try to determine what to do with the internal 4x4 block boundaries. These
  // differ from the 4x4 boundaries on the outside edge of an 8x8 in that the
  // internal ones can be skipped and don't depend on the prediction block
  // size.
  if (tx_size_y == TX_4X4) *int_4x4_y |= size_mask[block_size] << shift_y;

  if (build_uv && tx_size_uv == TX_4X4)
    *int_4x4_uv |= (size_mask_uv[block_size] & 0xffff) << shift_uv;
}

void vp9_loop_filter_data_reset(
    LFWorkerData *lf_data, YV12_BUFFER_CONFIG *frame_buffer,
    struct VP9Common *cm, const struct macroblockd_plane planes[MAX_MB_PLANE]) {
  lf_data->frame_buffer = frame_buffer;
  lf_data->cm = cm;
  lf_data->start = 0;
  lf_data->stop = 0;
  lf_data->y_only = 0;
  memcpy(lf_data->planes, planes, sizeof(lf_data->planes));
}

void vp9_reset_lfm(VP9_COMMON *const cm) {
  if (cm->lf.filter_level) {
    memset(cm->lf.lfm, 0,
           ((cm->mi_rows + (MI_BLOCK_SIZE - 1)) >> 3) * cm->lf.lfm_stride *
               sizeof(*cm->lf.lfm));
  }
}

int vp9_loop_filter_worker(void *arg1, void *unused) {
  LFWorkerData *const lf_data = (LFWorkerData *)arg1;
  (void)unused;
  loop_filter_rows(lf_data->frame_buffer, lf_data->cm, lf_data->planes,
                   lf_data->start, lf_data->stop, lf_data->y_only);
  return 1;
}