/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <limits.h>
#include <string.h>

#include "vpx_config.h"
#include "vp8_rtcd.h"
#include "vpx/vpx_integer.h"
#include "blockd.h"
#include "reconinter.h"
#if CONFIG_RUNTIME_CPU_DETECT
#include "onyxc_int.h"
#endif

void vp8_copy_mem16x16_c(unsigned char *src, int src_stride,
                         unsigned char *dst, int dst_stride) {
  int r;

  for (r = 0; r < 16; ++r) {
    memcpy(dst, src, 16);

    src += src_stride;
    dst += dst_stride;
  }
}

void vp8_copy_mem8x8_c(unsigned char *src, int src_stride, unsigned char *dst,
                       int dst_stride) {
  int r;

  for (r = 0; r < 8; ++r) {
    memcpy(dst, src, 8);

    src += src_stride;
    dst += dst_stride;
  }
}

void vp8_copy_mem8x4_c(unsigned char *src, int src_stride, unsigned char *dst,
                       int dst_stride) {
  int r;

  for (r = 0; r < 4; ++r) {
    memcpy(dst, src, 8);

    src += src_stride;
    dst += dst_stride;
  }
}

void vp8_build_inter_predictors_b(BLOCKD *d, int pitch,
                                  unsigned char *base_pre, int pre_stride,
                                  vp8_subpix_fn_t sppf) {
  int r;
  unsigned char *pred_ptr = d->predictor;
  unsigned char *ptr;
  ptr = base_pre + d->offset + (d->bmi.mv.as_mv.row >> 3) * pre_stride +
        (d->bmi.mv.as_mv.col >> 3);

  if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7) {
    sppf(ptr, pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7,
         pred_ptr, pitch);
  } else {
    for (r = 0; r < 4; ++r) {
      pred_ptr[0] = ptr[0];
      pred_ptr[1] = ptr[1];
      pred_ptr[2] = ptr[2];
      pred_ptr[3] = ptr[3];
      pred_ptr += pitch;
      ptr += pre_stride;
    }
  }
}

static void build_inter_predictors4b(MACROBLOCKD *x, BLOCKD *d,
                                     unsigned char *dst, int dst_stride,
                                     unsigned char *base_pre, int pre_stride) {
  unsigned char *ptr;
  ptr = base_pre + d->offset + (d->bmi.mv.as_mv.row >> 3) * pre_stride +
        (d->bmi.mv.as_mv.col >> 3);

  if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7) {
    x->subpixel_predict8x8(ptr, pre_stride, d->bmi.mv.as_mv.col & 7,
                           d->bmi.mv.as_mv.row & 7, dst, dst_stride);
  } else {
    vp8_copy_mem8x8(ptr, pre_stride, dst, dst_stride);
  }
}

static void build_inter_predictors2b(MACROBLOCKD *x, BLOCKD *d,
                                     unsigned char *dst, int dst_stride,
                                     unsigned char *base_pre, int pre_stride) {
  unsigned char *ptr;
  ptr = base_pre + d->offset + (d->bmi.mv.as_mv.row >> 3) * pre_stride +
        (d->bmi.mv.as_mv.col >> 3);

  if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7) {
    x->subpixel_predict8x4(ptr, pre_stride, d->bmi.mv.as_mv.col & 7,
                           d->bmi.mv.as_mv.row & 7, dst, dst_stride);
  } else {
    vp8_copy_mem8x4(ptr, pre_stride, dst, dst_stride);
  }
}

static void build_inter_predictors_b(BLOCKD *d, unsigned char *dst,
                                     int dst_stride, unsigned char *base_pre,
                                     int pre_stride, vp8_subpix_fn_t sppf) {
  int r;
  unsigned char *ptr;
  ptr = base_pre + d->offset + (d->bmi.mv.as_mv.row >> 3) * pre_stride +
        (d->bmi.mv.as_mv.col >> 3);

  if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7) {
    sppf(ptr, pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, dst,
         dst_stride);
  } else {
    for (r = 0; r < 4; ++r) {
      dst[0] = ptr[0];
      dst[1] = ptr[1];
      dst[2] = ptr[2];
      dst[3] = ptr[3];
      dst += dst_stride;
      ptr += pre_stride;
    }
  }
}

/*encoder only*/
void vp8_build_inter16x16_predictors_mbuv(MACROBLOCKD *x) {
  unsigned char *uptr, *vptr;
  unsigned char *upred_ptr = &x->predictor[256];
  unsigned char *vpred_ptr = &x->predictor[320];

  int mv_row = x->mode_info_context->mbmi.mv.as_mv.row;
  int mv_col = x->mode_info_context->mbmi.mv.as_mv.col;
  int offset;
  int pre_stride = x->pre.uv_stride;

  /* calc uv motion vectors */
  mv_row += 1 | (mv_row >> (sizeof(int) * CHAR_BIT - 1));
  mv_col += 1 | (mv_col >> (sizeof(int) * CHAR_BIT - 1));
  mv_row /= 2;
  mv_col /= 2;
  mv_row &= x->fullpixel_mask;
  mv_col &= x->fullpixel_mask;

  offset = (mv_row >> 3) * pre_stride + (mv_col >> 3);
  uptr = x->pre.u_buffer + offset;
  vptr = x->pre.v_buffer + offset;

  if ((mv_row | mv_col) & 7) {
    x->subpixel_predict8x8(uptr, pre_stride, mv_col & 7, mv_row & 7, upred_ptr,
                           8);
    x->subpixel_predict8x8(vptr, pre_stride, mv_col & 7, mv_row & 7, vpred_ptr,
                           8);
  } else {
    vp8_copy_mem8x8(uptr, pre_stride, upred_ptr, 8);
    vp8_copy_mem8x8(vptr, pre_stride, vpred_ptr, 8);
  }
}

/*encoder only*/
void vp8_build_inter4x4_predictors_mbuv(MACROBLOCKD *x) {
  int i, j;
  int pre_stride = x->pre.uv_stride;
  unsigned char *base_pre;

  /* build uv mvs */
  for (i = 0; i < 2; ++i) {
    for (j = 0; j < 2; ++j) {
      int yoffset = i * 8 + j * 2;
      int uoffset = 16 + i * 2 + j;
      int voffset = 20 + i * 2 + j;

      int temp;

      temp = x->block[yoffset].bmi.mv.as_mv.row +
             x->block[yoffset + 1].bmi.mv.as_mv.row +
             x->block[yoffset + 4].bmi.mv.as_mv.row +
             x->block[yoffset + 5].bmi.mv.as_mv.row;

      temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8);

      x->block[uoffset].bmi.mv.as_mv.row = (temp / 8) & x->fullpixel_mask;

      temp = x->block[yoffset].bmi.mv.as_mv.col +
             x->block[yoffset + 1].bmi.mv.as_mv.col +
             x->block[yoffset + 4].bmi.mv.as_mv.col +
             x->block[yoffset + 5].bmi.mv.as_mv.col;

      temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8);

      x->block[uoffset].bmi.mv.as_mv.col = (temp / 8) & x->fullpixel_mask;

      x->block[voffset].bmi.mv.as_int = x->block[uoffset].bmi.mv.as_int;
    }
  }

  base_pre = x->pre.u_buffer;
  for (i = 16; i < 20; i += 2) {
    BLOCKD *d0 = &x->block[i];
    BLOCKD *d1 = &x->block[i + 1];

    if (d0->bmi.mv.as_int == d1->bmi.mv.as_int) {
      build_inter_predictors2b(x, d0, d0->predictor, 8, base_pre, pre_stride);
    } else {
      vp8_build_inter_predictors_b(d0, 8, base_pre, pre_stride,
                                   x->subpixel_predict);
      vp8_build_inter_predictors_b(d1, 8, base_pre, pre_stride,
                                   x->subpixel_predict);
    }
  }

  base_pre = x->pre.v_buffer;
  for (i = 20; i < 24; i += 2) {
    BLOCKD *d0 = &x->block[i];
    BLOCKD *d1 = &x->block[i + 1];

    if (d0->bmi.mv.as_int == d1->bmi.mv.as_int) {
      build_inter_predictors2b(x, d0, d0->predictor, 8, base_pre, pre_stride);
    } else {
      vp8_build_inter_predictors_b(d0, 8, base_pre, pre_stride,
                                   x->subpixel_predict);
      vp8_build_inter_predictors_b(d1, 8, base_pre, pre_stride,
                                   x->subpixel_predict);
    }
  }
}
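/* A note on the chroma-MV rounding used above: both expressions implement
 * divide-with-round-to-nearest, ties away from zero, driven by the sign bit
 * of the two's complement value. In
 *     temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8);
 * the arithmetic right shift yields 0 for non-negative temp and -1 for
 * negative temp, so the adjustment is +4 or 4 + (-8) = -4 before C's
 * truncating division by 8. For example, temp = 12 gives 16 / 8 = 2, and
 * temp = -12 gives -16 / 8 = -2. Similarly,
 *     mv_row += 1 | (mv_row >> (sizeof(int) * CHAR_BIT - 1));
 * adds +1 or -1 before dividing by 2, e.g. 5 -> 6 / 2 = 3 and
 * -5 -> -6 / 2 = -3. (This relies on an arithmetic right shift of negative
 * values, which C leaves implementation-defined but which is how compilers
 * behave on the platforms this code targets.)
 */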
/*encoder only*/
void vp8_build_inter16x16_predictors_mby(MACROBLOCKD *x, unsigned char *dst_y,
                                         int dst_ystride) {
  unsigned char *ptr_base;
  unsigned char *ptr;
  int mv_row = x->mode_info_context->mbmi.mv.as_mv.row;
  int mv_col = x->mode_info_context->mbmi.mv.as_mv.col;
  int pre_stride = x->pre.y_stride;

  ptr_base = x->pre.y_buffer;
  ptr = ptr_base + (mv_row >> 3) * pre_stride + (mv_col >> 3);

  if ((mv_row | mv_col) & 7) {
    x->subpixel_predict16x16(ptr, pre_stride, mv_col & 7, mv_row & 7, dst_y,
                             dst_ystride);
  } else {
    vp8_copy_mem16x16(ptr, pre_stride, dst_y, dst_ystride);
  }
}

static void clamp_mv_to_umv_border(MV *mv, const MACROBLOCKD *xd) {
  /* If the MV points so far into the UMV border that no visible pixels
   * are used for reconstruction, the subpel part of the MV can be
   * discarded and the MV limited to 16 pixels with equivalent results.
   *
   * This limit kicks in at 19 pixels for the top and left edges, for
   * the 16 pixels plus 3 taps right of the central pixel when subpel
   * filtering. The bottom and right edges use 16 pixels plus 2 pixels
   * left of the central pixel when filtering.
   */
  if (mv->col < (xd->mb_to_left_edge - (19 << 3))) {
    mv->col = xd->mb_to_left_edge - (16 << 3);
  } else if (mv->col > xd->mb_to_right_edge + (18 << 3)) {
    mv->col = xd->mb_to_right_edge + (16 << 3);
  }

  if (mv->row < (xd->mb_to_top_edge - (19 << 3))) {
    mv->row = xd->mb_to_top_edge - (16 << 3);
  } else if (mv->row > xd->mb_to_bottom_edge + (18 << 3)) {
    mv->row = xd->mb_to_bottom_edge + (16 << 3);
  }
}
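/* A worked example for clamp_mv_to_umv_border (values in 1/8 pel, as used
 * throughout this file): 19 << 3 = 152 is 19 full pels. For a macroblock in
 * the leftmost column, xd->mb_to_left_edge is 0, so an mv->col of -160
 * (-20 pels) satisfies -160 < -152 and is clamped to -128 (-16 pels, no
 * subpel fraction). Every pixel the 6-tap filter would have read at
 * -20 pels lies in the replicated UMV border, so the clamped full-pel MV
 * reconstructs an identical block more cheaply.
 */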
/* A version of the above function for chroma block MVs.*/
static void clamp_uvmv_to_umv_border(MV *mv, const MACROBLOCKD *xd) {
  mv->col = (2 * mv->col < (xd->mb_to_left_edge - (19 << 3)))
                ? (xd->mb_to_left_edge - (16 << 3)) >> 1
                : mv->col;
  mv->col = (2 * mv->col > xd->mb_to_right_edge + (18 << 3))
                ? (xd->mb_to_right_edge + (16 << 3)) >> 1
                : mv->col;

  mv->row = (2 * mv->row < (xd->mb_to_top_edge - (19 << 3)))
                ? (xd->mb_to_top_edge - (16 << 3)) >> 1
                : mv->row;
  mv->row = (2 * mv->row > xd->mb_to_bottom_edge + (18 << 3))
                ? (xd->mb_to_bottom_edge + (16 << 3)) >> 1
                : mv->row;
}

void vp8_build_inter16x16_predictors_mb(MACROBLOCKD *x, unsigned char *dst_y,
                                        unsigned char *dst_u,
                                        unsigned char *dst_v, int dst_ystride,
                                        int dst_uvstride) {
  int offset;
  unsigned char *ptr;
  unsigned char *uptr, *vptr;

  int_mv _16x16mv;

  unsigned char *ptr_base = x->pre.y_buffer;
  int pre_stride = x->pre.y_stride;

  _16x16mv.as_int = x->mode_info_context->mbmi.mv.as_int;

  if (x->mode_info_context->mbmi.need_to_clamp_mvs) {
    clamp_mv_to_umv_border(&_16x16mv.as_mv, x);
  }

  ptr = ptr_base + (_16x16mv.as_mv.row >> 3) * pre_stride +
        (_16x16mv.as_mv.col >> 3);

  if (_16x16mv.as_int & 0x00070007) {
    x->subpixel_predict16x16(ptr, pre_stride, _16x16mv.as_mv.col & 7,
                             _16x16mv.as_mv.row & 7, dst_y, dst_ystride);
  } else {
    vp8_copy_mem16x16(ptr, pre_stride, dst_y, dst_ystride);
  }

  /* calc uv motion vectors */
  _16x16mv.as_mv.row +=
      1 | (_16x16mv.as_mv.row >> (sizeof(int) * CHAR_BIT - 1));
  _16x16mv.as_mv.col +=
      1 | (_16x16mv.as_mv.col >> (sizeof(int) * CHAR_BIT - 1));
  _16x16mv.as_mv.row /= 2;
  _16x16mv.as_mv.col /= 2;
  _16x16mv.as_mv.row &= x->fullpixel_mask;
  _16x16mv.as_mv.col &= x->fullpixel_mask;

  pre_stride >>= 1;
  offset = (_16x16mv.as_mv.row >> 3) * pre_stride + (_16x16mv.as_mv.col >> 3);
  uptr = x->pre.u_buffer + offset;
  vptr = x->pre.v_buffer + offset;

  if (_16x16mv.as_int & 0x00070007) {
    x->subpixel_predict8x8(uptr, pre_stride, _16x16mv.as_mv.col & 7,
                           _16x16mv.as_mv.row & 7, dst_u, dst_uvstride);
    x->subpixel_predict8x8(vptr, pre_stride, _16x16mv.as_mv.col & 7,
                           _16x16mv.as_mv.row & 7, dst_v, dst_uvstride);
  } else {
    vp8_copy_mem8x8(uptr, pre_stride, dst_u, dst_uvstride);
    vp8_copy_mem8x8(vptr, pre_stride, dst_v, dst_uvstride);
  }
}
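/* Two notes on vp8_build_inter16x16_predictors_mb above: int_mv overlays
 * the two 16-bit MV components on a single 32-bit word, so
 * (_16x16mv.as_int & 0x00070007) tests the low three bits of row and col
 * in one operation, i.e. whether either component carries a subpel
 * fraction. A full-pel MV such as (row = 8, col = -16) clears both masks
 * and takes the copy path. The "pre_stride >>= 1" derives the chroma
 * stride by halving the luma stride, which assumes the usual 4:2:0 frame
 * layout where uv_stride == y_stride / 2 (the other functions in this
 * file read x->pre.uv_stride directly).
 */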
static void build_inter4x4_predictors_mb(MACROBLOCKD *x) {
  int i;
  unsigned char *base_dst = x->dst.y_buffer;
  unsigned char *base_pre = x->pre.y_buffer;

  if (x->mode_info_context->mbmi.partitioning < 3) {
    BLOCKD *b;
    int dst_stride = x->dst.y_stride;

    x->block[0].bmi = x->mode_info_context->bmi[0];
    x->block[2].bmi = x->mode_info_context->bmi[2];
    x->block[8].bmi = x->mode_info_context->bmi[8];
    x->block[10].bmi = x->mode_info_context->bmi[10];
    if (x->mode_info_context->mbmi.need_to_clamp_mvs) {
      clamp_mv_to_umv_border(&x->block[0].bmi.mv.as_mv, x);
      clamp_mv_to_umv_border(&x->block[2].bmi.mv.as_mv, x);
      clamp_mv_to_umv_border(&x->block[8].bmi.mv.as_mv, x);
      clamp_mv_to_umv_border(&x->block[10].bmi.mv.as_mv, x);
    }

    b = &x->block[0];
    build_inter_predictors4b(x, b, base_dst + b->offset, dst_stride, base_pre,
                             dst_stride);
    b = &x->block[2];
    build_inter_predictors4b(x, b, base_dst + b->offset, dst_stride, base_pre,
                             dst_stride);
    b = &x->block[8];
    build_inter_predictors4b(x, b, base_dst + b->offset, dst_stride, base_pre,
                             dst_stride);
    b = &x->block[10];
    build_inter_predictors4b(x, b, base_dst + b->offset, dst_stride, base_pre,
                             dst_stride);
  } else {
    for (i = 0; i < 16; i += 2) {
      BLOCKD *d0 = &x->block[i];
      BLOCKD *d1 = &x->block[i + 1];
      int dst_stride = x->dst.y_stride;

      x->block[i + 0].bmi = x->mode_info_context->bmi[i + 0];
      x->block[i + 1].bmi = x->mode_info_context->bmi[i + 1];
      if (x->mode_info_context->mbmi.need_to_clamp_mvs) {
        clamp_mv_to_umv_border(&x->block[i + 0].bmi.mv.as_mv, x);
        clamp_mv_to_umv_border(&x->block[i + 1].bmi.mv.as_mv, x);
      }

      if (d0->bmi.mv.as_int == d1->bmi.mv.as_int) {
        build_inter_predictors2b(x, d0, base_dst + d0->offset, dst_stride,
                                 base_pre, dst_stride);
      } else {
        build_inter_predictors_b(d0, base_dst + d0->offset, dst_stride,
                                 base_pre, dst_stride, x->subpixel_predict);
        build_inter_predictors_b(d1, base_dst + d1->offset, dst_stride,
                                 base_pre, dst_stride, x->subpixel_predict);
      }
    }
  }
  base_dst = x->dst.u_buffer;
  base_pre = x->pre.u_buffer;
  for (i = 16; i < 20; i += 2) {
    BLOCKD *d0 = &x->block[i];
    BLOCKD *d1 = &x->block[i + 1];
    int dst_stride = x->dst.uv_stride;

    /* Note: uv mvs already clamped in build_4x4uvmvs() */

    if (d0->bmi.mv.as_int == d1->bmi.mv.as_int) {
      build_inter_predictors2b(x, d0, base_dst + d0->offset, dst_stride,
                               base_pre, dst_stride);
    } else {
      build_inter_predictors_b(d0, base_dst + d0->offset, dst_stride, base_pre,
                               dst_stride, x->subpixel_predict);
      build_inter_predictors_b(d1, base_dst + d1->offset, dst_stride, base_pre,
                               dst_stride, x->subpixel_predict);
    }
  }

  base_dst = x->dst.v_buffer;
  base_pre = x->pre.v_buffer;
  for (i = 20; i < 24; i += 2) {
    BLOCKD *d0 = &x->block[i];
    BLOCKD *d1 = &x->block[i + 1];
    int dst_stride = x->dst.uv_stride;

    /* Note: uv mvs already clamped in build_4x4uvmvs() */

    if (d0->bmi.mv.as_int == d1->bmi.mv.as_int) {
      build_inter_predictors2b(x, d0, base_dst + d0->offset, dst_stride,
                               base_pre, dst_stride);
    } else {
      build_inter_predictors_b(d0, base_dst + d0->offset, dst_stride, base_pre,
                               dst_stride, x->subpixel_predict);
      build_inter_predictors_b(d1, base_dst + d1->offset, dst_stride, base_pre,
                               dst_stride, x->subpixel_predict);
    }
  }
}

static void build_4x4uvmvs(MACROBLOCKD *x) {
  int i, j;

  for (i = 0; i < 2; ++i) {
    for (j = 0; j < 2; ++j) {
      int yoffset = i * 8 + j * 2;
      int uoffset = 16 + i * 2 + j;
      int voffset = 20 + i * 2 + j;

      int temp;

      temp = x->mode_info_context->bmi[yoffset + 0].mv.as_mv.row +
             x->mode_info_context->bmi[yoffset + 1].mv.as_mv.row +
             x->mode_info_context->bmi[yoffset + 4].mv.as_mv.row +
             x->mode_info_context->bmi[yoffset + 5].mv.as_mv.row;

      temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8);

      x->block[uoffset].bmi.mv.as_mv.row = (temp / 8) & x->fullpixel_mask;

      temp = x->mode_info_context->bmi[yoffset + 0].mv.as_mv.col +
             x->mode_info_context->bmi[yoffset + 1].mv.as_mv.col +
             x->mode_info_context->bmi[yoffset + 4].mv.as_mv.col +
             x->mode_info_context->bmi[yoffset + 5].mv.as_mv.col;

      temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8);

      x->block[uoffset].bmi.mv.as_mv.col = (temp / 8) & x->fullpixel_mask;

      if (x->mode_info_context->mbmi.need_to_clamp_mvs) {
        clamp_uvmv_to_umv_border(&x->block[uoffset].bmi.mv.as_mv, x);
      }

      x->block[voffset].bmi.mv.as_int = x->block[uoffset].bmi.mv.as_int;
    }
  }
}

void vp8_build_inter_predictors_mb(MACROBLOCKD *xd) {
  if (xd->mode_info_context->mbmi.mode != SPLITMV) {
    vp8_build_inter16x16_predictors_mb(xd, xd->dst.y_buffer, xd->dst.u_buffer,
                                       xd->dst.v_buffer, xd->dst.y_stride,
                                       xd->dst.uv_stride);
  } else {
    build_4x4uvmvs(xd);
    build_inter4x4_predictors_mb(xd);
  }
}
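/* A note on the dispatch above: every inter mode except SPLITMV signals a
 * single MV for the whole macroblock, so prediction is one 16x16 luma and
 * two 8x8 chroma operations. SPLITMV carries per-4x4 MVs, so chroma MVs
 * are first derived from the luma MVs (build_4x4uvmvs) and the predictors
 * are then built block by block. The x->block[] indices used throughout
 * follow VP8's fixed layout: 0..15 are the luma 4x4s in raster order,
 * 16..19 the U blocks and 20..23 the V blocks, which is why the chroma
 * loops run over [16, 20) and [20, 24).
 */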