1 /* 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "./vp9_rtcd.h" 12 13 #include "vpx_ports/mem.h" 14 #include "vpx/vpx_integer.h" 15 16 #include "vp9/common/vp9_common.h" 17 #include "vp9/common/vp9_filter.h" 18 19 #include "vp9/encoder/vp9_variance.h" 20 21 void variance(const uint8_t *src_ptr, 22 int source_stride, 23 const uint8_t *ref_ptr, 24 int recon_stride, 25 int w, 26 int h, 27 unsigned int *sse, 28 int *sum) { 29 int i, j; 30 int diff; 31 32 *sum = 0; 33 *sse = 0; 34 35 for (i = 0; i < h; i++) { 36 for (j = 0; j < w; j++) { 37 diff = src_ptr[j] - ref_ptr[j]; 38 *sum += diff; 39 *sse += diff * diff; 40 } 41 42 src_ptr += source_stride; 43 ref_ptr += recon_stride; 44 } 45 } 46 47 /**************************************************************************** 48 * 49 * ROUTINE : filter_block2d_bil_first_pass 50 * 51 * INPUTS : uint8_t *src_ptr : Pointer to source block. 52 * uint32_t src_pixels_per_line : Stride of input block. 53 * uint32_t pixel_step : Offset between filter input 54 * samples (see notes). 55 * uint32_t output_height : Input block height. 56 * uint32_t output_width : Input block width. 57 * int32_t *vp9_filter : Array of 2 bi-linear filter 58 * taps. 59 * 60 * OUTPUTS : int32_t *output_ptr : Pointer to filtered block. 61 * 62 * RETURNS : void 63 * 64 * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in 65 * either horizontal or vertical direction to produce the 66 * filtered output block. Used to implement first-pass 67 * of 2-D separable filter. 68 * 69 * SPECIAL NOTES : Produces int32_t output to retain precision for next pass. 70 * Two filter taps should sum to VP9_FILTER_WEIGHT. 71 * pixel_step defines whether the filter is applied 72 * horizontally (pixel_step=1) or vertically (pixel_step= 73 * stride). 74 * It defines the offset required to move from one input 75 * to the next. 76 * 77 ****************************************************************************/ 78 static void var_filter_block2d_bil_first_pass(const uint8_t *src_ptr, 79 uint16_t *output_ptr, 80 unsigned int src_pixels_per_line, 81 int pixel_step, 82 unsigned int output_height, 83 unsigned int output_width, 84 const int16_t *vp9_filter) { 85 unsigned int i, j; 86 87 for (i = 0; i < output_height; i++) { 88 for (j = 0; j < output_width; j++) { 89 output_ptr[j] = ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] + 90 (int)src_ptr[pixel_step] * vp9_filter[1], 91 FILTER_BITS); 92 93 src_ptr++; 94 } 95 96 // Next row... 97 src_ptr += src_pixels_per_line - output_width; 98 output_ptr += output_width; 99 } 100 } 101 102 /**************************************************************************** 103 * 104 * ROUTINE : filter_block2d_bil_second_pass 105 * 106 * INPUTS : int32_t *src_ptr : Pointer to source block. 107 * uint32_t src_pixels_per_line : Stride of input block. 108 * uint32_t pixel_step : Offset between filter input 109 * samples (see notes). 110 * uint32_t output_height : Input block height. 111 * uint32_t output_width : Input block width. 112 * int32_t *vp9_filter : Array of 2 bi-linear filter 113 * taps. 114 * 115 * OUTPUTS : uint16_t *output_ptr : Pointer to filtered block. 116 * 117 * RETURNS : void 118 * 119 * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in 120 * either horizontal or vertical direction to produce the 121 * filtered output block. Used to implement second-pass 122 * of 2-D separable filter. 123 * 124 * SPECIAL NOTES : Requires 32-bit input as produced by 125 * filter_block2d_bil_first_pass. 126 * Two filter taps should sum to VP9_FILTER_WEIGHT. 127 * pixel_step defines whether the filter is applied 128 * horizontally (pixel_step=1) or vertically (pixel_step= 129 * stride). 130 * It defines the offset required to move from one input 131 * to the next. 132 * 133 ****************************************************************************/ 134 static void var_filter_block2d_bil_second_pass(const uint16_t *src_ptr, 135 uint8_t *output_ptr, 136 unsigned int src_pixels_per_line, 137 unsigned int pixel_step, 138 unsigned int output_height, 139 unsigned int output_width, 140 const int16_t *vp9_filter) { 141 unsigned int i, j; 142 143 for (i = 0; i < output_height; i++) { 144 for (j = 0; j < output_width; j++) { 145 output_ptr[j] = ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] + 146 (int)src_ptr[pixel_step] * vp9_filter[1], 147 FILTER_BITS); 148 src_ptr++; 149 } 150 151 src_ptr += src_pixels_per_line - output_width; 152 output_ptr += output_width; 153 } 154 } 155 156 unsigned int vp9_get_mb_ss_c(const int16_t *src_ptr) { 157 unsigned int i, sum = 0; 158 159 for (i = 0; i < 256; i++) { 160 sum += (src_ptr[i] * src_ptr[i]); 161 } 162 163 return sum; 164 } 165 166 unsigned int vp9_variance64x32_c(const uint8_t *src_ptr, 167 int source_stride, 168 const uint8_t *ref_ptr, 169 int recon_stride, 170 unsigned int *sse) { 171 unsigned int var; 172 int avg; 173 174 variance(src_ptr, source_stride, ref_ptr, recon_stride, 64, 32, &var, &avg); 175 *sse = var; 176 return (var - (((int64_t)avg * avg) >> 11)); 177 } 178 179 unsigned int vp9_sub_pixel_variance64x32_c(const uint8_t *src_ptr, 180 int src_pixels_per_line, 181 int xoffset, 182 int yoffset, 183 const uint8_t *dst_ptr, 184 int dst_pixels_per_line, 185 unsigned int *sse) { 186 uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering 187 uint8_t temp2[68 * 64]; 188 const int16_t *hfilter, *vfilter; 189 190 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 191 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 192 193 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 194 1, 33, 64, hfilter); 195 var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 32, 64, vfilter); 196 197 return vp9_variance64x32(temp2, 64, dst_ptr, dst_pixels_per_line, sse); 198 } 199 200 unsigned int vp9_sub_pixel_avg_variance64x32_c(const uint8_t *src_ptr, 201 int src_pixels_per_line, 202 int xoffset, 203 int yoffset, 204 const uint8_t *dst_ptr, 205 int dst_pixels_per_line, 206 unsigned int *sse, 207 const uint8_t *second_pred) { 208 uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering 209 uint8_t temp2[68 * 64]; 210 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 64 * 64); // compound pred buffer 211 const int16_t *hfilter, *vfilter; 212 213 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 214 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 215 216 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 217 1, 33, 64, hfilter); 218 var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 32, 64, vfilter); 219 vp9_comp_avg_pred(temp3, second_pred, 64, 32, temp2, 64); 220 return vp9_variance64x32(temp3, 64, dst_ptr, dst_pixels_per_line, sse); 221 } 222 223 unsigned int vp9_variance32x64_c(const uint8_t *src_ptr, 224 int source_stride, 225 const uint8_t *ref_ptr, 226 int recon_stride, 227 unsigned int *sse) { 228 unsigned int var; 229 int avg; 230 231 variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 64, &var, &avg); 232 *sse = var; 233 return (var - (((int64_t)avg * avg) >> 11)); 234 } 235 236 unsigned int vp9_sub_pixel_variance32x64_c(const uint8_t *src_ptr, 237 int src_pixels_per_line, 238 int xoffset, 239 int yoffset, 240 const uint8_t *dst_ptr, 241 int dst_pixels_per_line, 242 unsigned int *sse) { 243 uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering 244 uint8_t temp2[68 * 64]; 245 const int16_t *hfilter, *vfilter; 246 247 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 248 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 249 250 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 251 1, 65, 32, hfilter); 252 var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 64, 32, vfilter); 253 254 return vp9_variance32x64(temp2, 32, dst_ptr, dst_pixels_per_line, sse); 255 } 256 257 unsigned int vp9_sub_pixel_avg_variance32x64_c(const uint8_t *src_ptr, 258 int src_pixels_per_line, 259 int xoffset, 260 int yoffset, 261 const uint8_t *dst_ptr, 262 int dst_pixels_per_line, 263 unsigned int *sse, 264 const uint8_t *second_pred) { 265 uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering 266 uint8_t temp2[68 * 64]; 267 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 64); // compound pred buffer 268 const int16_t *hfilter, *vfilter; 269 270 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 271 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 272 273 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 274 1, 65, 32, hfilter); 275 var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 64, 32, vfilter); 276 vp9_comp_avg_pred(temp3, second_pred, 32, 64, temp2, 32); 277 return vp9_variance32x64(temp3, 32, dst_ptr, dst_pixels_per_line, sse); 278 } 279 280 unsigned int vp9_variance32x16_c(const uint8_t *src_ptr, 281 int source_stride, 282 const uint8_t *ref_ptr, 283 int recon_stride, 284 unsigned int *sse) { 285 unsigned int var; 286 int avg; 287 288 variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 16, &var, &avg); 289 *sse = var; 290 return (var - (((int64_t)avg * avg) >> 9)); 291 } 292 293 unsigned int vp9_sub_pixel_variance32x16_c(const uint8_t *src_ptr, 294 int src_pixels_per_line, 295 int xoffset, 296 int yoffset, 297 const uint8_t *dst_ptr, 298 int dst_pixels_per_line, 299 unsigned int *sse) { 300 uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering 301 uint8_t temp2[36 * 32]; 302 const int16_t *hfilter, *vfilter; 303 304 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 305 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 306 307 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 308 1, 17, 32, hfilter); 309 var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 16, 32, vfilter); 310 311 return vp9_variance32x16(temp2, 32, dst_ptr, dst_pixels_per_line, sse); 312 } 313 314 unsigned int vp9_sub_pixel_avg_variance32x16_c(const uint8_t *src_ptr, 315 int src_pixels_per_line, 316 int xoffset, 317 int yoffset, 318 const uint8_t *dst_ptr, 319 int dst_pixels_per_line, 320 unsigned int *sse, 321 const uint8_t *second_pred) { 322 uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering 323 uint8_t temp2[36 * 32]; 324 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 16); // compound pred buffer 325 const int16_t *hfilter, *vfilter; 326 327 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 328 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 329 330 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 331 1, 17, 32, hfilter); 332 var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 16, 32, vfilter); 333 vp9_comp_avg_pred(temp3, second_pred, 32, 16, temp2, 32); 334 return vp9_variance32x16(temp3, 32, dst_ptr, dst_pixels_per_line, sse); 335 } 336 337 unsigned int vp9_variance16x32_c(const uint8_t *src_ptr, 338 int source_stride, 339 const uint8_t *ref_ptr, 340 int recon_stride, 341 unsigned int *sse) { 342 unsigned int var; 343 int avg; 344 345 variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 32, &var, &avg); 346 *sse = var; 347 return (var - (((int64_t)avg * avg) >> 9)); 348 } 349 350 unsigned int vp9_sub_pixel_variance16x32_c(const uint8_t *src_ptr, 351 int src_pixels_per_line, 352 int xoffset, 353 int yoffset, 354 const uint8_t *dst_ptr, 355 int dst_pixels_per_line, 356 unsigned int *sse) { 357 uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering 358 uint8_t temp2[36 * 32]; 359 const int16_t *hfilter, *vfilter; 360 361 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 362 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 363 364 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 365 1, 33, 16, hfilter); 366 var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 32, 16, vfilter); 367 368 return vp9_variance16x32(temp2, 16, dst_ptr, dst_pixels_per_line, sse); 369 } 370 371 unsigned int vp9_sub_pixel_avg_variance16x32_c(const uint8_t *src_ptr, 372 int src_pixels_per_line, 373 int xoffset, 374 int yoffset, 375 const uint8_t *dst_ptr, 376 int dst_pixels_per_line, 377 unsigned int *sse, 378 const uint8_t *second_pred) { 379 uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering 380 uint8_t temp2[36 * 32]; 381 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 32); // compound pred buffer 382 const int16_t *hfilter, *vfilter; 383 384 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 385 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 386 387 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 388 1, 33, 16, hfilter); 389 var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 32, 16, vfilter); 390 vp9_comp_avg_pred(temp3, second_pred, 16, 32, temp2, 16); 391 return vp9_variance16x32(temp3, 16, dst_ptr, dst_pixels_per_line, sse); 392 } 393 394 unsigned int vp9_variance64x64_c(const uint8_t *src_ptr, 395 int source_stride, 396 const uint8_t *ref_ptr, 397 int recon_stride, 398 unsigned int *sse) { 399 unsigned int var; 400 int avg; 401 402 variance(src_ptr, source_stride, ref_ptr, recon_stride, 64, 64, &var, &avg); 403 *sse = var; 404 return (var - (((int64_t)avg * avg) >> 12)); 405 } 406 407 unsigned int vp9_variance32x32_c(const uint8_t *src_ptr, 408 int source_stride, 409 const uint8_t *ref_ptr, 410 int recon_stride, 411 unsigned int *sse) { 412 unsigned int var; 413 int avg; 414 415 variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 32, &var, &avg); 416 *sse = var; 417 return (var - (((int64_t)avg * avg) >> 10)); 418 } 419 420 unsigned int vp9_variance16x16_c(const uint8_t *src_ptr, 421 int source_stride, 422 const uint8_t *ref_ptr, 423 int recon_stride, 424 unsigned int *sse) { 425 unsigned int var; 426 int avg; 427 428 variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg); 429 *sse = var; 430 return (var - (((unsigned int)avg * avg) >> 8)); 431 } 432 433 unsigned int vp9_variance8x16_c(const uint8_t *src_ptr, 434 int source_stride, 435 const uint8_t *ref_ptr, 436 int recon_stride, 437 unsigned int *sse) { 438 unsigned int var; 439 int avg; 440 441 variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16, &var, &avg); 442 *sse = var; 443 return (var - (((unsigned int)avg * avg) >> 7)); 444 } 445 446 unsigned int vp9_variance16x8_c(const uint8_t *src_ptr, 447 int source_stride, 448 const uint8_t *ref_ptr, 449 int recon_stride, 450 unsigned int *sse) { 451 unsigned int var; 452 int avg; 453 454 variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8, &var, &avg); 455 *sse = var; 456 return (var - (((unsigned int)avg * avg) >> 7)); 457 } 458 459 void vp9_get_sse_sum_8x8_c(const uint8_t *src_ptr, int source_stride, 460 const uint8_t *ref_ptr, int ref_stride, 461 unsigned int *sse, int *sum) { 462 variance(src_ptr, source_stride, ref_ptr, ref_stride, 8, 8, sse, sum); 463 } 464 465 unsigned int vp9_variance8x8_c(const uint8_t *src_ptr, 466 int source_stride, 467 const uint8_t *ref_ptr, 468 int recon_stride, 469 unsigned int *sse) { 470 unsigned int var; 471 int avg; 472 473 variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, &var, &avg); 474 *sse = var; 475 return (var - (((unsigned int)avg * avg) >> 6)); 476 } 477 478 unsigned int vp9_variance8x4_c(const uint8_t *src_ptr, 479 int source_stride, 480 const uint8_t *ref_ptr, 481 int recon_stride, 482 unsigned int *sse) { 483 unsigned int var; 484 int avg; 485 486 variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 4, &var, &avg); 487 *sse = var; 488 return (var - (((unsigned int)avg * avg) >> 5)); 489 } 490 491 unsigned int vp9_variance4x8_c(const uint8_t *src_ptr, 492 int source_stride, 493 const uint8_t *ref_ptr, 494 int recon_stride, 495 unsigned int *sse) { 496 unsigned int var; 497 int avg; 498 499 variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 8, &var, &avg); 500 *sse = var; 501 return (var - (((unsigned int)avg * avg) >> 5)); 502 } 503 504 unsigned int vp9_variance4x4_c(const uint8_t *src_ptr, 505 int source_stride, 506 const uint8_t *ref_ptr, 507 int recon_stride, 508 unsigned int *sse) { 509 unsigned int var; 510 int avg; 511 512 variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 4, &var, &avg); 513 *sse = var; 514 return (var - (((unsigned int)avg * avg) >> 4)); 515 } 516 517 518 unsigned int vp9_mse16x16_c(const uint8_t *src_ptr, 519 int source_stride, 520 const uint8_t *ref_ptr, 521 int recon_stride, 522 unsigned int *sse) { 523 unsigned int var; 524 int avg; 525 526 variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg); 527 *sse = var; 528 return var; 529 } 530 531 unsigned int vp9_mse16x8_c(const uint8_t *src_ptr, 532 int source_stride, 533 const uint8_t *ref_ptr, 534 int recon_stride, 535 unsigned int *sse) { 536 unsigned int var; 537 int avg; 538 539 variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8, &var, &avg); 540 *sse = var; 541 return var; 542 } 543 544 unsigned int vp9_mse8x16_c(const uint8_t *src_ptr, 545 int source_stride, 546 const uint8_t *ref_ptr, 547 int recon_stride, 548 unsigned int *sse) { 549 unsigned int var; 550 int avg; 551 552 variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16, &var, &avg); 553 *sse = var; 554 return var; 555 } 556 557 unsigned int vp9_mse8x8_c(const uint8_t *src_ptr, 558 int source_stride, 559 const uint8_t *ref_ptr, 560 int recon_stride, 561 unsigned int *sse) { 562 unsigned int var; 563 int avg; 564 565 variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, &var, &avg); 566 *sse = var; 567 return var; 568 } 569 570 571 unsigned int vp9_sub_pixel_variance4x4_c(const uint8_t *src_ptr, 572 int src_pixels_per_line, 573 int xoffset, 574 int yoffset, 575 const uint8_t *dst_ptr, 576 int dst_pixels_per_line, 577 unsigned int *sse) { 578 uint8_t temp2[20 * 16]; 579 const int16_t *hfilter, *vfilter; 580 uint16_t fdata3[5 * 4]; // Temp data buffer used in filtering 581 582 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 583 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 584 585 // First filter 1d Horizontal 586 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 587 1, 5, 4, hfilter); 588 589 // Now filter Verticaly 590 var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 4, 4, vfilter); 591 592 return vp9_variance4x4(temp2, 4, dst_ptr, dst_pixels_per_line, sse); 593 } 594 595 unsigned int vp9_sub_pixel_avg_variance4x4_c(const uint8_t *src_ptr, 596 int src_pixels_per_line, 597 int xoffset, 598 int yoffset, 599 const uint8_t *dst_ptr, 600 int dst_pixels_per_line, 601 unsigned int *sse, 602 const uint8_t *second_pred) { 603 uint8_t temp2[20 * 16]; 604 const int16_t *hfilter, *vfilter; 605 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 4); // compound pred buffer 606 uint16_t fdata3[5 * 4]; // Temp data buffer used in filtering 607 608 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 609 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 610 611 // First filter 1d Horizontal 612 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 613 1, 5, 4, hfilter); 614 615 // Now filter Verticaly 616 var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 4, 4, vfilter); 617 vp9_comp_avg_pred(temp3, second_pred, 4, 4, temp2, 4); 618 return vp9_variance4x4(temp3, 4, dst_ptr, dst_pixels_per_line, sse); 619 } 620 621 unsigned int vp9_sub_pixel_variance8x8_c(const uint8_t *src_ptr, 622 int src_pixels_per_line, 623 int xoffset, 624 int yoffset, 625 const uint8_t *dst_ptr, 626 int dst_pixels_per_line, 627 unsigned int *sse) { 628 uint16_t fdata3[9 * 8]; // Temp data buffer used in filtering 629 uint8_t temp2[20 * 16]; 630 const int16_t *hfilter, *vfilter; 631 632 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 633 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 634 635 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 636 1, 9, 8, hfilter); 637 var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 8, 8, vfilter); 638 639 return vp9_variance8x8(temp2, 8, dst_ptr, dst_pixels_per_line, sse); 640 } 641 642 unsigned int vp9_sub_pixel_avg_variance8x8_c(const uint8_t *src_ptr, 643 int src_pixels_per_line, 644 int xoffset, 645 int yoffset, 646 const uint8_t *dst_ptr, 647 int dst_pixels_per_line, 648 unsigned int *sse, 649 const uint8_t *second_pred) { 650 uint16_t fdata3[9 * 8]; // Temp data buffer used in filtering 651 uint8_t temp2[20 * 16]; 652 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 8); // compound pred buffer 653 const int16_t *hfilter, *vfilter; 654 655 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 656 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 657 658 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 659 1, 9, 8, hfilter); 660 var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 8, 8, vfilter); 661 vp9_comp_avg_pred(temp3, second_pred, 8, 8, temp2, 8); 662 return vp9_variance8x8(temp3, 8, dst_ptr, dst_pixels_per_line, sse); 663 } 664 665 unsigned int vp9_sub_pixel_variance16x16_c(const uint8_t *src_ptr, 666 int src_pixels_per_line, 667 int xoffset, 668 int yoffset, 669 const uint8_t *dst_ptr, 670 int dst_pixels_per_line, 671 unsigned int *sse) { 672 uint16_t fdata3[17 * 16]; // Temp data buffer used in filtering 673 uint8_t temp2[20 * 16]; 674 const int16_t *hfilter, *vfilter; 675 676 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 677 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 678 679 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 680 1, 17, 16, hfilter); 681 var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 16, 16, vfilter); 682 683 return vp9_variance16x16(temp2, 16, dst_ptr, dst_pixels_per_line, sse); 684 } 685 686 unsigned int vp9_sub_pixel_avg_variance16x16_c(const uint8_t *src_ptr, 687 int src_pixels_per_line, 688 int xoffset, 689 int yoffset, 690 const uint8_t *dst_ptr, 691 int dst_pixels_per_line, 692 unsigned int *sse, 693 const uint8_t *second_pred) { 694 uint16_t fdata3[17 * 16]; 695 uint8_t temp2[20 * 16]; 696 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 16); // compound pred buffer 697 const int16_t *hfilter, *vfilter; 698 699 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 700 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 701 702 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 703 1, 17, 16, hfilter); 704 var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 16, 16, vfilter); 705 706 vp9_comp_avg_pred(temp3, second_pred, 16, 16, temp2, 16); 707 return vp9_variance16x16(temp3, 16, dst_ptr, dst_pixels_per_line, sse); 708 } 709 710 unsigned int vp9_sub_pixel_variance64x64_c(const uint8_t *src_ptr, 711 int src_pixels_per_line, 712 int xoffset, 713 int yoffset, 714 const uint8_t *dst_ptr, 715 int dst_pixels_per_line, 716 unsigned int *sse) { 717 uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering 718 uint8_t temp2[68 * 64]; 719 const int16_t *hfilter, *vfilter; 720 721 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 722 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 723 724 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 725 1, 65, 64, hfilter); 726 var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 64, 64, vfilter); 727 728 return vp9_variance64x64(temp2, 64, dst_ptr, dst_pixels_per_line, sse); 729 } 730 731 unsigned int vp9_sub_pixel_avg_variance64x64_c(const uint8_t *src_ptr, 732 int src_pixels_per_line, 733 int xoffset, 734 int yoffset, 735 const uint8_t *dst_ptr, 736 int dst_pixels_per_line, 737 unsigned int *sse, 738 const uint8_t *second_pred) { 739 uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering 740 uint8_t temp2[68 * 64]; 741 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 64 * 64); // compound pred buffer 742 const int16_t *hfilter, *vfilter; 743 744 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 745 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 746 747 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 748 1, 65, 64, hfilter); 749 var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 64, 64, vfilter); 750 vp9_comp_avg_pred(temp3, second_pred, 64, 64, temp2, 64); 751 return vp9_variance64x64(temp3, 64, dst_ptr, dst_pixels_per_line, sse); 752 } 753 754 unsigned int vp9_sub_pixel_variance32x32_c(const uint8_t *src_ptr, 755 int src_pixels_per_line, 756 int xoffset, 757 int yoffset, 758 const uint8_t *dst_ptr, 759 int dst_pixels_per_line, 760 unsigned int *sse) { 761 uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering 762 uint8_t temp2[36 * 32]; 763 const int16_t *hfilter, *vfilter; 764 765 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 766 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 767 768 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 769 1, 33, 32, hfilter); 770 var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 32, 32, vfilter); 771 772 return vp9_variance32x32(temp2, 32, dst_ptr, dst_pixels_per_line, sse); 773 } 774 775 unsigned int vp9_sub_pixel_avg_variance32x32_c(const uint8_t *src_ptr, 776 int src_pixels_per_line, 777 int xoffset, 778 int yoffset, 779 const uint8_t *dst_ptr, 780 int dst_pixels_per_line, 781 unsigned int *sse, 782 const uint8_t *second_pred) { 783 uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering 784 uint8_t temp2[36 * 32]; 785 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 32); // compound pred buffer 786 const int16_t *hfilter, *vfilter; 787 788 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 789 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 790 791 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 792 1, 33, 32, hfilter); 793 var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 32, 32, vfilter); 794 vp9_comp_avg_pred(temp3, second_pred, 32, 32, temp2, 32); 795 return vp9_variance32x32(temp3, 32, dst_ptr, dst_pixels_per_line, sse); 796 } 797 798 unsigned int vp9_variance_halfpixvar16x16_h_c(const uint8_t *src_ptr, 799 int source_stride, 800 const uint8_t *ref_ptr, 801 int recon_stride, 802 unsigned int *sse) { 803 return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride, 8, 0, 804 ref_ptr, recon_stride, sse); 805 } 806 807 unsigned int vp9_variance_halfpixvar32x32_h_c(const uint8_t *src_ptr, 808 int source_stride, 809 const uint8_t *ref_ptr, 810 int recon_stride, 811 unsigned int *sse) { 812 return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, 8, 0, 813 ref_ptr, recon_stride, sse); 814 } 815 816 unsigned int vp9_variance_halfpixvar64x64_h_c(const uint8_t *src_ptr, 817 int source_stride, 818 const uint8_t *ref_ptr, 819 int recon_stride, 820 unsigned int *sse) { 821 return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride, 8, 0, 822 ref_ptr, recon_stride, sse); 823 } 824 825 unsigned int vp9_variance_halfpixvar16x16_v_c(const uint8_t *src_ptr, 826 int source_stride, 827 const uint8_t *ref_ptr, 828 int recon_stride, 829 unsigned int *sse) { 830 return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride, 0, 8, 831 ref_ptr, recon_stride, sse); 832 } 833 834 unsigned int vp9_variance_halfpixvar32x32_v_c(const uint8_t *src_ptr, 835 int source_stride, 836 const uint8_t *ref_ptr, 837 int recon_stride, 838 unsigned int *sse) { 839 return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, 0, 8, 840 ref_ptr, recon_stride, sse); 841 } 842 843 unsigned int vp9_variance_halfpixvar64x64_v_c(const uint8_t *src_ptr, 844 int source_stride, 845 const uint8_t *ref_ptr, 846 int recon_stride, 847 unsigned int *sse) { 848 return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride, 0, 8, 849 ref_ptr, recon_stride, sse); 850 } 851 852 unsigned int vp9_variance_halfpixvar16x16_hv_c(const uint8_t *src_ptr, 853 int source_stride, 854 const uint8_t *ref_ptr, 855 int recon_stride, 856 unsigned int *sse) { 857 return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride, 8, 8, 858 ref_ptr, recon_stride, sse); 859 } 860 861 unsigned int vp9_variance_halfpixvar32x32_hv_c(const uint8_t *src_ptr, 862 int source_stride, 863 const uint8_t *ref_ptr, 864 int recon_stride, 865 unsigned int *sse) { 866 return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, 8, 8, 867 ref_ptr, recon_stride, sse); 868 } 869 870 unsigned int vp9_variance_halfpixvar64x64_hv_c(const uint8_t *src_ptr, 871 int source_stride, 872 const uint8_t *ref_ptr, 873 int recon_stride, 874 unsigned int *sse) { 875 return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride, 8, 8, 876 ref_ptr, recon_stride, sse); 877 } 878 879 unsigned int vp9_sub_pixel_mse16x16_c(const uint8_t *src_ptr, 880 int src_pixels_per_line, 881 int xoffset, 882 int yoffset, 883 const uint8_t *dst_ptr, 884 int dst_pixels_per_line, 885 unsigned int *sse) { 886 vp9_sub_pixel_variance16x16_c(src_ptr, src_pixels_per_line, 887 xoffset, yoffset, dst_ptr, 888 dst_pixels_per_line, sse); 889 return *sse; 890 } 891 892 unsigned int vp9_sub_pixel_mse32x32_c(const uint8_t *src_ptr, 893 int src_pixels_per_line, 894 int xoffset, 895 int yoffset, 896 const uint8_t *dst_ptr, 897 int dst_pixels_per_line, 898 unsigned int *sse) { 899 vp9_sub_pixel_variance32x32_c(src_ptr, src_pixels_per_line, 900 xoffset, yoffset, dst_ptr, 901 dst_pixels_per_line, sse); 902 return *sse; 903 } 904 905 unsigned int vp9_sub_pixel_mse64x64_c(const uint8_t *src_ptr, 906 int src_pixels_per_line, 907 int xoffset, 908 int yoffset, 909 const uint8_t *dst_ptr, 910 int dst_pixels_per_line, 911 unsigned int *sse) { 912 vp9_sub_pixel_variance64x64_c(src_ptr, src_pixels_per_line, 913 xoffset, yoffset, dst_ptr, 914 dst_pixels_per_line, sse); 915 return *sse; 916 } 917 918 unsigned int vp9_sub_pixel_variance16x8_c(const uint8_t *src_ptr, 919 int src_pixels_per_line, 920 int xoffset, 921 int yoffset, 922 const uint8_t *dst_ptr, 923 int dst_pixels_per_line, 924 unsigned int *sse) { 925 uint16_t fdata3[16 * 9]; // Temp data buffer used in filtering 926 uint8_t temp2[20 * 16]; 927 const int16_t *hfilter, *vfilter; 928 929 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 930 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 931 932 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 933 1, 9, 16, hfilter); 934 var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 8, 16, vfilter); 935 936 return vp9_variance16x8(temp2, 16, dst_ptr, dst_pixels_per_line, sse); 937 } 938 939 unsigned int vp9_sub_pixel_avg_variance16x8_c(const uint8_t *src_ptr, 940 int src_pixels_per_line, 941 int xoffset, 942 int yoffset, 943 const uint8_t *dst_ptr, 944 int dst_pixels_per_line, 945 unsigned int *sse, 946 const uint8_t *second_pred) { 947 uint16_t fdata3[16 * 9]; // Temp data buffer used in filtering 948 uint8_t temp2[20 * 16]; 949 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 8); // compound pred buffer 950 const int16_t *hfilter, *vfilter; 951 952 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 953 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 954 955 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 956 1, 9, 16, hfilter); 957 var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 8, 16, vfilter); 958 vp9_comp_avg_pred(temp3, second_pred, 16, 8, temp2, 16); 959 return vp9_variance16x8(temp3, 16, dst_ptr, dst_pixels_per_line, sse); 960 } 961 962 unsigned int vp9_sub_pixel_variance8x16_c(const uint8_t *src_ptr, 963 int src_pixels_per_line, 964 int xoffset, 965 int yoffset, 966 const uint8_t *dst_ptr, 967 int dst_pixels_per_line, 968 unsigned int *sse) { 969 uint16_t fdata3[9 * 16]; // Temp data buffer used in filtering 970 uint8_t temp2[20 * 16]; 971 const int16_t *hfilter, *vfilter; 972 973 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 974 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 975 976 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 977 1, 17, 8, hfilter); 978 var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 16, 8, vfilter); 979 980 return vp9_variance8x16(temp2, 8, dst_ptr, dst_pixels_per_line, sse); 981 } 982 983 unsigned int vp9_sub_pixel_avg_variance8x16_c(const uint8_t *src_ptr, 984 int src_pixels_per_line, 985 int xoffset, 986 int yoffset, 987 const uint8_t *dst_ptr, 988 int dst_pixels_per_line, 989 unsigned int *sse, 990 const uint8_t *second_pred) { 991 uint16_t fdata3[9 * 16]; // Temp data buffer used in filtering 992 uint8_t temp2[20 * 16]; 993 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 16); // compound pred buffer 994 const int16_t *hfilter, *vfilter; 995 996 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 997 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 998 999 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1000 1, 17, 8, hfilter); 1001 var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 16, 8, vfilter); 1002 vp9_comp_avg_pred(temp3, second_pred, 8, 16, temp2, 8); 1003 return vp9_variance8x16(temp3, 8, dst_ptr, dst_pixels_per_line, sse); 1004 } 1005 1006 unsigned int vp9_sub_pixel_variance8x4_c(const uint8_t *src_ptr, 1007 int src_pixels_per_line, 1008 int xoffset, 1009 int yoffset, 1010 const uint8_t *dst_ptr, 1011 int dst_pixels_per_line, 1012 unsigned int *sse) { 1013 uint16_t fdata3[8 * 5]; // Temp data buffer used in filtering 1014 uint8_t temp2[20 * 16]; 1015 const int16_t *hfilter, *vfilter; 1016 1017 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 1018 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 1019 1020 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1021 1, 5, 8, hfilter); 1022 var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 4, 8, vfilter); 1023 1024 return vp9_variance8x4(temp2, 8, dst_ptr, dst_pixels_per_line, sse); 1025 } 1026 1027 unsigned int vp9_sub_pixel_avg_variance8x4_c(const uint8_t *src_ptr, 1028 int src_pixels_per_line, 1029 int xoffset, 1030 int yoffset, 1031 const uint8_t *dst_ptr, 1032 int dst_pixels_per_line, 1033 unsigned int *sse, 1034 const uint8_t *second_pred) { 1035 uint16_t fdata3[8 * 5]; // Temp data buffer used in filtering 1036 uint8_t temp2[20 * 16]; 1037 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 4); // compound pred buffer 1038 const int16_t *hfilter, *vfilter; 1039 1040 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 1041 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 1042 1043 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1044 1, 5, 8, hfilter); 1045 var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 4, 8, vfilter); 1046 vp9_comp_avg_pred(temp3, second_pred, 8, 4, temp2, 8); 1047 return vp9_variance8x4(temp3, 8, dst_ptr, dst_pixels_per_line, sse); 1048 } 1049 1050 unsigned int vp9_sub_pixel_variance4x8_c(const uint8_t *src_ptr, 1051 int src_pixels_per_line, 1052 int xoffset, 1053 int yoffset, 1054 const uint8_t *dst_ptr, 1055 int dst_pixels_per_line, 1056 unsigned int *sse) { 1057 uint16_t fdata3[5 * 8]; // Temp data buffer used in filtering 1058 // FIXME(jingning,rbultje): this temp2 buffer probably doesn't need to be 1059 // of this big? same issue appears in all other block size settings. 1060 uint8_t temp2[20 * 16]; 1061 const int16_t *hfilter, *vfilter; 1062 1063 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 1064 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 1065 1066 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1067 1, 9, 4, hfilter); 1068 var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 8, 4, vfilter); 1069 1070 return vp9_variance4x8(temp2, 4, dst_ptr, dst_pixels_per_line, sse); 1071 } 1072 1073 unsigned int vp9_sub_pixel_avg_variance4x8_c(const uint8_t *src_ptr, 1074 int src_pixels_per_line, 1075 int xoffset, 1076 int yoffset, 1077 const uint8_t *dst_ptr, 1078 int dst_pixels_per_line, 1079 unsigned int *sse, 1080 const uint8_t *second_pred) { 1081 uint16_t fdata3[5 * 8]; // Temp data buffer used in filtering 1082 uint8_t temp2[20 * 16]; 1083 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 8); // compound pred buffer 1084 const int16_t *hfilter, *vfilter; 1085 1086 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 1087 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 1088 1089 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1090 1, 9, 4, hfilter); 1091 var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 8, 4, vfilter); 1092 vp9_comp_avg_pred(temp3, second_pred, 4, 8, temp2, 4); 1093 return vp9_variance4x8(temp3, 4, dst_ptr, dst_pixels_per_line, sse); 1094 } 1095 1096 1097 void vp9_comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width, 1098 int height, const uint8_t *ref, int ref_stride) { 1099 int i, j; 1100 1101 for (i = 0; i < height; i++) { 1102 for (j = 0; j < width; j++) { 1103 int tmp; 1104 tmp = pred[j] + ref[j]; 1105 comp_pred[j] = (tmp + 1) >> 1; 1106 } 1107 comp_pred += width; 1108 pred += width; 1109 ref += ref_stride; 1110 } 1111 } 1112