1 /* 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 12 #include "vp9/encoder/vp9_variance.h" 13 #include "vp9/common/vp9_filter.h" 14 #include "vp9/common/vp9_subpelvar.h" 15 #include "vpx/vpx_integer.h" 16 #include "vpx_ports/mem.h" 17 #include "./vp9_rtcd.h" 18 19 unsigned int vp9_get_mb_ss_c(const int16_t *src_ptr) { 20 unsigned int i, sum = 0; 21 22 for (i = 0; i < 256; i++) { 23 sum += (src_ptr[i] * src_ptr[i]); 24 } 25 26 return sum; 27 } 28 29 unsigned int vp9_variance64x32_c(const uint8_t *src_ptr, 30 int source_stride, 31 const uint8_t *ref_ptr, 32 int recon_stride, 33 unsigned int *sse) { 34 unsigned int var; 35 int avg; 36 37 variance(src_ptr, source_stride, ref_ptr, recon_stride, 64, 32, &var, &avg); 38 *sse = var; 39 return (var - (((int64_t)avg * avg) >> 11)); 40 } 41 42 unsigned int vp9_sub_pixel_variance64x32_c(const uint8_t *src_ptr, 43 int src_pixels_per_line, 44 int xoffset, 45 int yoffset, 46 const uint8_t *dst_ptr, 47 int dst_pixels_per_line, 48 unsigned int *sse) { 49 uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering 50 uint8_t temp2[68 * 64]; 51 const int16_t *hfilter, *vfilter; 52 53 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 54 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 55 56 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 57 1, 33, 64, hfilter); 58 var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 32, 64, vfilter); 59 60 return vp9_variance64x32(temp2, 64, dst_ptr, dst_pixels_per_line, sse); 61 } 62 63 unsigned int vp9_sub_pixel_avg_variance64x32_c(const uint8_t *src_ptr, 64 int src_pixels_per_line, 65 int xoffset, 66 int yoffset, 67 const uint8_t *dst_ptr, 68 int dst_pixels_per_line, 69 unsigned int *sse, 70 const uint8_t *second_pred) { 71 uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering 72 uint8_t temp2[68 * 64]; 73 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 64 * 64); // compound pred buffer 74 const int16_t *hfilter, *vfilter; 75 76 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 77 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 78 79 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 80 1, 33, 64, hfilter); 81 var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 32, 64, vfilter); 82 comp_avg_pred(temp3, second_pred, 64, 32, temp2, 64); 83 return vp9_variance64x32(temp3, 64, dst_ptr, dst_pixels_per_line, sse); 84 } 85 86 unsigned int vp9_variance32x64_c(const uint8_t *src_ptr, 87 int source_stride, 88 const uint8_t *ref_ptr, 89 int recon_stride, 90 unsigned int *sse) { 91 unsigned int var; 92 int avg; 93 94 variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 64, &var, &avg); 95 *sse = var; 96 return (var - (((int64_t)avg * avg) >> 11)); 97 } 98 99 unsigned int vp9_sub_pixel_variance32x64_c(const uint8_t *src_ptr, 100 int src_pixels_per_line, 101 int xoffset, 102 int yoffset, 103 const uint8_t *dst_ptr, 104 int dst_pixels_per_line, 105 unsigned int *sse) { 106 uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering 107 uint8_t temp2[68 * 64]; 108 const int16_t *hfilter, *vfilter; 109 110 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 111 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 112 113 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 114 1, 65, 32, hfilter); 115 var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 64, 32, vfilter); 116 117 return vp9_variance32x64(temp2, 32, dst_ptr, dst_pixels_per_line, sse); 118 } 119 120 unsigned int vp9_sub_pixel_avg_variance32x64_c(const uint8_t *src_ptr, 121 int src_pixels_per_line, 122 int xoffset, 123 int yoffset, 124 const uint8_t *dst_ptr, 125 int dst_pixels_per_line, 126 unsigned int *sse, 127 const uint8_t *second_pred) { 128 uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering 129 uint8_t temp2[68 * 64]; 130 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 64); // compound pred buffer 131 const int16_t *hfilter, *vfilter; 132 133 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 134 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 135 136 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 137 1, 65, 32, hfilter); 138 var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 64, 32, vfilter); 139 comp_avg_pred(temp3, second_pred, 32, 64, temp2, 32); 140 return vp9_variance32x64(temp3, 32, dst_ptr, dst_pixels_per_line, sse); 141 } 142 143 unsigned int vp9_variance32x16_c(const uint8_t *src_ptr, 144 int source_stride, 145 const uint8_t *ref_ptr, 146 int recon_stride, 147 unsigned int *sse) { 148 unsigned int var; 149 int avg; 150 151 variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 16, &var, &avg); 152 *sse = var; 153 return (var - (((int64_t)avg * avg) >> 9)); 154 } 155 156 unsigned int vp9_sub_pixel_variance32x16_c(const uint8_t *src_ptr, 157 int src_pixels_per_line, 158 int xoffset, 159 int yoffset, 160 const uint8_t *dst_ptr, 161 int dst_pixels_per_line, 162 unsigned int *sse) { 163 uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering 164 uint8_t temp2[36 * 32]; 165 const int16_t *hfilter, *vfilter; 166 167 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 168 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 169 170 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 171 1, 17, 32, hfilter); 172 var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 16, 32, vfilter); 173 174 return vp9_variance32x16(temp2, 32, dst_ptr, dst_pixels_per_line, sse); 175 } 176 177 unsigned int vp9_sub_pixel_avg_variance32x16_c(const uint8_t *src_ptr, 178 int src_pixels_per_line, 179 int xoffset, 180 int yoffset, 181 const uint8_t *dst_ptr, 182 int dst_pixels_per_line, 183 unsigned int *sse, 184 const uint8_t *second_pred) { 185 uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering 186 uint8_t temp2[36 * 32]; 187 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 16); // compound pred buffer 188 const int16_t *hfilter, *vfilter; 189 190 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 191 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 192 193 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 194 1, 17, 32, hfilter); 195 var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 16, 32, vfilter); 196 comp_avg_pred(temp3, second_pred, 32, 16, temp2, 32); 197 return vp9_variance32x16(temp3, 32, dst_ptr, dst_pixels_per_line, sse); 198 } 199 200 unsigned int vp9_variance16x32_c(const uint8_t *src_ptr, 201 int source_stride, 202 const uint8_t *ref_ptr, 203 int recon_stride, 204 unsigned int *sse) { 205 unsigned int var; 206 int avg; 207 208 variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 32, &var, &avg); 209 *sse = var; 210 return (var - (((int64_t)avg * avg) >> 9)); 211 } 212 213 unsigned int vp9_sub_pixel_variance16x32_c(const uint8_t *src_ptr, 214 int src_pixels_per_line, 215 int xoffset, 216 int yoffset, 217 const uint8_t *dst_ptr, 218 int dst_pixels_per_line, 219 unsigned int *sse) { 220 uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering 221 uint8_t temp2[36 * 32]; 222 const int16_t *hfilter, *vfilter; 223 224 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 225 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 226 227 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 228 1, 33, 16, hfilter); 229 var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 32, 16, vfilter); 230 231 return vp9_variance16x32(temp2, 16, dst_ptr, dst_pixels_per_line, sse); 232 } 233 234 unsigned int vp9_sub_pixel_avg_variance16x32_c(const uint8_t *src_ptr, 235 int src_pixels_per_line, 236 int xoffset, 237 int yoffset, 238 const uint8_t *dst_ptr, 239 int dst_pixels_per_line, 240 unsigned int *sse, 241 const uint8_t *second_pred) { 242 uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering 243 uint8_t temp2[36 * 32]; 244 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 32); // compound pred buffer 245 const int16_t *hfilter, *vfilter; 246 247 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 248 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 249 250 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 251 1, 33, 16, hfilter); 252 var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 32, 16, vfilter); 253 comp_avg_pred(temp3, second_pred, 16, 32, temp2, 16); 254 return vp9_variance16x32(temp3, 16, dst_ptr, dst_pixels_per_line, sse); 255 } 256 257 unsigned int vp9_variance64x64_c(const uint8_t *src_ptr, 258 int source_stride, 259 const uint8_t *ref_ptr, 260 int recon_stride, 261 unsigned int *sse) { 262 unsigned int var; 263 int avg; 264 265 variance(src_ptr, source_stride, ref_ptr, recon_stride, 64, 64, &var, &avg); 266 *sse = var; 267 return (var - (((int64_t)avg * avg) >> 12)); 268 } 269 270 unsigned int vp9_variance32x32_c(const uint8_t *src_ptr, 271 int source_stride, 272 const uint8_t *ref_ptr, 273 int recon_stride, 274 unsigned int *sse) { 275 unsigned int var; 276 int avg; 277 278 variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 32, &var, &avg); 279 *sse = var; 280 return (var - (((int64_t)avg * avg) >> 10)); 281 } 282 283 unsigned int vp9_variance16x16_c(const uint8_t *src_ptr, 284 int source_stride, 285 const uint8_t *ref_ptr, 286 int recon_stride, 287 unsigned int *sse) { 288 unsigned int var; 289 int avg; 290 291 variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg); 292 *sse = var; 293 return (var - (((unsigned int)avg * avg) >> 8)); 294 } 295 296 unsigned int vp9_variance8x16_c(const uint8_t *src_ptr, 297 int source_stride, 298 const uint8_t *ref_ptr, 299 int recon_stride, 300 unsigned int *sse) { 301 unsigned int var; 302 int avg; 303 304 variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16, &var, &avg); 305 *sse = var; 306 return (var - (((unsigned int)avg * avg) >> 7)); 307 } 308 309 unsigned int vp9_variance16x8_c(const uint8_t *src_ptr, 310 int source_stride, 311 const uint8_t *ref_ptr, 312 int recon_stride, 313 unsigned int *sse) { 314 unsigned int var; 315 int avg; 316 317 variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8, &var, &avg); 318 *sse = var; 319 return (var - (((unsigned int)avg * avg) >> 7)); 320 } 321 322 void vp9_get_sse_sum_8x8_c(const uint8_t *src_ptr, int source_stride, 323 const uint8_t *ref_ptr, int ref_stride, 324 unsigned int *sse, int *sum) { 325 variance(src_ptr, source_stride, ref_ptr, ref_stride, 8, 8, sse, sum); 326 } 327 328 unsigned int vp9_variance8x8_c(const uint8_t *src_ptr, 329 int source_stride, 330 const uint8_t *ref_ptr, 331 int recon_stride, 332 unsigned int *sse) { 333 unsigned int var; 334 int avg; 335 336 variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, &var, &avg); 337 *sse = var; 338 return (var - (((unsigned int)avg * avg) >> 6)); 339 } 340 341 unsigned int vp9_variance8x4_c(const uint8_t *src_ptr, 342 int source_stride, 343 const uint8_t *ref_ptr, 344 int recon_stride, 345 unsigned int *sse) { 346 unsigned int var; 347 int avg; 348 349 variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 4, &var, &avg); 350 *sse = var; 351 return (var - (((unsigned int)avg * avg) >> 5)); 352 } 353 354 unsigned int vp9_variance4x8_c(const uint8_t *src_ptr, 355 int source_stride, 356 const uint8_t *ref_ptr, 357 int recon_stride, 358 unsigned int *sse) { 359 unsigned int var; 360 int avg; 361 362 variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 8, &var, &avg); 363 *sse = var; 364 return (var - (((unsigned int)avg * avg) >> 5)); 365 } 366 367 unsigned int vp9_variance4x4_c(const uint8_t *src_ptr, 368 int source_stride, 369 const uint8_t *ref_ptr, 370 int recon_stride, 371 unsigned int *sse) { 372 unsigned int var; 373 int avg; 374 375 variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 4, &var, &avg); 376 *sse = var; 377 return (var - (((unsigned int)avg * avg) >> 4)); 378 } 379 380 381 unsigned int vp9_mse16x16_c(const uint8_t *src_ptr, 382 int source_stride, 383 const uint8_t *ref_ptr, 384 int recon_stride, 385 unsigned int *sse) { 386 unsigned int var; 387 int avg; 388 389 variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg); 390 *sse = var; 391 return var; 392 } 393 394 unsigned int vp9_mse16x8_c(const uint8_t *src_ptr, 395 int source_stride, 396 const uint8_t *ref_ptr, 397 int recon_stride, 398 unsigned int *sse) { 399 unsigned int var; 400 int avg; 401 402 variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8, &var, &avg); 403 *sse = var; 404 return var; 405 } 406 407 unsigned int vp9_mse8x16_c(const uint8_t *src_ptr, 408 int source_stride, 409 const uint8_t *ref_ptr, 410 int recon_stride, 411 unsigned int *sse) { 412 unsigned int var; 413 int avg; 414 415 variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16, &var, &avg); 416 *sse = var; 417 return var; 418 } 419 420 unsigned int vp9_mse8x8_c(const uint8_t *src_ptr, 421 int source_stride, 422 const uint8_t *ref_ptr, 423 int recon_stride, 424 unsigned int *sse) { 425 unsigned int var; 426 int avg; 427 428 variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, &var, &avg); 429 *sse = var; 430 return var; 431 } 432 433 434 unsigned int vp9_sub_pixel_variance4x4_c(const uint8_t *src_ptr, 435 int src_pixels_per_line, 436 int xoffset, 437 int yoffset, 438 const uint8_t *dst_ptr, 439 int dst_pixels_per_line, 440 unsigned int *sse) { 441 uint8_t temp2[20 * 16]; 442 const int16_t *hfilter, *vfilter; 443 uint16_t fdata3[5 * 4]; // Temp data buffer used in filtering 444 445 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 446 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 447 448 // First filter 1d Horizontal 449 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 450 1, 5, 4, hfilter); 451 452 // Now filter Verticaly 453 var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 4, 4, vfilter); 454 455 return vp9_variance4x4(temp2, 4, dst_ptr, dst_pixels_per_line, sse); 456 } 457 458 unsigned int vp9_sub_pixel_avg_variance4x4_c(const uint8_t *src_ptr, 459 int src_pixels_per_line, 460 int xoffset, 461 int yoffset, 462 const uint8_t *dst_ptr, 463 int dst_pixels_per_line, 464 unsigned int *sse, 465 const uint8_t *second_pred) { 466 uint8_t temp2[20 * 16]; 467 const int16_t *hfilter, *vfilter; 468 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 4); // compound pred buffer 469 uint16_t fdata3[5 * 4]; // Temp data buffer used in filtering 470 471 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 472 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 473 474 // First filter 1d Horizontal 475 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 476 1, 5, 4, hfilter); 477 478 // Now filter Verticaly 479 var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 4, 4, vfilter); 480 comp_avg_pred(temp3, second_pred, 4, 4, temp2, 4); 481 return vp9_variance4x4(temp3, 4, dst_ptr, dst_pixels_per_line, sse); 482 } 483 484 unsigned int vp9_sub_pixel_variance8x8_c(const uint8_t *src_ptr, 485 int src_pixels_per_line, 486 int xoffset, 487 int yoffset, 488 const uint8_t *dst_ptr, 489 int dst_pixels_per_line, 490 unsigned int *sse) { 491 uint16_t fdata3[9 * 8]; // Temp data buffer used in filtering 492 uint8_t temp2[20 * 16]; 493 const int16_t *hfilter, *vfilter; 494 495 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 496 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 497 498 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 499 1, 9, 8, hfilter); 500 var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 8, 8, vfilter); 501 502 return vp9_variance8x8(temp2, 8, dst_ptr, dst_pixels_per_line, sse); 503 } 504 505 unsigned int vp9_sub_pixel_avg_variance8x8_c(const uint8_t *src_ptr, 506 int src_pixels_per_line, 507 int xoffset, 508 int yoffset, 509 const uint8_t *dst_ptr, 510 int dst_pixels_per_line, 511 unsigned int *sse, 512 const uint8_t *second_pred) { 513 uint16_t fdata3[9 * 8]; // Temp data buffer used in filtering 514 uint8_t temp2[20 * 16]; 515 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 8); // compound pred buffer 516 const int16_t *hfilter, *vfilter; 517 518 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 519 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 520 521 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 522 1, 9, 8, hfilter); 523 var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 8, 8, vfilter); 524 comp_avg_pred(temp3, second_pred, 8, 8, temp2, 8); 525 return vp9_variance8x8(temp3, 8, dst_ptr, dst_pixels_per_line, sse); 526 } 527 528 unsigned int vp9_sub_pixel_variance16x16_c(const uint8_t *src_ptr, 529 int src_pixels_per_line, 530 int xoffset, 531 int yoffset, 532 const uint8_t *dst_ptr, 533 int dst_pixels_per_line, 534 unsigned int *sse) { 535 uint16_t fdata3[17 * 16]; // Temp data buffer used in filtering 536 uint8_t temp2[20 * 16]; 537 const int16_t *hfilter, *vfilter; 538 539 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 540 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 541 542 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 543 1, 17, 16, hfilter); 544 var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 16, 16, vfilter); 545 546 return vp9_variance16x16(temp2, 16, dst_ptr, dst_pixels_per_line, sse); 547 } 548 549 unsigned int vp9_sub_pixel_avg_variance16x16_c(const uint8_t *src_ptr, 550 int src_pixels_per_line, 551 int xoffset, 552 int yoffset, 553 const uint8_t *dst_ptr, 554 int dst_pixels_per_line, 555 unsigned int *sse, 556 const uint8_t *second_pred) { 557 uint16_t fdata3[17 * 16]; 558 uint8_t temp2[20 * 16]; 559 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 16); // compound pred buffer 560 const int16_t *hfilter, *vfilter; 561 562 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 563 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 564 565 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 566 1, 17, 16, hfilter); 567 var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 16, 16, vfilter); 568 569 comp_avg_pred(temp3, second_pred, 16, 16, temp2, 16); 570 return vp9_variance16x16(temp3, 16, dst_ptr, dst_pixels_per_line, sse); 571 } 572 573 unsigned int vp9_sub_pixel_variance64x64_c(const uint8_t *src_ptr, 574 int src_pixels_per_line, 575 int xoffset, 576 int yoffset, 577 const uint8_t *dst_ptr, 578 int dst_pixels_per_line, 579 unsigned int *sse) { 580 uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering 581 uint8_t temp2[68 * 64]; 582 const int16_t *hfilter, *vfilter; 583 584 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 585 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 586 587 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 588 1, 65, 64, hfilter); 589 var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 64, 64, vfilter); 590 591 return vp9_variance64x64(temp2, 64, dst_ptr, dst_pixels_per_line, sse); 592 } 593 594 unsigned int vp9_sub_pixel_avg_variance64x64_c(const uint8_t *src_ptr, 595 int src_pixels_per_line, 596 int xoffset, 597 int yoffset, 598 const uint8_t *dst_ptr, 599 int dst_pixels_per_line, 600 unsigned int *sse, 601 const uint8_t *second_pred) { 602 uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering 603 uint8_t temp2[68 * 64]; 604 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 64 * 64); // compound pred buffer 605 const int16_t *hfilter, *vfilter; 606 607 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 608 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 609 610 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 611 1, 65, 64, hfilter); 612 var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 64, 64, vfilter); 613 comp_avg_pred(temp3, second_pred, 64, 64, temp2, 64); 614 return vp9_variance64x64(temp3, 64, dst_ptr, dst_pixels_per_line, sse); 615 } 616 617 unsigned int vp9_sub_pixel_variance32x32_c(const uint8_t *src_ptr, 618 int src_pixels_per_line, 619 int xoffset, 620 int yoffset, 621 const uint8_t *dst_ptr, 622 int dst_pixels_per_line, 623 unsigned int *sse) { 624 uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering 625 uint8_t temp2[36 * 32]; 626 const int16_t *hfilter, *vfilter; 627 628 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 629 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 630 631 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 632 1, 33, 32, hfilter); 633 var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 32, 32, vfilter); 634 635 return vp9_variance32x32(temp2, 32, dst_ptr, dst_pixels_per_line, sse); 636 } 637 638 unsigned int vp9_sub_pixel_avg_variance32x32_c(const uint8_t *src_ptr, 639 int src_pixels_per_line, 640 int xoffset, 641 int yoffset, 642 const uint8_t *dst_ptr, 643 int dst_pixels_per_line, 644 unsigned int *sse, 645 const uint8_t *second_pred) { 646 uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering 647 uint8_t temp2[36 * 32]; 648 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 32); // compound pred buffer 649 const int16_t *hfilter, *vfilter; 650 651 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 652 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 653 654 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 655 1, 33, 32, hfilter); 656 var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 32, 32, vfilter); 657 comp_avg_pred(temp3, second_pred, 32, 32, temp2, 32); 658 return vp9_variance32x32(temp3, 32, dst_ptr, dst_pixels_per_line, sse); 659 } 660 661 unsigned int vp9_variance_halfpixvar16x16_h_c(const uint8_t *src_ptr, 662 int source_stride, 663 const uint8_t *ref_ptr, 664 int recon_stride, 665 unsigned int *sse) { 666 return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride, 8, 0, 667 ref_ptr, recon_stride, sse); 668 } 669 670 unsigned int vp9_variance_halfpixvar32x32_h_c(const uint8_t *src_ptr, 671 int source_stride, 672 const uint8_t *ref_ptr, 673 int recon_stride, 674 unsigned int *sse) { 675 return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, 8, 0, 676 ref_ptr, recon_stride, sse); 677 } 678 679 unsigned int vp9_variance_halfpixvar64x64_h_c(const uint8_t *src_ptr, 680 int source_stride, 681 const uint8_t *ref_ptr, 682 int recon_stride, 683 unsigned int *sse) { 684 return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride, 8, 0, 685 ref_ptr, recon_stride, sse); 686 } 687 688 unsigned int vp9_variance_halfpixvar16x16_v_c(const uint8_t *src_ptr, 689 int source_stride, 690 const uint8_t *ref_ptr, 691 int recon_stride, 692 unsigned int *sse) { 693 return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride, 0, 8, 694 ref_ptr, recon_stride, sse); 695 } 696 697 unsigned int vp9_variance_halfpixvar32x32_v_c(const uint8_t *src_ptr, 698 int source_stride, 699 const uint8_t *ref_ptr, 700 int recon_stride, 701 unsigned int *sse) { 702 return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, 0, 8, 703 ref_ptr, recon_stride, sse); 704 } 705 706 unsigned int vp9_variance_halfpixvar64x64_v_c(const uint8_t *src_ptr, 707 int source_stride, 708 const uint8_t *ref_ptr, 709 int recon_stride, 710 unsigned int *sse) { 711 return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride, 0, 8, 712 ref_ptr, recon_stride, sse); 713 } 714 715 unsigned int vp9_variance_halfpixvar16x16_hv_c(const uint8_t *src_ptr, 716 int source_stride, 717 const uint8_t *ref_ptr, 718 int recon_stride, 719 unsigned int *sse) { 720 return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride, 8, 8, 721 ref_ptr, recon_stride, sse); 722 } 723 724 unsigned int vp9_variance_halfpixvar32x32_hv_c(const uint8_t *src_ptr, 725 int source_stride, 726 const uint8_t *ref_ptr, 727 int recon_stride, 728 unsigned int *sse) { 729 return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, 8, 8, 730 ref_ptr, recon_stride, sse); 731 } 732 733 unsigned int vp9_variance_halfpixvar64x64_hv_c(const uint8_t *src_ptr, 734 int source_stride, 735 const uint8_t *ref_ptr, 736 int recon_stride, 737 unsigned int *sse) { 738 return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride, 8, 8, 739 ref_ptr, recon_stride, sse); 740 } 741 742 unsigned int vp9_sub_pixel_mse16x16_c(const uint8_t *src_ptr, 743 int src_pixels_per_line, 744 int xoffset, 745 int yoffset, 746 const uint8_t *dst_ptr, 747 int dst_pixels_per_line, 748 unsigned int *sse) { 749 vp9_sub_pixel_variance16x16_c(src_ptr, src_pixels_per_line, 750 xoffset, yoffset, dst_ptr, 751 dst_pixels_per_line, sse); 752 return *sse; 753 } 754 755 unsigned int vp9_sub_pixel_mse32x32_c(const uint8_t *src_ptr, 756 int src_pixels_per_line, 757 int xoffset, 758 int yoffset, 759 const uint8_t *dst_ptr, 760 int dst_pixels_per_line, 761 unsigned int *sse) { 762 vp9_sub_pixel_variance32x32_c(src_ptr, src_pixels_per_line, 763 xoffset, yoffset, dst_ptr, 764 dst_pixels_per_line, sse); 765 return *sse; 766 } 767 768 unsigned int vp9_sub_pixel_mse64x64_c(const uint8_t *src_ptr, 769 int src_pixels_per_line, 770 int xoffset, 771 int yoffset, 772 const uint8_t *dst_ptr, 773 int dst_pixels_per_line, 774 unsigned int *sse) { 775 vp9_sub_pixel_variance64x64_c(src_ptr, src_pixels_per_line, 776 xoffset, yoffset, dst_ptr, 777 dst_pixels_per_line, sse); 778 return *sse; 779 } 780 781 unsigned int vp9_sub_pixel_variance16x8_c(const uint8_t *src_ptr, 782 int src_pixels_per_line, 783 int xoffset, 784 int yoffset, 785 const uint8_t *dst_ptr, 786 int dst_pixels_per_line, 787 unsigned int *sse) { 788 uint16_t fdata3[16 * 9]; // Temp data buffer used in filtering 789 uint8_t temp2[20 * 16]; 790 const int16_t *hfilter, *vfilter; 791 792 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 793 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 794 795 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 796 1, 9, 16, hfilter); 797 var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 8, 16, vfilter); 798 799 return vp9_variance16x8(temp2, 16, dst_ptr, dst_pixels_per_line, sse); 800 } 801 802 unsigned int vp9_sub_pixel_avg_variance16x8_c(const uint8_t *src_ptr, 803 int src_pixels_per_line, 804 int xoffset, 805 int yoffset, 806 const uint8_t *dst_ptr, 807 int dst_pixels_per_line, 808 unsigned int *sse, 809 const uint8_t *second_pred) { 810 uint16_t fdata3[16 * 9]; // Temp data buffer used in filtering 811 uint8_t temp2[20 * 16]; 812 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 8); // compound pred buffer 813 const int16_t *hfilter, *vfilter; 814 815 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 816 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 817 818 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 819 1, 9, 16, hfilter); 820 var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 8, 16, vfilter); 821 comp_avg_pred(temp3, second_pred, 16, 8, temp2, 16); 822 return vp9_variance16x8(temp3, 16, dst_ptr, dst_pixels_per_line, sse); 823 } 824 825 unsigned int vp9_sub_pixel_variance8x16_c(const uint8_t *src_ptr, 826 int src_pixels_per_line, 827 int xoffset, 828 int yoffset, 829 const uint8_t *dst_ptr, 830 int dst_pixels_per_line, 831 unsigned int *sse) { 832 uint16_t fdata3[9 * 16]; // Temp data buffer used in filtering 833 uint8_t temp2[20 * 16]; 834 const int16_t *hfilter, *vfilter; 835 836 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 837 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 838 839 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 840 1, 17, 8, hfilter); 841 var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 16, 8, vfilter); 842 843 return vp9_variance8x16(temp2, 8, dst_ptr, dst_pixels_per_line, sse); 844 } 845 846 unsigned int vp9_sub_pixel_avg_variance8x16_c(const uint8_t *src_ptr, 847 int src_pixels_per_line, 848 int xoffset, 849 int yoffset, 850 const uint8_t *dst_ptr, 851 int dst_pixels_per_line, 852 unsigned int *sse, 853 const uint8_t *second_pred) { 854 uint16_t fdata3[9 * 16]; // Temp data buffer used in filtering 855 uint8_t temp2[20 * 16]; 856 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 16); // compound pred buffer 857 const int16_t *hfilter, *vfilter; 858 859 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 860 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 861 862 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 863 1, 17, 8, hfilter); 864 var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 16, 8, vfilter); 865 comp_avg_pred(temp3, second_pred, 8, 16, temp2, 8); 866 return vp9_variance8x16(temp3, 8, dst_ptr, dst_pixels_per_line, sse); 867 } 868 869 unsigned int vp9_sub_pixel_variance8x4_c(const uint8_t *src_ptr, 870 int src_pixels_per_line, 871 int xoffset, 872 int yoffset, 873 const uint8_t *dst_ptr, 874 int dst_pixels_per_line, 875 unsigned int *sse) { 876 uint16_t fdata3[8 * 5]; // Temp data buffer used in filtering 877 uint8_t temp2[20 * 16]; 878 const int16_t *hfilter, *vfilter; 879 880 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 881 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 882 883 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 884 1, 5, 8, hfilter); 885 var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 4, 8, vfilter); 886 887 return vp9_variance8x4(temp2, 8, dst_ptr, dst_pixels_per_line, sse); 888 } 889 890 unsigned int vp9_sub_pixel_avg_variance8x4_c(const uint8_t *src_ptr, 891 int src_pixels_per_line, 892 int xoffset, 893 int yoffset, 894 const uint8_t *dst_ptr, 895 int dst_pixels_per_line, 896 unsigned int *sse, 897 const uint8_t *second_pred) { 898 uint16_t fdata3[8 * 5]; // Temp data buffer used in filtering 899 uint8_t temp2[20 * 16]; 900 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 4); // compound pred buffer 901 const int16_t *hfilter, *vfilter; 902 903 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 904 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 905 906 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 907 1, 5, 8, hfilter); 908 var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 4, 8, vfilter); 909 comp_avg_pred(temp3, second_pred, 8, 4, temp2, 8); 910 return vp9_variance8x4(temp3, 8, dst_ptr, dst_pixels_per_line, sse); 911 } 912 913 unsigned int vp9_sub_pixel_variance4x8_c(const uint8_t *src_ptr, 914 int src_pixels_per_line, 915 int xoffset, 916 int yoffset, 917 const uint8_t *dst_ptr, 918 int dst_pixels_per_line, 919 unsigned int *sse) { 920 uint16_t fdata3[5 * 8]; // Temp data buffer used in filtering 921 // FIXME(jingning,rbultje): this temp2 buffer probably doesn't need to be 922 // of this big? same issue appears in all other block size settings. 923 uint8_t temp2[20 * 16]; 924 const int16_t *hfilter, *vfilter; 925 926 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 927 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 928 929 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 930 1, 9, 4, hfilter); 931 var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 8, 4, vfilter); 932 933 return vp9_variance4x8(temp2, 4, dst_ptr, dst_pixels_per_line, sse); 934 } 935 936 unsigned int vp9_sub_pixel_avg_variance4x8_c(const uint8_t *src_ptr, 937 int src_pixels_per_line, 938 int xoffset, 939 int yoffset, 940 const uint8_t *dst_ptr, 941 int dst_pixels_per_line, 942 unsigned int *sse, 943 const uint8_t *second_pred) { 944 uint16_t fdata3[5 * 8]; // Temp data buffer used in filtering 945 uint8_t temp2[20 * 16]; 946 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 8); // compound pred buffer 947 const int16_t *hfilter, *vfilter; 948 949 hfilter = BILINEAR_FILTERS_2TAP(xoffset); 950 vfilter = BILINEAR_FILTERS_2TAP(yoffset); 951 952 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 953 1, 9, 4, hfilter); 954 var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 8, 4, vfilter); 955 comp_avg_pred(temp3, second_pred, 4, 8, temp2, 4); 956 return vp9_variance4x8(temp3, 4, dst_ptr, dst_pixels_per_line, sse); 957 } 958