1 /* 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 12 #include "variance.h" 13 14 const int vp8_six_tap[8][6] = 15 { 16 { 0, 0, 128, 0, 0, 0 }, // note that 1/8 pel positions are just as per alpha -0.5 bicubic 17 { 0, -6, 123, 12, -1, 0 }, 18 { 2, -11, 108, 36, -8, 1 }, // New 1/4 pel 6 tap filter 19 { 0, -9, 93, 50, -6, 0 }, 20 { 3, -16, 77, 77, -16, 3 }, // New 1/2 pel 6 tap filter 21 { 0, -6, 50, 93, -9, 0 }, 22 { 1, -8, 36, 108, -11, 2 }, // New 1/4 pel 6 tap filter 23 { 0, -1, 12, 123, -6, 0 } 24 }; 25 26 27 const int VP8_FILTER_WEIGHT = 128; 28 const int VP8_FILTER_SHIFT = 7; 29 const int vp8_bilinear_taps[8][2] = 30 { 31 { 128, 0 }, 32 { 112, 16 }, 33 { 96, 32 }, 34 { 80, 48 }, 35 { 64, 64 }, 36 { 48, 80 }, 37 { 32, 96 }, 38 { 16, 112 } 39 }; 40 41 unsigned int vp8_get_mb_ss_c 42 ( 43 const short *src_ptr 44 ) 45 { 46 unsigned int i = 0, sum = 0; 47 48 do 49 { 50 sum += (src_ptr[i] * src_ptr[i]); 51 i++; 52 } 53 while (i < 256); 54 55 return sum; 56 } 57 58 59 void vp8_variance( 60 const unsigned char *src_ptr, 61 int source_stride, 62 const unsigned char *ref_ptr, 63 int recon_stride, 64 int w, 65 int h, 66 unsigned int *sse, 67 int *sum) 68 { 69 int i, j; 70 int diff; 71 72 *sum = 0; 73 *sse = 0; 74 75 for (i = 0; i < h; i++) 76 { 77 for (j = 0; j < w; j++) 78 { 79 diff = src_ptr[j] - ref_ptr[j]; 80 *sum += diff; 81 *sse += diff * diff; 82 } 83 84 src_ptr += source_stride; 85 ref_ptr += recon_stride; 86 } 87 } 88 89 unsigned int 90 vp8_get8x8var_c 91 ( 92 const unsigned char *src_ptr, 93 int source_stride, 94 const unsigned char *ref_ptr, 95 int recon_stride, 96 unsigned int *SSE, 97 int *Sum 98 ) 99 { 100 101 vp8_variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, SSE, Sum); 102 return (*SSE - (((*Sum) * (*Sum)) >> 6)); 103 } 104 105 unsigned int 106 vp8_get16x16var_c 107 ( 108 const unsigned char *src_ptr, 109 int source_stride, 110 const unsigned char *ref_ptr, 111 int recon_stride, 112 unsigned int *SSE, 113 int *Sum 114 ) 115 { 116 117 vp8_variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, SSE, Sum); 118 return (*SSE - (((*Sum) * (*Sum)) >> 8)); 119 120 } 121 122 123 124 unsigned int vp8_variance16x16_c( 125 const unsigned char *src_ptr, 126 int source_stride, 127 const unsigned char *ref_ptr, 128 int recon_stride, 129 unsigned int *sse) 130 { 131 unsigned int var; 132 int avg; 133 134 135 vp8_variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg); 136 *sse = var; 137 return (var - ((avg * avg) >> 8)); 138 } 139 140 unsigned int vp8_variance8x16_c( 141 const unsigned char *src_ptr, 142 int source_stride, 143 const unsigned char *ref_ptr, 144 int recon_stride, 145 unsigned int *sse) 146 { 147 unsigned int var; 148 int avg; 149 150 151 vp8_variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16, &var, &avg); 152 *sse = var; 153 return (var - ((avg * avg) >> 7)); 154 } 155 156 unsigned int vp8_variance16x8_c( 157 const unsigned char *src_ptr, 158 int source_stride, 159 const unsigned char *ref_ptr, 160 int recon_stride, 161 unsigned int *sse) 162 { 163 unsigned int var; 164 int avg; 165 166 167 vp8_variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8, &var, &avg); 168 *sse = var; 169 return (var - ((avg * avg) >> 7)); 170 } 171 172 173 unsigned int vp8_variance8x8_c( 174 const unsigned char *src_ptr, 175 int source_stride, 176 const unsigned char *ref_ptr, 177 int recon_stride, 178 unsigned int *sse) 179 { 180 unsigned int var; 181 int avg; 182 183 184 vp8_variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, &var, &avg); 185 *sse = var; 186 return (var - ((avg * avg) >> 6)); 187 } 188 189 unsigned int vp8_variance4x4_c( 190 const unsigned char *src_ptr, 191 int source_stride, 192 const unsigned char *ref_ptr, 193 int recon_stride, 194 unsigned int *sse) 195 { 196 unsigned int var; 197 int avg; 198 199 200 vp8_variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 4, &var, &avg); 201 *sse = var; 202 return (var - ((avg * avg) >> 4)); 203 } 204 205 206 unsigned int vp8_mse16x16_c( 207 const unsigned char *src_ptr, 208 int source_stride, 209 const unsigned char *ref_ptr, 210 int recon_stride, 211 unsigned int *sse) 212 { 213 unsigned int var; 214 int avg; 215 216 vp8_variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg); 217 *sse = var; 218 return var; 219 } 220 221 222 /**************************************************************************** 223 * 224 * ROUTINE : filter_block2d_bil_first_pass 225 * 226 * INPUTS : UINT8 *src_ptr : Pointer to source block. 227 * UINT32 src_pixels_per_line : Stride of input block. 228 * UINT32 pixel_step : Offset between filter input samples (see notes). 229 * UINT32 output_height : Input block height. 230 * UINT32 output_width : Input block width. 231 * INT32 *vp8_filter : Array of 2 bi-linear filter taps. 232 * 233 * OUTPUTS : INT32 *output_ptr : Pointer to filtered block. 234 * 235 * RETURNS : void 236 * 237 * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in 238 * either horizontal or vertical direction to produce the 239 * filtered output block. Used to implement first-pass 240 * of 2-D separable filter. 241 * 242 * SPECIAL NOTES : Produces INT32 output to retain precision for next pass. 243 * Two filter taps should sum to VP8_FILTER_WEIGHT. 244 * pixel_step defines whether the filter is applied 245 * horizontally (pixel_step=1) or vertically (pixel_step=stride). 246 * It defines the offset required to move from one input 247 * to the next. 248 * 249 ****************************************************************************/ 250 void vp8e_filter_block2d_bil_first_pass 251 ( 252 const unsigned char *src_ptr, 253 unsigned short *output_ptr, 254 unsigned int src_pixels_per_line, 255 int pixel_step, 256 unsigned int output_height, 257 unsigned int output_width, 258 const int *vp8_filter 259 ) 260 { 261 unsigned int i, j; 262 263 for (i = 0; i < output_height; i++) 264 { 265 for (j = 0; j < output_width; j++) 266 { 267 // Apply bilinear filter 268 output_ptr[j] = (((int)src_ptr[0] * vp8_filter[0]) + 269 ((int)src_ptr[pixel_step] * vp8_filter[1]) + 270 (VP8_FILTER_WEIGHT / 2)) >> VP8_FILTER_SHIFT; 271 src_ptr++; 272 } 273 274 // Next row... 275 src_ptr += src_pixels_per_line - output_width; 276 output_ptr += output_width; 277 } 278 } 279 280 /**************************************************************************** 281 * 282 * ROUTINE : filter_block2d_bil_second_pass 283 * 284 * INPUTS : INT32 *src_ptr : Pointer to source block. 285 * UINT32 src_pixels_per_line : Stride of input block. 286 * UINT32 pixel_step : Offset between filter input samples (see notes). 287 * UINT32 output_height : Input block height. 288 * UINT32 output_width : Input block width. 289 * INT32 *vp8_filter : Array of 2 bi-linear filter taps. 290 * 291 * OUTPUTS : UINT16 *output_ptr : Pointer to filtered block. 292 * 293 * RETURNS : void 294 * 295 * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in 296 * either horizontal or vertical direction to produce the 297 * filtered output block. Used to implement second-pass 298 * of 2-D separable filter. 299 * 300 * SPECIAL NOTES : Requires 32-bit input as produced by filter_block2d_bil_first_pass. 301 * Two filter taps should sum to VP8_FILTER_WEIGHT. 302 * pixel_step defines whether the filter is applied 303 * horizontally (pixel_step=1) or vertically (pixel_step=stride). 304 * It defines the offset required to move from one input 305 * to the next. 306 * 307 ****************************************************************************/ 308 void vp8e_filter_block2d_bil_second_pass 309 ( 310 const unsigned short *src_ptr, 311 unsigned char *output_ptr, 312 unsigned int src_pixels_per_line, 313 unsigned int pixel_step, 314 unsigned int output_height, 315 unsigned int output_width, 316 const int *vp8_filter 317 ) 318 { 319 unsigned int i, j; 320 int Temp; 321 322 for (i = 0; i < output_height; i++) 323 { 324 for (j = 0; j < output_width; j++) 325 { 326 // Apply filter 327 Temp = ((int)src_ptr[0] * vp8_filter[0]) + 328 ((int)src_ptr[pixel_step] * vp8_filter[1]) + 329 (VP8_FILTER_WEIGHT / 2); 330 output_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT); 331 src_ptr++; 332 } 333 334 // Next row... 335 src_ptr += src_pixels_per_line - output_width; 336 output_ptr += output_width; 337 } 338 } 339 340 341 /**************************************************************************** 342 * 343 * ROUTINE : filter_block2d_bil 344 * 345 * INPUTS : UINT8 *src_ptr : Pointer to source block. 346 * UINT32 src_pixels_per_line : Stride of input block. 347 * INT32 *HFilter : Array of 2 horizontal filter taps. 348 * INT32 *VFilter : Array of 2 vertical filter taps. 349 * 350 * OUTPUTS : UINT16 *output_ptr : Pointer to filtered block. 351 * 352 * RETURNS : void 353 * 354 * FUNCTION : 2-D filters an 8x8 input block by applying a 2-tap 355 * bi-linear filter horizontally followed by a 2-tap 356 * bi-linear filter vertically on the result. 357 * 358 * SPECIAL NOTES : The intermediate horizontally filtered block must produce 359 * 1 more point than the input block in each column. This 360 * is to ensure that the 2-tap filter has one extra data-point 361 * at the top of each column so filter taps do not extend 362 * beyond data. Thus the output of the first stage filter 363 * is an 8x9 (hx_v) block. 364 * 365 ****************************************************************************/ 366 void vp8e_filter_block2d_bil 367 ( 368 const unsigned char *src_ptr, 369 unsigned char *output_ptr, 370 unsigned int src_pixels_per_line, 371 int *HFilter, 372 int *VFilter 373 ) 374 { 375 376 unsigned short FData[20*16]; // Temp data bufffer used in filtering 377 378 // First filter 1-D horizontally... 379 vp8e_filter_block2d_bil_first_pass(src_ptr, FData, src_pixels_per_line, 1, 9, 8, HFilter); 380 381 // then 1-D vertically... 382 vp8e_filter_block2d_bil_second_pass(FData, output_ptr, 8, 8, 8, 8, VFilter); 383 } 384 385 386 387 unsigned int vp8_sub_pixel_variance4x4_c 388 ( 389 const unsigned char *src_ptr, 390 int src_pixels_per_line, 391 int xoffset, 392 int yoffset, 393 const unsigned char *dst_ptr, 394 int dst_pixels_per_line, 395 unsigned int *sse 396 ) 397 { 398 unsigned char temp2[20*16]; 399 const int *HFilter, *VFilter; 400 unsigned short FData3[5*4]; // Temp data bufffer used in filtering 401 402 HFilter = vp8_bilinear_taps[xoffset]; 403 VFilter = vp8_bilinear_taps[yoffset]; 404 405 // First filter 1d Horizontal 406 vp8e_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 5, 4, HFilter); 407 408 // Now filter Verticaly 409 vp8e_filter_block2d_bil_second_pass(FData3, temp2, 4, 4, 4, 4, VFilter); 410 411 return vp8_variance4x4_c(temp2, 4, dst_ptr, dst_pixels_per_line, sse); 412 } 413 414 415 unsigned int vp8_sub_pixel_variance8x8_c 416 ( 417 const unsigned char *src_ptr, 418 int src_pixels_per_line, 419 int xoffset, 420 int yoffset, 421 const unsigned char *dst_ptr, 422 int dst_pixels_per_line, 423 unsigned int *sse 424 ) 425 { 426 unsigned short FData3[9*8]; // Temp data bufffer used in filtering 427 unsigned char temp2[20*16]; 428 const int *HFilter, *VFilter; 429 430 HFilter = vp8_bilinear_taps[xoffset]; 431 VFilter = vp8_bilinear_taps[yoffset]; 432 433 vp8e_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 9, 8, HFilter); 434 vp8e_filter_block2d_bil_second_pass(FData3, temp2, 8, 8, 8, 8, VFilter); 435 436 return vp8_variance8x8_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse); 437 } 438 439 unsigned int vp8_sub_pixel_variance16x16_c 440 ( 441 const unsigned char *src_ptr, 442 int src_pixels_per_line, 443 int xoffset, 444 int yoffset, 445 const unsigned char *dst_ptr, 446 int dst_pixels_per_line, 447 unsigned int *sse 448 ) 449 { 450 unsigned short FData3[17*16]; // Temp data bufffer used in filtering 451 unsigned char temp2[20*16]; 452 const int *HFilter, *VFilter; 453 454 HFilter = vp8_bilinear_taps[xoffset]; 455 VFilter = vp8_bilinear_taps[yoffset]; 456 457 vp8e_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 17, 16, HFilter); 458 vp8e_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 16, 16, VFilter); 459 460 return vp8_variance16x16_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse); 461 } 462 463 464 unsigned int vp8_variance_halfpixvar16x16_h_c( 465 const unsigned char *src_ptr, 466 int source_stride, 467 const unsigned char *ref_ptr, 468 int recon_stride, 469 unsigned int *sse) 470 { 471 return vp8_sub_pixel_variance16x16_c(src_ptr, source_stride, 4, 0, 472 ref_ptr, recon_stride, sse); 473 } 474 475 476 unsigned int vp8_variance_halfpixvar16x16_v_c( 477 const unsigned char *src_ptr, 478 int source_stride, 479 const unsigned char *ref_ptr, 480 int recon_stride, 481 unsigned int *sse) 482 { 483 return vp8_sub_pixel_variance16x16_c(src_ptr, source_stride, 0, 4, 484 ref_ptr, recon_stride, sse); 485 } 486 487 488 unsigned int vp8_variance_halfpixvar16x16_hv_c( 489 const unsigned char *src_ptr, 490 int source_stride, 491 const unsigned char *ref_ptr, 492 int recon_stride, 493 unsigned int *sse) 494 { 495 return vp8_sub_pixel_variance16x16_c(src_ptr, source_stride, 4, 4, 496 ref_ptr, recon_stride, sse); 497 } 498 499 500 unsigned int vp8_sub_pixel_mse16x16_c 501 ( 502 const unsigned char *src_ptr, 503 int src_pixels_per_line, 504 int xoffset, 505 int yoffset, 506 const unsigned char *dst_ptr, 507 int dst_pixels_per_line, 508 unsigned int *sse 509 ) 510 { 511 vp8_sub_pixel_variance16x16_c(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pixels_per_line, sse); 512 return *sse; 513 } 514 515 unsigned int vp8_sub_pixel_variance16x8_c 516 ( 517 const unsigned char *src_ptr, 518 int src_pixels_per_line, 519 int xoffset, 520 int yoffset, 521 const unsigned char *dst_ptr, 522 int dst_pixels_per_line, 523 unsigned int *sse 524 ) 525 { 526 unsigned short FData3[16*9]; // Temp data bufffer used in filtering 527 unsigned char temp2[20*16]; 528 const int *HFilter, *VFilter; 529 530 HFilter = vp8_bilinear_taps[xoffset]; 531 VFilter = vp8_bilinear_taps[yoffset]; 532 533 vp8e_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 9, 16, HFilter); 534 vp8e_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 8, 16, VFilter); 535 536 return vp8_variance16x8_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse); 537 } 538 539 unsigned int vp8_sub_pixel_variance8x16_c 540 ( 541 const unsigned char *src_ptr, 542 int src_pixels_per_line, 543 int xoffset, 544 int yoffset, 545 const unsigned char *dst_ptr, 546 int dst_pixels_per_line, 547 unsigned int *sse 548 ) 549 { 550 unsigned short FData3[9*16]; // Temp data bufffer used in filtering 551 unsigned char temp2[20*16]; 552 const int *HFilter, *VFilter; 553 554 555 HFilter = vp8_bilinear_taps[xoffset]; 556 VFilter = vp8_bilinear_taps[yoffset]; 557 558 559 vp8e_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 17, 8, HFilter); 560 vp8e_filter_block2d_bil_second_pass(FData3, temp2, 8, 8, 16, 8, VFilter); 561 562 return vp8_variance8x16_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse); 563 } 564