1 /* 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 12 #include "variance.h" 13 14 const int vp8_six_tap[8][6] = 15 { 16 { 0, 0, 128, 0, 0, 0 }, // note that 1/8 pel positions are just as per alpha -0.5 bicubic 17 { 0, -6, 123, 12, -1, 0 }, 18 { 2, -11, 108, 36, -8, 1 }, // New 1/4 pel 6 tap filter 19 { 0, -9, 93, 50, -6, 0 }, 20 { 3, -16, 77, 77, -16, 3 }, // New 1/2 pel 6 tap filter 21 { 0, -6, 50, 93, -9, 0 }, 22 { 1, -8, 36, 108, -11, 2 }, // New 1/4 pel 6 tap filter 23 { 0, -1, 12, 123, -6, 0 } 24 }; 25 26 27 #ifdef USEBILINEAR 28 const int VP8_FILTER_WEIGHT = 128; 29 const int VP8_FILTER_SHIFT = 7; 30 const int vp8_bilinear_taps[8][2] = 31 { 32 { 128, 0 }, 33 { 112, 16 }, 34 { 96, 32 }, 35 { 80, 48 }, 36 { 64, 64 }, 37 { 48, 80 }, 38 { 32, 96 }, 39 { 16, 112 } 40 }; 41 42 unsigned int vp8_get_mb_ss_c 43 ( 44 short *src_ptr 45 ) 46 { 47 unsigned int i = 0, sum = 0; 48 49 do 50 { 51 sum += (src_ptr[i] * src_ptr[i]); 52 i++; 53 } 54 while (i < 256); 55 56 return sum; 57 } 58 59 60 void vp8_variance( 61 unsigned char *src_ptr, 62 int source_stride, 63 unsigned char *ref_ptr, 64 int recon_stride, 65 int w, 66 int h, 67 unsigned int *sse, 68 int *sum) 69 { 70 int i, j; 71 int diff; 72 73 *sum = 0; 74 *sse = 0; 75 76 for (i = 0; i < h; i++) 77 { 78 for (j = 0; j < w; j++) 79 { 80 diff = src_ptr[j] - ref_ptr[j]; 81 *sum += diff; 82 *sse += diff * diff; 83 } 84 85 src_ptr += source_stride; 86 ref_ptr += recon_stride; 87 } 88 } 89 90 unsigned int 91 vp8_get8x8var_c 92 ( 93 unsigned char *src_ptr, 94 int source_stride, 95 unsigned char *ref_ptr, 96 int recon_stride, 97 unsigned int *SSE, 98 int *Sum 99 ) 100 { 101 102 vp8_variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, SSE, Sum); 103 return (*SSE - (((*Sum) * (*Sum)) >> 6)); 104 } 105 106 unsigned int 107 vp8_get16x16var_c 108 ( 109 unsigned char *src_ptr, 110 int source_stride, 111 unsigned char *ref_ptr, 112 int recon_stride, 113 unsigned int *SSE, 114 int *Sum 115 ) 116 { 117 118 vp8_variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, SSE, Sum); 119 return (*SSE - (((*Sum) * (*Sum)) >> 8)); 120 121 } 122 123 124 125 unsigned int vp8_variance16x16_c( 126 unsigned char *src_ptr, 127 int source_stride, 128 unsigned char *ref_ptr, 129 int recon_stride, 130 unsigned int *sse) 131 { 132 unsigned int var; 133 int avg; 134 135 136 vp8_variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg); 137 *sse = var; 138 return (var - ((avg * avg) >> 8)); 139 } 140 141 unsigned int vp8_variance8x16_c( 142 unsigned char *src_ptr, 143 int source_stride, 144 unsigned char *ref_ptr, 145 int recon_stride, 146 unsigned int *sse) 147 { 148 unsigned int var; 149 int avg; 150 151 152 vp8_variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16, &var, &avg); 153 *sse = var; 154 return (var - ((avg * avg) >> 7)); 155 } 156 157 unsigned int vp8_variance16x8_c( 158 unsigned char *src_ptr, 159 int source_stride, 160 unsigned char *ref_ptr, 161 int recon_stride, 162 unsigned int *sse) 163 { 164 unsigned int var; 165 int avg; 166 167 168 vp8_variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8, &var, &avg); 169 *sse = var; 170 return (var - ((avg * avg) >> 7)); 171 } 172 173 174 unsigned int vp8_variance8x8_c( 175 unsigned char *src_ptr, 176 int source_stride, 177 unsigned char *ref_ptr, 178 int recon_stride, 179 unsigned int *sse) 180 { 181 unsigned int var; 182 int avg; 183 184 185 vp8_variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, &var, &avg); 186 *sse = var; 187 return (var - ((avg * avg) >> 6)); 188 } 189 190 unsigned int vp8_variance4x4_c( 191 unsigned char *src_ptr, 192 int source_stride, 193 unsigned char *ref_ptr, 194 int recon_stride, 195 unsigned int *sse) 196 { 197 unsigned int var; 198 int avg; 199 200 201 vp8_variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 4, &var, &avg); 202 *sse = var; 203 return (var - ((avg * avg) >> 4)); 204 } 205 206 207 unsigned int vp8_mse16x16_c( 208 unsigned char *src_ptr, 209 int source_stride, 210 unsigned char *ref_ptr, 211 int recon_stride, 212 unsigned int *sse) 213 { 214 unsigned int var; 215 int avg; 216 217 vp8_variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg); 218 *sse = var; 219 return var; 220 } 221 222 223 /**************************************************************************** 224 * 225 * ROUTINE : filter_block2d_bil_first_pass 226 * 227 * INPUTS : UINT8 *src_ptr : Pointer to source block. 228 * UINT32 src_pixels_per_line : Stride of input block. 229 * UINT32 pixel_step : Offset between filter input samples (see notes). 230 * UINT32 output_height : Input block height. 231 * UINT32 output_width : Input block width. 232 * INT32 *vp8_filter : Array of 2 bi-linear filter taps. 233 * 234 * OUTPUTS : INT32 *output_ptr : Pointer to filtered block. 235 * 236 * RETURNS : void 237 * 238 * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in 239 * either horizontal or vertical direction to produce the 240 * filtered output block. Used to implement first-pass 241 * of 2-D separable filter. 242 * 243 * SPECIAL NOTES : Produces INT32 output to retain precision for next pass. 244 * Two filter taps should sum to VP8_FILTER_WEIGHT. 245 * pixel_step defines whether the filter is applied 246 * horizontally (pixel_step=1) or vertically (pixel_step=stride). 247 * It defines the offset required to move from one input 248 * to the next. 249 * 250 ****************************************************************************/ 251 void vp8e_filter_block2d_bil_first_pass 252 ( 253 unsigned char *src_ptr, 254 unsigned short *output_ptr, 255 unsigned int src_pixels_per_line, 256 int pixel_step, 257 unsigned int output_height, 258 unsigned int output_width, 259 const int *vp8_filter 260 ) 261 { 262 unsigned int i, j; 263 264 for (i = 0; i < output_height; i++) 265 { 266 for (j = 0; j < output_width; j++) 267 { 268 // Apply bilinear filter 269 output_ptr[j] = (((int)src_ptr[0] * vp8_filter[0]) + 270 ((int)src_ptr[pixel_step] * vp8_filter[1]) + 271 (VP8_FILTER_WEIGHT / 2)) >> VP8_FILTER_SHIFT; 272 src_ptr++; 273 } 274 275 // Next row... 276 src_ptr += src_pixels_per_line - output_width; 277 output_ptr += output_width; 278 } 279 } 280 281 /**************************************************************************** 282 * 283 * ROUTINE : filter_block2d_bil_second_pass 284 * 285 * INPUTS : INT32 *src_ptr : Pointer to source block. 286 * UINT32 src_pixels_per_line : Stride of input block. 287 * UINT32 pixel_step : Offset between filter input samples (see notes). 288 * UINT32 output_height : Input block height. 289 * UINT32 output_width : Input block width. 290 * INT32 *vp8_filter : Array of 2 bi-linear filter taps. 291 * 292 * OUTPUTS : UINT16 *output_ptr : Pointer to filtered block. 293 * 294 * RETURNS : void 295 * 296 * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in 297 * either horizontal or vertical direction to produce the 298 * filtered output block. Used to implement second-pass 299 * of 2-D separable filter. 300 * 301 * SPECIAL NOTES : Requires 32-bit input as produced by filter_block2d_bil_first_pass. 302 * Two filter taps should sum to VP8_FILTER_WEIGHT. 303 * pixel_step defines whether the filter is applied 304 * horizontally (pixel_step=1) or vertically (pixel_step=stride). 305 * It defines the offset required to move from one input 306 * to the next. 307 * 308 ****************************************************************************/ 309 void vp8e_filter_block2d_bil_second_pass 310 ( 311 unsigned short *src_ptr, 312 unsigned char *output_ptr, 313 unsigned int src_pixels_per_line, 314 unsigned int pixel_step, 315 unsigned int output_height, 316 unsigned int output_width, 317 const int *vp8_filter 318 ) 319 { 320 unsigned int i, j; 321 int Temp; 322 323 for (i = 0; i < output_height; i++) 324 { 325 for (j = 0; j < output_width; j++) 326 { 327 // Apply filter 328 Temp = ((int)src_ptr[0] * vp8_filter[0]) + 329 ((int)src_ptr[pixel_step] * vp8_filter[1]) + 330 (VP8_FILTER_WEIGHT / 2); 331 output_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT); 332 src_ptr++; 333 } 334 335 // Next row... 336 src_ptr += src_pixels_per_line - output_width; 337 output_ptr += output_width; 338 } 339 } 340 341 342 /**************************************************************************** 343 * 344 * ROUTINE : filter_block2d_bil 345 * 346 * INPUTS : UINT8 *src_ptr : Pointer to source block. 347 * UINT32 src_pixels_per_line : Stride of input block. 348 * INT32 *HFilter : Array of 2 horizontal filter taps. 349 * INT32 *VFilter : Array of 2 vertical filter taps. 350 * 351 * OUTPUTS : UINT16 *output_ptr : Pointer to filtered block. 352 * 353 * RETURNS : void 354 * 355 * FUNCTION : 2-D filters an 8x8 input block by applying a 2-tap 356 * bi-linear filter horizontally followed by a 2-tap 357 * bi-linear filter vertically on the result. 358 * 359 * SPECIAL NOTES : The intermediate horizontally filtered block must produce 360 * 1 more point than the input block in each column. This 361 * is to ensure that the 2-tap filter has one extra data-point 362 * at the top of each column so filter taps do not extend 363 * beyond data. Thus the output of the first stage filter 364 * is an 8x9 (hx_v) block. 365 * 366 ****************************************************************************/ 367 void vp8e_filter_block2d_bil 368 ( 369 unsigned char *src_ptr, 370 unsigned char *output_ptr, 371 unsigned int src_pixels_per_line, 372 int *HFilter, 373 int *VFilter 374 ) 375 { 376 377 unsigned short FData[20*16]; // Temp data bufffer used in filtering 378 379 // First filter 1-D horizontally... 380 vp8e_filter_block2d_bil_first_pass(src_ptr, FData, src_pixels_per_line, 1, 9, 8, HFilter); 381 382 // then 1-D vertically... 383 vp8e_filter_block2d_bil_second_pass(FData, output_ptr, 8, 8, 8, 8, VFilter); 384 } 385 386 387 388 unsigned int vp8_sub_pixel_variance4x4_c 389 ( 390 unsigned char *src_ptr, 391 int src_pixels_per_line, 392 int xoffset, 393 int yoffset, 394 unsigned char *dst_ptr, 395 int dst_pixels_per_line, 396 unsigned int *sse 397 ) 398 { 399 unsigned char temp2[20*16]; 400 const int *HFilter, *VFilter; 401 unsigned short FData3[5*4]; // Temp data bufffer used in filtering 402 403 HFilter = vp8_bilinear_taps[xoffset]; 404 VFilter = vp8_bilinear_taps[yoffset]; 405 406 // First filter 1d Horizontal 407 vp8e_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 5, 4, HFilter); 408 409 // Now filter Verticaly 410 vp8e_filter_block2d_bil_second_pass(FData3, temp2, 4, 4, 4, 4, VFilter); 411 412 return vp8_variance4x4_c(temp2, 4, dst_ptr, dst_pixels_per_line, sse); 413 } 414 415 416 unsigned int vp8_sub_pixel_variance8x8_c 417 ( 418 unsigned char *src_ptr, 419 int src_pixels_per_line, 420 int xoffset, 421 int yoffset, 422 unsigned char *dst_ptr, 423 int dst_pixels_per_line, 424 unsigned int *sse 425 ) 426 { 427 unsigned short FData3[9*8]; // Temp data bufffer used in filtering 428 unsigned char temp2[20*16]; 429 const int *HFilter, *VFilter; 430 431 HFilter = vp8_bilinear_taps[xoffset]; 432 VFilter = vp8_bilinear_taps[yoffset]; 433 434 vp8e_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 9, 8, HFilter); 435 vp8e_filter_block2d_bil_second_pass(FData3, temp2, 8, 8, 8, 8, VFilter); 436 437 return vp8_variance8x8_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse); 438 } 439 440 unsigned int vp8_sub_pixel_variance16x16_c 441 ( 442 unsigned char *src_ptr, 443 int src_pixels_per_line, 444 int xoffset, 445 int yoffset, 446 unsigned char *dst_ptr, 447 int dst_pixels_per_line, 448 unsigned int *sse 449 ) 450 { 451 unsigned short FData3[17*16]; // Temp data bufffer used in filtering 452 unsigned char temp2[20*16]; 453 const int *HFilter, *VFilter; 454 455 HFilter = vp8_bilinear_taps[xoffset]; 456 VFilter = vp8_bilinear_taps[yoffset]; 457 458 vp8e_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 17, 16, HFilter); 459 vp8e_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 16, 16, VFilter); 460 461 return vp8_variance16x16_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse); 462 } 463 464 unsigned int vp8_sub_pixel_mse16x16_c 465 ( 466 unsigned char *src_ptr, 467 int src_pixels_per_line, 468 int xoffset, 469 int yoffset, 470 unsigned char *dst_ptr, 471 int dst_pixels_per_line, 472 unsigned int *sse 473 ) 474 { 475 vp8_sub_pixel_variance16x16_c(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pixels_per_line, sse); 476 return *sse; 477 } 478 479 unsigned int vp8_sub_pixel_variance16x8_c 480 ( 481 unsigned char *src_ptr, 482 int src_pixels_per_line, 483 int xoffset, 484 int yoffset, 485 unsigned char *dst_ptr, 486 int dst_pixels_per_line, 487 unsigned int *sse 488 ) 489 { 490 unsigned short FData3[16*9]; // Temp data bufffer used in filtering 491 unsigned char temp2[20*16]; 492 const int *HFilter, *VFilter; 493 494 HFilter = vp8_bilinear_taps[xoffset]; 495 VFilter = vp8_bilinear_taps[yoffset]; 496 497 vp8e_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 9, 16, HFilter); 498 vp8e_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 8, 16, VFilter); 499 500 return vp8_variance16x8_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse); 501 } 502 503 unsigned int vp8_sub_pixel_variance8x16_c 504 ( 505 unsigned char *src_ptr, 506 int src_pixels_per_line, 507 int xoffset, 508 int yoffset, 509 unsigned char *dst_ptr, 510 int dst_pixels_per_line, 511 unsigned int *sse 512 ) 513 { 514 unsigned short FData3[9*16]; // Temp data bufffer used in filtering 515 unsigned char temp2[20*16]; 516 const int *HFilter, *VFilter; 517 518 519 HFilter = vp8_bilinear_taps[xoffset]; 520 VFilter = vp8_bilinear_taps[yoffset]; 521 522 523 vp8e_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 17, 8, HFilter); 524 vp8e_filter_block2d_bil_second_pass(FData3, temp2, 8, 8, 16, 8, VFilter); 525 526 return vp8_variance8x16_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse); 527 } 528 #endif 529