1 /* 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 12 #include <stdlib.h> 13 #include "filter.h" 14 #include "vpx_ports/mem.h" 15 16 DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]) = 17 { 18 { 128, 0 }, 19 { 112, 16 }, 20 { 96, 32 }, 21 { 80, 48 }, 22 { 64, 64 }, 23 { 48, 80 }, 24 { 32, 96 }, 25 { 16, 112 } 26 }; 27 28 DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][6]) = 29 { 30 31 { 0, 0, 128, 0, 0, 0 }, /* note that 1/8 pel positions are just as per alpha -0.5 bicubic */ 32 { 0, -6, 123, 12, -1, 0 }, 33 { 2, -11, 108, 36, -8, 1 }, /* New 1/4 pel 6 tap filter */ 34 { 0, -9, 93, 50, -6, 0 }, 35 { 3, -16, 77, 77, -16, 3 }, /* New 1/2 pel 6 tap filter */ 36 { 0, -6, 50, 93, -9, 0 }, 37 { 1, -8, 36, 108, -11, 2 }, /* New 1/4 pel 6 tap filter */ 38 { 0, -1, 12, 123, -6, 0 }, 39 }; 40 41 static void filter_block2d_first_pass 42 ( 43 unsigned char *src_ptr, 44 int *output_ptr, 45 unsigned int src_pixels_per_line, 46 unsigned int pixel_step, 47 unsigned int output_height, 48 unsigned int output_width, 49 const short *vp8_filter 50 ) 51 { 52 unsigned int i, j; 53 int Temp; 54 55 for (i = 0; i < output_height; i++) 56 { 57 for (j = 0; j < output_width; j++) 58 { 59 Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) + 60 ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) + 61 ((int)src_ptr[0] * vp8_filter[2]) + 62 ((int)src_ptr[pixel_step] * vp8_filter[3]) + 63 ((int)src_ptr[2*pixel_step] * vp8_filter[4]) + 64 ((int)src_ptr[3*pixel_step] * vp8_filter[5]) + 65 (VP8_FILTER_WEIGHT >> 1); /* Rounding */ 66 67 /* Normalize back to 0-255 */ 68 Temp = Temp >> VP8_FILTER_SHIFT; 69 70 if (Temp < 0) 71 Temp = 0; 72 else if (Temp > 255) 73 Temp = 255; 74 75 output_ptr[j] = Temp; 76 src_ptr++; 77 } 78 79 /* Next row... */ 80 src_ptr += src_pixels_per_line - output_width; 81 output_ptr += output_width; 82 } 83 } 84 85 static void filter_block2d_second_pass 86 ( 87 int *src_ptr, 88 unsigned char *output_ptr, 89 int output_pitch, 90 unsigned int src_pixels_per_line, 91 unsigned int pixel_step, 92 unsigned int output_height, 93 unsigned int output_width, 94 const short *vp8_filter 95 ) 96 { 97 unsigned int i, j; 98 int Temp; 99 100 for (i = 0; i < output_height; i++) 101 { 102 for (j = 0; j < output_width; j++) 103 { 104 /* Apply filter */ 105 Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) + 106 ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) + 107 ((int)src_ptr[0] * vp8_filter[2]) + 108 ((int)src_ptr[pixel_step] * vp8_filter[3]) + 109 ((int)src_ptr[2*pixel_step] * vp8_filter[4]) + 110 ((int)src_ptr[3*pixel_step] * vp8_filter[5]) + 111 (VP8_FILTER_WEIGHT >> 1); /* Rounding */ 112 113 /* Normalize back to 0-255 */ 114 Temp = Temp >> VP8_FILTER_SHIFT; 115 116 if (Temp < 0) 117 Temp = 0; 118 else if (Temp > 255) 119 Temp = 255; 120 121 output_ptr[j] = (unsigned char)Temp; 122 src_ptr++; 123 } 124 125 /* Start next row */ 126 src_ptr += src_pixels_per_line - output_width; 127 output_ptr += output_pitch; 128 } 129 } 130 131 132 static void filter_block2d 133 ( 134 unsigned char *src_ptr, 135 unsigned char *output_ptr, 136 unsigned int src_pixels_per_line, 137 int output_pitch, 138 const short *HFilter, 139 const short *VFilter 140 ) 141 { 142 int FData[9*4]; /* Temp data buffer used in filtering */ 143 144 /* First filter 1-D horizontally... */ 145 filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 4, HFilter); 146 147 /* then filter verticaly... */ 148 filter_block2d_second_pass(FData + 8, output_ptr, output_pitch, 4, 4, 4, 4, VFilter); 149 } 150 151 152 void vp8_sixtap_predict_c 153 ( 154 unsigned char *src_ptr, 155 int src_pixels_per_line, 156 int xoffset, 157 int yoffset, 158 unsigned char *dst_ptr, 159 int dst_pitch 160 ) 161 { 162 const short *HFilter; 163 const short *VFilter; 164 165 HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */ 166 VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */ 167 168 filter_block2d(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter); 169 } 170 void vp8_sixtap_predict8x8_c 171 ( 172 unsigned char *src_ptr, 173 int src_pixels_per_line, 174 int xoffset, 175 int yoffset, 176 unsigned char *dst_ptr, 177 int dst_pitch 178 ) 179 { 180 const short *HFilter; 181 const short *VFilter; 182 int FData[13*16]; /* Temp data buffer used in filtering */ 183 184 HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */ 185 VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */ 186 187 /* First filter 1-D horizontally... */ 188 filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 13, 8, HFilter); 189 190 191 /* then filter verticaly... */ 192 filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter); 193 194 } 195 196 void vp8_sixtap_predict8x4_c 197 ( 198 unsigned char *src_ptr, 199 int src_pixels_per_line, 200 int xoffset, 201 int yoffset, 202 unsigned char *dst_ptr, 203 int dst_pitch 204 ) 205 { 206 const short *HFilter; 207 const short *VFilter; 208 int FData[13*16]; /* Temp data buffer used in filtering */ 209 210 HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */ 211 VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */ 212 213 /* First filter 1-D horizontally... */ 214 filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 8, HFilter); 215 216 217 /* then filter verticaly... */ 218 filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 4, 8, VFilter); 219 220 } 221 222 void vp8_sixtap_predict16x16_c 223 ( 224 unsigned char *src_ptr, 225 int src_pixels_per_line, 226 int xoffset, 227 int yoffset, 228 unsigned char *dst_ptr, 229 int dst_pitch 230 ) 231 { 232 const short *HFilter; 233 const short *VFilter; 234 int FData[21*24]; /* Temp data buffer used in filtering */ 235 236 237 HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */ 238 VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */ 239 240 /* First filter 1-D horizontally... */ 241 filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 21, 16, HFilter); 242 243 /* then filter verticaly... */ 244 filter_block2d_second_pass(FData + 32, dst_ptr, dst_pitch, 16, 16, 16, 16, VFilter); 245 246 } 247 248 249 /**************************************************************************** 250 * 251 * ROUTINE : filter_block2d_bil_first_pass 252 * 253 * INPUTS : UINT8 *src_ptr : Pointer to source block. 254 * UINT32 src_stride : Stride of source block. 255 * UINT32 height : Block height. 256 * UINT32 width : Block width. 257 * INT32 *vp8_filter : Array of 2 bi-linear filter taps. 258 * 259 * OUTPUTS : INT32 *dst_ptr : Pointer to filtered block. 260 * 261 * RETURNS : void 262 * 263 * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block 264 * in the horizontal direction to produce the filtered output 265 * block. Used to implement first-pass of 2-D separable filter. 266 * 267 * SPECIAL NOTES : Produces INT32 output to retain precision for next pass. 268 * Two filter taps should sum to VP8_FILTER_WEIGHT. 269 * 270 ****************************************************************************/ 271 static void filter_block2d_bil_first_pass 272 ( 273 unsigned char *src_ptr, 274 unsigned short *dst_ptr, 275 unsigned int src_stride, 276 unsigned int height, 277 unsigned int width, 278 const short *vp8_filter 279 ) 280 { 281 unsigned int i, j; 282 283 for (i = 0; i < height; i++) 284 { 285 for (j = 0; j < width; j++) 286 { 287 /* Apply bilinear filter */ 288 dst_ptr[j] = (((int)src_ptr[0] * vp8_filter[0]) + 289 ((int)src_ptr[1] * vp8_filter[1]) + 290 (VP8_FILTER_WEIGHT / 2)) >> VP8_FILTER_SHIFT; 291 src_ptr++; 292 } 293 294 /* Next row... */ 295 src_ptr += src_stride - width; 296 dst_ptr += width; 297 } 298 } 299 300 /**************************************************************************** 301 * 302 * ROUTINE : filter_block2d_bil_second_pass 303 * 304 * INPUTS : INT32 *src_ptr : Pointer to source block. 305 * UINT32 dst_pitch : Destination block pitch. 306 * UINT32 height : Block height. 307 * UINT32 width : Block width. 308 * INT32 *vp8_filter : Array of 2 bi-linear filter taps. 309 * 310 * OUTPUTS : UINT16 *dst_ptr : Pointer to filtered block. 311 * 312 * RETURNS : void 313 * 314 * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block 315 * in the vertical direction to produce the filtered output 316 * block. Used to implement second-pass of 2-D separable filter. 317 * 318 * SPECIAL NOTES : Requires 32-bit input as produced by filter_block2d_bil_first_pass. 319 * Two filter taps should sum to VP8_FILTER_WEIGHT. 320 * 321 ****************************************************************************/ 322 static void filter_block2d_bil_second_pass 323 ( 324 unsigned short *src_ptr, 325 unsigned char *dst_ptr, 326 int dst_pitch, 327 unsigned int height, 328 unsigned int width, 329 const short *vp8_filter 330 ) 331 { 332 unsigned int i, j; 333 int Temp; 334 335 for (i = 0; i < height; i++) 336 { 337 for (j = 0; j < width; j++) 338 { 339 /* Apply filter */ 340 Temp = ((int)src_ptr[0] * vp8_filter[0]) + 341 ((int)src_ptr[width] * vp8_filter[1]) + 342 (VP8_FILTER_WEIGHT / 2); 343 dst_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT); 344 src_ptr++; 345 } 346 347 /* Next row... */ 348 dst_ptr += dst_pitch; 349 } 350 } 351 352 353 /**************************************************************************** 354 * 355 * ROUTINE : filter_block2d_bil 356 * 357 * INPUTS : UINT8 *src_ptr : Pointer to source block. 358 * UINT32 src_pitch : Stride of source block. 359 * UINT32 dst_pitch : Stride of destination block. 360 * INT32 *HFilter : Array of 2 horizontal filter taps. 361 * INT32 *VFilter : Array of 2 vertical filter taps. 362 * INT32 Width : Block width 363 * INT32 Height : Block height 364 * 365 * OUTPUTS : UINT16 *dst_ptr : Pointer to filtered block. 366 * 367 * RETURNS : void 368 * 369 * FUNCTION : 2-D filters an input block by applying a 2-tap 370 * bi-linear filter horizontally followed by a 2-tap 371 * bi-linear filter vertically on the result. 372 * 373 * SPECIAL NOTES : The largest block size can be handled here is 16x16 374 * 375 ****************************************************************************/ 376 static void filter_block2d_bil 377 ( 378 unsigned char *src_ptr, 379 unsigned char *dst_ptr, 380 unsigned int src_pitch, 381 unsigned int dst_pitch, 382 const short *HFilter, 383 const short *VFilter, 384 int Width, 385 int Height 386 ) 387 { 388 389 unsigned short FData[17*16]; /* Temp data buffer used in filtering */ 390 391 /* First filter 1-D horizontally... */ 392 filter_block2d_bil_first_pass(src_ptr, FData, src_pitch, Height + 1, Width, HFilter); 393 394 /* then 1-D vertically... */ 395 filter_block2d_bil_second_pass(FData, dst_ptr, dst_pitch, Height, Width, VFilter); 396 } 397 398 399 void vp8_bilinear_predict4x4_c 400 ( 401 unsigned char *src_ptr, 402 int src_pixels_per_line, 403 int xoffset, 404 int yoffset, 405 unsigned char *dst_ptr, 406 int dst_pitch 407 ) 408 { 409 const short *HFilter; 410 const short *VFilter; 411 412 HFilter = vp8_bilinear_filters[xoffset]; 413 VFilter = vp8_bilinear_filters[yoffset]; 414 #if 0 415 { 416 int i; 417 unsigned char temp1[16]; 418 unsigned char temp2[16]; 419 420 bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4); 421 filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4); 422 423 for (i = 0; i < 16; i++) 424 { 425 if (temp1[i] != temp2[i]) 426 { 427 bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4); 428 filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4); 429 } 430 } 431 } 432 #endif 433 filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 4, 4); 434 435 } 436 437 void vp8_bilinear_predict8x8_c 438 ( 439 unsigned char *src_ptr, 440 int src_pixels_per_line, 441 int xoffset, 442 int yoffset, 443 unsigned char *dst_ptr, 444 int dst_pitch 445 ) 446 { 447 const short *HFilter; 448 const short *VFilter; 449 450 HFilter = vp8_bilinear_filters[xoffset]; 451 VFilter = vp8_bilinear_filters[yoffset]; 452 453 filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 8); 454 455 } 456 457 void vp8_bilinear_predict8x4_c 458 ( 459 unsigned char *src_ptr, 460 int src_pixels_per_line, 461 int xoffset, 462 int yoffset, 463 unsigned char *dst_ptr, 464 int dst_pitch 465 ) 466 { 467 const short *HFilter; 468 const short *VFilter; 469 470 HFilter = vp8_bilinear_filters[xoffset]; 471 VFilter = vp8_bilinear_filters[yoffset]; 472 473 filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 4); 474 475 } 476 477 void vp8_bilinear_predict16x16_c 478 ( 479 unsigned char *src_ptr, 480 int src_pixels_per_line, 481 int xoffset, 482 int yoffset, 483 unsigned char *dst_ptr, 484 int dst_pitch 485 ) 486 { 487 const short *HFilter; 488 const short *VFilter; 489 490 HFilter = vp8_bilinear_filters[xoffset]; 491 VFilter = vp8_bilinear_filters[yoffset]; 492 493 filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 16, 16); 494 } 495