1 /* 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include <assert.h> 12 #include "./vp8_rtcd.h" 13 #include "vp8/common/filter.h" 14 15 DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]) = { 16 { 128, 0 }, { 112, 16 }, { 96, 32 }, { 80, 48 }, 17 { 64, 64 }, { 48, 80 }, { 32, 96 }, { 16, 112 } 18 }; 19 20 DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][6]) = { 21 22 { 0, 0, 128, 0, 0, 23 0 }, /* note that 1/8 pel positions are just as per alpha -0.5 bicubic */ 24 { 0, -6, 123, 12, -1, 0 }, 25 { 2, -11, 108, 36, -8, 1 }, /* New 1/4 pel 6 tap filter */ 26 { 0, -9, 93, 50, -6, 0 }, 27 { 3, -16, 77, 77, -16, 3 }, /* New 1/2 pel 6 tap filter */ 28 { 0, -6, 50, 93, -9, 0 }, 29 { 1, -8, 36, 108, -11, 2 }, /* New 1/4 pel 6 tap filter */ 30 { 0, -1, 12, 123, -6, 0 }, 31 }; 32 33 static void filter_block2d_first_pass(unsigned char *src_ptr, int *output_ptr, 34 unsigned int src_pixels_per_line, 35 unsigned int pixel_step, 36 unsigned int output_height, 37 unsigned int output_width, 38 const short *vp8_filter) { 39 unsigned int i, j; 40 int Temp; 41 42 for (i = 0; i < output_height; ++i) { 43 for (j = 0; j < output_width; ++j) { 44 Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) + 45 ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) + 46 ((int)src_ptr[0] * vp8_filter[2]) + 47 ((int)src_ptr[pixel_step] * vp8_filter[3]) + 48 ((int)src_ptr[2 * pixel_step] * vp8_filter[4]) + 49 ((int)src_ptr[3 * pixel_step] * vp8_filter[5]) + 50 (VP8_FILTER_WEIGHT >> 1); /* Rounding */ 51 52 /* Normalize back to 0-255 */ 53 Temp = Temp >> VP8_FILTER_SHIFT; 54 55 if (Temp < 0) { 56 Temp = 0; 57 } else if (Temp > 255) { 58 Temp = 255; 59 } 60 61 output_ptr[j] = Temp; 62 src_ptr++; 63 } 64 65 /* Next row... */ 66 src_ptr += src_pixels_per_line - output_width; 67 output_ptr += output_width; 68 } 69 } 70 71 static void filter_block2d_second_pass(int *src_ptr, unsigned char *output_ptr, 72 int output_pitch, 73 unsigned int src_pixels_per_line, 74 unsigned int pixel_step, 75 unsigned int output_height, 76 unsigned int output_width, 77 const short *vp8_filter) { 78 unsigned int i, j; 79 int Temp; 80 81 for (i = 0; i < output_height; ++i) { 82 for (j = 0; j < output_width; ++j) { 83 /* Apply filter */ 84 Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) + 85 ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) + 86 ((int)src_ptr[0] * vp8_filter[2]) + 87 ((int)src_ptr[pixel_step] * vp8_filter[3]) + 88 ((int)src_ptr[2 * pixel_step] * vp8_filter[4]) + 89 ((int)src_ptr[3 * pixel_step] * vp8_filter[5]) + 90 (VP8_FILTER_WEIGHT >> 1); /* Rounding */ 91 92 /* Normalize back to 0-255 */ 93 Temp = Temp >> VP8_FILTER_SHIFT; 94 95 if (Temp < 0) { 96 Temp = 0; 97 } else if (Temp > 255) { 98 Temp = 255; 99 } 100 101 output_ptr[j] = (unsigned char)Temp; 102 src_ptr++; 103 } 104 105 /* Start next row */ 106 src_ptr += src_pixels_per_line - output_width; 107 output_ptr += output_pitch; 108 } 109 } 110 111 static void filter_block2d(unsigned char *src_ptr, unsigned char *output_ptr, 112 unsigned int src_pixels_per_line, int output_pitch, 113 const short *HFilter, const short *VFilter) { 114 int FData[9 * 4]; /* Temp data buffer used in filtering */ 115 116 /* First filter 1-D horizontally... */ 117 filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, 118 src_pixels_per_line, 1, 9, 4, HFilter); 119 120 /* then filter verticaly... */ 121 filter_block2d_second_pass(FData + 8, output_ptr, output_pitch, 4, 4, 4, 4, 122 VFilter); 123 } 124 125 void vp8_sixtap_predict4x4_c(unsigned char *src_ptr, int src_pixels_per_line, 126 int xoffset, int yoffset, unsigned char *dst_ptr, 127 int dst_pitch) { 128 const short *HFilter; 129 const short *VFilter; 130 131 HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */ 132 VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */ 133 134 filter_block2d(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, 135 VFilter); 136 } 137 void vp8_sixtap_predict8x8_c(unsigned char *src_ptr, int src_pixels_per_line, 138 int xoffset, int yoffset, unsigned char *dst_ptr, 139 int dst_pitch) { 140 const short *HFilter; 141 const short *VFilter; 142 int FData[13 * 16]; /* Temp data buffer used in filtering */ 143 144 HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */ 145 VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */ 146 147 /* First filter 1-D horizontally... */ 148 filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, 149 src_pixels_per_line, 1, 13, 8, HFilter); 150 151 /* then filter verticaly... */ 152 filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 8, 8, 153 VFilter); 154 } 155 156 void vp8_sixtap_predict8x4_c(unsigned char *src_ptr, int src_pixels_per_line, 157 int xoffset, int yoffset, unsigned char *dst_ptr, 158 int dst_pitch) { 159 const short *HFilter; 160 const short *VFilter; 161 int FData[13 * 16]; /* Temp data buffer used in filtering */ 162 163 HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */ 164 VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */ 165 166 /* First filter 1-D horizontally... */ 167 filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, 168 src_pixels_per_line, 1, 9, 8, HFilter); 169 170 /* then filter verticaly... */ 171 filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 4, 8, 172 VFilter); 173 } 174 175 void vp8_sixtap_predict16x16_c(unsigned char *src_ptr, int src_pixels_per_line, 176 int xoffset, int yoffset, unsigned char *dst_ptr, 177 int dst_pitch) { 178 const short *HFilter; 179 const short *VFilter; 180 int FData[21 * 24]; /* Temp data buffer used in filtering */ 181 182 HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */ 183 VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */ 184 185 /* First filter 1-D horizontally... */ 186 filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, 187 src_pixels_per_line, 1, 21, 16, HFilter); 188 189 /* then filter verticaly... */ 190 filter_block2d_second_pass(FData + 32, dst_ptr, dst_pitch, 16, 16, 16, 16, 191 VFilter); 192 } 193 194 /**************************************************************************** 195 * 196 * ROUTINE : filter_block2d_bil_first_pass 197 * 198 * INPUTS : UINT8 *src_ptr : Pointer to source block. 199 * UINT32 src_stride : Stride of source block. 200 * UINT32 height : Block height. 201 * UINT32 width : Block width. 202 * INT32 *vp8_filter : Array of 2 bi-linear filter taps. 203 * 204 * OUTPUTS : INT32 *dst_ptr : Pointer to filtered block. 205 * 206 * RETURNS : void 207 * 208 * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block 209 * in the horizontal direction to produce the filtered output 210 * block. Used to implement first-pass of 2-D separable filter. 211 * 212 * SPECIAL NOTES : Produces INT32 output to retain precision for next pass. 213 * Two filter taps should sum to VP8_FILTER_WEIGHT. 214 * 215 ****************************************************************************/ 216 static void filter_block2d_bil_first_pass( 217 unsigned char *src_ptr, unsigned short *dst_ptr, unsigned int src_stride, 218 unsigned int height, unsigned int width, const short *vp8_filter) { 219 unsigned int i, j; 220 221 for (i = 0; i < height; ++i) { 222 for (j = 0; j < width; ++j) { 223 /* Apply bilinear filter */ 224 dst_ptr[j] = 225 (((int)src_ptr[0] * vp8_filter[0]) + 226 ((int)src_ptr[1] * vp8_filter[1]) + (VP8_FILTER_WEIGHT / 2)) >> 227 VP8_FILTER_SHIFT; 228 src_ptr++; 229 } 230 231 /* Next row... */ 232 src_ptr += src_stride - width; 233 dst_ptr += width; 234 } 235 } 236 237 /**************************************************************************** 238 * 239 * ROUTINE : filter_block2d_bil_second_pass 240 * 241 * INPUTS : INT32 *src_ptr : Pointer to source block. 242 * UINT32 dst_pitch : Destination block pitch. 243 * UINT32 height : Block height. 244 * UINT32 width : Block width. 245 * INT32 *vp8_filter : Array of 2 bi-linear filter taps. 246 * 247 * OUTPUTS : UINT16 *dst_ptr : Pointer to filtered block. 248 * 249 * RETURNS : void 250 * 251 * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block 252 * in the vertical direction to produce the filtered output 253 * block. Used to implement second-pass of 2-D separable 254 * filter. 255 * 256 * SPECIAL NOTES : Requires 32-bit input as produced by 257 * filter_block2d_bil_first_pass. 258 * Two filter taps should sum to VP8_FILTER_WEIGHT. 259 * 260 ****************************************************************************/ 261 static void filter_block2d_bil_second_pass(unsigned short *src_ptr, 262 unsigned char *dst_ptr, 263 int dst_pitch, unsigned int height, 264 unsigned int width, 265 const short *vp8_filter) { 266 unsigned int i, j; 267 int Temp; 268 269 for (i = 0; i < height; ++i) { 270 for (j = 0; j < width; ++j) { 271 /* Apply filter */ 272 Temp = ((int)src_ptr[0] * vp8_filter[0]) + 273 ((int)src_ptr[width] * vp8_filter[1]) + (VP8_FILTER_WEIGHT / 2); 274 dst_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT); 275 src_ptr++; 276 } 277 278 /* Next row... */ 279 dst_ptr += dst_pitch; 280 } 281 } 282 283 /**************************************************************************** 284 * 285 * ROUTINE : filter_block2d_bil 286 * 287 * INPUTS : UINT8 *src_ptr : Pointer to source block. 288 * UINT32 src_pitch : Stride of source block. 289 * UINT32 dst_pitch : Stride of destination block. 290 * INT32 *HFilter : Array of 2 horizontal filter 291 * taps. 292 * INT32 *VFilter : Array of 2 vertical filter taps. 293 * INT32 Width : Block width 294 * INT32 Height : Block height 295 * 296 * OUTPUTS : UINT16 *dst_ptr : Pointer to filtered block. 297 * 298 * RETURNS : void 299 * 300 * FUNCTION : 2-D filters an input block by applying a 2-tap 301 * bi-linear filter horizontally followed by a 2-tap 302 * bi-linear filter vertically on the result. 303 * 304 * SPECIAL NOTES : The largest block size can be handled here is 16x16 305 * 306 ****************************************************************************/ 307 static void filter_block2d_bil(unsigned char *src_ptr, unsigned char *dst_ptr, 308 unsigned int src_pitch, unsigned int dst_pitch, 309 const short *HFilter, const short *VFilter, 310 int Width, int Height) { 311 unsigned short FData[17 * 16]; /* Temp data buffer used in filtering */ 312 313 /* First filter 1-D horizontally... */ 314 filter_block2d_bil_first_pass(src_ptr, FData, src_pitch, Height + 1, Width, 315 HFilter); 316 317 /* then 1-D vertically... */ 318 filter_block2d_bil_second_pass(FData, dst_ptr, dst_pitch, Height, Width, 319 VFilter); 320 } 321 322 void vp8_bilinear_predict4x4_c(unsigned char *src_ptr, int src_pixels_per_line, 323 int xoffset, int yoffset, unsigned char *dst_ptr, 324 int dst_pitch) { 325 const short *HFilter; 326 const short *VFilter; 327 328 // This represents a copy and is not required to be handled by optimizations. 329 assert((xoffset | yoffset) != 0); 330 331 HFilter = vp8_bilinear_filters[xoffset]; 332 VFilter = vp8_bilinear_filters[yoffset]; 333 filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, 334 VFilter, 4, 4); 335 } 336 337 void vp8_bilinear_predict8x8_c(unsigned char *src_ptr, int src_pixels_per_line, 338 int xoffset, int yoffset, unsigned char *dst_ptr, 339 int dst_pitch) { 340 const short *HFilter; 341 const short *VFilter; 342 343 assert((xoffset | yoffset) != 0); 344 345 HFilter = vp8_bilinear_filters[xoffset]; 346 VFilter = vp8_bilinear_filters[yoffset]; 347 348 filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, 349 VFilter, 8, 8); 350 } 351 352 void vp8_bilinear_predict8x4_c(unsigned char *src_ptr, int src_pixels_per_line, 353 int xoffset, int yoffset, unsigned char *dst_ptr, 354 int dst_pitch) { 355 const short *HFilter; 356 const short *VFilter; 357 358 assert((xoffset | yoffset) != 0); 359 360 HFilter = vp8_bilinear_filters[xoffset]; 361 VFilter = vp8_bilinear_filters[yoffset]; 362 363 filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, 364 VFilter, 8, 4); 365 } 366 367 void vp8_bilinear_predict16x16_c(unsigned char *src_ptr, 368 int src_pixels_per_line, int xoffset, 369 int yoffset, unsigned char *dst_ptr, 370 int dst_pitch) { 371 const short *HFilter; 372 const short *VFilter; 373 374 assert((xoffset | yoffset) != 0); 375 376 HFilter = vp8_bilinear_filters[xoffset]; 377 VFilter = vp8_bilinear_filters[yoffset]; 378 379 filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, 380 VFilter, 16, 16); 381 } 382