1 /* 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "vpx_config.h" 12 #include "vp8/common/variance.h" 13 #include "vp8/common/pragmas.h" 14 #include "vpx_ports/mem.h" 15 #include "vp8/common/x86/filter_x86.h" 16 17 extern void filter_block1d_h6_mmx 18 ( 19 const unsigned char *src_ptr, 20 unsigned short *output_ptr, 21 unsigned int src_pixels_per_line, 22 unsigned int pixel_step, 23 unsigned int output_height, 24 unsigned int output_width, 25 short *filter 26 ); 27 extern void filter_block1d_v6_mmx 28 ( 29 const short *src_ptr, 30 unsigned char *output_ptr, 31 unsigned int pixels_per_line, 32 unsigned int pixel_step, 33 unsigned int output_height, 34 unsigned int output_width, 35 short *filter 36 ); 37 38 extern unsigned int vp8_get_mb_ss_mmx(const short *src_ptr); 39 extern unsigned int vp8_get8x8var_mmx 40 ( 41 const unsigned char *src_ptr, 42 int source_stride, 43 const unsigned char *ref_ptr, 44 int recon_stride, 45 unsigned int *SSE, 46 int *Sum 47 ); 48 extern unsigned int vp8_get4x4var_mmx 49 ( 50 const unsigned char *src_ptr, 51 int source_stride, 52 const unsigned char *ref_ptr, 53 int recon_stride, 54 unsigned int *SSE, 55 int *Sum 56 ); 57 extern void vp8_filter_block2d_bil4x4_var_mmx 58 ( 59 const unsigned char *ref_ptr, 60 int ref_pixels_per_line, 61 const unsigned char *src_ptr, 62 int src_pixels_per_line, 63 const short *HFilter, 64 const short *VFilter, 65 int *sum, 66 unsigned int *sumsquared 67 ); 68 extern void vp8_filter_block2d_bil_var_mmx 69 ( 70 const unsigned char *ref_ptr, 71 int ref_pixels_per_line, 72 const unsigned char *src_ptr, 73 int src_pixels_per_line, 74 unsigned int Height, 75 const short *HFilter, 76 const short *VFilter, 77 int *sum, 78 unsigned int *sumsquared 79 ); 80 81 82 unsigned int vp8_variance4x4_mmx( 83 const unsigned char *src_ptr, 84 int source_stride, 85 const unsigned char *ref_ptr, 86 int recon_stride, 87 unsigned int *sse) 88 { 89 unsigned int var; 90 int avg; 91 92 vp8_get4x4var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ; 93 *sse = var; 94 return (var - (((unsigned int)avg * avg) >> 4)); 95 96 } 97 98 unsigned int vp8_variance8x8_mmx( 99 const unsigned char *src_ptr, 100 int source_stride, 101 const unsigned char *ref_ptr, 102 int recon_stride, 103 unsigned int *sse) 104 { 105 unsigned int var; 106 int avg; 107 108 vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ; 109 *sse = var; 110 111 return (var - (((unsigned int)avg * avg) >> 6)); 112 113 } 114 115 unsigned int vp8_mse16x16_mmx( 116 const unsigned char *src_ptr, 117 int source_stride, 118 const unsigned char *ref_ptr, 119 int recon_stride, 120 unsigned int *sse) 121 { 122 unsigned int sse0, sse1, sse2, sse3, var; 123 int sum0, sum1, sum2, sum3; 124 125 126 vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ; 127 vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1); 128 vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse2, &sum2) ; 129 vp8_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride, ref_ptr + 8 * recon_stride + 8, recon_stride, &sse3, &sum3); 130 131 var = sse0 + sse1 + sse2 + sse3; 132 *sse = var; 133 return var; 134 } 135 136 137 unsigned int vp8_variance16x16_mmx( 138 const unsigned char *src_ptr, 139 int source_stride, 140 const unsigned char *ref_ptr, 141 int recon_stride, 142 unsigned int *sse) 143 { 144 unsigned int sse0, sse1, sse2, sse3, var; 145 int sum0, sum1, sum2, sum3, avg; 146 147 148 vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ; 149 vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1); 150 vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse2, &sum2) ; 151 vp8_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride, ref_ptr + 8 * recon_stride + 8, recon_stride, &sse3, &sum3); 152 153 var = sse0 + sse1 + sse2 + sse3; 154 avg = sum0 + sum1 + sum2 + sum3; 155 *sse = var; 156 return (var - (((unsigned int)avg * avg) >> 8)); 157 } 158 159 unsigned int vp8_variance16x8_mmx( 160 const unsigned char *src_ptr, 161 int source_stride, 162 const unsigned char *ref_ptr, 163 int recon_stride, 164 unsigned int *sse) 165 { 166 unsigned int sse0, sse1, var; 167 int sum0, sum1, avg; 168 169 vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ; 170 vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1); 171 172 var = sse0 + sse1; 173 avg = sum0 + sum1; 174 *sse = var; 175 return (var - (((unsigned int)avg * avg) >> 7)); 176 177 } 178 179 180 unsigned int vp8_variance8x16_mmx( 181 const unsigned char *src_ptr, 182 int source_stride, 183 const unsigned char *ref_ptr, 184 int recon_stride, 185 unsigned int *sse) 186 { 187 unsigned int sse0, sse1, var; 188 int sum0, sum1, avg; 189 190 vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ; 191 vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse1, &sum1) ; 192 193 var = sse0 + sse1; 194 avg = sum0 + sum1; 195 *sse = var; 196 197 return (var - (((unsigned int)avg * avg) >> 7)); 198 199 } 200 201 202 unsigned int vp8_sub_pixel_variance4x4_mmx 203 ( 204 const unsigned char *src_ptr, 205 int src_pixels_per_line, 206 int xoffset, 207 int yoffset, 208 const unsigned char *dst_ptr, 209 int dst_pixels_per_line, 210 unsigned int *sse) 211 212 { 213 int xsum; 214 unsigned int xxsum; 215 vp8_filter_block2d_bil4x4_var_mmx( 216 src_ptr, src_pixels_per_line, 217 dst_ptr, dst_pixels_per_line, 218 vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset], 219 &xsum, &xxsum 220 ); 221 *sse = xxsum; 222 return (xxsum - (((unsigned int)xsum * xsum) >> 4)); 223 } 224 225 226 unsigned int vp8_sub_pixel_variance8x8_mmx 227 ( 228 const unsigned char *src_ptr, 229 int src_pixels_per_line, 230 int xoffset, 231 int yoffset, 232 const unsigned char *dst_ptr, 233 int dst_pixels_per_line, 234 unsigned int *sse 235 ) 236 { 237 238 int xsum; 239 unsigned int xxsum; 240 vp8_filter_block2d_bil_var_mmx( 241 src_ptr, src_pixels_per_line, 242 dst_ptr, dst_pixels_per_line, 8, 243 vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset], 244 &xsum, &xxsum 245 ); 246 *sse = xxsum; 247 return (xxsum - (((unsigned int)xsum * xsum) >> 6)); 248 } 249 250 unsigned int vp8_sub_pixel_variance16x16_mmx 251 ( 252 const unsigned char *src_ptr, 253 int src_pixels_per_line, 254 int xoffset, 255 int yoffset, 256 const unsigned char *dst_ptr, 257 int dst_pixels_per_line, 258 unsigned int *sse 259 ) 260 { 261 262 int xsum0, xsum1; 263 unsigned int xxsum0, xxsum1; 264 265 266 vp8_filter_block2d_bil_var_mmx( 267 src_ptr, src_pixels_per_line, 268 dst_ptr, dst_pixels_per_line, 16, 269 vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset], 270 &xsum0, &xxsum0 271 ); 272 273 274 vp8_filter_block2d_bil_var_mmx( 275 src_ptr + 8, src_pixels_per_line, 276 dst_ptr + 8, dst_pixels_per_line, 16, 277 vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset], 278 &xsum1, &xxsum1 279 ); 280 281 xsum0 += xsum1; 282 xxsum0 += xxsum1; 283 284 *sse = xxsum0; 285 return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8)); 286 287 288 } 289 290 unsigned int vp8_sub_pixel_mse16x16_mmx( 291 const unsigned char *src_ptr, 292 int src_pixels_per_line, 293 int xoffset, 294 int yoffset, 295 const unsigned char *dst_ptr, 296 int dst_pixels_per_line, 297 unsigned int *sse 298 ) 299 { 300 vp8_sub_pixel_variance16x16_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pixels_per_line, sse); 301 return *sse; 302 } 303 304 unsigned int vp8_sub_pixel_variance16x8_mmx 305 ( 306 const unsigned char *src_ptr, 307 int src_pixels_per_line, 308 int xoffset, 309 int yoffset, 310 const unsigned char *dst_ptr, 311 int dst_pixels_per_line, 312 unsigned int *sse 313 ) 314 { 315 int xsum0, xsum1; 316 unsigned int xxsum0, xxsum1; 317 318 319 vp8_filter_block2d_bil_var_mmx( 320 src_ptr, src_pixels_per_line, 321 dst_ptr, dst_pixels_per_line, 8, 322 vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset], 323 &xsum0, &xxsum0 324 ); 325 326 327 vp8_filter_block2d_bil_var_mmx( 328 src_ptr + 8, src_pixels_per_line, 329 dst_ptr + 8, dst_pixels_per_line, 8, 330 vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset], 331 &xsum1, &xxsum1 332 ); 333 334 xsum0 += xsum1; 335 xxsum0 += xxsum1; 336 337 *sse = xxsum0; 338 return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 7)); 339 } 340 341 unsigned int vp8_sub_pixel_variance8x16_mmx 342 ( 343 const unsigned char *src_ptr, 344 int src_pixels_per_line, 345 int xoffset, 346 int yoffset, 347 const unsigned char *dst_ptr, 348 int dst_pixels_per_line, 349 unsigned int *sse 350 ) 351 { 352 int xsum; 353 unsigned int xxsum; 354 vp8_filter_block2d_bil_var_mmx( 355 src_ptr, src_pixels_per_line, 356 dst_ptr, dst_pixels_per_line, 16, 357 vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset], 358 &xsum, &xxsum 359 ); 360 *sse = xxsum; 361 return (xxsum - (((unsigned int)xsum * xsum) >> 7)); 362 } 363 364 365 unsigned int vp8_variance_halfpixvar16x16_h_mmx( 366 const unsigned char *src_ptr, 367 int source_stride, 368 const unsigned char *ref_ptr, 369 int recon_stride, 370 unsigned int *sse) 371 { 372 return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 4, 0, 373 ref_ptr, recon_stride, sse); 374 } 375 376 377 unsigned int vp8_variance_halfpixvar16x16_v_mmx( 378 const unsigned char *src_ptr, 379 int source_stride, 380 const unsigned char *ref_ptr, 381 int recon_stride, 382 unsigned int *sse) 383 { 384 return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 0, 4, 385 ref_ptr, recon_stride, sse); 386 } 387 388 389 unsigned int vp8_variance_halfpixvar16x16_hv_mmx( 390 const unsigned char *src_ptr, 391 int source_stride, 392 const unsigned char *ref_ptr, 393 int recon_stride, 394 unsigned int *sse) 395 { 396 return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 4, 4, 397 ref_ptr, recon_stride, sse); 398 } 399