1 /* 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "vpx_config.h" 12 #include "vp8/common/variance.h" 13 #include "vp8/common/pragmas.h" 14 #include "vpx_ports/mem.h" 15 16 extern unsigned int vp8_get16x16var_sse2 17 ( 18 const unsigned char *src_ptr, 19 int source_stride, 20 const unsigned char *ref_ptr, 21 int recon_stride, 22 unsigned int *SSE, 23 int *Sum 24 ); 25 extern void vp8_half_horiz_vert_variance16x_h_sse2 26 ( 27 const unsigned char *ref_ptr, 28 int ref_pixels_per_line, 29 const unsigned char *src_ptr, 30 int src_pixels_per_line, 31 unsigned int Height, 32 int *sum, 33 unsigned int *sumsquared 34 ); 35 extern void vp8_half_horiz_variance16x_h_sse2 36 ( 37 const unsigned char *ref_ptr, 38 int ref_pixels_per_line, 39 const unsigned char *src_ptr, 40 int src_pixels_per_line, 41 unsigned int Height, 42 int *sum, 43 unsigned int *sumsquared 44 ); 45 extern void vp8_half_vert_variance16x_h_sse2 46 ( 47 const unsigned char *ref_ptr, 48 int ref_pixels_per_line, 49 const unsigned char *src_ptr, 50 int src_pixels_per_line, 51 unsigned int Height, 52 int *sum, 53 unsigned int *sumsquared 54 ); 55 extern void vp8_filter_block2d_bil_var_ssse3 56 ( 57 const unsigned char *ref_ptr, 58 int ref_pixels_per_line, 59 const unsigned char *src_ptr, 60 int src_pixels_per_line, 61 unsigned int Height, 62 int xoffset, 63 int yoffset, 64 int *sum, 65 unsigned int *sumsquared 66 ); 67 68 unsigned int vp8_sub_pixel_variance16x16_ssse3 69 ( 70 const unsigned char *src_ptr, 71 int src_pixels_per_line, 72 int xoffset, 73 int yoffset, 74 const unsigned char *dst_ptr, 75 int dst_pixels_per_line, 76 unsigned int *sse 77 ) 78 { 79 int xsum0; 80 unsigned int xxsum0; 81 82 /* note we could avoid these if statements if the calling function 83 * just called the appropriate functions inside. 84 */ 85 if (xoffset == 4 && yoffset == 0) 86 { 87 vp8_half_horiz_variance16x_h_sse2( 88 src_ptr, src_pixels_per_line, 89 dst_ptr, dst_pixels_per_line, 16, 90 &xsum0, &xxsum0); 91 } 92 else if (xoffset == 0 && yoffset == 4) 93 { 94 vp8_half_vert_variance16x_h_sse2( 95 src_ptr, src_pixels_per_line, 96 dst_ptr, dst_pixels_per_line, 16, 97 &xsum0, &xxsum0); 98 } 99 else if (xoffset == 4 && yoffset == 4) 100 { 101 vp8_half_horiz_vert_variance16x_h_sse2( 102 src_ptr, src_pixels_per_line, 103 dst_ptr, dst_pixels_per_line, 16, 104 &xsum0, &xxsum0); 105 } 106 else 107 { 108 vp8_filter_block2d_bil_var_ssse3( 109 src_ptr, src_pixels_per_line, 110 dst_ptr, dst_pixels_per_line, 16, 111 xoffset, yoffset, 112 &xsum0, &xxsum0); 113 } 114 115 *sse = xxsum0; 116 return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8)); 117 } 118 119 unsigned int vp8_sub_pixel_variance16x8_ssse3 120 ( 121 const unsigned char *src_ptr, 122 int src_pixels_per_line, 123 int xoffset, 124 int yoffset, 125 const unsigned char *dst_ptr, 126 int dst_pixels_per_line, 127 unsigned int *sse 128 129 ) 130 { 131 int xsum0; 132 unsigned int xxsum0; 133 134 if (xoffset == 4 && yoffset == 0) 135 { 136 vp8_half_horiz_variance16x_h_sse2( 137 src_ptr, src_pixels_per_line, 138 dst_ptr, dst_pixels_per_line, 8, 139 &xsum0, &xxsum0); 140 } 141 else if (xoffset == 0 && yoffset == 4) 142 { 143 vp8_half_vert_variance16x_h_sse2( 144 src_ptr, src_pixels_per_line, 145 dst_ptr, dst_pixels_per_line, 8, 146 &xsum0, &xxsum0); 147 } 148 else if (xoffset == 4 && yoffset == 4) 149 { 150 vp8_half_horiz_vert_variance16x_h_sse2( 151 src_ptr, src_pixels_per_line, 152 dst_ptr, dst_pixels_per_line, 8, 153 &xsum0, &xxsum0); 154 } 155 else 156 { 157 vp8_filter_block2d_bil_var_ssse3( 158 src_ptr, src_pixels_per_line, 159 dst_ptr, dst_pixels_per_line, 8, 160 xoffset, yoffset, 161 &xsum0, &xxsum0); 162 } 163 164 *sse = xxsum0; 165 return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 7)); 166 } 167