Home | History | Annotate | Download | only in x86
      1 /*
      2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #include "vpx_config.h"
     12 #include "vp8/common/variance.h"
     13 #include "vp8/common/pragmas.h"
     14 #include "vpx_ports/mem.h"
     15 #include "vp8/common/x86/filter_x86.h"
     16 
     17 extern void filter_block1d_h6_mmx
     18 (
     19     const unsigned char *src_ptr,
     20     unsigned short *output_ptr,
     21     unsigned int src_pixels_per_line,
     22     unsigned int pixel_step,
     23     unsigned int output_height,
     24     unsigned int output_width,
     25     short *filter
     26 );
     27 extern void filter_block1d_v6_mmx
     28 (
     29     const short *src_ptr,
     30     unsigned char *output_ptr,
     31     unsigned int pixels_per_line,
     32     unsigned int pixel_step,
     33     unsigned int output_height,
     34     unsigned int output_width,
     35     short *filter
     36 );
     37 
     38 extern unsigned int vp8_get_mb_ss_mmx(const short *src_ptr);
     39 extern unsigned int vp8_get8x8var_mmx
     40 (
     41     const unsigned char *src_ptr,
     42     int  source_stride,
     43     const unsigned char *ref_ptr,
     44     int  recon_stride,
     45     unsigned int *SSE,
     46     int *Sum
     47 );
     48 extern unsigned int vp8_get4x4var_mmx
     49 (
     50     const unsigned char *src_ptr,
     51     int  source_stride,
     52     const unsigned char *ref_ptr,
     53     int  recon_stride,
     54     unsigned int *SSE,
     55     int *Sum
     56 );
     57 extern void vp8_filter_block2d_bil4x4_var_mmx
     58 (
     59     const unsigned char *ref_ptr,
     60     int ref_pixels_per_line,
     61     const unsigned char *src_ptr,
     62     int src_pixels_per_line,
     63     const short *HFilter,
     64     const short *VFilter,
     65     int *sum,
     66     unsigned int *sumsquared
     67 );
     68 extern void vp8_filter_block2d_bil_var_mmx
     69 (
     70     const unsigned char *ref_ptr,
     71     int ref_pixels_per_line,
     72     const unsigned char *src_ptr,
     73     int src_pixels_per_line,
     74     unsigned int Height,
     75     const short *HFilter,
     76     const short *VFilter,
     77     int *sum,
     78     unsigned int *sumsquared
     79 );
     80 
     81 
     82 unsigned int vp8_variance4x4_mmx(
     83     const unsigned char *src_ptr,
     84     int  source_stride,
     85     const unsigned char *ref_ptr,
     86     int  recon_stride,
     87     unsigned int *sse)
     88 {
     89     unsigned int var;
     90     int avg;
     91 
     92     vp8_get4x4var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ;
     93     *sse = var;
     94     return (var - (((unsigned int)avg * avg) >> 4));
     95 
     96 }
     97 
     98 unsigned int vp8_variance8x8_mmx(
     99     const unsigned char *src_ptr,
    100     int  source_stride,
    101     const unsigned char *ref_ptr,
    102     int  recon_stride,
    103     unsigned int *sse)
    104 {
    105     unsigned int var;
    106     int avg;
    107 
    108     vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ;
    109     *sse = var;
    110 
    111     return (var - (((unsigned int)avg * avg) >> 6));
    112 
    113 }
    114 
    115 unsigned int vp8_mse16x16_mmx(
    116     const unsigned char *src_ptr,
    117     int  source_stride,
    118     const unsigned char *ref_ptr,
    119     int  recon_stride,
    120     unsigned int *sse)
    121 {
    122     unsigned int sse0, sse1, sse2, sse3, var;
    123     int sum0, sum1, sum2, sum3;
    124 
    125 
    126     vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
    127     vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
    128     vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse2, &sum2) ;
    129     vp8_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride, ref_ptr + 8 * recon_stride + 8, recon_stride, &sse3, &sum3);
    130 
    131     var = sse0 + sse1 + sse2 + sse3;
    132     *sse = var;
    133     return var;
    134 }
    135 
    136 
    137 unsigned int vp8_variance16x16_mmx(
    138     const unsigned char *src_ptr,
    139     int  source_stride,
    140     const unsigned char *ref_ptr,
    141     int  recon_stride,
    142     unsigned int *sse)
    143 {
    144     unsigned int sse0, sse1, sse2, sse3, var;
    145     int sum0, sum1, sum2, sum3, avg;
    146 
    147 
    148     vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
    149     vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
    150     vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse2, &sum2) ;
    151     vp8_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride, ref_ptr + 8 * recon_stride + 8, recon_stride, &sse3, &sum3);
    152 
    153     var = sse0 + sse1 + sse2 + sse3;
    154     avg = sum0 + sum1 + sum2 + sum3;
    155     *sse = var;
    156     return (var - (((unsigned int)avg * avg) >> 8));
    157 }
    158 
    159 unsigned int vp8_variance16x8_mmx(
    160     const unsigned char *src_ptr,
    161     int  source_stride,
    162     const unsigned char *ref_ptr,
    163     int  recon_stride,
    164     unsigned int *sse)
    165 {
    166     unsigned int sse0, sse1, var;
    167     int sum0, sum1, avg;
    168 
    169     vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
    170     vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
    171 
    172     var = sse0 + sse1;
    173     avg = sum0 + sum1;
    174     *sse = var;
    175     return (var - (((unsigned int)avg * avg) >> 7));
    176 
    177 }
    178 
    179 
    180 unsigned int vp8_variance8x16_mmx(
    181     const unsigned char *src_ptr,
    182     int  source_stride,
    183     const unsigned char *ref_ptr,
    184     int  recon_stride,
    185     unsigned int *sse)
    186 {
    187     unsigned int sse0, sse1, var;
    188     int sum0, sum1, avg;
    189 
    190     vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
    191     vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse1, &sum1) ;
    192 
    193     var = sse0 + sse1;
    194     avg = sum0 + sum1;
    195     *sse = var;
    196 
    197     return (var - (((unsigned int)avg * avg) >> 7));
    198 
    199 }
    200 
    201 
    202 unsigned int vp8_sub_pixel_variance4x4_mmx
    203 (
    204     const unsigned char  *src_ptr,
    205     int  src_pixels_per_line,
    206     int  xoffset,
    207     int  yoffset,
    208     const unsigned char *dst_ptr,
    209     int dst_pixels_per_line,
    210     unsigned int *sse)
    211 
    212 {
    213     int xsum;
    214     unsigned int xxsum;
    215     vp8_filter_block2d_bil4x4_var_mmx(
    216         src_ptr, src_pixels_per_line,
    217         dst_ptr, dst_pixels_per_line,
    218         vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
    219         &xsum, &xxsum
    220     );
    221     *sse = xxsum;
    222     return (xxsum - (((unsigned int)xsum * xsum) >> 4));
    223 }
    224 
    225 
    226 unsigned int vp8_sub_pixel_variance8x8_mmx
    227 (
    228     const unsigned char  *src_ptr,
    229     int  src_pixels_per_line,
    230     int  xoffset,
    231     int  yoffset,
    232     const unsigned char *dst_ptr,
    233     int dst_pixels_per_line,
    234     unsigned int *sse
    235 )
    236 {
    237 
    238     int xsum;
    239     unsigned int xxsum;
    240     vp8_filter_block2d_bil_var_mmx(
    241         src_ptr, src_pixels_per_line,
    242         dst_ptr, dst_pixels_per_line, 8,
    243         vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
    244         &xsum, &xxsum
    245     );
    246     *sse = xxsum;
    247     return (xxsum - (((unsigned int)xsum * xsum) >> 6));
    248 }
    249 
    250 unsigned int vp8_sub_pixel_variance16x16_mmx
    251 (
    252     const unsigned char  *src_ptr,
    253     int  src_pixels_per_line,
    254     int  xoffset,
    255     int  yoffset,
    256     const unsigned char *dst_ptr,
    257     int dst_pixels_per_line,
    258     unsigned int *sse
    259 )
    260 {
    261 
    262     int xsum0, xsum1;
    263     unsigned int xxsum0, xxsum1;
    264 
    265 
    266     vp8_filter_block2d_bil_var_mmx(
    267         src_ptr, src_pixels_per_line,
    268         dst_ptr, dst_pixels_per_line, 16,
    269         vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
    270         &xsum0, &xxsum0
    271     );
    272 
    273 
    274     vp8_filter_block2d_bil_var_mmx(
    275         src_ptr + 8, src_pixels_per_line,
    276         dst_ptr + 8, dst_pixels_per_line, 16,
    277         vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
    278         &xsum1, &xxsum1
    279     );
    280 
    281     xsum0 += xsum1;
    282     xxsum0 += xxsum1;
    283 
    284     *sse = xxsum0;
    285     return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8));
    286 
    287 
    288 }
    289 
    290 unsigned int vp8_sub_pixel_mse16x16_mmx(
    291     const unsigned char  *src_ptr,
    292     int  src_pixels_per_line,
    293     int  xoffset,
    294     int  yoffset,
    295     const unsigned char *dst_ptr,
    296     int dst_pixels_per_line,
    297     unsigned int *sse
    298 )
    299 {
    300     vp8_sub_pixel_variance16x16_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pixels_per_line, sse);
    301     return *sse;
    302 }
    303 
    304 unsigned int vp8_sub_pixel_variance16x8_mmx
    305 (
    306     const unsigned char  *src_ptr,
    307     int  src_pixels_per_line,
    308     int  xoffset,
    309     int  yoffset,
    310     const unsigned char *dst_ptr,
    311     int dst_pixels_per_line,
    312     unsigned int *sse
    313 )
    314 {
    315     int xsum0, xsum1;
    316     unsigned int xxsum0, xxsum1;
    317 
    318 
    319     vp8_filter_block2d_bil_var_mmx(
    320         src_ptr, src_pixels_per_line,
    321         dst_ptr, dst_pixels_per_line, 8,
    322         vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
    323         &xsum0, &xxsum0
    324     );
    325 
    326 
    327     vp8_filter_block2d_bil_var_mmx(
    328         src_ptr + 8, src_pixels_per_line,
    329         dst_ptr + 8, dst_pixels_per_line, 8,
    330         vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
    331         &xsum1, &xxsum1
    332     );
    333 
    334     xsum0 += xsum1;
    335     xxsum0 += xxsum1;
    336 
    337     *sse = xxsum0;
    338     return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 7));
    339 }
    340 
    341 unsigned int vp8_sub_pixel_variance8x16_mmx
    342 (
    343     const unsigned char  *src_ptr,
    344     int  src_pixels_per_line,
    345     int  xoffset,
    346     int  yoffset,
    347     const unsigned char *dst_ptr,
    348     int dst_pixels_per_line,
    349     unsigned int *sse
    350 )
    351 {
    352     int xsum;
    353     unsigned int xxsum;
    354     vp8_filter_block2d_bil_var_mmx(
    355         src_ptr, src_pixels_per_line,
    356         dst_ptr, dst_pixels_per_line, 16,
    357         vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
    358         &xsum, &xxsum
    359     );
    360     *sse = xxsum;
    361     return (xxsum - (((unsigned int)xsum * xsum) >> 7));
    362 }
    363 
    364 
    365 unsigned int vp8_variance_halfpixvar16x16_h_mmx(
    366     const unsigned char *src_ptr,
    367     int  source_stride,
    368     const unsigned char *ref_ptr,
    369     int  recon_stride,
    370     unsigned int *sse)
    371 {
    372     return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 4, 0,
    373                                            ref_ptr, recon_stride, sse);
    374 }
    375 
    376 
    377 unsigned int vp8_variance_halfpixvar16x16_v_mmx(
    378     const unsigned char *src_ptr,
    379     int  source_stride,
    380     const unsigned char *ref_ptr,
    381     int  recon_stride,
    382     unsigned int *sse)
    383 {
    384     return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 0, 4,
    385                                            ref_ptr, recon_stride, sse);
    386 }
    387 
    388 
    389 unsigned int vp8_variance_halfpixvar16x16_hv_mmx(
    390     const unsigned char *src_ptr,
    391     int  source_stride,
    392     const unsigned char *ref_ptr,
    393     int  recon_stride,
    394     unsigned int *sse)
    395 {
    396     return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 4, 4,
    397                                            ref_ptr, recon_stride, sse);
    398 }
    399