/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */


#include "vp8/encoder/variance.h"
#include "vp8/common/pragmas.h"
#include "vpx_ports/mem.h"

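/* Prototypes for the MMX helper routines used by the wrappers in this file;
 * the implementations live in the encoder's x86 assembly sources. */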
extern void filter_block1d_h6_mmx
(
    const unsigned char *src_ptr,
    unsigned short *output_ptr,
    unsigned int src_pixels_per_line,
    unsigned int pixel_step,
    unsigned int output_height,
    unsigned int output_width,
    short *vp7_filter
);
extern void filter_block1d_v6_mmx
(
    const short *src_ptr,
    unsigned char *output_ptr,
    unsigned int pixels_per_line,
    unsigned int pixel_step,
    unsigned int output_height,
    unsigned int output_width,
    short *vp7_filter
);

extern unsigned int vp8_get_mb_ss_mmx(short *src_ptr);
extern unsigned int vp8_get8x8var_mmx
(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *SSE,
    int *Sum
);
extern unsigned int vp8_get4x4var_mmx
(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *SSE,
    int *Sum
);
extern void vp8_filter_block2d_bil4x4_var_mmx
(
    const unsigned char *ref_ptr,
    int ref_pixels_per_line,
    const unsigned char *src_ptr,
    int src_pixels_per_line,
    const short *HFilter,
    const short *VFilter,
    int *sum,
    unsigned int *sumsquared
);
extern void vp8_filter_block2d_bil_var_mmx
(
    const unsigned char *ref_ptr,
    int ref_pixels_per_line,
    const unsigned char *src_ptr,
    int src_pixels_per_line,
    unsigned int Height,
    const short *HFilter,
    const short *VFilter,
    int *sum,
    unsigned int *sumsquared
);
extern unsigned int vp8_get16x16pred_error_mmx
(
    unsigned char *src_ptr,
    int src_stride,
    unsigned char *ref_ptr,
    int ref_stride
);

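/* The 16x16 SSE and sum are assembled from four 8x8 MMX calls, one per
 * quadrant of the block.  The return value is the usual variance estimator
 *     variance = SSE - (sum * sum) / N
 * with N = 256 pixels, hence the final >> 8. */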
unsigned int vp8_get16x16var_mmx(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *SSE,
    unsigned int *SUM
)
{
    unsigned int sse0, sse1, sse2, sse3, var;
    int sum0, sum1, sum2, sum3, avg;


    vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0);
    vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
    vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse2, &sum2);
    vp8_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride, ref_ptr + 8 * recon_stride + 8, recon_stride, &sse3, &sum3);

    var = sse0 + sse1 + sse2 + sse3;
    avg = sum0 + sum1 + sum2 + sum3;

    *SSE = var;
    *SUM = avg;
    return (var - ((avg * avg) >> 8));
}


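/* The fixed-size wrappers below use the same estimator; the final shift is
 * log2 of the pixel count: >> 4 for 4x4, >> 6 for 8x8, >> 7 for 16x8 and
 * 8x16, and >> 8 for 16x16. */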
unsigned int vp8_variance4x4_mmx(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    unsigned int var;
    int avg;

    vp8_get4x4var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg);
    *sse = var;
    return (var - ((avg * avg) >> 4));
}

unsigned int vp8_variance8x8_mmx(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    unsigned int var;
    int avg;

    vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg);
    *sse = var;

    return (var - ((avg * avg) >> 6));
}

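/* MSE keeps only the summed SSE of the four 8x8 quadrants; no mean
 * correction is applied. */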
unsigned int vp8_mse16x16_mmx(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    unsigned int sse0, sse1, sse2, sse3, var;
    int sum0, sum1, sum2, sum3;


    vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0);
    vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
    vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse2, &sum2);
    vp8_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride, ref_ptr + 8 * recon_stride + 8, recon_stride, &sse3, &sum3);

    var = sse0 + sse1 + sse2 + sse3;
    *sse = var;
    return var;
}


unsigned int vp8_variance16x16_mmx(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    unsigned int sse0, sse1, sse2, sse3, var;
    int sum0, sum1, sum2, sum3, avg;


    vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0);
    vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
    vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse2, &sum2);
    vp8_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride, ref_ptr + 8 * recon_stride + 8, recon_stride, &sse3, &sum3);

    var = sse0 + sse1 + sse2 + sse3;
    avg = sum0 + sum1 + sum2 + sum3;
    *sse = var;
    return (var - ((avg * avg) >> 8));
}

unsigned int vp8_variance16x8_mmx(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    unsigned int sse0, sse1, var;
    int sum0, sum1, avg;

    vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0);
    vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);

    var = sse0 + sse1;
    avg = sum0 + sum1;
    *sse = var;
    return (var - ((avg * avg) >> 7));
}


unsigned int vp8_variance8x16_mmx(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    unsigned int sse0, sse1, var;
    int sum0, sum1, avg;

    vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0);
    vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse1, &sum1);

    var = sse0 + sse1;
    avg = sum0 + sum1;
    *sse = var;

    return (var - ((avg * avg) >> 7));
}


///////////////////////////////////////////////////////////////////////////
// the mmx function that does the bilinear filtering and var calculation //
// in one pass                                                           //
///////////////////////////////////////////////////////////////////////////
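/* Each row of the table below holds the two bilinear tap weights for one
 * eighth-pel offset, replicated four times so the MMX code can apply them to
 * four pixels at a time.  Row i is effectively { 128 - 16*i, 16*i }; the two
 * taps always sum to 128, i.e. 7-bit filter precision. */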
DECLARE_ALIGNED(16, const short, vp8_vp7_bilinear_filters_mmx[8][8]) =
{
    { 128, 128, 128, 128,   0,   0,   0,   0 },
    { 112, 112, 112, 112,  16,  16,  16,  16 },
    {  96,  96,  96,  96,  32,  32,  32,  32 },
    {  80,  80,  80,  80,  48,  48,  48,  48 },
    {  64,  64,  64,  64,  64,  64,  64,  64 },
    {  48,  48,  48,  48,  80,  80,  80,  80 },
    {  32,  32,  32,  32,  96,  96,  96,  96 },
    {  16,  16,  16,  16, 112, 112, 112, 112 }
};

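/* Each sub-pixel variant below bilinearly filters one block at the requested
 * (xoffset, yoffset) eighth-pel position while accumulating the sum and sum
 * of squares of its difference from the other block in a single pass, then
 * applies the same variance estimator as the integer-pel versions. */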
unsigned int vp8_sub_pixel_variance4x4_mmx
(
    const unsigned char  *src_ptr,
    int  src_pixels_per_line,
    int  xoffset,
    int  yoffset,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse)
{
    int xsum;
    unsigned int xxsum;
    vp8_filter_block2d_bil4x4_var_mmx(
        src_ptr, src_pixels_per_line,
        dst_ptr, dst_pixels_per_line,
        vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
        &xsum, &xxsum
    );
    *sse = xxsum;
    return (xxsum - ((xsum * xsum) >> 4));
}


unsigned int vp8_sub_pixel_variance8x8_mmx
(
    const unsigned char  *src_ptr,
    int  src_pixels_per_line,
    int  xoffset,
    int  yoffset,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse
)
{
    int xsum;
    unsigned int xxsum;
    vp8_filter_block2d_bil_var_mmx(
        src_ptr, src_pixels_per_line,
        dst_ptr, dst_pixels_per_line, 8,
        vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
        &xsum, &xxsum
    );
    *sse = xxsum;
    return (xxsum - ((xsum * xsum) >> 6));
}

unsigned int vp8_sub_pixel_variance16x16_mmx
(
    const unsigned char  *src_ptr,
    int  src_pixels_per_line,
    int  xoffset,
    int  yoffset,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse
)
{
    int xsum0, xsum1;
    unsigned int xxsum0, xxsum1;

    vp8_filter_block2d_bil_var_mmx(
        src_ptr, src_pixels_per_line,
        dst_ptr, dst_pixels_per_line, 16,
        vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
        &xsum0, &xxsum0
    );

    vp8_filter_block2d_bil_var_mmx(
        src_ptr + 8, src_pixels_per_line,
        dst_ptr + 8, dst_pixels_per_line, 16,
        vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
        &xsum1, &xxsum1
    );

    xsum0 += xsum1;
    xxsum0 += xxsum1;

    *sse = xxsum0;
    return (xxsum0 - ((xsum0 * xsum0) >> 8));
}

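/* Sub-pixel MSE reuses the 16x16 sub-pixel variance call purely for the SSE
 * it writes out and ignores the mean-corrected return value. */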
unsigned int vp8_sub_pixel_mse16x16_mmx(
    const unsigned char  *src_ptr,
    int  src_pixels_per_line,
    int  xoffset,
    int  yoffset,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse
)
{
    vp8_sub_pixel_variance16x16_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pixels_per_line, sse);
    return *sse;
}

unsigned int vp8_sub_pixel_variance16x8_mmx
(
    const unsigned char  *src_ptr,
    int  src_pixels_per_line,
    int  xoffset,
    int  yoffset,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse
)
{
    int xsum0, xsum1;
    unsigned int xxsum0, xxsum1;

    vp8_filter_block2d_bil_var_mmx(
        src_ptr, src_pixels_per_line,
        dst_ptr, dst_pixels_per_line, 8,
        vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
        &xsum0, &xxsum0
    );

    vp8_filter_block2d_bil_var_mmx(
        src_ptr + 8, src_pixels_per_line,
        dst_ptr + 8, dst_pixels_per_line, 8,
        vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
        &xsum1, &xxsum1
    );

    xsum0 += xsum1;
    xxsum0 += xxsum1;

    *sse = xxsum0;
    return (xxsum0 - ((xsum0 * xsum0) >> 7));
}

unsigned int vp8_sub_pixel_variance8x16_mmx
(
    const unsigned char  *src_ptr,
    int  src_pixels_per_line,
    int  xoffset,
    int  yoffset,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse
)
{
    int xsum;
    unsigned int xxsum;
    vp8_filter_block2d_bil_var_mmx(
        src_ptr, src_pixels_per_line,
        dst_ptr, dst_pixels_per_line, 16,
        vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
        &xsum, &xxsum
    );
    *sse = xxsum;
    return (xxsum - ((xsum * xsum) >> 7));
}

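/* Offset 4 in the eighth-pel filter table is the half-pel position (4/8), so
 * the half-pixel variants are thin wrappers around the 16x16 sub-pixel
 * variance at (4, 0), (0, 4) and (4, 4). */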
unsigned int vp8_variance_halfpixvar16x16_h_mmx(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 4, 0,
                                           ref_ptr, recon_stride, sse);
}


unsigned int vp8_variance_halfpixvar16x16_v_mmx(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 0, 4,
                                           ref_ptr, recon_stride, sse);
}


unsigned int vp8_variance_halfpixvar16x16_hv_mmx(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 4, 4,
                                           ref_ptr, recon_stride, sse);
}
    454