Home | History | Annotate | Download | only in x86
      1 /*
      2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #include "vpx_config.h"
     12 #include "vp8/common/variance.h"
     13 #include "vp8/common/pragmas.h"
     14 #include "vpx_ports/mem.h"
     15 
     16 extern unsigned int vp8_get16x16var_sse2
     17 (
     18     const unsigned char *src_ptr,
     19     int source_stride,
     20     const unsigned char *ref_ptr,
     21     int recon_stride,
     22     unsigned int *SSE,
     23     int *Sum
     24 );
     25 extern void vp8_half_horiz_vert_variance16x_h_sse2
     26 (
     27     const unsigned char *ref_ptr,
     28     int ref_pixels_per_line,
     29     const unsigned char *src_ptr,
     30     int src_pixels_per_line,
     31     unsigned int Height,
     32     int *sum,
     33     unsigned int *sumsquared
     34 );
     35 extern void vp8_half_horiz_variance16x_h_sse2
     36 (
     37     const unsigned char *ref_ptr,
     38     int ref_pixels_per_line,
     39     const unsigned char *src_ptr,
     40     int src_pixels_per_line,
     41     unsigned int Height,
     42     int *sum,
     43     unsigned int *sumsquared
     44 );
     45 extern void vp8_half_vert_variance16x_h_sse2
     46 (
     47     const unsigned char *ref_ptr,
     48     int ref_pixels_per_line,
     49     const unsigned char *src_ptr,
     50     int src_pixels_per_line,
     51     unsigned int Height,
     52     int *sum,
     53     unsigned int *sumsquared
     54 );
     55 extern void vp8_filter_block2d_bil_var_ssse3
     56 (
     57     const unsigned char *ref_ptr,
     58     int ref_pixels_per_line,
     59     const unsigned char *src_ptr,
     60     int src_pixels_per_line,
     61     unsigned int Height,
     62     int  xoffset,
     63     int  yoffset,
     64     int *sum,
     65     unsigned int *sumsquared
     66 );
     67 
     68 unsigned int vp8_sub_pixel_variance16x16_ssse3
     69 (
     70     const unsigned char  *src_ptr,
     71     int  src_pixels_per_line,
     72     int  xoffset,
     73     int  yoffset,
     74     const unsigned char *dst_ptr,
     75     int dst_pixels_per_line,
     76     unsigned int *sse
     77 )
     78 {
     79     int xsum0;
     80     unsigned int xxsum0;
     81 
     82     /* note we could avoid these if statements if the calling function
     83      * just called the appropriate functions inside.
     84      */
     85     if (xoffset == 4 && yoffset == 0)
     86     {
     87         vp8_half_horiz_variance16x_h_sse2(
     88             src_ptr, src_pixels_per_line,
     89             dst_ptr, dst_pixels_per_line, 16,
     90             &xsum0, &xxsum0);
     91     }
     92     else if (xoffset == 0 && yoffset == 4)
     93     {
     94         vp8_half_vert_variance16x_h_sse2(
     95             src_ptr, src_pixels_per_line,
     96             dst_ptr, dst_pixels_per_line, 16,
     97             &xsum0, &xxsum0);
     98     }
     99     else if (xoffset == 4 && yoffset == 4)
    100     {
    101         vp8_half_horiz_vert_variance16x_h_sse2(
    102             src_ptr, src_pixels_per_line,
    103             dst_ptr, dst_pixels_per_line, 16,
    104             &xsum0, &xxsum0);
    105     }
    106     else
    107     {
    108         vp8_filter_block2d_bil_var_ssse3(
    109             src_ptr, src_pixels_per_line,
    110             dst_ptr, dst_pixels_per_line, 16,
    111             xoffset, yoffset,
    112             &xsum0, &xxsum0);
    113     }
    114 
    115     *sse = xxsum0;
    116     return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8));
    117 }
    118 
    119 unsigned int vp8_sub_pixel_variance16x8_ssse3
    120 (
    121     const unsigned char  *src_ptr,
    122     int  src_pixels_per_line,
    123     int  xoffset,
    124     int  yoffset,
    125     const unsigned char *dst_ptr,
    126     int dst_pixels_per_line,
    127     unsigned int *sse
    128 
    129 )
    130 {
    131     int xsum0;
    132     unsigned int xxsum0;
    133 
    134     if (xoffset == 4 && yoffset == 0)
    135     {
    136         vp8_half_horiz_variance16x_h_sse2(
    137             src_ptr, src_pixels_per_line,
    138             dst_ptr, dst_pixels_per_line, 8,
    139             &xsum0, &xxsum0);
    140     }
    141     else if (xoffset == 0 && yoffset == 4)
    142     {
    143         vp8_half_vert_variance16x_h_sse2(
    144             src_ptr, src_pixels_per_line,
    145             dst_ptr, dst_pixels_per_line, 8,
    146             &xsum0, &xxsum0);
    147     }
    148     else if (xoffset == 4 && yoffset == 4)
    149     {
    150         vp8_half_horiz_vert_variance16x_h_sse2(
    151             src_ptr, src_pixels_per_line,
    152             dst_ptr, dst_pixels_per_line, 8,
    153             &xsum0, &xxsum0);
    154     }
    155     else
    156     {
    157         vp8_filter_block2d_bil_var_ssse3(
    158             src_ptr, src_pixels_per_line,
    159             dst_ptr, dst_pixels_per_line, 8,
    160             xoffset, yoffset,
    161             &xsum0, &xxsum0);
    162     }
    163 
    164     *sse = xxsum0;
    165     return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 7));
    166 }
    167