Home | History | Annotate | Download | only in x86
      1 /*
      2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 
     12 #include "vp8/encoder/variance.h"
     13 #include "vp8/common/pragmas.h"
     14 #include "vpx_ports/mem.h"
     15 
     16 extern unsigned int vp8_get16x16var_sse2
     17 (
     18     const unsigned char *src_ptr,
     19     int source_stride,
     20     const unsigned char *ref_ptr,
     21     int recon_stride,
     22     unsigned int *SSE,
     23     int *Sum
     24 );
     25 extern void vp8_half_horiz_vert_variance16x_h_sse2
     26 (
     27     const unsigned char *ref_ptr,
     28     int ref_pixels_per_line,
     29     const unsigned char *src_ptr,
     30     int src_pixels_per_line,
     31     unsigned int Height,
     32     int *sum,
     33     unsigned int *sumsquared
     34 );
     35 extern void vp8_half_horiz_variance16x_h_sse2
     36 (
     37     const unsigned char *ref_ptr,
     38     int ref_pixels_per_line,
     39     const unsigned char *src_ptr,
     40     int src_pixels_per_line,
     41     unsigned int Height,
     42     int *sum,
     43     unsigned int *sumsquared
     44 );
     45 extern void vp8_half_vert_variance16x_h_sse2
     46 (
     47     const unsigned char *ref_ptr,
     48     int ref_pixels_per_line,
     49     const unsigned char *src_ptr,
     50     int src_pixels_per_line,
     51     unsigned int Height,
     52     int *sum,
     53     unsigned int *sumsquared
     54 );
     55 extern void vp8_filter_block2d_bil_var_ssse3
     56 (
     57     const unsigned char *ref_ptr,
     58     int ref_pixels_per_line,
     59     const unsigned char *src_ptr,
     60     int src_pixels_per_line,
     61     unsigned int Height,
     62     int  xoffset,
     63     int  yoffset,
     64     int *sum,
     65     unsigned int *sumsquared
     66 );
     67 
     68 unsigned int vp8_sub_pixel_variance16x16_ssse3
     69 (
     70     const unsigned char  *src_ptr,
     71     int  src_pixels_per_line,
     72     int  xoffset,
     73     int  yoffset,
     74     const unsigned char *dst_ptr,
     75     int dst_pixels_per_line,
     76     unsigned int *sse
     77 )
     78 {
     79     int xsum0;
     80     unsigned int xxsum0;
     81 
     82     // note we could avoid these if statements if the calling function
     83     // just called the appropriate functions inside.
     84     if (xoffset == 4 && yoffset == 0)
     85     {
     86         vp8_half_horiz_variance16x_h_sse2(
     87             src_ptr, src_pixels_per_line,
     88             dst_ptr, dst_pixels_per_line, 16,
     89             &xsum0, &xxsum0);
     90     }
     91     else if (xoffset == 0 && yoffset == 4)
     92     {
     93         vp8_half_vert_variance16x_h_sse2(
     94             src_ptr, src_pixels_per_line,
     95             dst_ptr, dst_pixels_per_line, 16,
     96             &xsum0, &xxsum0);
     97     }
     98     else if (xoffset == 4 && yoffset == 4)
     99     {
    100         vp8_half_horiz_vert_variance16x_h_sse2(
    101             src_ptr, src_pixels_per_line,
    102             dst_ptr, dst_pixels_per_line, 16,
    103             &xsum0, &xxsum0);
    104     }
    105     else
    106     {
    107         vp8_filter_block2d_bil_var_ssse3(
    108             src_ptr, src_pixels_per_line,
    109             dst_ptr, dst_pixels_per_line, 16,
    110             xoffset, yoffset,
    111             &xsum0, &xxsum0);
    112     }
    113 
    114     *sse = xxsum0;
    115     return (xxsum0 - ((xsum0 * xsum0) >> 8));
    116 }
    117 
    118 unsigned int vp8_sub_pixel_variance16x8_ssse3
    119 (
    120     const unsigned char  *src_ptr,
    121     int  src_pixels_per_line,
    122     int  xoffset,
    123     int  yoffset,
    124     const unsigned char *dst_ptr,
    125     int dst_pixels_per_line,
    126     unsigned int *sse
    127 
    128 )
    129 {
    130     int xsum0;
    131     unsigned int xxsum0;
    132 
    133     if (xoffset == 4 && yoffset == 0)
    134     {
    135         vp8_half_horiz_variance16x_h_sse2(
    136             src_ptr, src_pixels_per_line,
    137             dst_ptr, dst_pixels_per_line, 8,
    138             &xsum0, &xxsum0);
    139     }
    140     else if (xoffset == 0 && yoffset == 4)
    141     {
    142         vp8_half_vert_variance16x_h_sse2(
    143             src_ptr, src_pixels_per_line,
    144             dst_ptr, dst_pixels_per_line, 8,
    145             &xsum0, &xxsum0);
    146     }
    147     else if (xoffset == 4 && yoffset == 4)
    148     {
    149         vp8_half_horiz_vert_variance16x_h_sse2(
    150             src_ptr, src_pixels_per_line,
    151             dst_ptr, dst_pixels_per_line, 8,
    152             &xsum0, &xxsum0);
    153     }
    154     else
    155     {
    156         vp8_filter_block2d_bil_var_ssse3(
    157             src_ptr, src_pixels_per_line,
    158             dst_ptr, dst_pixels_per_line, 8,
    159             xoffset, yoffset,
    160             &xsum0, &xxsum0);
    161     }
    162 
    163     *sse = xxsum0;
    164     return (xxsum0 - ((xsum0 * xsum0) >> 7));
    165 }
    166