Home | History | Annotate | Download | only in x86
      1 /*
      2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 
     12 #include "variance.h"
     13 #include "pragmas.h"
     14 #include "vpx_ports/mem.h"
     15 
     16 extern void filter_block1d_h6_mmx
     17 (
     18     unsigned char *src_ptr,
     19     unsigned short *output_ptr,
     20     unsigned int src_pixels_per_line,
     21     unsigned int pixel_step,
     22     unsigned int output_height,
     23     unsigned int output_width,
     24     short *vp7_filter
     25 );
     26 extern void filter_block1d_v6_mmx
     27 (
     28     short *src_ptr,
     29     unsigned char *output_ptr,
     30     unsigned int pixels_per_line,
     31     unsigned int pixel_step,
     32     unsigned int output_height,
     33     unsigned int output_width,
     34     short *vp7_filter
     35 );
     36 
     37 extern unsigned int vp8_get_mb_ss_mmx(short *src_ptr);
     38 extern unsigned int vp8_get8x8var_mmx
     39 (
     40     unsigned char *src_ptr,
     41     int  source_stride,
     42     unsigned char *ref_ptr,
     43     int  recon_stride,
     44     unsigned int *SSE,
     45     int *Sum
     46 );
     47 extern unsigned int vp8_get4x4var_mmx
     48 (
     49     unsigned char *src_ptr,
     50     int  source_stride,
     51     unsigned char *ref_ptr,
     52     int  recon_stride,
     53     unsigned int *SSE,
     54     int *Sum
     55 );
     56 extern unsigned int vp8_get4x4sse_cs_mmx
     57 (
     58     unsigned char *src_ptr,
     59     int  source_stride,
     60     unsigned char *ref_ptr,
     61     int  recon_stride
     62 );
     63 extern void vp8_filter_block2d_bil4x4_var_mmx
     64 (
     65     unsigned char *ref_ptr,
     66     int ref_pixels_per_line,
     67     unsigned char *src_ptr,
     68     int src_pixels_per_line,
     69     const short *HFilter,
     70     const short *VFilter,
     71     int *sum,
     72     unsigned int *sumsquared
     73 );
     74 extern void vp8_filter_block2d_bil_var_mmx
     75 (
     76     unsigned char *ref_ptr,
     77     int ref_pixels_per_line,
     78     unsigned char *src_ptr,
     79     int src_pixels_per_line,
     80     unsigned int Height,
     81     const short *HFilter,
     82     const short *VFilter,
     83     int *sum,
     84     unsigned int *sumsquared
     85 );
     86 extern unsigned int vp8_get16x16pred_error_mmx
     87 (
     88     unsigned char *src_ptr,
     89     int src_stride,
     90     unsigned char *ref_ptr,
     91     int ref_stride
     92 );
     93 
     94 
     95 void vp8_test_get_mb_ss(void)
     96 {
     97     short zz[] =
     98     {
     99         -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
    100         -2, -2, -2, -2, 2, 2, 2, 2, -2, -2, -2, -2, 2, 2, 2, 2,
    101         -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
    102         -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
    103         -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
    104         -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
    105         -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
    106         -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
    107         -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
    108         -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
    109         -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
    110         -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
    111         -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
    112         -3, -3, -3, -3, 3, 3, 3, 3, -3, -3, -3, -3, 3, 3, 3, 3,
    113         -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
    114         -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
    115     };
    116     int s = 0, x = vp8_get_mb_ss_mmx(zz);
    117     {
    118         int y;
    119 
    120         for (y = 0; y < 256; y++)
    121             s += (zz[y] * zz[y]);
    122     }
    123 
    124     x += 0;
    125 }
    126 
    127 
    128 unsigned int vp8_get16x16var_mmx(
    129     unsigned char *src_ptr,
    130     int  source_stride,
    131     unsigned char *ref_ptr,
    132     int  recon_stride,
    133     unsigned *SSE,
    134     unsigned *SUM
    135 )
    136 {
    137     unsigned int sse0, sse1, sse2, sse3, var;
    138     int sum0, sum1, sum2, sum3, avg;
    139 
    140 
    141     vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
    142     vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
    143     vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse2, &sum2) ;
    144     vp8_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride, ref_ptr + 8 * recon_stride + 8, recon_stride, &sse3, &sum3);
    145 
    146     var = sse0 + sse1 + sse2 + sse3;
    147     avg = sum0 + sum1 + sum2 + sum3;
    148 
    149     *SSE = var;
    150     *SUM = avg;
    151     return (var - ((avg * avg) >> 8));
    152 
    153 }
    154 
    155 
    156 
    157 
    158 
    159 unsigned int vp8_variance4x4_mmx(
    160     unsigned char *src_ptr,
    161     int  source_stride,
    162     unsigned char *ref_ptr,
    163     int  recon_stride,
    164     unsigned int *sse)
    165 {
    166     unsigned int var;
    167     int avg;
    168 
    169     vp8_get4x4var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ;
    170     *sse = var;
    171     return (var - ((avg * avg) >> 4));
    172 
    173 }
    174 
    175 unsigned int vp8_variance8x8_mmx(
    176     unsigned char *src_ptr,
    177     int  source_stride,
    178     unsigned char *ref_ptr,
    179     int  recon_stride,
    180     unsigned int *sse)
    181 {
    182     unsigned int var;
    183     int avg;
    184 
    185     vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ;
    186     *sse = var;
    187 
    188     return (var - ((avg * avg) >> 6));
    189 
    190 }
    191 
    192 unsigned int vp8_mse16x16_mmx(
    193     unsigned char *src_ptr,
    194     int  source_stride,
    195     unsigned char *ref_ptr,
    196     int  recon_stride,
    197     unsigned int *sse)
    198 {
    199     unsigned int sse0, sse1, sse2, sse3, var;
    200     int sum0, sum1, sum2, sum3;
    201 
    202 
    203     vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
    204     vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
    205     vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse2, &sum2) ;
    206     vp8_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride, ref_ptr + 8 * recon_stride + 8, recon_stride, &sse3, &sum3);
    207 
    208     var = sse0 + sse1 + sse2 + sse3;
    209     *sse = var;
    210     return var;
    211 }
    212 
    213 
    214 unsigned int vp8_variance16x16_mmx(
    215     unsigned char *src_ptr,
    216     int  source_stride,
    217     unsigned char *ref_ptr,
    218     int  recon_stride,
    219     int *sse)
    220 {
    221     unsigned int sse0, sse1, sse2, sse3, var;
    222     int sum0, sum1, sum2, sum3, avg;
    223 
    224 
    225     vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
    226     vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
    227     vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse2, &sum2) ;
    228     vp8_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride, ref_ptr + 8 * recon_stride + 8, recon_stride, &sse3, &sum3);
    229 
    230     var = sse0 + sse1 + sse2 + sse3;
    231     avg = sum0 + sum1 + sum2 + sum3;
    232     *sse = var;
    233     return (var - ((avg * avg) >> 8));
    234 }
    235 
    236 unsigned int vp8_variance16x8_mmx(
    237     unsigned char *src_ptr,
    238     int  source_stride,
    239     unsigned char *ref_ptr,
    240     int  recon_stride,
    241     unsigned int *sse)
    242 {
    243     unsigned int sse0, sse1, var;
    244     int sum0, sum1, avg;
    245 
    246     vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
    247     vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
    248 
    249     var = sse0 + sse1;
    250     avg = sum0 + sum1;
    251     *sse = var;
    252     return (var - ((avg * avg) >> 7));
    253 
    254 }
    255 
    256 
    257 unsigned int vp8_variance8x16_mmx(
    258     unsigned char *src_ptr,
    259     int  source_stride,
    260     unsigned char *ref_ptr,
    261     int  recon_stride,
    262     unsigned int *sse)
    263 {
    264     unsigned int sse0, sse1, var;
    265     int sum0, sum1, avg;
    266 
    267     vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
    268     vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse1, &sum1) ;
    269 
    270     var = sse0 + sse1;
    271     avg = sum0 + sum1;
    272     *sse = var;
    273 
    274     return (var - ((avg * avg) >> 7));
    275 
    276 }
    277 
    278 
    279 
    280 
///////////////////////////////////////////////////////////////////////////
// the mmx function that does the bilinear filtering and var calculation //
// in one pass                                                           //
///////////////////////////////////////////////////////////////////////////
    285 DECLARE_ALIGNED(16, const short, vp8_vp7_bilinear_filters_mmx[8][8]) =
    286 {
    287     { 128, 128, 128, 128,  0,  0,  0,  0 },
    288     { 112, 112, 112, 112, 16, 16, 16, 16 },
    289     {  96, 96, 96, 96, 32, 32, 32, 32 },
    290     {  80, 80, 80, 80, 48, 48, 48, 48 },
    291     {  64, 64, 64, 64, 64, 64, 64, 64 },
    292     {  48, 48, 48, 48, 80, 80, 80, 80 },
    293     {  32, 32, 32, 32, 96, 96, 96, 96 },
    294     {  16, 16, 16, 16, 112, 112, 112, 112 }
    295 };
    296 
    297 unsigned int vp8_sub_pixel_variance4x4_mmx
    298 (
    299     unsigned char  *src_ptr,
    300     int  src_pixels_per_line,
    301     int  xoffset,
    302     int  yoffset,
    303     unsigned char *dst_ptr,
    304     int dst_pixels_per_line,
    305     unsigned int *sse)
    306 
    307 {
    308     int xsum;
    309     unsigned int xxsum;
    310     vp8_filter_block2d_bil4x4_var_mmx(
    311         src_ptr, src_pixels_per_line,
    312         dst_ptr, dst_pixels_per_line,
    313         vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
    314         &xsum, &xxsum
    315     );
    316     *sse = xxsum;
    317     return (xxsum - ((xsum * xsum) >> 4));
    318 }
    319 
    320 
    321 unsigned int vp8_sub_pixel_variance8x8_mmx
    322 (
    323     unsigned char  *src_ptr,
    324     int  src_pixels_per_line,
    325     int  xoffset,
    326     int  yoffset,
    327     unsigned char *dst_ptr,
    328     int dst_pixels_per_line,
    329     unsigned int *sse
    330 )
    331 {
    332 
    333     int xsum;
    334     unsigned int xxsum;
    335     vp8_filter_block2d_bil_var_mmx(
    336         src_ptr, src_pixels_per_line,
    337         dst_ptr, dst_pixels_per_line, 8,
    338         vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
    339         &xsum, &xxsum
    340     );
    341     *sse = xxsum;
    342     return (xxsum - ((xsum * xsum) >> 6));
    343 }
    344 
    345 unsigned int vp8_sub_pixel_variance16x16_mmx
    346 (
    347     unsigned char  *src_ptr,
    348     int  src_pixels_per_line,
    349     int  xoffset,
    350     int  yoffset,
    351     unsigned char *dst_ptr,
    352     int dst_pixels_per_line,
    353     unsigned int *sse
    354 )
    355 {
    356 
    357     int xsum0, xsum1;
    358     unsigned int xxsum0, xxsum1;
    359 
    360 
    361     vp8_filter_block2d_bil_var_mmx(
    362         src_ptr, src_pixels_per_line,
    363         dst_ptr, dst_pixels_per_line, 16,
    364         vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
    365         &xsum0, &xxsum0
    366     );
    367 
    368 
    369     vp8_filter_block2d_bil_var_mmx(
    370         src_ptr + 8, src_pixels_per_line,
    371         dst_ptr + 8, dst_pixels_per_line, 16,
    372         vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
    373         &xsum1, &xxsum1
    374     );
    375 
    376     xsum0 += xsum1;
    377     xxsum0 += xxsum1;
    378 
    379     *sse = xxsum0;
    380     return (xxsum0 - ((xsum0 * xsum0) >> 8));
    381 
    382 
    383 }
    384 
    385 unsigned int vp8_sub_pixel_mse16x16_mmx(
    386     unsigned char  *src_ptr,
    387     int  src_pixels_per_line,
    388     int  xoffset,
    389     int  yoffset,
    390     unsigned char *dst_ptr,
    391     int dst_pixels_per_line,
    392     unsigned int *sse
    393 )
    394 {
    395     vp8_sub_pixel_variance16x16_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pixels_per_line, sse);
    396     return *sse;
    397 }
    398 
    399 unsigned int vp8_sub_pixel_variance16x8_mmx
    400 (
    401     unsigned char  *src_ptr,
    402     int  src_pixels_per_line,
    403     int  xoffset,
    404     int  yoffset,
    405     unsigned char *dst_ptr,
    406     int dst_pixels_per_line,
    407     unsigned int *sse
    408 )
    409 {
    410     int xsum0, xsum1;
    411     unsigned int xxsum0, xxsum1;
    412 
    413 
    414     vp8_filter_block2d_bil_var_mmx(
    415         src_ptr, src_pixels_per_line,
    416         dst_ptr, dst_pixels_per_line, 8,
    417         vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
    418         &xsum0, &xxsum0
    419     );
    420 
    421 
    422     vp8_filter_block2d_bil_var_mmx(
    423         src_ptr + 8, src_pixels_per_line,
    424         dst_ptr + 8, dst_pixels_per_line, 8,
    425         vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
    426         &xsum1, &xxsum1
    427     );
    428 
    429     xsum0 += xsum1;
    430     xxsum0 += xxsum1;
    431 
    432     *sse = xxsum0;
    433     return (xxsum0 - ((xsum0 * xsum0) >> 7));
    434 }
    435 
    436 unsigned int vp8_sub_pixel_variance8x16_mmx
    437 (
    438     unsigned char  *src_ptr,
    439     int  src_pixels_per_line,
    440     int  xoffset,
    441     int  yoffset,
    442     unsigned char *dst_ptr,
    443     int dst_pixels_per_line,
    444     int *sse
    445 )
    446 {
    447     int xsum;
    448     unsigned int xxsum;
    449     vp8_filter_block2d_bil_var_mmx(
    450         src_ptr, src_pixels_per_line,
    451         dst_ptr, dst_pixels_per_line, 16,
    452         vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
    453         &xsum, &xxsum
    454     );
    455     *sse = xxsum;
    456     return (xxsum - ((xsum * xsum) >> 7));
    457 }
    458 
    459 unsigned int vp8_i_variance16x16_mmx(
    460     unsigned char *src_ptr,
    461     int  source_stride,
    462     unsigned char *ref_ptr,
    463     int  recon_stride,
    464     unsigned int *sse)
    465 {
    466     unsigned int sse0, sse1, sse2, sse3, var;
    467     int sum0, sum1, sum2, sum3, avg;
    468 
    469 
    470     vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
    471     vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
    472     vp8_get8x8var_mmx(src_ptr + (source_stride >> 1), source_stride, ref_ptr + (recon_stride >> 1), recon_stride, &sse2, &sum2) ;
    473     vp8_get8x8var_mmx(src_ptr + (source_stride >> 1) + 8, source_stride, ref_ptr + (recon_stride >> 1) + 8, recon_stride, &sse3, &sum3);
    474 
    475     var = sse0 + sse1 + sse2 + sse3;
    476     avg = sum0 + sum1 + sum2 + sum3;
    477     *sse = var;
    478     return (var - ((avg * avg) >> 8));
    479 
    480 }
    481 
    482 unsigned int vp8_i_variance8x16_mmx(
    483     unsigned char *src_ptr,
    484     int  source_stride,
    485     unsigned char *ref_ptr,
    486     int  recon_stride,
    487     unsigned int *sse)
    488 {
    489     unsigned int sse0, sse1, var;
    490     int sum0, sum1, avg;
    491     vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
    492     vp8_get8x8var_mmx(src_ptr + (source_stride >> 1), source_stride, ref_ptr + (recon_stride >> 1), recon_stride, &sse1, &sum1) ;
    493 
    494     var = sse0 + sse1;
    495     avg = sum0 + sum1;
    496 
    497     *sse = var;
    498     return (var - ((avg * avg) >> 7));
    499 
    500 }
    501 
    502 unsigned int vp8_i_sub_pixel_variance16x16_mmx
    503 (
    504     unsigned char  *src_ptr,
    505     int  src_pixels_per_line,
    506     int  xoffset,
    507     int  yoffset,
    508     unsigned char *dst_ptr,
    509     int dst_pixels_per_line,
    510     unsigned int *sse
    511 )
    512 {
    513     int xsum0, xsum1;
    514     unsigned int xxsum0, xxsum1;
    515     int f2soffset = (src_pixels_per_line >> 1);
    516     int f2doffset = (dst_pixels_per_line >> 1);
    517 
    518 
    519     vp8_filter_block2d_bil_var_mmx(
    520         src_ptr, src_pixels_per_line,
    521         dst_ptr, dst_pixels_per_line, 8,
    522         vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
    523         &xsum0, &xxsum0
    524     );
    525 
    526 
    527     vp8_filter_block2d_bil_var_mmx(
    528         src_ptr + 8, src_pixels_per_line,
    529         dst_ptr + 8, dst_pixels_per_line, 8,
    530         vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
    531         &xsum1, &xxsum1
    532     );
    533 
    534     xsum0 += xsum1;
    535     xxsum0 += xxsum1;
    536 
    537     vp8_filter_block2d_bil_var_mmx(
    538         src_ptr + f2soffset, src_pixels_per_line,
    539         dst_ptr + f2doffset, dst_pixels_per_line, 8,
    540         vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
    541         &xsum1, &xxsum1
    542     );
    543 
    544     xsum0 += xsum1;
    545     xxsum0 += xxsum1;
    546 
    547     vp8_filter_block2d_bil_var_mmx(
    548         src_ptr + f2soffset + 8, src_pixels_per_line,
    549         dst_ptr + f2doffset + 8, dst_pixels_per_line, 8,
    550         vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
    551         &xsum1, &xxsum1
    552     );
    553 
    554     xsum0 += xsum1;
    555     xxsum0 += xxsum1;
    556     *sse = xxsum0;
    557     return (xxsum0 - ((xsum0 * xsum0) >> 8));
    558 }
    559 
    560 
    561 unsigned int vp8_i_sub_pixel_variance8x16_mmx
    562 (
    563     unsigned char  *src_ptr,
    564     int  src_pixels_per_line,
    565     int  xoffset,
    566     int  yoffset,
    567     unsigned char *dst_ptr,
    568     int dst_pixels_per_line,
    569     unsigned int *sse
    570 )
    571 {
    572     int xsum0, xsum1;
    573     unsigned int xxsum0, xxsum1;
    574     int f2soffset = (src_pixels_per_line >> 1);
    575     int f2doffset = (dst_pixels_per_line >> 1);
    576 
    577 
    578     vp8_filter_block2d_bil_var_mmx(
    579         src_ptr, src_pixels_per_line,
    580         dst_ptr, dst_pixels_per_line, 8,
    581         vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
    582         &xsum0, &xxsum0
    583     );
    584 
    585 
    586     vp8_filter_block2d_bil_var_mmx(
    587         src_ptr + f2soffset, src_pixels_per_line,
    588         dst_ptr + f2doffset, dst_pixels_per_line, 8,
    589         vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
    590         &xsum1, &xxsum1
    591     );
    592 
    593     xsum0 += xsum1;
    594     xxsum0 += xxsum1;
    595     *sse = xxsum0;
    596     return (xxsum0 - ((xsum0 * xsum0) >> 7));
    597 }
    598