Home | History | Annotate | Download | only in encoder
      1 /*
      2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 
     12 #include "vp9/encoder/vp9_variance.h"
     13 #include "vp9/common/vp9_filter.h"
     14 #include "vp9/common/vp9_subpelvar.h"
     15 #include "vpx/vpx_integer.h"
     16 #include "vpx_ports/mem.h"
     17 #include "./vp9_rtcd.h"
     18 
     19 unsigned int vp9_get_mb_ss_c(const int16_t *src_ptr) {
     20   unsigned int i, sum = 0;
     21 
     22   for (i = 0; i < 256; i++) {
     23     sum += (src_ptr[i] * src_ptr[i]);
     24   }
     25 
     26   return sum;
     27 }
     28 
     29 unsigned int vp9_variance64x32_c(const uint8_t *src_ptr,
     30                                  int  source_stride,
     31                                  const uint8_t *ref_ptr,
     32                                  int  recon_stride,
     33                                  unsigned int *sse) {
     34   unsigned int var;
     35   int avg;
     36 
     37   variance(src_ptr, source_stride, ref_ptr, recon_stride, 64, 32, &var, &avg);
     38   *sse = var;
     39   return (var - (((int64_t)avg * avg) >> 11));
     40 }
     41 
     42 unsigned int vp9_sub_pixel_variance64x32_c(const uint8_t *src_ptr,
     43                                            int  src_pixels_per_line,
     44                                            int  xoffset,
     45                                            int  yoffset,
     46                                            const uint8_t *dst_ptr,
     47                                            int dst_pixels_per_line,
     48                                            unsigned int *sse) {
     49   uint16_t fdata3[65 * 64];  // Temp data buffer used in filtering
     50   uint8_t temp2[68 * 64];
     51   const int16_t *hfilter, *vfilter;
     52 
     53   hfilter = BILINEAR_FILTERS_2TAP(xoffset);
     54   vfilter = BILINEAR_FILTERS_2TAP(yoffset);
     55 
     56   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
     57                                     1, 33, 64, hfilter);
     58   var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 32, 64, vfilter);
     59 
     60   return vp9_variance64x32(temp2, 64, dst_ptr, dst_pixels_per_line, sse);
     61 }
     62 
     63 unsigned int vp9_sub_pixel_avg_variance64x32_c(const uint8_t *src_ptr,
     64                                                int  src_pixels_per_line,
     65                                                int  xoffset,
     66                                                int  yoffset,
     67                                                const uint8_t *dst_ptr,
     68                                                int dst_pixels_per_line,
     69                                                unsigned int *sse,
     70                                                const uint8_t *second_pred) {
     71   uint16_t fdata3[65 * 64];  // Temp data buffer used in filtering
     72   uint8_t temp2[68 * 64];
     73   DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 64 * 64);  // compound pred buffer
     74   const int16_t *hfilter, *vfilter;
     75 
     76   hfilter = BILINEAR_FILTERS_2TAP(xoffset);
     77   vfilter = BILINEAR_FILTERS_2TAP(yoffset);
     78 
     79   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
     80                                     1, 33, 64, hfilter);
     81   var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 32, 64, vfilter);
     82   comp_avg_pred(temp3, second_pred, 64, 32, temp2, 64);
     83   return vp9_variance64x32(temp3, 64, dst_ptr, dst_pixels_per_line, sse);
     84 }
     85 
     86 unsigned int vp9_variance32x64_c(const uint8_t *src_ptr,
     87                                  int  source_stride,
     88                                  const uint8_t *ref_ptr,
     89                                  int  recon_stride,
     90                                  unsigned int *sse) {
     91   unsigned int var;
     92   int avg;
     93 
     94   variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 64, &var, &avg);
     95   *sse = var;
     96   return (var - (((int64_t)avg * avg) >> 11));
     97 }
     98 
     99 unsigned int vp9_sub_pixel_variance32x64_c(const uint8_t *src_ptr,
    100                                            int  src_pixels_per_line,
    101                                            int  xoffset,
    102                                            int  yoffset,
    103                                            const uint8_t *dst_ptr,
    104                                            int dst_pixels_per_line,
    105                                            unsigned int *sse) {
    106   uint16_t fdata3[65 * 64];  // Temp data buffer used in filtering
    107   uint8_t temp2[68 * 64];
    108   const int16_t *hfilter, *vfilter;
    109 
    110   hfilter = BILINEAR_FILTERS_2TAP(xoffset);
    111   vfilter = BILINEAR_FILTERS_2TAP(yoffset);
    112 
    113   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
    114                                     1, 65, 32, hfilter);
    115   var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 64, 32, vfilter);
    116 
    117   return vp9_variance32x64(temp2, 32, dst_ptr, dst_pixels_per_line, sse);
    118 }
    119 
    120 unsigned int vp9_sub_pixel_avg_variance32x64_c(const uint8_t *src_ptr,
    121                                                int  src_pixels_per_line,
    122                                                int  xoffset,
    123                                                int  yoffset,
    124                                                const uint8_t *dst_ptr,
    125                                                int dst_pixels_per_line,
    126                                                unsigned int *sse,
    127                                                const uint8_t *second_pred) {
    128   uint16_t fdata3[65 * 64];  // Temp data buffer used in filtering
    129   uint8_t temp2[68 * 64];
    130   DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 64);  // compound pred buffer
    131   const int16_t *hfilter, *vfilter;
    132 
    133   hfilter = BILINEAR_FILTERS_2TAP(xoffset);
    134   vfilter = BILINEAR_FILTERS_2TAP(yoffset);
    135 
    136   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
    137                                     1, 65, 32, hfilter);
    138   var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 64, 32, vfilter);
    139   comp_avg_pred(temp3, second_pred, 32, 64, temp2, 32);
    140   return vp9_variance32x64(temp3, 32, dst_ptr, dst_pixels_per_line, sse);
    141 }
    142 
    143 unsigned int vp9_variance32x16_c(const uint8_t *src_ptr,
    144                                  int  source_stride,
    145                                  const uint8_t *ref_ptr,
    146                                  int  recon_stride,
    147                                  unsigned int *sse) {
    148   unsigned int var;
    149   int avg;
    150 
    151   variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 16, &var, &avg);
    152   *sse = var;
    153   return (var - (((int64_t)avg * avg) >> 9));
    154 }
    155 
    156 unsigned int vp9_sub_pixel_variance32x16_c(const uint8_t *src_ptr,
    157                                            int  src_pixels_per_line,
    158                                            int  xoffset,
    159                                            int  yoffset,
    160                                            const uint8_t *dst_ptr,
    161                                            int dst_pixels_per_line,
    162                                            unsigned int *sse) {
    163   uint16_t fdata3[33 * 32];  // Temp data buffer used in filtering
    164   uint8_t temp2[36 * 32];
    165   const int16_t *hfilter, *vfilter;
    166 
    167   hfilter = BILINEAR_FILTERS_2TAP(xoffset);
    168   vfilter = BILINEAR_FILTERS_2TAP(yoffset);
    169 
    170   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
    171                                     1, 17, 32, hfilter);
    172   var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 16, 32, vfilter);
    173 
    174   return vp9_variance32x16(temp2, 32, dst_ptr, dst_pixels_per_line, sse);
    175 }
    176 
    177 unsigned int vp9_sub_pixel_avg_variance32x16_c(const uint8_t *src_ptr,
    178                                                int  src_pixels_per_line,
    179                                                int  xoffset,
    180                                                int  yoffset,
    181                                                const uint8_t *dst_ptr,
    182                                                int dst_pixels_per_line,
    183                                                unsigned int *sse,
    184                                                const uint8_t *second_pred) {
    185   uint16_t fdata3[33 * 32];  // Temp data buffer used in filtering
    186   uint8_t temp2[36 * 32];
    187   DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 16);  // compound pred buffer
    188   const int16_t *hfilter, *vfilter;
    189 
    190   hfilter = BILINEAR_FILTERS_2TAP(xoffset);
    191   vfilter = BILINEAR_FILTERS_2TAP(yoffset);
    192 
    193   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
    194                                     1, 17, 32, hfilter);
    195   var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 16, 32, vfilter);
    196   comp_avg_pred(temp3, second_pred, 32, 16, temp2, 32);
    197   return vp9_variance32x16(temp3, 32, dst_ptr, dst_pixels_per_line, sse);
    198 }
    199 
    200 unsigned int vp9_variance16x32_c(const uint8_t *src_ptr,
    201                                  int  source_stride,
    202                                  const uint8_t *ref_ptr,
    203                                  int  recon_stride,
    204                                  unsigned int *sse) {
    205   unsigned int var;
    206   int avg;
    207 
    208   variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 32, &var, &avg);
    209   *sse = var;
    210   return (var - (((int64_t)avg * avg) >> 9));
    211 }
    212 
    213 unsigned int vp9_sub_pixel_variance16x32_c(const uint8_t *src_ptr,
    214                                            int  src_pixels_per_line,
    215                                            int  xoffset,
    216                                            int  yoffset,
    217                                            const uint8_t *dst_ptr,
    218                                            int dst_pixels_per_line,
    219                                            unsigned int *sse) {
    220   uint16_t fdata3[33 * 32];  // Temp data buffer used in filtering
    221   uint8_t temp2[36 * 32];
    222   const int16_t *hfilter, *vfilter;
    223 
    224   hfilter = BILINEAR_FILTERS_2TAP(xoffset);
    225   vfilter = BILINEAR_FILTERS_2TAP(yoffset);
    226 
    227   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
    228                                     1, 33, 16, hfilter);
    229   var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 32, 16, vfilter);
    230 
    231   return vp9_variance16x32(temp2, 16, dst_ptr, dst_pixels_per_line, sse);
    232 }
    233 
    234 unsigned int vp9_sub_pixel_avg_variance16x32_c(const uint8_t *src_ptr,
    235                                                int  src_pixels_per_line,
    236                                                int  xoffset,
    237                                                int  yoffset,
    238                                                const uint8_t *dst_ptr,
    239                                                int dst_pixels_per_line,
    240                                                unsigned int *sse,
    241                                                const uint8_t *second_pred) {
    242   uint16_t fdata3[33 * 32];  // Temp data buffer used in filtering
    243   uint8_t temp2[36 * 32];
    244   DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 32);  // compound pred buffer
    245   const int16_t *hfilter, *vfilter;
    246 
    247   hfilter = BILINEAR_FILTERS_2TAP(xoffset);
    248   vfilter = BILINEAR_FILTERS_2TAP(yoffset);
    249 
    250   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
    251                                     1, 33, 16, hfilter);
    252   var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 32, 16, vfilter);
    253   comp_avg_pred(temp3, second_pred, 16, 32, temp2, 16);
    254   return vp9_variance16x32(temp3, 16, dst_ptr, dst_pixels_per_line, sse);
    255 }
    256 
    257 unsigned int vp9_variance64x64_c(const uint8_t *src_ptr,
    258                                  int  source_stride,
    259                                  const uint8_t *ref_ptr,
    260                                  int  recon_stride,
    261                                  unsigned int *sse) {
    262   unsigned int var;
    263   int avg;
    264 
    265   variance(src_ptr, source_stride, ref_ptr, recon_stride, 64, 64, &var, &avg);
    266   *sse = var;
    267   return (var - (((int64_t)avg * avg) >> 12));
    268 }
    269 
    270 unsigned int vp9_variance32x32_c(const uint8_t *src_ptr,
    271                                  int  source_stride,
    272                                  const uint8_t *ref_ptr,
    273                                  int  recon_stride,
    274                                  unsigned int *sse) {
    275   unsigned int var;
    276   int avg;
    277 
    278   variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 32, &var, &avg);
    279   *sse = var;
    280   return (var - (((int64_t)avg * avg) >> 10));
    281 }
    282 
    283 unsigned int vp9_variance16x16_c(const uint8_t *src_ptr,
    284                                  int  source_stride,
    285                                  const uint8_t *ref_ptr,
    286                                  int  recon_stride,
    287                                  unsigned int *sse) {
    288   unsigned int var;
    289   int avg;
    290 
    291   variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg);
    292   *sse = var;
    293   return (var - (((unsigned int)avg * avg) >> 8));
    294 }
    295 
    296 unsigned int vp9_variance8x16_c(const uint8_t *src_ptr,
    297                                 int  source_stride,
    298                                 const uint8_t *ref_ptr,
    299                                 int  recon_stride,
    300                                 unsigned int *sse) {
    301   unsigned int var;
    302   int avg;
    303 
    304   variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16, &var, &avg);
    305   *sse = var;
    306   return (var - (((unsigned int)avg * avg) >> 7));
    307 }
    308 
    309 unsigned int vp9_variance16x8_c(const uint8_t *src_ptr,
    310                                 int  source_stride,
    311                                 const uint8_t *ref_ptr,
    312                                 int  recon_stride,
    313                                 unsigned int *sse) {
    314   unsigned int var;
    315   int avg;
    316 
    317   variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8, &var, &avg);
    318   *sse = var;
    319   return (var - (((unsigned int)avg * avg) >> 7));
    320 }
    321 
    322 void vp9_get_sse_sum_8x8_c(const uint8_t *src_ptr, int source_stride,
    323                        const uint8_t *ref_ptr, int ref_stride,
    324                        unsigned int *sse, int *sum) {
    325   variance(src_ptr, source_stride, ref_ptr, ref_stride, 8, 8, sse, sum);
    326 }
    327 
    328 unsigned int vp9_variance8x8_c(const uint8_t *src_ptr,
    329                                int  source_stride,
    330                                const uint8_t *ref_ptr,
    331                                int  recon_stride,
    332                                unsigned int *sse) {
    333   unsigned int var;
    334   int avg;
    335 
    336   variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, &var, &avg);
    337   *sse = var;
    338   return (var - (((unsigned int)avg * avg) >> 6));
    339 }
    340 
    341 unsigned int vp9_variance8x4_c(const uint8_t *src_ptr,
    342                                int  source_stride,
    343                                const uint8_t *ref_ptr,
    344                                int  recon_stride,
    345                                unsigned int *sse) {
    346   unsigned int var;
    347   int avg;
    348 
    349   variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 4, &var, &avg);
    350   *sse = var;
    351   return (var - (((unsigned int)avg * avg) >> 5));
    352 }
    353 
    354 unsigned int vp9_variance4x8_c(const uint8_t *src_ptr,
    355                                int  source_stride,
    356                                const uint8_t *ref_ptr,
    357                                int  recon_stride,
    358                                unsigned int *sse) {
    359   unsigned int var;
    360   int avg;
    361 
    362   variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 8, &var, &avg);
    363   *sse = var;
    364   return (var - (((unsigned int)avg * avg) >> 5));
    365 }
    366 
    367 unsigned int vp9_variance4x4_c(const uint8_t *src_ptr,
    368                                int  source_stride,
    369                                const uint8_t *ref_ptr,
    370                                int  recon_stride,
    371                                unsigned int *sse) {
    372   unsigned int var;
    373   int avg;
    374 
    375   variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 4, &var, &avg);
    376   *sse = var;
    377   return (var - (((unsigned int)avg * avg) >> 4));
    378 }
    379 
    380 
    381 unsigned int vp9_mse16x16_c(const uint8_t *src_ptr,
    382                             int  source_stride,
    383                             const uint8_t *ref_ptr,
    384                             int  recon_stride,
    385                             unsigned int *sse) {
    386   unsigned int var;
    387   int avg;
    388 
    389   variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg);
    390   *sse = var;
    391   return var;
    392 }
    393 
    394 unsigned int vp9_mse16x8_c(const uint8_t *src_ptr,
    395                            int  source_stride,
    396                            const uint8_t *ref_ptr,
    397                            int  recon_stride,
    398                            unsigned int *sse) {
    399   unsigned int var;
    400   int avg;
    401 
    402   variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8, &var, &avg);
    403   *sse = var;
    404   return var;
    405 }
    406 
    407 unsigned int vp9_mse8x16_c(const uint8_t *src_ptr,
    408                            int  source_stride,
    409                            const uint8_t *ref_ptr,
    410                            int  recon_stride,
    411                            unsigned int *sse) {
    412   unsigned int var;
    413   int avg;
    414 
    415   variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16, &var, &avg);
    416   *sse = var;
    417   return var;
    418 }
    419 
    420 unsigned int vp9_mse8x8_c(const uint8_t *src_ptr,
    421                           int  source_stride,
    422                           const uint8_t *ref_ptr,
    423                           int  recon_stride,
    424                           unsigned int *sse) {
    425   unsigned int var;
    426   int avg;
    427 
    428   variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, &var, &avg);
    429   *sse = var;
    430   return var;
    431 }
    432 
    433 
    434 unsigned int vp9_sub_pixel_variance4x4_c(const uint8_t *src_ptr,
    435                                          int  src_pixels_per_line,
    436                                          int  xoffset,
    437                                          int  yoffset,
    438                                          const uint8_t *dst_ptr,
    439                                          int dst_pixels_per_line,
    440                                          unsigned int *sse) {
    441   uint8_t temp2[20 * 16];
    442   const int16_t *hfilter, *vfilter;
    443   uint16_t fdata3[5 * 4];  // Temp data buffer used in filtering
    444 
    445   hfilter = BILINEAR_FILTERS_2TAP(xoffset);
    446   vfilter = BILINEAR_FILTERS_2TAP(yoffset);
    447 
    448   // First filter 1d Horizontal
    449   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
    450                                     1, 5, 4, hfilter);
    451 
    452   // Now filter Verticaly
    453   var_filter_block2d_bil_second_pass(fdata3, temp2, 4,  4,  4,  4, vfilter);
    454 
    455   return vp9_variance4x4(temp2, 4, dst_ptr, dst_pixels_per_line, sse);
    456 }
    457 
    458 unsigned int vp9_sub_pixel_avg_variance4x4_c(const uint8_t *src_ptr,
    459                                              int  src_pixels_per_line,
    460                                              int  xoffset,
    461                                              int  yoffset,
    462                                              const uint8_t *dst_ptr,
    463                                              int dst_pixels_per_line,
    464                                              unsigned int *sse,
    465                                              const uint8_t *second_pred) {
    466   uint8_t temp2[20 * 16];
    467   const int16_t *hfilter, *vfilter;
    468   DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 4);  // compound pred buffer
    469   uint16_t fdata3[5 * 4];  // Temp data buffer used in filtering
    470 
    471   hfilter = BILINEAR_FILTERS_2TAP(xoffset);
    472   vfilter = BILINEAR_FILTERS_2TAP(yoffset);
    473 
    474   // First filter 1d Horizontal
    475   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
    476                                     1, 5, 4, hfilter);
    477 
    478   // Now filter Verticaly
    479   var_filter_block2d_bil_second_pass(fdata3, temp2, 4,  4,  4,  4, vfilter);
    480   comp_avg_pred(temp3, second_pred, 4, 4, temp2, 4);
    481   return vp9_variance4x4(temp3, 4, dst_ptr, dst_pixels_per_line, sse);
    482 }
    483 
    484 unsigned int vp9_sub_pixel_variance8x8_c(const uint8_t *src_ptr,
    485                                          int  src_pixels_per_line,
    486                                          int  xoffset,
    487                                          int  yoffset,
    488                                          const uint8_t *dst_ptr,
    489                                          int dst_pixels_per_line,
    490                                          unsigned int *sse) {
    491   uint16_t fdata3[9 * 8];  // Temp data buffer used in filtering
    492   uint8_t temp2[20 * 16];
    493   const int16_t *hfilter, *vfilter;
    494 
    495   hfilter = BILINEAR_FILTERS_2TAP(xoffset);
    496   vfilter = BILINEAR_FILTERS_2TAP(yoffset);
    497 
    498   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
    499                                     1, 9, 8, hfilter);
    500   var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 8, 8, vfilter);
    501 
    502   return vp9_variance8x8(temp2, 8, dst_ptr, dst_pixels_per_line, sse);
    503 }
    504 
    505 unsigned int vp9_sub_pixel_avg_variance8x8_c(const uint8_t *src_ptr,
    506                                              int  src_pixels_per_line,
    507                                              int  xoffset,
    508                                              int  yoffset,
    509                                              const uint8_t *dst_ptr,
    510                                              int dst_pixels_per_line,
    511                                              unsigned int *sse,
    512                                              const uint8_t *second_pred) {
    513   uint16_t fdata3[9 * 8];  // Temp data buffer used in filtering
    514   uint8_t temp2[20 * 16];
    515   DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 8);  // compound pred buffer
    516   const int16_t *hfilter, *vfilter;
    517 
    518   hfilter = BILINEAR_FILTERS_2TAP(xoffset);
    519   vfilter = BILINEAR_FILTERS_2TAP(yoffset);
    520 
    521   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
    522                                     1, 9, 8, hfilter);
    523   var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 8, 8, vfilter);
    524   comp_avg_pred(temp3, second_pred, 8, 8, temp2, 8);
    525   return vp9_variance8x8(temp3, 8, dst_ptr, dst_pixels_per_line, sse);
    526 }
    527 
    528 unsigned int vp9_sub_pixel_variance16x16_c(const uint8_t *src_ptr,
    529                                            int  src_pixels_per_line,
    530                                            int  xoffset,
    531                                            int  yoffset,
    532                                            const uint8_t *dst_ptr,
    533                                            int dst_pixels_per_line,
    534                                            unsigned int *sse) {
    535   uint16_t fdata3[17 * 16];  // Temp data buffer used in filtering
    536   uint8_t temp2[20 * 16];
    537   const int16_t *hfilter, *vfilter;
    538 
    539   hfilter = BILINEAR_FILTERS_2TAP(xoffset);
    540   vfilter = BILINEAR_FILTERS_2TAP(yoffset);
    541 
    542   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
    543                                     1, 17, 16, hfilter);
    544   var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 16, 16, vfilter);
    545 
    546   return vp9_variance16x16(temp2, 16, dst_ptr, dst_pixels_per_line, sse);
    547 }
    548 
    549 unsigned int vp9_sub_pixel_avg_variance16x16_c(const uint8_t *src_ptr,
    550                                                int  src_pixels_per_line,
    551                                                int  xoffset,
    552                                                int  yoffset,
    553                                                const uint8_t *dst_ptr,
    554                                                int dst_pixels_per_line,
    555                                                unsigned int *sse,
    556                                                const uint8_t *second_pred) {
    557   uint16_t fdata3[17 * 16];
    558   uint8_t temp2[20 * 16];
    559   DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 16);  // compound pred buffer
    560   const int16_t *hfilter, *vfilter;
    561 
    562   hfilter = BILINEAR_FILTERS_2TAP(xoffset);
    563   vfilter = BILINEAR_FILTERS_2TAP(yoffset);
    564 
    565   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
    566                                     1, 17, 16, hfilter);
    567   var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 16, 16, vfilter);
    568 
    569   comp_avg_pred(temp3, second_pred, 16, 16, temp2, 16);
    570   return vp9_variance16x16(temp3, 16, dst_ptr, dst_pixels_per_line, sse);
    571 }
    572 
    573 unsigned int vp9_sub_pixel_variance64x64_c(const uint8_t *src_ptr,
    574                                            int  src_pixels_per_line,
    575                                            int  xoffset,
    576                                            int  yoffset,
    577                                            const uint8_t *dst_ptr,
    578                                            int dst_pixels_per_line,
    579                                            unsigned int *sse) {
    580   uint16_t fdata3[65 * 64];  // Temp data buffer used in filtering
    581   uint8_t temp2[68 * 64];
    582   const int16_t *hfilter, *vfilter;
    583 
    584   hfilter = BILINEAR_FILTERS_2TAP(xoffset);
    585   vfilter = BILINEAR_FILTERS_2TAP(yoffset);
    586 
    587   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
    588                                     1, 65, 64, hfilter);
    589   var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 64, 64, vfilter);
    590 
    591   return vp9_variance64x64(temp2, 64, dst_ptr, dst_pixels_per_line, sse);
    592 }
    593 
    594 unsigned int vp9_sub_pixel_avg_variance64x64_c(const uint8_t *src_ptr,
    595                                                int  src_pixels_per_line,
    596                                                int  xoffset,
    597                                                int  yoffset,
    598                                                const uint8_t *dst_ptr,
    599                                                int dst_pixels_per_line,
    600                                                unsigned int *sse,
    601                                                const uint8_t *second_pred) {
    602   uint16_t fdata3[65 * 64];  // Temp data buffer used in filtering
    603   uint8_t temp2[68 * 64];
    604   DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 64 * 64);  // compound pred buffer
    605   const int16_t *hfilter, *vfilter;
    606 
    607   hfilter = BILINEAR_FILTERS_2TAP(xoffset);
    608   vfilter = BILINEAR_FILTERS_2TAP(yoffset);
    609 
    610   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
    611                                     1, 65, 64, hfilter);
    612   var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 64, 64, vfilter);
    613   comp_avg_pred(temp3, second_pred, 64, 64, temp2, 64);
    614   return vp9_variance64x64(temp3, 64, dst_ptr, dst_pixels_per_line, sse);
    615 }
    616 
    617 unsigned int vp9_sub_pixel_variance32x32_c(const uint8_t *src_ptr,
    618                                            int  src_pixels_per_line,
    619                                            int  xoffset,
    620                                            int  yoffset,
    621                                            const uint8_t *dst_ptr,
    622                                            int dst_pixels_per_line,
    623                                            unsigned int *sse) {
    624   uint16_t fdata3[33 * 32];  // Temp data buffer used in filtering
    625   uint8_t temp2[36 * 32];
    626   const int16_t *hfilter, *vfilter;
    627 
    628   hfilter = BILINEAR_FILTERS_2TAP(xoffset);
    629   vfilter = BILINEAR_FILTERS_2TAP(yoffset);
    630 
    631   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
    632                                     1, 33, 32, hfilter);
    633   var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 32, 32, vfilter);
    634 
    635   return vp9_variance32x32(temp2, 32, dst_ptr, dst_pixels_per_line, sse);
    636 }
    637 
    638 unsigned int vp9_sub_pixel_avg_variance32x32_c(const uint8_t *src_ptr,
    639                                                int  src_pixels_per_line,
    640                                                int  xoffset,
    641                                                int  yoffset,
    642                                                const uint8_t *dst_ptr,
    643                                                int dst_pixels_per_line,
    644                                                unsigned int *sse,
    645                                                const uint8_t *second_pred) {
    646   uint16_t fdata3[33 * 32];  // Temp data buffer used in filtering
    647   uint8_t temp2[36 * 32];
    648   DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 32);  // compound pred buffer
    649   const int16_t *hfilter, *vfilter;
    650 
    651   hfilter = BILINEAR_FILTERS_2TAP(xoffset);
    652   vfilter = BILINEAR_FILTERS_2TAP(yoffset);
    653 
    654   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
    655                                     1, 33, 32, hfilter);
    656   var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 32, 32, vfilter);
    657   comp_avg_pred(temp3, second_pred, 32, 32, temp2, 32);
    658   return vp9_variance32x32(temp3, 32, dst_ptr, dst_pixels_per_line, sse);
    659 }
    660 
    /* 16x16 variance at the horizontal "halfpix" position.
     *
     * Thin wrapper: calls vp9_sub_pixel_variance16x16_c() with xoffset fixed
     * to 8 and yoffset fixed to 0, and returns its result.  sse is forwarded.
     */
    unsigned int vp9_variance_halfpixvar16x16_h_c(const uint8_t *src_ptr,
                                                  int source_stride,
                                                  const uint8_t *ref_ptr,
                                                  int recon_stride,
                                                  unsigned int *sse) {
      const int x_frac = 8;
      const int y_frac = 0;

      return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride,
                                           x_frac, y_frac,
                                           ref_ptr, recon_stride, sse);
    }
    669 
    /* 32x32 variance at the horizontal "halfpix" position.
     *
     * Thin wrapper: calls vp9_sub_pixel_variance32x32_c() with xoffset fixed
     * to 8 and yoffset fixed to 0, and returns its result.  sse is forwarded.
     */
    unsigned int vp9_variance_halfpixvar32x32_h_c(const uint8_t *src_ptr,
                                                  int source_stride,
                                                  const uint8_t *ref_ptr,
                                                  int recon_stride,
                                                  unsigned int *sse) {
      const int x_frac = 8;
      const int y_frac = 0;

      return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride,
                                           x_frac, y_frac,
                                           ref_ptr, recon_stride, sse);
    }
    678 
    /* 64x64 variance at the horizontal "halfpix" position.
     *
     * Thin wrapper: calls vp9_sub_pixel_variance64x64_c() with xoffset fixed
     * to 8 and yoffset fixed to 0, and returns its result.  sse is forwarded.
     */
    unsigned int vp9_variance_halfpixvar64x64_h_c(const uint8_t *src_ptr,
                                                  int source_stride,
                                                  const uint8_t *ref_ptr,
                                                  int recon_stride,
                                                  unsigned int *sse) {
      const int x_frac = 8;
      const int y_frac = 0;

      return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride,
                                           x_frac, y_frac,
                                           ref_ptr, recon_stride, sse);
    }
    687 
    /* 16x16 variance at the vertical "halfpix" position.
     *
     * Thin wrapper: calls vp9_sub_pixel_variance16x16_c() with xoffset fixed
     * to 0 and yoffset fixed to 8, and returns its result.  sse is forwarded.
     */
    unsigned int vp9_variance_halfpixvar16x16_v_c(const uint8_t *src_ptr,
                                                  int source_stride,
                                                  const uint8_t *ref_ptr,
                                                  int recon_stride,
                                                  unsigned int *sse) {
      const int x_frac = 0;
      const int y_frac = 8;

      return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride,
                                           x_frac, y_frac,
                                           ref_ptr, recon_stride, sse);
    }
    696 
    /* 32x32 variance at the vertical "halfpix" position.
     *
     * Thin wrapper: calls vp9_sub_pixel_variance32x32_c() with xoffset fixed
     * to 0 and yoffset fixed to 8, and returns its result.  sse is forwarded.
     */
    unsigned int vp9_variance_halfpixvar32x32_v_c(const uint8_t *src_ptr,
                                                  int source_stride,
                                                  const uint8_t *ref_ptr,
                                                  int recon_stride,
                                                  unsigned int *sse) {
      const int x_frac = 0;
      const int y_frac = 8;

      return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride,
                                           x_frac, y_frac,
                                           ref_ptr, recon_stride, sse);
    }
    705 
    /* 64x64 variance at the vertical "halfpix" position.
     *
     * Thin wrapper: calls vp9_sub_pixel_variance64x64_c() with xoffset fixed
     * to 0 and yoffset fixed to 8, and returns its result.  sse is forwarded.
     */
    unsigned int vp9_variance_halfpixvar64x64_v_c(const uint8_t *src_ptr,
                                                  int source_stride,
                                                  const uint8_t *ref_ptr,
                                                  int recon_stride,
                                                  unsigned int *sse) {
      const int x_frac = 0;
      const int y_frac = 8;

      return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride,
                                           x_frac, y_frac,
                                           ref_ptr, recon_stride, sse);
    }
    714 
    /* 16x16 variance at the diagonal (horizontal + vertical) "halfpix" position.
     *
     * Thin wrapper: calls vp9_sub_pixel_variance16x16_c() with both xoffset
     * and yoffset fixed to 8, and returns its result.  sse is forwarded.
     */
    unsigned int vp9_variance_halfpixvar16x16_hv_c(const uint8_t *src_ptr,
                                                   int source_stride,
                                                   const uint8_t *ref_ptr,
                                                   int recon_stride,
                                                   unsigned int *sse) {
      const int x_frac = 8;
      const int y_frac = 8;

      return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride,
                                           x_frac, y_frac,
                                           ref_ptr, recon_stride, sse);
    }
    723 
    /* 32x32 variance at the diagonal (horizontal + vertical) "halfpix" position.
     *
     * Thin wrapper: calls vp9_sub_pixel_variance32x32_c() with both xoffset
     * and yoffset fixed to 8, and returns its result.  sse is forwarded.
     */
    unsigned int vp9_variance_halfpixvar32x32_hv_c(const uint8_t *src_ptr,
                                                   int source_stride,
                                                   const uint8_t *ref_ptr,
                                                   int recon_stride,
                                                   unsigned int *sse) {
      const int x_frac = 8;
      const int y_frac = 8;

      return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride,
                                           x_frac, y_frac,
                                           ref_ptr, recon_stride, sse);
    }
    732 
    /* 64x64 variance at the diagonal (horizontal + vertical) "halfpix" position.
     *
     * Thin wrapper: calls vp9_sub_pixel_variance64x64_c() with both xoffset
     * and yoffset fixed to 8, and returns its result.  sse is forwarded.
     */
    unsigned int vp9_variance_halfpixvar64x64_hv_c(const uint8_t *src_ptr,
                                                   int source_stride,
                                                   const uint8_t *ref_ptr,
                                                   int recon_stride,
                                                   unsigned int *sse) {
      const int x_frac = 8;
      const int y_frac = 8;

      return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride,
                                           x_frac, y_frac,
                                           ref_ptr, recon_stride, sse);
    }
    741 
    /* Sub-pixel MSE entry point for a 16x16 block.
     *
     * Delegates to vp9_sub_pixel_variance16x16_c() with the same arguments,
     * ignores the value that call returns, and returns whatever it stored
     * through sse instead.
     */
    unsigned int vp9_sub_pixel_mse16x16_c(const uint8_t *src_ptr,
                                          int src_pixels_per_line,
                                          int xoffset,
                                          int yoffset,
                                          const uint8_t *dst_ptr,
                                          int dst_pixels_per_line,
                                          unsigned int *sse) {
      // Called only for its side effect on *sse; the return value is dropped.
      (void)vp9_sub_pixel_variance16x16_c(src_ptr, src_pixels_per_line,
                                          xoffset, yoffset,
                                          dst_ptr, dst_pixels_per_line, sse);

      return *sse;
    }
    754 
    /* Sub-pixel MSE entry point for a 32x32 block.
     *
     * Delegates to vp9_sub_pixel_variance32x32_c() with the same arguments,
     * ignores the value that call returns, and returns whatever it stored
     * through sse instead.
     */
    unsigned int vp9_sub_pixel_mse32x32_c(const uint8_t *src_ptr,
                                          int src_pixels_per_line,
                                          int xoffset,
                                          int yoffset,
                                          const uint8_t *dst_ptr,
                                          int dst_pixels_per_line,
                                          unsigned int *sse) {
      // Called only for its side effect on *sse; the return value is dropped.
      (void)vp9_sub_pixel_variance32x32_c(src_ptr, src_pixels_per_line,
                                          xoffset, yoffset,
                                          dst_ptr, dst_pixels_per_line, sse);

      return *sse;
    }
    767 
    /* Sub-pixel MSE entry point for a 64x64 block.
     *
     * Delegates to vp9_sub_pixel_variance64x64_c() with the same arguments,
     * ignores the value that call returns, and returns whatever it stored
     * through sse instead.
     */
    unsigned int vp9_sub_pixel_mse64x64_c(const uint8_t *src_ptr,
                                          int src_pixels_per_line,
                                          int xoffset,
                                          int yoffset,
                                          const uint8_t *dst_ptr,
                                          int dst_pixels_per_line,
                                          unsigned int *sse) {
      // Called only for its side effect on *sse; the return value is dropped.
      (void)vp9_sub_pixel_variance64x64_c(src_ptr, src_pixels_per_line,
                                          xoffset, yoffset,
                                          dst_ptr, dst_pixels_per_line, sse);

      return *sse;
    }
    780 
    /* C reference sub-pixel variance for a 16x8 (16 wide, 8 high) block.
     *
     * src_ptr is run through the two bilinear filter passes: the first pass
     * uses the 2-tap filter selected by xoffset, the second pass uses the one
     * selected by yoffset.  The filtered block is then handed, together with
     * dst_ptr, to vp9_variance16x8(); that call's result is returned and sse
     * is forwarded to it unchanged.
     */
    unsigned int vp9_sub_pixel_variance16x8_c(const uint8_t *src_ptr,
                                              int src_pixels_per_line,
                                              int xoffset,
                                              int yoffset,
                                              const uint8_t *dst_ptr,
                                              int dst_pixels_per_line,
                                              unsigned int *sse) {
      const int16_t *const horiz_taps = BILINEAR_FILTERS_2TAP(xoffset);
      const int16_t *const vert_taps = BILINEAR_FILTERS_2TAP(yoffset);
      uint16_t pass1_out[16 * 9];  // first-pass output: 9 rows of 16 samples
      uint8_t filtered[20 * 16];   // second-pass output, read with stride 16

      var_filter_block2d_bil_first_pass(src_ptr, pass1_out, src_pixels_per_line,
                                        1, 9, 16, horiz_taps);
      var_filter_block2d_bil_second_pass(pass1_out, filtered, 16, 16, 8, 16,
                                         vert_taps);

      return vp9_variance16x8(filtered, 16, dst_ptr, dst_pixels_per_line, sse);
    }
    801 
    802 unsigned int vp9_sub_pixel_avg_variance16x8_c(const uint8_t *src_ptr,
    803                                               int  src_pixels_per_line,
    804                                               int  xoffset,
    805                                               int  yoffset,
    806                                               const uint8_t *dst_ptr,
    807                                               int dst_pixels_per_line,
    808                                               unsigned int *sse,
    809                                               const uint8_t *second_pred) {
    810   uint16_t fdata3[16 * 9];  // Temp data buffer used in filtering
    811   uint8_t temp2[20 * 16];
    812   DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 8);  // compound pred buffer
    813   const int16_t *hfilter, *vfilter;
    814 
    815   hfilter = BILINEAR_FILTERS_2TAP(xoffset);
    816   vfilter = BILINEAR_FILTERS_2TAP(yoffset);
    817 
    818   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
    819                                     1, 9, 16, hfilter);
    820   var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 8, 16, vfilter);
    821   comp_avg_pred(temp3, second_pred, 16, 8, temp2, 16);
    822   return vp9_variance16x8(temp3, 16, dst_ptr, dst_pixels_per_line, sse);
    823 }
    824 
    /* C reference sub-pixel variance for an 8x16 (8 wide, 16 high) block.
     *
     * src_ptr is run through the two bilinear filter passes: the first pass
     * uses the 2-tap filter selected by xoffset, the second pass uses the one
     * selected by yoffset.  The filtered block is then handed, together with
     * dst_ptr, to vp9_variance8x16(); that call's result is returned and sse
     * is forwarded to it unchanged.
     */
    unsigned int vp9_sub_pixel_variance8x16_c(const uint8_t *src_ptr,
                                              int src_pixels_per_line,
                                              int xoffset,
                                              int yoffset,
                                              const uint8_t *dst_ptr,
                                              int dst_pixels_per_line,
                                              unsigned int *sse) {
      // Dimensioned as rows x columns of the first-pass request below
      // (17 x 8), rather than borrowing the transposed 16x8 shape.
      uint16_t fdata3[17 * 8];
      uint8_t temp2[20 * 16];  // second-pass output, read with stride 8
      const int16_t *hfilter, *vfilter;

      hfilter = BILINEAR_FILTERS_2TAP(xoffset);
      vfilter = BILINEAR_FILTERS_2TAP(yoffset);

      var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                        1, 17, 8, hfilter);
      var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 16, 8, vfilter);

      return vp9_variance8x16(temp2, 8, dst_ptr, dst_pixels_per_line, sse);
    }
    845 
    846 unsigned int vp9_sub_pixel_avg_variance8x16_c(const uint8_t *src_ptr,
    847                                               int  src_pixels_per_line,
    848                                               int  xoffset,
    849                                               int  yoffset,
    850                                               const uint8_t *dst_ptr,
    851                                               int dst_pixels_per_line,
    852                                               unsigned int *sse,
    853                                               const uint8_t *second_pred) {
    854   uint16_t fdata3[9 * 16];  // Temp data buffer used in filtering
    855   uint8_t temp2[20 * 16];
    856   DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 16);  // compound pred buffer
    857   const int16_t *hfilter, *vfilter;
    858 
    859   hfilter = BILINEAR_FILTERS_2TAP(xoffset);
    860   vfilter = BILINEAR_FILTERS_2TAP(yoffset);
    861 
    862   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
    863                                     1, 17, 8, hfilter);
    864   var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 16, 8, vfilter);
    865   comp_avg_pred(temp3, second_pred, 8, 16, temp2, 8);
    866   return vp9_variance8x16(temp3, 8, dst_ptr, dst_pixels_per_line, sse);
    867 }
    868 
    /* C reference sub-pixel variance for an 8x4 (8 wide, 4 high) block.
     *
     * src_ptr is run through the two bilinear filter passes: the first pass
     * uses the 2-tap filter selected by xoffset, the second pass uses the one
     * selected by yoffset.  The filtered block is then handed, together with
     * dst_ptr, to vp9_variance8x4(); that call's result is returned and sse
     * is forwarded to it unchanged.
     */
    unsigned int vp9_sub_pixel_variance8x4_c(const uint8_t *src_ptr,
                                             int src_pixels_per_line,
                                             int xoffset,
                                             int yoffset,
                                             const uint8_t *dst_ptr,
                                             int dst_pixels_per_line,
                                             unsigned int *sse) {
      const int16_t *const horiz_taps = BILINEAR_FILTERS_2TAP(xoffset);
      const int16_t *const vert_taps = BILINEAR_FILTERS_2TAP(yoffset);
      uint16_t pass1_out[8 * 5];  // first-pass output: 5 rows of 8 samples
      uint8_t filtered[20 * 16];  // second-pass output, read with stride 8

      var_filter_block2d_bil_first_pass(src_ptr, pass1_out, src_pixels_per_line,
                                        1, 5, 8, horiz_taps);
      var_filter_block2d_bil_second_pass(pass1_out, filtered, 8, 8, 4, 8,
                                         vert_taps);

      return vp9_variance8x4(filtered, 8, dst_ptr, dst_pixels_per_line, sse);
    }
    889 
    890 unsigned int vp9_sub_pixel_avg_variance8x4_c(const uint8_t *src_ptr,
    891                                              int  src_pixels_per_line,
    892                                              int  xoffset,
    893                                              int  yoffset,
    894                                              const uint8_t *dst_ptr,
    895                                              int dst_pixels_per_line,
    896                                              unsigned int *sse,
    897                                              const uint8_t *second_pred) {
    898   uint16_t fdata3[8 * 5];  // Temp data buffer used in filtering
    899   uint8_t temp2[20 * 16];
    900   DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 4);  // compound pred buffer
    901   const int16_t *hfilter, *vfilter;
    902 
    903   hfilter = BILINEAR_FILTERS_2TAP(xoffset);
    904   vfilter = BILINEAR_FILTERS_2TAP(yoffset);
    905 
    906   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
    907                                     1, 5, 8, hfilter);
    908   var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 4, 8, vfilter);
    909   comp_avg_pred(temp3, second_pred, 8, 4, temp2, 8);
    910   return vp9_variance8x4(temp3, 8, dst_ptr, dst_pixels_per_line, sse);
    911 }
    912 
    /* C reference sub-pixel variance for a 4x8 (4 wide, 8 high) block.
     *
     * src_ptr is run through the two bilinear filter passes: the first pass
     * uses the 2-tap filter selected by xoffset, the second pass uses the one
     * selected by yoffset.  The filtered block is then handed, together with
     * dst_ptr, to vp9_variance4x8(); that call's result is returned and sse
     * is forwarded to it unchanged.
     */
    unsigned int vp9_sub_pixel_variance4x8_c(const uint8_t *src_ptr,
                                             int src_pixels_per_line,
                                             int xoffset,
                                             int yoffset,
                                             const uint8_t *dst_ptr,
                                             int dst_pixels_per_line,
                                             unsigned int *sse) {
      // Dimensioned as rows x columns of the first-pass request below
      // (9 x 4), rather than borrowing the transposed 8x4 shape.
      uint16_t fdata3[9 * 4];
      // FIXME(jingning,rbultje): temp2 is probably larger than it needs to be;
      // the same applies to the other block sizes.
      uint8_t temp2[20 * 16];
      const int16_t *hfilter, *vfilter;

      hfilter = BILINEAR_FILTERS_2TAP(xoffset);
      vfilter = BILINEAR_FILTERS_2TAP(yoffset);

      var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                        1, 9, 4, hfilter);
      var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 8, 4, vfilter);

      return vp9_variance4x8(temp2, 4, dst_ptr, dst_pixels_per_line, sse);
    }
    935 
    936 unsigned int vp9_sub_pixel_avg_variance4x8_c(const uint8_t *src_ptr,
    937                                              int  src_pixels_per_line,
    938                                              int  xoffset,
    939                                              int  yoffset,
    940                                              const uint8_t *dst_ptr,
    941                                              int dst_pixels_per_line,
    942                                              unsigned int *sse,
    943                                              const uint8_t *second_pred) {
    944   uint16_t fdata3[5 * 8];  // Temp data buffer used in filtering
    945   uint8_t temp2[20 * 16];
    946   DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 8);  // compound pred buffer
    947   const int16_t *hfilter, *vfilter;
    948 
    949   hfilter = BILINEAR_FILTERS_2TAP(xoffset);
    950   vfilter = BILINEAR_FILTERS_2TAP(yoffset);
    951 
    952   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
    953                                     1, 9, 4, hfilter);
    954   var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 8, 4, vfilter);
    955   comp_avg_pred(temp3, second_pred, 4, 8, temp2, 4);
    956   return vp9_variance4x8(temp3, 4, dst_ptr, dst_pixels_per_line, sse);
    957 }
    958