Home | History | Annotate | Download | only in test
      1 /*
      2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
      3  *
      4  * This source code is subject to the terms of the BSD 2 Clause License and
      5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
      6  * was not distributed with this source code in the LICENSE file, you can
      7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
      8  * Media Patent License 1.0 was not distributed with this source code in the
      9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
     10  */
     11 
     12 #include <string.h>
     13 
     14 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
     15 
     16 #include "config/aom_config.h"
     17 #include "config/aom_dsp_rtcd.h"
     18 
     19 #include "aom_dsp/aom_dsp_common.h"
     20 #include "aom_dsp/aom_filter.h"
     21 #include "aom_mem/aom_mem.h"
     22 #include "aom_ports/aom_timer.h"
     23 #include "aom_ports/mem.h"
     24 #include "av1/common/filter.h"
     25 #include "test/acm_random.h"
     26 #include "test/clear_system_state.h"
     27 #include "test/register_state_check.h"
     28 #include "test/util.h"
     29 
     30 namespace {
     31 
// Upper bound on the block width/height handled by the reference filters in
// this file (taken from MAX_SB_SIZE). Their scratch buffers and the test
// buffer geometry are sized from it.
static const unsigned int kMaxDimension = MAX_SB_SIZE;
     33 
     34 typedef void (*ConvolveFunc)(const uint8_t *src, ptrdiff_t src_stride,
     35                              uint8_t *dst, ptrdiff_t dst_stride,
     36                              const int16_t *filter_x, int filter_x_stride,
     37                              const int16_t *filter_y, int filter_y_stride,
     38                              int w, int h);
     39 
     40 struct ConvolveFunctions {
     41   ConvolveFunctions(ConvolveFunc copy, ConvolveFunc h8, ConvolveFunc v8, int bd)
     42       : copy_(copy), h8_(h8), v8_(v8), use_highbd_(bd) {}
     43 
     44   ConvolveFunc copy_;
     45   ConvolveFunc h8_;
     46   ConvolveFunc v8_;
     47   int use_highbd_;  // 0 if high bitdepth not used, else the actual bit depth.
     48 };
     49 
// Test parameter: (block width, block height, functions under test).
typedef ::testing::tuple<int, int, const ConvolveFunctions *> ConvolveParam;

// Expands to a comma-separated list of parameter tuples covering the listed
// block sizes from 4x4 up to 64x64, each bound to the address of
// |convolve_fn|.
#define ALL_SIZES_64(convolve_fn)                                         \
  make_tuple(4, 4, &convolve_fn), make_tuple(8, 4, &convolve_fn),         \
      make_tuple(4, 8, &convolve_fn), make_tuple(8, 8, &convolve_fn),     \
      make_tuple(16, 8, &convolve_fn), make_tuple(8, 16, &convolve_fn),   \
      make_tuple(16, 16, &convolve_fn), make_tuple(32, 16, &convolve_fn), \
      make_tuple(16, 32, &convolve_fn), make_tuple(32, 32, &convolve_fn), \
      make_tuple(64, 32, &convolve_fn), make_tuple(32, 64, &convolve_fn), \
      make_tuple(64, 64, &convolve_fn)

// Same as ALL_SIZES_64, preceded by the 128x64, 64x128 and 128x128 sizes.
#define ALL_SIZES(convolve_fn)                                          \
  make_tuple(128, 64, &convolve_fn), make_tuple(64, 128, &convolve_fn), \
      make_tuple(128, 128, &convolve_fn), ALL_SIZES_64(convolve_fn)
     64 
     65 // Reference 8-tap subpixel filter, slightly modified to fit into this test.
     66 #define AV1_FILTER_WEIGHT 128
     67 #define AV1_FILTER_SHIFT 7
     68 uint8_t clip_pixel(int x) { return x < 0 ? 0 : x > 255 ? 255 : x; }
     69 
     70 void filter_block2d_8_c(const uint8_t *src_ptr, unsigned int src_stride,
     71                         const int16_t *HFilter, const int16_t *VFilter,
     72                         uint8_t *dst_ptr, unsigned int dst_stride,
     73                         unsigned int output_width, unsigned int output_height) {
     74   // Between passes, we use an intermediate buffer whose height is extended to
     75   // have enough horizontally filtered values as input for the vertical pass.
     76   // This buffer is allocated to be big enough for the largest block type we
     77   // support.
     78   const int kInterp_Extend = 4;
     79   const unsigned int intermediate_height =
     80       (kInterp_Extend - 1) + output_height + kInterp_Extend;
     81   unsigned int i, j;
     82 
     83   assert(intermediate_height > 7);
     84 
     85   // Size of intermediate_buffer is max_intermediate_height * filter_max_width,
     86   // where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height
     87   //                                 + kInterp_Extend
     88   //                               = 3 + 16 + 4
     89   //                               = 23
     90   // and filter_max_width          = 16
     91   //
     92   uint8_t intermediate_buffer[(kMaxDimension + 8) * kMaxDimension];
     93   const int intermediate_next_stride =
     94       1 - static_cast<int>(intermediate_height * output_width);
     95 
     96   // Horizontal pass (src -> transposed intermediate).
     97   uint8_t *output_ptr = intermediate_buffer;
     98   const int src_next_row_stride = src_stride - output_width;
     99   src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1);
    100   for (i = 0; i < intermediate_height; ++i) {
    101     for (j = 0; j < output_width; ++j) {
    102       // Apply filter...
    103       const int temp = (src_ptr[0] * HFilter[0]) + (src_ptr[1] * HFilter[1]) +
    104                        (src_ptr[2] * HFilter[2]) + (src_ptr[3] * HFilter[3]) +
    105                        (src_ptr[4] * HFilter[4]) + (src_ptr[5] * HFilter[5]) +
    106                        (src_ptr[6] * HFilter[6]) + (src_ptr[7] * HFilter[7]) +
    107                        (AV1_FILTER_WEIGHT >> 1);  // Rounding
    108 
    109       // Normalize back to 0-255...
    110       *output_ptr = clip_pixel(temp >> AV1_FILTER_SHIFT);
    111       ++src_ptr;
    112       output_ptr += intermediate_height;
    113     }
    114     src_ptr += src_next_row_stride;
    115     output_ptr += intermediate_next_stride;
    116   }
    117 
    118   // Vertical pass (transposed intermediate -> dst).
    119   src_ptr = intermediate_buffer;
    120   const int dst_next_row_stride = dst_stride - output_width;
    121   for (i = 0; i < output_height; ++i) {
    122     for (j = 0; j < output_width; ++j) {
    123       // Apply filter...
    124       const int temp = (src_ptr[0] * VFilter[0]) + (src_ptr[1] * VFilter[1]) +
    125                        (src_ptr[2] * VFilter[2]) + (src_ptr[3] * VFilter[3]) +
    126                        (src_ptr[4] * VFilter[4]) + (src_ptr[5] * VFilter[5]) +
    127                        (src_ptr[6] * VFilter[6]) + (src_ptr[7] * VFilter[7]) +
    128                        (AV1_FILTER_WEIGHT >> 1);  // Rounding
    129 
    130       // Normalize back to 0-255...
    131       *dst_ptr++ = clip_pixel(temp >> AV1_FILTER_SHIFT);
    132       src_ptr += intermediate_height;
    133     }
    134     src_ptr += intermediate_next_stride;
    135     dst_ptr += dst_next_row_stride;
    136   }
    137 }
    138 
    139 void block2d_average_c(uint8_t *src, unsigned int src_stride,
    140                        uint8_t *output_ptr, unsigned int output_stride,
    141                        unsigned int output_width, unsigned int output_height) {
    142   unsigned int i, j;
    143   for (i = 0; i < output_height; ++i) {
    144     for (j = 0; j < output_width; ++j) {
    145       output_ptr[j] = (output_ptr[j] + src[i * src_stride + j] + 1) >> 1;
    146     }
    147     output_ptr += output_stride;
    148   }
    149 }
    150 
    151 void filter_average_block2d_8_c(const uint8_t *src_ptr,
    152                                 const unsigned int src_stride,
    153                                 const int16_t *HFilter, const int16_t *VFilter,
    154                                 uint8_t *dst_ptr, unsigned int dst_stride,
    155                                 unsigned int output_width,
    156                                 unsigned int output_height) {
    157   uint8_t tmp[kMaxDimension * kMaxDimension];
    158 
    159   assert(output_width <= kMaxDimension);
    160   assert(output_height <= kMaxDimension);
    161   filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp, kMaxDimension,
    162                      output_width, output_height);
    163   block2d_average_c(tmp, kMaxDimension, dst_ptr, dst_stride, output_width,
    164                     output_height);
    165 }
    166 
    167 void highbd_filter_block2d_8_c(const uint16_t *src_ptr,
    168                                const unsigned int src_stride,
    169                                const int16_t *HFilter, const int16_t *VFilter,
    170                                uint16_t *dst_ptr, unsigned int dst_stride,
    171                                unsigned int output_width,
    172                                unsigned int output_height, int bd) {
    173   // Between passes, we use an intermediate buffer whose height is extended to
    174   // have enough horizontally filtered values as input for the vertical pass.
    175   // This buffer is allocated to be big enough for the largest block type we
    176   // support.
    177   const int kInterp_Extend = 4;
    178   const unsigned int intermediate_height =
    179       (kInterp_Extend - 1) + output_height + kInterp_Extend;
    180 
    181   /* Size of intermediate_buffer is max_intermediate_height * filter_max_width,
    182    * where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height
    183    *                                 + kInterp_Extend
    184    *                               = 3 + 16 + 4
    185    *                               = 23
    186    * and filter_max_width = 16
    187    */
    188   uint16_t intermediate_buffer[(kMaxDimension + 8) * kMaxDimension] = { 0 };
    189   const int intermediate_next_stride =
    190       1 - static_cast<int>(intermediate_height * output_width);
    191 
    192   // Horizontal pass (src -> transposed intermediate).
    193   {
    194     uint16_t *output_ptr = intermediate_buffer;
    195     const int src_next_row_stride = src_stride - output_width;
    196     unsigned int i, j;
    197     src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1);
    198     for (i = 0; i < intermediate_height; ++i) {
    199       for (j = 0; j < output_width; ++j) {
    200         // Apply filter...
    201         const int temp = (src_ptr[0] * HFilter[0]) + (src_ptr[1] * HFilter[1]) +
    202                          (src_ptr[2] * HFilter[2]) + (src_ptr[3] * HFilter[3]) +
    203                          (src_ptr[4] * HFilter[4]) + (src_ptr[5] * HFilter[5]) +
    204                          (src_ptr[6] * HFilter[6]) + (src_ptr[7] * HFilter[7]) +
    205                          (AV1_FILTER_WEIGHT >> 1);  // Rounding
    206 
    207         // Normalize back to 0-255...
    208         *output_ptr = clip_pixel_highbd(temp >> AV1_FILTER_SHIFT, bd);
    209         ++src_ptr;
    210         output_ptr += intermediate_height;
    211       }
    212       src_ptr += src_next_row_stride;
    213       output_ptr += intermediate_next_stride;
    214     }
    215   }
    216 
    217   // Vertical pass (transposed intermediate -> dst).
    218   {
    219     const uint16_t *interm_ptr = intermediate_buffer;
    220     const int dst_next_row_stride = dst_stride - output_width;
    221     unsigned int i, j;
    222     for (i = 0; i < output_height; ++i) {
    223       for (j = 0; j < output_width; ++j) {
    224         // Apply filter...
    225         const int temp =
    226             (interm_ptr[0] * VFilter[0]) + (interm_ptr[1] * VFilter[1]) +
    227             (interm_ptr[2] * VFilter[2]) + (interm_ptr[3] * VFilter[3]) +
    228             (interm_ptr[4] * VFilter[4]) + (interm_ptr[5] * VFilter[5]) +
    229             (interm_ptr[6] * VFilter[6]) + (interm_ptr[7] * VFilter[7]) +
    230             (AV1_FILTER_WEIGHT >> 1);  // Rounding
    231 
    232         // Normalize back to 0-255...
    233         *dst_ptr++ = clip_pixel_highbd(temp >> AV1_FILTER_SHIFT, bd);
    234         interm_ptr += intermediate_height;
    235       }
    236       interm_ptr += intermediate_next_stride;
    237       dst_ptr += dst_next_row_stride;
    238     }
    239   }
    240 }
    241 
    242 void highbd_block2d_average_c(uint16_t *src, unsigned int src_stride,
    243                               uint16_t *output_ptr, unsigned int output_stride,
    244                               unsigned int output_width,
    245                               unsigned int output_height) {
    246   unsigned int i, j;
    247   for (i = 0; i < output_height; ++i) {
    248     for (j = 0; j < output_width; ++j) {
    249       output_ptr[j] = (output_ptr[j] + src[i * src_stride + j] + 1) >> 1;
    250     }
    251     output_ptr += output_stride;
    252   }
    253 }
    254 
    255 void highbd_filter_average_block2d_8_c(
    256     const uint16_t *src_ptr, unsigned int src_stride, const int16_t *HFilter,
    257     const int16_t *VFilter, uint16_t *dst_ptr, unsigned int dst_stride,
    258     unsigned int output_width, unsigned int output_height, int bd) {
    259   uint16_t tmp[kMaxDimension * kMaxDimension];
    260 
    261   assert(output_width <= kMaxDimension);
    262   assert(output_height <= kMaxDimension);
    263   highbd_filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp,
    264                             kMaxDimension, output_width, output_height, bd);
    265   highbd_block2d_average_c(tmp, kMaxDimension, dst_ptr, dst_stride,
    266                            output_width, output_height);
    267 }
    268 
    269 class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
    270  public:
    271   static void SetUpTestCase() {
    272     // Force input_ to be unaligned, output to be 16 byte aligned.
    273     input_ = reinterpret_cast<uint8_t *>(
    274                  aom_memalign(kDataAlignment, kInputBufferSize + 1)) +
    275              1;
    276     ref8_ = reinterpret_cast<uint8_t *>(
    277         aom_memalign(kDataAlignment, kOutputStride * kMaxDimension));
    278     output_ = reinterpret_cast<uint8_t *>(
    279         aom_memalign(kDataAlignment, kOutputBufferSize));
    280     output_ref_ = reinterpret_cast<uint8_t *>(
    281         aom_memalign(kDataAlignment, kOutputBufferSize));
    282     input16_ = reinterpret_cast<uint16_t *>(aom_memalign(
    283                    kDataAlignment, (kInputBufferSize + 1) * sizeof(uint16_t))) +
    284                1;
    285     ref16_ = reinterpret_cast<uint16_t *>(aom_memalign(
    286         kDataAlignment, kOutputStride * kMaxDimension * sizeof(uint16_t)));
    287     output16_ = reinterpret_cast<uint16_t *>(
    288         aom_memalign(kDataAlignment, (kOutputBufferSize) * sizeof(uint16_t)));
    289     output16_ref_ = reinterpret_cast<uint16_t *>(
    290         aom_memalign(kDataAlignment, (kOutputBufferSize) * sizeof(uint16_t)));
    291   }
    292 
    293   virtual void TearDown() { libaom_test::ClearSystemState(); }
    294 
    295   static void TearDownTestCase() {
    296     aom_free(input_ - 1);
    297     input_ = NULL;
    298     aom_free(ref8_);
    299     ref8_ = NULL;
    300     aom_free(output_);
    301     output_ = NULL;
    302     aom_free(output_ref_);
    303     output_ref_ = NULL;
    304     aom_free(input16_ - 1);
    305     input16_ = NULL;
    306     aom_free(ref16_);
    307     ref16_ = NULL;
    308     aom_free(output16_);
    309     output16_ = NULL;
    310     aom_free(output16_ref_);
    311     output16_ref_ = NULL;
    312   }
    313 
    314  protected:
    315   static const int kDataAlignment = 16;
    316   static const int kOuterBlockSize = 4 * kMaxDimension;
    317   static const int kInputStride = kOuterBlockSize;
    318   static const int kOutputStride = kOuterBlockSize;
    319   static const int kInputBufferSize = kOuterBlockSize * kOuterBlockSize;
    320   static const int kOutputBufferSize = kOuterBlockSize * kOuterBlockSize;
    321 
    322   int Width() const { return GET_PARAM(0); }
    323   int Height() const { return GET_PARAM(1); }
    324   int BorderLeft() const {
    325     const int center = (kOuterBlockSize - Width()) / 2;
    326     return (center + (kDataAlignment - 1)) & ~(kDataAlignment - 1);
    327   }
    328   int BorderTop() const { return (kOuterBlockSize - Height()) / 2; }
    329 
    330   bool IsIndexInBorder(int i) {
    331     return (i < BorderTop() * kOuterBlockSize ||
    332             i >= (BorderTop() + Height()) * kOuterBlockSize ||
    333             i % kOuterBlockSize < BorderLeft() ||
    334             i % kOuterBlockSize >= (BorderLeft() + Width()));
    335   }
    336 
    337   virtual void SetUp() {
    338     UUT_ = GET_PARAM(2);
    339     if (UUT_->use_highbd_ != 0)
    340       mask_ = (1 << UUT_->use_highbd_) - 1;
    341     else
    342       mask_ = 255;
    343     /* Set up guard blocks for an inner block centered in the outer block */
    344     for (int i = 0; i < kOutputBufferSize; ++i) {
    345       if (IsIndexInBorder(i)) {
    346         output_[i] = 255;
    347         output16_[i] = mask_;
    348       } else {
    349         output_[i] = 0;
    350         output16_[i] = 0;
    351       }
    352     }
    353 
    354     ::libaom_test::ACMRandom prng;
    355     for (int i = 0; i < kInputBufferSize; ++i) {
    356       if (i & 1) {
    357         input_[i] = 255;
    358         input16_[i] = mask_;
    359       } else {
    360         input_[i] = prng.Rand8Extremes();
    361         input16_[i] = prng.Rand16() & mask_;
    362       }
    363     }
    364   }
    365 
    366   void SetConstantInput(int value) {
    367     memset(input_, value, kInputBufferSize);
    368     aom_memset16(input16_, value, kInputBufferSize);
    369   }
    370 
    371   void CopyOutputToRef() {
    372     memcpy(output_ref_, output_, kOutputBufferSize);
    373     // Copy 16-bit pixels values. The effective number of bytes is double.
    374     memcpy(output16_ref_, output16_, sizeof(output16_[0]) * kOutputBufferSize);
    375   }
    376 
    377   void CheckGuardBlocks() {
    378     for (int i = 0; i < kOutputBufferSize; ++i) {
    379       if (IsIndexInBorder(i)) {
    380         EXPECT_EQ(255, output_[i]);
    381       }
    382     }
    383   }
    384 
    385   uint8_t *input() const {
    386     const int offset = BorderTop() * kOuterBlockSize + BorderLeft();
    387     if (UUT_->use_highbd_ == 0) {
    388       return input_ + offset;
    389     } else {
    390       return CONVERT_TO_BYTEPTR(input16_) + offset;
    391     }
    392   }
    393 
    394   uint8_t *output() const {
    395     const int offset = BorderTop() * kOuterBlockSize + BorderLeft();
    396     if (UUT_->use_highbd_ == 0) {
    397       return output_ + offset;
    398     } else {
    399       return CONVERT_TO_BYTEPTR(output16_) + offset;
    400     }
    401   }
    402 
    403   uint8_t *output_ref() const {
    404     const int offset = BorderTop() * kOuterBlockSize + BorderLeft();
    405     if (UUT_->use_highbd_ == 0) {
    406       return output_ref_ + offset;
    407     } else {
    408       return CONVERT_TO_BYTEPTR(output16_ref_) + offset;
    409     }
    410   }
    411 
    412   uint16_t lookup(uint8_t *list, int index) const {
    413     if (UUT_->use_highbd_ == 0) {
    414       return list[index];
    415     } else {
    416       return CONVERT_TO_SHORTPTR(list)[index];
    417     }
    418   }
    419 
    420   void assign_val(uint8_t *list, int index, uint16_t val) const {
    421     if (UUT_->use_highbd_ == 0) {
    422       list[index] = (uint8_t)val;
    423     } else {
    424       CONVERT_TO_SHORTPTR(list)[index] = val;
    425     }
    426   }
    427 
    428   void wrapper_filter_average_block2d_8_c(
    429       const uint8_t *src_ptr, unsigned int src_stride, const int16_t *HFilter,
    430       const int16_t *VFilter, uint8_t *dst_ptr, unsigned int dst_stride,
    431       unsigned int output_width, unsigned int output_height) {
    432     if (UUT_->use_highbd_ == 0) {
    433       filter_average_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, dst_ptr,
    434                                  dst_stride, output_width, output_height);
    435     } else {
    436       highbd_filter_average_block2d_8_c(
    437           CONVERT_TO_SHORTPTR(src_ptr), src_stride, HFilter, VFilter,
    438           CONVERT_TO_SHORTPTR(dst_ptr), dst_stride, output_width, output_height,
    439           UUT_->use_highbd_);
    440     }
    441   }
    442 
    443   void wrapper_filter_block2d_8_c(
    444       const uint8_t *src_ptr, unsigned int src_stride, const int16_t *HFilter,
    445       const int16_t *VFilter, uint8_t *dst_ptr, unsigned int dst_stride,
    446       unsigned int output_width, unsigned int output_height) {
    447     if (UUT_->use_highbd_ == 0) {
    448       filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, dst_ptr,
    449                          dst_stride, output_width, output_height);
    450     } else {
    451       highbd_filter_block2d_8_c(CONVERT_TO_SHORTPTR(src_ptr), src_stride,
    452                                 HFilter, VFilter, CONVERT_TO_SHORTPTR(dst_ptr),
    453                                 dst_stride, output_width, output_height,
    454                                 UUT_->use_highbd_);
    455     }
    456   }
    457 
    458   const ConvolveFunctions *UUT_;
    459   static uint8_t *input_;
    460   static uint8_t *ref8_;
    461   static uint8_t *output_;
    462   static uint8_t *output_ref_;
    463   static uint16_t *input16_;
    464   static uint16_t *ref16_;
    465   static uint16_t *output16_;
    466   static uint16_t *output16_ref_;
    467   int mask_;
    468 };
    469 
// Definitions of the ConvolveTest static buffer pointers. They start out NULL
// and are assigned in ConvolveTest::SetUpTestCase().
uint8_t *ConvolveTest::input_ = NULL;
uint8_t *ConvolveTest::ref8_ = NULL;
uint8_t *ConvolveTest::output_ = NULL;
uint8_t *ConvolveTest::output_ref_ = NULL;
uint16_t *ConvolveTest::input16_ = NULL;
uint16_t *ConvolveTest::ref16_ = NULL;
uint16_t *ConvolveTest::output16_ = NULL;
uint16_t *ConvolveTest::output16_ref_ = NULL;
    478 
    479 TEST_P(ConvolveTest, GuardBlocks) { CheckGuardBlocks(); }
    480 
    481 TEST_P(ConvolveTest, Copy) {
    482   uint8_t *const in = input();
    483   uint8_t *const out = output();
    484 
    485   ASM_REGISTER_STATE_CHECK(UUT_->copy_(in, kInputStride, out, kOutputStride,
    486                                        NULL, 0, NULL, 0, Width(), Height()));
    487 
    488   CheckGuardBlocks();
    489 
    490   for (int y = 0; y < Height(); ++y)
    491     for (int x = 0; x < Width(); ++x)
    492       ASSERT_EQ(lookup(out, y * kOutputStride + x),
    493                 lookup(in, y * kInputStride + x))
    494           << "(" << x << "," << y << ")";
    495 }
    496 
// Number of interpolation filter banks the tests iterate over.
const int kNumFilterBanks = SWITCHABLE_FILTERS;
// Number of kernels read from each bank (indices 0..15); presumably one per
// 1/16-pel subpixel position -- TODO confirm.
const int kNumFilters = 16;
    499 
    500 TEST(ConvolveTest, FiltersWontSaturateWhenAddedPairwise) {
    501   int subpel_search;
    502   for (subpel_search = USE_4_TAPS; subpel_search <= USE_8_TAPS;
    503        ++subpel_search) {
    504     for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
    505       const InterpFilter filter = (InterpFilter)filter_bank;
    506       const InterpKernel *filters =
    507           (const InterpKernel *)av1_get_interp_filter_kernel(filter,
    508                                                              subpel_search);
    509       for (int i = 0; i < kNumFilters; i++) {
    510         const int p0 = filters[i][0] + filters[i][1];
    511         const int p1 = filters[i][2] + filters[i][3];
    512         const int p2 = filters[i][4] + filters[i][5];
    513         const int p3 = filters[i][6] + filters[i][7];
    514         EXPECT_LE(p0, 128);
    515         EXPECT_LE(p1, 128);
    516         EXPECT_LE(p2, 128);
    517         EXPECT_LE(p3, 128);
    518         EXPECT_LE(p0 + p3, 128);
    519         EXPECT_LE(p0 + p3 + p1, 128);
    520         EXPECT_LE(p0 + p3 + p1 + p2, 128);
    521         EXPECT_EQ(p0 + p1 + p2 + p3, 128);
    522       }
    523     }
    524   }
    525 }
    526 
// All-zero kernel passed to the functions under test for the axis they are
// not asked to filter along (and for both axes of copy_).
const int16_t kInvalidFilter[8] = { 0 };
    528 
    529 TEST_P(ConvolveTest, MatchesReferenceSubpixelFilter) {
    530   uint8_t *const in = input();
    531   uint8_t *const out = output();
    532   uint8_t *ref;
    533   if (UUT_->use_highbd_ == 0) {
    534     ref = ref8_;
    535   } else {
    536     ref = CONVERT_TO_BYTEPTR(ref16_);
    537   }
    538   int subpel_search;
    539   for (subpel_search = USE_4_TAPS; subpel_search <= USE_8_TAPS;
    540        ++subpel_search) {
    541     for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
    542       const InterpFilter filter = (InterpFilter)filter_bank;
    543       const InterpKernel *filters =
    544           (const InterpKernel *)av1_get_interp_filter_kernel(filter,
    545                                                              subpel_search);
    546       for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
    547         for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
    548           wrapper_filter_block2d_8_c(in, kInputStride, filters[filter_x],
    549                                      filters[filter_y], ref, kOutputStride,
    550                                      Width(), Height());
    551 
    552           if (filter_x && filter_y)
    553             continue;
    554           else if (filter_y)
    555             ASM_REGISTER_STATE_CHECK(
    556                 UUT_->v8_(in, kInputStride, out, kOutputStride, kInvalidFilter,
    557                           16, filters[filter_y], 16, Width(), Height()));
    558           else if (filter_x)
    559             ASM_REGISTER_STATE_CHECK(UUT_->h8_(
    560                 in, kInputStride, out, kOutputStride, filters[filter_x], 16,
    561                 kInvalidFilter, 16, Width(), Height()));
    562           else
    563             ASM_REGISTER_STATE_CHECK(UUT_->copy_(
    564                 in, kInputStride, out, kOutputStride, kInvalidFilter, 0,
    565                 kInvalidFilter, 0, Width(), Height()));
    566 
    567           CheckGuardBlocks();
    568 
    569           for (int y = 0; y < Height(); ++y)
    570             for (int x = 0; x < Width(); ++x)
    571               ASSERT_EQ(lookup(ref, y * kOutputStride + x),
    572                         lookup(out, y * kOutputStride + x))
    573                   << "mismatch at (" << x << "," << y << "), "
    574                   << "filters (" << filter_bank << "," << filter_x << ","
    575                   << filter_y << ")";
    576         }
    577       }
    578     }
    579   }
    580 }
    581 
    582 TEST_P(ConvolveTest, FilterExtremes) {
    583   uint8_t *const in = input();
    584   uint8_t *const out = output();
    585   uint8_t *ref;
    586   if (UUT_->use_highbd_ == 0) {
    587     ref = ref8_;
    588   } else {
    589     ref = CONVERT_TO_BYTEPTR(ref16_);
    590   }
    591 
    592   // Populate ref and out with some random data
    593   ::libaom_test::ACMRandom prng;
    594   for (int y = 0; y < Height(); ++y) {
    595     for (int x = 0; x < Width(); ++x) {
    596       uint16_t r;
    597       if (UUT_->use_highbd_ == 0 || UUT_->use_highbd_ == 8) {
    598         r = prng.Rand8Extremes();
    599       } else {
    600         r = prng.Rand16() & mask_;
    601       }
    602       assign_val(out, y * kOutputStride + x, r);
    603       assign_val(ref, y * kOutputStride + x, r);
    604     }
    605   }
    606 
    607   for (int axis = 0; axis < 2; axis++) {
    608     int seed_val = 0;
    609     while (seed_val < 256) {
    610       for (int y = 0; y < 8; ++y) {
    611         for (int x = 0; x < 8; ++x) {
    612           assign_val(in, y * kOutputStride + x - SUBPEL_TAPS / 2 + 1,
    613                      ((seed_val >> (axis ? y : x)) & 1) * mask_);
    614           if (axis) seed_val++;
    615         }
    616         if (axis)
    617           seed_val -= 8;
    618         else
    619           seed_val++;
    620       }
    621       if (axis) seed_val += 8;
    622       int subpel_search;
    623       for (subpel_search = USE_4_TAPS; subpel_search <= USE_8_TAPS;
    624            ++subpel_search) {
    625         for (int filter_bank = 0; filter_bank < kNumFilterBanks;
    626              ++filter_bank) {
    627           const InterpFilter filter = (InterpFilter)filter_bank;
    628           const InterpKernel *filters =
    629               (const InterpKernel *)av1_get_interp_filter_kernel(filter,
    630                                                                  subpel_search);
    631           for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
    632             for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
    633               wrapper_filter_block2d_8_c(in, kInputStride, filters[filter_x],
    634                                          filters[filter_y], ref, kOutputStride,
    635                                          Width(), Height());
    636               if (filter_x && filter_y)
    637                 continue;
    638               else if (filter_y)
    639                 ASM_REGISTER_STATE_CHECK(UUT_->v8_(
    640                     in, kInputStride, out, kOutputStride, kInvalidFilter, 16,
    641                     filters[filter_y], 16, Width(), Height()));
    642               else if (filter_x)
    643                 ASM_REGISTER_STATE_CHECK(UUT_->h8_(
    644                     in, kInputStride, out, kOutputStride, filters[filter_x], 16,
    645                     kInvalidFilter, 16, Width(), Height()));
    646               else
    647                 ASM_REGISTER_STATE_CHECK(UUT_->copy_(
    648                     in, kInputStride, out, kOutputStride, kInvalidFilter, 0,
    649                     kInvalidFilter, 0, Width(), Height()));
    650 
    651               for (int y = 0; y < Height(); ++y)
    652                 for (int x = 0; x < Width(); ++x)
    653                   ASSERT_EQ(lookup(ref, y * kOutputStride + x),
    654                             lookup(out, y * kOutputStride + x))
    655                       << "mismatch at (" << x << "," << y << "), "
    656                       << "filters (" << filter_bank << "," << filter_x << ","
    657                       << filter_y << ")";
    658             }
    659           }
    660         }
    661       }
    662     }
    663   }
    664 }
    665 
    666 TEST_P(ConvolveTest, DISABLED_Copy_Speed) {
    667   const uint8_t *const in = input();
    668   uint8_t *const out = output();
    669   const int kNumTests = 5000000;
    670   const int width = Width();
    671   const int height = Height();
    672   aom_usec_timer timer;
    673 
    674   aom_usec_timer_start(&timer);
    675   for (int n = 0; n < kNumTests; ++n) {
    676     UUT_->copy_(in, kInputStride, out, kOutputStride, NULL, 0, NULL, 0, width,
    677                 height);
    678   }
    679   aom_usec_timer_mark(&timer);
    680 
    681   const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
    682   printf("convolve_copy_%dx%d_%d: %d us\n", width, height,
    683          UUT_->use_highbd_ ? UUT_->use_highbd_ : 8, elapsed_time);
    684 }
    685 
    686 TEST_P(ConvolveTest, DISABLED_Speed) {
    687   uint8_t *const in = input();
    688   uint8_t *const out = output();
    689   uint8_t *ref;
    690   if (UUT_->use_highbd_ == 0) {
    691     ref = ref8_;
    692   } else {
    693     ref = CONVERT_TO_BYTEPTR(ref16_);
    694   }
    695 
    696   // Populate ref and out with some random data
    697   ::libaom_test::ACMRandom prng;
    698   for (int y = 0; y < Height(); ++y) {
    699     for (int x = 0; x < Width(); ++x) {
    700       uint16_t r;
    701       if (UUT_->use_highbd_ == 0 || UUT_->use_highbd_ == 8) {
    702         r = prng.Rand8Extremes();
    703       } else {
    704         r = prng.Rand16() & mask_;
    705       }
    706       assign_val(out, y * kOutputStride + x, r);
    707       assign_val(ref, y * kOutputStride + x, r);
    708     }
    709   }
    710 
    711   const InterpFilter filter = (InterpFilter)1;
    712   const InterpKernel *filters =
    713       (const InterpKernel *)av1_get_interp_filter_kernel(filter, USE_8_TAPS);
    714   wrapper_filter_average_block2d_8_c(in, kInputStride, filters[1], filters[1],
    715                                      out, kOutputStride, Width(), Height());
    716 
    717   aom_usec_timer timer;
    718   int tests_num = 1000;
    719 
    720   aom_usec_timer_start(&timer);
    721   while (tests_num > 0) {
    722     for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
    723       const InterpFilter filter = (InterpFilter)filter_bank;
    724       const InterpKernel *filters =
    725           (const InterpKernel *)av1_get_interp_filter_kernel(filter,
    726                                                              USE_8_TAPS);
    727       for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
    728         for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
    729           if (filter_x && filter_y) continue;
    730           if (filter_y)
    731             ASM_REGISTER_STATE_CHECK(
    732                 UUT_->v8_(in, kInputStride, out, kOutputStride, kInvalidFilter,
    733                           16, filters[filter_y], 16, Width(), Height()));
    734           else if (filter_x)
    735             ASM_REGISTER_STATE_CHECK(UUT_->h8_(
    736                 in, kInputStride, out, kOutputStride, filters[filter_x], 16,
    737                 kInvalidFilter, 16, Width(), Height()));
    738         }
    739       }
    740     }
    741     tests_num--;
    742   }
    743   aom_usec_timer_mark(&timer);
    744 
    745   const int elapsed_time =
    746       static_cast<int>(aom_usec_timer_elapsed(&timer) / 1000);
    747   printf("%dx%d (bitdepth %d) time: %5d ms\n", Width(), Height(),
    748          UUT_->use_highbd_, elapsed_time);
    749 }
    750 
    751 using ::testing::make_tuple;
    752 
    753 #define WRAP(func, bd)                                                       \
    754   static void wrap_##func##_##bd(                                            \
    755       const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,                \
    756       ptrdiff_t dst_stride, const int16_t *filter_x, int filter_x_stride,    \
    757       const int16_t *filter_y, int filter_y_stride, int w, int h) {          \
    758     aom_highbd_##func(src, src_stride, dst, dst_stride, filter_x,            \
    759                       filter_x_stride, filter_y, filter_y_stride, w, h, bd); \
    760   }
    761 #if HAVE_SSE2 && ARCH_X86_64
    762 WRAP(convolve_copy_sse2, 8)
    763 WRAP(convolve_copy_sse2, 10)
    764 WRAP(convolve_copy_sse2, 12)
    765 WRAP(convolve8_horiz_sse2, 8)
    766 WRAP(convolve8_vert_sse2, 8)
    767 WRAP(convolve8_horiz_sse2, 10)
    768 WRAP(convolve8_vert_sse2, 10)
    769 WRAP(convolve8_horiz_sse2, 12)
    770 WRAP(convolve8_vert_sse2, 12)
    771 #endif  // HAVE_SSE2 && ARCH_X86_64
    772 
    773 WRAP(convolve_copy_c, 8)
    774 WRAP(convolve8_horiz_c, 8)
    775 WRAP(convolve8_vert_c, 8)
    776 WRAP(convolve_copy_c, 10)
    777 WRAP(convolve8_horiz_c, 10)
    778 WRAP(convolve8_vert_c, 10)
    779 WRAP(convolve_copy_c, 12)
    780 WRAP(convolve8_horiz_c, 12)
    781 WRAP(convolve8_vert_c, 12)
    782 
    783 #if HAVE_AVX2
    784 WRAP(convolve_copy_avx2, 8)
    785 WRAP(convolve8_horiz_avx2, 8)
    786 WRAP(convolve8_vert_avx2, 8)
    787 
    788 WRAP(convolve_copy_avx2, 10)
    789 WRAP(convolve8_horiz_avx2, 10)
    790 WRAP(convolve8_vert_avx2, 10)
    791 
    792 WRAP(convolve_copy_avx2, 12)
    793 WRAP(convolve8_horiz_avx2, 12)
    794 WRAP(convolve8_vert_avx2, 12)
    795 #endif  // HAVE_AVX2
    796 
    797 #undef WRAP
    798 
    799 const ConvolveFunctions convolve8_c(wrap_convolve_copy_c_8,
    800                                     wrap_convolve8_horiz_c_8,
    801                                     wrap_convolve8_vert_c_8, 8);
    802 const ConvolveFunctions convolve10_c(wrap_convolve_copy_c_10,
    803                                      wrap_convolve8_horiz_c_10,
    804                                      wrap_convolve8_vert_c_10, 10);
    805 const ConvolveFunctions convolve12_c(wrap_convolve_copy_c_12,
    806                                      wrap_convolve8_horiz_c_12,
    807                                      wrap_convolve8_vert_c_12, 12);
    808 const ConvolveParam kArrayConvolve_c[] = {
    809   ALL_SIZES(convolve8_c), ALL_SIZES(convolve10_c), ALL_SIZES(convolve12_c)
    810 };
    811 
    812 INSTANTIATE_TEST_CASE_P(C, ConvolveTest, ::testing::ValuesIn(kArrayConvolve_c));
    813 
    #if HAVE_SSE2 && ARCH_X86_64
    // Function tables for SSE2. Arguments are (copy, horizontal 8-tap, vertical
    // 8-tap, bit depth). The first table is constructed with bit depth 0 and
    // uses aom_convolve_copy_c as its copy entry; the wrap_* tables use the
    // adapters generated for depths 8, 10 and 12.
    const ConvolveFunctions convolve8_sse2(
        aom_convolve_copy_c,
        aom_convolve8_horiz_sse2,
        aom_convolve8_vert_sse2,
        0);
    const ConvolveFunctions wrap_convolve8_sse2(
        wrap_convolve_copy_sse2_8,
        wrap_convolve8_horiz_sse2_8,
        wrap_convolve8_vert_sse2_8,
        8);
    const ConvolveFunctions wrap_convolve10_sse2(
        wrap_convolve_copy_sse2_10,
        wrap_convolve8_horiz_sse2_10,
        wrap_convolve8_vert_sse2_10,
        10);
    const ConvolveFunctions wrap_convolve12_sse2(
        wrap_convolve_copy_sse2_12,
        wrap_convolve8_horiz_sse2_12,
        wrap_convolve8_vert_sse2_12,
        12);

    // Test parameters: ALL_SIZES expansion of each SSE2 table.
    const ConvolveParam kArrayConvolve_sse2[] = {
      ALL_SIZES(convolve8_sse2),
      ALL_SIZES(wrap_convolve8_sse2),
      ALL_SIZES(wrap_convolve10_sse2),
      ALL_SIZES(wrap_convolve12_sse2)
    };
    INSTANTIATE_TEST_CASE_P(SSE2, ConvolveTest,
                            ::testing::ValuesIn(kArrayConvolve_sse2));
    #endif  // HAVE_SSE2 && ARCH_X86_64
    834 
    #if HAVE_SSSE3
    // Function table for SSSE3, constructed with bit depth 0. Arguments are
    // (copy, horizontal 8-tap, vertical 8-tap, bit depth); the copy entry is
    // aom_convolve_copy_c.
    const ConvolveFunctions convolve8_ssse3(
        aom_convolve_copy_c,
        aom_convolve8_horiz_ssse3,
        aom_convolve8_vert_ssse3,
        0);

    // Test parameters: ALL_SIZES expansion of the SSSE3 table.
    const ConvolveParam kArrayConvolve8_ssse3[] = {
      ALL_SIZES(convolve8_ssse3)
    };
    INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest,
                            ::testing::ValuesIn(kArrayConvolve8_ssse3));
    #endif  // HAVE_SSSE3
    844 
    #if HAVE_AVX2
    // Function tables for AVX2. Arguments are (copy, horizontal 8-tap, vertical
    // 8-tap, bit depth). The first table is constructed with bit depth 0 and
    // uses aom_convolve_copy_c as its copy entry; the wrap_* tables use the
    // adapters generated for depths 8, 10 and 12.
    const ConvolveFunctions convolve8_avx2(
        aom_convolve_copy_c,
        aom_convolve8_horiz_avx2,
        aom_convolve8_vert_avx2,
        0);

    const ConvolveFunctions wrap_convolve8_avx2(
        wrap_convolve_copy_avx2_8,
        wrap_convolve8_horiz_avx2_8,
        wrap_convolve8_vert_avx2_8,
        8);
    const ConvolveFunctions wrap_convolve10_avx2(
        wrap_convolve_copy_avx2_10,
        wrap_convolve8_horiz_avx2_10,
        wrap_convolve8_vert_avx2_10,
        10);
    const ConvolveFunctions wrap_convolve12_avx2(
        wrap_convolve_copy_avx2_12,
        wrap_convolve8_horiz_avx2_12,
        wrap_convolve8_vert_avx2_12,
        12);

    // Test parameters: the wrap_* tables are expanded with ALL_SIZES_64, the
    // bit-depth-0 table with ALL_SIZES.
    const ConvolveParam kArray_Convolve8_avx2[] = {
      ALL_SIZES_64(wrap_convolve8_avx2),
      ALL_SIZES_64(wrap_convolve10_avx2),
      ALL_SIZES_64(wrap_convolve12_avx2),
      ALL_SIZES(convolve8_avx2)
    };
    INSTANTIATE_TEST_CASE_P(AVX2, ConvolveTest,
                            ::testing::ValuesIn(kArray_Convolve8_avx2));
    #endif  // HAVE_AVX2
    866 
    867 }  // namespace
    868