Home | History | Annotate | Download | only in test
      1 /*
      2  * Copyright (c) 2018, Alliance for Open Media. All rights reserved
      3  *
      4  * This source code is subject to the terms of the BSD 2 Clause License and
      5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
      6  * was not distributed with this source code in the LICENSE file, you can
      7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
      8  * Media Patent License 1.0 was not distributed with this source code in the
      9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
     10  */
     11 
     12 #include <cstdlib>
     13 #include <new>
     14 
     15 #include "config/aom_config.h"
     16 #include "config/aom_dsp_rtcd.h"
     17 
     18 #include "aom/aom_codec.h"
     19 #include "aom/aom_integer.h"
     20 #include "aom_dsp/variance.h"
     21 #include "aom_mem/aom_mem.h"
     22 #include "aom_ports/aom_timer.h"
     23 #include "aom_ports/mem.h"
     24 #include "av1/common/reconinter.h"
     25 #include "test/acm_random.h"
     26 #include "test/clear_system_state.h"
     27 #include "test/register_state_check.h"
     28 #include "test/util.h"
     29 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
     30 
     31 namespace AV1CompMaskVariance {
     32 typedef void (*comp_mask_pred_func)(uint8_t *comp_pred, const uint8_t *pred,
     33                                     int width, int height, const uint8_t *ref,
     34                                     int ref_stride, const uint8_t *mask,
     35                                     int mask_stride, int invert_mask);
     36 
     37 #if HAVE_SSSE3 || HAVE_SSE2 || HAVE_AV2
     38 const BLOCK_SIZE kValidBlockSize[] = {
     39   BLOCK_8X8,   BLOCK_8X16, BLOCK_8X32,  BLOCK_16X8,  BLOCK_16X16,
     40   BLOCK_16X32, BLOCK_32X8, BLOCK_32X16, BLOCK_32X32,
     41 };
     42 #endif
     43 typedef ::testing::tuple<comp_mask_pred_func, BLOCK_SIZE> CompMaskPredParam;
     44 
     45 class AV1CompMaskVarianceTest
     46     : public ::testing::TestWithParam<CompMaskPredParam> {
     47  public:
     48   ~AV1CompMaskVarianceTest();
     49   void SetUp();
     50 
     51   void TearDown();
     52 
     53  protected:
     54   void RunCheckOutput(comp_mask_pred_func test_impl, BLOCK_SIZE bsize, int inv);
     55   void RunSpeedTest(comp_mask_pred_func test_impl, BLOCK_SIZE bsize);
     56   bool CheckResult(int width, int height) {
     57     for (int y = 0; y < height; ++y) {
     58       for (int x = 0; x < width; ++x) {
     59         const int idx = y * width + x;
     60         if (comp_pred1_[idx] != comp_pred2_[idx]) {
     61           printf("%dx%d mismatch @%d(%d,%d) ", width, height, idx, y, x);
     62           printf("%d != %d ", comp_pred1_[idx], comp_pred2_[idx]);
     63           return false;
     64         }
     65       }
     66     }
     67     return true;
     68   }
     69 
     70   libaom_test::ACMRandom rnd_;
     71   uint8_t *comp_pred1_;
     72   uint8_t *comp_pred2_;
     73   uint8_t *pred_;
     74   uint8_t *ref_buffer_;
     75   uint8_t *ref_;
     76 };
     77 
     78 AV1CompMaskVarianceTest::~AV1CompMaskVarianceTest() { ; }
     79 
     80 void AV1CompMaskVarianceTest::SetUp() {
     81   rnd_.Reset(libaom_test::ACMRandom::DeterministicSeed());
     82   av1_init_wedge_masks();
     83   comp_pred1_ = (uint8_t *)aom_memalign(16, MAX_SB_SQUARE);
     84   comp_pred2_ = (uint8_t *)aom_memalign(16, MAX_SB_SQUARE);
     85   pred_ = (uint8_t *)aom_memalign(16, MAX_SB_SQUARE);
     86   ref_buffer_ = (uint8_t *)aom_memalign(16, MAX_SB_SQUARE + (8 * MAX_SB_SIZE));
     87   ref_ = ref_buffer_ + (8 * MAX_SB_SIZE);
     88   for (int i = 0; i < MAX_SB_SQUARE; ++i) {
     89     pred_[i] = rnd_.Rand8();
     90   }
     91   for (int i = 0; i < MAX_SB_SQUARE + (8 * MAX_SB_SIZE); ++i) {
     92     ref_buffer_[i] = rnd_.Rand8();
     93   }
     94 }
     95 
     96 void AV1CompMaskVarianceTest::TearDown() {
     97   aom_free(comp_pred1_);
     98   aom_free(comp_pred2_);
     99   aom_free(pred_);
    100   aom_free(ref_buffer_);
    101   libaom_test::ClearSystemState();
    102 }
    103 
    104 void AV1CompMaskVarianceTest::RunCheckOutput(comp_mask_pred_func test_impl,
    105                                              BLOCK_SIZE bsize, int inv) {
    106   const int w = block_size_wide[bsize];
    107   const int h = block_size_high[bsize];
    108 
    109   int wedge_types = (1 << get_wedge_bits_lookup(bsize));
    110   for (int wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
    111     const uint8_t *mask = av1_get_contiguous_soft_mask(wedge_index, 1, bsize);
    112 
    113     aom_comp_mask_pred_c(comp_pred1_, pred_, w, h, ref_, MAX_SB_SIZE, mask, w,
    114                          inv);
    115     test_impl(comp_pred2_, pred_, w, h, ref_, MAX_SB_SIZE, mask, w, inv);
    116 
    117     ASSERT_EQ(CheckResult(w, h), true)
    118         << " wedge " << wedge_index << " inv " << inv;
    119   }
    120 }
    121 
    122 void AV1CompMaskVarianceTest::RunSpeedTest(comp_mask_pred_func test_impl,
    123                                            BLOCK_SIZE bsize) {
    124   const int w = block_size_wide[bsize];
    125   const int h = block_size_high[bsize];
    126 
    127   int wedge_types = (1 << get_wedge_bits_lookup(bsize));
    128   int wedge_index = wedge_types / 2;
    129   const uint8_t *mask = av1_get_contiguous_soft_mask(wedge_index, 1, bsize);
    130   const int num_loops = 1000000000 / (w + h);
    131 
    132   comp_mask_pred_func funcs[2] = { aom_comp_mask_pred_c, test_impl };
    133   double elapsed_time[2] = { 0 };
    134   for (int i = 0; i < 2; ++i) {
    135     aom_usec_timer timer;
    136     aom_usec_timer_start(&timer);
    137     comp_mask_pred_func func = funcs[i];
    138     for (int j = 0; j < num_loops; ++j) {
    139       func(comp_pred1_, pred_, w, h, ref_, MAX_SB_SIZE, mask, w, 0);
    140     }
    141     aom_usec_timer_mark(&timer);
    142     double time = static_cast<double>(aom_usec_timer_elapsed(&timer));
    143     elapsed_time[i] = 1000.0 * time / num_loops;
    144   }
    145   printf("compMask %3dx%-3d: %7.2f/%7.2fns", w, h, elapsed_time[0],
    146          elapsed_time[1]);
    147   printf("(%3.2f)\n", elapsed_time[0] / elapsed_time[1]);
    148 }
    149 
    150 TEST_P(AV1CompMaskVarianceTest, CheckOutput) {
    151   // inv = 0, 1
    152   RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 0);
    153   RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 1);
    154 }
    155 
    156 TEST_P(AV1CompMaskVarianceTest, DISABLED_Speed) {
    157   RunSpeedTest(GET_PARAM(0), GET_PARAM(1));
    158 }
    159 
    160 #if HAVE_SSSE3
    161 INSTANTIATE_TEST_CASE_P(
    162     SSSE3, AV1CompMaskVarianceTest,
    163     ::testing::Combine(::testing::Values(&aom_comp_mask_pred_ssse3),
    164                        ::testing::ValuesIn(kValidBlockSize)));
    165 #endif
    166 
    167 #if HAVE_AVX2
    168 INSTANTIATE_TEST_CASE_P(
    169     AVX2, AV1CompMaskVarianceTest,
    170     ::testing::Combine(::testing::Values(&aom_comp_mask_pred_avx2),
    171                        ::testing::ValuesIn(kValidBlockSize)));
    172 #endif
    173 
    174 #ifndef aom_comp_mask_pred
    175 // can't run this test if aom_comp_mask_pred is defined to aom_comp_mask_pred_c
// Fixture for the upsampled variant of the 8-bit comp-mask prediction. It
// reuses the buffers and SetUp()/TearDown() of AV1CompMaskVarianceTest and
// declares its own Run* helpers, which hide the base-class ones of the same
// name (they are not virtual).
class AV1CompMaskUpVarianceTest : public AV1CompMaskVarianceTest {
 public:
  ~AV1CompMaskUpVarianceTest();

 protected:
  // Compares the C reference with the dispatched upsampled prediction while
  // |test_impl| is installed as aom_comp_mask_pred.
  void RunCheckOutput(comp_mask_pred_func test_impl, BLOCK_SIZE bsize, int inv);
  // Benchmarks the upsampled prediction; a non-zero |havSub| selects a
  // non-zero sub-pixel offset.
  void RunSpeedTest(comp_mask_pred_func test_impl, BLOCK_SIZE bsize,
                    int havSub);
};
    185 
    186 AV1CompMaskUpVarianceTest::~AV1CompMaskUpVarianceTest() { ; }
    187 
    188 void AV1CompMaskUpVarianceTest::RunCheckOutput(comp_mask_pred_func test_impl,
    189                                                BLOCK_SIZE bsize, int inv) {
    190   const int w = block_size_wide[bsize];
    191   const int h = block_size_high[bsize];
    192   int wedge_types = (1 << get_wedge_bits_lookup(bsize));
    193   int subpel_search;
    194   for (subpel_search = USE_4_TAPS; subpel_search <= USE_8_TAPS;
    195        ++subpel_search) {
    196     // loop through subx and suby
    197     for (int sub = 0; sub < 8 * 8; ++sub) {
    198       int subx = sub & 0x7;
    199       int suby = (sub >> 3);
    200       for (int wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
    201         const uint8_t *mask =
    202             av1_get_contiguous_soft_mask(wedge_index, 1, bsize);
    203 
    204         // ref
    205         aom_comp_mask_upsampled_pred_c(
    206             NULL, NULL, 0, 0, NULL, comp_pred1_, pred_, w, h, subx, suby, ref_,
    207             MAX_SB_SIZE, mask, w, inv, subpel_search);
    208 
    209         aom_comp_mask_pred = test_impl;  // test
    210         aom_comp_mask_upsampled_pred(NULL, NULL, 0, 0, NULL, comp_pred2_, pred_,
    211                                      w, h, subx, suby, ref_, MAX_SB_SIZE, mask,
    212                                      w, inv, subpel_search);
    213         ASSERT_EQ(CheckResult(w, h), true)
    214             << " wedge " << wedge_index << " inv " << inv << "sub (" << subx
    215             << "," << suby << ")";
    216       }
    217     }
    218   }
    219 }
    220 
    221 void AV1CompMaskUpVarianceTest::RunSpeedTest(comp_mask_pred_func test_impl,
    222                                              BLOCK_SIZE bsize, int havSub) {
    223   const int w = block_size_wide[bsize];
    224   const int h = block_size_high[bsize];
    225   const int subx = havSub ? 3 : 0;
    226   const int suby = havSub ? 4 : 0;
    227 
    228   int wedge_types = (1 << get_wedge_bits_lookup(bsize));
    229   int wedge_index = wedge_types / 2;
    230   const uint8_t *mask = av1_get_contiguous_soft_mask(wedge_index, 1, bsize);
    231 
    232   const int num_loops = 1000000000 / (w + h);
    233   comp_mask_pred_func funcs[2] = { &aom_comp_mask_pred_c, test_impl };
    234   double elapsed_time[2] = { 0 };
    235   int subpel_search = USE_8_TAPS;  // set to USE_4_TAPS to test 4-tap filter.
    236   for (int i = 0; i < 2; ++i) {
    237     aom_usec_timer timer;
    238     aom_usec_timer_start(&timer);
    239     aom_comp_mask_pred = funcs[i];
    240     for (int j = 0; j < num_loops; ++j) {
    241       aom_comp_mask_upsampled_pred(NULL, NULL, 0, 0, NULL, comp_pred1_, pred_,
    242                                    w, h, subx, suby, ref_, MAX_SB_SIZE, mask, w,
    243                                    0, subpel_search);
    244     }
    245     aom_usec_timer_mark(&timer);
    246     double time = static_cast<double>(aom_usec_timer_elapsed(&timer));
    247     elapsed_time[i] = 1000.0 * time / num_loops;
    248   }
    249   printf("CompMaskUp[%d] %3dx%-3d:%7.2f/%7.2fns", havSub, w, h, elapsed_time[0],
    250          elapsed_time[1]);
    251   printf("(%3.2f)\n", elapsed_time[0] / elapsed_time[1]);
    252 }
    253 
    254 TEST_P(AV1CompMaskUpVarianceTest, CheckOutput) {
    255   // inv mask = 0, 1
    256   RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 0);
    257   RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 1);
    258 }
    259 
    260 TEST_P(AV1CompMaskUpVarianceTest, DISABLED_Speed) {
    261   RunSpeedTest(GET_PARAM(0), GET_PARAM(1), 1);
    262 }
    263 
    264 #if HAVE_SSSE3
    265 INSTANTIATE_TEST_CASE_P(
    266     SSSE3, AV1CompMaskUpVarianceTest,
    267     ::testing::Combine(::testing::Values(&aom_comp_mask_pred_ssse3),
    268                        ::testing::ValuesIn(kValidBlockSize)));
    269 #endif
    270 
    271 #if HAVE_AVX2
    272 INSTANTIATE_TEST_CASE_P(
    273     AVX2, AV1CompMaskUpVarianceTest,
    274     ::testing::Combine(::testing::Values(&aom_comp_mask_pred_avx2),
    275                        ::testing::ValuesIn(kValidBlockSize)));
    276 #endif
    277 
    278 #endif  // ifndef aom_comp_mask_pred
    279 
// Signature shared by aom_highbd_comp_mask_pred_c and its SIMD variants. The
// sample pointers are passed as uint8_t *; the tests below obtain them from
// uint16_t buffers via CONVERT_TO_BYTEPTR.
typedef void (*highbd_comp_mask_pred_func)(uint8_t *comp_pred8,
                                           const uint8_t *pred8, int width,
                                           int height, const uint8_t *ref8,
                                           int ref_stride, const uint8_t *mask,
                                           int mask_stride, int invert_mask);

// Test parameter: (implementation under test, block size, bit depth).
typedef ::testing::tuple<highbd_comp_mask_pred_func, BLOCK_SIZE, int>
    HighbdCompMaskPredParam;
    288 
    289 class AV1HighbdCompMaskVarianceTest
    290     : public ::testing::TestWithParam<HighbdCompMaskPredParam> {
    291  public:
    292   ~AV1HighbdCompMaskVarianceTest();
    293   void SetUp();
    294 
    295   void TearDown();
    296 
    297  protected:
    298   void RunCheckOutput(highbd_comp_mask_pred_func test_impl, BLOCK_SIZE bsize,
    299                       int inv);
    300   void RunSpeedTest(highbd_comp_mask_pred_func test_impl, BLOCK_SIZE bsize);
    301   bool CheckResult(int width, int height) {
    302     for (int y = 0; y < height; ++y) {
    303       for (int x = 0; x < width; ++x) {
    304         const int idx = y * width + x;
    305         if (comp_pred1_[idx] != comp_pred2_[idx]) {
    306           printf("%dx%d mismatch @%d(%d,%d) ", width, height, idx, y, x);
    307           printf("%d != %d ", comp_pred1_[idx], comp_pred2_[idx]);
    308           return false;
    309         }
    310       }
    311     }
    312     return true;
    313   }
    314 
    315   libaom_test::ACMRandom rnd_;
    316   uint16_t *comp_pred1_;
    317   uint16_t *comp_pred2_;
    318   uint16_t *pred_;
    319   uint16_t *ref_buffer_;
    320   uint16_t *ref_;
    321 };
    322 
    323 AV1HighbdCompMaskVarianceTest::~AV1HighbdCompMaskVarianceTest() { ; }
    324 
    325 void AV1HighbdCompMaskVarianceTest::SetUp() {
    326   rnd_.Reset(libaom_test::ACMRandom::DeterministicSeed());
    327   av1_init_wedge_masks();
    328 
    329   comp_pred1_ =
    330       (uint16_t *)aom_memalign(16, MAX_SB_SQUARE * sizeof(*comp_pred1_));
    331   comp_pred2_ =
    332       (uint16_t *)aom_memalign(16, MAX_SB_SQUARE * sizeof(*comp_pred2_));
    333   pred_ = (uint16_t *)aom_memalign(16, MAX_SB_SQUARE * sizeof(*pred_));
    334   ref_buffer_ = (uint16_t *)aom_memalign(
    335       16, (MAX_SB_SQUARE + (8 * MAX_SB_SIZE)) * sizeof(*ref_buffer_));
    336   ref_ = ref_buffer_ + (8 * MAX_SB_SIZE);
    337 }
    338 
    339 void AV1HighbdCompMaskVarianceTest::TearDown() {
    340   aom_free(comp_pred1_);
    341   aom_free(comp_pred2_);
    342   aom_free(pred_);
    343   aom_free(ref_buffer_);
    344   libaom_test::ClearSystemState();
    345 }
    346 
    347 void AV1HighbdCompMaskVarianceTest::RunCheckOutput(
    348     highbd_comp_mask_pred_func test_impl, BLOCK_SIZE bsize, int inv) {
    349   int bd_ = GET_PARAM(2);
    350 
    351   const int w = block_size_wide[bsize];
    352   const int h = block_size_high[bsize];
    353 
    354   int wedge_types = (1 << get_wedge_bits_lookup(bsize));
    355 
    356   for (int i = 0; i < MAX_SB_SQUARE; ++i) {
    357     pred_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
    358   }
    359   for (int i = 0; i < MAX_SB_SQUARE + (8 * MAX_SB_SIZE); ++i) {
    360     ref_buffer_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
    361   }
    362 
    363   for (int wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
    364     const uint8_t *mask = av1_get_contiguous_soft_mask(wedge_index, 1, bsize);
    365 
    366     aom_highbd_comp_mask_pred_c(
    367         CONVERT_TO_BYTEPTR(comp_pred1_), CONVERT_TO_BYTEPTR(pred_), w, h,
    368         CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE, mask, w, inv);
    369 
    370     test_impl(CONVERT_TO_BYTEPTR(comp_pred2_), CONVERT_TO_BYTEPTR(pred_), w, h,
    371               CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE, mask, w, inv);
    372 
    373     ASSERT_EQ(CheckResult(w, h), true)
    374         << " wedge " << wedge_index << " inv " << inv;
    375   }
    376 }
    377 
    378 void AV1HighbdCompMaskVarianceTest::RunSpeedTest(
    379     highbd_comp_mask_pred_func test_impl, BLOCK_SIZE bsize) {
    380   int bd_ = GET_PARAM(2);
    381 
    382   const int w = block_size_wide[bsize];
    383   const int h = block_size_high[bsize];
    384 
    385   int wedge_types = (1 << get_wedge_bits_lookup(bsize));
    386   int wedge_index = wedge_types / 2;
    387 
    388   for (int i = 0; i < MAX_SB_SQUARE; ++i) {
    389     pred_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
    390   }
    391   for (int i = 0; i < MAX_SB_SQUARE + (8 * MAX_SB_SIZE); ++i) {
    392     ref_buffer_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
    393   }
    394 
    395   const uint8_t *mask = av1_get_contiguous_soft_mask(wedge_index, 1, bsize);
    396   const int num_loops = 1000000000 / (w + h);
    397 
    398   highbd_comp_mask_pred_func funcs[2] = { aom_highbd_comp_mask_pred_c,
    399                                           test_impl };
    400   double elapsed_time[2] = { 0 };
    401   for (int i = 0; i < 2; ++i) {
    402     aom_usec_timer timer;
    403     aom_usec_timer_start(&timer);
    404     highbd_comp_mask_pred_func func = funcs[i];
    405     for (int j = 0; j < num_loops; ++j) {
    406       func(CONVERT_TO_BYTEPTR(comp_pred1_), CONVERT_TO_BYTEPTR(pred_), w, h,
    407            CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE, mask, w, 0);
    408     }
    409     aom_usec_timer_mark(&timer);
    410     double time = static_cast<double>(aom_usec_timer_elapsed(&timer));
    411     elapsed_time[i] = 1000.0 * time / num_loops;
    412   }
    413   printf("compMask %3dx%-3d: %7.2f/%7.2fns", w, h, elapsed_time[0],
    414          elapsed_time[1]);
    415   printf("(%3.2f)\n", elapsed_time[0] / elapsed_time[1]);
    416 }
    417 
    418 TEST_P(AV1HighbdCompMaskVarianceTest, CheckOutput) {
    419   // inv = 0, 1
    420   RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 0);
    421   RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 1);
    422 }
    423 
    424 TEST_P(AV1HighbdCompMaskVarianceTest, DISABLED_Speed) {
    425   RunSpeedTest(GET_PARAM(0), GET_PARAM(1));
    426 }
    427 
    428 #if HAVE_AVX2
    429 INSTANTIATE_TEST_CASE_P(
    430     AVX2, AV1HighbdCompMaskVarianceTest,
    431     ::testing::Combine(::testing::Values(&aom_highbd_comp_mask_pred_avx2),
    432                        ::testing::ValuesIn(kValidBlockSize),
    433                        ::testing::Range(8, 13, 2)));
    434 #endif
    435 
    436 #if HAVE_SSE2
    437 INSTANTIATE_TEST_CASE_P(
    438     SSE2, AV1HighbdCompMaskVarianceTest,
    439     ::testing::Combine(::testing::Values(&aom_highbd_comp_mask_pred_sse2),
    440                        ::testing::ValuesIn(kValidBlockSize),
    441                        ::testing::Range(8, 13, 2)));
    442 #endif
    443 
    444 #ifndef aom_highbd_comp_mask_pred
    445 // can't run this test if aom_highbd_comp_mask_pred is defined to
    446 // aom_highbd_comp_mask_pred_c
// Fixture for the upsampled variant of the high-bit-depth comp-mask
// prediction. It reuses the buffers and SetUp()/TearDown() of
// AV1HighbdCompMaskVarianceTest and declares its own Run* helpers, which hide
// the base-class ones of the same name (they are not virtual).
class AV1HighbdCompMaskUpVarianceTest : public AV1HighbdCompMaskVarianceTest {
 public:
  ~AV1HighbdCompMaskUpVarianceTest();

 protected:
  // Compares a C-only upsample + mask pipeline with the dispatched one.
  void RunCheckOutput(highbd_comp_mask_pred_func test_impl, BLOCK_SIZE bsize,
                      int inv);
  // Benchmarks the upsampled prediction; a non-zero |havSub| selects a
  // non-zero sub-pixel offset.
  void RunSpeedTest(highbd_comp_mask_pred_func test_impl, BLOCK_SIZE bsize,
                    int havSub);
};
    457 
    458 AV1HighbdCompMaskUpVarianceTest::~AV1HighbdCompMaskUpVarianceTest() { ; }
    459 
    460 void AV1HighbdCompMaskUpVarianceTest::RunCheckOutput(
    461     highbd_comp_mask_pred_func test_impl, BLOCK_SIZE bsize, int inv) {
    462   (void)test_impl;
    463   int bd_ = GET_PARAM(2);
    464   const int w = block_size_wide[bsize];
    465   const int h = block_size_high[bsize];
    466   int wedge_types = (1 << get_wedge_bits_lookup(bsize));
    467 
    468   for (int i = 0; i < MAX_SB_SQUARE; ++i) {
    469     pred_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
    470   }
    471   for (int i = 0; i < MAX_SB_SQUARE + (8 * MAX_SB_SIZE); ++i) {
    472     ref_buffer_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
    473   }
    474 
    475   int subpel_search;
    476   for (subpel_search = 1; subpel_search <= 2; ++subpel_search) {
    477     // loop through subx and suby
    478     for (int sub = 0; sub < 8 * 8; ++sub) {
    479       int subx = sub & 0x7;
    480       int suby = (sub >> 3);
    481       for (int wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
    482         const uint8_t *mask =
    483             av1_get_contiguous_soft_mask(wedge_index, 1, bsize);
    484 
    485         // ref
    486         aom_highbd_upsampled_pred_c(
    487             NULL, NULL, 0, 0, NULL, CONVERT_TO_BYTEPTR(comp_pred1_), w, h, subx,
    488             suby, CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE, bd_, subpel_search);
    489 
    490         aom_highbd_comp_mask_pred_c(
    491             CONVERT_TO_BYTEPTR(comp_pred1_), CONVERT_TO_BYTEPTR(pred_), w, h,
    492             CONVERT_TO_BYTEPTR(comp_pred1_), w, mask, w, inv);
    493 
    494         // test
    495         aom_highbd_upsampled_pred(
    496             NULL, NULL, 0, 0, NULL, CONVERT_TO_BYTEPTR(comp_pred2_), w, h, subx,
    497             suby, CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE, bd_, subpel_search);
    498 
    499         aom_highbd_comp_mask_pred(
    500             CONVERT_TO_BYTEPTR(comp_pred2_), CONVERT_TO_BYTEPTR(pred_), w, h,
    501             CONVERT_TO_BYTEPTR(comp_pred2_), w, mask, w, inv);
    502 
    503         ASSERT_EQ(CheckResult(w, h), true)
    504             << " wedge " << wedge_index << " inv " << inv << "sub (" << subx
    505             << "," << suby << ")";
    506       }
    507     }
    508   }
    509 }
    510 
    511 void AV1HighbdCompMaskUpVarianceTest::RunSpeedTest(
    512     highbd_comp_mask_pred_func test_impl, BLOCK_SIZE bsize, int havSub) {
    513   int bd_ = GET_PARAM(2);
    514   const int w = block_size_wide[bsize];
    515   const int h = block_size_high[bsize];
    516   const int subx = havSub ? 3 : 0;
    517   const int suby = havSub ? 4 : 0;
    518 
    519   int wedge_types = (1 << get_wedge_bits_lookup(bsize));
    520   int wedge_index = wedge_types / 2;
    521   const uint8_t *mask = av1_get_contiguous_soft_mask(wedge_index, 1, bsize);
    522 
    523   for (int i = 0; i < MAX_SB_SQUARE; ++i) {
    524     pred_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
    525   }
    526   for (int i = 0; i < MAX_SB_SQUARE + (8 * MAX_SB_SIZE); ++i) {
    527     ref_buffer_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
    528   }
    529 
    530   const int num_loops = 1000000000 / (w + h);
    531   highbd_comp_mask_pred_func funcs[2] = { &aom_highbd_comp_mask_pred_c,
    532                                           test_impl };
    533   double elapsed_time[2] = { 0 };
    534   for (int i = 0; i < 2; ++i) {
    535     aom_usec_timer timer;
    536     aom_usec_timer_start(&timer);
    537     aom_highbd_comp_mask_pred = funcs[i];
    538     int subpel_search = 2;  // set to 1 to test 4-tap filter.
    539     for (int j = 0; j < num_loops; ++j) {
    540       aom_highbd_comp_mask_upsampled_pred(
    541           NULL, NULL, 0, 0, NULL, CONVERT_TO_BYTEPTR(comp_pred1_),
    542           CONVERT_TO_BYTEPTR(pred_), w, h, subx, suby, CONVERT_TO_BYTEPTR(ref_),
    543           MAX_SB_SIZE, mask, w, 0, bd_, subpel_search);
    544     }
    545     aom_usec_timer_mark(&timer);
    546     double time = static_cast<double>(aom_usec_timer_elapsed(&timer));
    547     elapsed_time[i] = 1000.0 * time / num_loops;
    548   }
    549   printf("CompMaskUp[%d] %3dx%-3d:%7.2f/%7.2fns", havSub, w, h, elapsed_time[0],
    550          elapsed_time[1]);
    551   printf("(%3.2f)\n", elapsed_time[0] / elapsed_time[1]);
    552 }
    553 
    554 TEST_P(AV1HighbdCompMaskUpVarianceTest, CheckOutput) {
    555   // inv mask = 0, 1
    556   RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 0);
    557   RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 1);
    558 }
    559 
    560 TEST_P(AV1HighbdCompMaskUpVarianceTest, DISABLED_Speed) {
    561   RunSpeedTest(GET_PARAM(0), GET_PARAM(1), 1);
    562 }
    563 
    564 #if HAVE_AVX2
    565 INSTANTIATE_TEST_CASE_P(
    566     AVX2, AV1HighbdCompMaskUpVarianceTest,
    567     ::testing::Combine(::testing::Values(&aom_highbd_comp_mask_pred_avx2),
    568                        ::testing::ValuesIn(kValidBlockSize),
    569                        ::testing::Range(8, 13, 2)));
    570 #endif
    571 
    572 #if HAVE_SSE2
    573 INSTANTIATE_TEST_CASE_P(
    574     SSE2, AV1HighbdCompMaskUpVarianceTest,
    575     ::testing::Combine(::testing::Values(&aom_highbd_comp_mask_pred_sse2),
    576                        ::testing::ValuesIn(kValidBlockSize),
    577                        ::testing::Range(8, 13, 2)));
    578 #endif
    579 
    580 #endif  // ifndef aom_highbd_comp_mask_pred
    581 }  // namespace AV1CompMaskVariance
    582