Home | History | Annotate | Download | only in test
      1 /*
      2  *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #include <math.h>
     12 #include <stdlib.h>
     13 #include <string.h>
     14 
     15 #include "third_party/googletest/src/include/gtest/gtest.h"
     16 
     17 #include "./vp9_rtcd.h"
     18 #include "./vpx_dsp_rtcd.h"
     19 #include "test/acm_random.h"
     20 #include "test/clear_system_state.h"
     21 #include "test/register_state_check.h"
     22 #include "test/util.h"
     23 #include "vp9/common/vp9_entropy.h"
     24 #include "vp9/common/vp9_scan.h"
     25 #include "vpx/vpx_codec.h"
     26 #include "vpx/vpx_integer.h"
     27 #include "vpx_ports/mem.h"
     28 #include "vpx_ports/msvc.h"  // for round()
     29 
     30 using libvpx_test::ACMRandom;
     31 
     32 namespace {
     33 
     34 const int kNumCoeffs = 256;
     35 const double C1 = 0.995184726672197;
     36 const double C2 = 0.98078528040323;
     37 const double C3 = 0.956940335732209;
     38 const double C4 = 0.923879532511287;
     39 const double C5 = 0.881921264348355;
     40 const double C6 = 0.831469612302545;
     41 const double C7 = 0.773010453362737;
     42 const double C8 = 0.707106781186548;
     43 const double C9 = 0.634393284163646;
     44 const double C10 = 0.555570233019602;
     45 const double C11 = 0.471396736825998;
     46 const double C12 = 0.38268343236509;
     47 const double C13 = 0.290284677254462;
     48 const double C14 = 0.195090322016128;
     49 const double C15 = 0.098017140329561;
     50 
     51 void butterfly_16x16_dct_1d(double input[16], double output[16]) {
     52   double step[16];
     53   double intermediate[16];
     54   double temp1, temp2;
     55 
     56   // step 1
     57   step[0] = input[0] + input[15];
     58   step[1] = input[1] + input[14];
     59   step[2] = input[2] + input[13];
     60   step[3] = input[3] + input[12];
     61   step[4] = input[4] + input[11];
     62   step[5] = input[5] + input[10];
     63   step[6] = input[6] + input[9];
     64   step[7] = input[7] + input[8];
     65   step[8] = input[7] - input[8];
     66   step[9] = input[6] - input[9];
     67   step[10] = input[5] - input[10];
     68   step[11] = input[4] - input[11];
     69   step[12] = input[3] - input[12];
     70   step[13] = input[2] - input[13];
     71   step[14] = input[1] - input[14];
     72   step[15] = input[0] - input[15];
     73 
     74   // step 2
     75   output[0] = step[0] + step[7];
     76   output[1] = step[1] + step[6];
     77   output[2] = step[2] + step[5];
     78   output[3] = step[3] + step[4];
     79   output[4] = step[3] - step[4];
     80   output[5] = step[2] - step[5];
     81   output[6] = step[1] - step[6];
     82   output[7] = step[0] - step[7];
     83 
     84   temp1 = step[8] * C7;
     85   temp2 = step[15] * C9;
     86   output[8] = temp1 + temp2;
     87 
     88   temp1 = step[9] * C11;
     89   temp2 = step[14] * C5;
     90   output[9] = temp1 - temp2;
     91 
     92   temp1 = step[10] * C3;
     93   temp2 = step[13] * C13;
     94   output[10] = temp1 + temp2;
     95 
     96   temp1 = step[11] * C15;
     97   temp2 = step[12] * C1;
     98   output[11] = temp1 - temp2;
     99 
    100   temp1 = step[11] * C1;
    101   temp2 = step[12] * C15;
    102   output[12] = temp2 + temp1;
    103 
    104   temp1 = step[10] * C13;
    105   temp2 = step[13] * C3;
    106   output[13] = temp2 - temp1;
    107 
    108   temp1 = step[9] * C5;
    109   temp2 = step[14] * C11;
    110   output[14] = temp2 + temp1;
    111 
    112   temp1 = step[8] * C9;
    113   temp2 = step[15] * C7;
    114   output[15] = temp2 - temp1;
    115 
    116   // step 3
    117   step[0] = output[0] + output[3];
    118   step[1] = output[1] + output[2];
    119   step[2] = output[1] - output[2];
    120   step[3] = output[0] - output[3];
    121 
    122   temp1 = output[4] * C14;
    123   temp2 = output[7] * C2;
    124   step[4] = temp1 + temp2;
    125 
    126   temp1 = output[5] * C10;
    127   temp2 = output[6] * C6;
    128   step[5] = temp1 + temp2;
    129 
    130   temp1 = output[5] * C6;
    131   temp2 = output[6] * C10;
    132   step[6] = temp2 - temp1;
    133 
    134   temp1 = output[4] * C2;
    135   temp2 = output[7] * C14;
    136   step[7] = temp2 - temp1;
    137 
    138   step[8] = output[8] + output[11];
    139   step[9] = output[9] + output[10];
    140   step[10] = output[9] - output[10];
    141   step[11] = output[8] - output[11];
    142 
    143   step[12] = output[12] + output[15];
    144   step[13] = output[13] + output[14];
    145   step[14] = output[13] - output[14];
    146   step[15] = output[12] - output[15];
    147 
    148   // step 4
    149   output[0] = (step[0] + step[1]);
    150   output[8] = (step[0] - step[1]);
    151 
    152   temp1 = step[2] * C12;
    153   temp2 = step[3] * C4;
    154   temp1 = temp1 + temp2;
    155   output[4] = 2 * (temp1 * C8);
    156 
    157   temp1 = step[2] * C4;
    158   temp2 = step[3] * C12;
    159   temp1 = temp2 - temp1;
    160   output[12] = 2 * (temp1 * C8);
    161 
    162   output[2] = 2 * ((step[4] + step[5]) * C8);
    163   output[14] = 2 * ((step[7] - step[6]) * C8);
    164 
    165   temp1 = step[4] - step[5];
    166   temp2 = step[6] + step[7];
    167   output[6] = (temp1 + temp2);
    168   output[10] = (temp1 - temp2);
    169 
    170   intermediate[8] = step[8] + step[14];
    171   intermediate[9] = step[9] + step[15];
    172 
    173   temp1 = intermediate[8] * C12;
    174   temp2 = intermediate[9] * C4;
    175   temp1 = temp1 - temp2;
    176   output[3] = 2 * (temp1 * C8);
    177 
    178   temp1 = intermediate[8] * C4;
    179   temp2 = intermediate[9] * C12;
    180   temp1 = temp2 + temp1;
    181   output[13] = 2 * (temp1 * C8);
    182 
    183   output[9] = 2 * ((step[10] + step[11]) * C8);
    184 
    185   intermediate[11] = step[10] - step[11];
    186   intermediate[12] = step[12] + step[13];
    187   intermediate[13] = step[12] - step[13];
    188   intermediate[14] = step[8] - step[14];
    189   intermediate[15] = step[9] - step[15];
    190 
    191   output[15] = (intermediate[11] + intermediate[12]);
    192   output[1] = -(intermediate[11] - intermediate[12]);
    193 
    194   output[7] = 2 * (intermediate[13] * C8);
    195 
    196   temp1 = intermediate[14] * C12;
    197   temp2 = intermediate[15] * C4;
    198   temp1 = temp1 - temp2;
    199   output[11] = -2 * (temp1 * C8);
    200 
    201   temp1 = intermediate[14] * C4;
    202   temp2 = intermediate[15] * C12;
    203   temp1 = temp2 + temp1;
    204   output[5] = 2 * (temp1 * C8);
    205 }
    206 
    207 void reference_16x16_dct_2d(int16_t input[256], double output[256]) {
    208   // First transform columns
    209   for (int i = 0; i < 16; ++i) {
    210     double temp_in[16], temp_out[16];
    211     for (int j = 0; j < 16; ++j) temp_in[j] = input[j * 16 + i];
    212     butterfly_16x16_dct_1d(temp_in, temp_out);
    213     for (int j = 0; j < 16; ++j) output[j * 16 + i] = temp_out[j];
    214   }
    215   // Then transform rows
    216   for (int i = 0; i < 16; ++i) {
    217     double temp_in[16], temp_out[16];
    218     for (int j = 0; j < 16; ++j) temp_in[j] = output[j + i * 16];
    219     butterfly_16x16_dct_1d(temp_in, temp_out);
    220     // Scale by some magic number
    221     for (int j = 0; j < 16; ++j) output[j + i * 16] = temp_out[j] / 2;
    222   }
    223 }
    224 
    225 typedef void (*FdctFunc)(const int16_t *in, tran_low_t *out, int stride);
    226 typedef void (*IdctFunc)(const tran_low_t *in, uint8_t *out, int stride);
    227 typedef void (*FhtFunc)(const int16_t *in, tran_low_t *out, int stride,
    228                         int tx_type);
    229 typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
    230                         int tx_type);
    231 
    232 typedef std::tr1::tuple<FdctFunc, IdctFunc, int, vpx_bit_depth_t> Dct16x16Param;
    233 typedef std::tr1::tuple<FhtFunc, IhtFunc, int, vpx_bit_depth_t> Ht16x16Param;
    234 typedef std::tr1::tuple<IdctFunc, IdctFunc, int, vpx_bit_depth_t>
    235     Idct16x16Param;
    236 
    237 void fdct16x16_ref(const int16_t *in, tran_low_t *out, int stride,
    238                    int /*tx_type*/) {
    239   vpx_fdct16x16_c(in, out, stride);
    240 }
    241 
    242 void idct16x16_ref(const tran_low_t *in, uint8_t *dest, int stride,
    243                    int /*tx_type*/) {
    244   vpx_idct16x16_256_add_c(in, dest, stride);
    245 }
    246 
    247 void fht16x16_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) {
    248   vp9_fht16x16_c(in, out, stride, tx_type);
    249 }
    250 
    251 void iht16x16_ref(const tran_low_t *in, uint8_t *dest, int stride,
    252                   int tx_type) {
    253   vp9_iht16x16_256_add_c(in, dest, stride, tx_type);
    254 }
    255 
    256 #if CONFIG_VP9_HIGHBITDEPTH
    257 void idct16x16_10(const tran_low_t *in, uint8_t *out, int stride) {
    258   vpx_highbd_idct16x16_256_add_c(in, CAST_TO_SHORTPTR(out), stride, 10);
    259 }
    260 
    261 void idct16x16_12(const tran_low_t *in, uint8_t *out, int stride) {
    262   vpx_highbd_idct16x16_256_add_c(in, CAST_TO_SHORTPTR(out), stride, 12);
    263 }
    264 
    265 void idct16x16_10_ref(const tran_low_t *in, uint8_t *out, int stride,
    266                       int /*tx_type*/) {
    267   idct16x16_10(in, out, stride);
    268 }
    269 
    270 void idct16x16_12_ref(const tran_low_t *in, uint8_t *out, int stride,
    271                       int /*tx_type*/) {
    272   idct16x16_12(in, out, stride);
    273 }
    274 
    275 void iht16x16_10(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
    276   vp9_highbd_iht16x16_256_add_c(in, CAST_TO_SHORTPTR(out), stride, tx_type, 10);
    277 }
    278 
    279 void iht16x16_12(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
    280   vp9_highbd_iht16x16_256_add_c(in, CAST_TO_SHORTPTR(out), stride, tx_type, 12);
    281 }
    282 
    283 #if HAVE_SSE2
    284 void idct16x16_10_add_10_c(const tran_low_t *in, uint8_t *out, int stride) {
    285   vpx_highbd_idct16x16_10_add_c(in, CAST_TO_SHORTPTR(out), stride, 10);
    286 }
    287 
    288 void idct16x16_10_add_12_c(const tran_low_t *in, uint8_t *out, int stride) {
    289   vpx_highbd_idct16x16_10_add_c(in, CAST_TO_SHORTPTR(out), stride, 12);
    290 }
    291 
    292 void idct16x16_256_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
    293   vpx_highbd_idct16x16_256_add_sse2(in, CAST_TO_SHORTPTR(out), stride, 10);
    294 }
    295 
    296 void idct16x16_256_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
    297   vpx_highbd_idct16x16_256_add_sse2(in, CAST_TO_SHORTPTR(out), stride, 12);
    298 }
    299 
    300 void idct16x16_10_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
    301   vpx_highbd_idct16x16_10_add_sse2(in, CAST_TO_SHORTPTR(out), stride, 10);
    302 }
    303 
    304 void idct16x16_10_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
    305   vpx_highbd_idct16x16_10_add_sse2(in, CAST_TO_SHORTPTR(out), stride, 12);
    306 }
    307 #endif  // HAVE_SSE2
    308 #endif  // CONFIG_VP9_HIGHBITDEPTH
    309 
    310 class Trans16x16TestBase {
    311  public:
    312   virtual ~Trans16x16TestBase() {}
    313 
    314  protected:
    315   virtual void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) = 0;
    316 
    317   virtual void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) = 0;
    318 
    319   void RunAccuracyCheck() {
    320     ACMRandom rnd(ACMRandom::DeterministicSeed());
    321     uint32_t max_error = 0;
    322     int64_t total_error = 0;
    323     const int count_test_block = 10000;
    324     for (int i = 0; i < count_test_block; ++i) {
    325       DECLARE_ALIGNED(16, int16_t, test_input_block[kNumCoeffs]);
    326       DECLARE_ALIGNED(16, tran_low_t, test_temp_block[kNumCoeffs]);
    327       DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
    328       DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
    329 #if CONFIG_VP9_HIGHBITDEPTH
    330       DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
    331       DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
    332 #endif
    333 
    334       // Initialize a test block with input range [-mask_, mask_].
    335       for (int j = 0; j < kNumCoeffs; ++j) {
    336         if (bit_depth_ == VPX_BITS_8) {
    337           src[j] = rnd.Rand8();
    338           dst[j] = rnd.Rand8();
    339           test_input_block[j] = src[j] - dst[j];
    340 #if CONFIG_VP9_HIGHBITDEPTH
    341         } else {
    342           src16[j] = rnd.Rand16() & mask_;
    343           dst16[j] = rnd.Rand16() & mask_;
    344           test_input_block[j] = src16[j] - dst16[j];
    345 #endif
    346         }
    347       }
    348 
    349       ASM_REGISTER_STATE_CHECK(
    350           RunFwdTxfm(test_input_block, test_temp_block, pitch_));
    351       if (bit_depth_ == VPX_BITS_8) {
    352         ASM_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst, pitch_));
    353 #if CONFIG_VP9_HIGHBITDEPTH
    354       } else {
    355         ASM_REGISTER_STATE_CHECK(
    356             RunInvTxfm(test_temp_block, CAST_TO_BYTEPTR(dst16), pitch_));
    357 #endif
    358       }
    359 
    360       for (int j = 0; j < kNumCoeffs; ++j) {
    361 #if CONFIG_VP9_HIGHBITDEPTH
    362         const int32_t diff =
    363             bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
    364 #else
    365         const int32_t diff = dst[j] - src[j];
    366 #endif
    367         const uint32_t error = diff * diff;
    368         if (max_error < error) max_error = error;
    369         total_error += error;
    370       }
    371     }
    372 
    373     EXPECT_GE(1u << 2 * (bit_depth_ - 8), max_error)
    374         << "Error: 16x16 FHT/IHT has an individual round trip error > 1";
    375 
    376     EXPECT_GE(count_test_block << 2 * (bit_depth_ - 8), total_error)
    377         << "Error: 16x16 FHT/IHT has average round trip error > 1 per block";
    378   }
    379 
    380   void RunCoeffCheck() {
    381     ACMRandom rnd(ACMRandom::DeterministicSeed());
    382     const int count_test_block = 1000;
    383     DECLARE_ALIGNED(16, int16_t, input_block[kNumCoeffs]);
    384     DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
    385     DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);
    386 
    387     for (int i = 0; i < count_test_block; ++i) {
    388       // Initialize a test block with input range [-mask_, mask_].
    389       for (int j = 0; j < kNumCoeffs; ++j) {
    390         input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
    391       }
    392 
    393       fwd_txfm_ref(input_block, output_ref_block, pitch_, tx_type_);
    394       ASM_REGISTER_STATE_CHECK(RunFwdTxfm(input_block, output_block, pitch_));
    395 
    396       // The minimum quant value is 4.
    397       for (int j = 0; j < kNumCoeffs; ++j)
    398         EXPECT_EQ(output_block[j], output_ref_block[j]);
    399     }
    400   }
    401 
    402   void RunMemCheck() {
    403     ACMRandom rnd(ACMRandom::DeterministicSeed());
    404     const int count_test_block = 1000;
    405     DECLARE_ALIGNED(16, int16_t, input_extreme_block[kNumCoeffs]);
    406     DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
    407     DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);
    408 
    409     for (int i = 0; i < count_test_block; ++i) {
    410       // Initialize a test block with input range [-mask_, mask_].
    411       for (int j = 0; j < kNumCoeffs; ++j) {
    412         input_extreme_block[j] = rnd.Rand8() % 2 ? mask_ : -mask_;
    413       }
    414       if (i == 0) {
    415         for (int j = 0; j < kNumCoeffs; ++j) input_extreme_block[j] = mask_;
    416       } else if (i == 1) {
    417         for (int j = 0; j < kNumCoeffs; ++j) input_extreme_block[j] = -mask_;
    418       }
    419 
    420       fwd_txfm_ref(input_extreme_block, output_ref_block, pitch_, tx_type_);
    421       ASM_REGISTER_STATE_CHECK(
    422           RunFwdTxfm(input_extreme_block, output_block, pitch_));
    423 
    424       // The minimum quant value is 4.
    425       for (int j = 0; j < kNumCoeffs; ++j) {
    426         EXPECT_EQ(output_block[j], output_ref_block[j]);
    427         EXPECT_GE(4 * DCT_MAX_VALUE << (bit_depth_ - 8), abs(output_block[j]))
    428             << "Error: 16x16 FDCT has coefficient larger than 4*DCT_MAX_VALUE";
    429       }
    430     }
    431   }
    432 
    433   void RunQuantCheck(int dc_thred, int ac_thred) {
    434     ACMRandom rnd(ACMRandom::DeterministicSeed());
    435     const int count_test_block = 100000;
    436     DECLARE_ALIGNED(16, int16_t, input_extreme_block[kNumCoeffs]);
    437     DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
    438 
    439     DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
    440     DECLARE_ALIGNED(16, uint8_t, ref[kNumCoeffs]);
    441 #if CONFIG_VP9_HIGHBITDEPTH
    442     DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
    443     DECLARE_ALIGNED(16, uint16_t, ref16[kNumCoeffs]);
    444 #endif
    445 
    446     for (int i = 0; i < count_test_block; ++i) {
    447       // Initialize a test block with input range [-mask_, mask_].
    448       for (int j = 0; j < kNumCoeffs; ++j) {
    449         input_extreme_block[j] = rnd.Rand8() % 2 ? mask_ : -mask_;
    450       }
    451       if (i == 0) {
    452         for (int j = 0; j < kNumCoeffs; ++j) input_extreme_block[j] = mask_;
    453       }
    454       if (i == 1) {
    455         for (int j = 0; j < kNumCoeffs; ++j) input_extreme_block[j] = -mask_;
    456       }
    457 
    458       fwd_txfm_ref(input_extreme_block, output_ref_block, pitch_, tx_type_);
    459 
    460       // clear reconstructed pixel buffers
    461       memset(dst, 0, kNumCoeffs * sizeof(uint8_t));
    462       memset(ref, 0, kNumCoeffs * sizeof(uint8_t));
    463 #if CONFIG_VP9_HIGHBITDEPTH
    464       memset(dst16, 0, kNumCoeffs * sizeof(uint16_t));
    465       memset(ref16, 0, kNumCoeffs * sizeof(uint16_t));
    466 #endif
    467 
    468       // quantization with maximum allowed step sizes
    469       output_ref_block[0] = (output_ref_block[0] / dc_thred) * dc_thred;
    470       for (int j = 1; j < kNumCoeffs; ++j) {
    471         output_ref_block[j] = (output_ref_block[j] / ac_thred) * ac_thred;
    472       }
    473       if (bit_depth_ == VPX_BITS_8) {
    474         inv_txfm_ref(output_ref_block, ref, pitch_, tx_type_);
    475         ASM_REGISTER_STATE_CHECK(RunInvTxfm(output_ref_block, dst, pitch_));
    476 #if CONFIG_VP9_HIGHBITDEPTH
    477       } else {
    478         inv_txfm_ref(output_ref_block, CAST_TO_BYTEPTR(ref16), pitch_,
    479                      tx_type_);
    480         ASM_REGISTER_STATE_CHECK(
    481             RunInvTxfm(output_ref_block, CAST_TO_BYTEPTR(dst16), pitch_));
    482 #endif
    483       }
    484       if (bit_depth_ == VPX_BITS_8) {
    485         for (int j = 0; j < kNumCoeffs; ++j) EXPECT_EQ(ref[j], dst[j]);
    486 #if CONFIG_VP9_HIGHBITDEPTH
    487       } else {
    488         for (int j = 0; j < kNumCoeffs; ++j) EXPECT_EQ(ref16[j], dst16[j]);
    489 #endif
    490       }
    491     }
    492   }
    493 
    494   void RunInvAccuracyCheck() {
    495     ACMRandom rnd(ACMRandom::DeterministicSeed());
    496     const int count_test_block = 1000;
    497     DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
    498     DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
    499     DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
    500     DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
    501 #if CONFIG_VP9_HIGHBITDEPTH
    502     DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
    503     DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
    504 #endif  // CONFIG_VP9_HIGHBITDEPTH
    505 
    506     for (int i = 0; i < count_test_block; ++i) {
    507       double out_r[kNumCoeffs];
    508 
    509       // Initialize a test block with input range [-255, 255].
    510       for (int j = 0; j < kNumCoeffs; ++j) {
    511         if (bit_depth_ == VPX_BITS_8) {
    512           src[j] = rnd.Rand8();
    513           dst[j] = rnd.Rand8();
    514           in[j] = src[j] - dst[j];
    515 #if CONFIG_VP9_HIGHBITDEPTH
    516         } else {
    517           src16[j] = rnd.Rand16() & mask_;
    518           dst16[j] = rnd.Rand16() & mask_;
    519           in[j] = src16[j] - dst16[j];
    520 #endif  // CONFIG_VP9_HIGHBITDEPTH
    521         }
    522       }
    523 
    524       reference_16x16_dct_2d(in, out_r);
    525       for (int j = 0; j < kNumCoeffs; ++j) {
    526         coeff[j] = static_cast<tran_low_t>(round(out_r[j]));
    527       }
    528 
    529       if (bit_depth_ == VPX_BITS_8) {
    530         ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, 16));
    531 #if CONFIG_VP9_HIGHBITDEPTH
    532       } else {
    533         ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, CAST_TO_BYTEPTR(dst16), 16));
    534 #endif  // CONFIG_VP9_HIGHBITDEPTH
    535       }
    536 
    537       for (int j = 0; j < kNumCoeffs; ++j) {
    538 #if CONFIG_VP9_HIGHBITDEPTH
    539         const uint32_t diff =
    540             bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
    541 #else
    542         const uint32_t diff = dst[j] - src[j];
    543 #endif  // CONFIG_VP9_HIGHBITDEPTH
    544         const uint32_t error = diff * diff;
    545         EXPECT_GE(1u, error)
    546             << "Error: 16x16 IDCT has error " << error << " at index " << j;
    547       }
    548     }
    549   }
    550 
    551   void CompareInvReference(IdctFunc ref_txfm, int thresh) {
    552     ACMRandom rnd(ACMRandom::DeterministicSeed());
    553     const int count_test_block = 10000;
    554     const int eob = 10;
    555     const int16_t *scan = vp9_default_scan_orders[TX_16X16].scan;
    556     DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
    557     DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
    558     DECLARE_ALIGNED(16, uint8_t, ref[kNumCoeffs]);
    559 #if CONFIG_VP9_HIGHBITDEPTH
    560     DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
    561     DECLARE_ALIGNED(16, uint16_t, ref16[kNumCoeffs]);
    562 #endif  // CONFIG_VP9_HIGHBITDEPTH
    563 
    564     for (int i = 0; i < count_test_block; ++i) {
    565       for (int j = 0; j < kNumCoeffs; ++j) {
    566         if (j < eob) {
    567           // Random values less than the threshold, either positive or negative
    568           coeff[scan[j]] = rnd(thresh) * (1 - 2 * (i % 2));
    569         } else {
    570           coeff[scan[j]] = 0;
    571         }
    572         if (bit_depth_ == VPX_BITS_8) {
    573           dst[j] = 0;
    574           ref[j] = 0;
    575 #if CONFIG_VP9_HIGHBITDEPTH
    576         } else {
    577           dst16[j] = 0;
    578           ref16[j] = 0;
    579 #endif  // CONFIG_VP9_HIGHBITDEPTH
    580         }
    581       }
    582       if (bit_depth_ == VPX_BITS_8) {
    583         ref_txfm(coeff, ref, pitch_);
    584         ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
    585       } else {
    586 #if CONFIG_VP9_HIGHBITDEPTH
    587         ref_txfm(coeff, CAST_TO_BYTEPTR(ref16), pitch_);
    588         ASM_REGISTER_STATE_CHECK(
    589             RunInvTxfm(coeff, CAST_TO_BYTEPTR(dst16), pitch_));
    590 #endif  // CONFIG_VP9_HIGHBITDEPTH
    591       }
    592 
    593       for (int j = 0; j < kNumCoeffs; ++j) {
    594 #if CONFIG_VP9_HIGHBITDEPTH
    595         const uint32_t diff =
    596             bit_depth_ == VPX_BITS_8 ? dst[j] - ref[j] : dst16[j] - ref16[j];
    597 #else
    598         const uint32_t diff = dst[j] - ref[j];
    599 #endif  // CONFIG_VP9_HIGHBITDEPTH
    600         const uint32_t error = diff * diff;
    601         EXPECT_EQ(0u, error) << "Error: 16x16 IDCT Comparison has error "
    602                              << error << " at index " << j;
    603       }
    604     }
    605   }
    606 
    607   int pitch_;
    608   int tx_type_;
    609   vpx_bit_depth_t bit_depth_;
    610   int mask_;
    611   FhtFunc fwd_txfm_ref;
    612   IhtFunc inv_txfm_ref;
    613 };
    614 
    615 class Trans16x16DCT : public Trans16x16TestBase,
    616                       public ::testing::TestWithParam<Dct16x16Param> {
    617  public:
    618   virtual ~Trans16x16DCT() {}
    619 
    620   virtual void SetUp() {
    621     fwd_txfm_ = GET_PARAM(0);
    622     inv_txfm_ = GET_PARAM(1);
    623     tx_type_ = GET_PARAM(2);
    624     bit_depth_ = GET_PARAM(3);
    625     pitch_ = 16;
    626     fwd_txfm_ref = fdct16x16_ref;
    627     inv_txfm_ref = idct16x16_ref;
    628     mask_ = (1 << bit_depth_) - 1;
    629 #if CONFIG_VP9_HIGHBITDEPTH
    630     switch (bit_depth_) {
    631       case VPX_BITS_10: inv_txfm_ref = idct16x16_10_ref; break;
    632       case VPX_BITS_12: inv_txfm_ref = idct16x16_12_ref; break;
    633       default: inv_txfm_ref = idct16x16_ref; break;
    634     }
    635 #else
    636     inv_txfm_ref = idct16x16_ref;
    637 #endif
    638   }
    639   virtual void TearDown() { libvpx_test::ClearSystemState(); }
    640 
    641  protected:
    642   void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) {
    643     fwd_txfm_(in, out, stride);
    644   }
    645   void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
    646     inv_txfm_(out, dst, stride);
    647   }
    648 
    649   FdctFunc fwd_txfm_;
    650   IdctFunc inv_txfm_;
    651 };
    652 
    653 TEST_P(Trans16x16DCT, AccuracyCheck) { RunAccuracyCheck(); }
    654 
    655 TEST_P(Trans16x16DCT, CoeffCheck) { RunCoeffCheck(); }
    656 
    657 TEST_P(Trans16x16DCT, MemCheck) { RunMemCheck(); }
    658 
    659 TEST_P(Trans16x16DCT, QuantCheck) {
    660   // Use maximally allowed quantization step sizes for DC and AC
    661   // coefficients respectively.
    662   RunQuantCheck(1336, 1828);
    663 }
    664 
    665 TEST_P(Trans16x16DCT, InvAccuracyCheck) { RunInvAccuracyCheck(); }
    666 
    667 class Trans16x16HT : public Trans16x16TestBase,
    668                      public ::testing::TestWithParam<Ht16x16Param> {
    669  public:
    670   virtual ~Trans16x16HT() {}
    671 
    672   virtual void SetUp() {
    673     fwd_txfm_ = GET_PARAM(0);
    674     inv_txfm_ = GET_PARAM(1);
    675     tx_type_ = GET_PARAM(2);
    676     bit_depth_ = GET_PARAM(3);
    677     pitch_ = 16;
    678     fwd_txfm_ref = fht16x16_ref;
    679     inv_txfm_ref = iht16x16_ref;
    680     mask_ = (1 << bit_depth_) - 1;
    681 #if CONFIG_VP9_HIGHBITDEPTH
    682     switch (bit_depth_) {
    683       case VPX_BITS_10: inv_txfm_ref = iht16x16_10; break;
    684       case VPX_BITS_12: inv_txfm_ref = iht16x16_12; break;
    685       default: inv_txfm_ref = iht16x16_ref; break;
    686     }
    687 #else
    688     inv_txfm_ref = iht16x16_ref;
    689 #endif
    690   }
    691   virtual void TearDown() { libvpx_test::ClearSystemState(); }
    692 
    693  protected:
    694   void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) {
    695     fwd_txfm_(in, out, stride, tx_type_);
    696   }
    697   void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
    698     inv_txfm_(out, dst, stride, tx_type_);
    699   }
    700 
    701   FhtFunc fwd_txfm_;
    702   IhtFunc inv_txfm_;
    703 };
    704 
    705 TEST_P(Trans16x16HT, AccuracyCheck) { RunAccuracyCheck(); }
    706 
    707 TEST_P(Trans16x16HT, CoeffCheck) { RunCoeffCheck(); }
    708 
    709 TEST_P(Trans16x16HT, MemCheck) { RunMemCheck(); }
    710 
    711 TEST_P(Trans16x16HT, QuantCheck) {
    712   // The encoder skips any non-DC intra prediction modes,
    713   // when the quantization step size goes beyond 988.
    714   RunQuantCheck(429, 729);
    715 }
    716 
    717 class InvTrans16x16DCT : public Trans16x16TestBase,
    718                          public ::testing::TestWithParam<Idct16x16Param> {
    719  public:
    720   virtual ~InvTrans16x16DCT() {}
    721 
    722   virtual void SetUp() {
    723     ref_txfm_ = GET_PARAM(0);
    724     inv_txfm_ = GET_PARAM(1);
    725     thresh_ = GET_PARAM(2);
    726     bit_depth_ = GET_PARAM(3);
    727     pitch_ = 16;
    728     mask_ = (1 << bit_depth_) - 1;
    729   }
    730   virtual void TearDown() { libvpx_test::ClearSystemState(); }
    731 
    732  protected:
    733   void RunFwdTxfm(int16_t * /*in*/, tran_low_t * /*out*/, int /*stride*/) {}
    734   void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
    735     inv_txfm_(out, dst, stride);
    736   }
    737 
    738   IdctFunc ref_txfm_;
    739   IdctFunc inv_txfm_;
    740   int thresh_;
    741 };
    742 
    743 TEST_P(InvTrans16x16DCT, CompareReference) {
    744   CompareInvReference(ref_txfm_, thresh_);
    745 }
    746 
    747 using std::tr1::make_tuple;
    748 
    749 #if CONFIG_VP9_HIGHBITDEPTH
    750 INSTANTIATE_TEST_CASE_P(
    751     C, Trans16x16DCT,
    752     ::testing::Values(
    753         make_tuple(&vpx_highbd_fdct16x16_c, &idct16x16_10, 0, VPX_BITS_10),
    754         make_tuple(&vpx_highbd_fdct16x16_c, &idct16x16_12, 0, VPX_BITS_12),
    755         make_tuple(&vpx_fdct16x16_c, &vpx_idct16x16_256_add_c, 0, VPX_BITS_8)));
    756 #else
    757 INSTANTIATE_TEST_CASE_P(C, Trans16x16DCT,
    758                         ::testing::Values(make_tuple(&vpx_fdct16x16_c,
    759                                                      &vpx_idct16x16_256_add_c,
    760                                                      0, VPX_BITS_8)));
    761 #endif  // CONFIG_VP9_HIGHBITDEPTH
    762 
    // C instantiations of the Trans16x16HT suite. Each tuple is
    // (forward transform, inverse transform, integer 0..3, bit depth).
    // NOTE(review): the integer is presumably the transform type passed to the
    // fht/iht functions -- confirm against the fixture's parameter type, which
    // is defined outside this section.
    763 #if CONFIG_VP9_HIGHBITDEPTH
    764 INSTANTIATE_TEST_CASE_P(
    765     C, Trans16x16HT,
    766     ::testing::Values(
            // 10-bit: high-bit-depth forward transform with the iht16x16_10
            // inverse, one entry per value 0..3.
    767         make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_10, 0, VPX_BITS_10),
    768         make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_10, 1, VPX_BITS_10),
    769         make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_10, 2, VPX_BITS_10),
    770         make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_10, 3, VPX_BITS_10),
            // 12-bit: same forward transform with the iht16x16_12 inverse.
    771         make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_12, 0, VPX_BITS_12),
    772         make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_12, 1, VPX_BITS_12),
    773         make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_12, 2, VPX_BITS_12),
    774         make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_12, 3, VPX_BITS_12),
            // 8-bit: the same four entries the #else branch registers.
    775         make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 0, VPX_BITS_8),
    776         make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 1, VPX_BITS_8),
    777         make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 2, VPX_BITS_8),
    778         make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 3, VPX_BITS_8)));
    779 #else
    // Without high bit depth only the 8-bit C pair is registered.
    780 INSTANTIATE_TEST_CASE_P(
    781     C, Trans16x16HT,
    782     ::testing::Values(
    783         make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 0, VPX_BITS_8),
    784         make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 1, VPX_BITS_8),
    785         make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 2, VPX_BITS_8),
    786         make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 3, VPX_BITS_8)));
    787 #endif  // CONFIG_VP9_HIGHBITDEPTH
    788 
    // NEON instantiation of Trans16x16DCT: NEON forward and inverse 16x16 DCT
    // at 8-bit depth. Compiled only when HAVE_NEON is set and
    // CONFIG_EMULATE_HARDWARE is not. Unlike the SSE2, MSA and VSX guards in
    // this file, this guard does not test CONFIG_VP9_HIGHBITDEPTH.
    789 #if HAVE_NEON && !CONFIG_EMULATE_HARDWARE
    790 INSTANTIATE_TEST_CASE_P(
    791     NEON, Trans16x16DCT,
    792     ::testing::Values(make_tuple(&vpx_fdct16x16_neon,
    793                                  &vpx_idct16x16_256_add_neon, 0, VPX_BITS_8)));
    794 #endif  // HAVE_NEON && !CONFIG_EMULATE_HARDWARE
    795 
    // SSE2 instantiations for builds without high bit depth. Both the forward
    // and the inverse function in every tuple are the SSE2 versions, all at
    // 8-bit depth.
    796 #if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
    797 INSTANTIATE_TEST_CASE_P(
    798     SSE2, Trans16x16DCT,
    799     ::testing::Values(make_tuple(&vpx_fdct16x16_sse2,
    800                                  &vpx_idct16x16_256_add_sse2, 0, VPX_BITS_8)));
    // Trans16x16HT: one entry per third-element value 0..3.
    // NOTE(review): presumably the transform type -- confirm against the
    // fixture's parameter type.
    801 INSTANTIATE_TEST_CASE_P(
    802     SSE2, Trans16x16HT,
    803     ::testing::Values(make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2,
    804                                  0, VPX_BITS_8),
    805                       make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2,
    806                                  1, VPX_BITS_8),
    807                       make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2,
    808                                  2, VPX_BITS_8),
    809                       make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2,
    810                                  3, VPX_BITS_8)));
    811 #endif  // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
    812 
    // SSE2 instantiations for high-bit-depth builds. In the Trans16x16DCT and
    // Trans16x16HT tuples only one side of each pair carries the _sse2 suffix;
    // the other side is a _c function or an unsuffixed wrapper.
    // NOTE(review): idct16x16_10 / idct16x16_12 and the *_add_10_sse2 /
    // *_add_12_sse2 names are presumably wrappers defined earlier in this
    // file -- confirm.
    813 #if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
    814 INSTANTIATE_TEST_CASE_P(
    815     SSE2, Trans16x16DCT,
    816     ::testing::Values(
            // For 10- and 12-bit depth: SSE2 forward with an unsuffixed
            // inverse, then C forward with an SSE2 inverse.
    817         make_tuple(&vpx_highbd_fdct16x16_sse2, &idct16x16_10, 0, VPX_BITS_10),
    818         make_tuple(&vpx_highbd_fdct16x16_c, &idct16x16_256_add_10_sse2, 0,
    819                    VPX_BITS_10),
    820         make_tuple(&vpx_highbd_fdct16x16_sse2, &idct16x16_12, 0, VPX_BITS_12),
    821         make_tuple(&vpx_highbd_fdct16x16_c, &idct16x16_256_add_12_sse2, 0,
    822                    VPX_BITS_12),
            // 8-bit: SSE2 forward checked against the C inverse.
    823         make_tuple(&vpx_fdct16x16_sse2, &vpx_idct16x16_256_add_c, 0,
    824                    VPX_BITS_8)));
    // Trans16x16HT: SSE2 forward transform with the C inverse, 8-bit only, one
    // entry per third-element value 0..3.
    825 INSTANTIATE_TEST_CASE_P(
    826     SSE2, Trans16x16HT,
    827     ::testing::Values(
    828         make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_c, 0, VPX_BITS_8),
    829         make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_c, 1, VPX_BITS_8),
    830         make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_c, 2, VPX_BITS_8),
    831         make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_c, 3,
    832                    VPX_BITS_8)));
    // InvTrans16x16DCT tuples differ in shape: two inverse functions followed
    // by 3167 and a bit depth.
    // NOTE(review): presumably (reference, function under test, threshold, bit
    // depth) -- confirm against the fixture's parameter type.
    833 // Optimizations take effect at a threshold of 3155, so we use a value close to
    834 // that to test both branches.
    835 INSTANTIATE_TEST_CASE_P(
    836     SSE2, InvTrans16x16DCT,
    837     ::testing::Values(make_tuple(&idct16x16_10_add_10_c,
    838                                  &idct16x16_10_add_10_sse2, 3167, VPX_BITS_10),
    839                       make_tuple(&idct16x16_10, &idct16x16_256_add_10_sse2,
    840                                  3167, VPX_BITS_10),
    841                       make_tuple(&idct16x16_10_add_12_c,
    842                                  &idct16x16_10_add_12_sse2, 3167, VPX_BITS_12),
    843                       make_tuple(&idct16x16_12, &idct16x16_256_add_12_sse2,
    844                                  3167, VPX_BITS_12)));
    845 #endif  // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
    846 
    // MSA instantiations for builds without high bit depth. Both the forward
    // and the inverse function in every tuple are the MSA versions, all at
    // 8-bit depth.
    847 #if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
    848 INSTANTIATE_TEST_CASE_P(MSA, Trans16x16DCT,
    849                         ::testing::Values(make_tuple(&vpx_fdct16x16_msa,
    850                                                      &vpx_idct16x16_256_add_msa,
    851                                                      0, VPX_BITS_8)));
    // Trans16x16HT: one entry per third-element value 0..3.
    // NOTE(review): presumably the transform type -- confirm against the
    // fixture's parameter type.
    852 INSTANTIATE_TEST_CASE_P(
    853     MSA, Trans16x16HT,
    854     ::testing::Values(
    855         make_tuple(&vp9_fht16x16_msa, &vp9_iht16x16_256_add_msa, 0, VPX_BITS_8),
    856         make_tuple(&vp9_fht16x16_msa, &vp9_iht16x16_256_add_msa, 1, VPX_BITS_8),
    857         make_tuple(&vp9_fht16x16_msa, &vp9_iht16x16_256_add_msa, 2, VPX_BITS_8),
    858         make_tuple(&vp9_fht16x16_msa, &vp9_iht16x16_256_add_msa, 3,
    859                    VPX_BITS_8)));
    860 #endif  // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
    861 
    // VSX instantiation of Trans16x16DCT for builds without high bit depth.
    // Only the inverse transform is the VSX version; the forward transform in
    // the tuple is the C function vpx_fdct16x16_c.
    862 #if HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
    863 INSTANTIATE_TEST_CASE_P(VSX, Trans16x16DCT,
    864                         ::testing::Values(make_tuple(&vpx_fdct16x16_c,
    865                                                      &vpx_idct16x16_256_add_vsx,
    866                                                      0, VPX_BITS_8)));
    867 #endif  // HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
    868 }  // namespace
    869