/*
 *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <math.h>
#include <stdlib.h>
#include <string.h>

#include "third_party/googletest/src/include/gtest/gtest.h"

#include "./vp9_rtcd.h"
#include "./vpx_dsp_rtcd.h"
#include "test/acm_random.h"
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
#include "test/util.h"
#include "vp9/common/vp9_entropy.h"
#include "vp9/common/vp9_scan.h"
#include "vpx/vpx_codec.h"
#include "vpx/vpx_integer.h"
#include "vpx_ports/mem.h"
#include "vpx_ports/msvc.h"  // for round()

using libvpx_test::ACMRandom;

namespace {

const int kNumCoeffs = 256;
// Cosine constants used by the double-precision reference DCT below:
// Ck = cos(k * PI / 32).
const double C1 = 0.995184726672197;
const double C2 = 0.98078528040323;
const double C3 = 0.956940335732209;
const double C4 = 0.923879532511287;
const double C5 = 0.881921264348355;
const double C6 = 0.831469612302545;
const double C7 = 0.773010453362737;
const double C8 = 0.707106781186548;
const double C9 = 0.634393284163646;
const double C10 = 0.555570233019602;
const double C11 = 0.471396736825998;
const double C12 = 0.38268343236509;
const double C13 = 0.290284677254462;
const double C14 = 0.195090322016128;
const double C15 = 0.098017140329561;

void butterfly_16x16_dct_1d(double input[16], double output[16]) {
  double step[16];
  double intermediate[16];
  double temp1, temp2;

  // step 1
  step[0] = input[0] + input[15];
  step[1] = input[1] + input[14];
  step[2] = input[2] + input[13];
  step[3] = input[3] + input[12];
  step[4] = input[4] + input[11];
  step[5] = input[5] + input[10];
  step[6] = input[6] + input[9];
  step[7] = input[7] + input[8];
  step[8] = input[7] - input[8];
  step[9] = input[6] - input[9];
  step[10] = input[5] - input[10];
  step[11] = input[4] - input[11];
  step[12] = input[3] - input[12];
  step[13] = input[2] - input[13];
  step[14] = input[1] - input[14];
  step[15] = input[0] - input[15];

  // step 2
  output[0] = step[0] + step[7];
  output[1] = step[1] + step[6];
  output[2] = step[2] + step[5];
  output[3] = step[3] + step[4];
  output[4] = step[3] - step[4];
  output[5] = step[2] - step[5];
  output[6] = step[1] - step[6];
  output[7] = step[0] - step[7];

  temp1 = step[8] * C7;
  temp2 = step[15] * C9;
  output[8] = temp1 + temp2;

  temp1 = step[9] * C11;
  temp2 = step[14] * C5;
  output[9] = temp1 - temp2;

  temp1 = step[10] * C3;
  temp2 = step[13] * C13;
  output[10] = temp1 + temp2;

  temp1 = step[11] * C15;
  temp2 = step[12] * C1;
  output[11] = temp1 - temp2;

  temp1 = step[11] * C1;
  temp2 = step[12] * C15;
  output[12] = temp2 + temp1;

  temp1 = step[10] * C13;
  temp2 = step[13] * C3;
  output[13] = temp2 - temp1;

  temp1 = step[9] * C5;
  temp2 = step[14] * C11;
  output[14] = temp2 + temp1;

  temp1 = step[8] * C9;
  temp2 = step[15] * C7;
  output[15] = temp2 - temp1;

  // step 3
  step[0] = output[0] + output[3];
  step[1] = output[1] + output[2];
  step[2] = output[1] - output[2];
  step[3] = output[0] - output[3];

  temp1 = output[4] * C14;
  temp2 = output[7] * C2;
  step[4] = temp1 + temp2;

  temp1 = output[5] * C10;
  temp2 = output[6] * C6;
  step[5] = temp1 + temp2;

  temp1 = output[5] * C6;
  temp2 = output[6] * C10;
  step[6] = temp2 - temp1;

  temp1 = output[4] * C2;
  temp2 = output[7] * C14;
  step[7] = temp2 - temp1;

  step[8] = output[8] + output[11];
  step[9] = output[9] + output[10];
  step[10] = output[9] - output[10];
  step[11] = output[8] - output[11];

  step[12] = output[12] + output[15];
  step[13] = output[13] + output[14];
  step[14] = output[13] - output[14];
  step[15] = output[12] - output[15];

  // step 4
  output[0] = (step[0] + step[1]);
  output[8] = (step[0] - step[1]);

  temp1 = step[2] * C12;
  temp2 = step[3] * C4;
  temp1 = temp1 + temp2;
  output[4] = 2 * (temp1 * C8);

  temp1 = step[2] * C4;
  temp2 = step[3] * C12;
  temp1 = temp2 - temp1;
  output[12] = 2 * (temp1 * C8);

  output[2] = 2 * ((step[4] + step[5]) * C8);
  output[14] = 2 * ((step[7] - step[6]) * C8);

  temp1 = step[4] - step[5];
  temp2 = step[6] + step[7];
  output[6] = (temp1 + temp2);
  output[10] = (temp1 - temp2);

  intermediate[8] = step[8] + step[14];
  intermediate[9] = step[9] + step[15];

  temp1 = intermediate[8] * C12;
  temp2 = intermediate[9] * C4;
  temp1 = temp1 - temp2;
  output[3] = 2 * (temp1 * C8);

  temp1 = intermediate[8] * C4;
  temp2 = intermediate[9] * C12;
  temp1 = temp2 + temp1;
  output[13] = 2 * (temp1 * C8);

  output[9] = 2 * ((step[10] + step[11]) * C8);

  intermediate[11] = step[10] - step[11];
  intermediate[12] = step[12] + step[13];
  intermediate[13] = step[12] - step[13];
  intermediate[14] = step[8] - step[14];
  intermediate[15] = step[9] - step[15];

  output[15] = (intermediate[11] + intermediate[12]);
  output[1] = -(intermediate[11] - intermediate[12]);

  output[7] = 2 * (intermediate[13] * C8);

  temp1 = intermediate[14] * C12;
  temp2 = intermediate[15] * C4;
  temp1 = temp1 - temp2;
  output[11] = -2 * (temp1 * C8);

  temp1 = intermediate[14] * C4;
  temp2 = intermediate[15] * C12;
  temp1 = temp2 + temp1;
  output[5] = 2 * (temp1 * C8);
}

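// Double-precision reference 2-D 16x16 DCT: applies the 1-D butterfly above
// to each column of the block, then to each row.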
void reference_16x16_dct_2d(int16_t input[256], double output[256]) {
  // First transform columns
  for (int i = 0; i < 16; ++i) {
    double temp_in[16], temp_out[16];
    for (int j = 0; j < 16; ++j) temp_in[j] = input[j * 16 + i];
    butterfly_16x16_dct_1d(temp_in, temp_out);
    for (int j = 0; j < 16; ++j) output[j * 16 + i] = temp_out[j];
  }
  // Then transform rows
  for (int i = 0; i < 16; ++i) {
    double temp_in[16], temp_out[16];
    for (int j = 0; j < 16; ++j) temp_in[j] = output[j + i * 16];
    butterfly_16x16_dct_1d(temp_in, temp_out);
    // Scale down by 2 to match the coefficient range expected by the
    // inverse transform.
    for (int j = 0; j < 16; ++j) output[j + i * 16] = temp_out[j] / 2;
  }
}

typedef void (*FdctFunc)(const int16_t *in, tran_low_t *out, int stride);
typedef void (*IdctFunc)(const tran_low_t *in, uint8_t *out, int stride);
typedef void (*FhtFunc)(const int16_t *in, tran_low_t *out, int stride,
                        int tx_type);
typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
                        int tx_type);

typedef std::tr1::tuple<FdctFunc, IdctFunc, int, vpx_bit_depth_t> Dct16x16Param;
typedef std::tr1::tuple<FhtFunc, IhtFunc, int, vpx_bit_depth_t> Ht16x16Param;
typedef std::tr1::tuple<IdctFunc, IdctFunc, int, vpx_bit_depth_t>
    Idct16x16Param;

void fdct16x16_ref(const int16_t *in, tran_low_t *out, int stride,
                   int /*tx_type*/) {
  vpx_fdct16x16_c(in, out, stride);
}

void idct16x16_ref(const tran_low_t *in, uint8_t *dest, int stride,
                   int /*tx_type*/) {
  vpx_idct16x16_256_add_c(in, dest, stride);
}

void fht16x16_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) {
  vp9_fht16x16_c(in, out, stride, tx_type);
}

void iht16x16_ref(const tran_low_t *in, uint8_t *dest, int stride,
                  int tx_type) {
  vp9_iht16x16_256_add_c(in, dest, stride, tx_type);
}

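// The *_ref wrappers above (and the high bit depth wrappers below) adapt the
// plain C implementations to the common function-pointer signatures so that
// optimized versions can be compared against bit-exact reference output.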
#if CONFIG_VP9_HIGHBITDEPTH
void idct16x16_10(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct16x16_256_add_c(in, CAST_TO_SHORTPTR(out), stride, 10);
}

void idct16x16_12(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct16x16_256_add_c(in, CAST_TO_SHORTPTR(out), stride, 12);
}

void idct16x16_10_ref(const tran_low_t *in, uint8_t *out, int stride,
                      int /*tx_type*/) {
  idct16x16_10(in, out, stride);
}

void idct16x16_12_ref(const tran_low_t *in, uint8_t *out, int stride,
                      int /*tx_type*/) {
  idct16x16_12(in, out, stride);
}

void iht16x16_10(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
  vp9_highbd_iht16x16_256_add_c(in, CAST_TO_SHORTPTR(out), stride, tx_type, 10);
}

void iht16x16_12(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
  vp9_highbd_iht16x16_256_add_c(in, CAST_TO_SHORTPTR(out), stride, tx_type, 12);
}

#if HAVE_SSE2
void idct16x16_10_add_10_c(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct16x16_10_add_c(in, CAST_TO_SHORTPTR(out), stride, 10);
}

void idct16x16_10_add_12_c(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct16x16_10_add_c(in, CAST_TO_SHORTPTR(out), stride, 12);
}

void idct16x16_256_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct16x16_256_add_sse2(in, CAST_TO_SHORTPTR(out), stride, 10);
}

void idct16x16_256_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct16x16_256_add_sse2(in, CAST_TO_SHORTPTR(out), stride, 12);
}

void idct16x16_10_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct16x16_10_add_sse2(in, CAST_TO_SHORTPTR(out), stride, 10);
}

void idct16x16_10_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct16x16_10_add_sse2(in, CAST_TO_SHORTPTR(out), stride, 12);
}
#endif  // HAVE_SSE2
#endif  // CONFIG_VP9_HIGHBITDEPTH

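// Shared test logic. Subclasses plug the transforms under test in via
// RunFwdTxfm()/RunInvTxfm(); fwd_txfm_ref/inv_txfm_ref hold the matching
// C reference implementations.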
class Trans16x16TestBase {
 public:
  virtual ~Trans16x16TestBase() {}

 protected:
  virtual void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) = 0;

  virtual void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) = 0;

  void RunAccuracyCheck() {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    uint32_t max_error = 0;
    int64_t total_error = 0;
    const int count_test_block = 10000;
    for (int i = 0; i < count_test_block; ++i) {
      DECLARE_ALIGNED(16, int16_t, test_input_block[kNumCoeffs]);
      DECLARE_ALIGNED(16, tran_low_t, test_temp_block[kNumCoeffs]);
      DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
      DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
#if CONFIG_VP9_HIGHBITDEPTH
      DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
      DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
#endif

      // Initialize a test block with input range [-mask_, mask_].
      for (int j = 0; j < kNumCoeffs; ++j) {
        if (bit_depth_ == VPX_BITS_8) {
          src[j] = rnd.Rand8();
          dst[j] = rnd.Rand8();
          test_input_block[j] = src[j] - dst[j];
#if CONFIG_VP9_HIGHBITDEPTH
        } else {
          src16[j] = rnd.Rand16() & mask_;
          dst16[j] = rnd.Rand16() & mask_;
          test_input_block[j] = src16[j] - dst16[j];
#endif
        }
      }

      ASM_REGISTER_STATE_CHECK(
          RunFwdTxfm(test_input_block, test_temp_block, pitch_));
      if (bit_depth_ == VPX_BITS_8) {
        ASM_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst, pitch_));
#if CONFIG_VP9_HIGHBITDEPTH
      } else {
        ASM_REGISTER_STATE_CHECK(
            RunInvTxfm(test_temp_block, CAST_TO_BYTEPTR(dst16), pitch_));
#endif
      }

      for (int j = 0; j < kNumCoeffs; ++j) {
#if CONFIG_VP9_HIGHBITDEPTH
        const int32_t diff =
            bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
#else
        const int32_t diff = dst[j] - src[j];
#endif
        const uint32_t error = diff * diff;
        if (max_error < error) max_error = error;
        total_error += error;
      }
    }

    // The error bound scales with bit depth; e.g. at VPX_BITS_10 the allowed
    // per-pixel squared error is 1 << 2 * (10 - 8) = 16.
    EXPECT_GE(1u << 2 * (bit_depth_ - 8), max_error)
        << "Error: 16x16 FHT/IHT has an individual round trip error > 1";

    EXPECT_GE(count_test_block << 2 * (bit_depth_ - 8), total_error)
        << "Error: 16x16 FHT/IHT has average round trip error > 1 per block";
  }

  void RunCoeffCheck() {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 1000;
    DECLARE_ALIGNED(16, int16_t, input_block[kNumCoeffs]);
    DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
    DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);

    for (int i = 0; i < count_test_block; ++i) {
      // Initialize a test block with input range [-mask_, mask_].
      for (int j = 0; j < kNumCoeffs; ++j) {
        input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
      }

      fwd_txfm_ref(input_block, output_ref_block, pitch_, tx_type_);
      ASM_REGISTER_STATE_CHECK(RunFwdTxfm(input_block, output_block, pitch_));

      // The minimum quant value is 4.
      for (int j = 0; j < kNumCoeffs; ++j)
        EXPECT_EQ(output_block[j], output_ref_block[j]);
    }
  }

  void RunMemCheck() {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 1000;
    DECLARE_ALIGNED(16, int16_t, input_extreme_block[kNumCoeffs]);
    DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
    DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);

    for (int i = 0; i < count_test_block; ++i) {
      // Initialize a test block with input range [-mask_, mask_].
      for (int j = 0; j < kNumCoeffs; ++j) {
        input_extreme_block[j] = rnd.Rand8() % 2 ? mask_ : -mask_;
      }
      if (i == 0) {
        for (int j = 0; j < kNumCoeffs; ++j) input_extreme_block[j] = mask_;
      } else if (i == 1) {
        for (int j = 0; j < kNumCoeffs; ++j) input_extreme_block[j] = -mask_;
      }

      fwd_txfm_ref(input_extreme_block, output_ref_block, pitch_, tx_type_);
      ASM_REGISTER_STATE_CHECK(
          RunFwdTxfm(input_extreme_block, output_block, pitch_));

      // The minimum quant value is 4.
      for (int j = 0; j < kNumCoeffs; ++j) {
        EXPECT_EQ(output_block[j], output_ref_block[j]);
        EXPECT_GE(4 * DCT_MAX_VALUE << (bit_depth_ - 8), abs(output_block[j]))
            << "Error: 16x16 FDCT has coefficient larger than 4*DCT_MAX_VALUE";
      }
    }
  }

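  // Quantizes the reference forward-transform output with the given DC/AC
  // step sizes, then checks that the inverse transform under test and the
  // reference inverse transform reconstruct identical pixels.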
  void RunQuantCheck(int dc_thresh, int ac_thresh) {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 100000;
    DECLARE_ALIGNED(16, int16_t, input_extreme_block[kNumCoeffs]);
    DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);

    DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
    DECLARE_ALIGNED(16, uint8_t, ref[kNumCoeffs]);
#if CONFIG_VP9_HIGHBITDEPTH
    DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
    DECLARE_ALIGNED(16, uint16_t, ref16[kNumCoeffs]);
#endif

    for (int i = 0; i < count_test_block; ++i) {
      // Initialize a test block with input range [-mask_, mask_].
      for (int j = 0; j < kNumCoeffs; ++j) {
        input_extreme_block[j] = rnd.Rand8() % 2 ? mask_ : -mask_;
      }
      if (i == 0) {
        for (int j = 0; j < kNumCoeffs; ++j) input_extreme_block[j] = mask_;
      }
      if (i == 1) {
        for (int j = 0; j < kNumCoeffs; ++j) input_extreme_block[j] = -mask_;
      }

      fwd_txfm_ref(input_extreme_block, output_ref_block, pitch_, tx_type_);

      // clear reconstructed pixel buffers
      memset(dst, 0, kNumCoeffs * sizeof(uint8_t));
      memset(ref, 0, kNumCoeffs * sizeof(uint8_t));
#if CONFIG_VP9_HIGHBITDEPTH
      memset(dst16, 0, kNumCoeffs * sizeof(uint16_t));
      memset(ref16, 0, kNumCoeffs * sizeof(uint16_t));
#endif

      // quantization with maximum allowed step sizes
      output_ref_block[0] = (output_ref_block[0] / dc_thresh) * dc_thresh;
      for (int j = 1; j < kNumCoeffs; ++j) {
        output_ref_block[j] = (output_ref_block[j] / ac_thresh) * ac_thresh;
      }
      if (bit_depth_ == VPX_BITS_8) {
        inv_txfm_ref(output_ref_block, ref, pitch_, tx_type_);
        ASM_REGISTER_STATE_CHECK(RunInvTxfm(output_ref_block, dst, pitch_));
#if CONFIG_VP9_HIGHBITDEPTH
      } else {
        inv_txfm_ref(output_ref_block, CAST_TO_BYTEPTR(ref16), pitch_,
                     tx_type_);
        ASM_REGISTER_STATE_CHECK(
            RunInvTxfm(output_ref_block, CAST_TO_BYTEPTR(dst16), pitch_));
#endif
      }
      if (bit_depth_ == VPX_BITS_8) {
        for (int j = 0; j < kNumCoeffs; ++j) EXPECT_EQ(ref[j], dst[j]);
#if CONFIG_VP9_HIGHBITDEPTH
      } else {
        for (int j = 0; j < kNumCoeffs; ++j) EXPECT_EQ(ref16[j], dst16[j]);
#endif
      }
    }
  }

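  // Feeds coefficients produced by the double-precision reference forward DCT
  // into the inverse transform under test and requires each reconstructed
  // pixel to be within 1 of the corresponding source pixel.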
  void RunInvAccuracyCheck() {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 1000;
    DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
    DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
    DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
    DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
#if CONFIG_VP9_HIGHBITDEPTH
    DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
    DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
#endif  // CONFIG_VP9_HIGHBITDEPTH

    for (int i = 0; i < count_test_block; ++i) {
      double out_r[kNumCoeffs];

      // Initialize a test block with input range [-255, 255].
      for (int j = 0; j < kNumCoeffs; ++j) {
        if (bit_depth_ == VPX_BITS_8) {
          src[j] = rnd.Rand8();
          dst[j] = rnd.Rand8();
          in[j] = src[j] - dst[j];
#if CONFIG_VP9_HIGHBITDEPTH
        } else {
          src16[j] = rnd.Rand16() & mask_;
          dst16[j] = rnd.Rand16() & mask_;
          in[j] = src16[j] - dst16[j];
#endif  // CONFIG_VP9_HIGHBITDEPTH
        }
      }

      reference_16x16_dct_2d(in, out_r);
      for (int j = 0; j < kNumCoeffs; ++j) {
        coeff[j] = static_cast<tran_low_t>(round(out_r[j]));
      }

      if (bit_depth_ == VPX_BITS_8) {
        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, 16));
#if CONFIG_VP9_HIGHBITDEPTH
      } else {
        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, CAST_TO_BYTEPTR(dst16), 16));
#endif  // CONFIG_VP9_HIGHBITDEPTH
      }

      for (int j = 0; j < kNumCoeffs; ++j) {
#if CONFIG_VP9_HIGHBITDEPTH
        const uint32_t diff =
            bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
#else
        const uint32_t diff = dst[j] - src[j];
#endif  // CONFIG_VP9_HIGHBITDEPTH
        const uint32_t error = diff * diff;
        EXPECT_GE(1u, error)
            << "Error: 16x16 IDCT has error " << error << " at index " << j;
      }
    }
  }

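  // Builds sparse coefficient blocks (only the first |eob| positions in scan
  // order are non-zero, each smaller than |thresh| in magnitude) and requires
  // the inverse transform under test to match |ref_txfm| exactly.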
  void CompareInvReference(IdctFunc ref_txfm, int thresh) {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 10000;
    const int eob = 10;
    const int16_t *scan = vp9_default_scan_orders[TX_16X16].scan;
    DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
    DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
    DECLARE_ALIGNED(16, uint8_t, ref[kNumCoeffs]);
#if CONFIG_VP9_HIGHBITDEPTH
    DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
    DECLARE_ALIGNED(16, uint16_t, ref16[kNumCoeffs]);
#endif  // CONFIG_VP9_HIGHBITDEPTH

    for (int i = 0; i < count_test_block; ++i) {
      for (int j = 0; j < kNumCoeffs; ++j) {
        if (j < eob) {
          // Random values less than the threshold, either positive or negative
          coeff[scan[j]] = rnd(thresh) * (1 - 2 * (i % 2));
        } else {
          coeff[scan[j]] = 0;
        }
        if (bit_depth_ == VPX_BITS_8) {
          dst[j] = 0;
          ref[j] = 0;
#if CONFIG_VP9_HIGHBITDEPTH
        } else {
          dst16[j] = 0;
          ref16[j] = 0;
#endif  // CONFIG_VP9_HIGHBITDEPTH
        }
      }
      if (bit_depth_ == VPX_BITS_8) {
        ref_txfm(coeff, ref, pitch_);
        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
      } else {
#if CONFIG_VP9_HIGHBITDEPTH
        ref_txfm(coeff, CAST_TO_BYTEPTR(ref16), pitch_);
        ASM_REGISTER_STATE_CHECK(
            RunInvTxfm(coeff, CAST_TO_BYTEPTR(dst16), pitch_));
#endif  // CONFIG_VP9_HIGHBITDEPTH
      }

      for (int j = 0; j < kNumCoeffs; ++j) {
#if CONFIG_VP9_HIGHBITDEPTH
        const uint32_t diff =
            bit_depth_ == VPX_BITS_8 ? dst[j] - ref[j] : dst16[j] - ref16[j];
#else
        const uint32_t diff = dst[j] - ref[j];
#endif  // CONFIG_VP9_HIGHBITDEPTH
        const uint32_t error = diff * diff;
        EXPECT_EQ(0u, error) << "Error: 16x16 IDCT Comparison has error "
                             << error << " at index " << j;
      }
    }
  }

  int pitch_;
  int tx_type_;
  vpx_bit_depth_t bit_depth_;
  int mask_;
  FhtFunc fwd_txfm_ref;
  IhtFunc inv_txfm_ref;
};

class Trans16x16DCT : public Trans16x16TestBase,
                      public ::testing::TestWithParam<Dct16x16Param> {
 public:
  virtual ~Trans16x16DCT() {}

  virtual void SetUp() {
    fwd_txfm_ = GET_PARAM(0);
    inv_txfm_ = GET_PARAM(1);
    tx_type_ = GET_PARAM(2);
    bit_depth_ = GET_PARAM(3);
    pitch_ = 16;
    fwd_txfm_ref = fdct16x16_ref;
    inv_txfm_ref = idct16x16_ref;
    mask_ = (1 << bit_depth_) - 1;
#if CONFIG_VP9_HIGHBITDEPTH
    switch (bit_depth_) {
      case VPX_BITS_10: inv_txfm_ref = idct16x16_10_ref; break;
      case VPX_BITS_12: inv_txfm_ref = idct16x16_12_ref; break;
      default: inv_txfm_ref = idct16x16_ref; break;
    }
#else
    inv_txfm_ref = idct16x16_ref;
#endif
  }
  virtual void TearDown() { libvpx_test::ClearSystemState(); }

 protected:
  void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) {
    fwd_txfm_(in, out, stride);
  }
  void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
    inv_txfm_(out, dst, stride);
  }

  FdctFunc fwd_txfm_;
  IdctFunc inv_txfm_;
};

TEST_P(Trans16x16DCT, AccuracyCheck) { RunAccuracyCheck(); }

TEST_P(Trans16x16DCT, CoeffCheck) { RunCoeffCheck(); }

TEST_P(Trans16x16DCT, MemCheck) { RunMemCheck(); }

TEST_P(Trans16x16DCT, QuantCheck) {
  // Use the maximum allowed quantization step sizes for DC and AC
  // coefficients respectively.
  RunQuantCheck(1336, 1828);
}

TEST_P(Trans16x16DCT, InvAccuracyCheck) { RunInvAccuracyCheck(); }

class Trans16x16HT : public Trans16x16TestBase,
                     public ::testing::TestWithParam<Ht16x16Param> {
 public:
  virtual ~Trans16x16HT() {}

  virtual void SetUp() {
    fwd_txfm_ = GET_PARAM(0);
    inv_txfm_ = GET_PARAM(1);
    tx_type_ = GET_PARAM(2);
    bit_depth_ = GET_PARAM(3);
    pitch_ = 16;
    fwd_txfm_ref = fht16x16_ref;
    inv_txfm_ref = iht16x16_ref;
    mask_ = (1 << bit_depth_) - 1;
#if CONFIG_VP9_HIGHBITDEPTH
    switch (bit_depth_) {
      case VPX_BITS_10: inv_txfm_ref = iht16x16_10; break;
      case VPX_BITS_12: inv_txfm_ref = iht16x16_12; break;
      default: inv_txfm_ref = iht16x16_ref; break;
    }
#else
    inv_txfm_ref = iht16x16_ref;
#endif
  }
  virtual void TearDown() { libvpx_test::ClearSystemState(); }

 protected:
  void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) {
    fwd_txfm_(in, out, stride, tx_type_);
  }
  void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
    inv_txfm_(out, dst, stride, tx_type_);
  }

  FhtFunc fwd_txfm_;
  IhtFunc inv_txfm_;
};

TEST_P(Trans16x16HT, AccuracyCheck) { RunAccuracyCheck(); }

TEST_P(Trans16x16HT, CoeffCheck) { RunCoeffCheck(); }

TEST_P(Trans16x16HT, MemCheck) { RunMemCheck(); }

TEST_P(Trans16x16HT, QuantCheck) {
  // The encoder skips any non-DC intra prediction modes when the quantization
  // step size exceeds 988.
  RunQuantCheck(429, 729);
}

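// Inverse-only comparison: a reference IDCT and the IDCT under test are run
// on the same sparse coefficient blocks and must produce identical output.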
class InvTrans16x16DCT : public Trans16x16TestBase,
                         public ::testing::TestWithParam<Idct16x16Param> {
 public:
  virtual ~InvTrans16x16DCT() {}

  virtual void SetUp() {
    ref_txfm_ = GET_PARAM(0);
    inv_txfm_ = GET_PARAM(1);
    thresh_ = GET_PARAM(2);
    bit_depth_ = GET_PARAM(3);
    pitch_ = 16;
    mask_ = (1 << bit_depth_) - 1;
  }
  virtual void TearDown() { libvpx_test::ClearSystemState(); }

 protected:
  void RunFwdTxfm(int16_t * /*in*/, tran_low_t * /*out*/, int /*stride*/) {}
  void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
    inv_txfm_(out, dst, stride);
  }

  IdctFunc ref_txfm_;
  IdctFunc inv_txfm_;
  int thresh_;
};

TEST_P(InvTrans16x16DCT, CompareReference) {
  CompareInvReference(ref_txfm_, thresh_);
}

using std::tr1::make_tuple;

#if CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
    C, Trans16x16DCT,
    ::testing::Values(
        make_tuple(&vpx_highbd_fdct16x16_c, &idct16x16_10, 0, VPX_BITS_10),
        make_tuple(&vpx_highbd_fdct16x16_c, &idct16x16_12, 0, VPX_BITS_12),
        make_tuple(&vpx_fdct16x16_c, &vpx_idct16x16_256_add_c, 0, VPX_BITS_8)));
#else
INSTANTIATE_TEST_CASE_P(C, Trans16x16DCT,
                        ::testing::Values(make_tuple(&vpx_fdct16x16_c,
                                                     &vpx_idct16x16_256_add_c,
                                                     0, VPX_BITS_8)));
#endif  // CONFIG_VP9_HIGHBITDEPTH

#if CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
    C, Trans16x16HT,
    ::testing::Values(
        make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_10, 0, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_10, 1, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_10, 2, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_10, 3, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_12, 0, VPX_BITS_12),
        make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_12, 1, VPX_BITS_12),
        make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_12, 2, VPX_BITS_12),
        make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_12, 3, VPX_BITS_12),
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 0, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 2, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 3, VPX_BITS_8)));
#else
INSTANTIATE_TEST_CASE_P(
    C, Trans16x16HT,
    ::testing::Values(
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 0, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 2, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 3, VPX_BITS_8)));
#endif  // CONFIG_VP9_HIGHBITDEPTH

#if HAVE_NEON && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
    NEON, Trans16x16DCT,
    ::testing::Values(make_tuple(&vpx_fdct16x16_neon,
                                 &vpx_idct16x16_256_add_neon, 0, VPX_BITS_8)));
#endif  // HAVE_NEON && !CONFIG_EMULATE_HARDWARE

#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
    SSE2, Trans16x16DCT,
    ::testing::Values(make_tuple(&vpx_fdct16x16_sse2,
                                 &vpx_idct16x16_256_add_sse2, 0, VPX_BITS_8)));
INSTANTIATE_TEST_CASE_P(
    SSE2, Trans16x16HT,
    ::testing::Values(make_tuple(&vp9_fht16x16_sse2,
                                 &vp9_iht16x16_256_add_sse2, 0, VPX_BITS_8),
                      make_tuple(&vp9_fht16x16_sse2,
                                 &vp9_iht16x16_256_add_sse2, 1, VPX_BITS_8),
                      make_tuple(&vp9_fht16x16_sse2,
                                 &vp9_iht16x16_256_add_sse2, 2, VPX_BITS_8),
                      make_tuple(&vp9_fht16x16_sse2,
                                 &vp9_iht16x16_256_add_sse2, 3, VPX_BITS_8)));
#endif  // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

#if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
    SSE2, Trans16x16DCT,
    ::testing::Values(
        make_tuple(&vpx_highbd_fdct16x16_sse2, &idct16x16_10, 0, VPX_BITS_10),
        make_tuple(&vpx_highbd_fdct16x16_c, &idct16x16_256_add_10_sse2, 0,
                   VPX_BITS_10),
        make_tuple(&vpx_highbd_fdct16x16_sse2, &idct16x16_12, 0, VPX_BITS_12),
        make_tuple(&vpx_highbd_fdct16x16_c, &idct16x16_256_add_12_sse2, 0,
                   VPX_BITS_12),
        make_tuple(&vpx_fdct16x16_sse2, &vpx_idct16x16_256_add_c, 0,
                   VPX_BITS_8)));
INSTANTIATE_TEST_CASE_P(
    SSE2, Trans16x16HT,
    ::testing::Values(
        make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_c, 0, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_c, 2, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_c, 3,
                   VPX_BITS_8)));
// Optimizations take effect at a threshold of 3155, so we use a value close to
// that to test both branches.
INSTANTIATE_TEST_CASE_P(
    SSE2, InvTrans16x16DCT,
    ::testing::Values(make_tuple(&idct16x16_10_add_10_c,
                                 &idct16x16_10_add_10_sse2, 3167, VPX_BITS_10),
                      make_tuple(&idct16x16_10, &idct16x16_256_add_10_sse2,
                                 3167, VPX_BITS_10),
                      make_tuple(&idct16x16_10_add_12_c,
                                 &idct16x16_10_add_12_sse2, 3167, VPX_BITS_12),
                      make_tuple(&idct16x16_12, &idct16x16_256_add_12_sse2,
                                 3167, VPX_BITS_12)));
#endif  // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

#if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(MSA, Trans16x16DCT,
                        ::testing::Values(make_tuple(&vpx_fdct16x16_msa,
                                                     &vpx_idct16x16_256_add_msa,
                                                     0, VPX_BITS_8)));
INSTANTIATE_TEST_CASE_P(
    MSA, Trans16x16HT,
    ::testing::Values(
        make_tuple(&vp9_fht16x16_msa, &vp9_iht16x16_256_add_msa, 0, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_msa, &vp9_iht16x16_256_add_msa, 1, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_msa, &vp9_iht16x16_256_add_msa, 2, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_msa, &vp9_iht16x16_256_add_msa, 3,
                   VPX_BITS_8)));
#endif  // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

#if HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(VSX, Trans16x16DCT,
                        ::testing::Values(make_tuple(&vpx_fdct16x16_c,
                                                     &vpx_idct16x16_256_add_vsx,
                                                     0, VPX_BITS_8)));
#endif  // HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
}  // namespace