1 /* 2 * Copyright (c) 2012 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include <math.h> 12 #include <stdlib.h> 13 #include <string.h> 14 15 #include "third_party/googletest/src/include/gtest/gtest.h" 16 #include "test/acm_random.h" 17 #include "test/clear_system_state.h" 18 #include "test/register_state_check.h" 19 #include "test/util.h" 20 21 #include "./vp9_rtcd.h" 22 #include "vp9/common/vp9_entropy.h" 23 #include "vpx/vpx_integer.h" 24 25 extern "C" { 26 void vp9_idct16x16_256_add_c(const int16_t *input, uint8_t *output, int pitch); 27 } 28 29 using libvpx_test::ACMRandom; 30 31 namespace { 32 33 #ifdef _MSC_VER 34 static int round(double x) { 35 if (x < 0) 36 return static_cast<int>(ceil(x - 0.5)); 37 else 38 return static_cast<int>(floor(x + 0.5)); 39 } 40 #endif 41 42 const int kNumCoeffs = 256; 43 const double PI = 3.1415926535898; 44 void reference2_16x16_idct_2d(double *input, double *output) { 45 double x; 46 for (int l = 0; l < 16; ++l) { 47 for (int k = 0; k < 16; ++k) { 48 double s = 0; 49 for (int i = 0; i < 16; ++i) { 50 for (int j = 0; j < 16; ++j) { 51 x = cos(PI * j * (l + 0.5) / 16.0) * 52 cos(PI * i * (k + 0.5) / 16.0) * 53 input[i * 16 + j] / 256; 54 if (i != 0) 55 x *= sqrt(2.0); 56 if (j != 0) 57 x *= sqrt(2.0); 58 s += x; 59 } 60 } 61 output[k*16+l] = s; 62 } 63 } 64 } 65 66 67 const double C1 = 0.995184726672197; 68 const double C2 = 0.98078528040323; 69 const double C3 = 0.956940335732209; 70 const double C4 = 0.923879532511287; 71 const double C5 = 0.881921264348355; 72 const double C6 = 0.831469612302545; 73 const double C7 = 0.773010453362737; 74 const double C8 = 0.707106781186548; 75 const double C9 = 0.634393284163646; 76 const double C10 = 0.555570233019602; 77 const double C11 = 0.471396736825998; 78 const double C12 = 0.38268343236509; 79 const double C13 = 0.290284677254462; 80 const double C14 = 0.195090322016128; 81 const double C15 = 0.098017140329561; 82 83 void butterfly_16x16_dct_1d(double input[16], double output[16]) { 84 double step[16]; 85 double intermediate[16]; 86 double temp1, temp2; 87 88 // step 1 89 step[ 0] = input[0] + input[15]; 90 step[ 1] = input[1] + input[14]; 91 step[ 2] = input[2] + input[13]; 92 step[ 3] = input[3] + input[12]; 93 step[ 4] = input[4] + input[11]; 94 step[ 5] = input[5] + input[10]; 95 step[ 6] = input[6] + input[ 9]; 96 step[ 7] = input[7] + input[ 8]; 97 step[ 8] = input[7] - input[ 8]; 98 step[ 9] = input[6] - input[ 9]; 99 step[10] = input[5] - input[10]; 100 step[11] = input[4] - input[11]; 101 step[12] = input[3] - input[12]; 102 step[13] = input[2] - input[13]; 103 step[14] = input[1] - input[14]; 104 step[15] = input[0] - input[15]; 105 106 // step 2 107 output[0] = step[0] + step[7]; 108 output[1] = step[1] + step[6]; 109 output[2] = step[2] + step[5]; 110 output[3] = step[3] + step[4]; 111 output[4] = step[3] - step[4]; 112 output[5] = step[2] - step[5]; 113 output[6] = step[1] - step[6]; 114 output[7] = step[0] - step[7]; 115 116 temp1 = step[ 8] * C7; 117 temp2 = step[15] * C9; 118 output[ 8] = temp1 + temp2; 119 120 temp1 = step[ 9] * C11; 121 temp2 = step[14] * C5; 122 output[ 9] = temp1 - temp2; 123 124 temp1 = step[10] * C3; 125 temp2 = step[13] * C13; 126 output[10] = temp1 + temp2; 127 128 temp1 = step[11] * C15; 129 temp2 = step[12] * C1; 130 output[11] = temp1 - temp2; 131 132 temp1 = step[11] * C1; 133 temp2 = step[12] * C15; 134 output[12] = temp2 + temp1; 135 136 temp1 = step[10] * C13; 137 temp2 = step[13] * C3; 138 output[13] = temp2 - temp1; 139 140 temp1 = step[ 9] * C5; 141 temp2 = step[14] * C11; 142 output[14] = temp2 + temp1; 143 144 temp1 = step[ 8] * C9; 145 temp2 = step[15] * C7; 146 output[15] = temp2 - temp1; 147 148 // step 3 149 step[ 0] = output[0] + output[3]; 150 step[ 1] = output[1] + output[2]; 151 step[ 2] = output[1] - output[2]; 152 step[ 3] = output[0] - output[3]; 153 154 temp1 = output[4] * C14; 155 temp2 = output[7] * C2; 156 step[ 4] = temp1 + temp2; 157 158 temp1 = output[5] * C10; 159 temp2 = output[6] * C6; 160 step[ 5] = temp1 + temp2; 161 162 temp1 = output[5] * C6; 163 temp2 = output[6] * C10; 164 step[ 6] = temp2 - temp1; 165 166 temp1 = output[4] * C2; 167 temp2 = output[7] * C14; 168 step[ 7] = temp2 - temp1; 169 170 step[ 8] = output[ 8] + output[11]; 171 step[ 9] = output[ 9] + output[10]; 172 step[10] = output[ 9] - output[10]; 173 step[11] = output[ 8] - output[11]; 174 175 step[12] = output[12] + output[15]; 176 step[13] = output[13] + output[14]; 177 step[14] = output[13] - output[14]; 178 step[15] = output[12] - output[15]; 179 180 // step 4 181 output[ 0] = (step[ 0] + step[ 1]); 182 output[ 8] = (step[ 0] - step[ 1]); 183 184 temp1 = step[2] * C12; 185 temp2 = step[3] * C4; 186 temp1 = temp1 + temp2; 187 output[ 4] = 2*(temp1 * C8); 188 189 temp1 = step[2] * C4; 190 temp2 = step[3] * C12; 191 temp1 = temp2 - temp1; 192 output[12] = 2 * (temp1 * C8); 193 194 output[ 2] = 2 * ((step[4] + step[ 5]) * C8); 195 output[14] = 2 * ((step[7] - step[ 6]) * C8); 196 197 temp1 = step[4] - step[5]; 198 temp2 = step[6] + step[7]; 199 output[ 6] = (temp1 + temp2); 200 output[10] = (temp1 - temp2); 201 202 intermediate[8] = step[8] + step[14]; 203 intermediate[9] = step[9] + step[15]; 204 205 temp1 = intermediate[8] * C12; 206 temp2 = intermediate[9] * C4; 207 temp1 = temp1 - temp2; 208 output[3] = 2 * (temp1 * C8); 209 210 temp1 = intermediate[8] * C4; 211 temp2 = intermediate[9] * C12; 212 temp1 = temp2 + temp1; 213 output[13] = 2 * (temp1 * C8); 214 215 output[ 9] = 2 * ((step[10] + step[11]) * C8); 216 217 intermediate[11] = step[10] - step[11]; 218 intermediate[12] = step[12] + step[13]; 219 intermediate[13] = step[12] - step[13]; 220 intermediate[14] = step[ 8] - step[14]; 221 intermediate[15] = step[ 9] - step[15]; 222 223 output[15] = (intermediate[11] + intermediate[12]); 224 output[ 1] = -(intermediate[11] - intermediate[12]); 225 226 output[ 7] = 2 * (intermediate[13] * C8); 227 228 temp1 = intermediate[14] * C12; 229 temp2 = intermediate[15] * C4; 230 temp1 = temp1 - temp2; 231 output[11] = -2 * (temp1 * C8); 232 233 temp1 = intermediate[14] * C4; 234 temp2 = intermediate[15] * C12; 235 temp1 = temp2 + temp1; 236 output[ 5] = 2 * (temp1 * C8); 237 } 238 239 void reference_16x16_dct_2d(int16_t input[256], double output[256]) { 240 // First transform columns 241 for (int i = 0; i < 16; ++i) { 242 double temp_in[16], temp_out[16]; 243 for (int j = 0; j < 16; ++j) 244 temp_in[j] = input[j * 16 + i]; 245 butterfly_16x16_dct_1d(temp_in, temp_out); 246 for (int j = 0; j < 16; ++j) 247 output[j * 16 + i] = temp_out[j]; 248 } 249 // Then transform rows 250 for (int i = 0; i < 16; ++i) { 251 double temp_in[16], temp_out[16]; 252 for (int j = 0; j < 16; ++j) 253 temp_in[j] = output[j + i * 16]; 254 butterfly_16x16_dct_1d(temp_in, temp_out); 255 // Scale by some magic number 256 for (int j = 0; j < 16; ++j) 257 output[j + i * 16] = temp_out[j]/2; 258 } 259 } 260 261 typedef void (*fdct_t)(const int16_t *in, int16_t *out, int stride); 262 typedef void (*idct_t)(const int16_t *in, uint8_t *out, int stride); 263 typedef void (*fht_t) (const int16_t *in, int16_t *out, int stride, 264 int tx_type); 265 typedef void (*iht_t) (const int16_t *in, uint8_t *out, int stride, 266 int tx_type); 267 268 typedef std::tr1::tuple<fdct_t, idct_t, int> dct_16x16_param_t; 269 typedef std::tr1::tuple<fht_t, iht_t, int> ht_16x16_param_t; 270 271 void fdct16x16_ref(const int16_t *in, int16_t *out, int stride, int tx_type) { 272 vp9_fdct16x16_c(in, out, stride); 273 } 274 275 void fht16x16_ref(const int16_t *in, int16_t *out, int stride, int tx_type) { 276 vp9_fht16x16_c(in, out, stride, tx_type); 277 } 278 279 class Trans16x16TestBase { 280 public: 281 virtual ~Trans16x16TestBase() {} 282 283 protected: 284 virtual void RunFwdTxfm(int16_t *in, int16_t *out, int stride) = 0; 285 286 virtual void RunInvTxfm(int16_t *out, uint8_t *dst, int stride) = 0; 287 288 void RunAccuracyCheck() { 289 ACMRandom rnd(ACMRandom::DeterministicSeed()); 290 uint32_t max_error = 0; 291 int64_t total_error = 0; 292 const int count_test_block = 10000; 293 for (int i = 0; i < count_test_block; ++i) { 294 DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, kNumCoeffs); 295 DECLARE_ALIGNED_ARRAY(16, int16_t, test_temp_block, kNumCoeffs); 296 DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs); 297 DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs); 298 299 // Initialize a test block with input range [-255, 255]. 300 for (int j = 0; j < kNumCoeffs; ++j) { 301 src[j] = rnd.Rand8(); 302 dst[j] = rnd.Rand8(); 303 test_input_block[j] = src[j] - dst[j]; 304 } 305 306 REGISTER_STATE_CHECK(RunFwdTxfm(test_input_block, 307 test_temp_block, pitch_)); 308 REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst, pitch_)); 309 310 for (int j = 0; j < kNumCoeffs; ++j) { 311 const uint32_t diff = dst[j] - src[j]; 312 const uint32_t error = diff * diff; 313 if (max_error < error) 314 max_error = error; 315 total_error += error; 316 } 317 } 318 319 EXPECT_GE(1u, max_error) 320 << "Error: 16x16 FHT/IHT has an individual round trip error > 1"; 321 322 EXPECT_GE(count_test_block , total_error) 323 << "Error: 16x16 FHT/IHT has average round trip error > 1 per block"; 324 } 325 326 void RunCoeffCheck() { 327 ACMRandom rnd(ACMRandom::DeterministicSeed()); 328 const int count_test_block = 1000; 329 DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs); 330 DECLARE_ALIGNED_ARRAY(16, int16_t, output_ref_block, kNumCoeffs); 331 DECLARE_ALIGNED_ARRAY(16, int16_t, output_block, kNumCoeffs); 332 333 for (int i = 0; i < count_test_block; ++i) { 334 // Initialize a test block with input range [-255, 255]. 335 for (int j = 0; j < kNumCoeffs; ++j) 336 input_block[j] = rnd.Rand8() - rnd.Rand8(); 337 338 fwd_txfm_ref(input_block, output_ref_block, pitch_, tx_type_); 339 REGISTER_STATE_CHECK(RunFwdTxfm(input_block, output_block, pitch_)); 340 341 // The minimum quant value is 4. 342 for (int j = 0; j < kNumCoeffs; ++j) 343 EXPECT_EQ(output_block[j], output_ref_block[j]); 344 } 345 } 346 347 void RunMemCheck() { 348 ACMRandom rnd(ACMRandom::DeterministicSeed()); 349 const int count_test_block = 1000; 350 DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs); 351 DECLARE_ALIGNED_ARRAY(16, int16_t, input_extreme_block, kNumCoeffs); 352 DECLARE_ALIGNED_ARRAY(16, int16_t, output_ref_block, kNumCoeffs); 353 DECLARE_ALIGNED_ARRAY(16, int16_t, output_block, kNumCoeffs); 354 355 for (int i = 0; i < count_test_block; ++i) { 356 // Initialize a test block with input range [-255, 255]. 357 for (int j = 0; j < kNumCoeffs; ++j) { 358 input_block[j] = rnd.Rand8() - rnd.Rand8(); 359 input_extreme_block[j] = rnd.Rand8() % 2 ? 255 : -255; 360 } 361 if (i == 0) 362 for (int j = 0; j < kNumCoeffs; ++j) 363 input_extreme_block[j] = 255; 364 if (i == 1) 365 for (int j = 0; j < kNumCoeffs; ++j) 366 input_extreme_block[j] = -255; 367 368 fwd_txfm_ref(input_extreme_block, output_ref_block, pitch_, tx_type_); 369 REGISTER_STATE_CHECK(RunFwdTxfm(input_extreme_block, 370 output_block, pitch_)); 371 372 // The minimum quant value is 4. 373 for (int j = 0; j < kNumCoeffs; ++j) { 374 EXPECT_EQ(output_block[j], output_ref_block[j]); 375 EXPECT_GE(4 * DCT_MAX_VALUE, abs(output_block[j])) 376 << "Error: 16x16 FDCT has coefficient larger than 4*DCT_MAX_VALUE"; 377 } 378 } 379 } 380 381 void RunInvAccuracyCheck() { 382 ACMRandom rnd(ACMRandom::DeterministicSeed()); 383 const int count_test_block = 1000; 384 DECLARE_ALIGNED_ARRAY(16, int16_t, in, kNumCoeffs); 385 DECLARE_ALIGNED_ARRAY(16, int16_t, coeff, kNumCoeffs); 386 DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs); 387 DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs); 388 389 for (int i = 0; i < count_test_block; ++i) { 390 double out_r[kNumCoeffs]; 391 392 // Initialize a test block with input range [-255, 255]. 393 for (int j = 0; j < kNumCoeffs; ++j) { 394 src[j] = rnd.Rand8(); 395 dst[j] = rnd.Rand8(); 396 in[j] = src[j] - dst[j]; 397 } 398 399 reference_16x16_dct_2d(in, out_r); 400 for (int j = 0; j < kNumCoeffs; ++j) 401 coeff[j] = round(out_r[j]); 402 403 REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, 16)); 404 405 for (int j = 0; j < kNumCoeffs; ++j) { 406 const uint32_t diff = dst[j] - src[j]; 407 const uint32_t error = diff * diff; 408 EXPECT_GE(1u, error) 409 << "Error: 16x16 IDCT has error " << error 410 << " at index " << j; 411 } 412 } 413 } 414 int pitch_; 415 int tx_type_; 416 fht_t fwd_txfm_ref; 417 }; 418 419 class Trans16x16DCT 420 : public Trans16x16TestBase, 421 public ::testing::TestWithParam<dct_16x16_param_t> { 422 public: 423 virtual ~Trans16x16DCT() {} 424 425 virtual void SetUp() { 426 fwd_txfm_ = GET_PARAM(0); 427 inv_txfm_ = GET_PARAM(1); 428 tx_type_ = GET_PARAM(2); 429 pitch_ = 16; 430 fwd_txfm_ref = fdct16x16_ref; 431 } 432 virtual void TearDown() { libvpx_test::ClearSystemState(); } 433 434 protected: 435 void RunFwdTxfm(int16_t *in, int16_t *out, int stride) { 436 fwd_txfm_(in, out, stride); 437 } 438 void RunInvTxfm(int16_t *out, uint8_t *dst, int stride) { 439 inv_txfm_(out, dst, stride); 440 } 441 442 fdct_t fwd_txfm_; 443 idct_t inv_txfm_; 444 }; 445 446 TEST_P(Trans16x16DCT, AccuracyCheck) { 447 RunAccuracyCheck(); 448 } 449 450 TEST_P(Trans16x16DCT, CoeffCheck) { 451 RunCoeffCheck(); 452 } 453 454 TEST_P(Trans16x16DCT, MemCheck) { 455 RunMemCheck(); 456 } 457 458 TEST_P(Trans16x16DCT, InvAccuracyCheck) { 459 RunInvAccuracyCheck(); 460 } 461 462 class Trans16x16HT 463 : public Trans16x16TestBase, 464 public ::testing::TestWithParam<ht_16x16_param_t> { 465 public: 466 virtual ~Trans16x16HT() {} 467 468 virtual void SetUp() { 469 fwd_txfm_ = GET_PARAM(0); 470 inv_txfm_ = GET_PARAM(1); 471 tx_type_ = GET_PARAM(2); 472 pitch_ = 16; 473 fwd_txfm_ref = fht16x16_ref; 474 } 475 virtual void TearDown() { libvpx_test::ClearSystemState(); } 476 477 protected: 478 void RunFwdTxfm(int16_t *in, int16_t *out, int stride) { 479 fwd_txfm_(in, out, stride, tx_type_); 480 } 481 void RunInvTxfm(int16_t *out, uint8_t *dst, int stride) { 482 inv_txfm_(out, dst, stride, tx_type_); 483 } 484 485 fht_t fwd_txfm_; 486 iht_t inv_txfm_; 487 }; 488 489 TEST_P(Trans16x16HT, AccuracyCheck) { 490 RunAccuracyCheck(); 491 } 492 493 TEST_P(Trans16x16HT, CoeffCheck) { 494 RunCoeffCheck(); 495 } 496 497 TEST_P(Trans16x16HT, MemCheck) { 498 RunMemCheck(); 499 } 500 501 using std::tr1::make_tuple; 502 503 INSTANTIATE_TEST_CASE_P( 504 C, Trans16x16DCT, 505 ::testing::Values( 506 make_tuple(&vp9_fdct16x16_c, &vp9_idct16x16_256_add_c, 0))); 507 INSTANTIATE_TEST_CASE_P( 508 C, Trans16x16HT, 509 ::testing::Values( 510 make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 0), 511 make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 1), 512 make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 2), 513 make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 3))); 514 515 #if HAVE_NEON 516 INSTANTIATE_TEST_CASE_P( 517 NEON, Trans16x16DCT, 518 ::testing::Values( 519 make_tuple(&vp9_fdct16x16_c, 520 &vp9_idct16x16_256_add_neon, 0))); 521 #endif 522 523 #if HAVE_SSE2 524 INSTANTIATE_TEST_CASE_P( 525 SSE2, Trans16x16DCT, 526 ::testing::Values( 527 make_tuple(&vp9_fdct16x16_sse2, 528 &vp9_idct16x16_256_add_sse2, 0))); 529 INSTANTIATE_TEST_CASE_P( 530 SSE2, Trans16x16HT, 531 ::testing::Values( 532 make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 0), 533 make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 1), 534 make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 2), 535 make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 3))); 536 #endif 537 } // namespace 538