1 /* 2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved 3 * 4 * This source code is subject to the terms of the BSD 2 Clause License and 5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License 6 * was not distributed with this source code in the LICENSE file, you can 7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open 8 * Media Patent License 1.0 was not distributed with this source code in the 9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 10 */ 11 12 #include <cmath> 13 #include <cstdlib> 14 #include <string> 15 16 #include "third_party/googletest/src/googletest/include/gtest/gtest.h" 17 18 #include "config/aom_config.h" 19 #include "config/aom_dsp_rtcd.h" 20 21 #include "aom_ports/mem.h" 22 #include "test/acm_random.h" 23 #include "test/clear_system_state.h" 24 #include "test/register_state_check.h" 25 #include "test/util.h" 26 #include "test/function_equivalence_test.h" 27 28 using libaom_test::ACMRandom; 29 using libaom_test::FunctionEquivalenceTest; 30 using ::testing::Combine; 31 using ::testing::Range; 32 using ::testing::Values; 33 using ::testing::ValuesIn; 34 35 namespace { 36 const int kNumIterations = 10000; 37 38 static const int16_t kInt13Max = (1 << 12) - 1; 39 40 typedef uint64_t (*SSI16Func)(const int16_t *src, int stride, int width, 41 int height); 42 typedef libaom_test::FuncParam<SSI16Func> TestFuncs; 43 44 class SumSquaresTest : public ::testing::TestWithParam<TestFuncs> { 45 public: 46 virtual ~SumSquaresTest() {} 47 virtual void SetUp() { 48 params_ = this->GetParam(); 49 rnd_.Reset(ACMRandom::DeterministicSeed()); 50 src_ = reinterpret_cast<int16_t *>(aom_memalign(16, 256 * 256 * 2)); 51 ASSERT_TRUE(src_ != NULL); 52 } 53 54 virtual void TearDown() { 55 libaom_test::ClearSystemState(); 56 aom_free(src_); 57 } 58 void RunTest(int isRandom); 59 void RunSpeedTest(); 60 61 void GenRandomData(int width, int height, int stride) { 62 const int msb = 11; // Up to 12 bit input 63 const int limit = 1 << (msb + 1); 64 for (int ii = 0; ii < height; ii++) { 65 for (int jj = 0; jj < width; jj++) { 66 src_[ii * stride + jj] = rnd_(2) ? rnd_(limit) : -rnd_(limit); 67 } 68 } 69 } 70 71 void GenExtremeData(int width, int height, int stride) { 72 const int msb = 11; // Up to 12 bit input 73 const int limit = 1 << (msb + 1); 74 const int val = rnd_(2) ? limit - 1 : -(limit - 1); 75 for (int ii = 0; ii < height; ii++) { 76 for (int jj = 0; jj < width; jj++) { 77 src_[ii * stride + jj] = val; 78 } 79 } 80 } 81 82 protected: 83 TestFuncs params_; 84 int16_t *src_; 85 ACMRandom rnd_; 86 }; 87 88 void SumSquaresTest::RunTest(int isRandom) { 89 int failed = 0; 90 for (int k = 0; k < kNumIterations; k++) { 91 const int width = 4 * (rnd_(31) + 1); // Up to 128x128 92 const int height = 4 * (rnd_(31) + 1); // Up to 128x128 93 int stride = 4 << rnd_(7); // Up to 256 stride 94 while (stride < width) { // Make sure it's valid 95 stride = 4 << rnd_(7); 96 } 97 if (isRandom) { 98 GenRandomData(width, height, stride); 99 } else { 100 GenExtremeData(width, height, stride); 101 } 102 const uint64_t res_ref = params_.ref_func(src_, stride, width, height); 103 uint64_t res_tst; 104 ASM_REGISTER_STATE_CHECK(res_tst = 105 params_.tst_func(src_, stride, width, height)); 106 107 if (!failed) { 108 failed = res_ref != res_tst; 109 EXPECT_EQ(res_ref, res_tst) 110 << "Error: Sum Squares Test [" << width << "x" << height 111 << "] C output does not match optimized output."; 112 } 113 } 114 } 115 116 void SumSquaresTest::RunSpeedTest() { 117 for (int block = BLOCK_4X4; block < BLOCK_SIZES_ALL; block++) { 118 const int width = block_size_wide[block]; // Up to 128x128 119 const int height = block_size_high[block]; // Up to 128x128 120 int stride = 4 << rnd_(7); // Up to 256 stride 121 while (stride < width) { // Make sure it's valid 122 stride = 4 << rnd_(7); 123 } 124 GenExtremeData(width, height, stride); 125 const int num_loops = 1000000000 / (width + height); 126 aom_usec_timer timer; 127 aom_usec_timer_start(&timer); 128 129 for (int i = 0; i < num_loops; ++i) 130 params_.ref_func(src_, stride, width, height); 131 132 aom_usec_timer_mark(&timer); 133 const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer)); 134 printf("SumSquaresTest C %3dx%-3d: %7.2f ns\n", width, height, 135 1000.0 * elapsed_time / num_loops); 136 137 aom_usec_timer timer1; 138 aom_usec_timer_start(&timer1); 139 for (int i = 0; i < num_loops; ++i) 140 params_.tst_func(src_, stride, width, height); 141 aom_usec_timer_mark(&timer1); 142 const int elapsed_time1 = static_cast<int>(aom_usec_timer_elapsed(&timer1)); 143 printf("SumSquaresTest Test %3dx%-3d: %7.2f ns\n", width, height, 144 1000.0 * elapsed_time1 / num_loops); 145 } 146 } 147 148 TEST_P(SumSquaresTest, OperationCheck) { 149 RunTest(1); // GenRandomData 150 } 151 152 TEST_P(SumSquaresTest, ExtremeValues) { 153 RunTest(0); // GenExtremeData 154 } 155 156 TEST_P(SumSquaresTest, DISABLED_Speed) { RunSpeedTest(); } 157 158 #if HAVE_SSE2 159 160 INSTANTIATE_TEST_CASE_P( 161 SSE2, SumSquaresTest, 162 ::testing::Values(TestFuncs(&aom_sum_squares_2d_i16_c, 163 &aom_sum_squares_2d_i16_sse2))); 164 165 #endif // HAVE_SSE2 166 167 #if HAVE_AVX2 168 INSTANTIATE_TEST_CASE_P( 169 AVX2, SumSquaresTest, 170 ::testing::Values(TestFuncs(&aom_sum_squares_2d_i16_c, 171 &aom_sum_squares_2d_i16_avx2))); 172 #endif // HAVE_AVX2 173 174 ////////////////////////////////////////////////////////////////////////////// 175 // 1D version 176 ////////////////////////////////////////////////////////////////////////////// 177 178 typedef uint64_t (*F1D)(const int16_t *src, uint32_t N); 179 typedef libaom_test::FuncParam<F1D> TestFuncs1D; 180 181 class SumSquares1DTest : public FunctionEquivalenceTest<F1D> { 182 protected: 183 static const int kIterations = 1000; 184 static const int kMaxSize = 256; 185 }; 186 187 TEST_P(SumSquares1DTest, RandomValues) { 188 DECLARE_ALIGNED(16, int16_t, src[kMaxSize * kMaxSize]); 189 190 for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) { 191 for (int i = 0; i < kMaxSize * kMaxSize; ++i) 192 src[i] = rng_(kInt13Max * 2 + 1) - kInt13Max; 193 194 const int N = rng_(2) ? rng_(kMaxSize * kMaxSize + 1 - kMaxSize) + kMaxSize 195 : rng_(kMaxSize) + 1; 196 197 const uint64_t ref_res = params_.ref_func(src, N); 198 uint64_t tst_res; 199 ASM_REGISTER_STATE_CHECK(tst_res = params_.tst_func(src, N)); 200 201 ASSERT_EQ(ref_res, tst_res); 202 } 203 } 204 205 TEST_P(SumSquares1DTest, ExtremeValues) { 206 DECLARE_ALIGNED(16, int16_t, src[kMaxSize * kMaxSize]); 207 208 for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) { 209 if (rng_(2)) { 210 for (int i = 0; i < kMaxSize * kMaxSize; ++i) src[i] = kInt13Max; 211 } else { 212 for (int i = 0; i < kMaxSize * kMaxSize; ++i) src[i] = -kInt13Max; 213 } 214 215 const int N = rng_(2) ? rng_(kMaxSize * kMaxSize + 1 - kMaxSize) + kMaxSize 216 : rng_(kMaxSize) + 1; 217 218 const uint64_t ref_res = params_.ref_func(src, N); 219 uint64_t tst_res; 220 ASM_REGISTER_STATE_CHECK(tst_res = params_.tst_func(src, N)); 221 222 ASSERT_EQ(ref_res, tst_res); 223 } 224 } 225 226 #if HAVE_SSE2 227 INSTANTIATE_TEST_CASE_P(SSE2, SumSquares1DTest, 228 ::testing::Values(TestFuncs1D( 229 aom_sum_squares_i16_c, aom_sum_squares_i16_sse2))); 230 231 #endif // HAVE_SSE2 232 233 typedef int64_t (*sse_func)(const uint8_t *a, int a_stride, const uint8_t *b, 234 int b_stride, int width, int height); 235 typedef libaom_test::FuncParam<sse_func> TestSSEFuncs; 236 237 typedef ::testing::tuple<TestSSEFuncs, int> SSETestParam; 238 239 class SSETest : public ::testing::TestWithParam<SSETestParam> { 240 public: 241 virtual ~SSETest() {} 242 virtual void SetUp() { 243 params_ = GET_PARAM(0); 244 width_ = GET_PARAM(1); 245 isHbd_ = params_.ref_func == aom_highbd_sse_c; 246 rnd_.Reset(ACMRandom::DeterministicSeed()); 247 src_ = reinterpret_cast<uint8_t *>(aom_memalign(32, 256 * 256 * 2)); 248 ref_ = reinterpret_cast<uint8_t *>(aom_memalign(32, 256 * 256 * 2)); 249 ASSERT_TRUE(src_ != NULL); 250 ASSERT_TRUE(ref_ != NULL); 251 } 252 253 virtual void TearDown() { 254 libaom_test::ClearSystemState(); 255 aom_free(src_); 256 aom_free(ref_); 257 } 258 void RunTest(int isRandom, int width, int height, int run_times); 259 260 void GenRandomData(int width, int height, int stride) { 261 uint16_t *pSrc = (uint16_t *)src_; 262 uint16_t *pRef = (uint16_t *)ref_; 263 const int msb = 11; // Up to 12 bit input 264 const int limit = 1 << (msb + 1); 265 for (int ii = 0; ii < height; ii++) { 266 for (int jj = 0; jj < width; jj++) { 267 if (!isHbd_) { 268 src_[ii * stride + jj] = rnd_.Rand8(); 269 ref_[ii * stride + jj] = rnd_.Rand8(); 270 } else { 271 pSrc[ii * stride + jj] = rnd_(limit); 272 pRef[ii * stride + jj] = rnd_(limit); 273 } 274 } 275 } 276 } 277 278 void GenExtremeData(int width, int height, int stride, uint8_t *data, 279 int16_t val) { 280 uint16_t *pData = (uint16_t *)data; 281 for (int ii = 0; ii < height; ii++) { 282 for (int jj = 0; jj < width; jj++) { 283 if (!isHbd_) { 284 data[ii * stride + jj] = (uint8_t)val; 285 } else { 286 pData[ii * stride + jj] = val; 287 } 288 } 289 } 290 } 291 292 protected: 293 int isHbd_; 294 int width_; 295 TestSSEFuncs params_; 296 uint8_t *src_; 297 uint8_t *ref_; 298 ACMRandom rnd_; 299 }; 300 301 void SSETest::RunTest(int isRandom, int width, int height, int run_times) { 302 int failed = 0; 303 aom_usec_timer ref_timer, test_timer; 304 for (int k = 0; k < 3; k++) { 305 int stride = 4 << rnd_(7); // Up to 256 stride 306 while (stride < width) { // Make sure it's valid 307 stride = 4 << rnd_(7); 308 } 309 if (isRandom) { 310 GenRandomData(width, height, stride); 311 } else { 312 const int msb = isHbd_ ? 12 : 8; // Up to 12 bit input 313 const int limit = (1 << msb) - 1; 314 if (k == 0) { 315 GenExtremeData(width, height, stride, src_, 0); 316 GenExtremeData(width, height, stride, ref_, limit); 317 } else { 318 GenExtremeData(width, height, stride, src_, limit); 319 GenExtremeData(width, height, stride, ref_, 0); 320 } 321 } 322 int64_t res_ref, res_tst; 323 uint8_t *pSrc = src_; 324 uint8_t *pRef = ref_; 325 if (isHbd_) { 326 pSrc = CONVERT_TO_BYTEPTR(src_); 327 pRef = CONVERT_TO_BYTEPTR(ref_); 328 } 329 res_ref = params_.ref_func(pSrc, stride, pRef, stride, width, height); 330 res_tst = params_.tst_func(pSrc, stride, pRef, stride, width, height); 331 if (run_times > 1) { 332 aom_usec_timer_start(&ref_timer); 333 for (int j = 0; j < run_times; j++) { 334 params_.ref_func(pSrc, stride, pRef, stride, width, height); 335 } 336 aom_usec_timer_mark(&ref_timer); 337 const int elapsed_time_c = 338 static_cast<int>(aom_usec_timer_elapsed(&ref_timer)); 339 340 aom_usec_timer_start(&test_timer); 341 for (int j = 0; j < run_times; j++) { 342 params_.tst_func(pSrc, stride, pRef, stride, width, height); 343 } 344 aom_usec_timer_mark(&test_timer); 345 const int elapsed_time_simd = 346 static_cast<int>(aom_usec_timer_elapsed(&test_timer)); 347 348 printf( 349 "c_time=%d \t simd_time=%d \t " 350 "gain=%d\n", 351 elapsed_time_c, elapsed_time_simd, 352 (elapsed_time_c / elapsed_time_simd)); 353 } else { 354 if (!failed) { 355 failed = res_ref != res_tst; 356 EXPECT_EQ(res_ref, res_tst) 357 << "Error:" << (isHbd_ ? "hbd " : " ") << k << " SSE Test [" 358 << width << "x" << height 359 << "] C output does not match optimized output."; 360 } 361 } 362 } 363 } 364 365 TEST_P(SSETest, OperationCheck) { 366 for (int height = 4; height <= 128; height += 4) { 367 RunTest(1, width_, height, 1); // GenRandomData 368 } 369 } 370 371 TEST_P(SSETest, ExtremeValues) { 372 for (int height = 4; height <= 128; height += 4) { 373 RunTest(0, width_, height, 1); 374 } 375 } 376 377 TEST_P(SSETest, DISABLED_Speed) { 378 for (int height = 4; height <= 128; height += 4) { 379 RunTest(1, width_, height, 100); 380 } 381 } 382 #if HAVE_SSE4_1 383 TestSSEFuncs sse_sse4[] = { TestSSEFuncs(&aom_sse_c, &aom_sse_sse4_1), 384 TestSSEFuncs(&aom_highbd_sse_c, 385 &aom_highbd_sse_sse4_1) }; 386 INSTANTIATE_TEST_CASE_P(SSE4_1, SSETest, 387 Combine(ValuesIn(sse_sse4), Range(4, 129, 4))); 388 #endif // HAVE_SSE4_1 389 390 #if HAVE_AVX2 391 392 TestSSEFuncs sse_avx2[] = { TestSSEFuncs(&aom_sse_c, &aom_sse_avx2), 393 TestSSEFuncs(&aom_highbd_sse_c, 394 &aom_highbd_sse_avx2) }; 395 INSTANTIATE_TEST_CASE_P(AVX2, SSETest, 396 Combine(ValuesIn(sse_avx2), Range(4, 129, 4))); 397 #endif // HAVE_AVX2 398 } // namespace 399