/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <string.h>

#include "third_party/googletest/src/include/gtest/gtest.h"

#include "./vp9_rtcd.h"
#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"
#include "test/acm_random.h"
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
#include "test/util.h"
#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_filter.h"
#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_dsp/vpx_filter.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"
#include "vpx_ports/vpx_timer.h"

namespace {

// Largest block edge (in pixels) exercised by these tests.
static const unsigned int kMaxDimension = 64;

// Common signature of every convolve entry point under test. |x0_q4|/|y0_q4|
// are the initial subpel positions in 1/16-pel units; |x_step_q4|/|y_step_q4|
// are the per-pixel increments (16 == unscaled).
typedef void (*ConvolveFunc)(const uint8_t *src, ptrdiff_t src_stride,
                             uint8_t *dst, ptrdiff_t dst_stride,
                             const InterpKernel *filter, int x0_q4,
                             int x_step_q4, int y0_q4, int y_step_q4, int w,
                             int h);

// Signature of the reference 2-D 8-tap wrappers defined below; the trailing
// |use_highbd| selects the 8-bit path (0) or a high-bitdepth path (the bit
// depth itself).
typedef void (*WrapperFilterBlock2d8Func)(
    const uint8_t *src_ptr, const unsigned int src_stride,
    const int16_t *hfilter, const int16_t *vfilter, uint8_t *dst_ptr,
    unsigned int dst_stride, unsigned int output_width,
    unsigned int output_height, int use_highbd);

// Bundles the full set of convolve entry points for one implementation
// (C, SSE2, NEON, ...). For each pair, index [0] is the plain variant and
// index [1] is the averaging ("avg") variant.
struct ConvolveFunctions {
  ConvolveFunctions(ConvolveFunc copy, ConvolveFunc avg, ConvolveFunc h8,
                    ConvolveFunc h8_avg, ConvolveFunc v8, ConvolveFunc v8_avg,
                    ConvolveFunc hv8, ConvolveFunc hv8_avg, ConvolveFunc sh8,
                    ConvolveFunc sh8_avg, ConvolveFunc sv8,
                    ConvolveFunc sv8_avg, ConvolveFunc shv8,
                    ConvolveFunc shv8_avg, int bd)
      : use_highbd_(bd) {
    copy_[0] = copy;
    copy_[1] = avg;
    h8_[0] = h8;
    h8_[1] = h8_avg;
    v8_[0] = v8;
    v8_[1] = v8_avg;
    hv8_[0] = hv8;
    hv8_[1] = hv8_avg;
    sh8_[0] = sh8;
    sh8_[1] = sh8_avg;
    sv8_[0] = sv8;
    sv8_[1] = sv8_avg;
    shv8_[0] = shv8;
    shv8_[1] = shv8_avg;
  }

  ConvolveFunc copy_[2];
  ConvolveFunc h8_[2];
  ConvolveFunc v8_[2];
  ConvolveFunc hv8_[2];
  ConvolveFunc sh8_[2];   // scaled horiz
  ConvolveFunc sv8_[2];   // scaled vert
  ConvolveFunc shv8_[2];  // scaled horiz/vert
  int use_highbd_;  // 0 if high bitdepth not used, else the actual bit depth.
};

// <width, height, function table>
typedef std::tr1::tuple<int, int, const ConvolveFunctions *> ConvolveParam;

// Instantiates one parameter tuple per supported VP9 block size.
#define ALL_SIZES(convolve_fn)                                            \
  make_tuple(4, 4, &convolve_fn), make_tuple(8, 4, &convolve_fn),         \
      make_tuple(4, 8, &convolve_fn), make_tuple(8, 8, &convolve_fn),     \
      make_tuple(16, 8, &convolve_fn), make_tuple(8, 16, &convolve_fn),   \
      make_tuple(16, 16, &convolve_fn), make_tuple(32, 16, &convolve_fn), \
      make_tuple(16, 32, &convolve_fn), make_tuple(32, 32, &convolve_fn), \
      make_tuple(64, 32, &convolve_fn), make_tuple(32, 64, &convolve_fn), \
      make_tuple(64, 64, &convolve_fn)

// Reference 8-tap subpixel filter, slightly modified to fit into this test.
#define VP9_FILTER_WEIGHT 128
#define VP9_FILTER_SHIFT 7
// Clamps a filtered value back into the 8-bit pixel range.
uint8_t clip_pixel(int x) { return x < 0 ? 0 : x > 255 ? 255 : x; }

// Reference 2-D 8-tap filter: a horizontal pass into a transposed
// intermediate buffer, then a vertical pass from it into |dst_ptr|.
void filter_block2d_8_c(const uint8_t *src_ptr, const unsigned int src_stride,
                        const int16_t *hfilter, const int16_t *vfilter,
                        uint8_t *dst_ptr, unsigned int dst_stride,
                        unsigned int output_width, unsigned int output_height) {
  // Between passes, we use an intermediate buffer whose height is extended to
  // have enough horizontally filtered values as input for the vertical pass.
  // This buffer is allocated to be big enough for the largest block type we
  // support.
  const int kInterp_Extend = 4;
  const unsigned int intermediate_height =
      (kInterp_Extend - 1) + output_height + kInterp_Extend;
  unsigned int i, j;

  // Size of intermediate_buffer is max_intermediate_height * filter_max_width,
  // where max_intermediate_height = (kInterp_Extend - 1) + kMaxDimension +
  //                                 kInterp_Extend
  //                               = 3 + 64 + 4
  //                               = 71
  // and filter_max_width = kMaxDimension.
  uint8_t intermediate_buffer[71 * kMaxDimension];
  // The intermediate buffer is written transposed (one column per output
  // column, stride == intermediate_height); this offset rewinds from the end
  // of the last column to the top of the next one.
  const int intermediate_next_stride =
      1 - static_cast<int>(intermediate_height * output_width);

  // Horizontal pass (src -> transposed intermediate).
  uint8_t *output_ptr = intermediate_buffer;
  const int src_next_row_stride = src_stride - output_width;
  // Rewind to the first input sample the 8-tap kernel needs (3 rows up and
  // 3 columns left of the output origin).
  src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1);
  for (i = 0; i < intermediate_height; ++i) {
    for (j = 0; j < output_width; ++j) {
      // Apply filter...
      const int temp = (src_ptr[0] * hfilter[0]) + (src_ptr[1] * hfilter[1]) +
                       (src_ptr[2] * hfilter[2]) + (src_ptr[3] * hfilter[3]) +
                       (src_ptr[4] * hfilter[4]) + (src_ptr[5] * hfilter[5]) +
                       (src_ptr[6] * hfilter[6]) + (src_ptr[7] * hfilter[7]) +
                       (VP9_FILTER_WEIGHT >> 1);  // Rounding

      // Normalize back to 0-255...
      *output_ptr = clip_pixel(temp >> VP9_FILTER_SHIFT);
      ++src_ptr;
      output_ptr += intermediate_height;
    }
    src_ptr += src_next_row_stride;
    output_ptr += intermediate_next_stride;
  }

  // Vertical pass (transposed intermediate -> dst). Because the intermediate
  // is transposed, the "vertical" taps read consecutive bytes.
  src_ptr = intermediate_buffer;
  const int dst_next_row_stride = dst_stride - output_width;
  for (i = 0; i < output_height; ++i) {
    for (j = 0; j < output_width; ++j) {
      // Apply filter...
      const int temp = (src_ptr[0] * vfilter[0]) + (src_ptr[1] * vfilter[1]) +
                       (src_ptr[2] * vfilter[2]) + (src_ptr[3] * vfilter[3]) +
                       (src_ptr[4] * vfilter[4]) + (src_ptr[5] * vfilter[5]) +
                       (src_ptr[6] * vfilter[6]) + (src_ptr[7] * vfilter[7]) +
                       (VP9_FILTER_WEIGHT >> 1);  // Rounding

      // Normalize back to 0-255...
      *dst_ptr++ = clip_pixel(temp >> VP9_FILTER_SHIFT);
      src_ptr += intermediate_height;
    }
    src_ptr += intermediate_next_stride;
    dst_ptr += dst_next_row_stride;
  }
}

// In-place rounding average: output_ptr[] = (output_ptr[] + src[] + 1) >> 1.
void block2d_average_c(uint8_t *src, unsigned int src_stride,
                       uint8_t *output_ptr, unsigned int output_stride,
                       unsigned int output_width, unsigned int output_height) {
  unsigned int i, j;
  for (i = 0; i < output_height; ++i) {
    for (j = 0; j < output_width; ++j) {
      output_ptr[j] = (output_ptr[j] + src[i * src_stride + j] + 1) >> 1;
    }
    output_ptr += output_stride;
  }
}

// Reference for the "avg" convolve variants: filter into a scratch buffer,
// then average the result into |dst_ptr|.
void filter_average_block2d_8_c(const uint8_t *src_ptr,
                                const unsigned int src_stride,
                                const int16_t *hfilter, const int16_t *vfilter,
                                uint8_t *dst_ptr, unsigned int dst_stride,
                                unsigned int output_width,
                                unsigned int output_height) {
  uint8_t tmp[kMaxDimension * kMaxDimension];

  assert(output_width <= kMaxDimension);
  assert(output_height <= kMaxDimension);
  filter_block2d_8_c(src_ptr, src_stride, hfilter, vfilter, tmp, 64,
                     output_width, output_height);
  block2d_average_c(tmp, 64, dst_ptr, dst_stride, output_width, output_height);
}

#if CONFIG_VP9_HIGHBITDEPTH
// High-bitdepth twin of filter_block2d_8_c; |bd| is the bit depth used when
// clamping filtered values.
void highbd_filter_block2d_8_c(const uint16_t *src_ptr,
                               const unsigned int src_stride,
                               const int16_t *hfilter, const int16_t *vfilter,
                               uint16_t *dst_ptr, unsigned int dst_stride,
                               unsigned int output_width,
                               unsigned int output_height, int bd) {
  // Between passes, we use an intermediate buffer whose height is extended to
  // have enough horizontally filtered values as input for the vertical pass.
  // This buffer is allocated to be big enough for the largest block type we
  // support.
  const int kInterp_Extend = 4;
  const unsigned int intermediate_height =
      (kInterp_Extend - 1) + output_height + kInterp_Extend;

  /* Size of intermediate_buffer is max_intermediate_height * filter_max_width,
   * where max_intermediate_height = (kInterp_Extend - 1) + kMaxDimension +
   *                                 kInterp_Extend
   *                               = 3 + 64 + 4
   *                               = 71
   * and filter_max_width = kMaxDimension.
   */
  uint16_t intermediate_buffer[71 * kMaxDimension];
  // Rewinds from the end of one transposed column to the top of the next.
  const int intermediate_next_stride =
      1 - static_cast<int>(intermediate_height * output_width);

  // Horizontal pass (src -> transposed intermediate).
  {
    uint16_t *output_ptr = intermediate_buffer;
    const int src_next_row_stride = src_stride - output_width;
    unsigned int i, j;
    // Rewind to the first input sample the 8-tap kernel needs.
    src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1);
    for (i = 0; i < intermediate_height; ++i) {
      for (j = 0; j < output_width; ++j) {
        // Apply filter...
        const int temp = (src_ptr[0] * hfilter[0]) + (src_ptr[1] * hfilter[1]) +
                         (src_ptr[2] * hfilter[2]) + (src_ptr[3] * hfilter[3]) +
                         (src_ptr[4] * hfilter[4]) + (src_ptr[5] * hfilter[5]) +
                         (src_ptr[6] * hfilter[6]) + (src_ptr[7] * hfilter[7]) +
                         (VP9_FILTER_WEIGHT >> 1);  // Rounding

        // Normalize back to the [0, (1 << bd) - 1] pixel range...
        *output_ptr = clip_pixel_highbd(temp >> VP9_FILTER_SHIFT, bd);
        ++src_ptr;
        output_ptr += intermediate_height;
      }
      src_ptr += src_next_row_stride;
      output_ptr += intermediate_next_stride;
    }
  }

  // Vertical pass (transposed intermediate -> dst).
  {
    uint16_t *src_ptr = intermediate_buffer;
    const int dst_next_row_stride = dst_stride - output_width;
    unsigned int i, j;
    for (i = 0; i < output_height; ++i) {
      for (j = 0; j < output_width; ++j) {
        // Apply filter...
        const int temp = (src_ptr[0] * vfilter[0]) + (src_ptr[1] * vfilter[1]) +
                         (src_ptr[2] * vfilter[2]) + (src_ptr[3] * vfilter[3]) +
                         (src_ptr[4] * vfilter[4]) + (src_ptr[5] * vfilter[5]) +
                         (src_ptr[6] * vfilter[6]) + (src_ptr[7] * vfilter[7]) +
                         (VP9_FILTER_WEIGHT >> 1);  // Rounding

        // Normalize back to the [0, (1 << bd) - 1] pixel range...
        *dst_ptr++ = clip_pixel_highbd(temp >> VP9_FILTER_SHIFT, bd);
        src_ptr += intermediate_height;
      }
      src_ptr += intermediate_next_stride;
      dst_ptr += dst_next_row_stride;
    }
  }
}

// High-bitdepth twin of block2d_average_c.
void highbd_block2d_average_c(uint16_t *src, unsigned int src_stride,
                              uint16_t *output_ptr, unsigned int output_stride,
                              unsigned int output_width,
                              unsigned int output_height) {
  unsigned int i, j;
  for (i = 0; i < output_height; ++i) {
    for (j = 0; j < output_width; ++j) {
      output_ptr[j] = (output_ptr[j] + src[i * src_stride + j] + 1) >> 1;
    }
    output_ptr += output_stride;
  }
}

// High-bitdepth twin of filter_average_block2d_8_c.
void highbd_filter_average_block2d_8_c(
    const uint16_t *src_ptr, const unsigned int src_stride,
    const int16_t *hfilter, const int16_t *vfilter, uint16_t *dst_ptr,
    unsigned int dst_stride, unsigned int output_width,
    unsigned int output_height, int bd) {
  uint16_t tmp[kMaxDimension * kMaxDimension];

  assert(output_width <= kMaxDimension);
  assert(output_height <= kMaxDimension);
  highbd_filter_block2d_8_c(src_ptr, src_stride, hfilter, vfilter, tmp, 64,
                            output_width, output_height, bd);
  highbd_block2d_average_c(tmp, 64, dst_ptr, dst_stride, output_width,
                           output_height);
}
#endif  // CONFIG_VP9_HIGHBITDEPTH

// Dispatches to the 8-bit or high-bitdepth reference averaging filter based
// on |use_highbd| (0 for 8-bit, otherwise the bit depth).
void wrapper_filter_average_block2d_8_c(
    const uint8_t *src_ptr, const unsigned int src_stride,
    const int16_t *hfilter, const int16_t *vfilter, uint8_t *dst_ptr,
    unsigned int dst_stride, unsigned int output_width,
    unsigned int output_height, int use_highbd) {
#if CONFIG_VP9_HIGHBITDEPTH
  if (use_highbd == 0) {
    filter_average_block2d_8_c(src_ptr, src_stride, hfilter, vfilter, dst_ptr,
                               dst_stride, output_width, output_height);
  } else {
    highbd_filter_average_block2d_8_c(CAST_TO_SHORTPTR(src_ptr), src_stride,
                                      hfilter, vfilter,
                                      CAST_TO_SHORTPTR(dst_ptr), dst_stride,
                                      output_width, output_height, use_highbd);
  }
#else
  ASSERT_EQ(0, use_highbd);
  filter_average_block2d_8_c(src_ptr, src_stride, hfilter, vfilter, dst_ptr,
                             dst_stride, output_width, output_height);
#endif
}

// Dispatches to the 8-bit or high-bitdepth reference filter based on
// |use_highbd| (0 for 8-bit, otherwise the bit depth).
void wrapper_filter_block2d_8_c(const uint8_t *src_ptr,
                                const unsigned int src_stride,
                                const int16_t *hfilter, const int16_t *vfilter,
                                uint8_t *dst_ptr, unsigned int dst_stride,
                                unsigned int output_width,
                                unsigned int output_height, int use_highbd) {
#if CONFIG_VP9_HIGHBITDEPTH
  if (use_highbd == 0) {
    filter_block2d_8_c(src_ptr, src_stride, hfilter, vfilter, dst_ptr,
                       dst_stride, output_width, output_height);
  } else {
    highbd_filter_block2d_8_c(CAST_TO_SHORTPTR(src_ptr), src_stride, hfilter,
                              vfilter, CAST_TO_SHORTPTR(dst_ptr), dst_stride,
                              output_width, output_height, use_highbd);
  }
#else
  ASSERT_EQ(0, use_highbd);
  filter_block2d_8_c(src_ptr, src_stride, hfilter, vfilter, dst_ptr, dst_stride,
                     output_width, output_height);
#endif
}

// Parameterized fixture: allocates shared input/output buffers once per test
// case and centers a Width() x Height() block inside a 256x256 outer block
// whose border acts as a guard area for overwrite detection.
class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
 public:
  static void SetUpTestCase() {
    // Force input_ to be unaligned, output to be 16 byte aligned.
    input_ = reinterpret_cast<uint8_t *>(
                 vpx_memalign(kDataAlignment, kInputBufferSize + 1)) +
             1;
    output_ = reinterpret_cast<uint8_t *>(
        vpx_memalign(kDataAlignment, kOutputBufferSize));
    output_ref_ = reinterpret_cast<uint8_t *>(
        vpx_memalign(kDataAlignment, kOutputBufferSize));
#if CONFIG_VP9_HIGHBITDEPTH
    input16_ = reinterpret_cast<uint16_t *>(vpx_memalign(
                   kDataAlignment, (kInputBufferSize + 1) * sizeof(uint16_t))) +
               1;
    output16_ = reinterpret_cast<uint16_t *>(
        vpx_memalign(kDataAlignment, (kOutputBufferSize) * sizeof(uint16_t)));
    output16_ref_ = reinterpret_cast<uint16_t *>(
        vpx_memalign(kDataAlignment, (kOutputBufferSize) * sizeof(uint16_t)));
#endif
  }

  virtual void TearDown() { libvpx_test::ClearSystemState(); }

  static void TearDownTestCase() {
    // input_/input16_ were offset by +1 at allocation; undo before freeing.
    vpx_free(input_ - 1);
    input_ = NULL;
    vpx_free(output_);
    output_ = NULL;
    vpx_free(output_ref_);
    output_ref_ = NULL;
#if CONFIG_VP9_HIGHBITDEPTH
    vpx_free(input16_ - 1);
    input16_ = NULL;
    vpx_free(output16_);
    output16_ = NULL;
    vpx_free(output16_ref_);
    output16_ref_ = NULL;
#endif
  }

 protected:
  static const int kDataAlignment = 16;
  static const int kOuterBlockSize = 256;
  static const int kInputStride = kOuterBlockSize;
  static const int kOutputStride = kOuterBlockSize;
  static const int kInputBufferSize = kOuterBlockSize * kOuterBlockSize;
  static const int kOutputBufferSize = kOuterBlockSize * kOuterBlockSize;

  int Width() const { return GET_PARAM(0); }
  int Height() const { return GET_PARAM(1); }
  // Left edge of the centered inner block, rounded up to kDataAlignment.
  int BorderLeft() const {
    const int center = (kOuterBlockSize - Width()) / 2;
    return (center + (kDataAlignment - 1)) & ~(kDataAlignment - 1);
  }
  int BorderTop() const { return (kOuterBlockSize - Height()) / 2; }

  // True if flat index |i| lies in the guard area surrounding the inner
  // Width() x Height() block.
  bool IsIndexInBorder(int i) {
    return (i < BorderTop() * kOuterBlockSize ||
            i >= (BorderTop() + Height()) * kOuterBlockSize ||
            i % kOuterBlockSize < BorderLeft() ||
            i % kOuterBlockSize >= (BorderLeft() + Width()));
  }

  virtual void SetUp() {
    UUT_ = GET_PARAM(2);
#if CONFIG_VP9_HIGHBITDEPTH
    if (UUT_->use_highbd_ != 0) {
      mask_ = (1 << UUT_->use_highbd_) - 1;
    } else {
      mask_ = 255;
    }
#endif
    /* Set up guard blocks for an inner block centered in the outer block */
    for (int i = 0; i < kOutputBufferSize; ++i) {
      if (IsIndexInBorder(i)) {
        output_[i] = 255;
      } else {
        output_[i] = 0;
      }
    }

    // Fill the input with alternating max-value and random extreme pixels.
    ::libvpx_test::ACMRandom prng;
    for (int i = 0; i < kInputBufferSize; ++i) {
      if (i & 1) {
        input_[i] = 255;
#if CONFIG_VP9_HIGHBITDEPTH
        input16_[i] = mask_;
#endif
      } else {
        input_[i] = prng.Rand8Extremes();
#if CONFIG_VP9_HIGHBITDEPTH
        input16_[i] = prng.Rand16() & mask_;
#endif
      }
    }
  }

  void SetConstantInput(int value) {
    memset(input_, value, kInputBufferSize);
#if CONFIG_VP9_HIGHBITDEPTH
    vpx_memset16(input16_, value, kInputBufferSize);
#endif
  }

  void CopyOutputToRef() {
    memcpy(output_ref_, output_, kOutputBufferSize);
#if CONFIG_VP9_HIGHBITDEPTH
    memcpy(output16_ref_, output16_,
           kOutputBufferSize * sizeof(output16_ref_[0]));
#endif
  }

  // Verifies that no convolve call wrote outside the inner block.
  void CheckGuardBlocks() {
    for (int i = 0; i < kOutputBufferSize; ++i) {
      if (IsIndexInBorder(i)) EXPECT_EQ(255, output_[i]);
    }
  }

  // Pointer to the top-left pixel of the inner input block. For high
  // bitdepth the uint16_t buffer is returned through a byte pointer, as the
  // convolve API expects.
  uint8_t *input() const {
    const int offset = BorderTop() * kOuterBlockSize + BorderLeft();
#if CONFIG_VP9_HIGHBITDEPTH
    if (UUT_->use_highbd_ == 0) {
      return input_ + offset;
    } else {
      return CAST_TO_BYTEPTR(input16_ + offset);
    }
#else
    return input_ + offset;
#endif
  }

  // Pointer to the top-left pixel of the inner output block (see input()).
  uint8_t *output() const {
    const int offset = BorderTop() * kOuterBlockSize + BorderLeft();
#if CONFIG_VP9_HIGHBITDEPTH
    if (UUT_->use_highbd_ == 0) {
      return output_ + offset;
    } else {
      return CAST_TO_BYTEPTR(output16_ + offset);
    }
#else
    return output_ + offset;
#endif
  }

  // Pointer to the top-left pixel of the reference output block.
  uint8_t *output_ref() const {
    const int offset = BorderTop() * kOuterBlockSize + BorderLeft();
#if CONFIG_VP9_HIGHBITDEPTH
    if (UUT_->use_highbd_ == 0) {
      return output_ref_ + offset;
    } else {
      return CAST_TO_BYTEPTR(output16_ref_ + offset);
    }
#else
    return output_ref_ + offset;
#endif
  }

  // Reads element |index| from |list|, honoring the configured bit depth.
  uint16_t lookup(uint8_t *list, int index) const {
#if CONFIG_VP9_HIGHBITDEPTH
    if (UUT_->use_highbd_ == 0) {
      return list[index];
    } else {
      return CAST_TO_SHORTPTR(list)[index];
    }
#else
    return list[index];
#endif
  }

  // Writes |val| to element |index| of |list|, honoring the bit depth.
  void assign_val(uint8_t *list, int index, uint16_t val) const {
#if CONFIG_VP9_HIGHBITDEPTH
    if (UUT_->use_highbd_ == 0) {
      list[index] = (uint8_t)val;
    } else {
      CAST_TO_SHORTPTR(list)[index] = val;
    }
#else
    list[index] = (uint8_t)val;
#endif
  }

  const ConvolveFunctions *UUT_;
  static uint8_t *input_;
  static uint8_t *output_;
  static uint8_t *output_ref_;
#if CONFIG_VP9_HIGHBITDEPTH
  static uint16_t *input16_;
  static uint16_t *output16_;
  static uint16_t *output16_ref_;
  int mask_;  // Pixel-value mask for the configured bit depth.
#endif
};

uint8_t *ConvolveTest::input_ = NULL;
uint8_t *ConvolveTest::output_ = NULL;
uint8_t *ConvolveTest::output_ref_ = NULL;
#if CONFIG_VP9_HIGHBITDEPTH
uint16_t *ConvolveTest::input16_ = NULL;
uint16_t *ConvolveTest::output16_ = NULL;
uint16_t *ConvolveTest::output16_ref_ = NULL;
#endif

TEST_P(ConvolveTest, GuardBlocks) { CheckGuardBlocks(); }

// Speed test for the plain copy entry point (run manually; DISABLED_ by
// default).
TEST_P(ConvolveTest, DISABLED_Copy_Speed) {
  const uint8_t *const in = input();
  uint8_t *const out = output();
  const int kNumTests = 5000000;
  const int width = Width();
  const int height = Height();
  vpx_usec_timer timer;

  vpx_usec_timer_start(&timer);
  for (int n = 0; n < kNumTests; ++n)
  {
    UUT_->copy_[0](in, kInputStride, out, kOutputStride, NULL, 0, 0, 0, 0,
                   width, height);
  }
  vpx_usec_timer_mark(&timer);

  const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer));
  printf("convolve_copy_%dx%d_%d: %d us\n", width, height,
         UUT_->use_highbd_ ? UUT_->use_highbd_ : 8, elapsed_time);
}

// Speed test for the averaging copy entry point.
TEST_P(ConvolveTest, DISABLED_Avg_Speed) {
  const uint8_t *const in = input();
  uint8_t *const out = output();
  const int kNumTests = 5000000;
  const int width = Width();
  const int height = Height();
  vpx_usec_timer timer;

  vpx_usec_timer_start(&timer);
  for (int n = 0; n < kNumTests; ++n) {
    UUT_->copy_[1](in, kInputStride, out, kOutputStride, NULL, 0, 0, 0, 0,
                   width, height);
  }
  vpx_usec_timer_mark(&timer);

  const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer));
  printf("convolve_avg_%dx%d_%d: %d us\n", width, height,
         UUT_->use_highbd_ ? UUT_->use_highbd_ : 8, elapsed_time);
}

// Speed test for the scaled 2-D filter at subpel position 8, step 16.
TEST_P(ConvolveTest, DISABLED_Scale_Speed) {
  const uint8_t *const in = input();
  uint8_t *const out = output();
  const InterpKernel *const eighttap = vp9_filter_kernels[EIGHTTAP];
  const int kNumTests = 5000000;
  const int width = Width();
  const int height = Height();
  vpx_usec_timer timer;

  SetConstantInput(127);

  vpx_usec_timer_start(&timer);
  for (int n = 0; n < kNumTests; ++n) {
    UUT_->shv8_[0](in, kInputStride, out, kOutputStride, eighttap, 8, 16, 8, 16,
                   width, height);
  }
  vpx_usec_timer_mark(&timer);

  const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer));
  printf("convolve_scale_%dx%d_%d: %d us\n", width, height,
         UUT_->use_highbd_ ? UUT_->use_highbd_ : 8, elapsed_time);
}

// Speed test for the unscaled 2-D 8-tap filter.
TEST_P(ConvolveTest, DISABLED_8Tap_Speed) {
  const uint8_t *const in = input();
  uint8_t *const out = output();
  const InterpKernel *const eighttap = vp9_filter_kernels[EIGHTTAP_SHARP];
  const int kNumTests = 5000000;
  const int width = Width();
  const int height = Height();
  vpx_usec_timer timer;

  SetConstantInput(127);

  vpx_usec_timer_start(&timer);
  for (int n = 0; n < kNumTests; ++n) {
    UUT_->hv8_[0](in, kInputStride, out, kOutputStride, eighttap, 8, 16, 8, 16,
                  width, height);
  }
  vpx_usec_timer_mark(&timer);

  const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer));
  printf("convolve8_%dx%d_%d: %d us\n", width, height,
         UUT_->use_highbd_ ? UUT_->use_highbd_ : 8, elapsed_time);
}

// Speed test for the horizontal-only 8-tap filter.
TEST_P(ConvolveTest, DISABLED_8Tap_Horiz_Speed) {
  const uint8_t *const in = input();
  uint8_t *const out = output();
  const InterpKernel *const eighttap = vp9_filter_kernels[EIGHTTAP_SHARP];
  const int kNumTests = 5000000;
  const int width = Width();
  const int height = Height();
  vpx_usec_timer timer;

  SetConstantInput(127);

  vpx_usec_timer_start(&timer);
  for (int n = 0; n < kNumTests; ++n) {
    UUT_->h8_[0](in, kInputStride, out, kOutputStride, eighttap, 8, 16, 8, 16,
                 width, height);
  }
  vpx_usec_timer_mark(&timer);

  const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer));
  printf("convolve8_horiz_%dx%d_%d: %d us\n", width, height,
         UUT_->use_highbd_ ? UUT_->use_highbd_ : 8, elapsed_time);
}

// Speed test for the vertical-only 8-tap filter.
TEST_P(ConvolveTest, DISABLED_8Tap_Vert_Speed) {
  const uint8_t *const in = input();
  uint8_t *const out = output();
  const InterpKernel *const eighttap = vp9_filter_kernels[EIGHTTAP_SHARP];
  const int kNumTests = 5000000;
  const int width = Width();
  const int height = Height();
  vpx_usec_timer timer;

  SetConstantInput(127);

  vpx_usec_timer_start(&timer);
  for (int n = 0; n < kNumTests; ++n) {
    UUT_->v8_[0](in, kInputStride, out, kOutputStride, eighttap, 8, 16, 8, 16,
                 width, height);
  }
  vpx_usec_timer_mark(&timer);

  const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer));
  printf("convolve8_vert_%dx%d_%d: %d us\n", width, height,
         UUT_->use_highbd_ ? UUT_->use_highbd_ : 8, elapsed_time);
}

// Speed test for the averaging 2-D 8-tap filter.
TEST_P(ConvolveTest, DISABLED_8Tap_Avg_Speed) {
  const uint8_t *const in = input();
  uint8_t *const out = output();
  const InterpKernel *const eighttap = vp9_filter_kernels[EIGHTTAP_SHARP];
  const int kNumTests = 5000000;
  const int width = Width();
  const int height = Height();
  vpx_usec_timer timer;

  SetConstantInput(127);

  vpx_usec_timer_start(&timer);
  for (int n = 0; n < kNumTests; ++n) {
    UUT_->hv8_[1](in, kInputStride, out, kOutputStride, eighttap, 8, 16, 8, 16,
                  width, height);
  }
  vpx_usec_timer_mark(&timer);

  const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer));
  printf("convolve8_avg_%dx%d_%d: %d us\n", width, height,
         UUT_->use_highbd_ ? UUT_->use_highbd_ : 8, elapsed_time);
}

// The copy entry point must reproduce the input exactly.
TEST_P(ConvolveTest, Copy) {
  uint8_t *const in = input();
  uint8_t *const out = output();

  ASM_REGISTER_STATE_CHECK(UUT_->copy_[0](in, kInputStride, out, kOutputStride,
                                          NULL, 0, 0, 0, 0, Width(), Height()));

  CheckGuardBlocks();

  for (int y = 0; y < Height(); ++y) {
    for (int x = 0; x < Width(); ++x)
      ASSERT_EQ(lookup(out, y * kOutputStride + x),
                lookup(in, y * kInputStride + x))
          << "(" << x << "," << y << ")";
  }
}

// The averaging copy must produce the rounded mean of input and prior output.
TEST_P(ConvolveTest, Avg) {
  uint8_t *const in = input();
  uint8_t *const out = output();
  uint8_t *const out_ref = output_ref();
  CopyOutputToRef();

  ASM_REGISTER_STATE_CHECK(UUT_->copy_[1](in, kInputStride, out, kOutputStride,
                                          NULL, 0, 0, 0, 0, Width(), Height()));

  CheckGuardBlocks();

  for (int y = 0; y < Height(); ++y) {
    for (int x = 0; x < Width(); ++x)
      ASSERT_EQ(lookup(out, y * kOutputStride + x),
                ROUND_POWER_OF_TWO(lookup(in, y * kInputStride + x) +
                                       lookup(out_ref, y * kOutputStride + x),
                                   1))
          << "(" << x << "," << y << ")";
  }
}

// A scaled horizontal filter at fractional position 0 and unity step must
// behave as a copy.
TEST_P(ConvolveTest, CopyHoriz) {
  uint8_t *const in = input();
  uint8_t *const out = output();

  ASM_REGISTER_STATE_CHECK(UUT_->sh8_[0](in, kInputStride, out, kOutputStride,
                                         vp9_filter_kernels[0], 0, 16, 0, 16,
                                         Width(), Height()));

  CheckGuardBlocks();

  for (int y = 0; y < Height(); ++y) {
    for (int x = 0; x < Width(); ++x)
      ASSERT_EQ(lookup(out, y * kOutputStride + x),
                lookup(in, y * kInputStride + x))
          << "(" << x << "," << y << ")";
  }
}

// A scaled vertical filter at fractional position 0 and unity step must
// behave as a copy.
TEST_P(ConvolveTest, CopyVert) {
  uint8_t *const in = input();
  uint8_t *const out = output();

  ASM_REGISTER_STATE_CHECK(UUT_->sv8_[0](in, kInputStride, out, kOutputStride,
                                         vp9_filter_kernels[0], 0, 16, 0, 16,
                                         Width(), Height()));

  CheckGuardBlocks();

  for (int y = 0; y < Height(); ++y) {
    for (int x
             = 0; x < Width(); ++x)
      ASSERT_EQ(lookup(out, y * kOutputStride + x),
                lookup(in, y * kInputStride + x))
          << "(" << x << "," << y << ")";
  }
}

// A scaled 2-D filter at fractional position 0 and unity step must behave as
// a copy.
TEST_P(ConvolveTest, Copy2D) {
  uint8_t *const in = input();
  uint8_t *const out = output();

  ASM_REGISTER_STATE_CHECK(UUT_->shv8_[0](in, kInputStride, out, kOutputStride,
                                          vp9_filter_kernels[0], 0, 16, 0, 16,
                                          Width(), Height()));

  CheckGuardBlocks();

  for (int y = 0; y < Height(); ++y) {
    for (int x = 0; x < Width(); ++x)
      ASSERT_EQ(lookup(out, y * kOutputStride + x),
                lookup(in, y * kInputStride + x))
          << "(" << x << "," << y << ")";
  }
}

const int kNumFilterBanks = 4;
const int kNumFilters = 16;

// Static property of the filter banks: every kernel sums to exactly 128 and
// no pairwise partial sum exceeds it, so SIMD implementations that add taps
// pairwise cannot saturate their intermediates.
TEST(ConvolveTest, FiltersWontSaturateWhenAddedPairwise) {
  for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
    const InterpKernel *filters =
        vp9_filter_kernels[static_cast<INTERP_FILTER>(filter_bank)];
    for (int i = 0; i < kNumFilters; i++) {
      const int p0 = filters[i][0] + filters[i][1];
      const int p1 = filters[i][2] + filters[i][3];
      const int p2 = filters[i][4] + filters[i][5];
      const int p3 = filters[i][6] + filters[i][7];
      EXPECT_LE(p0, 128);
      EXPECT_LE(p1, 128);
      EXPECT_LE(p2, 128);
      EXPECT_LE(p3, 128);
      EXPECT_LE(p0 + p3, 128);
      EXPECT_LE(p0 + p3 + p1, 128);
      EXPECT_LE(p0 + p3 + p1 + p2, 128);
      EXPECT_EQ(p0 + p1 + p2 + p3, 128);
    }
  }
}

// [0] = plain reference filter, [1] = averaging reference filter, indexed in
// lockstep with the paired UUT_ entry points.
const WrapperFilterBlock2d8Func wrapper_filter_block2d_8[2] = {
  wrapper_filter_block2d_8_c, wrapper_filter_average_block2d_8_c
};

// Exhaustively compares the function under test against the C reference for
// every filter bank and every horizontal/vertical subpel position, for both
// the plain (i == 0) and averaging (i == 1) variants.
TEST_P(ConvolveTest, MatchesReferenceSubpixelFilter) {
  for (int i = 0; i < 2; ++i) {
    uint8_t *const in = input();
    uint8_t *const out = output();
#if CONFIG_VP9_HIGHBITDEPTH
    // |ref| aliases ref8 or ref16 depending on the configured bit depth.
    uint8_t ref8[kOutputStride * kMaxDimension];
    uint16_t ref16[kOutputStride * kMaxDimension];
    uint8_t *ref;
    if (UUT_->use_highbd_ == 0) {
      ref = ref8;
    } else {
      ref = CAST_TO_BYTEPTR(ref16);
    }
#else
    uint8_t ref[kOutputStride * kMaxDimension];
#endif

    // Populate ref and out with some random data
    ::libvpx_test::ACMRandom prng;
    for (int y = 0; y < Height(); ++y) {
      for (int x = 0; x < Width(); ++x) {
        uint16_t r;
#if CONFIG_VP9_HIGHBITDEPTH
        if (UUT_->use_highbd_ == 0 || UUT_->use_highbd_ == 8) {
          r = prng.Rand8Extremes();
        } else {
          r = prng.Rand16() & mask_;
        }
#else
        r = prng.Rand8Extremes();
#endif

        assign_val(out, y * kOutputStride + x, r);
        assign_val(ref, y * kOutputStride + x, r);
      }
    }

    for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
      const InterpKernel *filters =
          vp9_filter_kernels[static_cast<INTERP_FILTER>(filter_bank)];

      for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
        for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
          wrapper_filter_block2d_8[i](in, kInputStride, filters[filter_x],
                                      filters[filter_y], ref, kOutputStride,
                                      Width(), Height(), UUT_->use_highbd_);

          // Subpel position 0 means "no filtering" on that axis, so exercise
          // the matching specialized entry point.
          if (filter_x && filter_y)
            ASM_REGISTER_STATE_CHECK(
                UUT_->hv8_[i](in, kInputStride, out, kOutputStride, filters,
                              filter_x, 16, filter_y, 16, Width(), Height()));
          else if (filter_y)
            ASM_REGISTER_STATE_CHECK(
                UUT_->v8_[i](in, kInputStride, out, kOutputStride, filters, 0,
                             16, filter_y, 16, Width(), Height()));
          else if (filter_x)
            ASM_REGISTER_STATE_CHECK(
                UUT_->h8_[i](in, kInputStride, out, kOutputStride, filters,
                             filter_x, 16, 0, 16, Width(), Height()));
          else
            ASM_REGISTER_STATE_CHECK(UUT_->copy_[i](in, kInputStride, out,
                                                    kOutputStride, NULL, 0, 0,
                                                    0, 0, Width(), Height()));

          CheckGuardBlocks();

          for (int y = 0; y < Height(); ++y) {
            for (int x = 0; x < Width(); ++x)
              ASSERT_EQ(lookup(ref, y * kOutputStride + x),
                        lookup(out, y * kOutputStride + x))
                  << "mismatch at (" << x << "," << y << "), "
                  << "filters (" << filter_bank << "," << filter_x << ","
                  << filter_y << ")";
          }
        }
      }
    }
  }
}

// Drives the filters with every on/off bit pattern of extreme pixel values
// over the 8x8 filter-support window to probe for intermediate overflow.
TEST_P(ConvolveTest, FilterExtremes) {
  uint8_t *const in = input();
  uint8_t *const out = output();
#if CONFIG_VP9_HIGHBITDEPTH
  // |ref| aliases ref8 or ref16 depending on the configured bit depth.
  uint8_t ref8[kOutputStride * kMaxDimension];
  uint16_t ref16[kOutputStride * kMaxDimension];
  uint8_t *ref;
  if (UUT_->use_highbd_ == 0) {
    ref = ref8;
  } else {
    ref = CAST_TO_BYTEPTR(ref16);
  }
#else
  uint8_t ref[kOutputStride * kMaxDimension];
#endif

  // Populate ref and out with some random data
  ::libvpx_test::ACMRandom prng;
  for (int y = 0; y < Height(); ++y) {
    for (int x = 0; x < Width(); ++x) {
      uint16_t r;
#if CONFIG_VP9_HIGHBITDEPTH
      if (UUT_->use_highbd_ == 0 || UUT_->use_highbd_ == 8) {
        r = prng.Rand8Extremes();
      } else {
        r = prng.Rand16() & mask_;
      }
#else
      r = prng.Rand8Extremes();
#endif
      assign_val(out, y * kOutputStride + x, r);
      assign_val(ref, y * kOutputStride + x, r);
    }
  }

  // axis == 0 varies the pattern along rows, axis == 1 along columns; each
  // bit of seed_val selects min or max for one pixel of the 8x8 window.
  for (int axis = 0; axis < 2; axis++) {
    int seed_val = 0;
    while (seed_val < 256) {
      for (int y = 0; y < 8; ++y) {
        for (int x = 0; x < 8; ++x) {
#if CONFIG_VP9_HIGHBITDEPTH
          assign_val(in, y * kOutputStride + x - SUBPEL_TAPS / 2 + 1,
                     ((seed_val >> (axis ? y : x)) & 1) * mask_);
#else
          assign_val(in, y * kOutputStride + x - SUBPEL_TAPS / 2 + 1,
                     ((seed_val >> (axis ? y : x)) & 1) * 255);
#endif
          if (axis) seed_val++;
        }
        if (axis) {
          seed_val -= 8;
        } else {
          seed_val++;
        }
      }
      if (axis) seed_val += 8;

      for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
        const InterpKernel *filters =
            vp9_filter_kernels[static_cast<INTERP_FILTER>(filter_bank)];
        for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
          for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
            wrapper_filter_block2d_8_c(in, kInputStride, filters[filter_x],
                                       filters[filter_y], ref, kOutputStride,
                                       Width(), Height(), UUT_->use_highbd_);
            if (filter_x && filter_y)
              ASM_REGISTER_STATE_CHECK(
                  UUT_->hv8_[0](in, kInputStride, out, kOutputStride, filters,
                                filter_x, 16, filter_y, 16, Width(), Height()));
            else if (filter_y)
              ASM_REGISTER_STATE_CHECK(
                  UUT_->v8_[0](in, kInputStride, out, kOutputStride, filters, 0,
                               16, filter_y, 16, Width(), Height()));
            else if (filter_x)
              ASM_REGISTER_STATE_CHECK(
                  UUT_->h8_[0](in, kInputStride, out, kOutputStride, filters,
                               filter_x, 16, 0, 16, Width(), Height()));
            else
              ASM_REGISTER_STATE_CHECK(UUT_->copy_[0](in, kInputStride, out,
                                                      kOutputStride, NULL, 0, 0,
                                                      0, 0, Width(), Height()));

            for (int y = 0; y < Height(); ++y) {
              for (int x = 0; x < Width(); ++x)
                ASSERT_EQ(lookup(ref, y * kOutputStride + x),
                          lookup(out, y * kOutputStride + x))
                    << "mismatch at (" << x << "," << y << "), "
                    << "filters (" << filter_bank << "," << filter_x << ","
                    << filter_y << ")";
            }
          }
        }
      }
    }
  }
}

/* This test exercises that enough rows and columns are filtered with every
   possible initial fractional positions and scaling steps.
 */
#if !CONFIG_VP9_HIGHBITDEPTH
// C reference implementations that the scaled 2D functions under test are
// compared against (plain and averaging variants).
static const ConvolveFunc scaled_2d_c_funcs[2] = { vpx_scaled_2d_c,
                                                   vpx_scaled_avg_2d_c };

TEST_P(ConvolveTest, CheckScalingFiltering) {
  uint8_t *const in = input();
  uint8_t *const out = output();
  uint8_t ref[kOutputStride * kMaxDimension];

  // Fill the input block with random extreme pixel values.
  ::libvpx_test::ACMRandom prng;
  for (int y = 0; y < Height(); ++y) {
    for (int x = 0; x < Width(); ++x) {
      const uint16_t r = prng.Rand8Extremes();
      assign_val(in, y * kInputStride + x, r);
    }
  }

  // i selects plain vs averaging variant. Sweep all filter banks, all 16
  // initial fractional positions, and scaling steps 1..32 (q4 units, so 16
  // is unscaled -- presumably 32 corresponds to 2x; confirm against
  // vpx_scaled_2d semantics).
  for (int i = 0; i < 2; ++i) {
    for (INTERP_FILTER filter_type = 0; filter_type < 4; ++filter_type) {
      const InterpKernel *const eighttap = vp9_filter_kernels[filter_type];
      for (int frac = 0; frac < 16; ++frac) {
        for (int step = 1; step <= 32; ++step) {
          /* Test the horizontal and vertical filters in combination. */
          scaled_2d_c_funcs[i](in, kInputStride, ref, kOutputStride, eighttap,
                               frac, step, frac, step, Width(), Height());
          ASM_REGISTER_STATE_CHECK(
              UUT_->shv8_[i](in, kInputStride, out, kOutputStride, eighttap,
                             frac, step, frac, step, Width(), Height()));

          CheckGuardBlocks();

          for (int y = 0; y < Height(); ++y) {
            for (int x = 0; x < Width(); ++x) {
              ASSERT_EQ(lookup(ref, y * kOutputStride + x),
                        lookup(out, y * kOutputStride + x))
                  << "x == " << x << ", y == " << y << ", frac == " << frac
                  << ", step == " << step;
            }
          }
        }
      }
    }
  }
}
#endif

using std::tr1::make_tuple;

#if CONFIG_VP9_HIGHBITDEPTH
// Adapts a vpx_highbd_* convolve function (uint16_t pixels plus an explicit
// bit-depth argument) to the 8-bit ConvolveFunc signature the harness uses.
#define WRAP(func, bd)                                                       \
  void wrap_##func##_##bd(                                                   \
      const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,                \
      ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4,           \
      int x_step_q4, int y0_q4, int y_step_q4, int w, int h) {               \
    vpx_highbd_##func(reinterpret_cast<const uint16_t *>(src), src_stride,   \
                      reinterpret_cast<uint16_t *>(dst), dst_stride, filter, \
                      x0_q4, x_step_q4, y0_q4, y_step_q4, w, h, bd);         \
  }

// Instantiate wrappers for every optimized high-bitdepth function tested
// below, at each supported bit depth (8, 10, 12).
#if HAVE_SSE2 && ARCH_X86_64
WRAP(convolve_copy_sse2, 8)
WRAP(convolve_avg_sse2, 8)
WRAP(convolve_copy_sse2, 10)
WRAP(convolve_avg_sse2, 10)
WRAP(convolve_copy_sse2, 12)
WRAP(convolve_avg_sse2, 12)
WRAP(convolve8_horiz_sse2, 8)
WRAP(convolve8_avg_horiz_sse2, 8)
WRAP(convolve8_vert_sse2, 8)
WRAP(convolve8_avg_vert_sse2, 8)
WRAP(convolve8_sse2, 8)
WRAP(convolve8_avg_sse2, 8)
WRAP(convolve8_horiz_sse2, 10)
WRAP(convolve8_avg_horiz_sse2, 10)
WRAP(convolve8_vert_sse2, 10)
WRAP(convolve8_avg_vert_sse2, 10)
WRAP(convolve8_sse2, 10)
WRAP(convolve8_avg_sse2, 10)
WRAP(convolve8_horiz_sse2, 12)
WRAP(convolve8_avg_horiz_sse2, 12)
WRAP(convolve8_vert_sse2, 12)
WRAP(convolve8_avg_vert_sse2, 12)
WRAP(convolve8_sse2, 12)
WRAP(convolve8_avg_sse2, 12)
#endif  // HAVE_SSE2 && ARCH_X86_64

#if HAVE_AVX2
WRAP(convolve_copy_avx2, 8)
WRAP(convolve_avg_avx2, 8)
WRAP(convolve8_horiz_avx2, 8)
WRAP(convolve8_avg_horiz_avx2, 8)
WRAP(convolve8_vert_avx2, 8)
WRAP(convolve8_avg_vert_avx2, 8)
WRAP(convolve8_avx2, 8)
WRAP(convolve8_avg_avx2, 8)

WRAP(convolve_copy_avx2, 10)
WRAP(convolve_avg_avx2, 10)
WRAP(convolve8_avx2, 10)
WRAP(convolve8_horiz_avx2, 10)
WRAP(convolve8_vert_avx2, 10)
WRAP(convolve8_avg_avx2, 10)
WRAP(convolve8_avg_horiz_avx2, 10)
WRAP(convolve8_avg_vert_avx2, 10)

WRAP(convolve_copy_avx2, 12)
WRAP(convolve_avg_avx2, 12)
WRAP(convolve8_avx2, 12)
WRAP(convolve8_horiz_avx2, 12)
WRAP(convolve8_vert_avx2, 12)
WRAP(convolve8_avg_avx2, 12)
WRAP(convolve8_avg_horiz_avx2, 12)
WRAP(convolve8_avg_vert_avx2, 12)
#endif  // HAVE_AVX2

#if HAVE_NEON
WRAP(convolve_copy_neon, 8)
WRAP(convolve_avg_neon, 8)
WRAP(convolve_copy_neon, 10)
WRAP(convolve_avg_neon, 10)
WRAP(convolve_copy_neon, 12)
WRAP(convolve_avg_neon, 12)
WRAP(convolve8_horiz_neon, 8)
WRAP(convolve8_avg_horiz_neon, 8)
WRAP(convolve8_vert_neon, 8)
WRAP(convolve8_avg_vert_neon, 8)
WRAP(convolve8_neon, 8)
WRAP(convolve8_avg_neon, 8)
WRAP(convolve8_horiz_neon, 10)
WRAP(convolve8_avg_horiz_neon, 10)
WRAP(convolve8_vert_neon, 10)
WRAP(convolve8_avg_vert_neon, 10)
WRAP(convolve8_neon, 10)
WRAP(convolve8_avg_neon, 10)
WRAP(convolve8_horiz_neon, 12)
WRAP(convolve8_avg_horiz_neon, 12)
WRAP(convolve8_vert_neon, 12)
WRAP(convolve8_avg_vert_neon, 12)
WRAP(convolve8_neon, 12)
WRAP(convolve8_avg_neon, 12)
#endif  // HAVE_NEON

// C reference wrappers, also used as the scaled-convolve entries of the
// high-bitdepth tables (no high-bitdepth scaled SIMD exists here).
WRAP(convolve_copy_c, 8)
WRAP(convolve_avg_c, 8)
WRAP(convolve8_horiz_c, 8)
WRAP(convolve8_avg_horiz_c, 8)
WRAP(convolve8_vert_c, 8)
WRAP(convolve8_avg_vert_c, 8)
WRAP(convolve8_c, 8)
WRAP(convolve8_avg_c, 8)
WRAP(convolve_copy_c, 10)
WRAP(convolve_avg_c, 10)
WRAP(convolve8_horiz_c, 10)
WRAP(convolve8_avg_horiz_c, 10)
WRAP(convolve8_vert_c, 10)
WRAP(convolve8_avg_vert_c, 10)
WRAP(convolve8_c, 10)
WRAP(convolve8_avg_c, 10)
WRAP(convolve_copy_c, 12)
WRAP(convolve_avg_c, 12)
WRAP(convolve8_horiz_c, 12)
WRAP(convolve8_avg_horiz_c, 12)
WRAP(convolve8_vert_c, 12)
WRAP(convolve8_avg_vert_c, 12)
WRAP(convolve8_c, 12)
WRAP(convolve8_avg_c, 12)
#undef WRAP

// Function tables for the C implementations; trailing integer is the bit
// depth (0 would mean "not high bitdepth", see ConvolveFunctions).
const ConvolveFunctions convolve8_c(
    wrap_convolve_copy_c_8, wrap_convolve_avg_c_8, wrap_convolve8_horiz_c_8,
    wrap_convolve8_avg_horiz_c_8, wrap_convolve8_vert_c_8,
    wrap_convolve8_avg_vert_c_8, wrap_convolve8_c_8, wrap_convolve8_avg_c_8,
    wrap_convolve8_horiz_c_8, wrap_convolve8_avg_horiz_c_8,
    wrap_convolve8_vert_c_8, wrap_convolve8_avg_vert_c_8, wrap_convolve8_c_8,
    wrap_convolve8_avg_c_8, 8);
const
ConvolveFunctions convolve10_c(
    wrap_convolve_copy_c_10, wrap_convolve_avg_c_10, wrap_convolve8_horiz_c_10,
    wrap_convolve8_avg_horiz_c_10, wrap_convolve8_vert_c_10,
    wrap_convolve8_avg_vert_c_10, wrap_convolve8_c_10, wrap_convolve8_avg_c_10,
    wrap_convolve8_horiz_c_10, wrap_convolve8_avg_horiz_c_10,
    wrap_convolve8_vert_c_10, wrap_convolve8_avg_vert_c_10, wrap_convolve8_c_10,
    wrap_convolve8_avg_c_10, 10);
const ConvolveFunctions convolve12_c(
    wrap_convolve_copy_c_12, wrap_convolve_avg_c_12, wrap_convolve8_horiz_c_12,
    wrap_convolve8_avg_horiz_c_12, wrap_convolve8_vert_c_12,
    wrap_convolve8_avg_vert_c_12, wrap_convolve8_c_12, wrap_convolve8_avg_c_12,
    wrap_convolve8_horiz_c_12, wrap_convolve8_avg_horiz_c_12,
    wrap_convolve8_vert_c_12, wrap_convolve8_avg_vert_c_12, wrap_convolve8_c_12,
    wrap_convolve8_avg_c_12, 12);
const ConvolveParam kArrayConvolve_c[] = {
  ALL_SIZES(convolve8_c), ALL_SIZES(convolve10_c), ALL_SIZES(convolve12_c)
};

#else
// Low-bitdepth-only build: test the plain C functions directly.
const ConvolveFunctions convolve8_c(
    vpx_convolve_copy_c, vpx_convolve_avg_c, vpx_convolve8_horiz_c,
    vpx_convolve8_avg_horiz_c, vpx_convolve8_vert_c, vpx_convolve8_avg_vert_c,
    vpx_convolve8_c, vpx_convolve8_avg_c, vpx_scaled_horiz_c,
    vpx_scaled_avg_horiz_c, vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
    vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
const ConvolveParam kArrayConvolve_c[] = { ALL_SIZES(convolve8_c) };
#endif
INSTANTIATE_TEST_CASE_P(C, ConvolveTest, ::testing::ValuesIn(kArrayConvolve_c));

#if HAVE_SSE2 && ARCH_X86_64
#if CONFIG_VP9_HIGHBITDEPTH
// SSE2 tables: scaled entries reuse the non-scaled SSE2 wrappers since no
// scaled high-bitdepth SSE2 variants are wrapped above.
const ConvolveFunctions convolve8_sse2(
    wrap_convolve_copy_sse2_8, wrap_convolve_avg_sse2_8,
    wrap_convolve8_horiz_sse2_8, wrap_convolve8_avg_horiz_sse2_8,
    wrap_convolve8_vert_sse2_8, wrap_convolve8_avg_vert_sse2_8,
    wrap_convolve8_sse2_8, wrap_convolve8_avg_sse2_8,
    wrap_convolve8_horiz_sse2_8,
    wrap_convolve8_avg_horiz_sse2_8,
    wrap_convolve8_vert_sse2_8, wrap_convolve8_avg_vert_sse2_8,
    wrap_convolve8_sse2_8, wrap_convolve8_avg_sse2_8, 8);
const ConvolveFunctions convolve10_sse2(
    wrap_convolve_copy_sse2_10, wrap_convolve_avg_sse2_10,
    wrap_convolve8_horiz_sse2_10, wrap_convolve8_avg_horiz_sse2_10,
    wrap_convolve8_vert_sse2_10, wrap_convolve8_avg_vert_sse2_10,
    wrap_convolve8_sse2_10, wrap_convolve8_avg_sse2_10,
    wrap_convolve8_horiz_sse2_10, wrap_convolve8_avg_horiz_sse2_10,
    wrap_convolve8_vert_sse2_10, wrap_convolve8_avg_vert_sse2_10,
    wrap_convolve8_sse2_10, wrap_convolve8_avg_sse2_10, 10);
const ConvolveFunctions convolve12_sse2(
    wrap_convolve_copy_sse2_12, wrap_convolve_avg_sse2_12,
    wrap_convolve8_horiz_sse2_12, wrap_convolve8_avg_horiz_sse2_12,
    wrap_convolve8_vert_sse2_12, wrap_convolve8_avg_vert_sse2_12,
    wrap_convolve8_sse2_12, wrap_convolve8_avg_sse2_12,
    wrap_convolve8_horiz_sse2_12, wrap_convolve8_avg_horiz_sse2_12,
    wrap_convolve8_vert_sse2_12, wrap_convolve8_avg_vert_sse2_12,
    wrap_convolve8_sse2_12, wrap_convolve8_avg_sse2_12, 12);
const ConvolveParam kArrayConvolve_sse2[] = { ALL_SIZES(convolve8_sse2),
                                              ALL_SIZES(convolve10_sse2),
                                              ALL_SIZES(convolve12_sse2) };
#else
// Low-bitdepth SSE2 table; scaled entries fall back to the C versions.
const ConvolveFunctions convolve8_sse2(
    vpx_convolve_copy_sse2, vpx_convolve_avg_sse2, vpx_convolve8_horiz_sse2,
    vpx_convolve8_avg_horiz_sse2, vpx_convolve8_vert_sse2,
    vpx_convolve8_avg_vert_sse2, vpx_convolve8_sse2, vpx_convolve8_avg_sse2,
    vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c, vpx_scaled_vert_c,
    vpx_scaled_avg_vert_c, vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);

const ConvolveParam kArrayConvolve_sse2[] = { ALL_SIZES(convolve8_sse2) };
#endif  // CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(SSE2, ConvolveTest,
                        ::testing::ValuesIn(kArrayConvolve_sse2));
#endif

#if HAVE_SSSE3
const ConvolveFunctions
convolve8_ssse3(
    // SSSE3 has no optimized copy/avg; those entries use the C versions.
    vpx_convolve_copy_c, vpx_convolve_avg_c, vpx_convolve8_horiz_ssse3,
    vpx_convolve8_avg_horiz_ssse3, vpx_convolve8_vert_ssse3,
    vpx_convolve8_avg_vert_ssse3, vpx_convolve8_ssse3, vpx_convolve8_avg_ssse3,
    vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c, vpx_scaled_vert_c,
    vpx_scaled_avg_vert_c, vpx_scaled_2d_ssse3, vpx_scaled_avg_2d_c, 0);

const ConvolveParam kArrayConvolve8_ssse3[] = { ALL_SIZES(convolve8_ssse3) };
INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest,
                        ::testing::ValuesIn(kArrayConvolve8_ssse3));
#endif

#if HAVE_AVX2
#if CONFIG_VP9_HIGHBITDEPTH
// AVX2 tables: scaled entries use the C wrappers at matching bit depth.
const ConvolveFunctions convolve8_avx2(
    wrap_convolve_copy_avx2_8, wrap_convolve_avg_avx2_8,
    wrap_convolve8_horiz_avx2_8, wrap_convolve8_avg_horiz_avx2_8,
    wrap_convolve8_vert_avx2_8, wrap_convolve8_avg_vert_avx2_8,
    wrap_convolve8_avx2_8, wrap_convolve8_avg_avx2_8, wrap_convolve8_horiz_c_8,
    wrap_convolve8_avg_horiz_c_8, wrap_convolve8_vert_c_8,
    wrap_convolve8_avg_vert_c_8, wrap_convolve8_c_8, wrap_convolve8_avg_c_8, 8);
const ConvolveFunctions convolve10_avx2(
    wrap_convolve_copy_avx2_10, wrap_convolve_avg_avx2_10,
    wrap_convolve8_horiz_avx2_10, wrap_convolve8_avg_horiz_avx2_10,
    wrap_convolve8_vert_avx2_10, wrap_convolve8_avg_vert_avx2_10,
    wrap_convolve8_avx2_10, wrap_convolve8_avg_avx2_10,
    wrap_convolve8_horiz_c_10, wrap_convolve8_avg_horiz_c_10,
    wrap_convolve8_vert_c_10, wrap_convolve8_avg_vert_c_10, wrap_convolve8_c_10,
    wrap_convolve8_avg_c_10, 10);
const ConvolveFunctions convolve12_avx2(
    wrap_convolve_copy_avx2_12, wrap_convolve_avg_avx2_12,
    wrap_convolve8_horiz_avx2_12, wrap_convolve8_avg_horiz_avx2_12,
    wrap_convolve8_vert_avx2_12, wrap_convolve8_avg_vert_avx2_12,
    wrap_convolve8_avx2_12, wrap_convolve8_avg_avx2_12,
    wrap_convolve8_horiz_c_12, wrap_convolve8_avg_horiz_c_12,
    wrap_convolve8_vert_c_12, wrap_convolve8_avg_vert_c_12,
    wrap_convolve8_c_12,
    wrap_convolve8_avg_c_12, 12);
const ConvolveParam kArrayConvolve8_avx2[] = { ALL_SIZES(convolve8_avx2),
                                               ALL_SIZES(convolve10_avx2),
                                               ALL_SIZES(convolve12_avx2) };
INSTANTIATE_TEST_CASE_P(AVX2, ConvolveTest,
                        ::testing::ValuesIn(kArrayConvolve8_avx2));
#else   // !CONFIG_VP9_HIGHBITDEPTH
// Low-bitdepth AVX2 table; copy/avg use C, scaled entries use C.
const ConvolveFunctions convolve8_avx2(
    vpx_convolve_copy_c, vpx_convolve_avg_c, vpx_convolve8_horiz_avx2,
    vpx_convolve8_avg_horiz_avx2, vpx_convolve8_vert_avx2,
    vpx_convolve8_avg_vert_avx2, vpx_convolve8_avx2, vpx_convolve8_avg_avx2,
    vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c, vpx_scaled_vert_c,
    vpx_scaled_avg_vert_c, vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
const ConvolveParam kArrayConvolve8_avx2[] = { ALL_SIZES(convolve8_avx2) };
INSTANTIATE_TEST_CASE_P(AVX2, ConvolveTest,
                        ::testing::ValuesIn(kArrayConvolve8_avx2));
#endif  // CONFIG_VP9_HIGHBITDEPTH
#endif  // HAVE_AVX2

#if HAVE_NEON
#if CONFIG_VP9_HIGHBITDEPTH
// NEON tables: scaled entries reuse the non-scaled NEON wrappers.
const ConvolveFunctions convolve8_neon(
    wrap_convolve_copy_neon_8, wrap_convolve_avg_neon_8,
    wrap_convolve8_horiz_neon_8, wrap_convolve8_avg_horiz_neon_8,
    wrap_convolve8_vert_neon_8, wrap_convolve8_avg_vert_neon_8,
    wrap_convolve8_neon_8, wrap_convolve8_avg_neon_8,
    wrap_convolve8_horiz_neon_8, wrap_convolve8_avg_horiz_neon_8,
    wrap_convolve8_vert_neon_8, wrap_convolve8_avg_vert_neon_8,
    wrap_convolve8_neon_8, wrap_convolve8_avg_neon_8, 8);
const ConvolveFunctions convolve10_neon(
    wrap_convolve_copy_neon_10, wrap_convolve_avg_neon_10,
    wrap_convolve8_horiz_neon_10, wrap_convolve8_avg_horiz_neon_10,
    wrap_convolve8_vert_neon_10, wrap_convolve8_avg_vert_neon_10,
    wrap_convolve8_neon_10, wrap_convolve8_avg_neon_10,
    wrap_convolve8_horiz_neon_10, wrap_convolve8_avg_horiz_neon_10,
    wrap_convolve8_vert_neon_10, wrap_convolve8_avg_vert_neon_10,
    wrap_convolve8_neon_10,
    wrap_convolve8_avg_neon_10, 10);
const ConvolveFunctions convolve12_neon(
    wrap_convolve_copy_neon_12, wrap_convolve_avg_neon_12,
    wrap_convolve8_horiz_neon_12, wrap_convolve8_avg_horiz_neon_12,
    wrap_convolve8_vert_neon_12, wrap_convolve8_avg_vert_neon_12,
    wrap_convolve8_neon_12, wrap_convolve8_avg_neon_12,
    wrap_convolve8_horiz_neon_12, wrap_convolve8_avg_horiz_neon_12,
    wrap_convolve8_vert_neon_12, wrap_convolve8_avg_vert_neon_12,
    wrap_convolve8_neon_12, wrap_convolve8_avg_neon_12, 12);
const ConvolveParam kArrayConvolve_neon[] = { ALL_SIZES(convolve8_neon),
                                              ALL_SIZES(convolve10_neon),
                                              ALL_SIZES(convolve12_neon) };
#else
// Low-bitdepth NEON table; scaled entries use C except scaled_2d.
const ConvolveFunctions convolve8_neon(
    vpx_convolve_copy_neon, vpx_convolve_avg_neon, vpx_convolve8_horiz_neon,
    vpx_convolve8_avg_horiz_neon, vpx_convolve8_vert_neon,
    vpx_convolve8_avg_vert_neon, vpx_convolve8_neon, vpx_convolve8_avg_neon,
    vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c, vpx_scaled_vert_c,
    vpx_scaled_avg_vert_c, vpx_scaled_2d_neon, vpx_scaled_avg_2d_c, 0);

const ConvolveParam kArrayConvolve_neon[] = { ALL_SIZES(convolve8_neon) };
#endif  // CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(NEON, ConvolveTest,
                        ::testing::ValuesIn(kArrayConvolve_neon));
#endif  // HAVE_NEON

#if HAVE_DSPR2
// MIPS DSPr2 table (low bitdepth only); scaled entries use C.
const ConvolveFunctions convolve8_dspr2(
    vpx_convolve_copy_dspr2, vpx_convolve_avg_dspr2, vpx_convolve8_horiz_dspr2,
    vpx_convolve8_avg_horiz_dspr2, vpx_convolve8_vert_dspr2,
    vpx_convolve8_avg_vert_dspr2, vpx_convolve8_dspr2, vpx_convolve8_avg_dspr2,
    vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c, vpx_scaled_vert_c,
    vpx_scaled_avg_vert_c, vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);

const ConvolveParam kArrayConvolve8_dspr2[] = { ALL_SIZES(convolve8_dspr2) };
INSTANTIATE_TEST_CASE_P(DSPR2, ConvolveTest,
                        ::testing::ValuesIn(kArrayConvolve8_dspr2));
#endif  // HAVE_DSPR2

#if HAVE_MSA
// MIPS MSA table (low bitdepth only).
const ConvolveFunctions convolve8_msa(
    vpx_convolve_copy_msa, vpx_convolve_avg_msa, vpx_convolve8_horiz_msa,
    vpx_convolve8_avg_horiz_msa, vpx_convolve8_vert_msa,
    vpx_convolve8_avg_vert_msa, vpx_convolve8_msa, vpx_convolve8_avg_msa,
    vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c, vpx_scaled_vert_c,
    vpx_scaled_avg_vert_c, vpx_scaled_2d_msa, vpx_scaled_avg_2d_c, 0);

const ConvolveParam kArrayConvolve8_msa[] = { ALL_SIZES(convolve8_msa) };
INSTANTIATE_TEST_CASE_P(MSA, ConvolveTest,
                        ::testing::ValuesIn(kArrayConvolve8_msa));
#endif  // HAVE_MSA

#if HAVE_VSX
// PowerPC VSX table (low bitdepth only).
const ConvolveFunctions convolve8_vsx(
    vpx_convolve_copy_vsx, vpx_convolve_avg_vsx, vpx_convolve8_horiz_vsx,
    vpx_convolve8_avg_horiz_vsx, vpx_convolve8_vert_vsx,
    vpx_convolve8_avg_vert_vsx, vpx_convolve8_vsx, vpx_convolve8_avg_vsx,
    vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c, vpx_scaled_vert_c,
    vpx_scaled_avg_vert_c, vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
const ConvolveParam kArrayConvolve_vsx[] = { ALL_SIZES(convolve8_vsx) };
INSTANTIATE_TEST_CASE_P(VSX, ConvolveTest,
                        ::testing::ValuesIn(kArrayConvolve_vsx));
#endif  // HAVE_VSX
}  // namespace