// Copyright 2015, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of ARM Limited nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
#include <cfloat>
#include <cstdio>

#include "test-runner.h"
#include "test-utils.h"

#include "aarch64/test-simulator-inputs-aarch64.h"
#include "aarch64/test-simulator-traces-aarch64.h"
#include "aarch64/test-utils-aarch64.h"

#include "aarch64/macro-assembler-aarch64.h"
#include "aarch64/simulator-aarch64.h"

namespace vixl {
namespace aarch64 {

// ==== Simulator Tests ====
//
// These simulator tests check instruction behaviour against a trace taken from
// real AArch64 hardware. The same test code is used to generate the trace; the
// results are printed to stdout when the test is run with
// --generate_test_trace.
//
// The input lists and expected results are stored in test/traces. The expected
// results can be regenerated using tools/generate_simulator_traces.py. Adding a
// test for a new instruction is described at the top of
// test-simulator-traces-aarch64.h.

// `__` abbreviates assembler emission: `__ Mov(...)` is `masm.Mov(...)`.
#define __ masm.
#define TEST(name) TEST_(AARCH64_SIM_##name)

// Two harness variants follow: when the simulator is compiled in, generated
// code is executed through a Simulator (or interactive Debugger); otherwise
// the buffer is made executable and run directly on the host CPU.
// NOTE: comments must stay outside the macro bodies — a `//` comment before a
// trailing backslash would swallow the line continuation.
#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64

// Create the MacroAssembler plus a Decoder-backed Simulator/Debugger, and
// apply the trace/stats options from the test command line.
#define SETUP()                                                              \
  MacroAssembler masm;                                                       \
  Decoder decoder;                                                           \
  Simulator* simulator =                                                     \
      Test::run_debugger() ? new Debugger(&decoder) : new Simulator(&decoder); \
  simulator->SetColouredTrace(Test::coloured_trace());                       \
  simulator->SetInstructionStats(Test::instruction_stats());

// Reset assembler and simulator state, then emit the standard prologue and
// any requested tracing/instrumentation directives.
#define START()                           \
  masm.Reset();                           \
  simulator->ResetState();                \
  __ PushCalleeSavedRegisters();          \
  if (Test::trace_reg()) {                \
    __ Trace(LOG_STATE, TRACE_ENABLE);    \
  }                                       \
  if (Test::trace_write()) {              \
    __ Trace(LOG_WRITE, TRACE_ENABLE);    \
  }                                       \
  if (Test::trace_sim()) {                \
    __ Trace(LOG_DISASM, TRACE_ENABLE);   \
  }                                       \
  if (Test::instruction_stats()) {        \
    __ EnableInstrumentation();           \
  }

// Emit the matching epilogue and finalize the code buffer.
#define END()                             \
  if (Test::instruction_stats()) {        \
    __ DisableInstrumentation();          \
  }                                       \
  __ Trace(LOG_ALL, TRACE_DISABLE);       \
  __ PopCalleeSavedRegisters();           \
  __ Ret();                               \
  masm.FinalizeCode()

// Execute the generated code from its start address on the simulator.
#define RUN() \
  simulator->RunFrom(masm.GetBuffer()->GetStartAddress<Instruction*>())

#define TEARDOWN() delete simulator;

#else  // VIXL_INCLUDE_SIMULATOR_AARCH64

// Native variant: no simulator object; the CPU support code is set up once.
#define SETUP()        \
  MacroAssembler masm; \
  CPU::SetUp()

#define START() \
  masm.Reset(); \
  __ PushCalleeSavedRegisters()

#define END()                   \
  __ PopCalleeSavedRegisters(); \
  __ Ret();                     \
  masm.FinalizeCode()

// Flip the buffer to executable, run it on the host, then restore write
// permission so the next test can reuse the assembler.
#define RUN()                                                  \
  {                                                            \
    masm.GetBuffer()->SetExecutable();                         \
    ExecuteMemory(masm.GetBuffer()->GetStartAddress<byte*>(),  \
                  masm.GetSizeOfCodeGenerated());              \
    masm.GetBuffer()->SetWritable();                           \
  }

#define TEARDOWN()

#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64


// The maximum number of errors to report in detail for each test.
static const unsigned kErrorReportLimit = 8;


// Overloaded versions of RawbitsToDouble and RawbitsToFloat for use in the
// templated test functions.
// Reinterpret a 32-bit raw pattern as the float it encodes.
static float rawbits_to_fp(uint32_t bits) { return RawbitsToFloat(bits); }

// Reinterpret a 64-bit raw pattern as the double it encodes.
static double rawbits_to_fp(uint64_t bits) { return RawbitsToDouble(bits); }


// MacroAssembler member function pointers to pass to the test dispatchers.
// Each alias matches the signature of one family of emission helpers.
typedef void (MacroAssembler::*Test1OpFPHelper_t)(const FPRegister& fd,
                                                  const FPRegister& fn);
typedef void (MacroAssembler::*Test2OpFPHelper_t)(const FPRegister& fd,
                                                  const FPRegister& fn,
                                                  const FPRegister& fm);
typedef void (MacroAssembler::*Test3OpFPHelper_t)(const FPRegister& fd,
                                                  const FPRegister& fn,
                                                  const FPRegister& fm,
                                                  const FPRegister& fa);
typedef void (MacroAssembler::*TestFPCmpHelper_t)(const FPRegister& fn,
                                                  const FPRegister& fm);
typedef void (MacroAssembler::*TestFPCmpZeroHelper_t)(const FPRegister& fn,
                                                      double value);
typedef void (MacroAssembler::*TestFPToIntHelper_t)(const Register& rd,
                                                    const FPRegister& fn);
typedef void (MacroAssembler::*TestFPToFixedHelper_t)(const Register& rd,
                                                      const FPRegister& fn,
                                                      int fbits);
typedef void (MacroAssembler::*TestFixedToFPHelper_t)(const FPRegister& fd,
                                                      const Register& rn,
                                                      int fbits);
// TODO: 'Test2OpNEONHelper_t' and 'Test2OpFPHelper_t' can be
// consolidated into one routine.
typedef void (MacroAssembler::*Test1OpNEONHelper_t)(const VRegister& vd,
                                                    const VRegister& vn);
typedef void (MacroAssembler::*Test2OpNEONHelper_t)(const VRegister& vd,
                                                    const VRegister& vn,
                                                    const VRegister& vm);
typedef void (MacroAssembler::*TestByElementNEONHelper_t)(const VRegister& vd,
                                                          const VRegister& vn,
                                                          const VRegister& vm,
                                                          int vm_index);
typedef void (MacroAssembler::*TestOpImmOpImmVdUpdateNEONHelper_t)(
    const VRegister& vd, int imm1, const VRegister& vn, int imm2);

// This helps using the same typename for both the function pointer
// and the array of immediates passed to helper routines.
template <typename T>
class Test2OpImmediateNEONHelper_t {
 public:
  // Member-function pointer type for (vd, vn, imm) NEON helpers, where the
  // immediate's type T matches the immediates array used by the dispatcher.
  typedef void (MacroAssembler::*mnemonic)(const VRegister& vd,
                                           const VRegister& vn,
                                           T imm);
};


// Maximum number of hex characters required to represent values of either
// templated type.
template <typename Ta, typename Tb>
static unsigned MaxHexCharCount() {
  unsigned count = static_cast<unsigned>(std::max(sizeof(Ta), sizeof(Tb)));
  return (count * 8) / 4;
}


// Standard test dispatchers.


// Generate and run a code sequence that applies `helper` (a one-operand FP
// instruction emitter) to every input, storing each result to `results`.
// `inputs`/`results` are raw addresses of arrays of d_size/n_size rawbits.
static void Test1Op_Helper(Test1OpFPHelper_t helper,
                           uintptr_t inputs,
                           unsigned inputs_length,
                           uintptr_t results,
                           unsigned d_size,
                           unsigned n_size) {
  VIXL_ASSERT((d_size == kDRegSize) || (d_size == kSRegSize));
  VIXL_ASSERT((n_size == kDRegSize) || (n_size == kSRegSize));

  SETUP();
  START();

  // Roll up the loop to keep the code size down.
  Label loop_n;

  Register out = x0;
  Register inputs_base = x1;
  Register length = w2;
  Register index_n = w3;

  // Scale the element index into a byte offset for the load below.
  const int n_index_shift =
      (n_size == kDRegSize) ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;

  FPRegister fd = (d_size == kDRegSize) ? d0 : s0;
  FPRegister fn = (n_size == kDRegSize) ? d1 : s1;

  __ Mov(out, results);
  __ Mov(inputs_base, inputs);
  __ Mov(length, inputs_length);

  __ Mov(index_n, 0);
  __ Bind(&loop_n);
  __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, n_index_shift));

  {
    // Exactly one instruction is expected from each helper invocation.
    SingleEmissionCheckScope guard(&masm);
    (masm.*helper)(fd, fn);
  }
  // Append the result; post-index keeps `out` pointing at the next slot.
  __ Str(fd, MemOperand(out, fd.GetSizeInBytes(), PostIndex));

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_length);
  __ B(lo, &loop_n);

  END();
  RUN();
  TEARDOWN();
}


// Test FP instructions. The inputs[] and expected[] arrays should be arrays of
// rawbits representations of doubles or floats.
// This ensures that exact bit
// comparisons can be performed.
//
// In trace-generation mode the results are printed as C source; otherwise
// each result is bit-compared against expected[], reporting at most
// kErrorReportLimit mismatches in detail.
template <typename Tn, typename Td>
static void Test1Op(const char* name,
                    Test1OpFPHelper_t helper,
                    const Tn inputs[],
                    unsigned inputs_length,
                    const Td expected[],
                    unsigned expected_length) {
  VIXL_ASSERT(inputs_length > 0);

  // One result per input.
  const unsigned results_length = inputs_length;
  Td* results = new Td[results_length];

  const unsigned d_bits = sizeof(Td) * 8;
  const unsigned n_bits = sizeof(Tn) * 8;

  Test1Op_Helper(helper,
                 reinterpret_cast<uintptr_t>(inputs),
                 inputs_length,
                 reinterpret_cast<uintptr_t>(results),
                 d_bits,
                 n_bits);

  if (Test::generate_test_trace()) {
    // Print the results.
    printf("const uint%u_t kExpected_%s[] = {\n", d_bits, name);
    for (unsigned d = 0; d < results_length; d++) {
      printf(" 0x%0*" PRIx64 ",\n",
             d_bits / 4,
             static_cast<uint64_t>(results[d]));
    }
    printf("};\n");
    printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
  } else {
    // Check the results.
    VIXL_CHECK(expected_length == results_length);
    unsigned error_count = 0;
    unsigned d = 0;
    for (unsigned n = 0; n < inputs_length; n++, d++) {
      if (results[d] != expected[d]) {
        // Count every mismatch, but only print the first few in detail.
        if (++error_count > kErrorReportLimit) continue;

        printf("%s 0x%0*" PRIx64 " (%s %g):\n",
               name,
               n_bits / 4,
               static_cast<uint64_t>(inputs[n]),
               name,
               rawbits_to_fp(inputs[n]));
        printf(" Expected: 0x%0*" PRIx64 " (%g)\n",
               d_bits / 4,
               static_cast<uint64_t>(expected[d]),
               rawbits_to_fp(expected[d]));
        printf(" Found: 0x%0*" PRIx64 " (%g)\n",
               d_bits / 4,
               static_cast<uint64_t>(results[d]),
               rawbits_to_fp(results[d]));
        printf("\n");
      }
    }
    VIXL_ASSERT(d == expected_length);
    if (error_count > kErrorReportLimit) {
      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
    }
    VIXL_CHECK(error_count == 0);
  }
  delete[] results;
}


// Generate and run a code sequence that applies `helper` (a two-operand FP
// instruction emitter) to every (n, m) pair of inputs, storing the
// inputs_length^2 results in row-major (n-major) order.
static void Test2Op_Helper(Test2OpFPHelper_t helper,
                           uintptr_t inputs,
                           unsigned inputs_length,
                           uintptr_t results,
                           unsigned reg_size) {
  VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize));

  SETUP();
  START();

  // Roll up the loop to keep the code size down.
  Label loop_n, loop_m;

  Register out = x0;
  Register inputs_base = x1;
  Register length = w2;
  Register index_n = w3;
  Register index_m = w4;

  bool double_op = reg_size == kDRegSize;
  const int index_shift =
      double_op ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;

  FPRegister fd = double_op ? d0 : s0;
  FPRegister fn = double_op ? d1 : s1;
  FPRegister fm = double_op ?
      d2 : s2;

  __ Mov(out, results);
  __ Mov(inputs_base, inputs);
  __ Mov(length, inputs_length);

  // Outer loop over the first operand.
  __ Mov(index_n, 0);
  __ Bind(&loop_n);
  __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));

  // Inner loop over the second operand.
  __ Mov(index_m, 0);
  __ Bind(&loop_m);
  __ Ldr(fm, MemOperand(inputs_base, index_m, UXTW, index_shift));

  {
    // Exactly one instruction is expected from each helper invocation.
    SingleEmissionCheckScope guard(&masm);
    (masm.*helper)(fd, fn, fm);
  }
  __ Str(fd, MemOperand(out, fd.GetSizeInBytes(), PostIndex));

  __ Add(index_m, index_m, 1);
  __ Cmp(index_m, inputs_length);
  __ B(lo, &loop_m);

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_length);
  __ B(lo, &loop_n);

  END();
  RUN();
  TEARDOWN();
}


// Test FP instructions. The inputs[] and expected[] arrays should be arrays of
// rawbits representations of doubles or floats. This ensures that exact bit
// comparisons can be performed.
template <typename T>
static void Test2Op(const char* name,
                    Test2OpFPHelper_t helper,
                    const T inputs[],
                    unsigned inputs_length,
                    const T expected[],
                    unsigned expected_length) {
  VIXL_ASSERT(inputs_length > 0);

  // One result for every (n, m) pair of inputs.
  const unsigned results_length = inputs_length * inputs_length;
  T* results = new T[results_length];

  const unsigned bits = sizeof(T) * 8;

  Test2Op_Helper(helper,
                 reinterpret_cast<uintptr_t>(inputs),
                 inputs_length,
                 reinterpret_cast<uintptr_t>(results),
                 bits);

  if (Test::generate_test_trace()) {
    // Print the results.
    printf("const uint%u_t kExpected_%s[] = {\n", bits, name);
    for (unsigned d = 0; d < results_length; d++) {
      printf(" 0x%0*" PRIx64 ",\n",
             bits / 4,
             static_cast<uint64_t>(results[d]));
    }
    printf("};\n");
    printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
  } else {
    // Check the results.
    VIXL_CHECK(expected_length == results_length);
    unsigned error_count = 0;
    unsigned d = 0;
    // `d` walks the flat results array in step with the (n, m) pair order
    // produced by Test2Op_Helper.
    for (unsigned n = 0; n < inputs_length; n++) {
      for (unsigned m = 0; m < inputs_length; m++, d++) {
        if (results[d] != expected[d]) {
          // Count every mismatch, but only print the first few in detail.
          if (++error_count > kErrorReportLimit) continue;

          printf("%s 0x%0*" PRIx64 ", 0x%0*" PRIx64 " (%s %g %g):\n",
                 name,
                 bits / 4,
                 static_cast<uint64_t>(inputs[n]),
                 bits / 4,
                 static_cast<uint64_t>(inputs[m]),
                 name,
                 rawbits_to_fp(inputs[n]),
                 rawbits_to_fp(inputs[m]));
          printf(" Expected: 0x%0*" PRIx64 " (%g)\n",
                 bits / 4,
                 static_cast<uint64_t>(expected[d]),
                 rawbits_to_fp(expected[d]));
          printf(" Found: 0x%0*" PRIx64 " (%g)\n",
                 bits / 4,
                 static_cast<uint64_t>(results[d]),
                 rawbits_to_fp(results[d]));
          printf("\n");
        }
      }
    }
    VIXL_ASSERT(d == expected_length);
    if (error_count > kErrorReportLimit) {
      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
    }
    VIXL_CHECK(error_count == 0);
  }
  delete[] results;
}


// Generate and run a code sequence that applies `helper` (a three-operand FP
// instruction emitter) to every (n, m, a) triple of inputs, storing the
// inputs_length^3 results in n-major, then m, then a order.
static void Test3Op_Helper(Test3OpFPHelper_t helper,
                           uintptr_t inputs,
                           unsigned inputs_length,
                           uintptr_t results,
                           unsigned reg_size) {
  VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize));

  SETUP();
  START();

  // Roll up the loop to keep the code size down.
  Label loop_n, loop_m, loop_a;

  Register out = x0;
  Register inputs_base = x1;
  Register length = w2;
  Register index_n = w3;
  Register index_m = w4;
  Register index_a = w5;

  bool double_op = reg_size == kDRegSize;
  const int index_shift =
      double_op ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;

  FPRegister fd = double_op ? d0 : s0;
  FPRegister fn = double_op ? d1 : s1;
  FPRegister fm = double_op ? d2 : s2;
  FPRegister fa = double_op ?
      d3 : s3;

  __ Mov(out, results);
  __ Mov(inputs_base, inputs);
  __ Mov(length, inputs_length);

  // Three nested loops: n (outer), m, a (inner).
  __ Mov(index_n, 0);
  __ Bind(&loop_n);
  __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));

  __ Mov(index_m, 0);
  __ Bind(&loop_m);
  __ Ldr(fm, MemOperand(inputs_base, index_m, UXTW, index_shift));

  __ Mov(index_a, 0);
  __ Bind(&loop_a);
  __ Ldr(fa, MemOperand(inputs_base, index_a, UXTW, index_shift));

  {
    // Exactly one instruction is expected from each helper invocation.
    SingleEmissionCheckScope guard(&masm);
    (masm.*helper)(fd, fn, fm, fa);
  }
  __ Str(fd, MemOperand(out, fd.GetSizeInBytes(), PostIndex));

  __ Add(index_a, index_a, 1);
  __ Cmp(index_a, inputs_length);
  __ B(lo, &loop_a);

  __ Add(index_m, index_m, 1);
  __ Cmp(index_m, inputs_length);
  __ B(lo, &loop_m);

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_length);
  __ B(lo, &loop_n);

  END();
  RUN();
  TEARDOWN();
}


// Test FP instructions. The inputs[] and expected[] arrays should be arrays of
// rawbits representations of doubles or floats. This ensures that exact bit
// comparisons can be performed.
template <typename T>
static void Test3Op(const char* name,
                    Test3OpFPHelper_t helper,
                    const T inputs[],
                    unsigned inputs_length,
                    const T expected[],
                    unsigned expected_length) {
  VIXL_ASSERT(inputs_length > 0);

  // One result for every (n, m, a) triple of inputs.
  const unsigned results_length = inputs_length * inputs_length * inputs_length;
  T* results = new T[results_length];

  const unsigned bits = sizeof(T) * 8;

  Test3Op_Helper(helper,
                 reinterpret_cast<uintptr_t>(inputs),
                 inputs_length,
                 reinterpret_cast<uintptr_t>(results),
                 bits);

  if (Test::generate_test_trace()) {
    // Print the results.
    printf("const uint%u_t kExpected_%s[] = {\n", bits, name);
    for (unsigned d = 0; d < results_length; d++) {
      printf(" 0x%0*" PRIx64 ",\n",
             bits / 4,
             static_cast<uint64_t>(results[d]));
    }
    printf("};\n");
    printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
  } else {
    // Check the results.
    VIXL_CHECK(expected_length == results_length);
    unsigned error_count = 0;
    unsigned d = 0;
    // `d` walks the flat results array in step with the (n, m, a) order
    // produced by Test3Op_Helper.
    for (unsigned n = 0; n < inputs_length; n++) {
      for (unsigned m = 0; m < inputs_length; m++) {
        for (unsigned a = 0; a < inputs_length; a++, d++) {
          if (results[d] != expected[d]) {
            // Count every mismatch, but only print the first few in detail.
            if (++error_count > kErrorReportLimit) continue;

            printf("%s 0x%0*" PRIx64 ", 0x%0*" PRIx64 ", 0x%0*" PRIx64
                   " (%s %g %g %g):\n",
                   name,
                   bits / 4,
                   static_cast<uint64_t>(inputs[n]),
                   bits / 4,
                   static_cast<uint64_t>(inputs[m]),
                   bits / 4,
                   static_cast<uint64_t>(inputs[a]),
                   name,
                   rawbits_to_fp(inputs[n]),
                   rawbits_to_fp(inputs[m]),
                   rawbits_to_fp(inputs[a]));
            printf(" Expected: 0x%0*" PRIx64 " (%g)\n",
                   bits / 4,
                   static_cast<uint64_t>(expected[d]),
                   rawbits_to_fp(expected[d]));
            printf(" Found: 0x%0*" PRIx64 " (%g)\n",
                   bits / 4,
                   static_cast<uint64_t>(results[d]),
                   rawbits_to_fp(results[d]));
            printf("\n");
          }
        }
      }
    }
    VIXL_ASSERT(d == expected_length);
    if (error_count > kErrorReportLimit) {
      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
    }
    VIXL_CHECK(error_count == 0);
  }
  delete[] results;
}


// Generate and run a code sequence that applies `helper` (an FP compare
// emitter) to every (n, m) pair of inputs and stores the resulting NZCV
// flags, one 4-bit value per byte, to `results`.
static void TestCmp_Helper(TestFPCmpHelper_t helper,
                           uintptr_t inputs,
                           unsigned inputs_length,
                           uintptr_t results,
                           unsigned reg_size) {
  VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize));

  SETUP();
  START();

  // Roll up the loop to keep the code size down.
  Label loop_n, loop_m;

  Register out = x0;
  Register inputs_base = x1;
  Register length = w2;
  Register index_n = w3;
  Register index_m = w4;
  Register flags = x5;

  bool double_op = reg_size == kDRegSize;
  const int index_shift =
      double_op ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;

  FPRegister fn = double_op ? d1 : s1;
  FPRegister fm = double_op ? d2 : s2;

  __ Mov(out, results);
  __ Mov(inputs_base, inputs);
  __ Mov(length, inputs_length);

  __ Mov(index_n, 0);
  __ Bind(&loop_n);
  __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));

  __ Mov(index_m, 0);
  __ Bind(&loop_m);
  __ Ldr(fm, MemOperand(inputs_base, index_m, UXTW, index_shift));

  {
    // Exactly one instruction is expected from each helper invocation.
    SingleEmissionCheckScope guard(&masm);
    (masm.*helper)(fn, fm);
  }
  // Read the status register and keep only its top four bits (N, Z, C, V),
  // then store them as a single byte.
  __ Mrs(flags, NZCV);
  __ Ubfx(flags, flags, 28, 4);
  __ Strb(flags, MemOperand(out, 1, PostIndex));

  __ Add(index_m, index_m, 1);
  __ Cmp(index_m, inputs_length);
  __ B(lo, &loop_m);

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_length);
  __ B(lo, &loop_n);

  END();
  RUN();
  TEARDOWN();
}


// Test FP instructions. The inputs[] and expected[] arrays should be arrays of
// rawbits representations of doubles or floats. This ensures that exact bit
// comparisons can be performed.
664 template <typename T> 665 static void TestCmp(const char* name, 666 TestFPCmpHelper_t helper, 667 const T inputs[], 668 unsigned inputs_length, 669 const uint8_t expected[], 670 unsigned expected_length) { 671 VIXL_ASSERT(inputs_length > 0); 672 673 const unsigned results_length = inputs_length * inputs_length; 674 uint8_t* results = new uint8_t[results_length]; 675 676 const unsigned bits = sizeof(T) * 8; 677 678 TestCmp_Helper(helper, 679 reinterpret_cast<uintptr_t>(inputs), 680 inputs_length, 681 reinterpret_cast<uintptr_t>(results), 682 bits); 683 684 if (Test::generate_test_trace()) { 685 // Print the results. 686 printf("const uint8_t kExpected_%s[] = {\n", name); 687 for (unsigned d = 0; d < results_length; d++) { 688 // Each NZCV result only requires 4 bits. 689 VIXL_ASSERT((results[d] & 0xf) == results[d]); 690 printf(" 0x%" PRIx8 ",\n", results[d]); 691 } 692 printf("};\n"); 693 printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length); 694 } else { 695 // Check the results. 696 VIXL_CHECK(expected_length == results_length); 697 unsigned error_count = 0; 698 unsigned d = 0; 699 for (unsigned n = 0; n < inputs_length; n++) { 700 for (unsigned m = 0; m < inputs_length; m++, d++) { 701 if (results[d] != expected[d]) { 702 if (++error_count > kErrorReportLimit) continue; 703 704 printf("%s 0x%0*" PRIx64 ", 0x%0*" PRIx64 " (%s %g %g):\n", 705 name, 706 bits / 4, 707 static_cast<uint64_t>(inputs[n]), 708 bits / 4, 709 static_cast<uint64_t>(inputs[m]), 710 name, 711 rawbits_to_fp(inputs[n]), 712 rawbits_to_fp(inputs[m])); 713 printf(" Expected: %c%c%c%c (0x%" PRIx8 ")\n", 714 (expected[d] & 0x8) ? 'N' : 'n', 715 (expected[d] & 0x4) ? 'Z' : 'z', 716 (expected[d] & 0x2) ? 'C' : 'c', 717 (expected[d] & 0x1) ? 'V' : 'v', 718 expected[d]); 719 printf(" Found: %c%c%c%c (0x%" PRIx8 ")\n", 720 (results[d] & 0x8) ? 'N' : 'n', 721 (results[d] & 0x4) ? 'Z' : 'z', 722 (results[d] & 0x2) ? 'C' : 'c', 723 (results[d] & 0x1) ? 
'V' : 'v', 724 results[d]); 725 printf("\n"); 726 } 727 } 728 } 729 VIXL_ASSERT(d == expected_length); 730 if (error_count > kErrorReportLimit) { 731 printf("%u other errors follow.\n", error_count - kErrorReportLimit); 732 } 733 VIXL_CHECK(error_count == 0); 734 } 735 delete[] results; 736 } 737 738 739 static void TestCmpZero_Helper(TestFPCmpZeroHelper_t helper, 740 uintptr_t inputs, 741 unsigned inputs_length, 742 uintptr_t results, 743 unsigned reg_size) { 744 VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize)); 745 746 SETUP(); 747 START(); 748 749 // Roll up the loop to keep the code size down. 750 Label loop_n, loop_m; 751 752 Register out = x0; 753 Register inputs_base = x1; 754 Register length = w2; 755 Register index_n = w3; 756 Register flags = x4; 757 758 bool double_op = reg_size == kDRegSize; 759 const int index_shift = 760 double_op ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2; 761 762 FPRegister fn = double_op ? d1 : s1; 763 764 __ Mov(out, results); 765 __ Mov(inputs_base, inputs); 766 __ Mov(length, inputs_length); 767 768 __ Mov(index_n, 0); 769 __ Bind(&loop_n); 770 __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift)); 771 772 { 773 SingleEmissionCheckScope guard(&masm); 774 (masm.*helper)(fn, 0.0); 775 } 776 __ Mrs(flags, NZCV); 777 __ Ubfx(flags, flags, 28, 4); 778 __ Strb(flags, MemOperand(out, 1, PostIndex)); 779 780 __ Add(index_n, index_n, 1); 781 __ Cmp(index_n, inputs_length); 782 __ B(lo, &loop_n); 783 784 END(); 785 RUN(); 786 TEARDOWN(); 787 } 788 789 790 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of 791 // rawbits representations of doubles or floats. This ensures that exact bit 792 // comparisons can be performed. 
// Compare-with-zero dispatcher: runs `helper` over every input via
// TestCmpZero_Helper and checks the recorded NZCV nibbles against expected[].
template <typename T>
static void TestCmpZero(const char* name,
                        TestFPCmpZeroHelper_t helper,
                        const T inputs[],
                        unsigned inputs_length,
                        const uint8_t expected[],
                        unsigned expected_length) {
  VIXL_ASSERT(inputs_length > 0);

  // One NZCV byte per input.
  const unsigned results_length = inputs_length;
  uint8_t* results = new uint8_t[results_length];

  const unsigned bits = sizeof(T) * 8;

  TestCmpZero_Helper(helper,
                     reinterpret_cast<uintptr_t>(inputs),
                     inputs_length,
                     reinterpret_cast<uintptr_t>(results),
                     bits);

  if (Test::generate_test_trace()) {
    // Print the results.
    printf("const uint8_t kExpected_%s[] = {\n", name);
    for (unsigned d = 0; d < results_length; d++) {
      // Each NZCV result only requires 4 bits.
      VIXL_ASSERT((results[d] & 0xf) == results[d]);
      printf(" 0x%" PRIx8 ",\n", results[d]);
    }
    printf("};\n");
    printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
  } else {
    // Check the results.
    VIXL_CHECK(expected_length == results_length);
    unsigned error_count = 0;
    unsigned d = 0;
    for (unsigned n = 0; n < inputs_length; n++, d++) {
      if (results[d] != expected[d]) {
        // Count every mismatch, but only print the first few in detail.
        if (++error_count > kErrorReportLimit) continue;

        printf("%s 0x%0*" PRIx64 ", 0x%0*u (%s %g #0.0):\n",
               name,
               bits / 4,
               static_cast<uint64_t>(inputs[n]),
               bits / 4,
               0,
               name,
               rawbits_to_fp(inputs[n]));
        // Upper-case letters mark set flags (e.g. "NzCv" = N and C set).
        printf(" Expected: %c%c%c%c (0x%" PRIx8 ")\n",
               (expected[d] & 0x8) ? 'N' : 'n',
               (expected[d] & 0x4) ? 'Z' : 'z',
               (expected[d] & 0x2) ? 'C' : 'c',
               (expected[d] & 0x1) ? 'V' : 'v',
               expected[d]);
        printf(" Found: %c%c%c%c (0x%" PRIx8 ")\n",
               (results[d] & 0x8) ? 'N' : 'n',
               (results[d] & 0x4) ? 'Z' : 'z',
               (results[d] & 0x2) ? 'C' : 'c',
               (results[d] & 0x1) ?
            'V' : 'v',
               results[d]);
        printf("\n");
      }
    }
    VIXL_ASSERT(d == expected_length);
    if (error_count > kErrorReportLimit) {
      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
    }
    VIXL_CHECK(error_count == 0);
  }
  delete[] results;
}


// Generate and run a code sequence that applies `helper` (an FP-to-fixed-point
// conversion emitter taking an fbits argument) to every input, once for each
// fbits value from 0 to d_size inclusive, storing every converted value.
static void TestFPToFixed_Helper(TestFPToFixedHelper_t helper,
                                 uintptr_t inputs,
                                 unsigned inputs_length,
                                 uintptr_t results,
                                 unsigned d_size,
                                 unsigned n_size) {
  VIXL_ASSERT((d_size == kXRegSize) || (d_size == kWRegSize));
  VIXL_ASSERT((n_size == kDRegSize) || (n_size == kSRegSize));

  SETUP();
  START();

  // Roll up the loop to keep the code size down.
  Label loop_n;

  Register out = x0;
  Register inputs_base = x1;
  Register length = w2;
  Register index_n = w3;

  const int n_index_shift =
      (n_size == kDRegSize) ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;

  Register rd = (d_size == kXRegSize) ? Register(x10) : Register(w10);
  FPRegister fn = (n_size == kDRegSize) ?
      d1 : s1;

  __ Mov(out, results);
  __ Mov(inputs_base, inputs);
  __ Mov(length, inputs_length);

  __ Mov(index_n, 0);
  __ Bind(&loop_n);
  __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, n_index_shift));

  // This inner loop is unrolled at assembly time: one conversion per fbits
  // value (0 through d_size inclusive) is emitted for each input.
  for (unsigned fbits = 0; fbits <= d_size; ++fbits) {
    {
      SingleEmissionCheckScope guard(&masm);
      (masm.*helper)(rd, fn, fbits);
    }
    __ Str(rd, MemOperand(out, rd.GetSizeInBytes(), PostIndex));
  }

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_length);
  __ B(lo, &loop_n);

  END();
  RUN();
  TEARDOWN();
}


// Generate and run a code sequence that applies `helper` (an FP-to-integer
// conversion emitter) to every input, storing one integer result per input.
static void TestFPToInt_Helper(TestFPToIntHelper_t helper,
                               uintptr_t inputs,
                               unsigned inputs_length,
                               uintptr_t results,
                               unsigned d_size,
                               unsigned n_size) {
  VIXL_ASSERT((d_size == kXRegSize) || (d_size == kWRegSize));
  VIXL_ASSERT((n_size == kDRegSize) || (n_size == kSRegSize));

  SETUP();
  START();

  // Roll up the loop to keep the code size down.
  Label loop_n;

  Register out = x0;
  Register inputs_base = x1;
  Register length = w2;
  Register index_n = w3;

  const int n_index_shift =
      (n_size == kDRegSize) ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;

  Register rd = (d_size == kXRegSize) ? Register(x10) : Register(w10);
  FPRegister fn = (n_size == kDRegSize) ? d1 : s1;

  __ Mov(out, results);
  __ Mov(inputs_base, inputs);
  __ Mov(length, inputs_length);

  __ Mov(index_n, 0);
  __ Bind(&loop_n);
  __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, n_index_shift));

  {
    // Exactly one instruction is expected from each helper invocation.
    SingleEmissionCheckScope guard(&masm);
    (masm.*helper)(rd, fn);
  }
  __ Str(rd, MemOperand(out, rd.GetSizeInBytes(), PostIndex));

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_length);
  __ B(lo, &loop_n);

  END();
  RUN();
  TEARDOWN();
}


// Test FP instructions.
// - The inputs[] array should be an array of rawbits representations of
//   doubles or floats. This ensures that exact bit comparisons can be
//   performed.
// - The expected[] array should be an array of signed integers.
template <typename Tn, typename Td>
static void TestFPToS(const char* name,
                      TestFPToIntHelper_t helper,
                      const Tn inputs[],
                      unsigned inputs_length,
                      const Td expected[],
                      unsigned expected_length) {
  VIXL_ASSERT(inputs_length > 0);

  // One result per input.
  const unsigned results_length = inputs_length;
  Td* results = new Td[results_length];

  const unsigned d_bits = sizeof(Td) * 8;
  const unsigned n_bits = sizeof(Tn) * 8;

  TestFPToInt_Helper(helper,
                     reinterpret_cast<uintptr_t>(inputs),
                     inputs_length,
                     reinterpret_cast<uintptr_t>(results),
                     d_bits,
                     n_bits);

  if (Test::generate_test_trace()) {
    // Print the results.
    printf("const int%u_t kExpected_%s[] = {\n", d_bits, name);
    // There is no simple C++ literal for INT*_MIN that doesn't produce
    // warnings, so we use an appropriate constant in that case instead.
    // Deriving int_d_min in this way (rather than just checking INT64_MIN and
    // the like) avoids warnings about comparing values with differing ranges.
    const int64_t int_d_max = (UINT64_C(1) << (d_bits - 1)) - 1;
    const int64_t int_d_min = -(int_d_max)-1;
    for (unsigned d = 0; d < results_length; d++) {
      if (results[d] == int_d_min) {
        printf(" -INT%u_C(%" PRId64 ") - 1,\n", d_bits, int_d_max);
      } else {
        // Some constants (such as those between INT32_MAX and UINT32_MAX)
        // trigger compiler warnings. To avoid these warnings, use an
        // appropriate macro to make the type explicit.
        int64_t result_int64 = static_cast<int64_t>(results[d]);
        if (result_int64 >= 0) {
          printf(" INT%u_C(%" PRId64 "),\n", d_bits, result_int64);
        } else {
          // Print negative values as a negated positive literal so the
          // magnitude always fits the INT*_C macro argument.
          printf(" -INT%u_C(%" PRId64 "),\n", d_bits, -result_int64);
        }
      }
    }
    printf("};\n");
    printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
  } else {
    // Check the results.
    VIXL_CHECK(expected_length == results_length);
    unsigned error_count = 0;
    unsigned d = 0;
    for (unsigned n = 0; n < inputs_length; n++, d++) {
      if (results[d] != expected[d]) {
        // Count every mismatch, but only print the first few in detail.
        if (++error_count > kErrorReportLimit) continue;

        printf("%s 0x%0*" PRIx64 " (%s %g):\n",
               name,
               n_bits / 4,
               static_cast<uint64_t>(inputs[n]),
               name,
               rawbits_to_fp(inputs[n]));
        printf(" Expected: 0x%0*" PRIx64 " (%" PRId64 ")\n",
               d_bits / 4,
               static_cast<uint64_t>(expected[d]),
               static_cast<int64_t>(expected[d]));
        printf(" Found: 0x%0*" PRIx64 " (%" PRId64 ")\n",
               d_bits / 4,
               static_cast<uint64_t>(results[d]),
               static_cast<int64_t>(results[d]));
        printf("\n");
      }
    }
    VIXL_ASSERT(d == expected_length);
    if (error_count > kErrorReportLimit) {
      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
    }
    VIXL_CHECK(error_count == 0);
  }
  delete[] results;
}


// Test FP instructions.
// - The inputs[] array should be an array of rawbits representations of
//   doubles or floats. This ensures that exact bit comparisons can be
//   performed.
// - The expected[] array should be an array of unsigned integers.
1061 template <typename Tn, typename Td> 1062 static void TestFPToU(const char* name, 1063 TestFPToIntHelper_t helper, 1064 const Tn inputs[], 1065 unsigned inputs_length, 1066 const Td expected[], 1067 unsigned expected_length) { 1068 VIXL_ASSERT(inputs_length > 0); 1069 1070 const unsigned results_length = inputs_length; 1071 Td* results = new Td[results_length]; 1072 1073 const unsigned d_bits = sizeof(Td) * 8; 1074 const unsigned n_bits = sizeof(Tn) * 8; 1075 1076 TestFPToInt_Helper(helper, 1077 reinterpret_cast<uintptr_t>(inputs), 1078 inputs_length, 1079 reinterpret_cast<uintptr_t>(results), 1080 d_bits, 1081 n_bits); 1082 1083 if (Test::generate_test_trace()) { 1084 // Print the results. 1085 printf("const uint%u_t kExpected_%s[] = {\n", d_bits, name); 1086 for (unsigned d = 0; d < results_length; d++) { 1087 printf(" %" PRIu64 "u,\n", static_cast<uint64_t>(results[d])); 1088 } 1089 printf("};\n"); 1090 printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length); 1091 } else { 1092 // Check the results. 
1093 VIXL_CHECK(expected_length == results_length); 1094 unsigned error_count = 0; 1095 unsigned d = 0; 1096 for (unsigned n = 0; n < inputs_length; n++, d++) { 1097 if (results[d] != expected[d]) { 1098 if (++error_count > kErrorReportLimit) continue; 1099 1100 printf("%s 0x%0*" PRIx64 " (%s %g):\n", 1101 name, 1102 n_bits / 4, 1103 static_cast<uint64_t>(inputs[n]), 1104 name, 1105 rawbits_to_fp(inputs[n])); 1106 printf(" Expected: 0x%0*" PRIx64 " (%" PRIu64 ")\n", 1107 d_bits / 4, 1108 static_cast<uint64_t>(expected[d]), 1109 static_cast<uint64_t>(expected[d])); 1110 printf(" Found: 0x%0*" PRIx64 " (%" PRIu64 ")\n", 1111 d_bits / 4, 1112 static_cast<uint64_t>(results[d]), 1113 static_cast<uint64_t>(results[d])); 1114 printf("\n"); 1115 } 1116 } 1117 VIXL_ASSERT(d == expected_length); 1118 if (error_count > kErrorReportLimit) { 1119 printf("%u other errors follow.\n", error_count - kErrorReportLimit); 1120 } 1121 VIXL_CHECK(error_count == 0); 1122 } 1123 delete[] results; 1124 } 1125 1126 1127 // Test FP instructions. 1128 // - The inputs[] array should be an array of rawbits representations of 1129 // doubles or floats. This ensures that exact bit comparisons can be 1130 // performed. 1131 // - The expected[] array should be an array of signed integers. 
1132 template <typename Tn, typename Td> 1133 static void TestFPToFixedS(const char* name, 1134 TestFPToFixedHelper_t helper, 1135 const Tn inputs[], 1136 unsigned inputs_length, 1137 const Td expected[], 1138 unsigned expected_length) { 1139 VIXL_ASSERT(inputs_length > 0); 1140 1141 const unsigned d_bits = sizeof(Td) * 8; 1142 const unsigned n_bits = sizeof(Tn) * 8; 1143 1144 const unsigned results_length = inputs_length * (d_bits + 1); 1145 Td* results = new Td[results_length]; 1146 1147 TestFPToFixed_Helper(helper, 1148 reinterpret_cast<uintptr_t>(inputs), 1149 inputs_length, 1150 reinterpret_cast<uintptr_t>(results), 1151 d_bits, 1152 n_bits); 1153 1154 if (Test::generate_test_trace()) { 1155 // Print the results. 1156 printf("const int%u_t kExpected_%s[] = {\n", d_bits, name); 1157 // There is no simple C++ literal for INT*_MIN that doesn't produce 1158 // warnings, so we use an appropriate constant in that case instead. 1159 // Deriving int_d_min in this way (rather than just checking INT64_MIN and 1160 // the like) avoids warnings about comparing values with differing ranges. 1161 const int64_t int_d_max = (UINT64_C(1) << (d_bits - 1)) - 1; 1162 const int64_t int_d_min = -(int_d_max)-1; 1163 for (unsigned d = 0; d < results_length; d++) { 1164 if (results[d] == int_d_min) { 1165 printf(" -INT%u_C(%" PRId64 ") - 1,\n", d_bits, int_d_max); 1166 } else { 1167 // Some constants (such as those between INT32_MAX and UINT32_MAX) 1168 // trigger compiler warnings. To avoid these warnings, use an 1169 // appropriate macro to make the type explicit. 1170 int64_t result_int64 = static_cast<int64_t>(results[d]); 1171 if (result_int64 >= 0) { 1172 printf(" INT%u_C(%" PRId64 "),\n", d_bits, result_int64); 1173 } else { 1174 printf(" -INT%u_C(%" PRId64 "),\n", d_bits, -result_int64); 1175 } 1176 } 1177 } 1178 printf("};\n"); 1179 printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length); 1180 } else { 1181 // Check the results. 
1182 VIXL_CHECK(expected_length == results_length); 1183 unsigned error_count = 0; 1184 unsigned d = 0; 1185 for (unsigned n = 0; n < inputs_length; n++) { 1186 for (unsigned fbits = 0; fbits <= d_bits; ++fbits, d++) { 1187 if (results[d] != expected[d]) { 1188 if (++error_count > kErrorReportLimit) continue; 1189 1190 printf("%s 0x%0*" PRIx64 " #%d (%s %g #%d):\n", 1191 name, 1192 n_bits / 4, 1193 static_cast<uint64_t>(inputs[n]), 1194 fbits, 1195 name, 1196 rawbits_to_fp(inputs[n]), 1197 fbits); 1198 printf(" Expected: 0x%0*" PRIx64 " (%" PRId64 ")\n", 1199 d_bits / 4, 1200 static_cast<uint64_t>(expected[d]), 1201 static_cast<int64_t>(expected[d])); 1202 printf(" Found: 0x%0*" PRIx64 " (%" PRId64 ")\n", 1203 d_bits / 4, 1204 static_cast<uint64_t>(results[d]), 1205 static_cast<int64_t>(results[d])); 1206 printf("\n"); 1207 } 1208 } 1209 } 1210 VIXL_ASSERT(d == expected_length); 1211 if (error_count > kErrorReportLimit) { 1212 printf("%u other errors follow.\n", error_count - kErrorReportLimit); 1213 } 1214 VIXL_CHECK(error_count == 0); 1215 } 1216 delete[] results; 1217 } 1218 1219 1220 // Test FP instructions. 1221 // - The inputs[] array should be an array of rawbits representations of 1222 // doubles or floats. This ensures that exact bit comparisons can be 1223 // performed. 1224 // - The expected[] array should be an array of unsigned integers. 
1225 template <typename Tn, typename Td> 1226 static void TestFPToFixedU(const char* name, 1227 TestFPToFixedHelper_t helper, 1228 const Tn inputs[], 1229 unsigned inputs_length, 1230 const Td expected[], 1231 unsigned expected_length) { 1232 VIXL_ASSERT(inputs_length > 0); 1233 1234 const unsigned d_bits = sizeof(Td) * 8; 1235 const unsigned n_bits = sizeof(Tn) * 8; 1236 1237 const unsigned results_length = inputs_length * (d_bits + 1); 1238 Td* results = new Td[results_length]; 1239 1240 TestFPToFixed_Helper(helper, 1241 reinterpret_cast<uintptr_t>(inputs), 1242 inputs_length, 1243 reinterpret_cast<uintptr_t>(results), 1244 d_bits, 1245 n_bits); 1246 1247 if (Test::generate_test_trace()) { 1248 // Print the results. 1249 printf("const uint%u_t kExpected_%s[] = {\n", d_bits, name); 1250 for (unsigned d = 0; d < results_length; d++) { 1251 printf(" %" PRIu64 "u,\n", static_cast<uint64_t>(results[d])); 1252 } 1253 printf("};\n"); 1254 printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length); 1255 } else { 1256 // Check the results. 
1257 VIXL_CHECK(expected_length == results_length); 1258 unsigned error_count = 0; 1259 unsigned d = 0; 1260 for (unsigned n = 0; n < inputs_length; n++) { 1261 for (unsigned fbits = 0; fbits <= d_bits; ++fbits, d++) { 1262 if (results[d] != expected[d]) { 1263 if (++error_count > kErrorReportLimit) continue; 1264 1265 printf("%s 0x%0*" PRIx64 " #%d (%s %g #%d):\n", 1266 name, 1267 n_bits / 4, 1268 static_cast<uint64_t>(inputs[n]), 1269 fbits, 1270 name, 1271 rawbits_to_fp(inputs[n]), 1272 fbits); 1273 printf(" Expected: 0x%0*" PRIx64 " (%" PRIu64 ")\n", 1274 d_bits / 4, 1275 static_cast<uint64_t>(expected[d]), 1276 static_cast<uint64_t>(expected[d])); 1277 printf(" Found: 0x%0*" PRIx64 " (%" PRIu64 ")\n", 1278 d_bits / 4, 1279 static_cast<uint64_t>(results[d]), 1280 static_cast<uint64_t>(results[d])); 1281 printf("\n"); 1282 } 1283 } 1284 } 1285 VIXL_ASSERT(d == expected_length); 1286 if (error_count > kErrorReportLimit) { 1287 printf("%u other errors follow.\n", error_count - kErrorReportLimit); 1288 } 1289 VIXL_CHECK(error_count == 0); 1290 } 1291 delete[] results; 1292 } 1293 1294 1295 // ==== Tests for instructions of the form <INST> VReg, VReg. ==== 1296 1297 1298 static void Test1OpNEON_Helper(Test1OpNEONHelper_t helper, 1299 uintptr_t inputs_n, 1300 unsigned inputs_n_length, 1301 uintptr_t results, 1302 VectorFormat vd_form, 1303 VectorFormat vn_form) { 1304 VIXL_ASSERT(vd_form != kFormatUndefined); 1305 VIXL_ASSERT(vn_form != kFormatUndefined); 1306 1307 SETUP(); 1308 START(); 1309 1310 // Roll up the loop to keep the code size down. 1311 Label loop_n; 1312 1313 Register out = x0; 1314 Register inputs_n_base = x1; 1315 Register inputs_n_last_16bytes = x3; 1316 Register index_n = x5; 1317 1318 // TODO: Refactor duplicate definitions below with a VRegister::As() routine. 
  const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);

  const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
  const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
  const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);


  // These will be either a D- or a Q-register form, with a single lane
  // (for use in scalar load and store operations).
  VRegister vd = VRegister(0, vd_bits);
  VRegister vn = v1.V16B();
  VRegister vntmp = v3.V16B();

  // These will have the correct format for use when calling 'helper'.
  VRegister vd_helper = VRegister(0, vd_bits, vd_lane_count);
  VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);

  // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
  VRegister vntmp_single = VRegister(3, vn_lane_bits);

  __ Mov(out, results);

  __ Mov(inputs_n_base, inputs_n);
  __ Mov(inputs_n_last_16bytes,
         inputs_n + (vn_lane_bytes * inputs_n_length) - 16);

  // Seed vn with the last 16 bytes of the input array; the Ext below shifts
  // one newly-loaded lane into it on each iteration, giving a rolling window
  // over the inputs.
  __ Ldr(vn, MemOperand(inputs_n_last_16bytes));

  __ Mov(index_n, 0);
  __ Bind(&loop_n);

  __ Ldr(vntmp_single,
         MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
  __ Ext(vn, vn, vntmp, vn_lane_bytes);

  // Set the destination to zero.
  // TODO: Setting the destination to values other than zero
  //       might be a better test for instructions such as sqxtn2
  //       which may leave parts of V registers unchanged.
  __ Movi(vd.V16B(), 0);

  {
    SingleEmissionCheckScope guard(&masm);
    (masm.*helper)(vd_helper, vn_helper);
  }
  // Store the result and advance the output pointer.
  __ Str(vd, MemOperand(out, vd.GetSizeInBytes(), PostIndex));

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_n_length);
  __ B(lo, &loop_n);

  END();
  RUN();
  TEARDOWN();
}


// Test NEON instructions. The inputs_*[] and expected[] arrays should be
// arrays of rawbit representation of input values. This ensures that
// exact bit comparisons can be performed.
template <typename Td, typename Tn>
static void Test1OpNEON(const char* name,
                        Test1OpNEONHelper_t helper,
                        const Tn inputs_n[],
                        unsigned inputs_n_length,
                        const Td expected[],
                        unsigned expected_length,
                        VectorFormat vd_form,
                        VectorFormat vn_form) {
  VIXL_ASSERT(inputs_n_length > 0);

  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);

  // One result vector (vd_lane_count lanes) per input iteration.
  const unsigned results_length = inputs_n_length;
  Td* results = new Td[results_length * vd_lane_count];
  const unsigned lane_bit = sizeof(Td) * 8;
  const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();

  Test1OpNEON_Helper(helper,
                     reinterpret_cast<uintptr_t>(inputs_n),
                     inputs_n_length,
                     reinterpret_cast<uintptr_t>(results),
                     vd_form,
                     vn_form);

  if (Test::generate_test_trace()) {
    // Print the results.
    printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
    for (unsigned iteration = 0; iteration < results_length; iteration++) {
      printf("  ");
      // Output a separate result for each element of the result vector.
      for (unsigned lane = 0; lane < vd_lane_count; lane++) {
        unsigned index = lane + (iteration * vd_lane_count);
        printf(" 0x%0*" PRIx64 ",",
               lane_len_in_hex,
               static_cast<uint64_t>(results[index]));
      }
      printf("\n");
    }

    printf("};\n");
    printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
           name,
           results_length);
  } else {
    // Check the results.
    VIXL_CHECK(expected_length == results_length);
    unsigned error_count = 0;
    unsigned d = 0;
    const char* padding = "                    ";
    VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
    for (unsigned n = 0; n < inputs_n_length; n++, d++) {
      bool error_in_vector = false;

      // A vector fails if any of its lanes differs from the trace.
      for (unsigned lane = 0; lane < vd_lane_count; lane++) {
        unsigned output_index = (n * vd_lane_count) + lane;

        if (results[output_index] != expected[output_index]) {
          error_in_vector = true;
          break;
        }
      }

      if (error_in_vector && (++error_count <= kErrorReportLimit)) {
        printf("%s\n", name);
        printf(" Vn%.*s| Vd%.*s| Expected\n",
               lane_len_in_hex + 1,
               padding,
               lane_len_in_hex + 1,
               padding);

        // Reconstruct which input elements occupied vn on iteration 'n'; the
        // helper feeds inputs through a rolling 16-byte window.
        const unsigned first_index_n =
            inputs_n_length - (16 / vn_lane_bytes) + n + 1;

        for (unsigned lane = 0; lane < std::max(vd_lane_count, vn_lane_count);
             lane++) {
          unsigned output_index = (n * vd_lane_count) + lane;
          unsigned input_index_n = (first_index_n + lane) % inputs_n_length;

          printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64
                 " "
                 "| 0x%0*" PRIx64 "\n",
                 results[output_index] != expected[output_index] ?
                     '*' : ' ',
                 lane_len_in_hex,
                 static_cast<uint64_t>(inputs_n[input_index_n]),
                 lane_len_in_hex,
                 static_cast<uint64_t>(results[output_index]),
                 lane_len_in_hex,
                 static_cast<uint64_t>(expected[output_index]));
        }
      }
    }
    VIXL_ASSERT(d == expected_length);
    if (error_count > kErrorReportLimit) {
      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
    }
    VIXL_CHECK(error_count == 0);
  }
  delete[] results;
}


// ==== Tests for instructions of the form <mnemonic> <V><d>, <Vn>.<T> ====
//      where <V> is one of B, H, S or D registers.
//      e.g. saddlv H1, v0.8B

// TODO: Change tests to store all lanes of the resulting V register.
//       Some tests store all 128 bits of the resulting V register to
//       check the simulator's behaviour on the rest of the register.
//       This is better than storing the affected lanes only.
//       Change any tests such as the 'Across' template to do the same.

static void Test1OpAcrossNEON_Helper(Test1OpNEONHelper_t helper,
                                     uintptr_t inputs_n,
                                     unsigned inputs_n_length,
                                     uintptr_t results,
                                     VectorFormat vd_form,
                                     VectorFormat vn_form) {
  VIXL_ASSERT(vd_form != kFormatUndefined);
  VIXL_ASSERT(vn_form != kFormatUndefined);

  SETUP();
  START();

  // Roll up the loop to keep the code size down.
  Label loop_n;

  Register out = x0;
  Register inputs_n_base = x1;
  Register inputs_n_last_vector = x3;
  Register index_n = x5;

  // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
  const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
  const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
  const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
  const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);

  // Test destructive operations by (arbitrarily) using the same register for
  // B and S lane sizes.
  bool destructive = (vd_bits == kBRegSize) || (vd_bits == kSRegSize);

  // Create two aliases for v0; the first is the destination for the tested
  // instruction, the second, the whole Q register to check the results.
  VRegister vd = VRegister(0, vd_bits);
  VRegister vdstr = VRegister(0, kQRegSize);

  VRegister vn = VRegister(1, vn_bits);
  VRegister vntmp = VRegister(3, vn_bits);

  // These will have the correct format for use when calling 'helper'.
  VRegister vd_helper = VRegister(0, vn_bits, vn_lane_count);
  VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);

  // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
  VRegister vntmp_single = VRegister(3, vn_lane_bits);

  // Same registers for use in the 'ext' instructions.
  VRegister vn_ext = (kDRegSize == vn_bits) ? vn.V8B() : vn.V16B();
  VRegister vntmp_ext = (kDRegSize == vn_bits) ?
      vntmp.V8B() : vntmp.V16B();

  __ Mov(out, results);

  __ Mov(inputs_n_base, inputs_n);
  __ Mov(inputs_n_last_vector,
         inputs_n + vn_lane_bytes * (inputs_n_length - vn_lane_count));

  // Seed vn with the last full input vector; each iteration shifts one
  // newly-loaded lane into it via the Ext below.
  __ Ldr(vn, MemOperand(inputs_n_last_vector));

  __ Mov(index_n, 0);
  __ Bind(&loop_n);

  __ Ldr(vntmp_single,
         MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
  __ Ext(vn_ext, vn_ext, vntmp_ext, vn_lane_bytes);

  if (destructive) {
    // Copy the input into the destination first, so the tested instruction
    // reads from the same register it writes.
    __ Mov(vd_helper, vn_helper);
    SingleEmissionCheckScope guard(&masm);
    (masm.*helper)(vd, vd_helper);
  } else {
    SingleEmissionCheckScope guard(&masm);
    (masm.*helper)(vd, vn_helper);
  }

  // Store all 128 bits of v0 so the checker can also inspect the lanes
  // beyond the destination register size.
  __ Str(vdstr, MemOperand(out, kQRegSizeInBytes, PostIndex));

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_n_length);
  __ B(lo, &loop_n);

  END();
  RUN();
  TEARDOWN();
}

// Test NEON instructions. The inputs_*[] and expected[] arrays should be
// arrays of rawbit representation of input values. This ensures that
// exact bit comparisons can be performed.
template <typename Td, typename Tn>
static void Test1OpAcrossNEON(const char* name,
                              Test1OpNEONHelper_t helper,
                              const Tn inputs_n[],
                              unsigned inputs_n_length,
                              const Td expected[],
                              unsigned expected_length,
                              VectorFormat vd_form,
                              VectorFormat vn_form) {
  VIXL_ASSERT(inputs_n_length > 0);

  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
  const unsigned vd_lanes_per_q = MaxLaneCountFromFormat(vd_form);

  // The helper stores one whole Q register per input iteration, so allocate
  // vd_lanes_per_q lanes per result.
  const unsigned results_length = inputs_n_length;
  Td* results = new Td[results_length * vd_lanes_per_q];
  const unsigned lane_bit = sizeof(Td) * 8;
  const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();

  Test1OpAcrossNEON_Helper(helper,
                           reinterpret_cast<uintptr_t>(inputs_n),
                           inputs_n_length,
                           reinterpret_cast<uintptr_t>(results),
                           vd_form,
                           vn_form);

  if (Test::generate_test_trace()) {
    // Print the results.
    printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
    for (unsigned iteration = 0; iteration < results_length; iteration++) {
      printf("  ");
      // Output a separate result for each element of the result vector.
      for (unsigned lane = 0; lane < vd_lane_count; lane++) {
        unsigned index = lane + (iteration * vd_lane_count);
        printf(" 0x%0*" PRIx64 ",",
               lane_len_in_hex,
               static_cast<uint64_t>(results[index]));
      }
      printf("\n");
    }

    printf("};\n");
    printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
           name,
           results_length);
  } else {
    // Check the results.
    VIXL_CHECK(expected_length == results_length);
    unsigned error_count = 0;
    unsigned d = 0;
    const char* padding = "                    ";
    VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
    for (unsigned n = 0; n < inputs_n_length; n++, d++) {
      bool error_in_vector = false;

      // Note that the expected trace stores vd_lane_count lanes per result,
      // while the results buffer stores a whole Q register per result.
      for (unsigned lane = 0; lane < vd_lane_count; lane++) {
        unsigned expected_index = (n * vd_lane_count) + lane;
        unsigned results_index = (n * vd_lanes_per_q) + lane;

        if (results[results_index] != expected[expected_index]) {
          error_in_vector = true;
          break;
        }
      }

      // For across operations, the remaining lanes should be zero.
      for (unsigned lane = vd_lane_count; lane < vd_lanes_per_q; lane++) {
        unsigned results_index = (n * vd_lanes_per_q) + lane;
        if (results[results_index] != 0) {
          error_in_vector = true;
          break;
        }
      }

      if (error_in_vector && (++error_count <= kErrorReportLimit)) {
        const unsigned vn_lane_count = LaneCountFromFormat(vn_form);

        printf("%s\n", name);
        printf(" Vn%.*s| Vd%.*s| Expected\n",
               lane_len_in_hex + 1,
               padding,
               lane_len_in_hex + 1,
               padding);

        // TODO: In case of an error, all tests print out as many elements as
        //       there are lanes in the output or input vectors. This way
        //       the viewer can read all the values that were needed for the
        //       operation but the output contains also unnecessary values.
        //       These prints can be improved according to the arguments
        //       passed to test functions.
        //       This output for the 'Across' category has the required
        //       modifications.
1677 for (unsigned lane = 0; lane < vn_lane_count; lane++) { 1678 unsigned results_index = 1679 (n * vd_lanes_per_q) + ((vn_lane_count - 1) - lane); 1680 unsigned input_index_n = 1681 (inputs_n_length - vn_lane_count + n + 1 + lane) % 1682 inputs_n_length; 1683 1684 Td expect = 0; 1685 if ((vn_lane_count - 1) == lane) { 1686 // This is the last lane to be printed, ie. the least-significant 1687 // lane, so use the expected value; any other lane should be zero. 1688 unsigned expected_index = n * vd_lane_count; 1689 expect = expected[expected_index]; 1690 } 1691 printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n", 1692 results[results_index] != expect ? '*' : ' ', 1693 lane_len_in_hex, 1694 static_cast<uint64_t>(inputs_n[input_index_n]), 1695 lane_len_in_hex, 1696 static_cast<uint64_t>(results[results_index]), 1697 lane_len_in_hex, 1698 static_cast<uint64_t>(expect)); 1699 } 1700 } 1701 } 1702 VIXL_ASSERT(d == expected_length); 1703 if (error_count > kErrorReportLimit) { 1704 printf("%u other errors follow.\n", error_count - kErrorReportLimit); 1705 } 1706 VIXL_CHECK(error_count == 0); 1707 } 1708 delete[] results; 1709 } 1710 1711 1712 // ==== Tests for instructions of the form <INST> VReg, VReg, VReg. ==== 1713 1714 // TODO: Iterate over inputs_d once the traces file is split. 1715 1716 static void Test2OpNEON_Helper(Test2OpNEONHelper_t helper, 1717 uintptr_t inputs_d, 1718 uintptr_t inputs_n, 1719 unsigned inputs_n_length, 1720 uintptr_t inputs_m, 1721 unsigned inputs_m_length, 1722 uintptr_t results, 1723 VectorFormat vd_form, 1724 VectorFormat vn_form, 1725 VectorFormat vm_form) { 1726 VIXL_ASSERT(vd_form != kFormatUndefined); 1727 VIXL_ASSERT(vn_form != kFormatUndefined); 1728 VIXL_ASSERT(vm_form != kFormatUndefined); 1729 1730 SETUP(); 1731 START(); 1732 1733 // Roll up the loop to keep the code size down. 
  Label loop_n, loop_m;

  // x0 holds the output pointer; x1-x7 hold input bases and loop state.
  Register out = x0;
  Register inputs_n_base = x1;
  Register inputs_m_base = x2;
  Register inputs_d_base = x3;
  Register inputs_n_last_16bytes = x4;
  Register inputs_m_last_16bytes = x5;
  Register index_n = x6;
  Register index_m = x7;

  // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
  const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);

  const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
  const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
  const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);

  const unsigned vm_bits = RegisterSizeInBitsFromFormat(vm_form);
  const unsigned vm_lane_count = LaneCountFromFormat(vm_form);
  const unsigned vm_lane_bytes = LaneSizeInBytesFromFormat(vm_form);
  const unsigned vm_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vm_form);
  const unsigned vm_lane_bits = LaneSizeInBitsFromFormat(vm_form);


  // Always load and store 128 bits regardless of the format.
  VRegister vd = v0.V16B();
  VRegister vn = v1.V16B();
  VRegister vm = v2.V16B();
  VRegister vntmp = v3.V16B();
  VRegister vmtmp = v4.V16B();
  VRegister vres = v5.V16B();

  // These will have the correct format for calling the 'helper'.
  VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
  VRegister vm_helper = VRegister(2, vm_bits, vm_lane_count);
  VRegister vres_helper = VRegister(5, vd_bits, vd_lane_count);

  // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
  VRegister vntmp_single = VRegister(3, vn_lane_bits);
  VRegister vmtmp_single = VRegister(4, vm_lane_bits);

  __ Mov(out, results);

  __ Mov(inputs_d_base, inputs_d);

  __ Mov(inputs_n_base, inputs_n);
  __ Mov(inputs_n_last_16bytes, inputs_n + (inputs_n_length - 16));
  __ Mov(inputs_m_base, inputs_m);
  __ Mov(inputs_m_last_16bytes, inputs_m + (inputs_m_length - 16));

  // Seed vn and vm with the last 16 input bytes; the Ext instructions below
  // shift one newly-loaded lane into them on each iteration.
  __ Ldr(vd, MemOperand(inputs_d_base));
  __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
  __ Ldr(vm, MemOperand(inputs_m_last_16bytes));

  __ Mov(index_n, 0);
  __ Bind(&loop_n);

  __ Ldr(vntmp_single,
         MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
  __ Ext(vn, vn, vntmp, vn_lane_bytes);

  __ Mov(index_m, 0);
  __ Bind(&loop_m);

  __ Ldr(vmtmp_single,
         MemOperand(inputs_m_base, index_m, LSL, vm_lane_bytes_log2));
  __ Ext(vm, vm, vmtmp, vm_lane_bytes);

  // Start each iteration's result from the inputs_d value loaded into vd.
  __ Mov(vres, vd);
  {
    SingleEmissionCheckScope guard(&masm);
    (masm.*helper)(vres_helper, vn_helper, vm_helper);
  }
  __ Str(vres, MemOperand(out, vd.GetSizeInBytes(), PostIndex));

  __ Add(index_m, index_m, 1);
  __ Cmp(index_m, inputs_m_length);
  __ B(lo, &loop_m);

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_n_length);
  __ B(lo, &loop_n);

  END();
  RUN();
  TEARDOWN();
}


// Test NEON instructions. The inputs_*[] and expected[] arrays should be
// arrays of rawbit representation of input values. This ensures that
// exact bit comparisons can be performed.
template <typename Td, typename Tn, typename Tm>
static void Test2OpNEON(const char* name,
                        Test2OpNEONHelper_t helper,
                        const Td inputs_d[],
                        const Tn inputs_n[],
                        unsigned inputs_n_length,
                        const Tm inputs_m[],
                        unsigned inputs_m_length,
                        const Td expected[],
                        unsigned expected_length,
                        VectorFormat vd_form,
                        VectorFormat vn_form,
                        VectorFormat vm_form) {
  VIXL_ASSERT(inputs_n_length > 0 && inputs_m_length > 0);

  const unsigned vd_lane_count = MaxLaneCountFromFormat(vd_form);

  // One result vector for each (n, m) input combination.
  const unsigned results_length = inputs_n_length * inputs_m_length;
  Td* results = new Td[results_length * vd_lane_count];
  const unsigned lane_bit = sizeof(Td) * 8;
  const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tm>();

  Test2OpNEON_Helper(helper,
                     reinterpret_cast<uintptr_t>(inputs_d),
                     reinterpret_cast<uintptr_t>(inputs_n),
                     inputs_n_length,
                     reinterpret_cast<uintptr_t>(inputs_m),
                     inputs_m_length,
                     reinterpret_cast<uintptr_t>(results),
                     vd_form,
                     vn_form,
                     vm_form);

  if (Test::generate_test_trace()) {
    // Print the results.
    printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
    for (unsigned iteration = 0; iteration < results_length; iteration++) {
      printf("  ");
      // Output a separate result for each element of the result vector.
      for (unsigned lane = 0; lane < vd_lane_count; lane++) {
        unsigned index = lane + (iteration * vd_lane_count);
        printf(" 0x%0*" PRIx64 ",",
               lane_len_in_hex,
               static_cast<uint64_t>(results[index]));
      }
      printf("\n");
    }

    printf("};\n");
    printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
           name,
           results_length);
  } else {
    // Check the results.
    VIXL_CHECK(expected_length == results_length);
    unsigned error_count = 0;
    // 'd' counts (n, m) pairs; it is cross-checked against expected_length
    // after the loops.
    unsigned d = 0;
    const char* padding = "                    ";
    VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
    for (unsigned n = 0; n < inputs_n_length; n++) {
      for (unsigned m = 0; m < inputs_m_length; m++, d++) {
        bool error_in_vector = false;

        for (unsigned lane = 0; lane < vd_lane_count; lane++) {
          unsigned output_index = (n * inputs_m_length * vd_lane_count) +
                                  (m * vd_lane_count) + lane;

          if (results[output_index] != expected[output_index]) {
            error_in_vector = true;
            break;
          }
        }

        if (error_in_vector && (++error_count <= kErrorReportLimit)) {
          printf("%s\n", name);
          printf(" Vd%.*s| Vn%.*s| Vm%.*s| Vd%.*s| Expected\n",
                 lane_len_in_hex + 1,
                 padding,
                 lane_len_in_hex + 1,
                 padding,
                 lane_len_in_hex + 1,
                 padding,
                 lane_len_in_hex + 1,
                 padding);

          for (unsigned lane = 0; lane < vd_lane_count; lane++) {
            unsigned output_index = (n * inputs_m_length * vd_lane_count) +
                                    (m * vd_lane_count) + lane;
            unsigned input_index_n =
                (inputs_n_length - vd_lane_count + n + 1 + lane) %
                inputs_n_length;
            unsigned input_index_m =
                (inputs_m_length - vd_lane_count + m + 1 + lane) %
                inputs_m_length;

            printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64
                   " "
                   "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
                   results[output_index] != expected[output_index] ?
'*' : ' ', 1929 lane_len_in_hex, 1930 static_cast<uint64_t>(inputs_d[lane]), 1931 lane_len_in_hex, 1932 static_cast<uint64_t>(inputs_n[input_index_n]), 1933 lane_len_in_hex, 1934 static_cast<uint64_t>(inputs_m[input_index_m]), 1935 lane_len_in_hex, 1936 static_cast<uint64_t>(results[output_index]), 1937 lane_len_in_hex, 1938 static_cast<uint64_t>(expected[output_index])); 1939 } 1940 } 1941 } 1942 } 1943 VIXL_ASSERT(d == expected_length); 1944 if (error_count > kErrorReportLimit) { 1945 printf("%u other errors follow.\n", error_count - kErrorReportLimit); 1946 } 1947 VIXL_CHECK(error_count == 0); 1948 } 1949 delete[] results; 1950 } 1951 1952 1953 // ==== Tests for instructions of the form <INST> Vd, Vn, Vm[<#index>]. ==== 1954 1955 static void TestByElementNEON_Helper(TestByElementNEONHelper_t helper, 1956 uintptr_t inputs_d, 1957 uintptr_t inputs_n, 1958 unsigned inputs_n_length, 1959 uintptr_t inputs_m, 1960 unsigned inputs_m_length, 1961 const int indices[], 1962 unsigned indices_length, 1963 uintptr_t results, 1964 VectorFormat vd_form, 1965 VectorFormat vn_form, 1966 VectorFormat vm_form) { 1967 VIXL_ASSERT(vd_form != kFormatUndefined); 1968 VIXL_ASSERT(vn_form != kFormatUndefined); 1969 VIXL_ASSERT(vm_form != kFormatUndefined); 1970 1971 SETUP(); 1972 START(); 1973 1974 // Roll up the loop to keep the code size down. 1975 Label loop_n, loop_m; 1976 1977 Register out = x0; 1978 Register inputs_n_base = x1; 1979 Register inputs_m_base = x2; 1980 Register inputs_d_base = x3; 1981 Register inputs_n_last_16bytes = x4; 1982 Register inputs_m_last_16bytes = x5; 1983 Register index_n = x6; 1984 Register index_m = x7; 1985 1986 // TODO: Refactor duplicate definitions below with a VRegister::As() routine. 
  const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);

  const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
  const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
  const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);

  const unsigned vm_bits = RegisterSizeInBitsFromFormat(vm_form);
  const unsigned vm_lane_count = LaneCountFromFormat(vm_form);
  const unsigned vm_lane_bytes = LaneSizeInBytesFromFormat(vm_form);
  const unsigned vm_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vm_form);
  const unsigned vm_lane_bits = LaneSizeInBitsFromFormat(vm_form);


  // Always load and store 128 bits regardless of the format.
  VRegister vd = v0.V16B();
  VRegister vn = v1.V16B();
  VRegister vm = v2.V16B();
  VRegister vntmp = v3.V16B();
  VRegister vmtmp = v4.V16B();
  VRegister vres = v5.V16B();

  // These will have the correct format for calling the 'helper'.
  VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
  VRegister vm_helper = VRegister(2, vm_bits, vm_lane_count);
  VRegister vres_helper = VRegister(5, vd_bits, vd_lane_count);

  // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
  VRegister vntmp_single = VRegister(3, vn_lane_bits);
  VRegister vmtmp_single = VRegister(4, vm_lane_bits);

  __ Mov(out, results);

  __ Mov(inputs_d_base, inputs_d);

  __ Mov(inputs_n_base, inputs_n);
  __ Mov(inputs_n_last_16bytes, inputs_n + (inputs_n_length - 16));
  __ Mov(inputs_m_base, inputs_m);
  __ Mov(inputs_m_last_16bytes, inputs_m + (inputs_m_length - 16));

  // Seed vn and vm with the last 16 input bytes; the Ext instructions below
  // shift one newly-loaded lane into them on each iteration.
  __ Ldr(vd, MemOperand(inputs_d_base));
  __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
  __ Ldr(vm, MemOperand(inputs_m_last_16bytes));

  __ Mov(index_n, 0);
  __ Bind(&loop_n);

  __ Ldr(vntmp_single,
         MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
  __ Ext(vn, vn, vntmp, vn_lane_bytes);

  __ Mov(index_m, 0);
  __ Bind(&loop_m);

  __ Ldr(vmtmp_single,
         MemOperand(inputs_m_base, index_m, LSL, vm_lane_bytes_log2));
  __ Ext(vm, vm, vmtmp, vm_lane_bytes);

  __ Mov(vres, vd);
  {
    // Apply the helper once per element index, storing a result vector after
    // each application.
    for (unsigned i = 0; i < indices_length; i++) {
      {
        SingleEmissionCheckScope guard(&masm);
        (masm.*helper)(vres_helper, vn_helper, vm_helper, indices[i]);
      }
      __ Str(vres, MemOperand(out, vd.GetSizeInBytes(), PostIndex));
    }
  }

  __ Add(index_m, index_m, 1);
  __ Cmp(index_m, inputs_m_length);
  __ B(lo, &loop_m);

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_n_length);
  __ B(lo, &loop_n);

  END();
  RUN();
  TEARDOWN();
}


// Test NEON instructions. The inputs_*[] and expected[] arrays should be
// arrays of rawbit representation of input values. This ensures that
// exact bit comparisons can be performed.
// Driver for by-element instruction tests: runs TestByElementNEON_Helper over
// all (n, m, index) combinations, then either prints the results as a C array
// (--generate_test_trace mode) or compares them lane-by-lane against
// 'expected' (the trace recorded on real hardware), dumping a formatted
// mismatch table for up to kErrorReportLimit failing vectors.
template <typename Td, typename Tn, typename Tm>
static void TestByElementNEON(const char* name,
                              TestByElementNEONHelper_t helper,
                              const Td inputs_d[],
                              const Tn inputs_n[],
                              unsigned inputs_n_length,
                              const Tm inputs_m[],
                              unsigned inputs_m_length,
                              const int indices[],
                              unsigned indices_length,
                              const Td expected[],
                              unsigned expected_length,
                              VectorFormat vd_form,
                              VectorFormat vn_form,
                              VectorFormat vm_form) {
  VIXL_ASSERT(inputs_n_length > 0);
  VIXL_ASSERT(inputs_m_length > 0);
  VIXL_ASSERT(indices_length > 0);

  const unsigned vd_lane_count = MaxLaneCountFromFormat(vd_form);

  // One result vector per (n, m, index) combination; each holds
  // vd_lane_count lanes of type Td.
  const unsigned results_length =
      inputs_n_length * inputs_m_length * indices_length;
  Td* results = new Td[results_length * vd_lane_count];
  const unsigned lane_bit = sizeof(Td) * 8;
  // Hex column width wide enough for both the destination and m-operand
  // types.
  const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tm>();

  TestByElementNEON_Helper(helper,
                           reinterpret_cast<uintptr_t>(inputs_d),
                           reinterpret_cast<uintptr_t>(inputs_n),
                           inputs_n_length,
                           reinterpret_cast<uintptr_t>(inputs_m),
                           inputs_m_length,
                           indices,
                           indices_length,
                           reinterpret_cast<uintptr_t>(results),
                           vd_form,
                           vn_form,
                           vm_form);

  if (Test::generate_test_trace()) {
    // Print the results.
    printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
    for (unsigned iteration = 0; iteration < results_length; iteration++) {
      printf(" ");
      // Output a separate result for each element of the result vector.
      for (unsigned lane = 0; lane < vd_lane_count; lane++) {
        unsigned index = lane + (iteration * vd_lane_count);
        printf(" 0x%0*" PRIx64 ",",
               lane_len_in_hex,
               static_cast<uint64_t>(results[index]));
      }
      printf("\n");
    }

    printf("};\n");
    printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
           name,
           results_length);
  } else {
    // Check the results.
    VIXL_CHECK(expected_length == results_length);
    unsigned error_count = 0;
    unsigned d = 0;
    const char* padding = "                    ";
    VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
    for (unsigned n = 0; n < inputs_n_length; n++) {
      for (unsigned m = 0; m < inputs_m_length; m++) {
        for (unsigned index = 0; index < indices_length; index++, d++) {
          bool error_in_vector = false;

          // First pass: detect whether any lane of this result vector
          // differs from the trace.
          for (unsigned lane = 0; lane < vd_lane_count; lane++) {
            unsigned output_index =
                (n * inputs_m_length * indices_length * vd_lane_count) +
                (m * indices_length * vd_lane_count) + (index * vd_lane_count) +
                lane;

            if (results[output_index] != expected[output_index]) {
              error_in_vector = true;
              break;
            }
          }

          // Second pass: print the whole vector, flagging mismatching lanes
          // with '*'. Report at most kErrorReportLimit failing vectors.
          if (error_in_vector && (++error_count <= kErrorReportLimit)) {
            printf("%s\n", name);
            printf(" Vd%.*s| Vn%.*s| Vm%.*s| Index | Vd%.*s| Expected\n",
                   lane_len_in_hex + 1,
                   padding,
                   lane_len_in_hex + 1,
                   padding,
                   lane_len_in_hex + 1,
                   padding,
                   lane_len_in_hex + 1,
                   padding);

            for (unsigned lane = 0; lane < vd_lane_count; lane++) {
              unsigned output_index =
                  (n * inputs_m_length * indices_length * vd_lane_count) +
                  (m * indices_length * vd_lane_count) +
                  (index * vd_lane_count) + lane;
              // Reconstruct which input lanes were in the helper's sliding
              // windows for this iteration (the windows trail the loop
              // counters — see TestByElementNEON_Helper).
              unsigned input_index_n =
                  (inputs_n_length - vd_lane_count + n + 1 + lane) %
                  inputs_n_length;
              unsigned input_index_m =
                  (inputs_m_length - vd_lane_count + m + 1 + lane) %
                  inputs_m_length;

              printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64
                     " "
                     "| [%3d] | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
                     results[output_index] != expected[output_index] ? '*'
                                                                    : ' ',
                     lane_len_in_hex,
                     static_cast<uint64_t>(inputs_d[lane]),
                     lane_len_in_hex,
                     static_cast<uint64_t>(inputs_n[input_index_n]),
                     lane_len_in_hex,
                     static_cast<uint64_t>(inputs_m[input_index_m]),
                     indices[index],
                     lane_len_in_hex,
                     static_cast<uint64_t>(results[output_index]),
                     lane_len_in_hex,
                     static_cast<uint64_t>(expected[output_index]));
            }
          }
        }
      }
    }
    VIXL_ASSERT(d == expected_length);
    if (error_count > kErrorReportLimit) {
      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
    }
    VIXL_CHECK(error_count == 0);
  }
  delete[] results;
}


// ==== Tests for instructions of the form <INST> VReg, VReg, #Immediate. ====


// Generate and run a fragment applying 'helper' (an emitter for a
// two-operand-plus-immediate instruction, e.g. shift-by-immediate) to every
// combination of an n-input window and each immediate in inputs_m[], storing
// one result register per combination to 'results'.
template <typename Tm>
void Test2OpImmNEON_Helper(
    typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper,
    uintptr_t inputs_n,
    unsigned inputs_n_length,
    const Tm inputs_m[],
    unsigned inputs_m_length,
    uintptr_t results,
    VectorFormat vd_form,
    VectorFormat vn_form) {
  VIXL_ASSERT(vd_form != kFormatUndefined && vn_form != kFormatUndefined);

  SETUP();
  START();

  // Roll up the loop to keep the code size down.
  Label loop_n;

  // Output cursor, input base, pointer to the input tail (window seed) and
  // the loop counter.
  Register out = x0;
  Register inputs_n_base = x1;
  Register inputs_n_last_16bytes = x3;
  Register index_n = x5;

  // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
  // Register-size and lane-geometry constants derived from the requested
  // destination and source formats.
  const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);

  const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
  const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
  const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);


  // These will be either a D- or a Q-register form, with a single lane
  // (for use in scalar load and store operations).
  VRegister vd = VRegister(0, vd_bits);
  VRegister vn = v1.V16B();
  VRegister vntmp = v3.V16B();

  // These will have the correct format for use when calling 'helper'.
  VRegister vd_helper = VRegister(0, vd_bits, vd_lane_count);
  VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);

  // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
  VRegister vntmp_single = VRegister(3, vn_lane_bits);

  __ Mov(out, results);

  __ Mov(inputs_n_base, inputs_n);
  __ Mov(inputs_n_last_16bytes,
         inputs_n + (vn_lane_bytes * inputs_n_length) - 16);

  // Seed the sliding input window from the tail of the input list.
  __ Ldr(vn, MemOperand(inputs_n_last_16bytes));

  __ Mov(index_n, 0);
  __ Bind(&loop_n);

  // Load one new input lane and slide the vn window along by one lane.
  __ Ldr(vntmp_single,
         MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
  __ Ext(vn, vn, vntmp, vn_lane_bytes);

  // Set the destination to zero for tests such as '[r]shrn2'.
  // TODO: Setting the destination to values other than zero might be a better
  // test for shift and accumulate instructions (srsra/ssra/usra/ursra).
  __ Movi(vd.V16B(), 0);

  // Emit one instruction per immediate and store each result.
  {
    for (unsigned i = 0; i < inputs_m_length; i++) {
      {
        SingleEmissionCheckScope guard(&masm);
        (masm.*helper)(vd_helper, vn_helper, inputs_m[i]);
      }
      __ Str(vd, MemOperand(out, vd.GetSizeInBytes(), PostIndex));
    }
  }

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_n_length);
  __ B(lo, &loop_n);

  END();
  RUN();
  TEARDOWN();
}


// Test NEON instructions. The inputs_*[] and expected[] arrays should be
// arrays of rawbit representation of input values. This ensures that
// exact bit comparisons can be performed.
// Driver for vector+immediate tests: runs Test2OpImmNEON_Helper over all
// (n, immediate) combinations, then prints the results as a trace array or
// checks them against 'expected' (see TestByElementNEON for the same
// trace/check structure).
template <typename Td, typename Tn, typename Tm>
static void Test2OpImmNEON(
    const char* name,
    typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper,
    const Tn inputs_n[],
    unsigned inputs_n_length,
    const Tm inputs_m[],
    unsigned inputs_m_length,
    const Td expected[],
    unsigned expected_length,
    VectorFormat vd_form,
    VectorFormat vn_form) {
  VIXL_ASSERT(inputs_n_length > 0 && inputs_m_length > 0);

  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);

  // One result vector per (n, immediate) combination.
  const unsigned results_length = inputs_n_length * inputs_m_length;
  Td* results = new Td[results_length * vd_lane_count];
  const unsigned lane_bit = sizeof(Td) * 8;
  const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();

  Test2OpImmNEON_Helper(helper,
                        reinterpret_cast<uintptr_t>(inputs_n),
                        inputs_n_length,
                        inputs_m,
                        inputs_m_length,
                        reinterpret_cast<uintptr_t>(results),
                        vd_form,
                        vn_form);

  if (Test::generate_test_trace()) {
    // Print the results.
    printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
    for (unsigned iteration = 0; iteration < results_length; iteration++) {
      printf(" ");
      // Output a separate result for each element of the result vector.
      for (unsigned lane = 0; lane < vd_lane_count; lane++) {
        unsigned index = lane + (iteration * vd_lane_count);
        printf(" 0x%0*" PRIx64 ",",
               lane_len_in_hex,
               static_cast<uint64_t>(results[index]));
      }
      printf("\n");
    }

    printf("};\n");
    printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
           name,
           results_length);
  } else {
    // Check the results.
    VIXL_CHECK(expected_length == results_length);
    unsigned error_count = 0;
    unsigned d = 0;
    const char* padding = "                    ";
    VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
    for (unsigned n = 0; n < inputs_n_length; n++) {
      for (unsigned m = 0; m < inputs_m_length; m++, d++) {
        bool error_in_vector = false;

        // First pass: does any lane of this result vector differ from the
        // recorded trace?
        for (unsigned lane = 0; lane < vd_lane_count; lane++) {
          unsigned output_index = (n * inputs_m_length * vd_lane_count) +
                                  (m * vd_lane_count) + lane;

          if (results[output_index] != expected[output_index]) {
            error_in_vector = true;
            break;
          }
        }

        // Second pass: print the whole vector, flagging mismatching lanes
        // with '*'. Report at most kErrorReportLimit failing vectors.
        if (error_in_vector && (++error_count <= kErrorReportLimit)) {
          printf("%s\n", name);
          printf(" Vn%.*s| Imm%.*s| Vd%.*s| Expected\n",
                 lane_len_in_hex + 1,
                 padding,
                 lane_len_in_hex,
                 padding,
                 lane_len_in_hex + 1,
                 padding);

          // Reconstruct where the helper's 16-byte sliding window was within
          // inputs_n for this iteration.
          const unsigned first_index_n =
              inputs_n_length - (16 / vn_lane_bytes) + n + 1;

          for (unsigned lane = 0; lane < std::max(vd_lane_count, vn_lane_count);
               lane++) {
            unsigned output_index = (n * inputs_m_length * vd_lane_count) +
                                    (m * vd_lane_count) + lane;
            unsigned input_index_n = (first_index_n + lane) % inputs_n_length;
            unsigned input_index_m = m;

            printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64
                   " "
                   "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
                   results[output_index] != expected[output_index] ? '*' : ' ',
                   lane_len_in_hex,
                   static_cast<uint64_t>(inputs_n[input_index_n]),
                   lane_len_in_hex,
                   static_cast<uint64_t>(inputs_m[input_index_m]),
                   lane_len_in_hex,
                   static_cast<uint64_t>(results[output_index]),
                   lane_len_in_hex,
                   static_cast<uint64_t>(expected[output_index]));
          }
        }
      }
    }
    VIXL_ASSERT(d == expected_length);
    if (error_count > kErrorReportLimit) {
      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
    }
    VIXL_CHECK(error_count == 0);
  }
  delete[] results;
}


// ==== Tests for instructions of the form <INST> VReg, #Imm, VReg, #Imm. ====


// Generate and run a fragment applying 'helper' (an emitter for a
// destination-updating instruction taking two immediates, with the register
// operands each paired with an immediate) to every combination of an n-input
// window and each (imm1, imm2) pair, storing one result register per
// combination to 'results'.
static void TestOpImmOpImmNEON_Helper(TestOpImmOpImmVdUpdateNEONHelper_t helper,
                                      uintptr_t inputs_d,
                                      const int inputs_imm1[],
                                      unsigned inputs_imm1_length,
                                      uintptr_t inputs_n,
                                      unsigned inputs_n_length,
                                      const int inputs_imm2[],
                                      unsigned inputs_imm2_length,
                                      uintptr_t results,
                                      VectorFormat vd_form,
                                      VectorFormat vn_form) {
  VIXL_ASSERT(vd_form != kFormatUndefined);
  VIXL_ASSERT(vn_form != kFormatUndefined);

  SETUP();
  START();

  // Roll up the loop to keep the code size down.
  Label loop_n;

  // Output cursor, input base pointers, pointer to the last full vector of
  // inputs_n (window seed) and the loop counter.
  Register out = x0;
  Register inputs_d_base = x1;
  Register inputs_n_base = x2;
  Register inputs_n_last_vector = x4;
  Register index_n = x6;

  // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
  // Register-size and lane-geometry constants derived from the requested
  // destination and source formats.
  const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);

  const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
  const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
  const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);


  // These will be either a D- or a Q-register form, with a single lane
  // (for use in scalar load and store operations).
  VRegister vd = VRegister(0, vd_bits);
  VRegister vn = VRegister(1, vn_bits);
  VRegister vntmp = VRegister(4, vn_bits);
  VRegister vres = VRegister(5, vn_bits);

  // These will have the correct format for use when calling 'helper'.
  VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
  VRegister vres_helper = VRegister(5, vd_bits, vd_lane_count);

  // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
  VRegister vntmp_single = VRegister(4, vn_lane_bits);

  // Same registers for use in the 'ext' instructions.
  VRegister vn_ext = (kDRegSize == vn_bits) ? vn.V8B() : vn.V16B();
  VRegister vntmp_ext = (kDRegSize == vn_bits) ? vntmp.V8B() : vntmp.V16B();

  __ Mov(out, results);

  __ Mov(inputs_d_base, inputs_d);

  __ Mov(inputs_n_base, inputs_n);
  __ Mov(inputs_n_last_vector,
         inputs_n + vn_lane_bytes * (inputs_n_length - vn_lane_count));

  // Pre-fill vd with the accumulator inputs and seed the vn sliding window
  // from the tail of the input list.
  __ Ldr(vd, MemOperand(inputs_d_base));

  __ Ldr(vn, MemOperand(inputs_n_last_vector));

  __ Mov(index_n, 0);
  __ Bind(&loop_n);

  // Load one new input lane and slide the vn window along by one lane.
  __ Ldr(vntmp_single,
         MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
  __ Ext(vn_ext, vn_ext, vntmp_ext, vn_lane_bytes);

  // For each (imm1, imm2) pair: restore the destination from vd (these
  // instructions update their destination), emit one instruction and store
  // the result. The scope bounds the three instructions emitted per pair.
  {
    EmissionCheckScope guard(&masm,
                             kInstructionSize * inputs_imm1_length *
                                 inputs_imm2_length * 3);
    for (unsigned i = 0; i < inputs_imm1_length; i++) {
      for (unsigned j = 0; j < inputs_imm2_length; j++) {
        __ Mov(vres, vd);
        (masm.*helper)(vres_helper, inputs_imm1[i], vn_helper, inputs_imm2[j]);
        __ Str(vres, MemOperand(out, vd.GetSizeInBytes(), PostIndex));
      }
    }
  }

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_n_length);
  __ B(lo, &loop_n);

  END();
  RUN();
  TEARDOWN();
}


// Test NEON instructions. The inputs_*[] and expected[] arrays should be
// arrays of rawbit representation of input values. This ensures that
// exact bit comparisons can be performed.
// Driver for <INST> VReg, #Imm, VReg, #Imm tests: runs
// TestOpImmOpImmNEON_Helper over all (n, imm1, imm2) combinations, then
// prints the results as a trace array or checks them against 'expected'
// (the trace recorded on real hardware).
template <typename Td, typename Tn>
static void TestOpImmOpImmNEON(const char* name,
                               TestOpImmOpImmVdUpdateNEONHelper_t helper,
                               const Td inputs_d[],
                               const int inputs_imm1[],
                               unsigned inputs_imm1_length,
                               const Tn inputs_n[],
                               unsigned inputs_n_length,
                               const int inputs_imm2[],
                               unsigned inputs_imm2_length,
                               const Td expected[],
                               unsigned expected_length,
                               VectorFormat vd_form,
                               VectorFormat vn_form) {
  VIXL_ASSERT(inputs_n_length > 0);
  VIXL_ASSERT(inputs_imm1_length > 0);
  VIXL_ASSERT(inputs_imm2_length > 0);

  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);

  // One result vector per (n, imm1, imm2) combination.
  const unsigned results_length =
      inputs_n_length * inputs_imm1_length * inputs_imm2_length;

  Td* results = new Td[results_length * vd_lane_count];
  const unsigned lane_bit = sizeof(Td) * 8;
  const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();

  TestOpImmOpImmNEON_Helper(helper,
                            reinterpret_cast<uintptr_t>(inputs_d),
                            inputs_imm1,
                            inputs_imm1_length,
                            reinterpret_cast<uintptr_t>(inputs_n),
                            inputs_n_length,
                            inputs_imm2,
                            inputs_imm2_length,
                            reinterpret_cast<uintptr_t>(results),
                            vd_form,
                            vn_form);

  if (Test::generate_test_trace()) {
    // Print the results.
    printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
    for (unsigned iteration = 0; iteration < results_length; iteration++) {
      printf(" ");
      // Output a separate result for each element of the result vector.
      for (unsigned lane = 0; lane < vd_lane_count; lane++) {
        unsigned index = lane + (iteration * vd_lane_count);
        printf(" 0x%0*" PRIx64 ",",
               lane_len_in_hex,
               static_cast<uint64_t>(results[index]));
      }
      printf("\n");
    }

    printf("};\n");
    printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
           name,
           results_length);
  } else {
    // Check the results.
    VIXL_CHECK(expected_length == results_length);
    unsigned error_count = 0;
    unsigned counted_length = 0;
    const char* padding = "                    ";
    VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
    for (unsigned n = 0; n < inputs_n_length; n++) {
      for (unsigned imm1 = 0; imm1 < inputs_imm1_length; imm1++) {
        for (unsigned imm2 = 0; imm2 < inputs_imm2_length; imm2++) {
          bool error_in_vector = false;

          counted_length++;

          // First pass: does any lane of this result vector differ from the
          // recorded trace?
          for (unsigned lane = 0; lane < vd_lane_count; lane++) {
            unsigned output_index =
                (n * inputs_imm1_length * inputs_imm2_length * vd_lane_count) +
                (imm1 * inputs_imm2_length * vd_lane_count) +
                (imm2 * vd_lane_count) + lane;

            if (results[output_index] != expected[output_index]) {
              error_in_vector = true;
              break;
            }
          }

          // Second pass: print the whole vector, flagging mismatching lanes
          // with '*'. Report at most kErrorReportLimit failing vectors.
          if (error_in_vector && (++error_count <= kErrorReportLimit)) {
            printf("%s\n", name);
            printf(" Vd%.*s| Imm%.*s| Vn%.*s| Imm%.*s| Vd%.*s| Expected\n",
                   lane_len_in_hex + 1,
                   padding,
                   lane_len_in_hex,
                   padding,
                   lane_len_in_hex + 1,
                   padding,
                   lane_len_in_hex,
                   padding,
                   lane_len_in_hex + 1,
                   padding);

            for (unsigned lane = 0; lane < vd_lane_count; lane++) {
              unsigned output_index =
                  (n * inputs_imm1_length * inputs_imm2_length *
                   vd_lane_count) +
                  (imm1 * inputs_imm2_length * vd_lane_count) +
                  (imm2 * vd_lane_count) + lane;
              // Reconstruct which input lanes were in the helper's sliding
              // window for this iteration.
              unsigned input_index_n =
                  (inputs_n_length - vd_lane_count + n + 1 + lane) %
                  inputs_n_length;
              unsigned input_index_imm1 = imm1;
              unsigned input_index_imm2 = imm2;

              printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64
                     " "
                     "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
                     results[output_index] != expected[output_index] ? '*'
                                                                    : ' ',
                     lane_len_in_hex,
                     static_cast<uint64_t>(inputs_d[lane]),
                     lane_len_in_hex,
                     static_cast<uint64_t>(inputs_imm1[input_index_imm1]),
                     lane_len_in_hex,
                     static_cast<uint64_t>(inputs_n[input_index_n]),
                     lane_len_in_hex,
                     static_cast<uint64_t>(inputs_imm2[input_index_imm2]),
                     lane_len_in_hex,
                     static_cast<uint64_t>(results[output_index]),
                     lane_len_in_hex,
                     static_cast<uint64_t>(expected[output_index]));
            }
          }
        }
      }
    }
    VIXL_ASSERT(counted_length == expected_length);
    if (error_count > kErrorReportLimit) {
      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
    }
    VIXL_CHECK(error_count == 0);
  }
  delete[] results;
}


// ==== Floating-point tests. ====


// Standard floating-point test expansion for both double- and single-precision
// operations.
#define STRINGIFY(s) #s

// Invokes the Test<type> driver for 'mnemonic'/'variant', passing the input
// list and the matching kExpected_*/kExpectedCount_* trace data.
#define CALL_TEST_FP_HELPER(mnemonic, variant, type, input) \
  Test##type(STRINGIFY(mnemonic) "_" STRINGIFY(variant),    \
             &MacroAssembler::mnemonic,                     \
             input,                                         \
             sizeof(input) / sizeof(input[0]),              \
             kExpected_##mnemonic##_##variant,              \
             kExpectedCount_##mnemonic##_##variant)

// Defines one double-precision ("_d") and one single-precision ("_s") test
// for 'mnemonic'.
#define DEFINE_TEST_FP(mnemonic, type, input)                    \
  TEST(mnemonic##_d) {                                           \
    CALL_TEST_FP_HELPER(mnemonic, d, type, kInputDouble##input); \
  }                                                              \
  TEST(mnemonic##_s) {                                           \
    CALL_TEST_FP_HELPER(mnemonic, s, type, kInputFloat##input);  \
  }

// TODO: Test with a newer version of valgrind.
//
// Note: valgrind-3.10.0 does not properly interpret libm's fma() on x86_64.
// Therefore this test will be exiting though an ASSERT and thus leaking
// memory.
DEFINE_TEST_FP(fmadd, 3Op, Basic)
DEFINE_TEST_FP(fmsub, 3Op, Basic)
DEFINE_TEST_FP(fnmadd, 3Op, Basic)
DEFINE_TEST_FP(fnmsub, 3Op, Basic)

DEFINE_TEST_FP(fadd, 2Op, Basic)
DEFINE_TEST_FP(fdiv, 2Op, Basic)
DEFINE_TEST_FP(fmax, 2Op, Basic)
DEFINE_TEST_FP(fmaxnm, 2Op, Basic)
DEFINE_TEST_FP(fmin, 2Op, Basic)
DEFINE_TEST_FP(fminnm, 2Op, Basic)
DEFINE_TEST_FP(fmul, 2Op, Basic)
DEFINE_TEST_FP(fsub, 2Op, Basic)
DEFINE_TEST_FP(fnmul, 2Op, Basic)

DEFINE_TEST_FP(fabs, 1Op, Basic)
DEFINE_TEST_FP(fmov, 1Op, Basic)
DEFINE_TEST_FP(fneg, 1Op, Basic)
DEFINE_TEST_FP(fsqrt, 1Op, Basic)
DEFINE_TEST_FP(frinta, 1Op, Conversions)
DEFINE_TEST_FP(frinti, 1Op, Conversions)
DEFINE_TEST_FP(frintm, 1Op, Conversions)
DEFINE_TEST_FP(frintn, 1Op, Conversions)
DEFINE_TEST_FP(frintp, 1Op, Conversions)
DEFINE_TEST_FP(frintx, 1Op, Conversions)
DEFINE_TEST_FP(frintz, 1Op, Conversions)

// Compare variants: register-register ("d"/"s") and compare-with-zero
// ("dz"/"sz").
TEST(fcmp_d) { CALL_TEST_FP_HELPER(fcmp, d, Cmp, kInputDoubleBasic); }
TEST(fcmp_s) { CALL_TEST_FP_HELPER(fcmp, s, Cmp, kInputFloatBasic); }
TEST(fcmp_dz) { CALL_TEST_FP_HELPER(fcmp, dz, CmpZero, kInputDoubleBasic); }
TEST(fcmp_sz) { CALL_TEST_FP_HELPER(fcmp, sz, CmpZero, kInputFloatBasic); }

TEST(fcvt_sd) { CALL_TEST_FP_HELPER(fcvt, sd, 1Op, kInputDoubleConversions); }
TEST(fcvt_ds) { CALL_TEST_FP_HELPER(fcvt, ds, 1Op, kInputFloatConversions); }

// Defines FP-to-integer conversion tests for all four register-width
// combinations: X<-D, X<-S, W<-D, W<-S.
#define DEFINE_TEST_FP_TO_INT(mnemonic, type, input)              \
  TEST(mnemonic##_xd) {                                           \
    CALL_TEST_FP_HELPER(mnemonic, xd, type, kInputDouble##input); \
  }                                                               \
  TEST(mnemonic##_xs) {                                           \
    CALL_TEST_FP_HELPER(mnemonic, xs, type, kInputFloat##input);  \
  }                                                               \
  TEST(mnemonic##_wd) {                                           \
    CALL_TEST_FP_HELPER(mnemonic, wd, type, kInputDouble##input); \
  }                                                               \
  TEST(mnemonic##_ws) {                                           \
    CALL_TEST_FP_HELPER(mnemonic, ws, type, kInputFloat##input);  \
  }

DEFINE_TEST_FP_TO_INT(fcvtas, FPToS, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtau, FPToU, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtms, FPToS, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtmu, FPToU, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtns, FPToS, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtnu, FPToU, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtzs, FPToFixedS, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtzu, FPToFixedU, Conversions)

// TODO: Scvtf-fixed-point
// TODO: Scvtf-integer
// TODO: Ucvtf-fixed-point
// TODO: Ucvtf-integer

// TODO: Fccmp
// TODO: Fcsel


// ==== NEON Tests. ====

// The CALL_TEST_NEON_HELPER_* macros below forward to the Test*NEON drivers
// defined above, constructing the test name, input lengths, expected-trace
// symbols and vector formats from the mnemonic and format tokens.

#define CALL_TEST_NEON_HELPER_1Op(mnemonic, vdform, vnform, input_n) \
  Test1OpNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform),             \
              &MacroAssembler::mnemonic,                             \
              input_n,                                               \
              (sizeof(input_n) / sizeof(input_n[0])),                \
              kExpected_NEON_##mnemonic##_##vdform,                  \
              kExpectedCount_NEON_##mnemonic##_##vdform,             \
              kFormat##vdform,                                       \
              kFormat##vnform)

#define CALL_TEST_NEON_HELPER_1OpAcross(mnemonic, vdform, vnform, input_n)   \
  Test1OpAcrossNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform) "_" STRINGIFY( \
                        vnform),                                             \
                    &MacroAssembler::mnemonic,                               \
                    input_n,                                                 \
                    (sizeof(input_n) / sizeof(input_n[0])),                  \
                    kExpected_NEON_##mnemonic##_##vdform##_##vnform,         \
                    kExpectedCount_NEON_##mnemonic##_##vdform##_##vnform,    \
                    kFormat##vdform,                                         \
                    kFormat##vnform)

#define CALL_TEST_NEON_HELPER_2Op(                               \
    mnemonic, vdform, vnform, vmform, input_d, input_n, input_m) \
  Test2OpNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform),         \
              &MacroAssembler::mnemonic,                         \
              input_d,                                           \
              input_n,                                           \
              (sizeof(input_n) / sizeof(input_n[0])),            \
              input_m,                                           \
              (sizeof(input_m) / sizeof(input_m[0])),            \
              kExpected_NEON_##mnemonic##_##vdform,              \
              kExpectedCount_NEON_##mnemonic##_##vdform,         \
              kFormat##vdform,                                   \
              kFormat##vnform,                                   \
              kFormat##vmform)

#define CALL_TEST_NEON_HELPER_2OpImm(                                 \
    mnemonic, vdform, vnform, input_n, input_m)                       \
  Test2OpImmNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform) "_2OPIMM", \
                 &MacroAssembler::mnemonic,                           \
                 input_n,                                             \
                 (sizeof(input_n) / sizeof(input_n[0])),              \
                 input_m,                                             \
                 (sizeof(input_m) / sizeof(input_m[0])),              \
                 kExpected_NEON_##mnemonic##_##vdform##_2OPIMM,       \
                 kExpectedCount_NEON_##mnemonic##_##vdform##_2OPIMM,  \
                 kFormat##vdform,                                     \
                 kFormat##vnform)

#define CALL_TEST_NEON_HELPER_ByElement(                                  \
    mnemonic, vdform, vnform, vmform, input_d, input_n, input_m, indices) \
  TestByElementNEON(                                                      \
      STRINGIFY(mnemonic) "_" STRINGIFY(vdform) "_" STRINGIFY(            \
          vnform) "_" STRINGIFY(vmform),                                  \
      &MacroAssembler::mnemonic,                                          \
      input_d,                                                            \
      input_n,                                                            \
      (sizeof(input_n) / sizeof(input_n[0])),                             \
      input_m,                                                            \
      (sizeof(input_m) / sizeof(input_m[0])),                             \
      indices,                                                            \
      (sizeof(indices) / sizeof(indices[0])),                             \
      kExpected_NEON_##mnemonic##_##vdform##_##vnform##_##vmform,         \
      kExpectedCount_NEON_##mnemonic##_##vdform##_##vnform##_##vmform,    \
      kFormat##vdform,                                                    \
      kFormat##vnform,                                                    \
      kFormat##vmform)

#define CALL_TEST_NEON_HELPER_OpImmOpImm(helper,                   \
                                         mnemonic,                 \
                                         vdform,                   \
                                         vnform,                   \
                                         input_d,                  \
                                         input_imm1,               \
                                         input_n,                  \
                                         input_imm2)               \
  TestOpImmOpImmNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform),    \
                     helper,                                       \
                     input_d,                                      \
                     input_imm1,                                   \
                     (sizeof(input_imm1) / sizeof(input_imm1[0])), \
                     input_n,                                      \
                     (sizeof(input_n) / sizeof(input_n[0])),       \
                     input_imm2,                                   \
                     (sizeof(input_imm2) / sizeof(input_imm2[0])), \
                     kExpected_NEON_##mnemonic##_##vdform,         \
                     kExpectedCount_NEON_##mnemonic##_##vdform,    \
                     kFormat##vdform,                              \
                     kFormat##vnform)

// "2SAME" instructions use the same format for source and destination.
#define CALL_TEST_NEON_HELPER_2SAME(mnemonic, variant, input) \
  CALL_TEST_NEON_HELPER_1Op(mnemonic, variant, variant, input)

// The DEFINE_TEST_NEON_2SAME_* family expands to one TEST per vector
// arrangement, grouped by lane size.
#define DEFINE_TEST_NEON_2SAME_8B_16B(mnemonic, input)              \
  TEST(mnemonic##_8B) {                                             \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 8B, kInput8bits##input);  \
  }                                                                 \
  TEST(mnemonic##_16B) {                                            \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 16B, kInput8bits##input); \
  }

#define DEFINE_TEST_NEON_2SAME_4H_8H(mnemonic, input)               \
  TEST(mnemonic##_4H) {                                             \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4H, kInput16bits##input); \
  }                                                                 \
  TEST(mnemonic##_8H) {                                             \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 8H, kInput16bits##input); \
  }

#define DEFINE_TEST_NEON_2SAME_2S_4S(mnemonic, input)               \
  TEST(mnemonic##_2S) {                                             \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2S, kInput32bits##input); \
  }                                                                 \
  TEST(mnemonic##_4S) {                                             \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4S, kInput32bits##input); \
  }

#define DEFINE_TEST_NEON_2SAME_BH(mnemonic, input)  \
  DEFINE_TEST_NEON_2SAME_8B_16B(mnemonic, input)    \
  DEFINE_TEST_NEON_2SAME_4H_8H(mnemonic, input)

#define DEFINE_TEST_NEON_2SAME_NO2D(mnemonic, input) \
  DEFINE_TEST_NEON_2SAME_BH(mnemonic, input)         \
  DEFINE_TEST_NEON_2SAME_2S_4S(mnemonic, input)

#define DEFINE_TEST_NEON_2SAME(mnemonic, input)                     \
  DEFINE_TEST_NEON_2SAME_NO2D(mnemonic, input)                      \
  TEST(mnemonic##_2D) {                                             \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2D, kInput64bits##input); \
  }
#define DEFINE_TEST_NEON_2SAME_SD(mnemonic, input)                  \
  DEFINE_TEST_NEON_2SAME_2S_4S(mnemonic, input)                     \
  TEST(mnemonic##_2D) {                                             \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2D, kInput64bits##input); \
  }

#define DEFINE_TEST_NEON_2SAME_FP(mnemonic, input)                  \
  TEST(mnemonic##_2S) {                                             \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2S, kInputFloat##input);  \
  }                                                                 \
  TEST(mnemonic##_4S) {                                             \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4S, kInputFloat##input);  \
  }                                                                 \
  TEST(mnemonic##_2D) {                                             \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2D, kInputDouble##input); \
  }

#define DEFINE_TEST_NEON_2SAME_FP_SCALAR(mnemonic, input)          \
  TEST(mnemonic##_S) {                                             \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, S, kInputFloat##input);  \
  }                                                                \
  TEST(mnemonic##_D) {                                             \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, D, kInputDouble##input); \
  }

#define DEFINE_TEST_NEON_2SAME_SCALAR_B(mnemonic, input)          \
  TEST(mnemonic##_B) {                                            \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, B, kInput8bits##input); \
  }
#define DEFINE_TEST_NEON_2SAME_SCALAR_H(mnemonic, input)           \
  TEST(mnemonic##_H) {                                             \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, H, kInput16bits##input); \
  }
#define DEFINE_TEST_NEON_2SAME_SCALAR_S(mnemonic, input)           \
  TEST(mnemonic##_S) {                                             \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, S, kInput32bits##input); \
  }
#define DEFINE_TEST_NEON_2SAME_SCALAR_D(mnemonic, input)           \
  TEST(mnemonic##_D) {                                             \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, D, kInput64bits##input); \
  }

#define DEFINE_TEST_NEON_2SAME_SCALAR(mnemonic, input) \
  DEFINE_TEST_NEON_2SAME_SCALAR_B(mnemonic, input)     \
  DEFINE_TEST_NEON_2SAME_SCALAR_H(mnemonic, input)     \
  DEFINE_TEST_NEON_2SAME_SCALAR_S(mnemonic, input)     \
  DEFINE_TEST_NEON_2SAME_SCALAR_D(mnemonic, input)

#define DEFINE_TEST_NEON_2SAME_SCALAR_SD(mnemonic, input) \
  DEFINE_TEST_NEON_2SAME_SCALAR_S(mnemonic, input)        \
  DEFINE_TEST_NEON_2SAME_SCALAR_D(mnemonic, input)


// Across-lanes instructions reduce a whole vector to a scalar destination.
#define CALL_TEST_NEON_HELPER_ACROSS(mnemonic, vd_form, vn_form, input_n) \
  CALL_TEST_NEON_HELPER_1OpAcross(mnemonic, vd_form, vn_form, input_n)

#define DEFINE_TEST_NEON_ACROSS(mnemonic, input)                        \
  TEST(mnemonic##_B_8B) {                                               \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, B, 8B, kInput8bits##input);  \
  }                                                                     \
  TEST(mnemonic##_B_16B) {                                              \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, B, 16B, kInput8bits##input); \
  }                                                                     \
  TEST(mnemonic##_H_4H) {                                               \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 4H, kInput16bits##input); \
  }                                                                     \
  TEST(mnemonic##_H_8H) {                                               \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 8H, kInput16bits##input); \
  }                                                                     \
  TEST(mnemonic##_S_4S) {                                               \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 4S, kInput32bits##input); \
  }

// Long variants: the scalar destination is one lane size wider than the
// source lanes.
#define DEFINE_TEST_NEON_ACROSS_LONG(mnemonic, input)                   \
  TEST(mnemonic##_H_8B) {                                               \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 8B, kInput8bits##input);  \
  }                                                                     \
  TEST(mnemonic##_H_16B) {                                              \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 16B, kInput8bits##input); \
  }                                                                     \
  TEST(mnemonic##_S_4H) {                                               \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 4H, kInput16bits##input); \
  }                                                                     \
  TEST(mnemonic##_S_8H) {                                               \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 8H, kInput16bits##input); \
  }                                                                     \
  TEST(mnemonic##_D_4S) {                                               \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, D, 4S, kInput32bits##input); \
  }

#define DEFINE_TEST_NEON_ACROSS_FP(mnemonic, input)                     \
  TEST(mnemonic##_S_4S) {                                               \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 4S, kInputFloat##input);  \
  }

// "2DIFF" instructions use different source and destination formats
// (widening/narrowing).
#define CALL_TEST_NEON_HELPER_2DIFF(mnemonic, vdform, vnform, input_n) \
  CALL_TEST_NEON_HELPER_1Op(mnemonic, vdform, vnform, input_n)

#define DEFINE_TEST_NEON_2DIFF_LONG(mnemonic, input)                   \
  TEST(mnemonic##_4H) {                                                \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 8B, kInput8bits##input); \
  }                                                                    \
  TEST(mnemonic##_8H) {                                                \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 8H, 16B, kInput8bits##input); \
  }                                                                    \
  TEST(mnemonic##_2S) {                                                \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 4H, kInput16bits##input); \
  }                                                                    \
  TEST(mnemonic##_4S) {                                                \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4S, 8H, kInput16bits##input); \
  }                                                                    \
  TEST(mnemonic##_1D) {                                                \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 1D, 2S, kInput32bits##input); \
  }                                                                    \
  TEST(mnemonic##_2D) {                                                \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2D, 4S, kInput32bits##input); \
  }

#define DEFINE_TEST_NEON_2DIFF_NARROW(mnemonic, input) \
TEST(mnemonic##_8B) { \ 3015 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 8B, 8H, kInput16bits##input); \ 3016 } \ 3017 TEST(mnemonic##_4H) { \ 3018 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 4S, kInput32bits##input); \ 3019 } \ 3020 TEST(mnemonic##_2S) { \ 3021 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInput64bits##input); \ 3022 } \ 3023 TEST(mnemonic##2_16B) { \ 3024 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 16B, 8H, kInput16bits##input); \ 3025 } \ 3026 TEST(mnemonic##2_8H) { \ 3027 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 8H, 4S, kInput32bits##input); \ 3028 } \ 3029 TEST(mnemonic##2_4S) { \ 3030 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, kInput64bits##input); \ 3031 } 3032 3033 #define DEFINE_TEST_NEON_2DIFF_FP_LONG(mnemonic, input) \ 3034 TEST(mnemonic##_4S) { \ 3035 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4S, 4H, kInputFloat16##input); \ 3036 } \ 3037 TEST(mnemonic##_2D) { \ 3038 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2D, 2S, kInputFloat##input); \ 3039 } \ 3040 TEST(mnemonic##2_4S) { \ 3041 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 8H, kInputFloat16##input); \ 3042 } \ 3043 TEST(mnemonic##2_2D) { \ 3044 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 2D, 4S, kInputFloat##input); \ 3045 } 3046 3047 #define DEFINE_TEST_NEON_2DIFF_FP_NARROW(mnemonic, input) \ 3048 TEST(mnemonic##_4H) { \ 3049 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 4S, kInputFloat##input); \ 3050 } \ 3051 TEST(mnemonic##_2S) { \ 3052 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInputDouble##input); \ 3053 } \ 3054 TEST(mnemonic##2_8H) { \ 3055 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 8H, 4S, kInputFloat##input); \ 3056 } \ 3057 TEST(mnemonic##2_4S) { \ 3058 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, kInputDouble##input); \ 3059 } 3060 3061 #define DEFINE_TEST_NEON_2DIFF_FP_NARROW_2S(mnemonic, input) \ 3062 TEST(mnemonic##_2S) { \ 3063 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInputDouble##input); \ 3064 } \ 3065 TEST(mnemonic##2_4S) { \ 3066 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 
4S, 2D, kInputDouble##input); \ 3067 } 3068 3069 #define DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(mnemonic, input) \ 3070 TEST(mnemonic##_B) { \ 3071 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, B, H, kInput16bits##input); \ 3072 } \ 3073 TEST(mnemonic##_H) { \ 3074 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, H, S, kInput32bits##input); \ 3075 } \ 3076 TEST(mnemonic##_S) { \ 3077 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, S, D, kInput64bits##input); \ 3078 } 3079 3080 #define DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(mnemonic, input) \ 3081 TEST(mnemonic##_S) { \ 3082 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, S, 2S, kInputFloat##input); \ 3083 } \ 3084 TEST(mnemonic##_D) { \ 3085 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, D, 2D, kInputDouble##input); \ 3086 } 3087 3088 #define CALL_TEST_NEON_HELPER_3SAME(mnemonic, variant, input_d, input_nm) \ 3089 { \ 3090 CALL_TEST_NEON_HELPER_2Op(mnemonic, \ 3091 variant, \ 3092 variant, \ 3093 variant, \ 3094 input_d, \ 3095 input_nm, \ 3096 input_nm); \ 3097 } 3098 3099 #define DEFINE_TEST_NEON_3SAME_8B_16B(mnemonic, input) \ 3100 TEST(mnemonic##_8B) { \ 3101 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ 3102 8B, \ 3103 kInput8bitsAccDestination, \ 3104 kInput8bits##input); \ 3105 } \ 3106 TEST(mnemonic##_16B) { \ 3107 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ 3108 16B, \ 3109 kInput8bitsAccDestination, \ 3110 kInput8bits##input); \ 3111 } 3112 3113 #define DEFINE_TEST_NEON_3SAME_HS(mnemonic, input) \ 3114 TEST(mnemonic##_4H) { \ 3115 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ 3116 4H, \ 3117 kInput16bitsAccDestination, \ 3118 kInput16bits##input); \ 3119 } \ 3120 TEST(mnemonic##_8H) { \ 3121 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ 3122 8H, \ 3123 kInput16bitsAccDestination, \ 3124 kInput16bits##input); \ 3125 } \ 3126 TEST(mnemonic##_2S) { \ 3127 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ 3128 2S, \ 3129 kInput32bitsAccDestination, \ 3130 kInput32bits##input); \ 3131 } \ 3132 TEST(mnemonic##_4S) { \ 3133 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ 3134 4S, \ 3135 
kInput32bitsAccDestination, \ 3136 kInput32bits##input); \ 3137 } 3138 3139 #define DEFINE_TEST_NEON_3SAME_NO2D(mnemonic, input) \ 3140 DEFINE_TEST_NEON_3SAME_8B_16B(mnemonic, input) \ 3141 DEFINE_TEST_NEON_3SAME_HS(mnemonic, input) 3142 3143 #define DEFINE_TEST_NEON_3SAME(mnemonic, input) \ 3144 DEFINE_TEST_NEON_3SAME_NO2D(mnemonic, input) \ 3145 TEST(mnemonic##_2D) { \ 3146 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ 3147 2D, \ 3148 kInput64bitsAccDestination, \ 3149 kInput64bits##input); \ 3150 } 3151 3152 #define DEFINE_TEST_NEON_3SAME_FP(mnemonic, input) \ 3153 TEST(mnemonic##_2S) { \ 3154 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ 3155 2S, \ 3156 kInputFloatAccDestination, \ 3157 kInputFloat##input); \ 3158 } \ 3159 TEST(mnemonic##_4S) { \ 3160 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ 3161 4S, \ 3162 kInputFloatAccDestination, \ 3163 kInputFloat##input); \ 3164 } \ 3165 TEST(mnemonic##_2D) { \ 3166 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ 3167 2D, \ 3168 kInputDoubleAccDestination, \ 3169 kInputDouble##input); \ 3170 } 3171 3172 #define DEFINE_TEST_NEON_3SAME_SCALAR_D(mnemonic, input) \ 3173 TEST(mnemonic##_D) { \ 3174 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ 3175 D, \ 3176 kInput64bitsAccDestination, \ 3177 kInput64bits##input); \ 3178 } 3179 3180 #define DEFINE_TEST_NEON_3SAME_SCALAR_HS(mnemonic, input) \ 3181 TEST(mnemonic##_H) { \ 3182 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ 3183 H, \ 3184 kInput16bitsAccDestination, \ 3185 kInput16bits##input); \ 3186 } \ 3187 TEST(mnemonic##_S) { \ 3188 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ 3189 S, \ 3190 kInput32bitsAccDestination, \ 3191 kInput32bits##input); \ 3192 } 3193 3194 #define DEFINE_TEST_NEON_3SAME_SCALAR(mnemonic, input) \ 3195 TEST(mnemonic##_B) { \ 3196 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ 3197 B, \ 3198 kInput8bitsAccDestination, \ 3199 kInput8bits##input); \ 3200 } \ 3201 TEST(mnemonic##_H) { \ 3202 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ 3203 H, \ 3204 kInput16bitsAccDestination, \ 3205 
kInput16bits##input); \ 3206 } \ 3207 TEST(mnemonic##_S) { \ 3208 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ 3209 S, \ 3210 kInput32bitsAccDestination, \ 3211 kInput32bits##input); \ 3212 } \ 3213 TEST(mnemonic##_D) { \ 3214 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ 3215 D, \ 3216 kInput64bitsAccDestination, \ 3217 kInput64bits##input); \ 3218 } 3219 3220 #define DEFINE_TEST_NEON_3SAME_FP_SCALAR(mnemonic, input) \ 3221 TEST(mnemonic##_S) { \ 3222 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ 3223 S, \ 3224 kInputFloatAccDestination, \ 3225 kInputFloat##input); \ 3226 } \ 3227 TEST(mnemonic##_D) { \ 3228 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ 3229 D, \ 3230 kInputDoubleAccDestination, \ 3231 kInputDouble##input); \ 3232 } 3233 3234 #define CALL_TEST_NEON_HELPER_3DIFF( \ 3235 mnemonic, vdform, vnform, vmform, input_d, input_n, input_m) \ 3236 { \ 3237 CALL_TEST_NEON_HELPER_2Op(mnemonic, \ 3238 vdform, \ 3239 vnform, \ 3240 vmform, \ 3241 input_d, \ 3242 input_n, \ 3243 input_m); \ 3244 } 3245 3246 #define DEFINE_TEST_NEON_3DIFF_LONG_8H(mnemonic, input) \ 3247 TEST(mnemonic##_8H) { \ 3248 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \ 3249 8H, \ 3250 8B, \ 3251 8B, \ 3252 kInput16bitsAccDestination, \ 3253 kInput8bits##input, \ 3254 kInput8bits##input); \ 3255 } \ 3256 TEST(mnemonic##2_8H) { \ 3257 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, \ 3258 8H, \ 3259 16B, \ 3260 16B, \ 3261 kInput16bitsAccDestination, \ 3262 kInput8bits##input, \ 3263 kInput8bits##input); \ 3264 } 3265 3266 #define DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input) \ 3267 TEST(mnemonic##_4S) { \ 3268 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \ 3269 4S, \ 3270 4H, \ 3271 4H, \ 3272 kInput32bitsAccDestination, \ 3273 kInput16bits##input, \ 3274 kInput16bits##input); \ 3275 } \ 3276 TEST(mnemonic##2_4S) { \ 3277 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, \ 3278 4S, \ 3279 8H, \ 3280 8H, \ 3281 kInput32bitsAccDestination, \ 3282 kInput16bits##input, \ 3283 kInput16bits##input); \ 3284 } 3285 3286 #define 
DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input) \ 3287 TEST(mnemonic##_2D) { \ 3288 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \ 3289 2D, \ 3290 2S, \ 3291 2S, \ 3292 kInput64bitsAccDestination, \ 3293 kInput32bits##input, \ 3294 kInput32bits##input); \ 3295 } \ 3296 TEST(mnemonic##2_2D) { \ 3297 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, \ 3298 2D, \ 3299 4S, \ 3300 4S, \ 3301 kInput64bitsAccDestination, \ 3302 kInput32bits##input, \ 3303 kInput32bits##input); \ 3304 } 3305 3306 #define DEFINE_TEST_NEON_3DIFF_LONG_SD(mnemonic, input) \ 3307 DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input) \ 3308 DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input) 3309 3310 #define DEFINE_TEST_NEON_3DIFF_LONG(mnemonic, input) \ 3311 DEFINE_TEST_NEON_3DIFF_LONG_8H(mnemonic, input) \ 3312 DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input) \ 3313 DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input) 3314 3315 #define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_S(mnemonic, input) \ 3316 TEST(mnemonic##_S) { \ 3317 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \ 3318 S, \ 3319 H, \ 3320 H, \ 3321 kInput32bitsAccDestination, \ 3322 kInput16bits##input, \ 3323 kInput16bits##input); \ 3324 } 3325 3326 #define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_D(mnemonic, input) \ 3327 TEST(mnemonic##_D) { \ 3328 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \ 3329 D, \ 3330 S, \ 3331 S, \ 3332 kInput64bitsAccDestination, \ 3333 kInput32bits##input, \ 3334 kInput32bits##input); \ 3335 } 3336 3337 #define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(mnemonic, input) \ 3338 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_S(mnemonic, input) \ 3339 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_D(mnemonic, input) 3340 3341 #define DEFINE_TEST_NEON_3DIFF_WIDE(mnemonic, input) \ 3342 TEST(mnemonic##_8H) { \ 3343 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \ 3344 8H, \ 3345 8H, \ 3346 8B, \ 3347 kInput16bitsAccDestination, \ 3348 kInput16bits##input, \ 3349 kInput8bits##input); \ 3350 } \ 3351 TEST(mnemonic##_4S) { \ 3352 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \ 3353 4S, \ 3354 4S, \ 3355 4H, \ 
3356 kInput32bitsAccDestination, \ 3357 kInput32bits##input, \ 3358 kInput16bits##input); \ 3359 } \ 3360 TEST(mnemonic##_2D) { \ 3361 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \ 3362 2D, \ 3363 2D, \ 3364 2S, \ 3365 kInput64bitsAccDestination, \ 3366 kInput64bits##input, \ 3367 kInput32bits##input); \ 3368 } \ 3369 TEST(mnemonic##2_8H) { \ 3370 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, \ 3371 8H, \ 3372 8H, \ 3373 16B, \ 3374 kInput16bitsAccDestination, \ 3375 kInput16bits##input, \ 3376 kInput8bits##input); \ 3377 } \ 3378 TEST(mnemonic##2_4S) { \ 3379 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, \ 3380 4S, \ 3381 4S, \ 3382 8H, \ 3383 kInput32bitsAccDestination, \ 3384 kInput32bits##input, \ 3385 kInput16bits##input); \ 3386 } \ 3387 TEST(mnemonic##2_2D) { \ 3388 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, \ 3389 2D, \ 3390 2D, \ 3391 4S, \ 3392 kInput64bitsAccDestination, \ 3393 kInput64bits##input, \ 3394 kInput32bits##input); \ 3395 } 3396 3397 #define DEFINE_TEST_NEON_3DIFF_NARROW(mnemonic, input) \ 3398 TEST(mnemonic##_8B) { \ 3399 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \ 3400 8B, \ 3401 8H, \ 3402 8H, \ 3403 kInput8bitsAccDestination, \ 3404 kInput16bits##input, \ 3405 kInput16bits##input); \ 3406 } \ 3407 TEST(mnemonic##_4H) { \ 3408 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \ 3409 4H, \ 3410 4S, \ 3411 4S, \ 3412 kInput16bitsAccDestination, \ 3413 kInput32bits##input, \ 3414 kInput32bits##input); \ 3415 } \ 3416 TEST(mnemonic##_2S) { \ 3417 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \ 3418 2S, \ 3419 2D, \ 3420 2D, \ 3421 kInput32bitsAccDestination, \ 3422 kInput64bits##input, \ 3423 kInput64bits##input); \ 3424 } \ 3425 TEST(mnemonic##2_16B) { \ 3426 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, \ 3427 16B, \ 3428 8H, \ 3429 8H, \ 3430 kInput8bitsAccDestination, \ 3431 kInput16bits##input, \ 3432 kInput16bits##input); \ 3433 } \ 3434 TEST(mnemonic##2_8H) { \ 3435 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, \ 3436 8H, \ 3437 4S, \ 3438 4S, \ 3439 kInput16bitsAccDestination, \ 3440 
kInput32bits##input, \ 3441 kInput32bits##input); \ 3442 } \ 3443 TEST(mnemonic##2_4S) { \ 3444 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, \ 3445 4S, \ 3446 2D, \ 3447 2D, \ 3448 kInput32bitsAccDestination, \ 3449 kInput64bits##input, \ 3450 kInput64bits##input); \ 3451 } 3452 3453 #define CALL_TEST_NEON_HELPER_2OPIMM( \ 3454 mnemonic, vdform, vnform, input_n, input_imm) \ 3455 { \ 3456 CALL_TEST_NEON_HELPER_2OpImm(mnemonic, \ 3457 vdform, \ 3458 vnform, \ 3459 input_n, \ 3460 input_imm); \ 3461 } 3462 3463 #define DEFINE_TEST_NEON_2OPIMM(mnemonic, input, input_imm) \ 3464 TEST(mnemonic##_8B_2OPIMM) { \ 3465 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3466 8B, \ 3467 8B, \ 3468 kInput8bits##input, \ 3469 kInput8bitsImm##input_imm); \ 3470 } \ 3471 TEST(mnemonic##_16B_2OPIMM) { \ 3472 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3473 16B, \ 3474 16B, \ 3475 kInput8bits##input, \ 3476 kInput8bitsImm##input_imm); \ 3477 } \ 3478 TEST(mnemonic##_4H_2OPIMM) { \ 3479 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3480 4H, \ 3481 4H, \ 3482 kInput16bits##input, \ 3483 kInput16bitsImm##input_imm); \ 3484 } \ 3485 TEST(mnemonic##_8H_2OPIMM) { \ 3486 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3487 8H, \ 3488 8H, \ 3489 kInput16bits##input, \ 3490 kInput16bitsImm##input_imm); \ 3491 } \ 3492 TEST(mnemonic##_2S_2OPIMM) { \ 3493 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3494 2S, \ 3495 2S, \ 3496 kInput32bits##input, \ 3497 kInput32bitsImm##input_imm); \ 3498 } \ 3499 TEST(mnemonic##_4S_2OPIMM) { \ 3500 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3501 4S, \ 3502 4S, \ 3503 kInput32bits##input, \ 3504 kInput32bitsImm##input_imm); \ 3505 } \ 3506 TEST(mnemonic##_2D_2OPIMM) { \ 3507 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3508 2D, \ 3509 2D, \ 3510 kInput64bits##input, \ 3511 kInput64bitsImm##input_imm); \ 3512 } 3513 3514 #define DEFINE_TEST_NEON_2OPIMM_COPY(mnemonic, input, input_imm) \ 3515 TEST(mnemonic##_8B_2OPIMM) { \ 3516 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3517 8B, \ 3518 B, \ 3519 
kInput8bits##input, \ 3520 kInput8bitsImm##input_imm); \ 3521 } \ 3522 TEST(mnemonic##_16B_2OPIMM) { \ 3523 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3524 16B, \ 3525 B, \ 3526 kInput8bits##input, \ 3527 kInput8bitsImm##input_imm); \ 3528 } \ 3529 TEST(mnemonic##_4H_2OPIMM) { \ 3530 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3531 4H, \ 3532 H, \ 3533 kInput16bits##input, \ 3534 kInput16bitsImm##input_imm); \ 3535 } \ 3536 TEST(mnemonic##_8H_2OPIMM) { \ 3537 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3538 8H, \ 3539 H, \ 3540 kInput16bits##input, \ 3541 kInput16bitsImm##input_imm); \ 3542 } \ 3543 TEST(mnemonic##_2S_2OPIMM) { \ 3544 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3545 2S, \ 3546 S, \ 3547 kInput32bits##input, \ 3548 kInput32bitsImm##input_imm); \ 3549 } \ 3550 TEST(mnemonic##_4S_2OPIMM) { \ 3551 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3552 4S, \ 3553 S, \ 3554 kInput32bits##input, \ 3555 kInput32bitsImm##input_imm); \ 3556 } \ 3557 TEST(mnemonic##_2D_2OPIMM) { \ 3558 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3559 2D, \ 3560 D, \ 3561 kInput64bits##input, \ 3562 kInput64bitsImm##input_imm); \ 3563 } 3564 3565 #define DEFINE_TEST_NEON_2OPIMM_NARROW(mnemonic, input, input_imm) \ 3566 TEST(mnemonic##_8B_2OPIMM) { \ 3567 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3568 8B, \ 3569 8H, \ 3570 kInput16bits##input, \ 3571 kInput8bitsImm##input_imm); \ 3572 } \ 3573 TEST(mnemonic##_4H_2OPIMM) { \ 3574 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3575 4H, \ 3576 4S, \ 3577 kInput32bits##input, \ 3578 kInput16bitsImm##input_imm); \ 3579 } \ 3580 TEST(mnemonic##_2S_2OPIMM) { \ 3581 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3582 2S, \ 3583 2D, \ 3584 kInput64bits##input, \ 3585 kInput32bitsImm##input_imm); \ 3586 } \ 3587 TEST(mnemonic##2_16B_2OPIMM) { \ 3588 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, \ 3589 16B, \ 3590 8H, \ 3591 kInput16bits##input, \ 3592 kInput8bitsImm##input_imm); \ 3593 } \ 3594 TEST(mnemonic##2_8H_2OPIMM) { \ 3595 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, \ 
3596 8H, \ 3597 4S, \ 3598 kInput32bits##input, \ 3599 kInput16bitsImm##input_imm); \ 3600 } \ 3601 TEST(mnemonic##2_4S_2OPIMM) { \ 3602 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, \ 3603 4S, \ 3604 2D, \ 3605 kInput64bits##input, \ 3606 kInput32bitsImm##input_imm); \ 3607 } 3608 3609 #define DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(mnemonic, input, input_imm) \ 3610 TEST(mnemonic##_B_2OPIMM) { \ 3611 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3612 B, \ 3613 H, \ 3614 kInput16bits##input, \ 3615 kInput8bitsImm##input_imm); \ 3616 } \ 3617 TEST(mnemonic##_H_2OPIMM) { \ 3618 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3619 H, \ 3620 S, \ 3621 kInput32bits##input, \ 3622 kInput16bitsImm##input_imm); \ 3623 } \ 3624 TEST(mnemonic##_S_2OPIMM) { \ 3625 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3626 S, \ 3627 D, \ 3628 kInput64bits##input, \ 3629 kInput32bitsImm##input_imm); \ 3630 } 3631 3632 #define DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(mnemonic, input, input_imm) \ 3633 TEST(mnemonic##_2S_2OPIMM) { \ 3634 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3635 2S, \ 3636 2S, \ 3637 kInputFloat##Basic, \ 3638 kInputDoubleImm##input_imm) \ 3639 } \ 3640 TEST(mnemonic##_4S_2OPIMM) { \ 3641 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3642 4S, \ 3643 4S, \ 3644 kInputFloat##input, \ 3645 kInputDoubleImm##input_imm); \ 3646 } \ 3647 TEST(mnemonic##_2D_2OPIMM) { \ 3648 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3649 2D, \ 3650 2D, \ 3651 kInputDouble##input, \ 3652 kInputDoubleImm##input_imm); \ 3653 } 3654 3655 #define DEFINE_TEST_NEON_2OPIMM_FP(mnemonic, input, input_imm) \ 3656 TEST(mnemonic##_2S_2OPIMM) { \ 3657 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3658 2S, \ 3659 2S, \ 3660 kInputFloat##Basic, \ 3661 kInput32bitsImm##input_imm) \ 3662 } \ 3663 TEST(mnemonic##_4S_2OPIMM) { \ 3664 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3665 4S, \ 3666 4S, \ 3667 kInputFloat##input, \ 3668 kInput32bitsImm##input_imm) \ 3669 } \ 3670 TEST(mnemonic##_2D_2OPIMM) { \ 3671 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3672 
2D, \ 3673 2D, \ 3674 kInputDouble##input, \ 3675 kInput64bitsImm##input_imm) \ 3676 } 3677 3678 #define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(mnemonic, input, input_imm) \ 3679 TEST(mnemonic##_S_2OPIMM) { \ 3680 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3681 S, \ 3682 S, \ 3683 kInputFloat##Basic, \ 3684 kInput32bitsImm##input_imm) \ 3685 } \ 3686 TEST(mnemonic##_D_2OPIMM) { \ 3687 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3688 D, \ 3689 D, \ 3690 kInputDouble##input, \ 3691 kInput64bitsImm##input_imm) \ 3692 } 3693 3694 #define DEFINE_TEST_NEON_2OPIMM_SD(mnemonic, input, input_imm) \ 3695 TEST(mnemonic##_2S_2OPIMM) { \ 3696 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3697 2S, \ 3698 2S, \ 3699 kInput32bits##input, \ 3700 kInput32bitsImm##input_imm); \ 3701 } \ 3702 TEST(mnemonic##_4S_2OPIMM) { \ 3703 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3704 4S, \ 3705 4S, \ 3706 kInput32bits##input, \ 3707 kInput32bitsImm##input_imm); \ 3708 } \ 3709 TEST(mnemonic##_2D_2OPIMM) { \ 3710 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3711 2D, \ 3712 2D, \ 3713 kInput64bits##input, \ 3714 kInput64bitsImm##input_imm); \ 3715 } 3716 3717 #define DEFINE_TEST_NEON_2OPIMM_SCALAR_D(mnemonic, input, input_imm) \ 3718 TEST(mnemonic##_D_2OPIMM) { \ 3719 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3720 D, \ 3721 D, \ 3722 kInput64bits##input, \ 3723 kInput64bitsImm##input_imm); \ 3724 } 3725 3726 #define DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(mnemonic, input, input_imm) \ 3727 TEST(mnemonic##_S_2OPIMM) { \ 3728 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3729 S, \ 3730 S, \ 3731 kInput32bits##input, \ 3732 kInput32bitsImm##input_imm); \ 3733 } \ 3734 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(mnemonic, input, input_imm) 3735 3736 #define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D(mnemonic, input, input_imm) \ 3737 TEST(mnemonic##_D_2OPIMM) { \ 3738 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3739 D, \ 3740 D, \ 3741 kInputDouble##input, \ 3742 kInputDoubleImm##input_imm); \ 3743 } 3744 3745 #define 
DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(mnemonic, input, input_imm) \ 3746 TEST(mnemonic##_S_2OPIMM) { \ 3747 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3748 S, \ 3749 S, \ 3750 kInputFloat##input, \ 3751 kInputDoubleImm##input_imm); \ 3752 } \ 3753 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D(mnemonic, input, input_imm) 3754 3755 #define DEFINE_TEST_NEON_2OPIMM_SCALAR(mnemonic, input, input_imm) \ 3756 TEST(mnemonic##_B_2OPIMM) { \ 3757 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3758 B, \ 3759 B, \ 3760 kInput8bits##input, \ 3761 kInput8bitsImm##input_imm); \ 3762 } \ 3763 TEST(mnemonic##_H_2OPIMM) { \ 3764 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3765 H, \ 3766 H, \ 3767 kInput16bits##input, \ 3768 kInput16bitsImm##input_imm); \ 3769 } \ 3770 DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(mnemonic, input, input_imm) 3771 3772 #define DEFINE_TEST_NEON_2OPIMM_LONG(mnemonic, input, input_imm) \ 3773 TEST(mnemonic##_8H_2OPIMM) { \ 3774 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3775 8H, \ 3776 8B, \ 3777 kInput8bits##input, \ 3778 kInput8bitsImm##input_imm); \ 3779 } \ 3780 TEST(mnemonic##_4S_2OPIMM) { \ 3781 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3782 4S, \ 3783 4H, \ 3784 kInput16bits##input, \ 3785 kInput16bitsImm##input_imm); \ 3786 } \ 3787 TEST(mnemonic##_2D_2OPIMM) { \ 3788 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3789 2D, \ 3790 2S, \ 3791 kInput32bits##input, \ 3792 kInput32bitsImm##input_imm); \ 3793 } \ 3794 TEST(mnemonic##2_8H_2OPIMM) { \ 3795 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, \ 3796 8H, \ 3797 16B, \ 3798 kInput8bits##input, \ 3799 kInput8bitsImm##input_imm); \ 3800 } \ 3801 TEST(mnemonic##2_4S_2OPIMM) { \ 3802 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, \ 3803 4S, \ 3804 8H, \ 3805 kInput16bits##input, \ 3806 kInput16bitsImm##input_imm); \ 3807 } \ 3808 TEST(mnemonic##2_2D_2OPIMM) { \ 3809 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, \ 3810 2D, \ 3811 4S, \ 3812 kInput32bits##input, \ 3813 kInput32bitsImm##input_imm); \ 3814 } 3815 3816 #define CALL_TEST_NEON_HELPER_BYELEMENT( \ 
3817 mnemonic, vdform, vnform, vmform, input_d, input_n, input_m, indices) \ 3818 { \ 3819 CALL_TEST_NEON_HELPER_ByElement(mnemonic, \ 3820 vdform, \ 3821 vnform, \ 3822 vmform, \ 3823 input_d, \ 3824 input_n, \ 3825 input_m, \ 3826 indices); \ 3827 } 3828 3829 #define DEFINE_TEST_NEON_BYELEMENT(mnemonic, input_d, input_n, input_m) \ 3830 TEST(mnemonic##_4H_4H_H) { \ 3831 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ 3832 4H, \ 3833 4H, \ 3834 H, \ 3835 kInput16bits##input_d, \ 3836 kInput16bits##input_n, \ 3837 kInput16bits##input_m, \ 3838 kInputHIndices); \ 3839 } \ 3840 TEST(mnemonic##_8H_8H_H) { \ 3841 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ 3842 8H, \ 3843 8H, \ 3844 H, \ 3845 kInput16bits##input_d, \ 3846 kInput16bits##input_n, \ 3847 kInput16bits##input_m, \ 3848 kInputHIndices); \ 3849 } \ 3850 TEST(mnemonic##_2S_2S_S) { \ 3851 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ 3852 2S, \ 3853 2S, \ 3854 S, \ 3855 kInput32bits##input_d, \ 3856 kInput32bits##input_n, \ 3857 kInput32bits##input_m, \ 3858 kInputSIndices); \ 3859 } \ 3860 TEST(mnemonic##_4S_4S_S) { \ 3861 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ 3862 4S, \ 3863 4S, \ 3864 S, \ 3865 kInput32bits##input_d, \ 3866 kInput32bits##input_n, \ 3867 kInput32bits##input_m, \ 3868 kInputSIndices); \ 3869 } 3870 3871 #define DEFINE_TEST_NEON_BYELEMENT_SCALAR(mnemonic, input_d, input_n, input_m) \ 3872 TEST(mnemonic##_H_H_H) { \ 3873 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ 3874 H, \ 3875 H, \ 3876 H, \ 3877 kInput16bits##input_d, \ 3878 kInput16bits##input_n, \ 3879 kInput16bits##input_m, \ 3880 kInputHIndices); \ 3881 } \ 3882 TEST(mnemonic##_S_S_S) { \ 3883 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ 3884 S, \ 3885 S, \ 3886 S, \ 3887 kInput32bits##input_d, \ 3888 kInput32bits##input_n, \ 3889 kInput32bits##input_m, \ 3890 kInputSIndices); \ 3891 } 3892 3893 #define DEFINE_TEST_NEON_FP_BYELEMENT(mnemonic, input_d, input_n, input_m) \ 3894 TEST(mnemonic##_2S_2S_S) { \ 3895 
CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ 3896 2S, \ 3897 2S, \ 3898 S, \ 3899 kInputFloat##input_d, \ 3900 kInputFloat##input_n, \ 3901 kInputFloat##input_m, \ 3902 kInputSIndices); \ 3903 } \ 3904 TEST(mnemonic##_4S_4S_S) { \ 3905 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ 3906 4S, \ 3907 4S, \ 3908 S, \ 3909 kInputFloat##input_d, \ 3910 kInputFloat##input_n, \ 3911 kInputFloat##input_m, \ 3912 kInputSIndices); \ 3913 } \ 3914 TEST(mnemonic##_2D_2D_D) { \ 3915 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ 3916 2D, \ 3917 2D, \ 3918 D, \ 3919 kInputDouble##input_d, \ 3920 kInputDouble##input_n, \ 3921 kInputDouble##input_m, \ 3922 kInputDIndices); \ 3923 } 3924 3925 #define DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(mnemonic, inp_d, inp_n, inp_m) \ 3926 TEST(mnemonic##_S_S_S) { \ 3927 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ 3928 S, \ 3929 S, \ 3930 S, \ 3931 kInputFloat##inp_d, \ 3932 kInputFloat##inp_n, \ 3933 kInputFloat##inp_m, \ 3934 kInputSIndices); \ 3935 } \ 3936 TEST(mnemonic##_D_D_D) { \ 3937 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ 3938 D, \ 3939 D, \ 3940 D, \ 3941 kInputDouble##inp_d, \ 3942 kInputDouble##inp_n, \ 3943 kInputDouble##inp_m, \ 3944 kInputDIndices); \ 3945 } 3946 3947 3948 #define DEFINE_TEST_NEON_BYELEMENT_DIFF(mnemonic, input_d, input_n, input_m) \ 3949 TEST(mnemonic##_4S_4H_H) { \ 3950 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ 3951 4S, \ 3952 4H, \ 3953 H, \ 3954 kInput32bits##input_d, \ 3955 kInput16bits##input_n, \ 3956 kInput16bits##input_m, \ 3957 kInputHIndices); \ 3958 } \ 3959 TEST(mnemonic##2_4S_8H_H) { \ 3960 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic##2, \ 3961 4S, \ 3962 8H, \ 3963 H, \ 3964 kInput32bits##input_d, \ 3965 kInput16bits##input_n, \ 3966 kInput16bits##input_m, \ 3967 kInputHIndices); \ 3968 } \ 3969 TEST(mnemonic##_2D_2S_S) { \ 3970 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ 3971 2D, \ 3972 2S, \ 3973 S, \ 3974 kInput64bits##input_d, \ 3975 kInput32bits##input_n, \ 3976 kInput32bits##input_m, \ 3977 
kInputSIndices); \ 3978 } \ 3979 TEST(mnemonic##2_2D_4S_S) { \ 3980 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic##2, \ 3981 2D, \ 3982 4S, \ 3983 S, \ 3984 kInput64bits##input_d, \ 3985 kInput32bits##input_n, \ 3986 kInput32bits##input_m, \ 3987 kInputSIndices); \ 3988 } 3989 3990 #define DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR( \ 3991 mnemonic, input_d, input_n, input_m) \ 3992 TEST(mnemonic##_S_H_H) { \ 3993 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ 3994 S, \ 3995 H, \ 3996 H, \ 3997 kInput32bits##input_d, \ 3998 kInput16bits##input_n, \ 3999 kInput16bits##input_m, \ 4000 kInputHIndices); \ 4001 } \ 4002 TEST(mnemonic##_D_S_S) { \ 4003 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ 4004 D, \ 4005 S, \ 4006 S, \ 4007 kInput64bits##input_d, \ 4008 kInput32bits##input_n, \ 4009 kInput32bits##input_m, \ 4010 kInputSIndices); \ 4011 } 4012 4013 4014 #define CALL_TEST_NEON_HELPER_2OP2IMM( \ 4015 mnemonic, variant, input_d, input_imm1, input_n, input_imm2) \ 4016 { \ 4017 CALL_TEST_NEON_HELPER_OpImmOpImm(&MacroAssembler::mnemonic, \ 4018 mnemonic, \ 4019 variant, \ 4020 variant, \ 4021 input_d, \ 4022 input_imm1, \ 4023 input_n, \ 4024 input_imm2); \ 4025 } 4026 4027 #define DEFINE_TEST_NEON_2OP2IMM( \ 4028 mnemonic, input_d, input_imm1, input_n, input_imm2) \ 4029 TEST(mnemonic##_B) { \ 4030 CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic, \ 4031 16B, \ 4032 kInput8bits##input_d, \ 4033 kInput8bitsImm##input_imm1, \ 4034 kInput8bits##input_n, \ 4035 kInput8bitsImm##input_imm2); \ 4036 } \ 4037 TEST(mnemonic##_H) { \ 4038 CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic, \ 4039 8H, \ 4040 kInput16bits##input_d, \ 4041 kInput16bitsImm##input_imm1, \ 4042 kInput16bits##input_n, \ 4043 kInput16bitsImm##input_imm2); \ 4044 } \ 4045 TEST(mnemonic##_S) { \ 4046 CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic, \ 4047 4S, \ 4048 kInput32bits##input_d, \ 4049 kInput32bitsImm##input_imm1, \ 4050 kInput32bits##input_n, \ 4051 kInput32bitsImm##input_imm2); \ 4052 } \ 4053 TEST(mnemonic##_D) { \ 4054 
CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic, \ 4055 2D, \ 4056 kInput64bits##input_d, \ 4057 kInput64bitsImm##input_imm1, \ 4058 kInput64bits##input_n, \ 4059 kInput64bitsImm##input_imm2); \ 4060 } 4061 4062 4063 // Advanced SIMD copy. 4064 DEFINE_TEST_NEON_2OP2IMM( 4065 ins, Basic, LaneCountFromZero, Basic, LaneCountFromZero) 4066 DEFINE_TEST_NEON_2OPIMM_COPY(dup, Basic, LaneCountFromZero) 4067 4068 4069 // Advanced SIMD scalar copy. 4070 DEFINE_TEST_NEON_2OPIMM_SCALAR(dup, Basic, LaneCountFromZero) 4071 4072 4073 // Advanced SIMD three same. 4074 DEFINE_TEST_NEON_3SAME_NO2D(shadd, Basic) 4075 DEFINE_TEST_NEON_3SAME(sqadd, Basic) 4076 DEFINE_TEST_NEON_3SAME_NO2D(srhadd, Basic) 4077 DEFINE_TEST_NEON_3SAME_NO2D(shsub, Basic) 4078 DEFINE_TEST_NEON_3SAME(sqsub, Basic) 4079 DEFINE_TEST_NEON_3SAME(cmgt, Basic) 4080 DEFINE_TEST_NEON_3SAME(cmge, Basic) 4081 DEFINE_TEST_NEON_3SAME(sshl, Basic) 4082 DEFINE_TEST_NEON_3SAME(sqshl, Basic) 4083 DEFINE_TEST_NEON_3SAME(srshl, Basic) 4084 DEFINE_TEST_NEON_3SAME(sqrshl, Basic) 4085 DEFINE_TEST_NEON_3SAME_NO2D(smax, Basic) 4086 DEFINE_TEST_NEON_3SAME_NO2D(smin, Basic) 4087 DEFINE_TEST_NEON_3SAME_NO2D(sabd, Basic) 4088 DEFINE_TEST_NEON_3SAME_NO2D(saba, Basic) 4089 DEFINE_TEST_NEON_3SAME(add, Basic) 4090 DEFINE_TEST_NEON_3SAME(cmtst, Basic) 4091 DEFINE_TEST_NEON_3SAME_NO2D(mla, Basic) 4092 DEFINE_TEST_NEON_3SAME_NO2D(mul, Basic) 4093 DEFINE_TEST_NEON_3SAME_NO2D(smaxp, Basic) 4094 DEFINE_TEST_NEON_3SAME_NO2D(sminp, Basic) 4095 DEFINE_TEST_NEON_3SAME_HS(sqdmulh, Basic) 4096 DEFINE_TEST_NEON_3SAME(addp, Basic) 4097 DEFINE_TEST_NEON_3SAME_FP(fmaxnm, Basic) 4098 DEFINE_TEST_NEON_3SAME_FP(fmla, Basic) 4099 DEFINE_TEST_NEON_3SAME_FP(fadd, Basic) 4100 DEFINE_TEST_NEON_3SAME_FP(fmulx, Basic) 4101 DEFINE_TEST_NEON_3SAME_FP(fcmeq, Basic) 4102 DEFINE_TEST_NEON_3SAME_FP(fmax, Basic) 4103 DEFINE_TEST_NEON_3SAME_FP(frecps, Basic) 4104 DEFINE_TEST_NEON_3SAME_8B_16B(and_, Basic) 4105 DEFINE_TEST_NEON_3SAME_8B_16B(bic, Basic) 4106 
// Advanced SIMD three same (continued): remaining FP, logical, and unsigned
// integer instructions. Each invocation expands to one TEST per supported
// arrangement (see the DEFINE_TEST_NEON_* macro definitions above).
DEFINE_TEST_NEON_3SAME_FP(fminnm, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmls, Basic)
DEFINE_TEST_NEON_3SAME_FP(fsub, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmin, Basic)
DEFINE_TEST_NEON_3SAME_FP(frsqrts, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(orr, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(orn, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(uhadd, Basic)
DEFINE_TEST_NEON_3SAME(uqadd, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(urhadd, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(uhsub, Basic)
DEFINE_TEST_NEON_3SAME(uqsub, Basic)
DEFINE_TEST_NEON_3SAME(cmhi, Basic)
DEFINE_TEST_NEON_3SAME(cmhs, Basic)
DEFINE_TEST_NEON_3SAME(ushl, Basic)
DEFINE_TEST_NEON_3SAME(uqshl, Basic)
DEFINE_TEST_NEON_3SAME(urshl, Basic)
DEFINE_TEST_NEON_3SAME(uqrshl, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(umax, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(umin, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(uabd, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(uaba, Basic)
DEFINE_TEST_NEON_3SAME(sub, Basic)
DEFINE_TEST_NEON_3SAME(cmeq, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(mls, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(pmul, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(uminp, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(umaxp, Basic)
DEFINE_TEST_NEON_3SAME_HS(sqrdmulh, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmaxnmp, Basic)
DEFINE_TEST_NEON_3SAME_FP(faddp, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmul, Basic)
DEFINE_TEST_NEON_3SAME_FP(fcmge, Basic)
DEFINE_TEST_NEON_3SAME_FP(facge, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmaxp, Basic)
DEFINE_TEST_NEON_3SAME_FP(fdiv, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(eor, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(bsl, Basic)
DEFINE_TEST_NEON_3SAME_FP(fminnmp, Basic)
DEFINE_TEST_NEON_3SAME_FP(fabd, Basic)
DEFINE_TEST_NEON_3SAME_FP(fcmgt, Basic)
DEFINE_TEST_NEON_3SAME_FP(facgt, Basic)
DEFINE_TEST_NEON_3SAME_FP(fminp, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(bit, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(bif, Basic)


// Advanced SIMD scalar three same.
// _SCALAR covers all scalar element sizes, _SCALAR_D only the D (64-bit)
// form, _SCALAR_HS the H and S forms — see the macro definitions above.
DEFINE_TEST_NEON_3SAME_SCALAR(sqadd, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR(sqsub, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(cmgt, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(cmge, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(sshl, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR(sqshl, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(srshl, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR(sqrshl, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(add, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(cmtst, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqdmulh, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(fmulx, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmeq, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(frecps, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(frsqrts, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(uqadd, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(uqsub, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(cmhi, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(cmhs, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(ushl, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR(uqshl, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(urshl, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR(uqrshl, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(sub, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(cmeq, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqrdmulh, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmge, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(facge, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(fabd, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmgt, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(facgt, Basic)


// Advanced SIMD three different.
// Three-different instructions: _LONG widens (result wider than sources),
// _WIDE has one wide source, _NARROW narrows the result — the suffix names
// the shape of the size change (see the macro definitions above).
DEFINE_TEST_NEON_3DIFF_LONG(saddl, Basic)
DEFINE_TEST_NEON_3DIFF_WIDE(saddw, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(ssubl, Basic)
DEFINE_TEST_NEON_3DIFF_WIDE(ssubw, Basic)
DEFINE_TEST_NEON_3DIFF_NARROW(addhn, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(sabal, Basic)
DEFINE_TEST_NEON_3DIFF_NARROW(subhn, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(sabdl, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(smlal, Basic)
DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmlal, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(smlsl, Basic)
DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmlsl, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(smull, Basic)
DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmull, Basic)
DEFINE_TEST_NEON_3DIFF_LONG_8H(pmull, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(uaddl, Basic)
DEFINE_TEST_NEON_3DIFF_WIDE(uaddw, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(usubl, Basic)
DEFINE_TEST_NEON_3DIFF_WIDE(usubw, Basic)
DEFINE_TEST_NEON_3DIFF_NARROW(raddhn, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(uabal, Basic)
DEFINE_TEST_NEON_3DIFF_NARROW(rsubhn, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(uabdl, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(umlal, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(umlsl, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(umull, Basic)


// Advanced SIMD scalar three different.
DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmlal, Basic)
DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmlsl, Basic)
DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmull, Basic)


// Advanced SIMD scalar pairwise.
// ADDP (scalar) only exists as D <- 2D, so it gets a hand-written test
// instead of a DEFINE_TEST_NEON_* family.
TEST(addp_SCALAR) {
  CALL_TEST_NEON_HELPER_2DIFF(addp, D, 2D, kInput64bitsBasic);
}
DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fmaxnmp, Basic)
DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(faddp, Basic)
DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fmaxp, Basic)
DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fminnmp, Basic)
DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fminp, Basic)


// Advanced SIMD shift by immediate.
// Shift-by-immediate tests. The third argument selects the immediate input
// list: TypeWidth for shifts of 1..width, TypeWidthFromZero for 0..width-1,
// TypeWidthFromZeroToWidth for the fixed-point-conversion fbits range.
DEFINE_TEST_NEON_2OPIMM(sshr, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(ssra, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(srshr, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(srsra, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(shl, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM(sqshl, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_NARROW(shrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_NARROW(rshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_NARROW(sqshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_NARROW(sqrshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_LONG(sshll, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_SD(scvtf,
                           FixedPointConversions,
                           TypeWidthFromZeroToWidth)
DEFINE_TEST_NEON_2OPIMM_FP(fcvtzs, Conversions, TypeWidthFromZeroToWidth)
DEFINE_TEST_NEON_2OPIMM(ushr, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(usra, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(urshr, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(ursra, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(sri, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(sli, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM(sqshlu, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM(uqshl, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_NARROW(sqshrun, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_NARROW(sqrshrun, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_NARROW(uqshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_NARROW(uqrshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_LONG(ushll, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_SD(ucvtf,
                           FixedPointConversions,
                           TypeWidthFromZeroToWidth)
DEFINE_TEST_NEON_2OPIMM_FP(fcvtzu, Conversions, TypeWidthFromZeroToWidth)


// Advanced SIMD scalar shift by immediate.
// Scalar shift-by-immediate tests; same immediate-range conventions as the
// vector versions above.
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sshr, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ssra, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(srshr, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(srsra, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(shl, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_SCALAR(sqshl, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqrshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(scvtf,
                                  FixedPointConversions,
                                  TypeWidthFromZeroToWidth)
DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(fcvtzs, Conversions, TypeWidthFromZeroToWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ushr, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(usra, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(urshr, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ursra, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sri, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sli, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_SCALAR(sqshlu, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_SCALAR(uqshl, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqshrun, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqrshrun, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(uqshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(uqrshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(ucvtf,
                                  FixedPointConversions,
                                  TypeWidthFromZeroToWidth)
DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(fcvtzu, Conversions, TypeWidthFromZeroToWidth)


// Advanced SIMD two-register miscellaneous.
// Two-register miscellaneous instructions. Compare-against-zero forms reuse
// the 2OPIMM machinery with a Zero immediate list.
DEFINE_TEST_NEON_2SAME_NO2D(rev64, Basic)
DEFINE_TEST_NEON_2SAME_8B_16B(rev16, Basic)
DEFINE_TEST_NEON_2DIFF_LONG(saddlp, Basic)
DEFINE_TEST_NEON_2SAME(suqadd, Basic)
DEFINE_TEST_NEON_2SAME_NO2D(cls, Basic)
DEFINE_TEST_NEON_2SAME_8B_16B(cnt, Basic)
DEFINE_TEST_NEON_2DIFF_LONG(sadalp, Basic)
DEFINE_TEST_NEON_2SAME(sqabs, Basic)
DEFINE_TEST_NEON_2OPIMM(cmgt, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM(cmeq, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM(cmlt, Basic, Zero)
DEFINE_TEST_NEON_2SAME(abs, Basic)
DEFINE_TEST_NEON_2DIFF_NARROW(xtn, Basic)
DEFINE_TEST_NEON_2DIFF_NARROW(sqxtn, Basic)
DEFINE_TEST_NEON_2DIFF_FP_NARROW(fcvtn, Conversions)
DEFINE_TEST_NEON_2DIFF_FP_LONG(fcvtl, Conversions)
DEFINE_TEST_NEON_2SAME_FP(frintn, Conversions)
DEFINE_TEST_NEON_2SAME_FP(frintm, Conversions)
DEFINE_TEST_NEON_2SAME_FP(fcvtns, Conversions)
DEFINE_TEST_NEON_2SAME_FP(fcvtms, Conversions)
DEFINE_TEST_NEON_2SAME_FP(fcvtas, Conversions)
// SCVTF (vector, integer) covered by SCVTF(vector, fixed point) with fbits 0.
DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmgt, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmeq, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmlt, Basic, Zero)
DEFINE_TEST_NEON_2SAME_FP(fabs, Basic)
DEFINE_TEST_NEON_2SAME_FP(frintp, Conversions)
DEFINE_TEST_NEON_2SAME_FP(frintz, Conversions)
DEFINE_TEST_NEON_2SAME_FP(fcvtps, Conversions)
// FCVTZS(vector, integer) covered by FCVTZS(vector, fixed point) with fbits 0.
// Two-register miscellaneous (continued): unsigned and FP variants.
DEFINE_TEST_NEON_2SAME_2S_4S(urecpe, Basic)
DEFINE_TEST_NEON_2SAME_FP(frecpe, Basic)
DEFINE_TEST_NEON_2SAME_BH(rev32, Basic)
DEFINE_TEST_NEON_2DIFF_LONG(uaddlp, Basic)
DEFINE_TEST_NEON_2SAME(usqadd, Basic)
DEFINE_TEST_NEON_2SAME_NO2D(clz, Basic)
DEFINE_TEST_NEON_2DIFF_LONG(uadalp, Basic)
DEFINE_TEST_NEON_2SAME(sqneg, Basic)
DEFINE_TEST_NEON_2OPIMM(cmge, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM(cmle, Basic, Zero)
DEFINE_TEST_NEON_2SAME(neg, Basic)
DEFINE_TEST_NEON_2DIFF_NARROW(sqxtun, Basic)
DEFINE_TEST_NEON_2OPIMM_LONG(shll, Basic, SHLL)
DEFINE_TEST_NEON_2DIFF_NARROW(uqxtn, Basic)
DEFINE_TEST_NEON_2DIFF_FP_NARROW_2S(fcvtxn, Conversions)
DEFINE_TEST_NEON_2SAME_FP(frinta, Conversions)
DEFINE_TEST_NEON_2SAME_FP(frintx, Conversions)
DEFINE_TEST_NEON_2SAME_FP(fcvtnu, Conversions)
DEFINE_TEST_NEON_2SAME_FP(fcvtmu, Conversions)
DEFINE_TEST_NEON_2SAME_FP(fcvtau, Conversions)
// UCVTF (vector, integer) covered by UCVTF(vector, fixed point) with fbits 0.
DEFINE_TEST_NEON_2SAME_8B_16B(not_, Basic)
DEFINE_TEST_NEON_2SAME_8B_16B(rbit, Basic)
DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmge, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmle, Basic, Zero)
DEFINE_TEST_NEON_2SAME_FP(fneg, Basic)
DEFINE_TEST_NEON_2SAME_FP(frinti, Conversions)
DEFINE_TEST_NEON_2SAME_FP(fcvtpu, Conversions)
// FCVTZU(vector, integer) covered by FCVTZU(vector, fixed point) with fbits 0.
DEFINE_TEST_NEON_2SAME_2S_4S(ursqrte, Basic)
DEFINE_TEST_NEON_2SAME_FP(frsqrte, Basic)
DEFINE_TEST_NEON_2SAME_FP(fsqrt, Basic)


// Advanced SIMD scalar two-register miscellaneous.
// Scalar two-register miscellaneous instructions.
DEFINE_TEST_NEON_2SAME_SCALAR(suqadd, Basic)
DEFINE_TEST_NEON_2SAME_SCALAR(sqabs, Basic)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmgt, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmeq, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmlt, Basic, Zero)
DEFINE_TEST_NEON_2SAME_SCALAR_D(abs, Basic)
DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(sqxtn, Basic)
DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtns, Conversions)
DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtms, Conversions)
DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtas, Conversions)
// SCVTF (vector, integer) covered by SCVTF(vector, fixed point) with fbits 0.
DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmgt, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmeq, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmlt, Basic, Zero)
DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtps, Conversions)
// FCVTZS(vector, integer) covered by FCVTZS(vector, fixed point) with fbits 0.
DEFINE_TEST_NEON_2SAME_FP_SCALAR(frecpe, Basic)
DEFINE_TEST_NEON_2SAME_FP_SCALAR(frecpx, Basic)
DEFINE_TEST_NEON_2SAME_SCALAR(usqadd, Basic)
DEFINE_TEST_NEON_2SAME_SCALAR(sqneg, Basic)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmge, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmle, Basic, Zero)
DEFINE_TEST_NEON_2SAME_SCALAR_D(neg, Basic)
DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(sqxtun, Basic)
DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(uqxtn, Basic)
// FCVTXN (scalar) only exists as S <- D, so it gets a hand-written test
// using the double-precision conversion inputs.
TEST(fcvtxn_SCALAR) {
  CALL_TEST_NEON_HELPER_2DIFF(fcvtxn, S, D, kInputDoubleConversions);
}
DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtnu, Conversions)
DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtmu, Conversions)
DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtau, Conversions)
// UCVTF (vector, integer) covered by UCVTF(vector, fixed point) with fbits 0.
DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmge, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmle, Basic, Zero)
DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtpu, Conversions)
// FCVTZU(vector, integer) covered by FCVTZU(vector, fixed point) with fbits 0.
DEFINE_TEST_NEON_2SAME_FP_SCALAR(frsqrte, Basic)


// Advanced SIMD across lanes (vector reductions to a scalar result).
DEFINE_TEST_NEON_ACROSS_LONG(saddlv, Basic)
DEFINE_TEST_NEON_ACROSS(smaxv, Basic)
DEFINE_TEST_NEON_ACROSS(sminv, Basic)
DEFINE_TEST_NEON_ACROSS(addv, Basic)
DEFINE_TEST_NEON_ACROSS_LONG(uaddlv, Basic)
DEFINE_TEST_NEON_ACROSS(umaxv, Basic)
DEFINE_TEST_NEON_ACROSS(uminv, Basic)
DEFINE_TEST_NEON_ACROSS_FP(fmaxnmv, Basic)
DEFINE_TEST_NEON_ACROSS_FP(fmaxv, Basic)
DEFINE_TEST_NEON_ACROSS_FP(fminnmv, Basic)
DEFINE_TEST_NEON_ACROSS_FP(fminv, Basic)


// Advanced SIMD permute. These take two same-sized register operands, so
// they reuse the three-same test machinery.
DEFINE_TEST_NEON_3SAME(uzp1, Basic)
DEFINE_TEST_NEON_3SAME(trn1, Basic)
DEFINE_TEST_NEON_3SAME(zip1, Basic)
DEFINE_TEST_NEON_3SAME(uzp2, Basic)
DEFINE_TEST_NEON_3SAME(trn2, Basic)
DEFINE_TEST_NEON_3SAME(zip2, Basic)


// Advanced SIMD vector x indexed element.
// Vector-by-element instructions. The three input-list arguments select the
// destination, first source, and indexed-element source inputs respectively.
DEFINE_TEST_NEON_BYELEMENT_DIFF(smlal, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmlal, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF(smlsl, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmlsl, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT(mul, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF(smull, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmull, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT(sqdmulh, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT(sqrdmulh, Basic, Basic, Basic)
DEFINE_TEST_NEON_FP_BYELEMENT(fmla, Basic, Basic, Basic)
DEFINE_TEST_NEON_FP_BYELEMENT(fmls, Basic, Basic, Basic)
DEFINE_TEST_NEON_FP_BYELEMENT(fmul, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT(mla, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF(umlal, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT(mls, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF(umlsl, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF(umull, Basic, Basic, Basic)
DEFINE_TEST_NEON_FP_BYELEMENT(fmulx, Basic, Basic, Basic)


// Advanced SIMD scalar x indexed element.
DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmlal, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmlsl, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmull, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqdmulh, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqrdmulh, Basic, Basic, Basic)
DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmla, Basic, Basic, Basic)
DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmls, Basic, Basic, Basic)
DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmul, Basic, Basic, Basic)
DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmulx, Basic, Basic, Basic)

}  // namespace aarch64
}  // namespace vixl