1 // Copyright 2015, VIXL authors 2 // All rights reserved. 3 // 4 // Redistribution and use in source and binary forms, with or without 5 // modification, are permitted provided that the following conditions are met: 6 // 7 // * Redistributions of source code must retain the above copyright notice, 8 // this list of conditions and the following disclaimer. 9 // * Redistributions in binary form must reproduce the above copyright notice, 10 // this list of conditions and the following disclaimer in the documentation 11 // and/or other materials provided with the distribution. 12 // * Neither the name of ARM Limited nor the names of its contributors may be 13 // used to endorse or promote products derived from this software without 14 // specific prior written permission. 15 // 16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND 17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE 20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 27 #include <cfloat> 28 #include <cstdio> 29 30 #include <sstream> 31 32 #include "test-runner.h" 33 #include "test-utils.h" 34 35 #include "aarch64/test-simulator-inputs-aarch64.h" 36 #include "aarch64/test-simulator-traces-aarch64.h" 37 #include "aarch64/test-utils-aarch64.h" 38 39 #include "aarch64/cpu-features-auditor-aarch64.h" 40 #include "aarch64/macro-assembler-aarch64.h" 41 #include "aarch64/simulator-aarch64.h" 42 43 namespace vixl { 44 namespace aarch64 { 45 46 // ==== Simulator Tests ==== 47 // 48 // These simulator tests check instruction behaviour against a trace taken from 49 // real AArch64 hardware. The same test code is used to generate the trace; the 50 // results are printed to stdout when the test is run with 51 // --generate_test_trace. 52 // 53 // The input lists and expected results are stored in test/traces. The expected 54 // results can be regenerated using tools/generate_simulator_traces.py. Adding a 55 // test for a new instruction is described at the top of 56 // test-simulator-traces-aarch64.h. 57 58 #define __ masm. 59 #define TEST(name) TEST_(AARCH64_SIM_##name) 60 61 #define SETUP() SETUP_WITH_FEATURES(CPUFeatures()) 62 63 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64 64 65 #define SETUP_WITH_FEATURES(...) \ 66 MacroAssembler masm; \ 67 masm.SetCPUFeatures(CPUFeatures(__VA_ARGS__)); \ 68 Decoder decoder; \ 69 Simulator simulator(&decoder); \ 70 simulator.SetColouredTrace(Test::coloured_trace()); \ 71 simulator.SetInstructionStats(Test::instruction_stats()); 72 73 #define START() \ 74 masm.Reset(); \ 75 simulator.ResetState(); \ 76 __ PushCalleeSavedRegisters(); \ 77 if (Test::trace_reg()) { \ 78 __ Trace(LOG_STATE, TRACE_ENABLE); \ 79 } \ 80 if (Test::trace_write()) { \ 81 __ Trace(LOG_WRITE, TRACE_ENABLE); \ 82 } \ 83 if (Test::trace_sim()) { \ 84 __ Trace(LOG_DISASM, TRACE_ENABLE); \ 85 } \ 86 if (Test::instruction_stats()) { \ 87 __ EnableInstrumentation(); \ 88 } 89 90 #define END() \ 91 if (Test::instruction_stats()) { \ 92 __ DisableInstrumentation(); \ 93 } \ 94 __ Trace(LOG_ALL, TRACE_DISABLE); \ 95 __ PopCalleeSavedRegisters(); \ 96 __ Ret(); \ 97 masm.FinalizeCode() 98 99 #define TRY_RUN(skipped) \ 100 DISASSEMBLE(); \ 101 simulator.RunFrom(masm.GetBuffer()->GetStartAddress<Instruction*>()); \ 102 /* The simulator can run every test. */ \ 103 *skipped = false 104 105 #define TEARDOWN() 106 107 #else // VIXL_INCLUDE_SIMULATOR_AARCH64 108 109 #define SETUP_WITH_FEATURES(...) \ 110 MacroAssembler masm; \ 111 masm.SetCPUFeatures(CPUFeatures(__VA_ARGS__)); \ 112 CPU::SetUp() 113 114 #define START() \ 115 masm.Reset(); \ 116 __ PushCalleeSavedRegisters() 117 118 #define END() \ 119 __ PopCalleeSavedRegisters(); \ 120 __ Ret(); \ 121 masm.FinalizeCode() 122 123 #define TRY_RUN(skipped) \ 124 DISASSEMBLE(); \ 125 /* If the test uses features that the current CPU doesn't support, don't */ \ 126 /* attempt to run it natively. */ \ 127 { \ 128 Decoder decoder; \ 129 /* TODO: Once available, use runtime feature detection. The use of */ \ 130 /* AArch64LegacyBaseline is a stopgap. */ \ 131 const CPUFeatures& this_machine = CPUFeatures::AArch64LegacyBaseline(); \ 132 CPUFeaturesAuditor auditor(&decoder, this_machine); \ 133 CodeBuffer* buffer = masm.GetBuffer(); \ 134 decoder.Decode(buffer->GetStartAddress<Instruction*>(), \ 135 buffer->GetEndAddress<Instruction*>()); \ 136 const CPUFeatures& requirements = auditor.GetSeenFeatures(); \ 137 if (this_machine.Has(requirements)) { \ 138 masm.GetBuffer()->SetExecutable(); \ 139 ExecuteMemory(buffer->GetStartAddress<byte*>(), \ 140 masm.GetSizeOfCodeGenerated()); \ 141 masm.GetBuffer()->SetWritable(); \ 142 *skipped = false; \ 143 } else { \ 144 std::stringstream os; \ 145 os << "Warning: skipping test due to missing CPU features.\n"; \ 146 os << " Missing: {" << requirements.Without(this_machine) << "}\n"; \ 147 printf("%s", os.str().c_str()); \ 148 *skipped = true; \ 149 } \ 150 } 151 152 #define TEARDOWN() 153 154 #endif // VIXL_INCLUDE_SIMULATOR_AARCH64 155 156 157 #define DISASSEMBLE() \ 158 if (Test::disassemble()) { \ 159 PrintDisassembler disasm(stdout); \ 160 CodeBuffer* buffer = masm.GetBuffer(); \ 161 Instruction* start = buffer->GetStartAddress<Instruction*>(); \ 162 Instruction* end = buffer->GetEndAddress<Instruction*>(); \ 163 disasm.DisassembleBuffer(start, end); \ 164 } 165 166 // The maximum number of errors to report in detail for each test. 167 static const unsigned kErrorReportLimit = 8; 168 169 170 // Overloaded versions of RawbitsToDouble and RawbitsToFloat for use in the 171 // templated test functions. 172 static float rawbits_to_fp(uint32_t bits) { return RawbitsToFloat(bits); } 173 174 static double rawbits_to_fp(uint64_t bits) { return RawbitsToDouble(bits); } 175 176 // The rawbits_to_fp functions are only used for printing decimal values so we 177 // just approximate FP16 as double. 178 static double rawbits_to_fp(uint16_t bits) { 179 return FPToDouble(RawbitsToFloat16(bits), kIgnoreDefaultNaN); 180 } 181 182 183 // MacroAssembler member function pointers to pass to the test dispatchers. 184 typedef void (MacroAssembler::*Test1OpFPHelper_t)(const FPRegister& fd, 185 const FPRegister& fn); 186 typedef void (MacroAssembler::*Test2OpFPHelper_t)(const FPRegister& fd, 187 const FPRegister& fn, 188 const FPRegister& fm); 189 typedef void (MacroAssembler::*Test3OpFPHelper_t)(const FPRegister& fd, 190 const FPRegister& fn, 191 const FPRegister& fm, 192 const FPRegister& fa); 193 typedef void (MacroAssembler::*TestFPCmpHelper_t)(const FPRegister& fn, 194 const FPRegister& fm); 195 typedef void (MacroAssembler::*TestFPCmpZeroHelper_t)(const FPRegister& fn, 196 double value); 197 typedef void (MacroAssembler::*TestFPToIntHelper_t)(const Register& rd, 198 const FPRegister& fn); 199 typedef void (MacroAssembler::*TestFPToFixedHelper_t)(const Register& rd, 200 const FPRegister& fn, 201 int fbits); 202 typedef void (MacroAssembler::*TestFixedToFPHelper_t)(const FPRegister& fd, 203 const Register& rn, 204 int fbits); 205 // TODO: 'Test2OpNEONHelper_t' and 'Test2OpFPHelper_t' can be 206 // consolidated into one routine. 207 typedef void (MacroAssembler::*Test1OpNEONHelper_t)(const VRegister& vd, 208 const VRegister& vn); 209 typedef void (MacroAssembler::*Test2OpNEONHelper_t)(const VRegister& vd, 210 const VRegister& vn, 211 const VRegister& vm); 212 typedef void (MacroAssembler::*TestByElementNEONHelper_t)(const VRegister& vd, 213 const VRegister& vn, 214 const VRegister& vm, 215 int vm_index); 216 typedef void (MacroAssembler::*TestOpImmOpImmVdUpdateNEONHelper_t)( 217 const VRegister& vd, int imm1, const VRegister& vn, int imm2); 218 219 // This helps using the same typename for both the function pointer 220 // and the array of immediates passed to helper routines. 221 template <typename T> 222 class Test2OpImmediateNEONHelper_t { 223 public: 224 typedef void (MacroAssembler::*mnemonic)(const VRegister& vd, 225 const VRegister& vn, 226 T imm); 227 }; 228 229 230 // Maximum number of hex characters required to represent values of either 231 // templated type. 232 template <typename Ta, typename Tb> 233 static unsigned MaxHexCharCount() { 234 unsigned count = static_cast<unsigned>(std::max(sizeof(Ta), sizeof(Tb))); 235 return (count * 8) / 4; 236 } 237 238 239 // Standard test dispatchers. 240 241 242 static void Test1Op_Helper(Test1OpFPHelper_t helper, 243 uintptr_t inputs, 244 unsigned inputs_length, 245 uintptr_t results, 246 unsigned d_size, 247 unsigned n_size, 248 bool* skipped) { 249 VIXL_ASSERT((d_size == kDRegSize) || (d_size == kSRegSize) || 250 (d_size == kHRegSize)); 251 VIXL_ASSERT((n_size == kDRegSize) || (n_size == kSRegSize) || 252 (n_size == kHRegSize)); 253 254 SETUP_WITH_FEATURES(CPUFeatures::kFP, CPUFeatures::kFPHalf); 255 START(); 256 257 // Roll up the loop to keep the code size down. 258 Label loop_n; 259 260 Register out = x0; 261 Register inputs_base = x1; 262 Register length = w2; 263 Register index_n = w3; 264 265 int n_index_shift; 266 FPRegister fd; 267 FPRegister fn; 268 if (n_size == kDRegSize) { 269 n_index_shift = kDRegSizeInBytesLog2; 270 fn = d1; 271 } else if (n_size == kSRegSize) { 272 n_index_shift = kSRegSizeInBytesLog2; 273 fn = s1; 274 } else { 275 n_index_shift = kHRegSizeInBytesLog2; 276 fn = h1; 277 } 278 279 if (d_size == kDRegSize) { 280 fd = d0; 281 } else if (d_size == kSRegSize) { 282 fd = s0; 283 } else { 284 fd = h0; 285 } 286 287 288 __ Mov(out, results); 289 __ Mov(inputs_base, inputs); 290 __ Mov(length, inputs_length); 291 292 __ Mov(index_n, 0); 293 __ Bind(&loop_n); 294 __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, n_index_shift)); 295 296 { 297 SingleEmissionCheckScope guard(&masm); 298 (masm.*helper)(fd, fn); 299 } 300 __ Str(fd, MemOperand(out, fd.GetSizeInBytes(), PostIndex)); 301 302 __ Add(index_n, index_n, 1); 303 __ Cmp(index_n, inputs_length); 304 __ B(lo, &loop_n); 305 306 END(); 307 TRY_RUN(skipped); 308 TEARDOWN(); 309 } 310 311 312 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of 313 // rawbits representations of doubles or floats. This ensures that exact bit 314 // comparisons can be performed. 315 template <typename Tn, typename Td> 316 static void Test1Op(const char* name, 317 Test1OpFPHelper_t helper, 318 const Tn inputs[], 319 unsigned inputs_length, 320 const Td expected[], 321 unsigned expected_length) { 322 VIXL_ASSERT(inputs_length > 0); 323 324 const unsigned results_length = inputs_length; 325 Td* results = new Td[results_length]; 326 327 const unsigned d_bits = sizeof(Td) * 8; 328 const unsigned n_bits = sizeof(Tn) * 8; 329 bool skipped; 330 331 Test1Op_Helper(helper, 332 reinterpret_cast<uintptr_t>(inputs), 333 inputs_length, 334 reinterpret_cast<uintptr_t>(results), 335 d_bits, 336 n_bits, 337 &skipped); 338 339 if (Test::generate_test_trace()) { 340 // Print the results. 341 printf("const uint%u_t kExpected_%s[] = {\n", d_bits, name); 342 for (unsigned d = 0; d < results_length; d++) { 343 printf(" 0x%0*" PRIx64 ",\n", 344 d_bits / 4, 345 static_cast<uint64_t>(results[d])); 346 } 347 printf("};\n"); 348 printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length); 349 } else if (!skipped) { 350 // Check the results. 351 VIXL_CHECK(expected_length == results_length); 352 unsigned error_count = 0; 353 unsigned d = 0; 354 for (unsigned n = 0; n < inputs_length; n++, d++) { 355 if (results[d] != expected[d]) { 356 if (++error_count > kErrorReportLimit) continue; 357 358 printf("%s 0x%0*" PRIx64 " (%s %g):\n", 359 name, 360 n_bits / 4, 361 static_cast<uint64_t>(inputs[n]), 362 name, 363 rawbits_to_fp(inputs[n])); 364 printf(" Expected: 0x%0*" PRIx64 " (%g)\n", 365 d_bits / 4, 366 static_cast<uint64_t>(expected[d]), 367 rawbits_to_fp(expected[d])); 368 printf(" Found: 0x%0*" PRIx64 " (%g)\n", 369 d_bits / 4, 370 static_cast<uint64_t>(results[d]), 371 rawbits_to_fp(results[d])); 372 printf("\n"); 373 } 374 } 375 VIXL_ASSERT(d == expected_length); 376 if (error_count > kErrorReportLimit) { 377 printf("%u other errors follow.\n", error_count - kErrorReportLimit); 378 } 379 VIXL_CHECK(error_count == 0); 380 } 381 delete[] results; 382 } 383 384 385 static void Test2Op_Helper(Test2OpFPHelper_t helper, 386 uintptr_t inputs, 387 unsigned inputs_length, 388 uintptr_t results, 389 unsigned reg_size, 390 bool* skipped) { 391 VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize) || 392 (reg_size == kHRegSize)); 393 394 SETUP_WITH_FEATURES(CPUFeatures::kFP, CPUFeatures::kFPHalf); 395 START(); 396 397 // Roll up the loop to keep the code size down. 398 Label loop_n, loop_m; 399 400 Register out = x0; 401 Register inputs_base = x1; 402 Register length = w2; 403 Register index_n = w3; 404 Register index_m = w4; 405 406 bool double_op = reg_size == kDRegSize; 407 bool float_op = reg_size == kSRegSize; 408 int index_shift; 409 if (double_op) { 410 index_shift = kDRegSizeInBytesLog2; 411 } else if (float_op) { 412 index_shift = kSRegSizeInBytesLog2; 413 } else { 414 index_shift = kHRegSizeInBytesLog2; 415 } 416 417 FPRegister fd; 418 FPRegister fn; 419 FPRegister fm; 420 421 if (double_op) { 422 fd = d0; 423 fn = d1; 424 fm = d2; 425 } else if (float_op) { 426 fd = s0; 427 fn = s1; 428 fm = s2; 429 } else { 430 fd = h0; 431 fn = h1; 432 fm = h2; 433 } 434 435 __ Mov(out, results); 436 __ Mov(inputs_base, inputs); 437 __ Mov(length, inputs_length); 438 439 __ Mov(index_n, 0); 440 __ Bind(&loop_n); 441 __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift)); 442 443 __ Mov(index_m, 0); 444 __ Bind(&loop_m); 445 __ Ldr(fm, MemOperand(inputs_base, index_m, UXTW, index_shift)); 446 447 { 448 SingleEmissionCheckScope guard(&masm); 449 (masm.*helper)(fd, fn, fm); 450 } 451 __ Str(fd, MemOperand(out, fd.GetSizeInBytes(), PostIndex)); 452 453 __ Add(index_m, index_m, 1); 454 __ Cmp(index_m, inputs_length); 455 __ B(lo, &loop_m); 456 457 __ Add(index_n, index_n, 1); 458 __ Cmp(index_n, inputs_length); 459 __ B(lo, &loop_n); 460 461 END(); 462 TRY_RUN(skipped); 463 TEARDOWN(); 464 } 465 466 467 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of 468 // rawbits representations of doubles or floats. This ensures that exact bit 469 // comparisons can be performed. 470 template <typename T> 471 static void Test2Op(const char* name, 472 Test2OpFPHelper_t helper, 473 const T inputs[], 474 unsigned inputs_length, 475 const T expected[], 476 unsigned expected_length) { 477 VIXL_ASSERT(inputs_length > 0); 478 479 const unsigned results_length = inputs_length * inputs_length; 480 T* results = new T[results_length]; 481 482 const unsigned bits = sizeof(T) * 8; 483 bool skipped; 484 485 Test2Op_Helper(helper, 486 reinterpret_cast<uintptr_t>(inputs), 487 inputs_length, 488 reinterpret_cast<uintptr_t>(results), 489 bits, 490 &skipped); 491 492 if (Test::generate_test_trace()) { 493 // Print the results. 494 printf("const uint%u_t kExpected_%s[] = {\n", bits, name); 495 for (unsigned d = 0; d < results_length; d++) { 496 printf(" 0x%0*" PRIx64 ",\n", 497 bits / 4, 498 static_cast<uint64_t>(results[d])); 499 } 500 printf("};\n"); 501 printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length); 502 } else if (!skipped) { 503 // Check the results. 504 VIXL_CHECK(expected_length == results_length); 505 unsigned error_count = 0; 506 unsigned d = 0; 507 for (unsigned n = 0; n < inputs_length; n++) { 508 for (unsigned m = 0; m < inputs_length; m++, d++) { 509 if (results[d] != expected[d]) { 510 if (++error_count > kErrorReportLimit) continue; 511 512 printf("%s 0x%0*" PRIx64 ", 0x%0*" PRIx64 " (%s %g %g):\n", 513 name, 514 bits / 4, 515 static_cast<uint64_t>(inputs[n]), 516 bits / 4, 517 static_cast<uint64_t>(inputs[m]), 518 name, 519 rawbits_to_fp(inputs[n]), 520 rawbits_to_fp(inputs[m])); 521 printf(" Expected: 0x%0*" PRIx64 " (%g)\n", 522 bits / 4, 523 static_cast<uint64_t>(expected[d]), 524 rawbits_to_fp(expected[d])); 525 printf(" Found: 0x%0*" PRIx64 " (%g)\n", 526 bits / 4, 527 static_cast<uint64_t>(results[d]), 528 rawbits_to_fp(results[d])); 529 printf("\n"); 530 } 531 } 532 } 533 VIXL_ASSERT(d == expected_length); 534 if (error_count > kErrorReportLimit) { 535 printf("%u other errors follow.\n", error_count - kErrorReportLimit); 536 } 537 VIXL_CHECK(error_count == 0); 538 } 539 delete[] results; 540 } 541 542 543 static void Test3Op_Helper(Test3OpFPHelper_t helper, 544 uintptr_t inputs, 545 unsigned inputs_length, 546 uintptr_t results, 547 unsigned reg_size, 548 bool* skipped) { 549 VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize) || 550 (reg_size == kHRegSize)); 551 552 SETUP_WITH_FEATURES(CPUFeatures::kFP, CPUFeatures::kFPHalf); 553 START(); 554 555 // Roll up the loop to keep the code size down. 556 Label loop_n, loop_m, loop_a; 557 558 Register out = x0; 559 Register inputs_base = x1; 560 Register length = w2; 561 Register index_n = w3; 562 Register index_m = w4; 563 Register index_a = w5; 564 565 bool double_op = reg_size == kDRegSize; 566 bool single_op = reg_size == kSRegSize; 567 int index_shift; 568 FPRegister fd(0, reg_size); 569 FPRegister fn(1, reg_size); 570 FPRegister fm(2, reg_size); 571 FPRegister fa(3, reg_size); 572 if (double_op) { 573 index_shift = kDRegSizeInBytesLog2; 574 } else if (single_op) { 575 index_shift = kSRegSizeInBytesLog2; 576 } else { 577 index_shift = kHRegSizeInBytesLog2; 578 } 579 580 __ Mov(out, results); 581 __ Mov(inputs_base, inputs); 582 __ Mov(length, inputs_length); 583 584 __ Mov(index_n, 0); 585 __ Bind(&loop_n); 586 __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift)); 587 588 __ Mov(index_m, 0); 589 __ Bind(&loop_m); 590 __ Ldr(fm, MemOperand(inputs_base, index_m, UXTW, index_shift)); 591 592 __ Mov(index_a, 0); 593 __ Bind(&loop_a); 594 __ Ldr(fa, MemOperand(inputs_base, index_a, UXTW, index_shift)); 595 596 { 597 SingleEmissionCheckScope guard(&masm); 598 (masm.*helper)(fd, fn, fm, fa); 599 } 600 __ Str(fd, MemOperand(out, fd.GetSizeInBytes(), PostIndex)); 601 602 __ Add(index_a, index_a, 1); 603 __ Cmp(index_a, inputs_length); 604 __ B(lo, &loop_a); 605 606 __ Add(index_m, index_m, 1); 607 __ Cmp(index_m, inputs_length); 608 __ B(lo, &loop_m); 609 610 __ Add(index_n, index_n, 1); 611 __ Cmp(index_n, inputs_length); 612 __ B(lo, &loop_n); 613 614 END(); 615 TRY_RUN(skipped); 616 TEARDOWN(); 617 } 618 619 620 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of 621 // rawbits representations of doubles or floats. This ensures that exact bit 622 // comparisons can be performed. 623 template <typename T> 624 static void Test3Op(const char* name, 625 Test3OpFPHelper_t helper, 626 const T inputs[], 627 unsigned inputs_length, 628 const T expected[], 629 unsigned expected_length) { 630 VIXL_ASSERT(inputs_length > 0); 631 632 const unsigned results_length = inputs_length * inputs_length * inputs_length; 633 T* results = new T[results_length]; 634 635 const unsigned bits = sizeof(T) * 8; 636 bool skipped; 637 638 Test3Op_Helper(helper, 639 reinterpret_cast<uintptr_t>(inputs), 640 inputs_length, 641 reinterpret_cast<uintptr_t>(results), 642 bits, 643 &skipped); 644 645 if (Test::generate_test_trace()) { 646 // Print the results. 647 printf("const uint%u_t kExpected_%s[] = {\n", bits, name); 648 for (unsigned d = 0; d < results_length; d++) { 649 printf(" 0x%0*" PRIx64 ",\n", 650 bits / 4, 651 static_cast<uint64_t>(results[d])); 652 } 653 printf("};\n"); 654 printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length); 655 } else if (!skipped) { 656 // Check the results. 657 VIXL_CHECK(expected_length == results_length); 658 unsigned error_count = 0; 659 unsigned d = 0; 660 for (unsigned n = 0; n < inputs_length; n++) { 661 for (unsigned m = 0; m < inputs_length; m++) { 662 for (unsigned a = 0; a < inputs_length; a++, d++) { 663 if (results[d] != expected[d]) { 664 if (++error_count > kErrorReportLimit) continue; 665 666 printf("%s 0x%0*" PRIx64 ", 0x%0*" PRIx64 ", 0x%0*" PRIx64 667 " (%s %g %g %g):\n", 668 name, 669 bits / 4, 670 static_cast<uint64_t>(inputs[n]), 671 bits / 4, 672 static_cast<uint64_t>(inputs[m]), 673 bits / 4, 674 static_cast<uint64_t>(inputs[a]), 675 name, 676 rawbits_to_fp(inputs[n]), 677 rawbits_to_fp(inputs[m]), 678 rawbits_to_fp(inputs[a])); 679 printf(" Expected: 0x%0*" PRIx64 " (%g)\n", 680 bits / 4, 681 static_cast<uint64_t>(expected[d]), 682 rawbits_to_fp(expected[d])); 683 printf(" Found: 0x%0*" PRIx64 " (%g)\n", 684 bits / 4, 685 static_cast<uint64_t>(results[d]), 686 rawbits_to_fp(results[d])); 687 printf("\n"); 688 } 689 } 690 } 691 } 692 VIXL_ASSERT(d == expected_length); 693 if (error_count > kErrorReportLimit) { 694 printf("%u other errors follow.\n", error_count - kErrorReportLimit); 695 } 696 VIXL_CHECK(error_count == 0); 697 } 698 delete[] results; 699 } 700 701 702 static void TestCmp_Helper(TestFPCmpHelper_t helper, 703 uintptr_t inputs, 704 unsigned inputs_length, 705 uintptr_t results, 706 unsigned reg_size, 707 bool* skipped) { 708 VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize)); 709 710 SETUP_WITH_FEATURES(CPUFeatures::kFP); 711 START(); 712 713 // Roll up the loop to keep the code size down. 714 Label loop_n, loop_m; 715 716 Register out = x0; 717 Register inputs_base = x1; 718 Register length = w2; 719 Register index_n = w3; 720 Register index_m = w4; 721 Register flags = x5; 722 723 bool double_op = reg_size == kDRegSize; 724 const int index_shift = 725 double_op ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2; 726 727 FPRegister fn = double_op ? d1 : s1; 728 FPRegister fm = double_op ? d2 : s2; 729 730 __ Mov(out, results); 731 __ Mov(inputs_base, inputs); 732 __ Mov(length, inputs_length); 733 734 __ Mov(index_n, 0); 735 __ Bind(&loop_n); 736 __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift)); 737 738 __ Mov(index_m, 0); 739 __ Bind(&loop_m); 740 __ Ldr(fm, MemOperand(inputs_base, index_m, UXTW, index_shift)); 741 742 { 743 SingleEmissionCheckScope guard(&masm); 744 (masm.*helper)(fn, fm); 745 } 746 __ Mrs(flags, NZCV); 747 __ Ubfx(flags, flags, 28, 4); 748 __ Strb(flags, MemOperand(out, 1, PostIndex)); 749 750 __ Add(index_m, index_m, 1); 751 __ Cmp(index_m, inputs_length); 752 __ B(lo, &loop_m); 753 754 __ Add(index_n, index_n, 1); 755 __ Cmp(index_n, inputs_length); 756 __ B(lo, &loop_n); 757 758 END(); 759 TRY_RUN(skipped); 760 TEARDOWN(); 761 } 762 763 764 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of 765 // rawbits representations of doubles or floats. This ensures that exact bit 766 // comparisons can be performed. 767 template <typename T> 768 static void TestCmp(const char* name, 769 TestFPCmpHelper_t helper, 770 const T inputs[], 771 unsigned inputs_length, 772 const uint8_t expected[], 773 unsigned expected_length) { 774 VIXL_ASSERT(inputs_length > 0); 775 776 const unsigned results_length = inputs_length * inputs_length; 777 uint8_t* results = new uint8_t[results_length]; 778 779 const unsigned bits = sizeof(T) * 8; 780 bool skipped; 781 782 TestCmp_Helper(helper, 783 reinterpret_cast<uintptr_t>(inputs), 784 inputs_length, 785 reinterpret_cast<uintptr_t>(results), 786 bits, 787 &skipped); 788 789 if (Test::generate_test_trace()) { 790 // Print the results. 791 printf("const uint8_t kExpected_%s[] = {\n", name); 792 for (unsigned d = 0; d < results_length; d++) { 793 // Each NZCV result only requires 4 bits. 794 VIXL_ASSERT((results[d] & 0xf) == results[d]); 795 printf(" 0x%" PRIx8 ",\n", results[d]); 796 } 797 printf("};\n"); 798 printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length); 799 } else if (!skipped) { 800 // Check the results. 801 VIXL_CHECK(expected_length == results_length); 802 unsigned error_count = 0; 803 unsigned d = 0; 804 for (unsigned n = 0; n < inputs_length; n++) { 805 for (unsigned m = 0; m < inputs_length; m++, d++) { 806 if (results[d] != expected[d]) { 807 if (++error_count > kErrorReportLimit) continue; 808 809 printf("%s 0x%0*" PRIx64 ", 0x%0*" PRIx64 " (%s %g %g):\n", 810 name, 811 bits / 4, 812 static_cast<uint64_t>(inputs[n]), 813 bits / 4, 814 static_cast<uint64_t>(inputs[m]), 815 name, 816 rawbits_to_fp(inputs[n]), 817 rawbits_to_fp(inputs[m])); 818 printf(" Expected: %c%c%c%c (0x%" PRIx8 ")\n", 819 (expected[d] & 0x8) ? 'N' : 'n', 820 (expected[d] & 0x4) ? 'Z' : 'z', 821 (expected[d] & 0x2) ? 'C' : 'c', 822 (expected[d] & 0x1) ? 'V' : 'v', 823 expected[d]); 824 printf(" Found: %c%c%c%c (0x%" PRIx8 ")\n", 825 (results[d] & 0x8) ? 'N' : 'n', 826 (results[d] & 0x4) ? 'Z' : 'z', 827 (results[d] & 0x2) ? 'C' : 'c', 828 (results[d] & 0x1) ? 'V' : 'v', 829 results[d]); 830 printf("\n"); 831 } 832 } 833 } 834 VIXL_ASSERT(d == expected_length); 835 if (error_count > kErrorReportLimit) { 836 printf("%u other errors follow.\n", error_count - kErrorReportLimit); 837 } 838 VIXL_CHECK(error_count == 0); 839 } 840 delete[] results; 841 } 842 843 844 static void TestCmpZero_Helper(TestFPCmpZeroHelper_t helper, 845 uintptr_t inputs, 846 unsigned inputs_length, 847 uintptr_t results, 848 unsigned reg_size, 849 bool* skipped) { 850 VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize)); 851 852 SETUP_WITH_FEATURES(CPUFeatures::kFP); 853 START(); 854 855 // Roll up the loop to keep the code size down. 856 Label loop_n, loop_m; 857 858 Register out = x0; 859 Register inputs_base = x1; 860 Register length = w2; 861 Register index_n = w3; 862 Register flags = x4; 863 864 bool double_op = reg_size == kDRegSize; 865 const int index_shift = 866 double_op ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2; 867 868 FPRegister fn = double_op ? d1 : s1; 869 870 __ Mov(out, results); 871 __ Mov(inputs_base, inputs); 872 __ Mov(length, inputs_length); 873 874 __ Mov(index_n, 0); 875 __ Bind(&loop_n); 876 __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift)); 877 878 { 879 SingleEmissionCheckScope guard(&masm); 880 (masm.*helper)(fn, 0.0); 881 } 882 __ Mrs(flags, NZCV); 883 __ Ubfx(flags, flags, 28, 4); 884 __ Strb(flags, MemOperand(out, 1, PostIndex)); 885 886 __ Add(index_n, index_n, 1); 887 __ Cmp(index_n, inputs_length); 888 __ B(lo, &loop_n); 889 890 END(); 891 TRY_RUN(skipped); 892 TEARDOWN(); 893 } 894 895 896 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of 897 // rawbits representations of doubles or floats. This ensures that exact bit 898 // comparisons can be performed. 899 template <typename T> 900 static void TestCmpZero(const char* name, 901 TestFPCmpZeroHelper_t helper, 902 const T inputs[], 903 unsigned inputs_length, 904 const uint8_t expected[], 905 unsigned expected_length) { 906 VIXL_ASSERT(inputs_length > 0); 907 908 const unsigned results_length = inputs_length; 909 uint8_t* results = new uint8_t[results_length]; 910 911 const unsigned bits = sizeof(T) * 8; 912 bool skipped; 913 914 TestCmpZero_Helper(helper, 915 reinterpret_cast<uintptr_t>(inputs), 916 inputs_length, 917 reinterpret_cast<uintptr_t>(results), 918 bits, 919 &skipped); 920 921 if (Test::generate_test_trace()) { 922 // Print the results. 923 printf("const uint8_t kExpected_%s[] = {\n", name); 924 for (unsigned d = 0; d < results_length; d++) { 925 // Each NZCV result only requires 4 bits. 926 VIXL_ASSERT((results[d] & 0xf) == results[d]); 927 printf(" 0x%" PRIx8 ",\n", results[d]); 928 } 929 printf("};\n"); 930 printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length); 931 } else if (!skipped) { 932 // Check the results. 933 VIXL_CHECK(expected_length == results_length); 934 unsigned error_count = 0; 935 unsigned d = 0; 936 for (unsigned n = 0; n < inputs_length; n++, d++) { 937 if (results[d] != expected[d]) { 938 if (++error_count > kErrorReportLimit) continue; 939 940 printf("%s 0x%0*" PRIx64 ", 0x%0*u (%s %g #0.0):\n", 941 name, 942 bits / 4, 943 static_cast<uint64_t>(inputs[n]), 944 bits / 4, 945 0, 946 name, 947 rawbits_to_fp(inputs[n])); 948 printf(" Expected: %c%c%c%c (0x%" PRIx8 ")\n", 949 (expected[d] & 0x8) ? 'N' : 'n', 950 (expected[d] & 0x4) ? 'Z' : 'z', 951 (expected[d] & 0x2) ? 'C' : 'c', 952 (expected[d] & 0x1) ? 'V' : 'v', 953 expected[d]); 954 printf(" Found: %c%c%c%c (0x%" PRIx8 ")\n", 955 (results[d] & 0x8) ? 'N' : 'n', 956 (results[d] & 0x4) ? 'Z' : 'z', 957 (results[d] & 0x2) ? 'C' : 'c', 958 (results[d] & 0x1) ? 'V' : 'v', 959 results[d]); 960 printf("\n"); 961 } 962 } 963 VIXL_ASSERT(d == expected_length); 964 if (error_count > kErrorReportLimit) { 965 printf("%u other errors follow.\n", error_count - kErrorReportLimit); 966 } 967 VIXL_CHECK(error_count == 0); 968 } 969 delete[] results; 970 } 971 972 973 static void TestFPToFixed_Helper(TestFPToFixedHelper_t helper, 974 uintptr_t inputs, 975 unsigned inputs_length, 976 uintptr_t results, 977 unsigned d_size, 978 unsigned n_size, 979 bool* skipped) { 980 VIXL_ASSERT((d_size == kXRegSize) || (d_size == kWRegSize)); 981 VIXL_ASSERT((n_size == kDRegSize) || (n_size == kSRegSize) || 982 (n_size == kHRegSize)); 983 984 SETUP_WITH_FEATURES(CPUFeatures::kFP, CPUFeatures::kFPHalf); 985 START(); 986 987 // Roll up the loop to keep the code size down. 988 Label loop_n; 989 990 Register out = x0; 991 Register inputs_base = x1; 992 Register length = w2; 993 Register index_n = w3; 994 995 int n_index_shift; 996 if (n_size == kDRegSize) { 997 n_index_shift = kDRegSizeInBytesLog2; 998 } else if (n_size == kSRegSize) { 999 n_index_shift = kSRegSizeInBytesLog2; 1000 } else { 1001 n_index_shift = kHRegSizeInBytesLog2; 1002 } 1003 1004 Register rd = (d_size == kXRegSize) ? Register(x10) : Register(w10); 1005 FPRegister fn; 1006 if (n_size == kDRegSize) { 1007 fn = d1; 1008 } else if (n_size == kSRegSize) { 1009 fn = s1; 1010 } else { 1011 fn = h1; 1012 } 1013 1014 __ Mov(out, results); 1015 __ Mov(inputs_base, inputs); 1016 __ Mov(length, inputs_length); 1017 1018 __ Mov(index_n, 0); 1019 __ Bind(&loop_n); 1020 __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, n_index_shift)); 1021 1022 for (unsigned fbits = 0; fbits <= d_size; ++fbits) { 1023 { 1024 SingleEmissionCheckScope guard(&masm); 1025 (masm.*helper)(rd, fn, fbits); 1026 } 1027 __ Str(rd, MemOperand(out, rd.GetSizeInBytes(), PostIndex)); 1028 } 1029 1030 __ Add(index_n, index_n, 1); 1031 __ Cmp(index_n, inputs_length); 1032 __ B(lo, &loop_n); 1033 1034 END(); 1035 TRY_RUN(skipped); 1036 TEARDOWN(); 1037 } 1038 1039 1040 static void TestFPToInt_Helper(TestFPToIntHelper_t helper, 1041 uintptr_t inputs, 1042 unsigned inputs_length, 1043 uintptr_t results, 1044 unsigned d_size, 1045 unsigned n_size, 1046 bool* skipped) { 1047 VIXL_ASSERT((d_size == kXRegSize) || (d_size == kWRegSize)); 1048 VIXL_ASSERT((n_size == kDRegSize) || (n_size == kSRegSize) || 1049 (n_size == kHRegSize)); 1050 1051 SETUP_WITH_FEATURES(CPUFeatures::kFP, 1052 CPUFeatures::kFPHalf, 1053 CPUFeatures::kJSCVT); 1054 START(); 1055 1056 // Roll up the loop to keep the code size down. 1057 Label loop_n; 1058 1059 Register out = x0; 1060 Register inputs_base = x1; 1061 Register length = w2; 1062 Register index_n = w3; 1063 1064 int n_index_shift; 1065 if (n_size == kDRegSize) { 1066 n_index_shift = kDRegSizeInBytesLog2; 1067 } else if (n_size == kSRegSize) { 1068 n_index_shift = kSRegSizeInBytesLog2; 1069 } else { 1070 n_index_shift = kHRegSizeInBytesLog2; 1071 } 1072 1073 Register rd = (d_size == kXRegSize) ? Register(x10) : Register(w10); 1074 FPRegister fn; 1075 if (n_size == kDRegSize) { 1076 fn = d1; 1077 } else if (n_size == kSRegSize) { 1078 fn = s1; 1079 } else { 1080 fn = h1; 1081 } 1082 1083 __ Mov(out, results); 1084 __ Mov(inputs_base, inputs); 1085 __ Mov(length, inputs_length); 1086 1087 __ Mov(index_n, 0); 1088 __ Bind(&loop_n); 1089 __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, n_index_shift)); 1090 1091 { 1092 SingleEmissionCheckScope guard(&masm); 1093 (masm.*helper)(rd, fn); 1094 } 1095 __ Str(rd, MemOperand(out, rd.GetSizeInBytes(), PostIndex)); 1096 1097 __ Add(index_n, index_n, 1); 1098 __ Cmp(index_n, inputs_length); 1099 __ B(lo, &loop_n); 1100 1101 END(); 1102 TRY_RUN(skipped); 1103 TEARDOWN(); 1104 } 1105 1106 1107 // Test FP instructions. 1108 // - The inputs[] array should be an array of rawbits representations of 1109 // doubles or floats. This ensures that exact bit comparisons can be 1110 // performed. 1111 // - The expected[] array should be an array of signed integers. 1112 template <typename Tn, typename Td> 1113 static void TestFPToS(const char* name, 1114 TestFPToIntHelper_t helper, 1115 const Tn inputs[], 1116 unsigned inputs_length, 1117 const Td expected[], 1118 unsigned expected_length) { 1119 VIXL_ASSERT(inputs_length > 0); 1120 1121 const unsigned results_length = inputs_length; 1122 Td* results = new Td[results_length]; 1123 1124 const unsigned d_bits = sizeof(Td) * 8; 1125 const unsigned n_bits = sizeof(Tn) * 8; 1126 bool skipped; 1127 1128 TestFPToInt_Helper(helper, 1129 reinterpret_cast<uintptr_t>(inputs), 1130 inputs_length, 1131 reinterpret_cast<uintptr_t>(results), 1132 d_bits, 1133 n_bits, 1134 &skipped); 1135 1136 if (Test::generate_test_trace()) { 1137 // Print the results. 1138 printf("const int%u_t kExpected_%s[] = {\n", d_bits, name); 1139 // There is no simple C++ literal for INT*_MIN that doesn't produce 1140 // warnings, so we use an appropriate constant in that case instead. 1141 // Deriving int_d_min in this way (rather than just checking INT64_MIN and 1142 // the like) avoids warnings about comparing values with differing ranges. 1143 const int64_t int_d_max = (UINT64_C(1) << (d_bits - 1)) - 1; 1144 const int64_t int_d_min = -(int_d_max)-1; 1145 for (unsigned d = 0; d < results_length; d++) { 1146 if (results[d] == int_d_min) { 1147 printf(" -INT%u_C(%" PRId64 ") - 1,\n", d_bits, int_d_max); 1148 } else { 1149 // Some constants (such as those between INT32_MAX and UINT32_MAX) 1150 // trigger compiler warnings. To avoid these warnings, use an 1151 // appropriate macro to make the type explicit. 1152 int64_t result_int64 = static_cast<int64_t>(results[d]); 1153 if (result_int64 >= 0) { 1154 printf(" INT%u_C(%" PRId64 "),\n", d_bits, result_int64); 1155 } else { 1156 printf(" -INT%u_C(%" PRId64 "),\n", d_bits, -result_int64); 1157 } 1158 } 1159 } 1160 printf("};\n"); 1161 printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length); 1162 } else if (!skipped) { 1163 // Check the results. 1164 VIXL_CHECK(expected_length == results_length); 1165 unsigned error_count = 0; 1166 unsigned d = 0; 1167 for (unsigned n = 0; n < inputs_length; n++, d++) { 1168 if (results[d] != expected[d]) { 1169 if (++error_count > kErrorReportLimit) continue; 1170 1171 printf("%s 0x%0*" PRIx64 " (%s %g):\n", 1172 name, 1173 n_bits / 4, 1174 static_cast<uint64_t>(inputs[n]), 1175 name, 1176 rawbits_to_fp(inputs[n])); 1177 printf(" Expected: 0x%0*" PRIx64 " (%" PRId64 ")\n", 1178 d_bits / 4, 1179 static_cast<uint64_t>(expected[d]), 1180 static_cast<int64_t>(expected[d])); 1181 printf(" Found: 0x%0*" PRIx64 " (%" PRId64 ")\n", 1182 d_bits / 4, 1183 static_cast<uint64_t>(results[d]), 1184 static_cast<int64_t>(results[d])); 1185 printf("\n"); 1186 } 1187 } 1188 VIXL_ASSERT(d == expected_length); 1189 if (error_count > kErrorReportLimit) { 1190 printf("%u other errors follow.\n", error_count - kErrorReportLimit); 1191 } 1192 VIXL_CHECK(error_count == 0); 1193 } 1194 delete[] results; 1195 } 1196 1197 1198 // Test FP instructions. 1199 // - The inputs[] array should be an array of rawbits representations of 1200 // doubles or floats. This ensures that exact bit comparisons can be 1201 // performed. 1202 // - The expected[] array should be an array of unsigned integers. 1203 template <typename Tn, typename Td> 1204 static void TestFPToU(const char* name, 1205 TestFPToIntHelper_t helper, 1206 const Tn inputs[], 1207 unsigned inputs_length, 1208 const Td expected[], 1209 unsigned expected_length) { 1210 VIXL_ASSERT(inputs_length > 0); 1211 1212 const unsigned results_length = inputs_length; 1213 Td* results = new Td[results_length]; 1214 1215 const unsigned d_bits = sizeof(Td) * 8; 1216 const unsigned n_bits = sizeof(Tn) * 8; 1217 bool skipped; 1218 1219 TestFPToInt_Helper(helper, 1220 reinterpret_cast<uintptr_t>(inputs), 1221 inputs_length, 1222 reinterpret_cast<uintptr_t>(results), 1223 d_bits, 1224 n_bits, 1225 &skipped); 1226 1227 if (Test::generate_test_trace()) { 1228 // Print the results. 1229 printf("const uint%u_t kExpected_%s[] = {\n", d_bits, name); 1230 for (unsigned d = 0; d < results_length; d++) { 1231 printf(" %" PRIu64 "u,\n", static_cast<uint64_t>(results[d])); 1232 } 1233 printf("};\n"); 1234 printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length); 1235 } else if (!skipped) { 1236 // Check the results. 1237 VIXL_CHECK(expected_length == results_length); 1238 unsigned error_count = 0; 1239 unsigned d = 0; 1240 for (unsigned n = 0; n < inputs_length; n++, d++) { 1241 if (results[d] != expected[d]) { 1242 if (++error_count > kErrorReportLimit) continue; 1243 1244 printf("%s 0x%0*" PRIx64 " (%s %g):\n", 1245 name, 1246 n_bits / 4, 1247 static_cast<uint64_t>(inputs[n]), 1248 name, 1249 rawbits_to_fp(inputs[n])); 1250 printf(" Expected: 0x%0*" PRIx64 " (%" PRIu64 ")\n", 1251 d_bits / 4, 1252 static_cast<uint64_t>(expected[d]), 1253 static_cast<uint64_t>(expected[d])); 1254 printf(" Found: 0x%0*" PRIx64 " (%" PRIu64 ")\n", 1255 d_bits / 4, 1256 static_cast<uint64_t>(results[d]), 1257 static_cast<uint64_t>(results[d])); 1258 printf("\n"); 1259 } 1260 } 1261 VIXL_ASSERT(d == expected_length); 1262 if (error_count > kErrorReportLimit) { 1263 printf("%u other errors follow.\n", error_count - kErrorReportLimit); 1264 } 1265 VIXL_CHECK(error_count == 0); 1266 } 1267 delete[] results; 1268 } 1269 1270 1271 // Test FP instructions. 1272 // - The inputs[] array should be an array of rawbits representations of 1273 // doubles or floats. This ensures that exact bit comparisons can be 1274 // performed. 1275 // - The expected[] array should be an array of signed integers. 1276 template <typename Tn, typename Td> 1277 static void TestFPToFixedS(const char* name, 1278 TestFPToFixedHelper_t helper, 1279 const Tn inputs[], 1280 unsigned inputs_length, 1281 const Td expected[], 1282 unsigned expected_length) { 1283 VIXL_ASSERT(inputs_length > 0); 1284 1285 const unsigned d_bits = sizeof(Td) * 8; 1286 const unsigned n_bits = sizeof(Tn) * 8; 1287 1288 const unsigned results_length = inputs_length * (d_bits + 1); 1289 Td* results = new Td[results_length]; 1290 1291 bool skipped; 1292 1293 TestFPToFixed_Helper(helper, 1294 reinterpret_cast<uintptr_t>(inputs), 1295 inputs_length, 1296 reinterpret_cast<uintptr_t>(results), 1297 d_bits, 1298 n_bits, 1299 &skipped); 1300 1301 if (Test::generate_test_trace()) { 1302 // Print the results. 1303 printf("const int%u_t kExpected_%s[] = {\n", d_bits, name); 1304 // There is no simple C++ literal for INT*_MIN that doesn't produce 1305 // warnings, so we use an appropriate constant in that case instead. 1306 // Deriving int_d_min in this way (rather than just checking INT64_MIN and 1307 // the like) avoids warnings about comparing values with differing ranges. 1308 const int64_t int_d_max = (UINT64_C(1) << (d_bits - 1)) - 1; 1309 const int64_t int_d_min = -(int_d_max)-1; 1310 for (unsigned d = 0; d < results_length; d++) { 1311 if (results[d] == int_d_min) { 1312 printf(" -INT%u_C(%" PRId64 ") - 1,\n", d_bits, int_d_max); 1313 } else { 1314 // Some constants (such as those between INT32_MAX and UINT32_MAX) 1315 // trigger compiler warnings. To avoid these warnings, use an 1316 // appropriate macro to make the type explicit. 1317 int64_t result_int64 = static_cast<int64_t>(results[d]); 1318 if (result_int64 >= 0) { 1319 printf(" INT%u_C(%" PRId64 "),\n", d_bits, result_int64); 1320 } else { 1321 printf(" -INT%u_C(%" PRId64 "),\n", d_bits, -result_int64); 1322 } 1323 } 1324 } 1325 printf("};\n"); 1326 printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length); 1327 } else if (!skipped) { 1328 // Check the results. 1329 VIXL_CHECK(expected_length == results_length); 1330 unsigned error_count = 0; 1331 unsigned d = 0; 1332 for (unsigned n = 0; n < inputs_length; n++) { 1333 for (unsigned fbits = 0; fbits <= d_bits; ++fbits, d++) { 1334 if (results[d] != expected[d]) { 1335 if (++error_count > kErrorReportLimit) continue; 1336 1337 printf("%s 0x%0*" PRIx64 " #%d (%s %g #%d):\n", 1338 name, 1339 n_bits / 4, 1340 static_cast<uint64_t>(inputs[n]), 1341 fbits, 1342 name, 1343 rawbits_to_fp(inputs[n]), 1344 fbits); 1345 printf(" Expected: 0x%0*" PRIx64 " (%" PRId64 ")\n", 1346 d_bits / 4, 1347 static_cast<uint64_t>(expected[d]), 1348 static_cast<int64_t>(expected[d])); 1349 printf(" Found: 0x%0*" PRIx64 " (%" PRId64 ")\n", 1350 d_bits / 4, 1351 static_cast<uint64_t>(results[d]), 1352 static_cast<int64_t>(results[d])); 1353 printf("\n"); 1354 } 1355 } 1356 } 1357 VIXL_ASSERT(d == expected_length); 1358 if (error_count > kErrorReportLimit) { 1359 printf("%u other errors follow.\n", error_count - kErrorReportLimit); 1360 } 1361 VIXL_CHECK(error_count == 0); 1362 } 1363 delete[] results; 1364 } 1365 1366 1367 // Test FP instructions. 1368 // - The inputs[] array should be an array of rawbits representations of 1369 // doubles or floats. This ensures that exact bit comparisons can be 1370 // performed. 1371 // - The expected[] array should be an array of unsigned integers. 1372 template <typename Tn, typename Td> 1373 static void TestFPToFixedU(const char* name, 1374 TestFPToFixedHelper_t helper, 1375 const Tn inputs[], 1376 unsigned inputs_length, 1377 const Td expected[], 1378 unsigned expected_length) { 1379 VIXL_ASSERT(inputs_length > 0); 1380 1381 const unsigned d_bits = sizeof(Td) * 8; 1382 const unsigned n_bits = sizeof(Tn) * 8; 1383 1384 const unsigned results_length = inputs_length * (d_bits + 1); 1385 Td* results = new Td[results_length]; 1386 1387 bool skipped; 1388 1389 TestFPToFixed_Helper(helper, 1390 reinterpret_cast<uintptr_t>(inputs), 1391 inputs_length, 1392 reinterpret_cast<uintptr_t>(results), 1393 d_bits, 1394 n_bits, 1395 &skipped); 1396 1397 if (Test::generate_test_trace()) { 1398 // Print the results. 1399 printf("const uint%u_t kExpected_%s[] = {\n", d_bits, name); 1400 for (unsigned d = 0; d < results_length; d++) { 1401 printf(" %" PRIu64 "u,\n", static_cast<uint64_t>(results[d])); 1402 } 1403 printf("};\n"); 1404 printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length); 1405 } else if (!skipped) { 1406 // Check the results. 1407 VIXL_CHECK(expected_length == results_length); 1408 unsigned error_count = 0; 1409 unsigned d = 0; 1410 for (unsigned n = 0; n < inputs_length; n++) { 1411 for (unsigned fbits = 0; fbits <= d_bits; ++fbits, d++) { 1412 if (results[d] != expected[d]) { 1413 if (++error_count > kErrorReportLimit) continue; 1414 1415 printf("%s 0x%0*" PRIx64 " #%d (%s %g #%d):\n", 1416 name, 1417 n_bits / 4, 1418 static_cast<uint64_t>(inputs[n]), 1419 fbits, 1420 name, 1421 rawbits_to_fp(inputs[n]), 1422 fbits); 1423 printf(" Expected: 0x%0*" PRIx64 " (%" PRIu64 ")\n", 1424 d_bits / 4, 1425 static_cast<uint64_t>(expected[d]), 1426 static_cast<uint64_t>(expected[d])); 1427 printf(" Found: 0x%0*" PRIx64 " (%" PRIu64 ")\n", 1428 d_bits / 4, 1429 static_cast<uint64_t>(results[d]), 1430 static_cast<uint64_t>(results[d])); 1431 printf("\n"); 1432 } 1433 } 1434 } 1435 VIXL_ASSERT(d == expected_length); 1436 if (error_count > kErrorReportLimit) { 1437 printf("%u other errors follow.\n", error_count - kErrorReportLimit); 1438 } 1439 VIXL_CHECK(error_count == 0); 1440 } 1441 delete[] results; 1442 } 1443 1444 1445 // ==== Tests for instructions of the form <INST> VReg, VReg. ==== 1446 1447 1448 static void Test1OpNEON_Helper(Test1OpNEONHelper_t helper, 1449 uintptr_t inputs_n, 1450 unsigned inputs_n_length, 1451 uintptr_t results, 1452 VectorFormat vd_form, 1453 VectorFormat vn_form, 1454 bool* skipped) { 1455 VIXL_ASSERT(vd_form != kFormatUndefined); 1456 VIXL_ASSERT(vn_form != kFormatUndefined); 1457 1458 SETUP_WITH_FEATURES(CPUFeatures::kNEON, 1459 CPUFeatures::kFP, 1460 CPUFeatures::kRDM, 1461 CPUFeatures::kNEONHalf); 1462 START(); 1463 1464 // Roll up the loop to keep the code size down. 1465 Label loop_n; 1466 1467 Register out = x0; 1468 Register inputs_n_base = x1; 1469 Register inputs_n_last_16bytes = x3; 1470 Register index_n = x5; 1471 1472 // TODO: Refactor duplicate definitions below with a VRegister::As() routine. 1473 const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form); 1474 const unsigned vd_lane_count = LaneCountFromFormat(vd_form); 1475 1476 const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form); 1477 const unsigned vn_lane_count = LaneCountFromFormat(vn_form); 1478 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form); 1479 const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form); 1480 const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form); 1481 1482 1483 // These will be either a D- or a Q-register form, with a single lane 1484 // (for use in scalar load and store operations). 1485 VRegister vd = VRegister(0, vd_bits); 1486 VRegister vn = v1.V16B(); 1487 VRegister vntmp = v3.V16B(); 1488 1489 // These will have the correct format for use when calling 'helper'. 1490 VRegister vd_helper = VRegister(0, vd_bits, vd_lane_count); 1491 VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count); 1492 1493 // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'. 1494 VRegister vntmp_single = VRegister(3, vn_lane_bits); 1495 1496 __ Mov(out, results); 1497 1498 __ Mov(inputs_n_base, inputs_n); 1499 __ Mov(inputs_n_last_16bytes, 1500 inputs_n + (vn_lane_bytes * inputs_n_length) - 16); 1501 1502 __ Ldr(vn, MemOperand(inputs_n_last_16bytes)); 1503 1504 __ Mov(index_n, 0); 1505 __ Bind(&loop_n); 1506 1507 __ Ldr(vntmp_single, 1508 MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2)); 1509 __ Ext(vn, vn, vntmp, vn_lane_bytes); 1510 1511 // Set the destination to zero. 1512 // TODO: Setting the destination to values other than zero 1513 // might be a better test for instructions such as sqxtn2 1514 // which may leave parts of V registers unchanged. 1515 __ Movi(vd.V16B(), 0); 1516 1517 { 1518 SingleEmissionCheckScope guard(&masm); 1519 (masm.*helper)(vd_helper, vn_helper); 1520 } 1521 __ Str(vd, MemOperand(out, vd.GetSizeInBytes(), PostIndex)); 1522 1523 __ Add(index_n, index_n, 1); 1524 __ Cmp(index_n, inputs_n_length); 1525 __ B(lo, &loop_n); 1526 1527 END(); 1528 TRY_RUN(skipped); 1529 TEARDOWN(); 1530 } 1531 1532 1533 // Test NEON instructions. The inputs_*[] and expected[] arrays should be 1534 // arrays of rawbit representation of input values. This ensures that 1535 // exact bit comparisons can be performed. 1536 template <typename Td, typename Tn> 1537 static void Test1OpNEON(const char* name, 1538 Test1OpNEONHelper_t helper, 1539 const Tn inputs_n[], 1540 unsigned inputs_n_length, 1541 const Td expected[], 1542 unsigned expected_length, 1543 VectorFormat vd_form, 1544 VectorFormat vn_form) { 1545 VIXL_ASSERT(inputs_n_length > 0); 1546 1547 const unsigned vd_lane_count = LaneCountFromFormat(vd_form); 1548 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form); 1549 const unsigned vn_lane_count = LaneCountFromFormat(vn_form); 1550 1551 const unsigned results_length = inputs_n_length; 1552 Td* results = new Td[results_length * vd_lane_count]; 1553 const unsigned lane_bit = sizeof(Td) * 8; 1554 const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>(); 1555 1556 bool skipped; 1557 1558 Test1OpNEON_Helper(helper, 1559 reinterpret_cast<uintptr_t>(inputs_n), 1560 inputs_n_length, 1561 reinterpret_cast<uintptr_t>(results), 1562 vd_form, 1563 vn_form, 1564 &skipped); 1565 1566 if (Test::generate_test_trace()) { 1567 // Print the results. 1568 printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name); 1569 for (unsigned iteration = 0; iteration < results_length; iteration++) { 1570 printf(" "); 1571 // Output a separate result for each element of the result vector. 1572 for (unsigned lane = 0; lane < vd_lane_count; lane++) { 1573 unsigned index = lane + (iteration * vd_lane_count); 1574 printf(" 0x%0*" PRIx64 ",", 1575 lane_len_in_hex, 1576 static_cast<uint64_t>(results[index])); 1577 } 1578 printf("\n"); 1579 } 1580 1581 printf("};\n"); 1582 printf("const unsigned kExpectedCount_NEON_%s = %u;\n", 1583 name, 1584 results_length); 1585 } else if (!skipped) { 1586 // Check the results. 1587 VIXL_CHECK(expected_length == results_length); 1588 unsigned error_count = 0; 1589 unsigned d = 0; 1590 const char* padding = " "; 1591 VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1)); 1592 for (unsigned n = 0; n < inputs_n_length; n++, d++) { 1593 bool error_in_vector = false; 1594 1595 for (unsigned lane = 0; lane < vd_lane_count; lane++) { 1596 unsigned output_index = (n * vd_lane_count) + lane; 1597 1598 if (results[output_index] != expected[output_index]) { 1599 error_in_vector = true; 1600 break; 1601 } 1602 } 1603 1604 if (error_in_vector && (++error_count <= kErrorReportLimit)) { 1605 printf("%s\n", name); 1606 printf(" Vn%.*s| Vd%.*s| Expected\n", 1607 lane_len_in_hex + 1, 1608 padding, 1609 lane_len_in_hex + 1, 1610 padding); 1611 1612 const unsigned first_index_n = 1613 inputs_n_length - (16 / vn_lane_bytes) + n + 1; 1614 1615 for (unsigned lane = 0; lane < std::max(vd_lane_count, vn_lane_count); 1616 lane++) { 1617 unsigned output_index = (n * vd_lane_count) + lane; 1618 unsigned input_index_n = (first_index_n + lane) % inputs_n_length; 1619 1620 printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 1621 " " 1622 "| 0x%0*" PRIx64 "\n", 1623 results[output_index] != expected[output_index] ? '*' : ' ', 1624 lane_len_in_hex, 1625 static_cast<uint64_t>(inputs_n[input_index_n]), 1626 lane_len_in_hex, 1627 static_cast<uint64_t>(results[output_index]), 1628 lane_len_in_hex, 1629 static_cast<uint64_t>(expected[output_index])); 1630 } 1631 } 1632 } 1633 VIXL_ASSERT(d == expected_length); 1634 if (error_count > kErrorReportLimit) { 1635 printf("%u other errors follow.\n", error_count - kErrorReportLimit); 1636 } 1637 VIXL_CHECK(error_count == 0); 1638 } 1639 delete[] results; 1640 } 1641 1642 1643 // ==== Tests for instructions of the form <mnemonic> <V><d>, <Vn>.<T> ==== 1644 // where <V> is one of B, H, S or D registers. 1645 // e.g. saddlv H1, v0.8B 1646 1647 // TODO: Change tests to store all lanes of the resulting V register. 1648 // Some tests store all 128 bits of the resulting V register to 1649 // check the simulator's behaviour on the rest of the register. 1650 // This is better than storing the affected lanes only. 1651 // Change any tests such as the 'Across' template to do the same. 1652 1653 static void Test1OpAcrossNEON_Helper(Test1OpNEONHelper_t helper, 1654 uintptr_t inputs_n, 1655 unsigned inputs_n_length, 1656 uintptr_t results, 1657 VectorFormat vd_form, 1658 VectorFormat vn_form, 1659 bool* skipped) { 1660 VIXL_ASSERT(vd_form != kFormatUndefined); 1661 VIXL_ASSERT(vn_form != kFormatUndefined); 1662 1663 SETUP_WITH_FEATURES(CPUFeatures::kNEON, 1664 CPUFeatures::kFP, 1665 CPUFeatures::kNEONHalf); 1666 START(); 1667 1668 // Roll up the loop to keep the code size down. 1669 Label loop_n; 1670 1671 Register out = x0; 1672 Register inputs_n_base = x1; 1673 Register inputs_n_last_vector = x3; 1674 Register index_n = x5; 1675 1676 // TODO: Refactor duplicate definitions below with a VRegister::As() routine. 1677 const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form); 1678 const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form); 1679 const unsigned vn_lane_count = LaneCountFromFormat(vn_form); 1680 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form); 1681 const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form); 1682 const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form); 1683 1684 // Test destructive operations by (arbitrarily) using the same register for 1685 // B and S lane sizes. 1686 bool destructive = (vd_bits == kBRegSize) || (vd_bits == kSRegSize); 1687 1688 // Create two aliases for v0; the first is the destination for the tested 1689 // instruction, the second, the whole Q register to check the results. 1690 VRegister vd = VRegister(0, vd_bits); 1691 VRegister vdstr = VRegister(0, kQRegSize); 1692 1693 VRegister vn = VRegister(1, vn_bits); 1694 VRegister vntmp = VRegister(3, vn_bits); 1695 1696 // These will have the correct format for use when calling 'helper'. 1697 VRegister vd_helper = VRegister(0, vn_bits, vn_lane_count); 1698 VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count); 1699 1700 // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'. 1701 VRegister vntmp_single = VRegister(3, vn_lane_bits); 1702 1703 // Same registers for use in the 'ext' instructions. 1704 VRegister vn_ext = (kDRegSize == vn_bits) ? vn.V8B() : vn.V16B(); 1705 VRegister vntmp_ext = (kDRegSize == vn_bits) ? vntmp.V8B() : vntmp.V16B(); 1706 1707 __ Mov(out, results); 1708 1709 __ Mov(inputs_n_base, inputs_n); 1710 __ Mov(inputs_n_last_vector, 1711 inputs_n + vn_lane_bytes * (inputs_n_length - vn_lane_count)); 1712 1713 __ Ldr(vn, MemOperand(inputs_n_last_vector)); 1714 1715 __ Mov(index_n, 0); 1716 __ Bind(&loop_n); 1717 1718 __ Ldr(vntmp_single, 1719 MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2)); 1720 __ Ext(vn_ext, vn_ext, vntmp_ext, vn_lane_bytes); 1721 1722 if (destructive) { 1723 __ Mov(vd_helper, vn_helper); 1724 SingleEmissionCheckScope guard(&masm); 1725 (masm.*helper)(vd, vd_helper); 1726 } else { 1727 SingleEmissionCheckScope guard(&masm); 1728 (masm.*helper)(vd, vn_helper); 1729 } 1730 1731 __ Str(vdstr, MemOperand(out, kQRegSizeInBytes, PostIndex)); 1732 1733 __ Add(index_n, index_n, 1); 1734 __ Cmp(index_n, inputs_n_length); 1735 __ B(lo, &loop_n); 1736 1737 END(); 1738 TRY_RUN(skipped); 1739 TEARDOWN(); 1740 } 1741 1742 // Test NEON instructions. The inputs_*[] and expected[] arrays should be 1743 // arrays of rawbit representation of input values. This ensures that 1744 // exact bit comparisons can be performed. 1745 template <typename Td, typename Tn> 1746 static void Test1OpAcrossNEON(const char* name, 1747 Test1OpNEONHelper_t helper, 1748 const Tn inputs_n[], 1749 unsigned inputs_n_length, 1750 const Td expected[], 1751 unsigned expected_length, 1752 VectorFormat vd_form, 1753 VectorFormat vn_form) { 1754 VIXL_ASSERT(inputs_n_length > 0); 1755 1756 const unsigned vd_lane_count = LaneCountFromFormat(vd_form); 1757 const unsigned vd_lanes_per_q = MaxLaneCountFromFormat(vd_form); 1758 1759 const unsigned results_length = inputs_n_length; 1760 Td* results = new Td[results_length * vd_lanes_per_q]; 1761 const unsigned lane_bit = sizeof(Td) * 8; 1762 const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>(); 1763 1764 bool skipped; 1765 1766 Test1OpAcrossNEON_Helper(helper, 1767 reinterpret_cast<uintptr_t>(inputs_n), 1768 inputs_n_length, 1769 reinterpret_cast<uintptr_t>(results), 1770 vd_form, 1771 vn_form, 1772 &skipped); 1773 1774 if (Test::generate_test_trace()) { 1775 // Print the results. 1776 printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name); 1777 for (unsigned iteration = 0; iteration < results_length; iteration++) { 1778 printf(" "); 1779 // Output a separate result for each element of the result vector. 1780 for (unsigned lane = 0; lane < vd_lane_count; lane++) { 1781 unsigned index = lane + (iteration * vd_lanes_per_q); 1782 printf(" 0x%0*" PRIx64 ",", 1783 lane_len_in_hex, 1784 static_cast<uint64_t>(results[index])); 1785 } 1786 printf("\n"); 1787 } 1788 1789 printf("};\n"); 1790 printf("const unsigned kExpectedCount_NEON_%s = %u;\n", 1791 name, 1792 results_length); 1793 } else if (!skipped) { 1794 // Check the results. 1795 VIXL_CHECK(expected_length == results_length); 1796 unsigned error_count = 0; 1797 unsigned d = 0; 1798 const char* padding = " "; 1799 VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1)); 1800 for (unsigned n = 0; n < inputs_n_length; n++, d++) { 1801 bool error_in_vector = false; 1802 1803 for (unsigned lane = 0; lane < vd_lane_count; lane++) { 1804 unsigned expected_index = (n * vd_lane_count) + lane; 1805 unsigned results_index = (n * vd_lanes_per_q) + lane; 1806 1807 if (results[results_index] != expected[expected_index]) { 1808 error_in_vector = true; 1809 break; 1810 } 1811 } 1812 1813 // For across operations, the remaining lanes should be zero. 1814 for (unsigned lane = vd_lane_count; lane < vd_lanes_per_q; lane++) { 1815 unsigned results_index = (n * vd_lanes_per_q) + lane; 1816 if (results[results_index] != 0) { 1817 error_in_vector = true; 1818 break; 1819 } 1820 } 1821 1822 if (error_in_vector && (++error_count <= kErrorReportLimit)) { 1823 const unsigned vn_lane_count = LaneCountFromFormat(vn_form); 1824 1825 printf("%s\n", name); 1826 printf(" Vn%.*s| Vd%.*s| Expected\n", 1827 lane_len_in_hex + 1, 1828 padding, 1829 lane_len_in_hex + 1, 1830 padding); 1831 1832 // TODO: In case of an error, all tests print out as many elements as 1833 // there are lanes in the output or input vectors. This way 1834 // the viewer can read all the values that were needed for the 1835 // operation but the output contains also unnecessary values. 1836 // These prints can be improved according to the arguments 1837 // passed to test functions. 1838 // This output for the 'Across' category has the required 1839 // modifications. 1840 for (unsigned lane = 0; lane < vn_lane_count; lane++) { 1841 unsigned results_index = 1842 (n * vd_lanes_per_q) + ((vn_lane_count - 1) - lane); 1843 unsigned input_index_n = 1844 (inputs_n_length - vn_lane_count + n + 1 + lane) % 1845 inputs_n_length; 1846 1847 Td expect = 0; 1848 if ((vn_lane_count - 1) == lane) { 1849 // This is the last lane to be printed, ie. the least-significant 1850 // lane, so use the expected value; any other lane should be zero. 1851 unsigned expected_index = n * vd_lane_count; 1852 expect = expected[expected_index]; 1853 } 1854 printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n", 1855 results[results_index] != expect ? '*' : ' ', 1856 lane_len_in_hex, 1857 static_cast<uint64_t>(inputs_n[input_index_n]), 1858 lane_len_in_hex, 1859 static_cast<uint64_t>(results[results_index]), 1860 lane_len_in_hex, 1861 static_cast<uint64_t>(expect)); 1862 } 1863 } 1864 } 1865 VIXL_ASSERT(d == expected_length); 1866 if (error_count > kErrorReportLimit) { 1867 printf("%u other errors follow.\n", error_count - kErrorReportLimit); 1868 } 1869 VIXL_CHECK(error_count == 0); 1870 } 1871 delete[] results; 1872 } 1873 1874 1875 // ==== Tests for instructions of the form <INST> VReg, VReg, VReg. ==== 1876 1877 // TODO: Iterate over inputs_d once the traces file is split. 1878 1879 static void Test2OpNEON_Helper(Test2OpNEONHelper_t helper, 1880 uintptr_t inputs_d, 1881 uintptr_t inputs_n, 1882 unsigned inputs_n_length, 1883 uintptr_t inputs_m, 1884 unsigned inputs_m_length, 1885 uintptr_t results, 1886 VectorFormat vd_form, 1887 VectorFormat vn_form, 1888 VectorFormat vm_form, 1889 bool* skipped) { 1890 VIXL_ASSERT(vd_form != kFormatUndefined); 1891 VIXL_ASSERT(vn_form != kFormatUndefined); 1892 VIXL_ASSERT(vm_form != kFormatUndefined); 1893 1894 CPUFeatures features; 1895 features.Combine(CPUFeatures::kNEON, CPUFeatures::kNEONHalf); 1896 features.Combine(CPUFeatures::kFP); 1897 features.Combine(CPUFeatures::kRDM); 1898 features.Combine(CPUFeatures::kDotProduct); 1899 SETUP_WITH_FEATURES(features); 1900 START(); 1901 1902 // Roll up the loop to keep the code size down. 1903 Label loop_n, loop_m; 1904 1905 Register out = x0; 1906 Register inputs_n_base = x1; 1907 Register inputs_m_base = x2; 1908 Register inputs_d_base = x3; 1909 Register inputs_n_last_16bytes = x4; 1910 Register inputs_m_last_16bytes = x5; 1911 Register index_n = x6; 1912 Register index_m = x7; 1913 1914 // TODO: Refactor duplicate definitions below with a VRegister::As() routine. 1915 const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form); 1916 const unsigned vd_lane_count = LaneCountFromFormat(vd_form); 1917 1918 const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form); 1919 const unsigned vn_lane_count = LaneCountFromFormat(vn_form); 1920 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form); 1921 const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form); 1922 const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form); 1923 1924 const unsigned vm_bits = RegisterSizeInBitsFromFormat(vm_form); 1925 const unsigned vm_lane_count = LaneCountFromFormat(vm_form); 1926 const unsigned vm_lane_bytes = LaneSizeInBytesFromFormat(vm_form); 1927 const unsigned vm_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vm_form); 1928 const unsigned vm_lane_bits = LaneSizeInBitsFromFormat(vm_form); 1929 1930 1931 // Always load and store 128 bits regardless of the format. 1932 VRegister vd = v0.V16B(); 1933 VRegister vn = v1.V16B(); 1934 VRegister vm = v2.V16B(); 1935 VRegister vntmp = v3.V16B(); 1936 VRegister vmtmp = v4.V16B(); 1937 VRegister vres = v5.V16B(); 1938 1939 // These will have the correct format for calling the 'helper'. 1940 VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count); 1941 VRegister vm_helper = VRegister(2, vm_bits, vm_lane_count); 1942 VRegister vres_helper = VRegister(5, vd_bits, vd_lane_count); 1943 1944 // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'. 1945 VRegister vntmp_single = VRegister(3, vn_lane_bits); 1946 VRegister vmtmp_single = VRegister(4, vm_lane_bits); 1947 1948 __ Mov(out, results); 1949 1950 __ Mov(inputs_d_base, inputs_d); 1951 1952 __ Mov(inputs_n_base, inputs_n); 1953 __ Mov(inputs_n_last_16bytes, inputs_n + (inputs_n_length - 16)); 1954 __ Mov(inputs_m_base, inputs_m); 1955 __ Mov(inputs_m_last_16bytes, inputs_m + (inputs_m_length - 16)); 1956 1957 __ Ldr(vd, MemOperand(inputs_d_base)); 1958 __ Ldr(vn, MemOperand(inputs_n_last_16bytes)); 1959 __ Ldr(vm, MemOperand(inputs_m_last_16bytes)); 1960 1961 __ Mov(index_n, 0); 1962 __ Bind(&loop_n); 1963 1964 __ Ldr(vntmp_single, 1965 MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2)); 1966 __ Ext(vn, vn, vntmp, vn_lane_bytes); 1967 1968 __ Mov(index_m, 0); 1969 __ Bind(&loop_m); 1970 1971 __ Ldr(vmtmp_single, 1972 MemOperand(inputs_m_base, index_m, LSL, vm_lane_bytes_log2)); 1973 __ Ext(vm, vm, vmtmp, vm_lane_bytes); 1974 1975 __ Mov(vres, vd); 1976 { 1977 SingleEmissionCheckScope guard(&masm); 1978 (masm.*helper)(vres_helper, vn_helper, vm_helper); 1979 } 1980 __ Str(vres, MemOperand(out, vd.GetSizeInBytes(), PostIndex)); 1981 1982 __ Add(index_m, index_m, 1); 1983 __ Cmp(index_m, inputs_m_length); 1984 __ B(lo, &loop_m); 1985 1986 __ Add(index_n, index_n, 1); 1987 __ Cmp(index_n, inputs_n_length); 1988 __ B(lo, &loop_n); 1989 1990 END(); 1991 TRY_RUN(skipped); 1992 TEARDOWN(); 1993 } 1994 1995 1996 // Test NEON instructions. The inputs_*[] and expected[] arrays should be 1997 // arrays of rawbit representation of input values. This ensures that 1998 // exact bit comparisons can be performed. 1999 template <typename Td, typename Tn, typename Tm> 2000 static void Test2OpNEON(const char* name, 2001 Test2OpNEONHelper_t helper, 2002 const Td inputs_d[], 2003 const Tn inputs_n[], 2004 unsigned inputs_n_length, 2005 const Tm inputs_m[], 2006 unsigned inputs_m_length, 2007 const Td expected[], 2008 unsigned expected_length, 2009 VectorFormat vd_form, 2010 VectorFormat vn_form, 2011 VectorFormat vm_form) { 2012 VIXL_ASSERT(inputs_n_length > 0 && inputs_m_length > 0); 2013 2014 const unsigned vd_lane_count = MaxLaneCountFromFormat(vd_form); 2015 2016 const unsigned results_length = inputs_n_length * inputs_m_length; 2017 Td* results = new Td[results_length * vd_lane_count]; 2018 const unsigned lane_bit = sizeof(Td) * 8; 2019 const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tm>(); 2020 2021 bool skipped; 2022 2023 Test2OpNEON_Helper(helper, 2024 reinterpret_cast<uintptr_t>(inputs_d), 2025 reinterpret_cast<uintptr_t>(inputs_n), 2026 inputs_n_length, 2027 reinterpret_cast<uintptr_t>(inputs_m), 2028 inputs_m_length, 2029 reinterpret_cast<uintptr_t>(results), 2030 vd_form, 2031 vn_form, 2032 vm_form, 2033 &skipped); 2034 2035 if (Test::generate_test_trace()) { 2036 // Print the results. 2037 printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name); 2038 for (unsigned iteration = 0; iteration < results_length; iteration++) { 2039 printf(" "); 2040 // Output a separate result for each element of the result vector. 2041 for (unsigned lane = 0; lane < vd_lane_count; lane++) { 2042 unsigned index = lane + (iteration * vd_lane_count); 2043 printf(" 0x%0*" PRIx64 ",", 2044 lane_len_in_hex, 2045 static_cast<uint64_t>(results[index])); 2046 } 2047 printf("\n"); 2048 } 2049 2050 printf("};\n"); 2051 printf("const unsigned kExpectedCount_NEON_%s = %u;\n", 2052 name, 2053 results_length); 2054 } else if (!skipped) { 2055 // Check the results. 2056 VIXL_CHECK(expected_length == results_length); 2057 unsigned error_count = 0; 2058 unsigned d = 0; 2059 const char* padding = " "; 2060 VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1)); 2061 for (unsigned n = 0; n < inputs_n_length; n++) { 2062 for (unsigned m = 0; m < inputs_m_length; m++, d++) { 2063 bool error_in_vector = false; 2064 2065 for (unsigned lane = 0; lane < vd_lane_count; lane++) { 2066 unsigned output_index = (n * inputs_m_length * vd_lane_count) + 2067 (m * vd_lane_count) + lane; 2068 2069 if (results[output_index] != expected[output_index]) { 2070 error_in_vector = true; 2071 break; 2072 } 2073 } 2074 2075 if (error_in_vector && (++error_count <= kErrorReportLimit)) { 2076 printf("%s\n", name); 2077 printf(" Vd%.*s| Vn%.*s| Vm%.*s| Vd%.*s| Expected\n", 2078 lane_len_in_hex + 1, 2079 padding, 2080 lane_len_in_hex + 1, 2081 padding, 2082 lane_len_in_hex + 1, 2083 padding, 2084 lane_len_in_hex + 1, 2085 padding); 2086 2087 for (unsigned lane = 0; lane < vd_lane_count; lane++) { 2088 unsigned output_index = (n * inputs_m_length * vd_lane_count) + 2089 (m * vd_lane_count) + lane; 2090 unsigned input_index_n = 2091 (inputs_n_length - vd_lane_count + n + 1 + lane) % 2092 inputs_n_length; 2093 unsigned input_index_m = 2094 (inputs_m_length - vd_lane_count + m + 1 + lane) % 2095 inputs_m_length; 2096 2097 printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 2098 " " 2099 "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n", 2100 results[output_index] != expected[output_index] ? '*' : ' ', 2101 lane_len_in_hex, 2102 static_cast<uint64_t>(inputs_d[lane]), 2103 lane_len_in_hex, 2104 static_cast<uint64_t>(inputs_n[input_index_n]), 2105 lane_len_in_hex, 2106 static_cast<uint64_t>(inputs_m[input_index_m]), 2107 lane_len_in_hex, 2108 static_cast<uint64_t>(results[output_index]), 2109 lane_len_in_hex, 2110 static_cast<uint64_t>(expected[output_index])); 2111 } 2112 } 2113 } 2114 } 2115 VIXL_ASSERT(d == expected_length); 2116 if (error_count > kErrorReportLimit) { 2117 printf("%u other errors follow.\n", error_count - kErrorReportLimit); 2118 } 2119 VIXL_CHECK(error_count == 0); 2120 } 2121 delete[] results; 2122 } 2123 2124 2125 // ==== Tests for instructions of the form <INST> Vd, Vn, Vm[<#index>]. ==== 2126 2127 static void TestByElementNEON_Helper(TestByElementNEONHelper_t helper, 2128 uintptr_t inputs_d, 2129 uintptr_t inputs_n, 2130 unsigned inputs_n_length, 2131 uintptr_t inputs_m, 2132 unsigned inputs_m_length, 2133 const int indices[], 2134 unsigned indices_length, 2135 uintptr_t results, 2136 VectorFormat vd_form, 2137 VectorFormat vn_form, 2138 VectorFormat vm_form, 2139 unsigned vm_subvector_count, 2140 bool* skipped) { 2141 VIXL_ASSERT(vd_form != kFormatUndefined); 2142 VIXL_ASSERT(vn_form != kFormatUndefined); 2143 VIXL_ASSERT(vm_form != kFormatUndefined); 2144 VIXL_ASSERT((vm_subvector_count != 0) && IsPowerOf2(vm_subvector_count)); 2145 2146 CPUFeatures features; 2147 features.Combine(CPUFeatures::kNEON, CPUFeatures::kNEONHalf); 2148 features.Combine(CPUFeatures::kFP); 2149 features.Combine(CPUFeatures::kRDM); 2150 features.Combine(CPUFeatures::kDotProduct); 2151 SETUP_WITH_FEATURES(features); 2152 2153 START(); 2154 2155 // Roll up the loop to keep the code size down. 2156 Label loop_n, loop_m; 2157 2158 Register out = x0; 2159 Register inputs_n_base = x1; 2160 Register inputs_m_base = x2; 2161 Register inputs_d_base = x3; 2162 Register inputs_n_last_16bytes = x4; 2163 Register inputs_m_last_16bytes = x5; 2164 Register index_n = x6; 2165 Register index_m = x7; 2166 2167 // TODO: Refactor duplicate definitions below with a VRegister::As() routine. 2168 const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form); 2169 const unsigned vd_lane_count = LaneCountFromFormat(vd_form); 2170 2171 const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form); 2172 const unsigned vn_lane_count = LaneCountFromFormat(vn_form); 2173 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form); 2174 const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form); 2175 const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form); 2176 2177 const unsigned vm_bits = RegisterSizeInBitsFromFormat(vm_form); 2178 const unsigned vm_lane_count = LaneCountFromFormat(vm_form); 2179 const unsigned vm_lane_bytes = LaneSizeInBytesFromFormat(vm_form); 2180 const unsigned vm_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vm_form); 2181 const unsigned vm_lane_bits = LaneSizeInBitsFromFormat(vm_form); 2182 2183 VIXL_ASSERT((vm_bits * vm_subvector_count) <= kQRegSize); 2184 2185 // Always load and store 128 bits regardless of the format. 2186 VRegister vd = v0.V16B(); 2187 VRegister vn = v1.V16B(); 2188 VRegister vm = v2.V16B(); 2189 VRegister vntmp = v3.V16B(); 2190 VRegister vmtmp = v4.V16B(); 2191 VRegister vres = v5.V16B(); 2192 2193 // These will have the correct format for calling the 'helper'. 2194 VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count); 2195 VRegister vm_helper = 2196 VRegister(2, vm_bits * vm_subvector_count, vm_lane_count); 2197 VRegister vres_helper = VRegister(5, vd_bits, vd_lane_count); 2198 2199 // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'. 2200 VRegister vntmp_single = VRegister(3, vn_lane_bits); 2201 VRegister vmtmp_single = VRegister(4, vm_lane_bits); 2202 2203 __ Mov(out, results); 2204 2205 __ Mov(inputs_d_base, inputs_d); 2206 2207 __ Mov(inputs_n_base, inputs_n); 2208 __ Mov(inputs_n_last_16bytes, inputs_n + (inputs_n_length - 16)); 2209 __ Mov(inputs_m_base, inputs_m); 2210 __ Mov(inputs_m_last_16bytes, inputs_m + (inputs_m_length - 16)); 2211 2212 __ Ldr(vd, MemOperand(inputs_d_base)); 2213 __ Ldr(vn, MemOperand(inputs_n_last_16bytes)); 2214 __ Ldr(vm, MemOperand(inputs_m_last_16bytes)); 2215 2216 __ Mov(index_n, 0); 2217 __ Bind(&loop_n); 2218 2219 __ Ldr(vntmp_single, 2220 MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2)); 2221 __ Ext(vn, vn, vntmp, vn_lane_bytes); 2222 2223 __ Mov(index_m, 0); 2224 __ Bind(&loop_m); 2225 2226 __ Ldr(vmtmp_single, 2227 MemOperand(inputs_m_base, index_m, LSL, vm_lane_bytes_log2)); 2228 __ Ext(vm, vm, vmtmp, vm_lane_bytes); 2229 2230 __ Mov(vres, vd); 2231 { 2232 for (unsigned i = 0; i < indices_length; i++) { 2233 { 2234 SingleEmissionCheckScope guard(&masm); 2235 (masm.*helper)(vres_helper, vn_helper, vm_helper, indices[i]); 2236 } 2237 __ Str(vres, MemOperand(out, vd.GetSizeInBytes(), PostIndex)); 2238 } 2239 } 2240 2241 __ Add(index_m, index_m, 1); 2242 __ Cmp(index_m, inputs_m_length); 2243 __ B(lo, &loop_m); 2244 2245 __ Add(index_n, index_n, 1); 2246 __ Cmp(index_n, inputs_n_length); 2247 __ B(lo, &loop_n); 2248 2249 END(); 2250 TRY_RUN(skipped); 2251 TEARDOWN(); 2252 } 2253 2254 2255 // Test NEON instructions. The inputs_*[] and expected[] arrays should be 2256 // arrays of rawbit representation of input values. This ensures that 2257 // exact bit comparisons can be performed. 2258 template <typename Td, typename Tn, typename Tm> 2259 static void TestByElementNEON(const char* name, 2260 TestByElementNEONHelper_t helper, 2261 const Td inputs_d[], 2262 const Tn inputs_n[], 2263 unsigned inputs_n_length, 2264 const Tm inputs_m[], 2265 unsigned inputs_m_length, 2266 const int indices[], 2267 unsigned indices_length, 2268 const Td expected[], 2269 unsigned expected_length, 2270 VectorFormat vd_form, 2271 VectorFormat vn_form, 2272 VectorFormat vm_form, 2273 unsigned vm_subvector_count = 1) { 2274 VIXL_ASSERT(inputs_n_length > 0); 2275 VIXL_ASSERT(inputs_m_length > 0); 2276 VIXL_ASSERT(indices_length > 0); 2277 2278 const unsigned vd_lane_count = MaxLaneCountFromFormat(vd_form); 2279 2280 const unsigned results_length = 2281 inputs_n_length * inputs_m_length * indices_length; 2282 Td* results = new Td[results_length * vd_lane_count]; 2283 const unsigned lane_bit = sizeof(Td) * 8; 2284 const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tm>(); 2285 2286 bool skipped; 2287 2288 TestByElementNEON_Helper(helper, 2289 reinterpret_cast<uintptr_t>(inputs_d), 2290 reinterpret_cast<uintptr_t>(inputs_n), 2291 inputs_n_length, 2292 reinterpret_cast<uintptr_t>(inputs_m), 2293 inputs_m_length, 2294 indices, 2295 indices_length, 2296 reinterpret_cast<uintptr_t>(results), 2297 vd_form, 2298 vn_form, 2299 vm_form, 2300 vm_subvector_count, 2301 &skipped); 2302 2303 if (Test::generate_test_trace()) { 2304 // Print the results. 2305 printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name); 2306 for (unsigned iteration = 0; iteration < results_length; iteration++) { 2307 printf(" "); 2308 // Output a separate result for each element of the result vector. 2309 for (unsigned lane = 0; lane < vd_lane_count; lane++) { 2310 unsigned index = lane + (iteration * vd_lane_count); 2311 printf(" 0x%0*" PRIx64 ",", 2312 lane_len_in_hex, 2313 static_cast<uint64_t>(results[index])); 2314 } 2315 printf("\n"); 2316 } 2317 2318 printf("};\n"); 2319 printf("const unsigned kExpectedCount_NEON_%s = %u;\n", 2320 name, 2321 results_length); 2322 } else if (!skipped) { 2323 // Check the results. 2324 VIXL_CHECK(expected_length == results_length); 2325 unsigned error_count = 0; 2326 unsigned d = 0; 2327 const char* padding = " "; 2328 VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1)); 2329 for (unsigned n = 0; n < inputs_n_length; n++) { 2330 for (unsigned m = 0; m < inputs_m_length; m++) { 2331 for (unsigned index = 0; index < indices_length; index++, d++) { 2332 bool error_in_vector = false; 2333 2334 for (unsigned lane = 0; lane < vd_lane_count; lane++) { 2335 unsigned output_index = 2336 (n * inputs_m_length * indices_length * vd_lane_count) + 2337 (m * indices_length * vd_lane_count) + (index * vd_lane_count) + 2338 lane; 2339 2340 if (results[output_index] != expected[output_index]) { 2341 error_in_vector = true; 2342 break; 2343 } 2344 } 2345 2346 if (error_in_vector && (++error_count <= kErrorReportLimit)) { 2347 printf("%s\n", name); 2348 printf(" Vd%.*s| Vn%.*s| Vm%.*s| Index | Vd%.*s| Expected\n", 2349 lane_len_in_hex + 1, 2350 padding, 2351 lane_len_in_hex + 1, 2352 padding, 2353 lane_len_in_hex + 1, 2354 padding, 2355 lane_len_in_hex + 1, 2356 padding); 2357 2358 for (unsigned lane = 0; lane < vd_lane_count; lane++) { 2359 unsigned output_index = 2360 (n * inputs_m_length * indices_length * vd_lane_count) + 2361 (m * indices_length * vd_lane_count) + 2362 (index * vd_lane_count) + lane; 2363 unsigned input_index_n = 2364 (inputs_n_length - vd_lane_count + n + 1 + lane) % 2365 inputs_n_length; 2366 unsigned input_index_m = 2367 (inputs_m_length - vd_lane_count + m + 1 + lane) % 2368 inputs_m_length; 2369 2370 printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 2371 " " 2372 "| [%3d] | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n", 2373 results[output_index] != expected[output_index] ? '*' 2374 : ' ', 2375 lane_len_in_hex, 2376 static_cast<uint64_t>(inputs_d[lane]), 2377 lane_len_in_hex, 2378 static_cast<uint64_t>(inputs_n[input_index_n]), 2379 lane_len_in_hex, 2380 static_cast<uint64_t>(inputs_m[input_index_m]), 2381 indices[index], 2382 lane_len_in_hex, 2383 static_cast<uint64_t>(results[output_index]), 2384 lane_len_in_hex, 2385 static_cast<uint64_t>(expected[output_index])); 2386 } 2387 } 2388 } 2389 } 2390 } 2391 VIXL_ASSERT(d == expected_length); 2392 if (error_count > kErrorReportLimit) { 2393 printf("%u other errors follow.\n", error_count - kErrorReportLimit); 2394 } 2395 VIXL_CHECK(error_count == 0); 2396 } 2397 delete[] results; 2398 } 2399 2400 2401 // ==== Tests for instructions of the form <INST> VReg, VReg, #Immediate. ==== 2402 2403 2404 template <typename Tm> 2405 void Test2OpImmNEON_Helper( 2406 typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper, 2407 uintptr_t inputs_n, 2408 unsigned inputs_n_length, 2409 const Tm inputs_m[], 2410 unsigned inputs_m_length, 2411 uintptr_t results, 2412 VectorFormat vd_form, 2413 VectorFormat vn_form, 2414 bool* skipped) { 2415 VIXL_ASSERT(vd_form != kFormatUndefined && vn_form != kFormatUndefined); 2416 2417 SETUP_WITH_FEATURES(CPUFeatures::kNEON, 2418 CPUFeatures::kFP, 2419 CPUFeatures::kNEONHalf); 2420 START(); 2421 2422 // Roll up the loop to keep the code size down. 2423 Label loop_n; 2424 2425 Register out = x0; 2426 Register inputs_n_base = x1; 2427 Register inputs_n_last_16bytes = x3; 2428 Register index_n = x5; 2429 2430 // TODO: Refactor duplicate definitions below with a VRegister::As() routine. 2431 const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form); 2432 const unsigned vd_lane_count = LaneCountFromFormat(vd_form); 2433 2434 const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form); 2435 const unsigned vn_lane_count = LaneCountFromFormat(vn_form); 2436 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form); 2437 const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form); 2438 const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form); 2439 2440 2441 // These will be either a D- or a Q-register form, with a single lane 2442 // (for use in scalar load and store operations). 2443 VRegister vd = VRegister(0, vd_bits); 2444 VRegister vn = v1.V16B(); 2445 VRegister vntmp = v3.V16B(); 2446 2447 // These will have the correct format for use when calling 'helper'. 2448 VRegister vd_helper = VRegister(0, vd_bits, vd_lane_count); 2449 VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count); 2450 2451 // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'. 2452 VRegister vntmp_single = VRegister(3, vn_lane_bits); 2453 2454 __ Mov(out, results); 2455 2456 __ Mov(inputs_n_base, inputs_n); 2457 __ Mov(inputs_n_last_16bytes, 2458 inputs_n + (vn_lane_bytes * inputs_n_length) - 16); 2459 2460 __ Ldr(vn, MemOperand(inputs_n_last_16bytes)); 2461 2462 __ Mov(index_n, 0); 2463 __ Bind(&loop_n); 2464 2465 __ Ldr(vntmp_single, 2466 MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2)); 2467 __ Ext(vn, vn, vntmp, vn_lane_bytes); 2468 2469 // Set the destination to zero for tests such as '[r]shrn2'. 2470 // TODO: Setting the destination to values other than zero might be a better 2471 // test for shift and accumulate instructions (srsra/ssra/usra/ursra). 2472 __ Movi(vd.V16B(), 0); 2473 2474 { 2475 for (unsigned i = 0; i < inputs_m_length; i++) { 2476 { 2477 SingleEmissionCheckScope guard(&masm); 2478 (masm.*helper)(vd_helper, vn_helper, inputs_m[i]); 2479 } 2480 __ Str(vd, MemOperand(out, vd.GetSizeInBytes(), PostIndex)); 2481 } 2482 } 2483 2484 __ Add(index_n, index_n, 1); 2485 __ Cmp(index_n, inputs_n_length); 2486 __ B(lo, &loop_n); 2487 2488 END(); 2489 TRY_RUN(skipped); 2490 TEARDOWN(); 2491 } 2492 2493 2494 // Test NEON instructions. The inputs_*[] and expected[] arrays should be 2495 // arrays of rawbit representation of input values. This ensures that 2496 // exact bit comparisons can be performed. 2497 template <typename Td, typename Tn, typename Tm> 2498 static void Test2OpImmNEON( 2499 const char* name, 2500 typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper, 2501 const Tn inputs_n[], 2502 unsigned inputs_n_length, 2503 const Tm inputs_m[], 2504 unsigned inputs_m_length, 2505 const Td expected[], 2506 unsigned expected_length, 2507 VectorFormat vd_form, 2508 VectorFormat vn_form) { 2509 VIXL_ASSERT(inputs_n_length > 0 && inputs_m_length > 0); 2510 2511 const unsigned vd_lane_count = LaneCountFromFormat(vd_form); 2512 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form); 2513 const unsigned vn_lane_count = LaneCountFromFormat(vn_form); 2514 2515 const unsigned results_length = inputs_n_length * inputs_m_length; 2516 Td* results = new Td[results_length * vd_lane_count]; 2517 const unsigned lane_bit = sizeof(Td) * 8; 2518 const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>(); 2519 2520 bool skipped; 2521 2522 Test2OpImmNEON_Helper(helper, 2523 reinterpret_cast<uintptr_t>(inputs_n), 2524 inputs_n_length, 2525 inputs_m, 2526 inputs_m_length, 2527 reinterpret_cast<uintptr_t>(results), 2528 vd_form, 2529 vn_form, 2530 &skipped); 2531 2532 if (Test::generate_test_trace()) { 2533 // Print the results. 2534 printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name); 2535 for (unsigned iteration = 0; iteration < results_length; iteration++) { 2536 printf(" "); 2537 // Output a separate result for each element of the result vector. 2538 for (unsigned lane = 0; lane < vd_lane_count; lane++) { 2539 unsigned index = lane + (iteration * vd_lane_count); 2540 printf(" 0x%0*" PRIx64 ",", 2541 lane_len_in_hex, 2542 static_cast<uint64_t>(results[index])); 2543 } 2544 printf("\n"); 2545 } 2546 2547 printf("};\n"); 2548 printf("const unsigned kExpectedCount_NEON_%s = %u;\n", 2549 name, 2550 results_length); 2551 } else if (!skipped) { 2552 // Check the results. 2553 VIXL_CHECK(expected_length == results_length); 2554 unsigned error_count = 0; 2555 unsigned d = 0; 2556 const char* padding = " "; 2557 VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1)); 2558 for (unsigned n = 0; n < inputs_n_length; n++) { 2559 for (unsigned m = 0; m < inputs_m_length; m++, d++) { 2560 bool error_in_vector = false; 2561 2562 for (unsigned lane = 0; lane < vd_lane_count; lane++) { 2563 unsigned output_index = (n * inputs_m_length * vd_lane_count) + 2564 (m * vd_lane_count) + lane; 2565 2566 if (results[output_index] != expected[output_index]) { 2567 error_in_vector = true; 2568 break; 2569 } 2570 } 2571 2572 if (error_in_vector && (++error_count <= kErrorReportLimit)) { 2573 printf("%s\n", name); 2574 printf(" Vn%.*s| Imm%.*s| Vd%.*s| Expected\n", 2575 lane_len_in_hex + 1, 2576 padding, 2577 lane_len_in_hex, 2578 padding, 2579 lane_len_in_hex + 1, 2580 padding); 2581 2582 const unsigned first_index_n = 2583 inputs_n_length - (16 / vn_lane_bytes) + n + 1; 2584 2585 for (unsigned lane = 0; lane < std::max(vd_lane_count, vn_lane_count); 2586 lane++) { 2587 unsigned output_index = (n * inputs_m_length * vd_lane_count) + 2588 (m * vd_lane_count) + lane; 2589 unsigned input_index_n = (first_index_n + lane) % inputs_n_length; 2590 unsigned input_index_m = m; 2591 2592 printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 2593 " " 2594 "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n", 2595 results[output_index] != expected[output_index] ? '*' : ' ', 2596 lane_len_in_hex, 2597 static_cast<uint64_t>(inputs_n[input_index_n]), 2598 lane_len_in_hex, 2599 static_cast<uint64_t>(inputs_m[input_index_m]), 2600 lane_len_in_hex, 2601 static_cast<uint64_t>(results[output_index]), 2602 lane_len_in_hex, 2603 static_cast<uint64_t>(expected[output_index])); 2604 } 2605 } 2606 } 2607 } 2608 VIXL_ASSERT(d == expected_length); 2609 if (error_count > kErrorReportLimit) { 2610 printf("%u other errors follow.\n", error_count - kErrorReportLimit); 2611 } 2612 VIXL_CHECK(error_count == 0); 2613 } 2614 delete[] results; 2615 } 2616 2617 2618 // ==== Tests for instructions of the form <INST> VReg, #Imm, VReg, #Imm. ==== 2619 2620 2621 static void TestOpImmOpImmNEON_Helper(TestOpImmOpImmVdUpdateNEONHelper_t helper, 2622 uintptr_t inputs_d, 2623 const int inputs_imm1[], 2624 unsigned inputs_imm1_length, 2625 uintptr_t inputs_n, 2626 unsigned inputs_n_length, 2627 const int inputs_imm2[], 2628 unsigned inputs_imm2_length, 2629 uintptr_t results, 2630 VectorFormat vd_form, 2631 VectorFormat vn_form, 2632 bool* skipped) { 2633 VIXL_ASSERT(vd_form != kFormatUndefined); 2634 VIXL_ASSERT(vn_form != kFormatUndefined); 2635 2636 SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP); 2637 START(); 2638 2639 // Roll up the loop to keep the code size down. 2640 Label loop_n; 2641 2642 Register out = x0; 2643 Register inputs_d_base = x1; 2644 Register inputs_n_base = x2; 2645 Register inputs_n_last_vector = x4; 2646 Register index_n = x6; 2647 2648 // TODO: Refactor duplicate definitions below with a VRegister::As() routine. 2649 const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form); 2650 const unsigned vd_lane_count = LaneCountFromFormat(vd_form); 2651 2652 const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form); 2653 const unsigned vn_lane_count = LaneCountFromFormat(vn_form); 2654 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form); 2655 const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form); 2656 const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form); 2657 2658 2659 // These will be either a D- or a Q-register form, with a single lane 2660 // (for use in scalar load and store operations). 2661 VRegister vd = VRegister(0, vd_bits); 2662 VRegister vn = VRegister(1, vn_bits); 2663 VRegister vntmp = VRegister(4, vn_bits); 2664 VRegister vres = VRegister(5, vn_bits); 2665 2666 VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count); 2667 VRegister vres_helper = VRegister(5, vd_bits, vd_lane_count); 2668 2669 // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'. 2670 VRegister vntmp_single = VRegister(4, vn_lane_bits); 2671 2672 // Same registers for use in the 'ext' instructions. 2673 VRegister vn_ext = (kDRegSize == vn_bits) ? vn.V8B() : vn.V16B(); 2674 VRegister vntmp_ext = (kDRegSize == vn_bits) ? vntmp.V8B() : vntmp.V16B(); 2675 2676 __ Mov(out, results); 2677 2678 __ Mov(inputs_d_base, inputs_d); 2679 2680 __ Mov(inputs_n_base, inputs_n); 2681 __ Mov(inputs_n_last_vector, 2682 inputs_n + vn_lane_bytes * (inputs_n_length - vn_lane_count)); 2683 2684 __ Ldr(vd, MemOperand(inputs_d_base)); 2685 2686 __ Ldr(vn, MemOperand(inputs_n_last_vector)); 2687 2688 __ Mov(index_n, 0); 2689 __ Bind(&loop_n); 2690 2691 __ Ldr(vntmp_single, 2692 MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2)); 2693 __ Ext(vn_ext, vn_ext, vntmp_ext, vn_lane_bytes); 2694 2695 { 2696 EmissionCheckScope guard(&masm, 2697 kInstructionSize * inputs_imm1_length * 2698 inputs_imm2_length * 3); 2699 for (unsigned i = 0; i < inputs_imm1_length; i++) { 2700 for (unsigned j = 0; j < inputs_imm2_length; j++) { 2701 __ Mov(vres, vd); 2702 (masm.*helper)(vres_helper, inputs_imm1[i], vn_helper, inputs_imm2[j]); 2703 __ Str(vres, MemOperand(out, vd.GetSizeInBytes(), PostIndex)); 2704 } 2705 } 2706 } 2707 2708 __ Add(index_n, index_n, 1); 2709 __ Cmp(index_n, inputs_n_length); 2710 __ B(lo, &loop_n); 2711 2712 END(); 2713 TRY_RUN(skipped); 2714 TEARDOWN(); 2715 } 2716 2717 2718 // Test NEON instructions. The inputs_*[] and expected[] arrays should be 2719 // arrays of rawbit representation of input values. This ensures that 2720 // exact bit comparisons can be performed. 2721 template <typename Td, typename Tn> 2722 static void TestOpImmOpImmNEON(const char* name, 2723 TestOpImmOpImmVdUpdateNEONHelper_t helper, 2724 const Td inputs_d[], 2725 const int inputs_imm1[], 2726 unsigned inputs_imm1_length, 2727 const Tn inputs_n[], 2728 unsigned inputs_n_length, 2729 const int inputs_imm2[], 2730 unsigned inputs_imm2_length, 2731 const Td expected[], 2732 unsigned expected_length, 2733 VectorFormat vd_form, 2734 VectorFormat vn_form) { 2735 VIXL_ASSERT(inputs_n_length > 0); 2736 VIXL_ASSERT(inputs_imm1_length > 0); 2737 VIXL_ASSERT(inputs_imm2_length > 0); 2738 2739 const unsigned vd_lane_count = LaneCountFromFormat(vd_form); 2740 2741 const unsigned results_length = 2742 inputs_n_length * inputs_imm1_length * inputs_imm2_length; 2743 2744 Td* results = new Td[results_length * vd_lane_count]; 2745 const unsigned lane_bit = sizeof(Td) * 8; 2746 const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>(); 2747 2748 bool skipped; 2749 2750 TestOpImmOpImmNEON_Helper(helper, 2751 reinterpret_cast<uintptr_t>(inputs_d), 2752 inputs_imm1, 2753 inputs_imm1_length, 2754 reinterpret_cast<uintptr_t>(inputs_n), 2755 inputs_n_length, 2756 inputs_imm2, 2757 inputs_imm2_length, 2758 reinterpret_cast<uintptr_t>(results), 2759 vd_form, 2760 vn_form, 2761 &skipped); 2762 2763 if (Test::generate_test_trace()) { 2764 // Print the results. 2765 printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name); 2766 for (unsigned iteration = 0; iteration < results_length; iteration++) { 2767 printf(" "); 2768 // Output a separate result for each element of the result vector. 2769 for (unsigned lane = 0; lane < vd_lane_count; lane++) { 2770 unsigned index = lane + (iteration * vd_lane_count); 2771 printf(" 0x%0*" PRIx64 ",", 2772 lane_len_in_hex, 2773 static_cast<uint64_t>(results[index])); 2774 } 2775 printf("\n"); 2776 } 2777 2778 printf("};\n"); 2779 printf("const unsigned kExpectedCount_NEON_%s = %u;\n", 2780 name, 2781 results_length); 2782 } else if (!skipped) { 2783 // Check the results. 2784 VIXL_CHECK(expected_length == results_length); 2785 unsigned error_count = 0; 2786 unsigned counted_length = 0; 2787 const char* padding = " "; 2788 VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1)); 2789 for (unsigned n = 0; n < inputs_n_length; n++) { 2790 for (unsigned imm1 = 0; imm1 < inputs_imm1_length; imm1++) { 2791 for (unsigned imm2 = 0; imm2 < inputs_imm2_length; imm2++) { 2792 bool error_in_vector = false; 2793 2794 counted_length++; 2795 2796 for (unsigned lane = 0; lane < vd_lane_count; lane++) { 2797 unsigned output_index = 2798 (n * inputs_imm1_length * inputs_imm2_length * vd_lane_count) + 2799 (imm1 * inputs_imm2_length * vd_lane_count) + 2800 (imm2 * vd_lane_count) + lane; 2801 2802 if (results[output_index] != expected[output_index]) { 2803 error_in_vector = true; 2804 break; 2805 } 2806 } 2807 2808 if (error_in_vector && (++error_count <= kErrorReportLimit)) { 2809 printf("%s\n", name); 2810 printf(" Vd%.*s| Imm%.*s| Vn%.*s| Imm%.*s| Vd%.*s| Expected\n", 2811 lane_len_in_hex + 1, 2812 padding, 2813 lane_len_in_hex, 2814 padding, 2815 lane_len_in_hex + 1, 2816 padding, 2817 lane_len_in_hex, 2818 padding, 2819 lane_len_in_hex + 1, 2820 padding); 2821 2822 for (unsigned lane = 0; lane < vd_lane_count; lane++) { 2823 unsigned output_index = 2824 (n * inputs_imm1_length * inputs_imm2_length * 2825 vd_lane_count) + 2826 (imm1 * inputs_imm2_length * vd_lane_count) + 2827 (imm2 * vd_lane_count) + lane; 2828 unsigned input_index_n = 2829 (inputs_n_length - vd_lane_count + n + 1 + lane) % 2830 inputs_n_length; 2831 unsigned input_index_imm1 = imm1; 2832 unsigned input_index_imm2 = imm2; 2833 2834 printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 2835 " " 2836 "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n", 2837 results[output_index] != expected[output_index] ? '*' 2838 : ' ', 2839 lane_len_in_hex, 2840 static_cast<uint64_t>(inputs_d[lane]), 2841 lane_len_in_hex, 2842 static_cast<uint64_t>(inputs_imm1[input_index_imm1]), 2843 lane_len_in_hex, 2844 static_cast<uint64_t>(inputs_n[input_index_n]), 2845 lane_len_in_hex, 2846 static_cast<uint64_t>(inputs_imm2[input_index_imm2]), 2847 lane_len_in_hex, 2848 static_cast<uint64_t>(results[output_index]), 2849 lane_len_in_hex, 2850 static_cast<uint64_t>(expected[output_index])); 2851 } 2852 } 2853 } 2854 } 2855 } 2856 VIXL_ASSERT(counted_length == expected_length); 2857 if (error_count > kErrorReportLimit) { 2858 printf("%u other errors follow.\n", error_count - kErrorReportLimit); 2859 } 2860 VIXL_CHECK(error_count == 0); 2861 } 2862 delete[] results; 2863 } 2864 2865 2866 // ==== Floating-point tests. ==== 2867 2868 2869 // Standard floating-point test expansion for both double- and single-precision 2870 // operations. 2871 #define STRINGIFY(s) #s 2872 2873 #define CALL_TEST_FP_HELPER(mnemonic, variant, type, input) \ 2874 Test##type(STRINGIFY(mnemonic) "_" STRINGIFY(variant), \ 2875 &MacroAssembler::mnemonic, \ 2876 input, \ 2877 sizeof(input) / sizeof(input[0]), \ 2878 kExpected_##mnemonic##_##variant, \ 2879 kExpectedCount_##mnemonic##_##variant) 2880 2881 #define DEFINE_TEST_FP(mnemonic, type, input) \ 2882 TEST(mnemonic##_d) { \ 2883 CALL_TEST_FP_HELPER(mnemonic, d, type, kInputDouble##input); \ 2884 } \ 2885 TEST(mnemonic##_s) { \ 2886 CALL_TEST_FP_HELPER(mnemonic, s, type, kInputFloat##input); \ 2887 } 2888 2889 #define DEFINE_TEST_FP_FP16(mnemonic, type, input) \ 2890 TEST(mnemonic##_d) { \ 2891 CALL_TEST_FP_HELPER(mnemonic, d, type, kInputDouble##input); \ 2892 } \ 2893 TEST(mnemonic##_s) { \ 2894 CALL_TEST_FP_HELPER(mnemonic, s, type, kInputFloat##input); \ 2895 } \ 2896 TEST(mnemonic##_h) { \ 2897 CALL_TEST_FP_HELPER(mnemonic, h, type, kInputFloat16##input); \ 2898 } 2899 2900 2901 // TODO: Test with a newer version of valgrind. 2902 // 2903 // Note: valgrind-3.10.0 does not properly interpret libm's fma() on x86_64. 2904 // Therefore this test will be exiting though an ASSERT and thus leaking 2905 // memory. 2906 DEFINE_TEST_FP_FP16(fmadd, 3Op, Basic) 2907 DEFINE_TEST_FP_FP16(fmsub, 3Op, Basic) 2908 DEFINE_TEST_FP_FP16(fnmadd, 3Op, Basic) 2909 DEFINE_TEST_FP_FP16(fnmsub, 3Op, Basic) 2910 2911 DEFINE_TEST_FP_FP16(fadd, 2Op, Basic) 2912 DEFINE_TEST_FP_FP16(fdiv, 2Op, Basic) 2913 DEFINE_TEST_FP_FP16(fmax, 2Op, Basic) 2914 DEFINE_TEST_FP_FP16(fmaxnm, 2Op, Basic) 2915 DEFINE_TEST_FP_FP16(fmin, 2Op, Basic) 2916 DEFINE_TEST_FP_FP16(fminnm, 2Op, Basic) 2917 DEFINE_TEST_FP_FP16(fmul, 2Op, Basic) 2918 DEFINE_TEST_FP_FP16(fsub, 2Op, Basic) 2919 DEFINE_TEST_FP_FP16(fnmul, 2Op, Basic) 2920 2921 DEFINE_TEST_FP_FP16(fabs, 1Op, Basic) 2922 DEFINE_TEST_FP_FP16(fmov, 1Op, Basic) 2923 DEFINE_TEST_FP_FP16(fneg, 1Op, Basic) 2924 DEFINE_TEST_FP_FP16(fsqrt, 1Op, Basic) 2925 DEFINE_TEST_FP_FP16(frinta, 1Op, Conversions) 2926 DEFINE_TEST_FP_FP16(frinti, 1Op, Conversions) 2927 DEFINE_TEST_FP_FP16(frintm, 1Op, Conversions) 2928 DEFINE_TEST_FP_FP16(frintn, 1Op, Conversions) 2929 DEFINE_TEST_FP_FP16(frintp, 1Op, Conversions) 2930 DEFINE_TEST_FP_FP16(frintx, 1Op, Conversions) 2931 DEFINE_TEST_FP_FP16(frintz, 1Op, Conversions) 2932 2933 TEST(fcmp_d) { CALL_TEST_FP_HELPER(fcmp, d, Cmp, kInputDoubleBasic); } 2934 TEST(fcmp_s) { CALL_TEST_FP_HELPER(fcmp, s, Cmp, kInputFloatBasic); } 2935 TEST(fcmp_dz) { CALL_TEST_FP_HELPER(fcmp, dz, CmpZero, kInputDoubleBasic); } 2936 TEST(fcmp_sz) { CALL_TEST_FP_HELPER(fcmp, sz, CmpZero, kInputFloatBasic); } 2937 2938 TEST(fcvt_sd) { CALL_TEST_FP_HELPER(fcvt, sd, 1Op, kInputDoubleConversions); } 2939 TEST(fcvt_ds) { CALL_TEST_FP_HELPER(fcvt, ds, 1Op, kInputFloatConversions); } 2940 2941 #define DEFINE_TEST_FP_TO_INT(mnemonic, type, input) \ 2942 TEST(mnemonic##_xd) { \ 2943 CALL_TEST_FP_HELPER(mnemonic, xd, type, kInputDouble##input); \ 2944 } \ 2945 TEST(mnemonic##_xs) { \ 2946 CALL_TEST_FP_HELPER(mnemonic, xs, type, kInputFloat##input); \ 2947 } \ 2948 TEST(mnemonic##_xh) { \ 2949 CALL_TEST_FP_HELPER(mnemonic, xh, type, kInputFloat16##input); \ 2950 } \ 2951 TEST(mnemonic##_wd) { \ 2952 CALL_TEST_FP_HELPER(mnemonic, wd, type, kInputDouble##input); \ 2953 } \ 2954 TEST(mnemonic##_ws) { \ 2955 CALL_TEST_FP_HELPER(mnemonic, ws, type, kInputFloat##input); \ 2956 } \ 2957 TEST(mnemonic##_wh) { \ 2958 CALL_TEST_FP_HELPER(mnemonic, wh, type, kInputFloat16##input); \ 2959 } 2960 2961 DEFINE_TEST_FP_TO_INT(fcvtas, FPToS, Conversions) 2962 DEFINE_TEST_FP_TO_INT(fcvtau, FPToU, Conversions) 2963 DEFINE_TEST_FP_TO_INT(fcvtms, FPToS, Conversions) 2964 DEFINE_TEST_FP_TO_INT(fcvtmu, FPToU, Conversions) 2965 DEFINE_TEST_FP_TO_INT(fcvtns, FPToS, Conversions) 2966 DEFINE_TEST_FP_TO_INT(fcvtnu, FPToU, Conversions) 2967 DEFINE_TEST_FP_TO_INT(fcvtzs, FPToFixedS, Conversions) 2968 DEFINE_TEST_FP_TO_INT(fcvtzu, FPToFixedU, Conversions) 2969 2970 #define DEFINE_TEST_FP_TO_JS_INT(mnemonic, type, input) \ 2971 TEST(mnemonic##_wd) { \ 2972 CALL_TEST_FP_HELPER(mnemonic, wd, type, kInputDouble##input); \ 2973 } 2974 2975 DEFINE_TEST_FP_TO_JS_INT(fjcvtzs, FPToS, Conversions) 2976 2977 // TODO: Scvtf-fixed-point 2978 // TODO: Scvtf-integer 2979 // TODO: Ucvtf-fixed-point 2980 // TODO: Ucvtf-integer 2981 2982 // TODO: Fccmp 2983 // TODO: Fcsel 2984 2985 2986 // ==== NEON Tests. ==== 2987 2988 #define CALL_TEST_NEON_HELPER_1Op(mnemonic, vdform, vnform, input_n) \ 2989 Test1OpNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform), \ 2990 &MacroAssembler::mnemonic, \ 2991 input_n, \ 2992 (sizeof(input_n) / sizeof(input_n[0])), \ 2993 kExpected_NEON_##mnemonic##_##vdform, \ 2994 kExpectedCount_NEON_##mnemonic##_##vdform, \ 2995 kFormat##vdform, \ 2996 kFormat##vnform) 2997 2998 #define CALL_TEST_NEON_HELPER_1OpAcross(mnemonic, vdform, vnform, input_n) \ 2999 Test1OpAcrossNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform) "_" STRINGIFY( \ 3000 vnform), \ 3001 &MacroAssembler::mnemonic, \ 3002 input_n, \ 3003 (sizeof(input_n) / sizeof(input_n[0])), \ 3004 kExpected_NEON_##mnemonic##_##vdform##_##vnform, \ 3005 kExpectedCount_NEON_##mnemonic##_##vdform##_##vnform, \ 3006 kFormat##vdform, \ 3007 kFormat##vnform) 3008 3009 #define CALL_TEST_NEON_HELPER_2Op( \ 3010 mnemonic, vdform, vnform, vmform, input_d, input_n, input_m) \ 3011 Test2OpNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform), \ 3012 &MacroAssembler::mnemonic, \ 3013 input_d, \ 3014 input_n, \ 3015 (sizeof(input_n) / sizeof(input_n[0])), \ 3016 input_m, \ 3017 (sizeof(input_m) / sizeof(input_m[0])), \ 3018 kExpected_NEON_##mnemonic##_##vdform, \ 3019 kExpectedCount_NEON_##mnemonic##_##vdform, \ 3020 kFormat##vdform, \ 3021 kFormat##vnform, \ 3022 kFormat##vmform) 3023 3024 #define CALL_TEST_NEON_HELPER_2OpImm( \ 3025 mnemonic, vdform, vnform, input_n, input_m) \ 3026 Test2OpImmNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform) "_2OPIMM", \ 3027 &MacroAssembler::mnemonic, \ 3028 input_n, \ 3029 (sizeof(input_n) / sizeof(input_n[0])), \ 3030 input_m, \ 3031 (sizeof(input_m) / sizeof(input_m[0])), \ 3032 kExpected_NEON_##mnemonic##_##vdform##_2OPIMM, \ 3033 kExpectedCount_NEON_##mnemonic##_##vdform##_2OPIMM, \ 3034 kFormat##vdform, \ 3035 kFormat##vnform) 3036 3037 #define CALL_TEST_NEON_HELPER_ByElement( \ 3038 mnemonic, vdform, vnform, vmform, input_d, input_n, input_m, indices) \ 3039 TestByElementNEON( \ 3040 STRINGIFY(mnemonic) "_" STRINGIFY(vdform) "_" STRINGIFY( \ 3041 vnform) "_" STRINGIFY(vmform), \ 3042 &MacroAssembler::mnemonic, \ 3043 input_d, \ 3044 input_n, \ 3045 (sizeof(input_n) / sizeof(input_n[0])), \ 3046 input_m, \ 3047 (sizeof(input_m) / sizeof(input_m[0])), \ 3048 indices, \ 3049 (sizeof(indices) / sizeof(indices[0])), \ 3050 kExpected_NEON_##mnemonic##_##vdform##_##vnform##_##vmform, \ 3051 kExpectedCount_NEON_##mnemonic##_##vdform##_##vnform##_##vmform, \ 3052 kFormat##vdform, \ 3053 kFormat##vnform, \ 3054 kFormat##vmform) 3055 3056 #define CALL_TEST_NEON_HELPER_ByElement_Dot_Product(mnemonic, \ 3057 vdform, \ 3058 vnform, \ 3059 vmform, \ 3060 input_d, \ 3061 input_n, \ 3062 input_m, \ 3063 indices, \ 3064 vm_subvector_count) \ 3065 TestByElementNEON( \ 3066 STRINGIFY(mnemonic) "_" STRINGIFY(vdform) "_" STRINGIFY( \ 3067 vnform) "_" STRINGIFY(vmform), \ 3068 &MacroAssembler::mnemonic, \ 3069 input_d, \ 3070 input_n, \ 3071 (sizeof(input_n) / sizeof(input_n[0])), \ 3072 input_m, \ 3073 (sizeof(input_m) / sizeof(input_m[0])), \ 3074 indices, \ 3075 (sizeof(indices) / sizeof(indices[0])), \ 3076 kExpected_NEON_##mnemonic##_##vdform##_##vnform##_##vmform, \ 3077 kExpectedCount_NEON_##mnemonic##_##vdform##_##vnform##_##vmform, \ 3078 kFormat##vdform, \ 3079 kFormat##vnform, \ 3080 kFormat##vmform, \ 3081 vm_subvector_count) 3082 3083 #define CALL_TEST_NEON_HELPER_OpImmOpImm(helper, \ 3084 mnemonic, \ 3085 vdform, \ 3086 vnform, \ 3087 input_d, \ 3088 input_imm1, \ 3089 input_n, \ 3090 input_imm2) \ 3091 TestOpImmOpImmNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform), \ 3092 helper, \ 3093 input_d, \ 3094 input_imm1, \ 3095 (sizeof(input_imm1) / sizeof(input_imm1[0])), \ 3096 input_n, \ 3097 (sizeof(input_n) / sizeof(input_n[0])), \ 3098 input_imm2, \ 3099 (sizeof(input_imm2) / sizeof(input_imm2[0])), \ 3100 kExpected_NEON_##mnemonic##_##vdform, \ 3101 kExpectedCount_NEON_##mnemonic##_##vdform, \ 3102 kFormat##vdform, \ 3103 kFormat##vnform) 3104 3105 #define CALL_TEST_NEON_HELPER_2SAME(mnemonic, variant, input) \ 3106 CALL_TEST_NEON_HELPER_1Op(mnemonic, variant, variant, input) 3107 3108 #define DEFINE_TEST_NEON_2SAME_8B_16B(mnemonic, input) \ 3109 TEST(mnemonic##_8B) { \ 3110 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 8B, kInput8bits##input); \ 3111 } \ 3112 TEST(mnemonic##_16B) { \ 3113 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 16B, kInput8bits##input); \ 3114 } 3115 3116 #define DEFINE_TEST_NEON_2SAME_4H_8H(mnemonic, input) \ 3117 TEST(mnemonic##_4H) { \ 3118 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4H, kInput16bits##input); \ 3119 } \ 3120 TEST(mnemonic##_8H) { \ 3121 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 8H, kInput16bits##input); \ 3122 } 3123 3124 #define DEFINE_TEST_NEON_2SAME_2S_4S(mnemonic, input) \ 3125 TEST(mnemonic##_2S) { \ 3126 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2S, kInput32bits##input); \ 3127 } \ 3128 TEST(mnemonic##_4S) { \ 3129 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4S, kInput32bits##input); \ 3130 } 3131 3132 #define DEFINE_TEST_NEON_2SAME_BH(mnemonic, input) \ 3133 DEFINE_TEST_NEON_2SAME_8B_16B(mnemonic, input) \ 3134 DEFINE_TEST_NEON_2SAME_4H_8H(mnemonic, input) 3135 3136 #define DEFINE_TEST_NEON_2SAME_NO2D(mnemonic, input) \ 3137 DEFINE_TEST_NEON_2SAME_BH(mnemonic, input) \ 3138 DEFINE_TEST_NEON_2SAME_2S_4S(mnemonic, input) 3139 3140 #define DEFINE_TEST_NEON_2SAME(mnemonic, input) \ 3141 DEFINE_TEST_NEON_2SAME_NO2D(mnemonic, input) \ 3142 TEST(mnemonic##_2D) { \ 3143 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2D, kInput64bits##input); \ 3144 } 3145 #define DEFINE_TEST_NEON_2SAME_SD(mnemonic, input) \ 3146 DEFINE_TEST_NEON_2SAME_2S_4S(mnemonic, input) \ 3147 TEST(mnemonic##_2D) { \ 3148 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2D, kInput64bits##input); \ 3149 } 3150 3151 #define DEFINE_TEST_NEON_2SAME_FP(mnemonic, input) \ 3152 TEST(mnemonic##_2S) { \ 3153 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2S, kInputFloat##input); \ 3154 } \ 3155 TEST(mnemonic##_4S) { \ 3156 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4S, kInputFloat##input); \ 3157 } \ 3158 TEST(mnemonic##_2D) { \ 3159 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2D, kInputDouble##input); \ 3160 } 3161 3162 #define DEFINE_TEST_NEON_2SAME_FP_FP16(mnemonic, input) \ 3163 DEFINE_TEST_NEON_2SAME_FP(mnemonic, input) \ 3164 TEST(mnemonic##_4H) { \ 3165 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4H, kInputFloat16##input); \ 3166 } \ 3167 TEST(mnemonic##_8H) { \ 3168 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 8H, kInputFloat16##input); \ 3169 } 3170 3171 #define DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(mnemonic, input) \ 3172 TEST(mnemonic##_H) { \ 3173 CALL_TEST_NEON_HELPER_2SAME(mnemonic, H, kInputFloat16##input); \ 3174 } \ 3175 TEST(mnemonic##_S) { \ 3176 CALL_TEST_NEON_HELPER_2SAME(mnemonic, S, kInputFloat##input); \ 3177 } \ 3178 TEST(mnemonic##_D) { \ 3179 CALL_TEST_NEON_HELPER_2SAME(mnemonic, D, kInputDouble##input); \ 3180 } 3181 3182 #define DEFINE_TEST_NEON_2SAME_SCALAR_B(mnemonic, input) \ 3183 TEST(mnemonic##_B) { \ 3184 CALL_TEST_NEON_HELPER_2SAME(mnemonic, B, kInput8bits##input); \ 3185 } 3186 #define DEFINE_TEST_NEON_2SAME_SCALAR_H(mnemonic, input) \ 3187 TEST(mnemonic##_H) { \ 3188 CALL_TEST_NEON_HELPER_2SAME(mnemonic, H, kInput16bits##input); \ 3189 } 3190 #define DEFINE_TEST_NEON_2SAME_SCALAR_S(mnemonic, input) \ 3191 TEST(mnemonic##_S) { \ 3192 CALL_TEST_NEON_HELPER_2SAME(mnemonic, S, kInput32bits##input); \ 3193 } 3194 #define DEFINE_TEST_NEON_2SAME_SCALAR_D(mnemonic, input) \ 3195 TEST(mnemonic##_D) { \ 3196 CALL_TEST_NEON_HELPER_2SAME(mnemonic, D, kInput64bits##input); \ 3197 } 3198 3199 #define DEFINE_TEST_NEON_2SAME_SCALAR(mnemonic, input) \ 3200 DEFINE_TEST_NEON_2SAME_SCALAR_B(mnemonic, input) \ 3201 DEFINE_TEST_NEON_2SAME_SCALAR_H(mnemonic, input) \ 3202 DEFINE_TEST_NEON_2SAME_SCALAR_S(mnemonic, input) \ 3203 DEFINE_TEST_NEON_2SAME_SCALAR_D(mnemonic, input) 3204 3205 #define DEFINE_TEST_NEON_2SAME_SCALAR_SD(mnemonic, input) \ 3206 DEFINE_TEST_NEON_2SAME_SCALAR_S(mnemonic, input) \ 3207 DEFINE_TEST_NEON_2SAME_SCALAR_D(mnemonic, input) 3208 3209 3210 #define CALL_TEST_NEON_HELPER_ACROSS(mnemonic, vd_form, vn_form, input_n) \ 3211 CALL_TEST_NEON_HELPER_1OpAcross(mnemonic, vd_form, vn_form, input_n) 3212 3213 #define DEFINE_TEST_NEON_ACROSS(mnemonic, input) \ 3214 TEST(mnemonic##_B_8B) { \ 3215 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, B, 8B, kInput8bits##input); \ 3216 } \ 3217 TEST(mnemonic##_B_16B) { \ 3218 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, B, 16B, kInput8bits##input); \ 3219 } \ 3220 TEST(mnemonic##_H_4H) { \ 3221 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 4H, kInput16bits##input); \ 3222 } \ 3223 TEST(mnemonic##_H_8H) { \ 3224 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 8H, kInput16bits##input); \ 3225 } \ 3226 TEST(mnemonic##_S_4S) { \ 3227 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 4S, kInput32bits##input); \ 3228 } 3229 3230 #define DEFINE_TEST_NEON_ACROSS_LONG(mnemonic, input) \ 3231 TEST(mnemonic##_H_8B) { \ 3232 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 8B, kInput8bits##input); \ 3233 } \ 3234 TEST(mnemonic##_H_16B) { \ 3235 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 16B, kInput8bits##input); \ 3236 } \ 3237 TEST(mnemonic##_S_4H) { \ 3238 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 4H, kInput16bits##input); \ 3239 } \ 3240 TEST(mnemonic##_S_8H) { \ 3241 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 8H, kInput16bits##input); \ 3242 } \ 3243 TEST(mnemonic##_D_4S) { \ 3244 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, D, 4S, kInput32bits##input); \ 3245 } 3246 3247 #define DEFINE_TEST_NEON_ACROSS_FP(mnemonic, input) \ 3248 TEST(mnemonic##_H_4H) { \ 3249 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 4H, kInputFloat16##input); \ 3250 } \ 3251 TEST(mnemonic##_H_8H) { \ 3252 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 8H, kInputFloat16##input); \ 3253 } \ 3254 TEST(mnemonic##_S_4S) { \ 3255 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 4S, kInputFloat##input); \ 3256 } 3257 3258 #define CALL_TEST_NEON_HELPER_2DIFF(mnemonic, vdform, vnform, input_n) \ 3259 CALL_TEST_NEON_HELPER_1Op(mnemonic, vdform, vnform, input_n) 3260 3261 #define DEFINE_TEST_NEON_2DIFF_LONG(mnemonic, input) \ 3262 TEST(mnemonic##_4H) { \ 3263 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 8B, kInput8bits##input); \ 3264 } \ 3265 TEST(mnemonic##_8H) { \ 3266 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 8H, 16B, kInput8bits##input); \ 3267 } \ 3268 TEST(mnemonic##_2S) { \ 3269 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 4H, kInput16bits##input); \ 3270 } \ 3271 TEST(mnemonic##_4S) { \ 3272 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4S, 8H, kInput16bits##input); \ 3273 } \ 3274 TEST(mnemonic##_1D) { \ 3275 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 1D, 2S, kInput32bits##input); \ 3276 } \ 3277 TEST(mnemonic##_2D) { \ 3278 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2D, 4S, kInput32bits##input); \ 3279 } 3280 3281 #define DEFINE_TEST_NEON_2DIFF_NARROW(mnemonic, input) \ 3282 TEST(mnemonic##_8B) { \ 3283 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 8B, 8H, kInput16bits##input); \ 3284 } \ 3285 TEST(mnemonic##_4H) { \ 3286 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 4S, kInput32bits##input); \ 3287 } \ 3288 TEST(mnemonic##_2S) { \ 3289 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInput64bits##input); \ 3290 } \ 3291 TEST(mnemonic##2_16B) { \ 3292 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 16B, 8H, kInput16bits##input); \ 3293 } \ 3294 TEST(mnemonic##2_8H) { \ 3295 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 8H, 4S, kInput32bits##input); \ 3296 } \ 3297 TEST(mnemonic##2_4S) { \ 3298 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, kInput64bits##input); \ 3299 } 3300 3301 #define DEFINE_TEST_NEON_2DIFF_FP_LONG(mnemonic, input) \ 3302 TEST(mnemonic##_4S) { \ 3303 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4S, 4H, kInputFloat16##input); \ 3304 } \ 3305 TEST(mnemonic##_2D) { \ 3306 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2D, 2S, kInputFloat##input); \ 3307 } \ 3308 TEST(mnemonic##2_4S) { \ 3309 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 8H, kInputFloat16##input); \ 3310 } \ 3311 TEST(mnemonic##2_2D) { \ 3312 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 2D, 4S, kInputFloat##input); \ 3313 } 3314 3315 #define DEFINE_TEST_NEON_2DIFF_FP_NARROW(mnemonic, input) \ 3316 TEST(mnemonic##_4H) { \ 3317 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 4S, kInputFloat##input); \ 3318 } \ 3319 TEST(mnemonic##_2S) { \ 3320 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInputDouble##input); \ 3321 } \ 3322 TEST(mnemonic##2_8H) { \ 3323 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 8H, 4S, kInputFloat##input); \ 3324 } \ 3325 TEST(mnemonic##2_4S) { \ 3326 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, kInputDouble##input); \ 3327 } 3328 3329 #define DEFINE_TEST_NEON_2DIFF_FP_NARROW_2S(mnemonic, input) \ 3330 TEST(mnemonic##_2S) { \ 3331 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInputDouble##input); \ 3332 } \ 3333 TEST(mnemonic##2_4S) { \ 3334 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, kInputDouble##input); \ 3335 } 3336 3337 #define DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(mnemonic, input) \ 3338 TEST(mnemonic##_B) { \ 3339 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, B, H, kInput16bits##input); \ 3340 } \ 3341 TEST(mnemonic##_H) { \ 3342 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, H, S, kInput32bits##input); \ 3343 } \ 3344 TEST(mnemonic##_S) { \ 3345 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, S, D, kInput64bits##input); \ 3346 } 3347 3348 #define DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(mnemonic, input) \ 3349 TEST(mnemonic##_S) { \ 3350 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, S, 2S, kInputFloat##input); \ 3351 } \ 3352 TEST(mnemonic##_D) { \ 3353 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, D, 2D, kInputDouble##input); \ 3354 } \ 3355 TEST(mnemonic##_H) { \ 3356 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, H, 2H, kInputFloat16##input); \ 3357 } 3358 3359 #define CALL_TEST_NEON_HELPER_3SAME(mnemonic, variant, input_d, input_nm) \ 3360 { \ 3361 CALL_TEST_NEON_HELPER_2Op(mnemonic, \ 3362 variant, \ 3363 variant, \ 3364 variant, \ 3365 input_d, \ 3366 input_nm, \ 3367 input_nm); \ 3368 } 3369 3370 #define DEFINE_TEST_NEON_3SAME_8B_16B(mnemonic, input) \ 3371 TEST(mnemonic##_8B) { \ 3372 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ 3373 8B, \ 3374 kInput8bitsAccDestination, \ 3375 kInput8bits##input); \ 3376 } \ 3377 TEST(mnemonic##_16B) { \ 3378 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ 3379 16B, \ 3380 kInput8bitsAccDestination, \ 3381 kInput8bits##input); \ 3382 } 3383 3384 #define DEFINE_TEST_NEON_3SAME_HS(mnemonic, input) \ 3385 TEST(mnemonic##_4H) { \ 3386 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ 3387 4H, \ 3388 kInput16bitsAccDestination, \ 3389 kInput16bits##input); \ 3390 } \ 3391 TEST(mnemonic##_8H) { \ 3392 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ 3393 8H, \ 3394 kInput16bitsAccDestination, \ 3395 kInput16bits##input); \ 3396 } \ 3397 TEST(mnemonic##_2S) { \ 3398 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ 3399 2S, \ 3400 kInput32bitsAccDestination, \ 3401 kInput32bits##input); \ 3402 } \ 3403 TEST(mnemonic##_4S) { \ 3404 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ 3405 4S, \ 3406 kInput32bitsAccDestination, \ 3407 kInput32bits##input); \ 3408 } 3409 3410 #define DEFINE_TEST_NEON_3SAME_NO2D(mnemonic, input) \ 3411 DEFINE_TEST_NEON_3SAME_8B_16B(mnemonic, input) \ 3412 DEFINE_TEST_NEON_3SAME_HS(mnemonic, input) 3413 3414 #define DEFINE_TEST_NEON_3SAME(mnemonic, input) \ 3415 DEFINE_TEST_NEON_3SAME_NO2D(mnemonic, input) \ 3416 TEST(mnemonic##_2D) { \ 3417 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ 3418 2D, \ 3419 kInput64bitsAccDestination, \ 3420 kInput64bits##input); \ 3421 } 3422 3423 #define DEFINE_TEST_NEON_3SAME_FP(mnemonic, input) \ 3424 TEST(mnemonic##_4H) { \ 3425 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ 3426 4H, \ 3427 kInputFloat16AccDestination, \ 3428 kInputFloat16##input); \ 3429 } \ 3430 TEST(mnemonic##_8H) { \ 3431 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ 3432 8H, \ 3433 kInputFloat16AccDestination, \ 3434 kInputFloat16##input); \ 3435 } \ 3436 TEST(mnemonic##_2S) { \ 3437 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ 3438 2S, \ 3439 kInputFloatAccDestination, \ 3440 kInputFloat##input); \ 3441 } \ 3442 TEST(mnemonic##_4S) { \ 3443 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ 3444 4S, \ 3445 kInputFloatAccDestination, \ 3446 kInputFloat##input); \ 3447 } \ 3448 TEST(mnemonic##_2D) { \ 3449 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ 3450 2D, \ 3451 kInputDoubleAccDestination, \ 3452 kInputDouble##input); \ 3453 } 3454 3455 #define DEFINE_TEST_NEON_3SAME_SCALAR_D(mnemonic, input) \ 3456 TEST(mnemonic##_D) { \ 3457 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ 3458 D, \ 3459 kInput64bitsAccDestination, \ 3460 kInput64bits##input); \ 3461 } 3462 3463 #define DEFINE_TEST_NEON_3SAME_SCALAR_HS(mnemonic, input) \ 3464 TEST(mnemonic##_H) { \ 3465 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ 3466 H, \ 3467 kInput16bitsAccDestination, \ 3468 kInput16bits##input); \ 3469 } \ 3470 TEST(mnemonic##_S) { \ 3471 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ 3472 S, \ 3473 kInput32bitsAccDestination, \ 3474 kInput32bits##input); \ 3475 } 3476 3477 #define DEFINE_TEST_NEON_3SAME_SCALAR(mnemonic, input) \ 3478 TEST(mnemonic##_B) { \ 3479 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ 3480 B, \ 3481 kInput8bitsAccDestination, \ 3482 kInput8bits##input); \ 3483 } \ 3484 TEST(mnemonic##_H) { \ 3485 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ 3486 H, \ 3487 kInput16bitsAccDestination, \ 3488 kInput16bits##input); \ 3489 } \ 3490 TEST(mnemonic##_S) { \ 3491 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ 3492 S, \ 3493 kInput32bitsAccDestination, \ 3494 kInput32bits##input); \ 3495 } \ 3496 TEST(mnemonic##_D) { \ 3497 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ 3498 D, \ 3499 kInput64bitsAccDestination, \ 3500 kInput64bits##input); \ 3501 } 3502 3503 #define DEFINE_TEST_NEON_3SAME_FP_SCALAR(mnemonic, input) \ 3504 TEST(mnemonic##_H) { \ 3505 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ 3506 H, \ 3507 kInputFloat16AccDestination, \ 3508 kInputFloat16##input); \ 3509 } \ 3510 TEST(mnemonic##_S) { \ 3511 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ 3512 S, \ 3513 kInputFloatAccDestination, \ 3514 kInputFloat##input); \ 3515 } \ 3516 TEST(mnemonic##_D) { \ 3517 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ 3518 D, \ 3519 kInputDoubleAccDestination, \ 3520 kInputDouble##input); \ 3521 } 3522 3523 #define CALL_TEST_NEON_HELPER_3DIFF( \ 3524 mnemonic, vdform, vnform, vmform, input_d, input_n, input_m) \ 3525 { \ 3526 CALL_TEST_NEON_HELPER_2Op(mnemonic, \ 3527 vdform, \ 3528 vnform, \ 3529 vmform, \ 3530 input_d, \ 3531 input_n, \ 3532 input_m); \ 3533 } 3534 3535 #define DEFINE_TEST_NEON_3DIFF_LONG_8H(mnemonic, input) \ 3536 TEST(mnemonic##_8H) { \ 3537 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \ 3538 8H, \ 3539 8B, \ 3540 8B, \ 3541 kInput16bitsAccDestination, \ 3542 kInput8bits##input, \ 3543 kInput8bits##input); \ 3544 } \ 3545 TEST(mnemonic##2_8H) { \ 3546 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, \ 3547 8H, \ 3548 16B, \ 3549 16B, \ 3550 kInput16bitsAccDestination, \ 3551 kInput8bits##input, \ 3552 kInput8bits##input); \ 3553 } 3554 3555 #define DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input) \ 3556 TEST(mnemonic##_4S) { \ 3557 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \ 3558 4S, \ 3559 4H, \ 3560 4H, \ 3561 kInput32bitsAccDestination, \ 3562 kInput16bits##input, \ 3563 kInput16bits##input); \ 3564 } \ 3565 TEST(mnemonic##2_4S) { \ 3566 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, \ 3567 4S, \ 3568 8H, \ 3569 8H, \ 3570 kInput32bitsAccDestination, \ 3571 kInput16bits##input, \ 3572 kInput16bits##input); \ 3573 } 3574 3575 #define DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input) \ 3576 TEST(mnemonic##_2D) { \ 3577 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \ 3578 2D, \ 3579 2S, \ 3580 2S, \ 3581 kInput64bitsAccDestination, \ 3582 kInput32bits##input, \ 3583 kInput32bits##input); \ 3584 } \ 3585 TEST(mnemonic##2_2D) { \ 3586 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, \ 3587 2D, \ 3588 4S, \ 3589 4S, \ 3590 kInput64bitsAccDestination, \ 3591 kInput32bits##input, \ 3592 kInput32bits##input); \ 3593 } 3594 3595 #define DEFINE_TEST_NEON_3DIFF_LONG_SD(mnemonic, input) \ 3596 DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input) \ 3597 DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input) 3598 3599 #define DEFINE_TEST_NEON_3DIFF_LONG(mnemonic, input) \ 3600 DEFINE_TEST_NEON_3DIFF_LONG_8H(mnemonic, input) \ 3601 DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input) \ 3602 DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input) 3603 3604 #define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_S(mnemonic, input) \ 3605 TEST(mnemonic##_S) { \ 3606 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \ 3607 S, \ 3608 H, \ 3609 H, \ 3610 kInput32bitsAccDestination, \ 3611 kInput16bits##input, \ 3612 kInput16bits##input); \ 3613 } 3614 3615 #define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_D(mnemonic, input) \ 3616 TEST(mnemonic##_D) { \ 3617 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \ 3618 D, \ 3619 S, \ 3620 S, \ 3621 kInput64bitsAccDestination, \ 3622 kInput32bits##input, \ 3623 kInput32bits##input); \ 3624 } 3625 3626 #define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(mnemonic, input) \ 3627 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_S(mnemonic, input) \ 3628 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_D(mnemonic, input) 3629 3630 #define DEFINE_TEST_NEON_3DIFF_WIDE(mnemonic, input) \ 3631 TEST(mnemonic##_8H) { \ 3632 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \ 3633 8H, \ 3634 8H, \ 3635 8B, \ 3636 kInput16bitsAccDestination, \ 3637 kInput16bits##input, \ 3638 kInput8bits##input); \ 3639 } \ 3640 TEST(mnemonic##_4S) { \ 3641 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \ 3642 4S, \ 3643 4S, \ 3644 4H, \ 3645 kInput32bitsAccDestination, \ 3646 kInput32bits##input, \ 3647 kInput16bits##input); \ 3648 } \ 3649 TEST(mnemonic##_2D) { \ 3650 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \ 3651 2D, \ 3652 2D, \ 3653 2S, \ 3654 kInput64bitsAccDestination, \ 3655 kInput64bits##input, \ 3656 kInput32bits##input); \ 3657 } \ 3658 TEST(mnemonic##2_8H) { \ 3659 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, \ 3660 8H, \ 3661 8H, \ 3662 16B, \ 3663 kInput16bitsAccDestination, \ 3664 kInput16bits##input, \ 3665 kInput8bits##input); \ 3666 } \ 3667 TEST(mnemonic##2_4S) { \ 3668 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, \ 3669 4S, \ 3670 4S, \ 3671 8H, \ 3672 kInput32bitsAccDestination, \ 3673 kInput32bits##input, \ 3674 kInput16bits##input); \ 3675 } \ 3676 TEST(mnemonic##2_2D) { \ 3677 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, \ 3678 2D, \ 3679 2D, \ 3680 4S, \ 3681 kInput64bitsAccDestination, \ 3682 kInput64bits##input, \ 3683 kInput32bits##input); \ 3684 } 3685 3686 #define DEFINE_TEST_NEON_3DIFF_NARROW(mnemonic, input) \ 3687 TEST(mnemonic##_8B) { \ 3688 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \ 3689 8B, \ 3690 8H, \ 3691 8H, \ 3692 kInput8bitsAccDestination, \ 3693 kInput16bits##input, \ 3694 kInput16bits##input); \ 3695 } \ 3696 TEST(mnemonic##_4H) { \ 3697 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \ 3698 4H, \ 3699 4S, \ 3700 4S, \ 3701 kInput16bitsAccDestination, \ 3702 kInput32bits##input, \ 3703 kInput32bits##input); \ 3704 } \ 3705 TEST(mnemonic##_2S) { \ 3706 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \ 3707 2S, \ 3708 2D, \ 3709 2D, \ 3710 kInput32bitsAccDestination, \ 3711 kInput64bits##input, \ 3712 kInput64bits##input); \ 3713 } \ 3714 TEST(mnemonic##2_16B) { \ 3715 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, \ 3716 16B, \ 3717 8H, \ 3718 8H, \ 3719 kInput8bitsAccDestination, \ 3720 kInput16bits##input, \ 3721 kInput16bits##input); \ 3722 } \ 3723 TEST(mnemonic##2_8H) { \ 3724 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, \ 3725 8H, \ 3726 4S, \ 3727 4S, \ 3728 kInput16bitsAccDestination, \ 3729 kInput32bits##input, \ 3730 kInput32bits##input); \ 3731 } \ 3732 TEST(mnemonic##2_4S) { \ 3733 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, \ 3734 4S, \ 3735 2D, \ 3736 2D, \ 3737 kInput32bitsAccDestination, \ 3738 kInput64bits##input, \ 3739 kInput64bits##input); \ 3740 } 3741 3742 #define DEFINE_TEST_NEON_3DIFF_DOUBLE_WIDE(mnemonic, input) \ 3743 TEST(mnemonic##_2S) { \ 3744 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \ 3745 2S, \ 3746 8B, \ 3747 8B, \ 3748 kInput32bitsAccDestination, \ 3749 kInput8bits##input, \ 3750 kInput8bits##input); \ 3751 } \ 3752 TEST(mnemonic##_4S) { \ 3753 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \ 3754 4S, \ 3755 16B, \ 3756 16B, \ 3757 kInput32bitsAccDestination, \ 3758 kInput8bits##input, \ 3759 kInput8bits##input); \ 3760 } 3761 3762 3763 #define CALL_TEST_NEON_HELPER_2OPIMM( \ 3764 mnemonic, vdform, vnform, input_n, input_imm) \ 3765 { \ 3766 CALL_TEST_NEON_HELPER_2OpImm(mnemonic, \ 3767 vdform, \ 3768 vnform, \ 3769 input_n, \ 3770 input_imm); \ 3771 } 3772 3773 #define DEFINE_TEST_NEON_2OPIMM(mnemonic, input, input_imm) \ 3774 TEST(mnemonic##_8B_2OPIMM) { \ 3775 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3776 8B, \ 3777 8B, \ 3778 kInput8bits##input, \ 3779 kInput8bitsImm##input_imm); \ 3780 } \ 3781 TEST(mnemonic##_16B_2OPIMM) { \ 3782 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3783 16B, \ 3784 16B, \ 3785 kInput8bits##input, \ 3786 kInput8bitsImm##input_imm); \ 3787 } \ 3788 TEST(mnemonic##_4H_2OPIMM) { \ 3789 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3790 4H, \ 3791 4H, \ 3792 kInput16bits##input, \ 3793 kInput16bitsImm##input_imm); \ 3794 } \ 3795 TEST(mnemonic##_8H_2OPIMM) { \ 3796 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3797 8H, \ 3798 8H, \ 3799 kInput16bits##input, \ 3800 kInput16bitsImm##input_imm); \ 3801 } \ 3802 TEST(mnemonic##_2S_2OPIMM) { \ 3803 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3804 2S, \ 3805 2S, \ 3806 kInput32bits##input, \ 3807 kInput32bitsImm##input_imm); \ 3808 } \ 3809 TEST(mnemonic##_4S_2OPIMM) { \ 3810 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3811 4S, \ 3812 4S, \ 3813 kInput32bits##input, \ 3814 kInput32bitsImm##input_imm); \ 3815 } \ 3816 TEST(mnemonic##_2D_2OPIMM) { \ 3817 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3818 2D, \ 3819 2D, \ 3820 kInput64bits##input, \ 3821 kInput64bitsImm##input_imm); \ 3822 } 3823 3824 #define DEFINE_TEST_NEON_2OPIMM_COPY(mnemonic, input, input_imm) \ 3825 TEST(mnemonic##_8B_2OPIMM) { \ 3826 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3827 8B, \ 3828 B, \ 3829 kInput8bits##input, \ 3830 kInput8bitsImm##input_imm); \ 3831 } \ 3832 TEST(mnemonic##_16B_2OPIMM) { \ 3833 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3834 16B, \ 3835 B, \ 3836 kInput8bits##input, \ 3837 kInput8bitsImm##input_imm); \ 3838 } \ 3839 TEST(mnemonic##_4H_2OPIMM) { \ 3840 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3841 4H, \ 3842 H, \ 3843 kInput16bits##input, \ 3844 kInput16bitsImm##input_imm); \ 3845 } \ 3846 TEST(mnemonic##_8H_2OPIMM) { \ 3847 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3848 8H, \ 3849 H, \ 3850 kInput16bits##input, \ 3851 kInput16bitsImm##input_imm); \ 3852 } \ 3853 TEST(mnemonic##_2S_2OPIMM) { \ 3854 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3855 2S, \ 3856 S, \ 3857 kInput32bits##input, \ 3858 kInput32bitsImm##input_imm); \ 3859 } \ 3860 TEST(mnemonic##_4S_2OPIMM) { \ 3861 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3862 4S, \ 3863 S, \ 3864 kInput32bits##input, \ 3865 kInput32bitsImm##input_imm); \ 3866 } \ 3867 TEST(mnemonic##_2D_2OPIMM) { \ 3868 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3869 2D, \ 3870 D, \ 3871 kInput64bits##input, \ 3872 kInput64bitsImm##input_imm); \ 3873 } 3874 3875 #define DEFINE_TEST_NEON_2OPIMM_NARROW(mnemonic, input, input_imm) \ 3876 TEST(mnemonic##_8B_2OPIMM) { \ 3877 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3878 8B, \ 3879 8H, \ 3880 kInput16bits##input, \ 3881 kInput8bitsImm##input_imm); \ 3882 } \ 3883 TEST(mnemonic##_4H_2OPIMM) { \ 3884 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3885 4H, \ 3886 4S, \ 3887 kInput32bits##input, \ 3888 kInput16bitsImm##input_imm); \ 3889 } \ 3890 TEST(mnemonic##_2S_2OPIMM) { \ 3891 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3892 2S, \ 3893 2D, \ 3894 kInput64bits##input, \ 3895 kInput32bitsImm##input_imm); \ 3896 } \ 3897 TEST(mnemonic##2_16B_2OPIMM) { \ 3898 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, \ 3899 16B, \ 3900 8H, \ 3901 kInput16bits##input, \ 3902 kInput8bitsImm##input_imm); \ 3903 } \ 3904 TEST(mnemonic##2_8H_2OPIMM) { \ 3905 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, \ 3906 8H, \ 3907 4S, \ 3908 kInput32bits##input, \ 3909 kInput16bitsImm##input_imm); \ 3910 } \ 3911 TEST(mnemonic##2_4S_2OPIMM) { \ 3912 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, \ 3913 4S, \ 3914 2D, \ 3915 kInput64bits##input, \ 3916 kInput32bitsImm##input_imm); \ 3917 } 3918 3919 #define DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(mnemonic, input, input_imm) \ 3920 TEST(mnemonic##_B_2OPIMM) { \ 3921 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3922 B, \ 3923 H, \ 3924 kInput16bits##input, \ 3925 kInput8bitsImm##input_imm); \ 3926 } \ 3927 TEST(mnemonic##_H_2OPIMM) { \ 3928 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3929 H, \ 3930 S, \ 3931 kInput32bits##input, \ 3932 kInput16bitsImm##input_imm); \ 3933 } \ 3934 TEST(mnemonic##_S_2OPIMM) { \ 3935 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3936 S, \ 3937 D, \ 3938 kInput64bits##input, \ 3939 kInput32bitsImm##input_imm); \ 3940 } 3941 3942 #define DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(mnemonic, input, input_imm) \ 3943 TEST(mnemonic##_4H_2OPIMM) { \ 3944 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3945 4H, \ 3946 4H, \ 3947 kInputFloat16##input, \ 3948 kInputDoubleImm##input_imm); \ 3949 } \ 3950 TEST(mnemonic##_8H_2OPIMM) { \ 3951 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3952 8H, \ 3953 8H, \ 3954 kInputFloat16##input, \ 3955 kInputDoubleImm##input_imm); \ 3956 } \ 3957 TEST(mnemonic##_2S_2OPIMM) { \ 3958 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3959 2S, \ 3960 2S, \ 3961 kInputFloat##Basic, \ 3962 kInputDoubleImm##input_imm) \ 3963 } \ 3964 TEST(mnemonic##_4S_2OPIMM) { \ 3965 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3966 4S, \ 3967 4S, \ 3968 kInputFloat##input, \ 3969 kInputDoubleImm##input_imm); \ 3970 } \ 3971 TEST(mnemonic##_2D_2OPIMM) { \ 3972 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3973 2D, \ 3974 2D, \ 3975 kInputDouble##input, \ 3976 kInputDoubleImm##input_imm); \ 3977 } 3978 3979 #define DEFINE_TEST_NEON_2OPIMM_FP(mnemonic, input, input_imm) \ 3980 TEST(mnemonic##_4H_2OPIMM) { \ 3981 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3982 4H, \ 3983 4H, \ 3984 kInputFloat16##input, \ 3985 kInput16bitsImm##input_imm) \ 3986 } \ 3987 TEST(mnemonic##_8H_2OPIMM) { \ 3988 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3989 8H, \ 3990 8H, \ 3991 kInputFloat16##input, \ 3992 kInput16bitsImm##input_imm) \ 3993 } \ 3994 TEST(mnemonic##_2S_2OPIMM) { \ 3995 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 3996 2S, \ 3997 2S, \ 3998 kInputFloat##Basic, \ 3999 kInput32bitsImm##input_imm) \ 4000 } \ 4001 TEST(mnemonic##_4S_2OPIMM) { \ 4002 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 4003 4S, \ 4004 4S, \ 4005 kInputFloat##input, \ 4006 kInput32bitsImm##input_imm) \ 4007 } \ 4008 TEST(mnemonic##_2D_2OPIMM) { \ 4009 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 4010 2D, \ 4011 2D, \ 4012 kInputDouble##input, \ 4013 kInput64bitsImm##input_imm) \ 4014 } 4015 4016 #define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(mnemonic, input, input_imm) \ 4017 TEST(mnemonic##_H_2OPIMM) { \ 4018 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 4019 H, \ 4020 H, \ 4021 kInputFloat16##Basic, \ 4022 kInput16bitsImm##input_imm) \ 4023 } \ 4024 TEST(mnemonic##_S_2OPIMM) { \ 4025 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 4026 S, \ 4027 S, \ 4028 kInputFloat##Basic, \ 4029 kInput32bitsImm##input_imm) \ 4030 } \ 4031 TEST(mnemonic##_D_2OPIMM) { \ 4032 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 4033 D, \ 4034 D, \ 4035 kInputDouble##input, \ 4036 kInput64bitsImm##input_imm) \ 4037 } 4038 4039 #define DEFINE_TEST_NEON_2OPIMM_HSD(mnemonic, input, input_imm) \ 4040 TEST(mnemonic##_4H_2OPIMM) { \ 4041 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 4042 4H, \ 4043 4H, \ 4044 kInput16bits##input, \ 4045 kInput16bitsImm##input_imm); \ 4046 } \ 4047 TEST(mnemonic##_8H_2OPIMM) { \ 4048 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 4049 8H, \ 4050 8H, \ 4051 kInput16bits##input, \ 4052 kInput16bitsImm##input_imm); \ 4053 } \ 4054 TEST(mnemonic##_2S_2OPIMM) { \ 4055 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 4056 2S, \ 4057 2S, \ 4058 kInput32bits##input, \ 4059 kInput32bitsImm##input_imm); \ 4060 } \ 4061 TEST(mnemonic##_4S_2OPIMM) { \ 4062 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 4063 4S, \ 4064 4S, \ 4065 kInput32bits##input, \ 4066 kInput32bitsImm##input_imm); \ 4067 } \ 4068 TEST(mnemonic##_2D_2OPIMM) { \ 4069 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 4070 2D, \ 4071 2D, \ 4072 kInput64bits##input, \ 4073 kInput64bitsImm##input_imm); \ 4074 } 4075 4076 #define DEFINE_TEST_NEON_2OPIMM_SCALAR_D(mnemonic, input, input_imm) \ 4077 TEST(mnemonic##_D_2OPIMM) { \ 4078 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 4079 D, \ 4080 D, \ 4081 kInput64bits##input, \ 4082 kInput64bitsImm##input_imm); \ 4083 } 4084 4085 #define DEFINE_TEST_NEON_2OPIMM_SCALAR_HSD(mnemonic, input, input_imm) \ 4086 TEST(mnemonic##_H_2OPIMM) { \ 4087 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 4088 H, \ 4089 H, \ 4090 kInput16bits##input, \ 4091 kInput16bitsImm##input_imm); \ 4092 } \ 4093 TEST(mnemonic##_S_2OPIMM) { \ 4094 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 4095 S, \ 4096 S, \ 4097 kInput32bits##input, \ 4098 kInput32bitsImm##input_imm); \ 4099 } \ 4100 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(mnemonic, input, input_imm) 4101 4102 #define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D(mnemonic, input, input_imm) \ 4103 TEST(mnemonic##_D_2OPIMM) { \ 4104 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 4105 D, \ 4106 D, \ 4107 kInputDouble##input, \ 4108 kInputDoubleImm##input_imm); \ 4109 } 4110 4111 #define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_HSD(mnemonic, input, input_imm) \ 4112 TEST(mnemonic##_H_2OPIMM) { \ 4113 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 4114 H, \ 4115 H, \ 4116 kInputFloat16##input, \ 4117 kInputDoubleImm##input_imm); \ 4118 } \ 4119 TEST(mnemonic##_S_2OPIMM) { \ 4120 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 4121 S, \ 4122 S, \ 4123 kInputFloat##input, \ 4124 kInputDoubleImm##input_imm); \ 4125 } \ 4126 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D(mnemonic, input, input_imm) 4127 4128 #define DEFINE_TEST_NEON_2OPIMM_SCALAR(mnemonic, input, input_imm) \ 4129 TEST(mnemonic##_B_2OPIMM) { \ 4130 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 4131 B, \ 4132 B, \ 4133 kInput8bits##input, \ 4134 kInput8bitsImm##input_imm); \ 4135 } \ 4136 DEFINE_TEST_NEON_2OPIMM_SCALAR_HSD(mnemonic, input, input_imm) 4137 4138 #define DEFINE_TEST_NEON_2OPIMM_LONG(mnemonic, input, input_imm) \ 4139 TEST(mnemonic##_8H_2OPIMM) { \ 4140 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 4141 8H, \ 4142 8B, \ 4143 kInput8bits##input, \ 4144 kInput8bitsImm##input_imm); \ 4145 } \ 4146 TEST(mnemonic##_4S_2OPIMM) { \ 4147 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 4148 4S, \ 4149 4H, \ 4150 kInput16bits##input, \ 4151 kInput16bitsImm##input_imm); \ 4152 } \ 4153 TEST(mnemonic##_2D_2OPIMM) { \ 4154 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ 4155 2D, \ 4156 2S, \ 4157 kInput32bits##input, \ 4158 kInput32bitsImm##input_imm); \ 4159 } \ 4160 TEST(mnemonic##2_8H_2OPIMM) { \ 4161 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, \ 4162 8H, \ 4163 16B, \ 4164 kInput8bits##input, \ 4165 kInput8bitsImm##input_imm); \ 4166 } \ 4167 TEST(mnemonic##2_4S_2OPIMM) { \ 4168 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, \ 4169 4S, \ 4170 8H, \ 4171 kInput16bits##input, \ 4172 kInput16bitsImm##input_imm); \ 4173 } \ 4174 TEST(mnemonic##2_2D_2OPIMM) { \ 4175 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, \ 4176 2D, \ 4177 4S, \ 4178 kInput32bits##input, \ 4179 kInput32bitsImm##input_imm); \ 4180 } 4181 4182 #define CALL_TEST_NEON_HELPER_BYELEMENT_DOT_PRODUCT(mnemonic, \ 4183 vdform, \ 4184 vnform, \ 4185 vmform, \ 4186 input_d, \ 4187 input_n, \ 4188 input_m, \ 4189 indices, \ 4190 vm_subvector_count) \ 4191 { \ 4192 CALL_TEST_NEON_HELPER_ByElement_Dot_Product(mnemonic, \ 4193 vdform, \ 4194 vnform, \ 4195 vmform, \ 4196 input_d, \ 4197 input_n, \ 4198 input_m, \ 4199 indices, \ 4200 vm_subvector_count); \ 4201 } 4202 4203 #define DEFINE_TEST_NEON_BYELEMENT_DOT_PRODUCT( \ 4204 mnemonic, input_d, input_n, input_m) \ 4205 TEST(mnemonic##_2S_8B_B) { \ 4206 CALL_TEST_NEON_HELPER_BYELEMENT_DOT_PRODUCT(mnemonic, \ 4207 2S, \ 4208 8B, \ 4209 B, \ 4210 kInput32bits##input_d, \ 4211 kInput8bits##input_n, \ 4212 kInput8bits##input_m, \ 4213 kInputSIndices, \ 4214 4); \ 4215 } \ 4216 TEST(mnemonic##_4S_16B_B) { \ 4217 CALL_TEST_NEON_HELPER_BYELEMENT_DOT_PRODUCT(mnemonic, \ 4218 4S, \ 4219 16B, \ 4220 B, \ 4221 kInput32bits##input_d, \ 4222 kInput8bits##input_n, \ 4223 kInput8bits##input_m, \ 4224 kInputSIndices, \ 4225 4); \ 4226 } 4227 4228 #define CALL_TEST_NEON_HELPER_BYELEMENT( \ 4229 mnemonic, vdform, vnform, vmform, input_d, input_n, input_m, indices) \ 4230 { \ 4231 CALL_TEST_NEON_HELPER_ByElement(mnemonic, \ 4232 vdform, \ 4233 vnform, \ 4234 vmform, \ 4235 input_d, \ 4236 input_n, \ 4237 input_m, \ 4238 indices); \ 4239 } 4240 4241 #define DEFINE_TEST_NEON_BYELEMENT(mnemonic, input_d, input_n, input_m) \ 4242 TEST(mnemonic##_4H_4H_H) { \ 4243 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ 4244 4H, \ 4245 4H, \ 4246 H, \ 4247 kInput16bits##input_d, \ 4248 kInput16bits##input_n, \ 4249 kInput16bits##input_m, \ 4250 kInputHIndices); \ 4251 } \ 4252 TEST(mnemonic##_8H_8H_H) { \ 4253 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ 4254 8H, \ 4255 8H, \ 4256 H, \ 4257 kInput16bits##input_d, \ 4258 kInput16bits##input_n, \ 4259 kInput16bits##input_m, \ 4260 kInputHIndices); \ 4261 } \ 4262 TEST(mnemonic##_2S_2S_S) { \ 4263 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ 4264 2S, \ 4265 2S, \ 4266 S, \ 4267 kInput32bits##input_d, \ 4268 kInput32bits##input_n, \ 4269 kInput32bits##input_m, \ 4270 kInputSIndices); \ 4271 } \ 4272 TEST(mnemonic##_4S_4S_S) { \ 4273 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ 4274 4S, \ 4275 4S, \ 4276 S, \ 4277 kInput32bits##input_d, \ 4278 kInput32bits##input_n, \ 4279 kInput32bits##input_m, \ 4280 kInputSIndices); \ 4281 } 4282 4283 #define DEFINE_TEST_NEON_BYELEMENT_SCALAR(mnemonic, input_d, input_n, input_m) \ 4284 TEST(mnemonic##_H_H_H) { \ 4285 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ 4286 H, \ 4287 H, \ 4288 H, \ 4289 kInput16bits##input_d, \ 4290 kInput16bits##input_n, \ 4291 kInput16bits##input_m, \ 4292 kInputHIndices); \ 4293 } \ 4294 TEST(mnemonic##_S_S_S) { \ 4295 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ 4296 S, \ 4297 S, \ 4298 S, \ 4299 kInput32bits##input_d, \ 4300 kInput32bits##input_n, \ 4301 kInput32bits##input_m, \ 4302 kInputSIndices); \ 4303 } 4304 4305 #define DEFINE_TEST_NEON_FP_BYELEMENT(mnemonic, input_d, input_n, input_m) \ 4306 TEST(mnemonic##_4H_4H_H) { \ 4307 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ 4308 4H, \ 4309 4H, \ 4310 H, \ 4311 kInputFloat16##input_d, \ 4312 kInputFloat16##input_n, \ 4313 kInputFloat16##input_m, \ 4314 kInputHIndices); \ 4315 } \ 4316 TEST(mnemonic##_8H_8H_H) { \ 4317 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ 4318 8H, \ 4319 8H, \ 4320 H, \ 4321 kInputFloat16##input_d, \ 4322 kInputFloat16##input_n, \ 4323 kInputFloat16##input_m, \ 4324 kInputHIndices); \ 4325 } \ 4326 TEST(mnemonic##_2S_2S_S) { \ 4327 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ 4328 2S, \ 4329 2S, \ 4330 S, \ 4331 kInputFloat##input_d, \ 4332 kInputFloat##input_n, \ 4333 kInputFloat##input_m, \ 4334 kInputSIndices); \ 4335 } \ 4336 TEST(mnemonic##_4S_4S_S) { \ 4337 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ 4338 4S, \ 4339 4S, \ 4340 S, \ 4341 kInputFloat##input_d, \ 4342 kInputFloat##input_n, \ 4343 kInputFloat##input_m, \ 4344 kInputSIndices); \ 4345 } \ 4346 TEST(mnemonic##_2D_2D_D) { \ 4347 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ 4348 2D, \ 4349 2D, \ 4350 D, \ 4351 kInputDouble##input_d, \ 4352 kInputDouble##input_n, \ 4353 kInputDouble##input_m, \ 4354 kInputDIndices); \ 4355 } 4356 4357 #define DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(mnemonic, inp_d, inp_n, inp_m) \ 4358 TEST(mnemonic##_H_H_H) { \ 4359 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ 4360 H, \ 4361 H, \ 4362 H, \ 4363 kInputFloat16##inp_d, \ 4364 kInputFloat16##inp_n, \ 4365 kInputFloat16##inp_m, \ 4366 kInputHIndices); \ 4367 } \ 4368 TEST(mnemonic##_S_S_S) { \ 4369 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ 4370 S, \ 4371 S, \ 4372 S, \ 4373 kInputFloat##inp_d, \ 4374 kInputFloat##inp_n, \ 4375 kInputFloat##inp_m, \ 4376 kInputSIndices); \ 4377 } \ 4378 TEST(mnemonic##_D_D_D) { \ 4379 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ 4380 D, \ 4381 D, \ 4382 D, \ 4383 kInputDouble##inp_d, \ 4384 kInputDouble##inp_n, \ 4385 kInputDouble##inp_m, \ 4386 kInputDIndices); \ 4387 } 4388 4389 4390 #define DEFINE_TEST_NEON_BYELEMENT_DIFF(mnemonic, input_d, input_n, input_m) \ 4391 TEST(mnemonic##_4S_4H_H) { \ 4392 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ 4393 4S, \ 4394 4H, \ 4395 H, \ 4396 kInput32bits##input_d, \ 4397 kInput16bits##input_n, \ 4398 kInput16bits##input_m, \ 4399 kInputHIndices); \ 4400 } \ 4401 TEST(mnemonic##2_4S_8H_H) { \ 4402 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic##2, \ 4403 4S, \ 4404 8H, \ 4405 H, \ 4406 kInput32bits##input_d, \ 4407 kInput16bits##input_n, \ 4408 kInput16bits##input_m, \ 4409 kInputHIndices); \ 4410 } \ 4411 TEST(mnemonic##_2D_2S_S) { \ 4412 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ 4413 2D, \ 4414 2S, \ 4415 S, \ 4416 kInput64bits##input_d, \ 4417 kInput32bits##input_n, \ 4418 kInput32bits##input_m, \ 4419 kInputSIndices); \ 4420 } \ 4421 TEST(mnemonic##2_2D_4S_S) { \ 4422 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic##2, \ 4423 2D, \ 4424 4S, \ 4425 S, \ 4426 kInput64bits##input_d, \ 4427 kInput32bits##input_n, \ 4428 kInput32bits##input_m, \ 4429 kInputSIndices); \ 4430 } 4431 4432 #define DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR( \ 4433 mnemonic, input_d, input_n, input_m) \ 4434 TEST(mnemonic##_S_H_H) { \ 4435 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ 4436 S, \ 4437 H, \ 4438 H, \ 4439 kInput32bits##input_d, \ 4440 kInput16bits##input_n, \ 4441 kInput16bits##input_m, \ 4442 kInputHIndices); \ 4443 } \ 4444 TEST(mnemonic##_D_S_S) { \ 4445 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ 4446 D, \ 4447 S, \ 4448 S, \ 4449 kInput64bits##input_d, \ 4450 kInput32bits##input_n, \ 4451 kInput32bits##input_m, \ 4452 kInputSIndices); \ 4453 } 4454 4455 4456 #define CALL_TEST_NEON_HELPER_2OP2IMM( \ 4457 mnemonic, variant, input_d, input_imm1, input_n, input_imm2) \ 4458 { \ 4459 CALL_TEST_NEON_HELPER_OpImmOpImm(&MacroAssembler::mnemonic, \ 4460 mnemonic, \ 4461 variant, \ 4462 variant, \ 4463 input_d, \ 4464 input_imm1, \ 4465 input_n, \ 4466 input_imm2); \ 4467 } 4468 4469 #define DEFINE_TEST_NEON_2OP2IMM( \ 4470 mnemonic, input_d, input_imm1, input_n, input_imm2) \ 4471 TEST(mnemonic##_B) { \ 4472 CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic, \ 4473 16B, \ 4474 kInput8bits##input_d, \ 4475 kInput8bitsImm##input_imm1, \ 4476 kInput8bits##input_n, \ 4477 kInput8bitsImm##input_imm2); \ 4478 } \ 4479 TEST(mnemonic##_H) { \ 4480 CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic, \ 4481 8H, \ 4482 kInput16bits##input_d, \ 4483 kInput16bitsImm##input_imm1, \ 4484 kInput16bits##input_n, \ 4485 kInput16bitsImm##input_imm2); \ 4486 } \ 4487 TEST(mnemonic##_S) { \ 4488 CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic, \ 4489 4S, \ 4490 kInput32bits##input_d, \ 4491 kInput32bitsImm##input_imm1, \ 4492 kInput32bits##input_n, \ 4493 kInput32bitsImm##input_imm2); \ 4494 } \ 4495 TEST(mnemonic##_D) { \ 4496 CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic, \ 4497 2D, \ 4498 kInput64bits##input_d, \ 4499 kInput64bitsImm##input_imm1, \ 4500 kInput64bits##input_n, \ 4501 kInput64bitsImm##input_imm2); \ 4502 } 4503 4504 4505 // Advanced SIMD copy. 4506 DEFINE_TEST_NEON_2OP2IMM( 4507 ins, Basic, LaneCountFromZero, Basic, LaneCountFromZero) 4508 DEFINE_TEST_NEON_2OPIMM_COPY(dup, Basic, LaneCountFromZero) 4509 4510 4511 // Advanced SIMD scalar copy. 4512 DEFINE_TEST_NEON_2OPIMM_SCALAR(dup, Basic, LaneCountFromZero) 4513 4514 4515 // Advanced SIMD three same. 4516 DEFINE_TEST_NEON_3SAME_NO2D(shadd, Basic) 4517 DEFINE_TEST_NEON_3SAME(sqadd, Basic) 4518 DEFINE_TEST_NEON_3SAME_NO2D(srhadd, Basic) 4519 DEFINE_TEST_NEON_3SAME_NO2D(shsub, Basic) 4520 DEFINE_TEST_NEON_3SAME(sqsub, Basic) 4521 DEFINE_TEST_NEON_3SAME(cmgt, Basic) 4522 DEFINE_TEST_NEON_3SAME(cmge, Basic) 4523 DEFINE_TEST_NEON_3SAME(sshl, Basic) 4524 DEFINE_TEST_NEON_3SAME(sqshl, Basic) 4525 DEFINE_TEST_NEON_3SAME(srshl, Basic) 4526 DEFINE_TEST_NEON_3SAME(sqrshl, Basic) 4527 DEFINE_TEST_NEON_3SAME_NO2D(smax, Basic) 4528 DEFINE_TEST_NEON_3SAME_NO2D(smin, Basic) 4529 DEFINE_TEST_NEON_3SAME_NO2D(sabd, Basic) 4530 DEFINE_TEST_NEON_3SAME_NO2D(saba, Basic) 4531 DEFINE_TEST_NEON_3SAME(add, Basic) 4532 DEFINE_TEST_NEON_3SAME(cmtst, Basic) 4533 DEFINE_TEST_NEON_3SAME_NO2D(mla, Basic) 4534 DEFINE_TEST_NEON_3SAME_NO2D(mul, Basic) 4535 DEFINE_TEST_NEON_3SAME_NO2D(smaxp, Basic) 4536 DEFINE_TEST_NEON_3SAME_NO2D(sminp, Basic) 4537 DEFINE_TEST_NEON_3SAME_HS(sqdmulh, Basic) 4538 DEFINE_TEST_NEON_3SAME(addp, Basic) 4539 DEFINE_TEST_NEON_3SAME_FP(fmaxnm, Basic) 4540 DEFINE_TEST_NEON_3SAME_FP(fmla, Basic) 4541 DEFINE_TEST_NEON_3SAME_FP(fadd, Basic) 4542 DEFINE_TEST_NEON_3SAME_FP(fmulx, Basic) 4543 DEFINE_TEST_NEON_3SAME_FP(fcmeq, Basic) 4544 DEFINE_TEST_NEON_3SAME_FP(fmax, Basic) 4545 DEFINE_TEST_NEON_3SAME_FP(frecps, Basic) 4546 DEFINE_TEST_NEON_3SAME_8B_16B(and_, Basic) 4547 DEFINE_TEST_NEON_3SAME_8B_16B(bic, Basic) 4548 DEFINE_TEST_NEON_3SAME_FP(fminnm, Basic) 4549 DEFINE_TEST_NEON_3SAME_FP(fmls, Basic) 4550 DEFINE_TEST_NEON_3SAME_FP(fsub, Basic) 4551 DEFINE_TEST_NEON_3SAME_FP(fmin, Basic) 4552 DEFINE_TEST_NEON_3SAME_FP(frsqrts, Basic) 4553 DEFINE_TEST_NEON_3SAME_8B_16B(orr, Basic) 4554 DEFINE_TEST_NEON_3SAME_8B_16B(orn, Basic) 4555 DEFINE_TEST_NEON_3SAME_NO2D(uhadd, Basic) 4556 DEFINE_TEST_NEON_3SAME(uqadd, Basic) 4557 DEFINE_TEST_NEON_3SAME_NO2D(urhadd, Basic) 4558 DEFINE_TEST_NEON_3SAME_NO2D(uhsub, Basic) 4559 DEFINE_TEST_NEON_3SAME(uqsub, Basic) 4560 DEFINE_TEST_NEON_3SAME(cmhi, Basic) 4561 DEFINE_TEST_NEON_3SAME(cmhs, Basic) 4562 DEFINE_TEST_NEON_3SAME(ushl, Basic) 4563 DEFINE_TEST_NEON_3SAME(uqshl, Basic) 4564 DEFINE_TEST_NEON_3SAME(urshl, Basic) 4565 DEFINE_TEST_NEON_3SAME(uqrshl, Basic) 4566 DEFINE_TEST_NEON_3SAME_NO2D(umax, Basic) 4567 DEFINE_TEST_NEON_3SAME_NO2D(umin, Basic) 4568 DEFINE_TEST_NEON_3SAME_NO2D(uabd, Basic) 4569 DEFINE_TEST_NEON_3SAME_NO2D(uaba, Basic) 4570 DEFINE_TEST_NEON_3SAME(sub, Basic) 4571 DEFINE_TEST_NEON_3SAME(cmeq, Basic) 4572 DEFINE_TEST_NEON_3SAME_NO2D(mls, Basic) 4573 DEFINE_TEST_NEON_3SAME_8B_16B(pmul, Basic) 4574 DEFINE_TEST_NEON_3SAME_NO2D(uminp, Basic) 4575 DEFINE_TEST_NEON_3SAME_NO2D(umaxp, Basic) 4576 DEFINE_TEST_NEON_3SAME_HS(sqrdmulh, Basic) 4577 DEFINE_TEST_NEON_3SAME_HS(sqrdmlah, Basic) 4578 DEFINE_TEST_NEON_3SAME_HS(sqrdmlsh, Basic) 4579 DEFINE_TEST_NEON_3DIFF_DOUBLE_WIDE(udot, Basic) 4580 DEFINE_TEST_NEON_3DIFF_DOUBLE_WIDE(sdot, Basic) 4581 DEFINE_TEST_NEON_3SAME_FP(fmaxnmp, Basic) 4582 DEFINE_TEST_NEON_3SAME_FP(faddp, Basic) 4583 DEFINE_TEST_NEON_3SAME_FP(fmul, Basic) 4584 DEFINE_TEST_NEON_3SAME_FP(fcmge, Basic) 4585 DEFINE_TEST_NEON_3SAME_FP(facge, Basic) 4586 DEFINE_TEST_NEON_3SAME_FP(fmaxp, Basic) 4587 DEFINE_TEST_NEON_3SAME_FP(fdiv, Basic) 4588 DEFINE_TEST_NEON_3SAME_8B_16B(eor, Basic) 4589 DEFINE_TEST_NEON_3SAME_8B_16B(bsl, Basic) 4590 DEFINE_TEST_NEON_3SAME_FP(fminnmp, Basic) 4591 DEFINE_TEST_NEON_3SAME_FP(fabd, Basic) 4592 DEFINE_TEST_NEON_3SAME_FP(fcmgt, Basic) 4593 DEFINE_TEST_NEON_3SAME_FP(facgt, Basic) 4594 DEFINE_TEST_NEON_3SAME_FP(fminp, Basic) 4595 DEFINE_TEST_NEON_3SAME_8B_16B(bit, Basic) 4596 DEFINE_TEST_NEON_3SAME_8B_16B(bif, Basic) 4597 4598 4599 // Advanced SIMD scalar three same. 4600 DEFINE_TEST_NEON_3SAME_SCALAR(sqadd, Basic) 4601 DEFINE_TEST_NEON_3SAME_SCALAR(sqsub, Basic) 4602 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmgt, Basic) 4603 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmge, Basic) 4604 DEFINE_TEST_NEON_3SAME_SCALAR_D(sshl, Basic) 4605 DEFINE_TEST_NEON_3SAME_SCALAR(sqshl, Basic) 4606 DEFINE_TEST_NEON_3SAME_SCALAR_D(srshl, Basic) 4607 DEFINE_TEST_NEON_3SAME_SCALAR(sqrshl, Basic) 4608 DEFINE_TEST_NEON_3SAME_SCALAR_D(add, Basic) 4609 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmtst, Basic) 4610 DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqdmulh, Basic) 4611 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fmulx, Basic) 4612 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmeq, Basic) 4613 DEFINE_TEST_NEON_3SAME_FP_SCALAR(frecps, Basic) 4614 DEFINE_TEST_NEON_3SAME_FP_SCALAR(frsqrts, Basic) 4615 DEFINE_TEST_NEON_3SAME_SCALAR_D(uqadd, Basic) 4616 DEFINE_TEST_NEON_3SAME_SCALAR_D(uqsub, Basic) 4617 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmhi, Basic) 4618 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmhs, Basic) 4619 DEFINE_TEST_NEON_3SAME_SCALAR_D(ushl, Basic) 4620 DEFINE_TEST_NEON_3SAME_SCALAR(uqshl, Basic) 4621 DEFINE_TEST_NEON_3SAME_SCALAR_D(urshl, Basic) 4622 DEFINE_TEST_NEON_3SAME_SCALAR(uqrshl, Basic) 4623 DEFINE_TEST_NEON_3SAME_SCALAR_D(sub, Basic) 4624 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmeq, Basic) 4625 DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqrdmulh, Basic) 4626 DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqrdmlah, Basic) 4627 DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqrdmlsh, Basic) 4628 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmge, Basic) 4629 DEFINE_TEST_NEON_3SAME_FP_SCALAR(facge, Basic) 4630 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fabd, Basic) 4631 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmgt, Basic) 4632 DEFINE_TEST_NEON_3SAME_FP_SCALAR(facgt, Basic) 4633 4634 4635 // Advanced SIMD three different. 4636 DEFINE_TEST_NEON_3DIFF_LONG(saddl, Basic) 4637 DEFINE_TEST_NEON_3DIFF_WIDE(saddw, Basic) 4638 DEFINE_TEST_NEON_3DIFF_LONG(ssubl, Basic) 4639 DEFINE_TEST_NEON_3DIFF_WIDE(ssubw, Basic) 4640 DEFINE_TEST_NEON_3DIFF_NARROW(addhn, Basic) 4641 DEFINE_TEST_NEON_3DIFF_LONG(sabal, Basic) 4642 DEFINE_TEST_NEON_3DIFF_NARROW(subhn, Basic) 4643 DEFINE_TEST_NEON_3DIFF_LONG(sabdl, Basic) 4644 DEFINE_TEST_NEON_3DIFF_LONG(smlal, Basic) 4645 DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmlal, Basic) 4646 DEFINE_TEST_NEON_3DIFF_LONG(smlsl, Basic) 4647 DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmlsl, Basic) 4648 DEFINE_TEST_NEON_3DIFF_LONG(smull, Basic) 4649 DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmull, Basic) 4650 DEFINE_TEST_NEON_3DIFF_LONG_8H(pmull, Basic) 4651 DEFINE_TEST_NEON_3DIFF_LONG(uaddl, Basic) 4652 DEFINE_TEST_NEON_3DIFF_WIDE(uaddw, Basic) 4653 DEFINE_TEST_NEON_3DIFF_LONG(usubl, Basic) 4654 DEFINE_TEST_NEON_3DIFF_WIDE(usubw, Basic) 4655 DEFINE_TEST_NEON_3DIFF_NARROW(raddhn, Basic) 4656 DEFINE_TEST_NEON_3DIFF_LONG(uabal, Basic) 4657 DEFINE_TEST_NEON_3DIFF_NARROW(rsubhn, Basic) 4658 DEFINE_TEST_NEON_3DIFF_LONG(uabdl, Basic) 4659 DEFINE_TEST_NEON_3DIFF_LONG(umlal, Basic) 4660 DEFINE_TEST_NEON_3DIFF_LONG(umlsl, Basic) 4661 DEFINE_TEST_NEON_3DIFF_LONG(umull, Basic) 4662 4663 4664 // Advanced SIMD scalar three different. 4665 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmlal, Basic) 4666 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmlsl, Basic) 4667 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmull, Basic) 4668 4669 4670 // Advanced SIMD scalar pairwise. 4671 TEST(addp_SCALAR) { 4672 CALL_TEST_NEON_HELPER_2DIFF(addp, D, 2D, kInput64bitsBasic); 4673 } 4674 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fmaxnmp, Basic) 4675 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(faddp, Basic) 4676 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fmaxp, Basic) 4677 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fminnmp, Basic) 4678 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fminp, Basic) 4679 4680 4681 // Advanced SIMD shift by immediate. 4682 DEFINE_TEST_NEON_2OPIMM(sshr, Basic, TypeWidth) 4683 DEFINE_TEST_NEON_2OPIMM(ssra, Basic, TypeWidth) 4684 DEFINE_TEST_NEON_2OPIMM(srshr, Basic, TypeWidth) 4685 DEFINE_TEST_NEON_2OPIMM(srsra, Basic, TypeWidth) 4686 DEFINE_TEST_NEON_2OPIMM(shl, Basic, TypeWidthFromZero) 4687 DEFINE_TEST_NEON_2OPIMM(sqshl, Basic, TypeWidthFromZero) 4688 DEFINE_TEST_NEON_2OPIMM_NARROW(shrn, Basic, TypeWidth) 4689 DEFINE_TEST_NEON_2OPIMM_NARROW(rshrn, Basic, TypeWidth) 4690 DEFINE_TEST_NEON_2OPIMM_NARROW(sqshrn, Basic, TypeWidth) 4691 DEFINE_TEST_NEON_2OPIMM_NARROW(sqrshrn, Basic, TypeWidth) 4692 DEFINE_TEST_NEON_2OPIMM_LONG(sshll, Basic, TypeWidthFromZero) 4693 DEFINE_TEST_NEON_2OPIMM_HSD(scvtf, 4694 FixedPointConversions, 4695 TypeWidthFromZeroToWidth) 4696 DEFINE_TEST_NEON_2OPIMM_FP(fcvtzs, Conversions, TypeWidthFromZeroToWidth) 4697 DEFINE_TEST_NEON_2OPIMM(ushr, Basic, TypeWidth) 4698 DEFINE_TEST_NEON_2OPIMM(usra, Basic, TypeWidth) 4699 DEFINE_TEST_NEON_2OPIMM(urshr, Basic, TypeWidth) 4700 DEFINE_TEST_NEON_2OPIMM(ursra, Basic, TypeWidth) 4701 DEFINE_TEST_NEON_2OPIMM(sri, Basic, TypeWidth) 4702 DEFINE_TEST_NEON_2OPIMM(sli, Basic, TypeWidthFromZero) 4703 DEFINE_TEST_NEON_2OPIMM(sqshlu, Basic, TypeWidthFromZero) 4704 DEFINE_TEST_NEON_2OPIMM(uqshl, Basic, TypeWidthFromZero) 4705 DEFINE_TEST_NEON_2OPIMM_NARROW(sqshrun, Basic, TypeWidth) 4706 DEFINE_TEST_NEON_2OPIMM_NARROW(sqrshrun, Basic, TypeWidth) 4707 DEFINE_TEST_NEON_2OPIMM_NARROW(uqshrn, Basic, TypeWidth) 4708 DEFINE_TEST_NEON_2OPIMM_NARROW(uqrshrn, Basic, TypeWidth) 4709 DEFINE_TEST_NEON_2OPIMM_LONG(ushll, Basic, TypeWidthFromZero) 4710 DEFINE_TEST_NEON_2OPIMM_HSD(ucvtf, 4711 FixedPointConversions, 4712 TypeWidthFromZeroToWidth) 4713 DEFINE_TEST_NEON_2OPIMM_FP(fcvtzu, Conversions, TypeWidthFromZeroToWidth) 4714 4715 4716 // Advanced SIMD scalar shift by immediate.. 4717 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sshr, Basic, TypeWidth) 4718 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ssra, Basic, TypeWidth) 4719 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(srshr, Basic, TypeWidth) 4720 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(srsra, Basic, TypeWidth) 4721 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(shl, Basic, TypeWidthFromZero) 4722 DEFINE_TEST_NEON_2OPIMM_SCALAR(sqshl, Basic, TypeWidthFromZero) 4723 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqshrn, Basic, TypeWidth) 4724 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqrshrn, Basic, TypeWidth) 4725 DEFINE_TEST_NEON_2OPIMM_SCALAR_HSD(scvtf, 4726 FixedPointConversions, 4727 TypeWidthFromZeroToWidth) 4728 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(fcvtzs, Conversions, TypeWidthFromZeroToWidth) 4729 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ushr, Basic, TypeWidth) 4730 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(usra, Basic, TypeWidth) 4731 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(urshr, Basic, TypeWidth) 4732 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ursra, Basic, TypeWidth) 4733 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sri, Basic, TypeWidth) 4734 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sli, Basic, TypeWidthFromZero) 4735 DEFINE_TEST_NEON_2OPIMM_SCALAR(sqshlu, Basic, TypeWidthFromZero) 4736 DEFINE_TEST_NEON_2OPIMM_SCALAR(uqshl, Basic, TypeWidthFromZero) 4737 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqshrun, Basic, TypeWidth) 4738 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqrshrun, Basic, TypeWidth) 4739 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(uqshrn, Basic, TypeWidth) 4740 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(uqrshrn, Basic, TypeWidth) 4741 DEFINE_TEST_NEON_2OPIMM_SCALAR_HSD(ucvtf, 4742 FixedPointConversions, 4743 TypeWidthFromZeroToWidth) 4744 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(fcvtzu, Conversions, TypeWidthFromZeroToWidth) 4745 4746 4747 // Advanced SIMD two-register miscellaneous. 4748 DEFINE_TEST_NEON_2SAME_NO2D(rev64, Basic) 4749 DEFINE_TEST_NEON_2SAME_8B_16B(rev16, Basic) 4750 DEFINE_TEST_NEON_2DIFF_LONG(saddlp, Basic) 4751 DEFINE_TEST_NEON_2SAME(suqadd, Basic) 4752 DEFINE_TEST_NEON_2SAME_NO2D(cls, Basic) 4753 DEFINE_TEST_NEON_2SAME_8B_16B(cnt, Basic) 4754 DEFINE_TEST_NEON_2DIFF_LONG(sadalp, Basic) 4755 DEFINE_TEST_NEON_2SAME(sqabs, Basic) 4756 DEFINE_TEST_NEON_2OPIMM(cmgt, Basic, Zero) 4757 DEFINE_TEST_NEON_2OPIMM(cmeq, Basic, Zero) 4758 DEFINE_TEST_NEON_2OPIMM(cmlt, Basic, Zero) 4759 DEFINE_TEST_NEON_2SAME(abs, Basic) 4760 DEFINE_TEST_NEON_2DIFF_NARROW(xtn, Basic) 4761 DEFINE_TEST_NEON_2DIFF_NARROW(sqxtn, Basic) 4762 DEFINE_TEST_NEON_2DIFF_FP_NARROW(fcvtn, Conversions) 4763 DEFINE_TEST_NEON_2DIFF_FP_LONG(fcvtl, Conversions) 4764 DEFINE_TEST_NEON_2SAME_FP_FP16(frintn, Conversions) 4765 DEFINE_TEST_NEON_2SAME_FP_FP16(frintm, Conversions) 4766 DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtns, Conversions) 4767 DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtms, Conversions) 4768 DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtas, Conversions) 4769 // SCVTF (vector, integer) covered by SCVTF(vector, fixed point) with fbits 0. 4770 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmgt, Basic, Zero) 4771 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmeq, Basic, Zero) 4772 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmlt, Basic, Zero) 4773 DEFINE_TEST_NEON_2SAME_FP_FP16(fabs, Basic) 4774 DEFINE_TEST_NEON_2SAME_FP_FP16(frintp, Conversions) 4775 DEFINE_TEST_NEON_2SAME_FP_FP16(frintz, Conversions) 4776 DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtps, Conversions) 4777 // FCVTZS(vector, integer) covered by FCVTZS(vector, fixed point) with fbits 0. 4778 DEFINE_TEST_NEON_2SAME_2S_4S(urecpe, Basic) 4779 DEFINE_TEST_NEON_2SAME_FP_FP16(frecpe, Basic) 4780 DEFINE_TEST_NEON_2SAME_BH(rev32, Basic) 4781 DEFINE_TEST_NEON_2DIFF_LONG(uaddlp, Basic) 4782 DEFINE_TEST_NEON_2SAME(usqadd, Basic) 4783 DEFINE_TEST_NEON_2SAME_NO2D(clz, Basic) 4784 DEFINE_TEST_NEON_2DIFF_LONG(uadalp, Basic) 4785 DEFINE_TEST_NEON_2SAME(sqneg, Basic) 4786 DEFINE_TEST_NEON_2OPIMM(cmge, Basic, Zero) 4787 DEFINE_TEST_NEON_2OPIMM(cmle, Basic, Zero) 4788 DEFINE_TEST_NEON_2SAME(neg, Basic) 4789 DEFINE_TEST_NEON_2DIFF_NARROW(sqxtun, Basic) 4790 DEFINE_TEST_NEON_2OPIMM_LONG(shll, Basic, SHLL) 4791 DEFINE_TEST_NEON_2DIFF_NARROW(uqxtn, Basic) 4792 DEFINE_TEST_NEON_2DIFF_FP_NARROW_2S(fcvtxn, Conversions) 4793 DEFINE_TEST_NEON_2SAME_FP_FP16(frinta, Conversions) 4794 DEFINE_TEST_NEON_2SAME_FP_FP16(frintx, Conversions) 4795 DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtnu, Conversions) 4796 DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtmu, Conversions) 4797 DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtau, Conversions) 4798 // UCVTF (vector, integer) covered by UCVTF(vector, fixed point) with fbits 0. 4799 DEFINE_TEST_NEON_2SAME_8B_16B(not_, Basic) 4800 DEFINE_TEST_NEON_2SAME_8B_16B(rbit, Basic) 4801 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmge, Basic, Zero) 4802 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmle, Basic, Zero) 4803 DEFINE_TEST_NEON_2SAME_FP_FP16(fneg, Basic) 4804 DEFINE_TEST_NEON_2SAME_FP_FP16(frinti, Conversions) 4805 DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtpu, Conversions) 4806 // FCVTZU(vector, integer) covered by FCVTZU(vector, fixed point) with fbits 0. 4807 DEFINE_TEST_NEON_2SAME_2S_4S(ursqrte, Basic) 4808 DEFINE_TEST_NEON_2SAME_FP_FP16(frsqrte, Basic) 4809 DEFINE_TEST_NEON_2SAME_FP_FP16(fsqrt, Basic) 4810 4811 4812 // Advanced SIMD scalar two-register miscellaneous. 4813 DEFINE_TEST_NEON_2SAME_SCALAR(suqadd, Basic) 4814 DEFINE_TEST_NEON_2SAME_SCALAR(sqabs, Basic) 4815 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmgt, Basic, Zero) 4816 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmeq, Basic, Zero) 4817 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmlt, Basic, Zero) 4818 DEFINE_TEST_NEON_2SAME_SCALAR_D(abs, Basic) 4819 DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(sqxtn, Basic) 4820 DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtns, Conversions) 4821 DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtms, Conversions) 4822 DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtas, Conversions) 4823 // SCVTF (vector, integer) covered by SCVTF(vector, fixed point) with fbits 0. 4824 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_HSD(fcmgt, Basic, Zero) 4825 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_HSD(fcmeq, Basic, Zero) 4826 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_HSD(fcmlt, Basic, Zero) 4827 DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtps, Conversions) 4828 // FCVTZS(vector, integer) covered by FCVTZS(vector, fixed point) with fbits 0. 4829 DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(frecpe, Basic) 4830 DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(frecpx, Basic) 4831 DEFINE_TEST_NEON_2SAME_SCALAR(usqadd, Basic) 4832 DEFINE_TEST_NEON_2SAME_SCALAR(sqneg, Basic) 4833 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmge, Basic, Zero) 4834 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmle, Basic, Zero) 4835 DEFINE_TEST_NEON_2SAME_SCALAR_D(neg, Basic) 4836 DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(sqxtun, Basic) 4837 DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(uqxtn, Basic) 4838 TEST(fcvtxn_SCALAR) { 4839 CALL_TEST_NEON_HELPER_2DIFF(fcvtxn, S, D, kInputDoubleConversions); 4840 } 4841 DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtnu, Conversions) 4842 DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtmu, Conversions) 4843 DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtau, Conversions) 4844 // UCVTF (vector, integer) covered by UCVTF(vector, fixed point) with fbits 0. 4845 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_HSD(fcmge, Basic, Zero) 4846 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_HSD(fcmle, Basic, Zero) 4847 DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtpu, Conversions) 4848 // FCVTZU(vector, integer) covered by FCVTZU(vector, fixed point) with fbits 0. 4849 DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(frsqrte, Basic) 4850 4851 4852 // Advanced SIMD across lanes. 4853 DEFINE_TEST_NEON_ACROSS_LONG(saddlv, Basic) 4854 DEFINE_TEST_NEON_ACROSS(smaxv, Basic) 4855 DEFINE_TEST_NEON_ACROSS(sminv, Basic) 4856 DEFINE_TEST_NEON_ACROSS(addv, Basic) 4857 DEFINE_TEST_NEON_ACROSS_LONG(uaddlv, Basic) 4858 DEFINE_TEST_NEON_ACROSS(umaxv, Basic) 4859 DEFINE_TEST_NEON_ACROSS(uminv, Basic) 4860 DEFINE_TEST_NEON_ACROSS_FP(fmaxnmv, Basic) 4861 DEFINE_TEST_NEON_ACROSS_FP(fmaxv, Basic) 4862 DEFINE_TEST_NEON_ACROSS_FP(fminnmv, Basic) 4863 DEFINE_TEST_NEON_ACROSS_FP(fminv, Basic) 4864 4865 4866 // Advanced SIMD permute. 4867 DEFINE_TEST_NEON_3SAME(uzp1, Basic) 4868 DEFINE_TEST_NEON_3SAME(trn1, Basic) 4869 DEFINE_TEST_NEON_3SAME(zip1, Basic) 4870 DEFINE_TEST_NEON_3SAME(uzp2, Basic) 4871 DEFINE_TEST_NEON_3SAME(trn2, Basic) 4872 DEFINE_TEST_NEON_3SAME(zip2, Basic) 4873 4874 4875 // Advanced SIMD vector x indexed element. 4876 DEFINE_TEST_NEON_BYELEMENT_DIFF(smlal, Basic, Basic, Basic) 4877 DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmlal, Basic, Basic, Basic) 4878 DEFINE_TEST_NEON_BYELEMENT_DIFF(smlsl, Basic, Basic, Basic) 4879 DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmlsl, Basic, Basic, Basic) 4880 DEFINE_TEST_NEON_BYELEMENT(mul, Basic, Basic, Basic) 4881 DEFINE_TEST_NEON_BYELEMENT_DIFF(smull, Basic, Basic, Basic) 4882 DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmull, Basic, Basic, Basic) 4883 DEFINE_TEST_NEON_BYELEMENT(sqdmulh, Basic, Basic, Basic) 4884 DEFINE_TEST_NEON_BYELEMENT(sqrdmulh, Basic, Basic, Basic) 4885 DEFINE_TEST_NEON_BYELEMENT(sqrdmlah, Basic, Basic, Basic) 4886 DEFINE_TEST_NEON_BYELEMENT(sqrdmlsh, Basic, Basic, Basic) 4887 DEFINE_TEST_NEON_BYELEMENT_DOT_PRODUCT(udot, Basic, Basic, Basic) 4888 DEFINE_TEST_NEON_BYELEMENT_DOT_PRODUCT(sdot, Basic, Basic, Basic) 4889 DEFINE_TEST_NEON_FP_BYELEMENT(fmla, Basic, Basic, Basic) 4890 DEFINE_TEST_NEON_FP_BYELEMENT(fmls, Basic, Basic, Basic) 4891 DEFINE_TEST_NEON_FP_BYELEMENT(fmul, Basic, Basic, Basic) 4892 DEFINE_TEST_NEON_BYELEMENT(mla, Basic, Basic, Basic) 4893 DEFINE_TEST_NEON_BYELEMENT_DIFF(umlal, Basic, Basic, Basic) 4894 DEFINE_TEST_NEON_BYELEMENT(mls, Basic, Basic, Basic) 4895 DEFINE_TEST_NEON_BYELEMENT_DIFF(umlsl, Basic, Basic, Basic) 4896 DEFINE_TEST_NEON_BYELEMENT_DIFF(umull, Basic, Basic, Basic) 4897 DEFINE_TEST_NEON_FP_BYELEMENT(fmulx, Basic, Basic, Basic) 4898 4899 4900 // Advanced SIMD scalar x indexed element. 4901 DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmlal, Basic, Basic, Basic) 4902 DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmlsl, Basic, Basic, Basic) 4903 DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmull, Basic, Basic, Basic) 4904 DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqdmulh, Basic, Basic, Basic) 4905 DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqrdmulh, Basic, Basic, Basic) 4906 DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqrdmlah, Basic, Basic, Basic) 4907 DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqrdmlsh, Basic, Basic, Basic) 4908 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmla, Basic, Basic, Basic) 4909 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmls, Basic, Basic, Basic) 4910 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmul, Basic, Basic, Basic) 4911 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmulx, Basic, Basic, Basic) 4912 4913 4914 #undef __ 4915 #define __ masm-> 4916 4917 #if defined(VIXL_INCLUDE_SIMULATOR_AARCH64) && \ 4918 defined(VIXL_HAS_ABI_SUPPORT) && __cplusplus >= 201103L && \ 4919 (defined(__clang__) || GCC_VERSION_OR_NEWER(4, 9, 1)) 4920 4921 // Generate a function that stores zero to a hard-coded address. 4922 Instruction* GenerateStoreZero(MacroAssembler* masm, int32_t* target) { 4923 masm->Reset(); 4924 4925 UseScratchRegisterScope temps(masm); 4926 Register temp = temps.AcquireX(); 4927 __ Mov(temp, reinterpret_cast<intptr_t>(target)); 4928 __ Str(wzr, MemOperand(temp)); 4929 __ Ret(); 4930 4931 masm->FinalizeCode(); 4932 return masm->GetBuffer()->GetStartAddress<Instruction*>(); 4933 } 4934 4935 4936 // Generate a function that stores the `int32_t` argument to a hard-coded 4937 // address. 4938 // In this example and the other below, we use the `abi` object to retrieve 4939 // argument and return locations even though we could easily hard code them. 4940 // This mirrors how more generic code (e.g. templated) user would use these 4941 // mechanisms. 4942 Instruction* GenerateStoreInput(MacroAssembler* masm, int32_t* target) { 4943 masm->Reset(); 4944 4945 ABI abi; 4946 Register input = 4947 Register(abi.GetNextParameterGenericOperand<int32_t>().GetCPURegister()); 4948 4949 UseScratchRegisterScope temps(masm); 4950 Register temp = temps.AcquireX(); 4951 __ Mov(temp, reinterpret_cast<intptr_t>(target)); 4952 __ Str(input, MemOperand(temp)); 4953 __ Ret(); 4954 4955 masm->FinalizeCode(); 4956 return masm->GetBuffer()->GetStartAddress<Instruction*>(); 4957 } 4958 4959 4960 // A minimal implementation of a `pow` function. 4961 Instruction* GeneratePow(MacroAssembler* masm, unsigned pow) { 4962 masm->Reset(); 4963 4964 ABI abi; 4965 Register input = 4966 Register(abi.GetNextParameterGenericOperand<int64_t>().GetCPURegister()); 4967 Register result = 4968 Register(abi.GetReturnGenericOperand<int64_t>().GetCPURegister()); 4969 UseScratchRegisterScope temps(masm); 4970 Register temp = temps.AcquireX(); 4971 4972 __ Mov(temp, 1); 4973 for (unsigned i = 0; i < pow; i++) { 4974 __ Mul(temp, temp, input); 4975 } 4976 __ Mov(result, temp); 4977 __ Ret(); 4978 4979 masm->FinalizeCode(); 4980 return masm->GetBuffer()->GetStartAddress<Instruction*>(); 4981 } 4982 4983 4984 Instruction* GenerateSum(MacroAssembler* masm) { 4985 masm->Reset(); 4986 4987 ABI abi; 4988 FPRegister input_1 = 4989 FPRegister(abi.GetNextParameterGenericOperand<float>().GetCPURegister()); 4990 Register input_2 = 4991 Register(abi.GetNextParameterGenericOperand<int64_t>().GetCPURegister()); 4992 FPRegister input_3 = 4993 FPRegister(abi.GetNextParameterGenericOperand<double>().GetCPURegister()); 4994 FPRegister result = 4995 FPRegister(abi.GetReturnGenericOperand<double>().GetCPURegister()); 4996 4997 UseScratchRegisterScope temps(masm); 4998 FPRegister temp = temps.AcquireD(); 4999 5000 __ Fcvt(input_1.D(), input_1); 5001 __ Scvtf(temp, input_2); 5002 __ Fadd(temp, temp, input_1.D()); 5003 __ Fadd(result, temp, input_3); 5004 __ Ret(); 5005 5006 masm->FinalizeCode(); 5007 return masm->GetBuffer()->GetStartAddress<Instruction*>(); 5008 } 5009 5010 5011 TEST(RunFrom) { 5012 SETUP_WITH_FEATURES(CPUFeatures::kFP); 5013 5014 // Run a function returning `void` and taking no argument. 5015 int32_t value = 0xbad; 5016 simulator.RunFrom(GenerateStoreZero(&masm, &value)); 5017 VIXL_CHECK(value == 0); 5018 5019 // Run a function returning `void` and taking one argument. 5020 int32_t argument = 0xf00d; 5021 simulator.RunFrom<void, int32_t>(GenerateStoreInput(&masm, &value), argument); 5022 VIXL_CHECK(value == 0xf00d); 5023 5024 // Run a function taking one argument and returning a value. 5025 int64_t res_int64_t; 5026 res_int64_t = 5027 simulator.RunFrom<int64_t, int64_t>(GeneratePow(&masm, 0), 0xbad); 5028 VIXL_CHECK(res_int64_t == 1); 5029 res_int64_t = simulator.RunFrom<int64_t, int64_t>(GeneratePow(&masm, 1), 123); 5030 VIXL_CHECK(res_int64_t == 123); 5031 res_int64_t = simulator.RunFrom<int64_t, int64_t>(GeneratePow(&masm, 10), 2); 5032 VIXL_CHECK(res_int64_t == 1024); 5033 5034 // Run a function taking multiple arguments in registers. 5035 double res_double = 5036 simulator.RunFrom<double, float, int64_t, double>(GenerateSum(&masm), 5037 1.0, 5038 2, 5039 3.0); 5040 VIXL_CHECK(res_double == 6.0); 5041 } 5042 #endif 5043 5044 5045 } // namespace aarch64 5046 } // namespace vixl 5047